Rewrite wxTextFile's newlines parsing to fix multiple bugs.

Remove the complicated parsing state machine that contained bugs with
unexpected inputs and was very hard to modify correctly.  Replace it
with much simpler code that looks ahead, instead of deducing line
endings from past characters.

The new code never loses lines with data and calls AddLine() on the
first newline character it encounters, peeking ahead to determine the
line ending type.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@75799 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík 2014-02-05 18:28:57 +00:00
parent fe6f8254fb
commit 8dd971e45b

View File

@ -208,107 +208,46 @@ bool wxTextFile::OnRead(const wxMBConv& conv)
// now break the buffer in lines
// was the last processed character a CR?
bool lastWasCR = false;
// the beginning of the current line, changes inside the loop
wxString::const_iterator lineStart = str.begin();
const wxString::const_iterator end = str.end();
for ( wxString::const_iterator p = lineStart; p != end; p++ )
{
const wxChar ch = *p;
switch ( ch )
if ( ch == '\r' || ch == '\n' )
{
case '\n':
// could be a DOS or Unix EOL
if ( lastWasCR )
{
if ( p - 1 >= lineStart )
{
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
}
else
{
// there were two line endings, so add an empty line:
AddLine(wxEmptyString, wxTextFileType_Dos);
}
}
else // bare '\n', Unix style
{
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
}
// Determine the kind of line ending this is.
wxTextFileType lineType = wxTextFileType_None;
if ( ch == '\r' )
{
wxString::const_iterator next = p + 1;
if ( next != end && *next == '\n' )
lineType = wxTextFileType_Dos;
else
lineType = wxTextFileType_Mac;
}
else // ch == '\n'
{
lineType = wxTextFileType_Unix;
}
lineStart = p + 1;
lastWasCR = false;
break;
AddLine(wxString(lineStart, p), lineType);
case '\r':
if ( lastWasCR )
{
wxString::const_iterator next = p + 1;
// Peek at the next character to detect weirdly formatted
// files ending in CRCRLF. Without this, we would silently
// loose all the lines; this way, we insert empty lines
// (as some editors do), but don't loose any data.
// See here for more information:
// http://stackoverflow.com/questions/6998506/text-file-with-0d-0d-0a-line-breaks
if ( next != end && *next == '\n' )
{
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
}
else
{
// Mac empty line
AddLine(wxEmptyString, wxTextFileType_Mac);
}
lineStart = next;
}
//else: we don't know what this is yet -- could be a Mac EOL or
// start of DOS EOL so wait for next char
// DOS EOL is the only one consisting of two chars, not one.
if ( lineType == wxTextFileType_Dos )
p++;
lastWasCR = true;
break;
default:
if ( lastWasCR )
{
// Mac line termination
if ( p - 1 >= lineStart )
{
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
}
else
{
// there were two line endings, so add an empty line:
AddLine(wxEmptyString, wxTextFileType_Mac);
}
lineStart = p;
}
lastWasCR = false;
lineStart = p + 1;
}
}
// anything in the last line?
if ( lineStart != end )
{
// add the last line, notice that it may have been terminated with CR
// as we don't end the line immediately when we see a CR, as it could
// be followed by a LF.
// Add the last line; notice that it is certainly not terminated with a
// newline, otherwise it would be handled above.
wxString lastLine(lineStart, end);
wxTextFileType lastType;
if ( lastWasCR )
{
// last line had Mac EOL, exclude it from the string
lastLine.RemoveLast();
lastType = wxTextFileType_Mac;
}
else
{
// last line wasn't terminated at all
lastType = wxTextFileType_None;
}
AddLine(lastLine, lastType);
AddLine(lastLine, wxTextFileType_None);
}
return true;