Rewrite wxTextFile's newlines parsing to fix multiple bugs.
Remove the complicated parsing state machine that contained bugs with unexpected inputs and was very hard to modify correctly. Replace it with much simpler code that looks ahead, instead of deducing line endings from past characters. The new code never loses lines with data and calls AddLine() on the first newline character it encounters, peeking ahead to determine the line ending type. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@75799 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
fe6f8254fb
commit
8dd971e45b
@ -208,107 +208,46 @@ bool wxTextFile::OnRead(const wxMBConv& conv)
|
||||
|
||||
// now break the buffer in lines
|
||||
|
||||
// was the last processed character a CR?
|
||||
bool lastWasCR = false;
|
||||
|
||||
// the beginning of the current line, changes inside the loop
|
||||
wxString::const_iterator lineStart = str.begin();
|
||||
const wxString::const_iterator end = str.end();
|
||||
for ( wxString::const_iterator p = lineStart; p != end; p++ )
|
||||
{
|
||||
const wxChar ch = *p;
|
||||
switch ( ch )
|
||||
if ( ch == '\r' || ch == '\n' )
|
||||
{
|
||||
case '\n':
|
||||
// could be a DOS or Unix EOL
|
||||
if ( lastWasCR )
|
||||
{
|
||||
if ( p - 1 >= lineStart )
|
||||
{
|
||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
|
||||
}
|
||||
else
|
||||
{
|
||||
// there were two line endings, so add an empty line:
|
||||
AddLine(wxEmptyString, wxTextFileType_Dos);
|
||||
}
|
||||
}
|
||||
else // bare '\n', Unix style
|
||||
{
|
||||
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
|
||||
}
|
||||
// Determine the kind of line ending this is.
|
||||
wxTextFileType lineType = wxTextFileType_None;
|
||||
if ( ch == '\r' )
|
||||
{
|
||||
wxString::const_iterator next = p + 1;
|
||||
if ( next != end && *next == '\n' )
|
||||
lineType = wxTextFileType_Dos;
|
||||
else
|
||||
lineType = wxTextFileType_Mac;
|
||||
}
|
||||
else // ch == '\n'
|
||||
{
|
||||
lineType = wxTextFileType_Unix;
|
||||
}
|
||||
|
||||
lineStart = p + 1;
|
||||
lastWasCR = false;
|
||||
break;
|
||||
AddLine(wxString(lineStart, p), lineType);
|
||||
|
||||
case '\r':
|
||||
if ( lastWasCR )
|
||||
{
|
||||
wxString::const_iterator next = p + 1;
|
||||
// Peek at the next character to detect weirdly formatted
|
||||
// files ending in CRCRLF. Without this, we would silently
|
||||
// loose all the lines; this way, we insert empty lines
|
||||
// (as some editors do), but don't loose any data.
|
||||
// See here for more information:
|
||||
// http://stackoverflow.com/questions/6998506/text-file-with-0d-0d-0a-line-breaks
|
||||
if ( next != end && *next == '\n' )
|
||||
{
|
||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Mac empty line
|
||||
AddLine(wxEmptyString, wxTextFileType_Mac);
|
||||
}
|
||||
lineStart = next;
|
||||
}
|
||||
//else: we don't know what this is yet -- could be a Mac EOL or
|
||||
// start of DOS EOL so wait for next char
|
||||
// DOS EOL is the only one consisting of two chars, not one.
|
||||
if ( lineType == wxTextFileType_Dos )
|
||||
p++;
|
||||
|
||||
lastWasCR = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
if ( lastWasCR )
|
||||
{
|
||||
// Mac line termination
|
||||
if ( p - 1 >= lineStart )
|
||||
{
|
||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
|
||||
}
|
||||
else
|
||||
{
|
||||
// there were two line endings, so add an empty line:
|
||||
AddLine(wxEmptyString, wxTextFileType_Mac);
|
||||
}
|
||||
lineStart = p;
|
||||
}
|
||||
lastWasCR = false;
|
||||
lineStart = p + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// anything in the last line?
|
||||
if ( lineStart != end )
|
||||
{
|
||||
// add the last line, notice that it may have been terminated with CR
|
||||
// as we don't end the line immediately when we see a CR, as it could
|
||||
// be followed by a LF.
|
||||
// Add the last line; notice that it is certainly not terminated with a
|
||||
// newline, otherwise it would be handled above.
|
||||
wxString lastLine(lineStart, end);
|
||||
wxTextFileType lastType;
|
||||
if ( lastWasCR )
|
||||
{
|
||||
// last line had Mac EOL, exclude it from the string
|
||||
lastLine.RemoveLast();
|
||||
lastType = wxTextFileType_Mac;
|
||||
}
|
||||
else
|
||||
{
|
||||
// last line wasn't terminated at all
|
||||
lastType = wxTextFileType_None;
|
||||
}
|
||||
|
||||
AddLine(lastLine, lastType);
|
||||
AddLine(lastLine, wxTextFileType_None);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user