diff --git a/include/wx/txtstrm.h b/include/wx/txtstrm.h
index 8fc43a176f..0bcf10e2cd 100644
--- a/include/wx/txtstrm.h
+++ b/include/wx/txtstrm.h
@@ -25,7 +25,8 @@ typedef wxTextOutputStream& (*__wxTextOutputManip)(wxTextOutputStream&);
 WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
 
 
-#define wxEOT wxT('\4') // the End-Of-Text control code (used only inside wxTextInputStream)
+// Obsolete constant defined only for compatibility, not used.
+#define wxEOT wxT('\4')
 
 // If you're scanning through a file using wxTextInputStream, you should check for EOF _before_
 // reading the next item (word / number), because otherwise the last item may get lost.
@@ -58,7 +59,7 @@ public:
     double   ReadDouble();
     wxString ReadLine();
     wxString ReadWord();
-    wxChar   GetChar() { wxChar c = NextChar(); return (wxChar)(c != wxEOT ? c : 0); }
+    wxChar   GetChar();
 
     wxString GetStringSeparators() const { return m_separators; }
     void SetStringSeparators(const wxString &c) { m_separators = c; }
@@ -83,7 +84,22 @@ public:
 protected:
     wxInputStream &m_input;
     wxString m_separators;
-    char m_lastBytes[10]; // stores the bytes that were read for the last character
+
+    // Data possibly (see m_validXXX) read from the stream but not decoded yet.
+    // This is necessary because GetChar() may only return a single character
+    // but we may get more than one character when decoding raw input bytes.
+    char m_lastBytes[10];
+
+    // The bytes [0, m_validEnd) of m_lastBytes contain the bytes read by the
+    // last GetChar() call (this interval may be empty if GetChar() hasn't been
+    // called yet). The bytes [0, m_validBegin) have been already decoded and
+    // returned to caller or stored in m_lastWChar in the particularly
+    // egregious case of decoding a non-BMP character when using UTF-16 for
+    // wchar_t. Finally, the bytes [m_validBegin, m_validEnd) remain to be
+    // decoded and returned during the next call (again, this interval can, and
+    // usually will, be empty too if m_validBegin == m_validEnd).
+    size_t m_validBegin,
+           m_validEnd;
 
 #if wxUSE_UNICODE
     wxMBConv *m_conv;
@@ -100,8 +116,6 @@ protected:
 
     bool   EatEOL(const wxChar &c);
     void   UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
-    // returns EOT (\4) if there is a stream error, or end of file
-    wxChar NextChar();   // this should be used instead of GetC() because of Unicode issues
     wxChar NextNonSeparators();
 
     wxDECLARE_NO_COPY_CLASS(wxTextInputStream);
diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp
index 11d1adbc54..dcdf7ba76d 100644
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -103,41 +103,44 @@ static size_t encode_utf16(wxUint32 input, wxUint16 *output)
     }
 }
 
-static size_t decode_utf16(const wxUint16* input, wxUint32& output)
-{
-    if ((*input < 0xd800) || (*input > 0xdfff))
-    {
-        output = *input;
-        return 1;
-    }
-    else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
-    {
-        output = *input;
-        return wxCONV_FAILED;
-    }
-    else
-    {
-        output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
-        return 2;
-    }
-}
-
-// returns the next UTF-32 character from the wchar_t buffer and advances the
-// pointer to the character after this one
+// Returns the next UTF-32 character from the wchar_t buffer terminated by the
+// "end" pointer (the caller must ensure that on input "*pSrc < end") and
+// advances the pointer to the character after this one.
 //
-// if an invalid character is found, *pSrc is set to NULL, the caller must
-// check for this
-static wxUint32 wxDecodeSurrogate(const wxChar16 **pSrc)
+// If an invalid or incomplete character is found, *pSrc is set to NULL, the
+// caller must check for this.
+static wxUint32 wxDecodeSurrogate(const wxChar16 **pSrc, const wxChar16* end)
 {
-    wxUint32 out;
-    const size_t
-        n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
-    if ( n == wxCONV_FAILED )
-        *pSrc = NULL;
-    else
-        *pSrc += n;
+    const wxChar16*& src = *pSrc;
 
-    return out;
+    // Is this a BMP character?
+    const wxUint16 u = *src++;
+    if ((u < 0xd800) || (u > 0xdfff))
+    {
+        // Yes, just return it.
+        return u;
+    }
+
+    // No, we have the first half of a surrogate, check if we also have the
+    // second half (notice that this check does nothing if end == NULL, as it
+    // is allowed to be, and this is correct).
+    if ( src == end )
+    {
+        // No, we don't because this is the end of input.
+        src = NULL;
+        return 0;
+    }
+
+    const wxUint16 u2 = *src++;
+    if ( (u2 < 0xdc00) || (u2 > 0xdfff) )
+    {
+        // No, it's not in the low surrogate range.
+        src = NULL;
+        return 0;
+    }
+
+    // Yes, decode it and return the corresponding Unicode character.
+    return ((u - 0xd7c0) << 10) + (u2 - 0xdc00);
 }
 
 // ----------------------------------------------------------------------------
@@ -699,6 +702,10 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
             // start of an encoded segment?
             if ( cc == '+' )
             {
+                // Can't end with a plus sign.
+                if ( src == srcEnd )
+                    return wxCONV_FAILED;
+
                 if ( *src == '-' )
                 {
                     // just the encoded plus sign, don't switch to shifted mode
@@ -1070,9 +1077,10 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
     char *out = dstLen ? dst : NULL;
     size_t written = 0;
 
-    for ( const wchar_t *wp = src; ; wp++ )
+    const wchar_t* const end = srcLen == wxNO_LEN ? NULL : src + srcLen;
+    for ( const wchar_t *wp = src; ; )
     {
-        if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) )
+        if ( end ? wp == end : !*wp )
         {
             // all done successfully, just add the trailing NULL if we are not
             // using explicit length
@@ -1092,38 +1100,13 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
             return written;
         }
 
-        if ( srcLen != wxNO_LEN )
-            srcLen--;
-
         wxUint32 code;
 #ifdef WC_UTF16
-        // Be careful here: decode_utf16() may need to read the next wchar_t
-        // but we might not have any left, so pass it a temporary buffer which
-        // always has 2 wide characters and take care to set its second element
-        // to 0, which is invalid as a second half of a surrogate, to ensure
-        // that we return an error when trying to convert a buffer ending with
-        // half of a surrogate.
-        wxUint16 tmp[2];
-        tmp[0] = wp[0];
-        tmp[1] = srcLen != 0 ? wp[1] : 0;
-        switch ( decode_utf16(tmp, code) )
-        {
-            case 1:
-                // Nothing special to do, just a character from BMP.
-                break;
-
-            case 2:
-                // skip the next char too as we decoded a surrogate
-                wp++;
-                if ( srcLen != wxNO_LEN )
-                    srcLen--;
-                break;
-
-            case wxCONV_FAILED:
-                return wxCONV_FAILED;
-        }
+        code = wxDecodeSurrogate(&wp, end);
+        if ( !wp )
+            return wxCONV_FAILED;
 #else // wchar_t is UTF-32
-        code = *wp & 0x7fffffff;
+        code = *wp++ & 0x7fffffff;
 #endif
 
         unsigned len;
@@ -1381,20 +1364,15 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
 
     // The length can be either given explicitly or computed implicitly for the
     // NUL-terminated strings.
-    const bool isNulTerminated = srcLen == wxNO_LEN;
-    while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n)))
+    const wchar_t* const end = srcLen == wxNO_LEN ? NULL : psz + srcLen;
+    while ((end ? psz < end : *psz) && ((!buf) || (len < n)))
     {
         wxUint32 cc;
 
 #ifdef WC_UTF16
-        // cast is ok for WC_UTF16
-        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
-
-        // we could have consumed two input code units if we decoded a
-        // surrogate, so adjust the input pointer and, if necessary, the length
-        psz += (pa == wxCONV_FAILED) ? 1 : pa;
-        if ( pa == 2 && !isNulTerminated )
-            srcLen--;
+        cc = wxDecodeSurrogate(&psz, end);
+        if ( !psz )
+            return wxCONV_FAILED;
 #else
         cc = (*psz++) & 0x7fffffff;
 #endif
@@ -1455,7 +1433,7 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
         }
     }
 
-    if ( isNulTerminated )
+    if ( !end )
     {
         // Add the trailing NUL in this case if we have a large enough buffer.
         if ( buf && (len < n) )
@@ -1623,7 +1601,7 @@ wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
     {
-        const wxUint32 ch = wxDecodeSurrogate(&inBuff);
+        const wxUint32 ch = wxDecodeSurrogate(&inBuff, inEnd);
         if ( !inBuff )
             return wxCONV_FAILED;
 
@@ -1693,29 +1671,26 @@ wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
     const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
     for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
     {
-        wxUint32 ch;
         wxUint16 tmp[2];
+        const wxUint16* tmpEnd = tmp;
 
         tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
-        if ( ++inBuff < inEnd )
+        tmpEnd++;
+
+        if ( inBuff + 1 < inEnd )
         {
             // Normal case, we have a next character to decode.
-            tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
-        }
-        else // End of input.
-        {
-            // Setting the second character to 0 ensures we correctly return
-            // wxCONV_FAILED if the first one is the first half of a surrogate
-            // as the second half can't be 0 in this case.
-            tmp[1] = 0;
+            tmp[1] = wxUINT16_SWAP_ALWAYS(inBuff[1]);
+            tmpEnd++;
         }
 
-        const size_t numChars = decode_utf16(tmp, ch);
-        if ( numChars == wxCONV_FAILED )
+        const wxUint16* p = tmp;
+        const wxUint32 ch = wxDecodeSurrogate(&p, tmpEnd);
+        if ( !p )
             return wxCONV_FAILED;
 
-        if ( numChars == 2 )
-            inBuff++;
+        // Move the real pointer by the same amount as "p" was updated by.
+        inBuff += p - tmp;
 
         outLen++;
 
@@ -1861,7 +1836,7 @@ wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
     size_t outLen = 0;
     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
     {
-        const wxUint32 ch = wxDecodeSurrogate(&src);
+        const wxUint32 ch = wxDecodeSurrogate(&src, srcEnd);
         if ( !src )
             return wxCONV_FAILED;
 
@@ -1930,7 +1905,7 @@ wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
     size_t outLen = 0;
     for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
     {
-        const wxUint32 ch = wxDecodeSurrogate(&src);
+        const wxUint32 ch = wxDecodeSurrogate(&src, srcEnd);
         if ( !src )
             return wxCONV_FAILED;
 
diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp
index 8c48cd6c77..d174e4e13f 100644
--- a/src/common/txtstrm.cpp
+++ b/src/common/txtstrm.cpp
@@ -35,7 +35,8 @@ wxTextInputStream::wxTextInputStream(wxInputStream &s,
                                      const wxMBConv& conv)
   : m_input(s), m_separators(sep), m_conv(conv.Clone())
 {
-    memset((void*)m_lastBytes, 0, 10);
+    m_validBegin =
+    m_validEnd = 0;
 
 #if SIZEOF_WCHAR_T == 2
     m_lastWChar = 0;
@@ -45,7 +46,10 @@ wxTextInputStream::wxTextInputStream(wxInputStream &s,
 wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
   : m_input(s), m_separators(sep)
 {
-    memset((void*)m_lastBytes, 0, 10);
+    m_validBegin =
+    m_validEnd = 0;
+
+    m_lastBytes[0] = 0;
 }
 #endif
 
@@ -58,14 +62,16 @@ wxTextInputStream::~wxTextInputStream()
 
 void wxTextInputStream::UngetLast()
 {
-    size_t byteCount = 0;
-    while(m_lastBytes[byteCount]) // pseudo ANSI strlen (even for Unicode!)
-        byteCount++;
-    m_input.Ungetch(m_lastBytes, byteCount);
-    memset((void*)m_lastBytes, 0, 10);
+    if ( m_validEnd )
+    {
+        m_input.Ungetch(m_lastBytes, m_validEnd);
+
+        m_validBegin =
+        m_validEnd = 0;
+    }
 }
 
-wxChar wxTextInputStream::NextChar()
+wxChar wxTextInputStream::GetChar()
 {
 #if wxUSE_UNICODE
 #if SIZEOF_WCHAR_T == 2
@@ -77,17 +83,37 @@ wxChar wxTextInputStream::NextChar()
         m_lastWChar = 0;
         return wc;
     }
-#endif // !SWIG_ONLY_SCRIPT_API
+#endif // SIZEOF_WCHAR_T
 
-    wxChar wbuf[2];
-    memset((void*)m_lastBytes, 0, 10);
-    for(size_t inlen = 0; inlen < 9; inlen++)
+    // If we have any non-decoded bytes left from the last call, shift them to
+    // be at the beginning of the buffer.
+    if ( m_validBegin < m_validEnd )
     {
-        // actually read the next character
-        m_lastBytes[inlen] = m_input.GetC();
+        m_validEnd -= m_validBegin;
+        memmove(m_lastBytes, m_lastBytes + m_validBegin, m_validEnd);
+    }
+    else // All bytes were already decoded and consumed.
+    {
+        m_validEnd = 0;
+    }
 
-        if(m_input.LastRead() <= 0)
-            return wxEOT;
+    // We may need to decode up to 4 characters if we have input starting with
+    // 3 BOM-like bytes, but not actually containing a BOM, as decoding it will
+    // only succeed when 4 bytes are read -- and will yield 4 wide characters.
+    wxChar wbuf[4];
+    for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
+    {
+        if ( inlen >= m_validEnd )
+        {
+            // actually read the next character
+            m_lastBytes[inlen] = m_input.GetC();
+
+            if(m_input.LastRead() <= 0)
+                return 0;
+
+            m_validEnd++;
+        }
+        //else: Retry decoding what we already have in the buffer.
 
         switch ( m_conv->ToWChar(wbuf, WXSIZEOF(wbuf), m_lastBytes, inlen + 1) )
         {
@@ -103,12 +129,17 @@ wxChar wxTextInputStream::NextChar()
                 break;
 
             default:
-                // if we couldn't decode a single character during the last
-                // loop iteration we shouldn't be able to decode 2 or more of
-                // them with an extra single byte, something fishy is going on
-                // (except if we use UTF-16, see below)
-                wxFAIL_MSG("unexpected decoding result");
-                return wxEOT;
+                // If we couldn't decode a single character during the last
+                // loop iteration, but decoded more than one of them with just
+                // one extra byte, the only explanation is that we were using a
+                // wxConvAuto conversion recognizing the initial BOM and that
+                // it couldn't detect the presence or absence of BOM so far,
+                // but now finally has enough data to see that there is none.
+                // As we must have fallen back to Latin-1 in this case, return
+                // just the first byte and keep the other ones for the next
+                // time.
+                m_validBegin = 1;
+                return wbuf[0];
 
 #if SIZEOF_WCHAR_T == 2
             case 2:
@@ -118,25 +149,37 @@ wxChar wxTextInputStream::NextChar()
                 // remember the second one for the next call, as there is no
                 // way to fit both of them into a single wxChar in this case.
                 m_lastWChar = wbuf[1];
-#endif // !SWIG_ONLY_SCRIPT_API
+#endif // SIZEOF_WCHAR_T == 2
                 wxFALLTHROUGH;
 
             case 1:
+                m_validBegin = inlen + 1;
 
                 // we finally decoded a character
                 return wbuf[0];
         }
     }
 
-    // there should be no encoding which requires more than nine bytes for one
-    // character so something must be wrong with our conversion but we have no
-    // way to signal it from here
-    return wxEOT;
+    // There should be no encoding which requires more than 10 bytes to decode
+    // at least one character (the most actually seems to be 7: 3 for the
+    // initial BOM, which is ignored, and 4 for the longest possible encoding
+    // of a Unicode character in UTF-8), so something must be wrong with our
+    // conversion but we have no way to signal it from here and just return 0
+    // as if we reached the end of the stream.
+    m_validBegin = 0;
+    m_validEnd = sizeof(m_lastBytes);
+
+    return 0;
 #else
     m_lastBytes[0] = m_input.GetC();
 
     if(m_input.LastRead() <= 0)
-        return wxEOT;
+    {
+        m_validEnd = 0;
+        return 0;
+    }
+
+    m_validEnd = 1;
 
     return m_lastBytes[0];
 #endif
@@ -147,8 +190,9 @@ wxChar wxTextInputStream::NextNonSeparators()
 {
     for (;;)
     {
-        wxChar c = NextChar();
-        if (c == wxEOT) return (wxChar) 0;
+        wxChar c = GetChar();
+        if (!c)
+            return c;
 
         if (c != wxT('\n') &&
             c != wxT('\r') &&
@@ -164,8 +208,8 @@ bool wxTextInputStream::EatEOL(const wxChar &c)
 
     if (c == wxT('\r')) // eat on both Mac and DOS
     {
-        wxChar c2 = NextChar();
-        if(c2 == wxEOT) return true; // end of stream reached, had enough :-)
+        wxChar c2 = GetChar();
+        if (!c2) return true; // end of stream reached, had enough :-)
 
         if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
         return true;
@@ -261,8 +305,8 @@ wxString wxTextInputStream::ReadLine()
 
     while ( !m_input.Eof() )
     {
-        wxChar c = NextChar();
-        if(c == wxEOT)
+        wxChar c = GetChar();
+        if (!c)
             break;
 
         if (EatEOL(c))
@@ -289,8 +333,8 @@ wxString wxTextInputStream::ReadWord()
 
     while ( !m_input.Eof() )
     {
-        c = NextChar();
-        if(c == wxEOT)
+        c = GetChar();
+        if (!c)
             break;
 
         if (m_separators.Find(c) >= 0)
diff --git a/tests/mbconv/mbconvtest.cpp b/tests/mbconv/mbconvtest.cpp
index 5a0afa4e5b..7edee7bdd6 100644
--- a/tests/mbconv/mbconvtest.cpp
+++ b/tests/mbconv/mbconvtest.cpp
@@ -985,6 +985,7 @@ void MBConvTestCase::NonBMPCharTests()
         TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
         TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
     }
+    SECTION("wxMBConvUTF16LE")
     {
         char u16le[sizeof(u16)];
         for (size_t i = 0; i < sizeof(u16)/2; ++i) {
@@ -995,6 +996,7 @@ void MBConvTestCase::NonBMPCharTests()
         TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
         TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
     }
+    SECTION("wxMBConvUTF16BE")
     {
         char u16be[sizeof(u16)];
         for (size_t i = 0; i < sizeof(u16)/2; ++i) {
@@ -1005,6 +1007,7 @@ void MBConvTestCase::NonBMPCharTests()
         TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
         TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
     }
+    SECTION("wxMBConvUTF32LE")
     {
         char u32le[sizeof(u32)];
         for (size_t i = 0; i < sizeof(u32)/4; ++i) {
@@ -1017,6 +1020,7 @@ void MBConvTestCase::NonBMPCharTests()
         TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
         TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
     }
+    SECTION("wxMBConvUTF32BE")
     {
         char u32be[sizeof(u32)];
         for (size_t i = 0; i < sizeof(u32)/4; ++i) {
diff --git a/tests/streams/textstreamtest.cpp b/tests/streams/textstreamtest.cpp
index 0975e7526e..edb6eaa8a2 100644
--- a/tests/streams/textstreamtest.cpp
+++ b/tests/streams/textstreamtest.cpp
@@ -290,4 +290,40 @@ void TextStreamTestCase::TestInput(const wxMBConv& conv,
     CPPUNIT_ASSERT_EQUAL( 0, memcmp(txtWchar, temp.wc_str(), sizeof(txtWchar)) );
 }
 
+TEST_CASE("wxTextInputStream::GetChar", "[text][input][stream][char]")
+{
+    // This is the simplest possible test that used to trigger assertion in
+    // wxTextInputStream::GetChar().
+    SECTION("starts-with-nul")
+    {
+        const wxUint8 buf[] = { 0x00, 0x01, };
+        wxMemoryInputStream mis(buf, sizeof(buf));
+        wxTextInputStream tis(mis);
+
+        REQUIRE( tis.GetChar() == 0x00 );
+        REQUIRE( tis.GetChar() == 0x01 );
+        REQUIRE( tis.GetChar() == 0x00 );
+        CHECK( tis.GetInputStream().Eof() );
+    }
+
+    // This exercises a problematic path in GetChar() as the first 3 bytes of
+    // this stream look like the start of UTF-32BE BOM, but this is not
+    // actually a BOM because the 4th byte is 0xFE and not 0xFF, so the stream
+    // should decode the buffer as Latin-1 once it gets there.
+    SECTION("almost-UTF-32-BOM")
+    {
+        const wxUint8 buf[] = { 0x00, 0x00, 0xFE, 0xFE, 0x01 };
+        wxMemoryInputStream mis(buf, sizeof(buf));
+        wxTextInputStream tis(mis);
+
+        REQUIRE( tis.GetChar() == 0x00 );
+        REQUIRE( tis.GetChar() == 0x00 );
+        REQUIRE( tis.GetChar() == 0xFE );
+        REQUIRE( tis.GetChar() == 0xFE );
+        REQUIRE( tis.GetChar() == 0x01 );
+        REQUIRE( tis.GetChar() == 0x00 );
+        CHECK( tis.GetInputStream().Eof() );
+    }
+}
+
 #endif // wxUSE_UNICODE