lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters

This commit is contained in:
Sebastian Pipping 2022-02-27 16:58:08 +01:00
parent c99e0e7f2b
commit 2ba6c76fca

View File

@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE; return XML_ERROR_NONE;
} }
static XML_Bool
is_rfc3986_uri_char(XML_Char candidate) {
  // Tells whether `candidate` is one of the characters that the RFC 3986
  // ABNF grammar allows to appear in a URI; for the grammar see
  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
  //
  // The table below lists the accepted characters, one line per grammar
  // rule they were taken from.
  static const char acceptedChars[]
      = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" // rule "ALPHA" (uppercase half)
        "abcdefghijklmnopqrstuvwxyz" // rule "ALPHA" (lowercase half)
        "0123456789"                 // rule "DIGIT"
        "%"                          // rule "pct-encoded"
        "-._~"                       // rule "unreserved"
        ":/?#[]@"                    // rule "gen-delims"
        "!$&'()*+,;=";               // rule "sub-delims"
  const char *cursor;

  // The scan stops at the table's terminating NUL without ever comparing
  // against it, so a NUL candidate is reported as a non-URI character.
  for (cursor = acceptedChars; *cursor != '\0'; cursor++) {
    if (candidate == *cursor) {
      return XML_TRUE;
    }
  }
  return XML_FALSE;
}
/* addBinding() overwrites the value of prefix->binding without checking. /* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding(). Therefore one must keep track of the old value outside of addBinding().
*/ */
@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE; isXMLNS = XML_FALSE;
// NOTE: While Expat does not validate namespace URIs against RFC 3986, // NOTE: While Expat does not validate namespace URIs against RFC 3986
// we have to at least make sure that the XML processor on top of // today (and is not REQUIRED to do so with regard to the XML 1.0
// Expat (that is splitting tag names by namespace separator into // namespaces specification) we have to at least make sure, that
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused // the application on top of Expat (that is likely splitting expanded
// by an attacker putting additional namespace separator characters // element names ("qualified names") of form
// into namespace declarations. That would be ambiguous and not to // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
// be expected. // in its element handler code) cannot be confused by an attacker
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { // putting additional namespace separator characters into namespace
// declarations. That would be ambiguous and not to be expected.
//
// While the HTML API docs of function XML_ParserCreateNS have been
// advising against use of a namespace separator character that can
// appear in a URI for >20 years now, some widespread applications
// are using URI characters (':' (colon) in particular) for a
// namespace separator, in practice. To keep these applications
// functional, we only reject namespace URIs containing the
// application-chosen namespace separator if the chosen separator
// is a non-URI character with regard to RFC 3986.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
&& ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX; return XML_ERROR_SYNTAX;
} }
} }