From 55ec9989e5ecd0ccabe621d139ba7c3df384de89 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 13 Oct 1999 03:58:17 +0000 Subject: [PATCH] Don't assume ASCII execution character set --- expat/files.txt | 1 + expat/xmltok/ascii.h | 111 ++++++++++++++++++++++++++++++++++ expat/xmltok/xmlrole.c | 89 +++++++++++++++++---------- expat/xmltok/xmltok.c | 92 ++++++++++++++++++++-------- expat/xmltok/xmltok_impl.c | 121 +++++++++++++++++++------------------ expat/xmltok/xmltok_ns.c | 2 +- 6 files changed, 297 insertions(+), 119 deletions(-) create mode 100755 expat/xmltok/ascii.h diff --git a/expat/files.txt b/expat/files.txt index febee1be..980598f9 100755 --- a/expat/files.txt +++ b/expat/files.txt @@ -1,4 +1,5 @@ expat.html +xmltok/ascii.h xmltok/asciitab.h xmltok/dllmain.c gennmtab/gennmtab.c diff --git a/expat/xmltok/ascii.h b/expat/xmltok/ascii.h new file mode 100755 index 00000000..7d43d3f4 --- /dev/null +++ b/expat/xmltok/ascii.h @@ -0,0 +1,111 @@ +/* +The contents of this file are subject to the Mozilla Public License +Version 1.1 (the "License"); you may not use this file except in +compliance with the License. You may obtain a copy of the License at +http://www.mozilla.org/MPL/ + +Software distributed under the License is distributed on an "AS IS" +basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific language governing rights and limitations +under the License. + +The Original Code is expat. + +The Initial Developer of the Original Code is James Clark. +Portions created by James Clark are Copyright (C) 1998, 1999 +James Clark. All Rights Reserved. + +Contributor(s): + +Alternatively, the contents of this file may be used under the terms +of the GNU General Public License (the "GPL"), in which case the +provisions of the GPL are applicable instead of those above. If you +wish to allow use of your version of this file only under the terms of +the GPL and not to allow others to use your version of this file under +the MPL, indicate your decision by deleting the provisions above and +replace them with the notice and other provisions required by the +GPL. If you do not delete the provisions above, a recipient may use +your version of this file under either the MPL or the GPL. +*/ + +#define ASCII_A 0x41 +#define ASCII_B 0x42 +#define ASCII_C 0x43 +#define ASCII_D 0x44 +#define ASCII_E 0x45 +#define ASCII_F 0x46 +#define ASCII_G 0x47 +#define ASCII_H 0x48 +#define ASCII_I 0x49 +#define ASCII_J 0x4A +#define ASCII_K 0x4B +#define ASCII_L 0x4C +#define ASCII_M 0x4D +#define ASCII_N 0x4E +#define ASCII_O 0x4F +#define ASCII_P 0x50 +#define ASCII_Q 0x51 +#define ASCII_R 0x52 +#define ASCII_S 0x53 +#define ASCII_T 0x54 +#define ASCII_U 0x55 +#define ASCII_V 0x56 +#define ASCII_W 0x57 +#define ASCII_X 0x58 +#define ASCII_Y 0x59 +#define ASCII_Z 0x5A + +#define ASCII_a 0x61 +#define ASCII_b 0x62 +#define ASCII_c 0x63 +#define ASCII_d 0x64 +#define ASCII_e 0x65 +#define ASCII_f 0x66 +#define ASCII_g 0x67 +#define ASCII_h 0x68 +#define ASCII_i 0x69 +#define ASCII_j 0x6A +#define ASCII_k 0x6B +#define ASCII_l 0x6C +#define ASCII_m 0x6D +#define ASCII_n 0x6E +#define ASCII_o 0x6F +#define ASCII_p 0x70 +#define ASCII_q 0x71 +#define ASCII_r 0x72 +#define ASCII_s 0x73 +#define ASCII_t 0x74 +#define ASCII_u 0x75 +#define ASCII_v 0x76 +#define ASCII_w 0x77 +#define ASCII_x 0x78 +#define ASCII_y 0x79 +#define ASCII_z 0x7A + +#define ASCII_0 0x30 +#define ASCII_1 0x31 +#define ASCII_2 0x32 +#define ASCII_3 0x33 +#define ASCII_4 0x34 +#define ASCII_5 0x35 +#define ASCII_6 0x36 +#define ASCII_7 0x37 +#define ASCII_8 0x38 +#define ASCII_9 0x39 + +#define ASCII_TAB 0x09 +#define ASCII_SPACE 0x20 +#define ASCII_EXCL 0x21 +#define ASCII_QUOT 0x22 +#define ASCII_AMP 0x26 +#define ASCII_APOS 0x27 +#define ASCII_MINUS 0x2D +#define ASCII_PERIOD 0x2E +#define ASCII_COLON 0x3A +#define ASCII_SEMI 0x3B +#define ASCII_LT 0x3C +#define ASCII_EQUALS 0x3D +#define ASCII_GT 0x3E +#define ASCII_LSQB 0x5B +#define ASCII_RSQB 0x5D +#define ASCII_UNDERSCORE 0x5F diff --git a/expat/xmltok/xmlrole.c b/expat/xmltok/xmlrole.c index a8fdda78..2cc864ee 100755 --- a/expat/xmltok/xmlrole.c +++ b/expat/xmltok/xmlrole.c @@ -30,6 +30,7 @@ your version of this file under either the MPL or the GPL. #include "xmldef.h" #include "xmlrole.h" +#include "ascii.h" /* Doesn't check: @@ -38,6 +39,30 @@ your version of this file under either the MPL or the GPL. */ +static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' }; +static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; +static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; +static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; +static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; +static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; +static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; +static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; +static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' }; +static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; +static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; +static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; +static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; +static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; +static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; +static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; +static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' }; +static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; +static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' }; +static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; + #ifndef MIN_BYTES_PER_CHAR #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif @@ -103,7 +128,7 @@ int prolog0(PROLOG_STATE *state, if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, - "DOCTYPE")) + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_NONE; @@ -132,7 +157,7 @@ int prolog1(PROLOG_STATE *state, if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, - "DOCTYPE")) + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_NONE; @@ -198,11 +223,11 @@ int doctype1(PROLOG_STATE *state, state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = doctype3; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = doctype2; return XML_ROLE_NONE; } @@ -296,28 +321,28 @@ int internalSubset(PROLOG_STATE *state, if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, - "ENTITY")) { + KW_ENTITY)) { state->handler = entity0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, - "ATTLIST")) { + KW_ATTLIST)) { state->handler = attlist0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, - "ELEMENT")) { + KW_ELEMENT)) { state->handler = element0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, - "NOTATION")) { + KW_NOTATION)) { state->handler = notation0; return XML_ROLE_NONE; } @@ -429,11 +454,11 @@ int entity2(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity4; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity3; return XML_ROLE_NONE; } @@ -494,7 +519,7 @@ int entity5(PROLOG_STATE *state, setTopLevel(state); return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "NDATA")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = entity6; return XML_ROLE_NONE; } @@ -531,11 +556,11 @@ int entity7(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity9; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity8; return XML_ROLE_NONE; } @@ -609,11 +634,11 @@ int notation1(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = notation3; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = notation2; return XML_ROLE_NONE; } @@ -728,14 +753,14 @@ int attlist2(PROLOG_STATE *state, case XML_TOK_NAME: { static const char *types[] = { - "CDATA", - "ID", - "IDREF", - "IDREFS", - "ENTITY", - "ENTITIES", - "NMTOKEN", - "NMTOKENS", + KW_CDATA, + KW_ID, + KW_IDREF, + KW_IDREFS, + KW_ENTITY, + KW_ENTITIES, + KW_NMTOKEN, + KW_NMTOKENS, }; int i; for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) @@ -744,7 +769,7 @@ int attlist2(PROLOG_STATE *state, return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; } } - if (XmlNameMatchesAscii(enc, ptr, end, "NOTATION")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; return XML_ROLE_NONE; } @@ -865,21 +890,21 @@ int attlist8(PROLOG_STATE *state, if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, - "IMPLIED")) { + KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, - "REQUIRED")) { + KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, - "FIXED")) { + KW_FIXED)) { state->handler = attlist9; return XML_ROLE_NONE; } @@ -937,11 +962,11 @@ int element1(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "EMPTY")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { state->handler = declClose; return XML_ROLE_CONTENT_EMPTY; } - if (XmlNameMatchesAscii(enc, ptr, end, "ANY")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { state->handler = declClose; return XML_ROLE_CONTENT_ANY; } @@ -968,7 +993,7 @@ int element2(PROLOG_STATE *state, if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, - "PCDATA")) { + KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; } @@ -1136,11 +1161,11 @@ int condSect0(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, end, "INCLUDE")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { state->handler = condSect1; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, end, "IGNORE")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { state->handler = condSect2; return XML_ROLE_NONE; } diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index fd126031..c035b91a 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -193,6 +193,7 @@ struct normal_encoding { static int checkCharRefNumber(int); #include "xmltok_impl.h" +#include "ascii.h" #ifdef XML_MIN_SIZE #define sb_isNameMin isNever @@ -882,10 +883,10 @@ int streqci(const char *s1, const char *s2) for (;;) { char c1 = *s1++; char c2 = *s2++; - if ('a' <= c1 && c1 <= 'z') - c1 += 'A' - 'a'; - if ('a' <= c2 && c2 <= 'z') - c2 += 'A' - 'a'; + if (ASCII_a <= c1 && c1 <= ASCII_z) + c1 += ASCII_A - ASCII_a; + if (ASCII_a <= c2 && c2 <= ASCII_z) + c2 += ASCII_A - ASCII_a; if (c1 != c2) return 0; if (!c1) @@ -961,7 +962,7 @@ int parsePseudoAttribute(const ENCODING *enc, *nextTokPtr = ptr; return 0; } - if (c == '=') { + if (c == ASCII_EQUALS) { *nameEndPtr = ptr; break; } @@ -970,7 +971,7 @@ int parsePseudoAttribute(const ENCODING *enc, do { ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); - if (c != '=') { + if (c != ASCII_EQUALS) { *nextTokPtr = ptr; return 0; } @@ -988,7 +989,7 @@ int parsePseudoAttribute(const ENCODING *enc, ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); } - if (c != '"' && c != '\'') { + if (c != ASCII_QUOT && c != ASCII_APOS) { *nextTokPtr = ptr; return 0; } @@ -999,12 +1000,12 @@ int parsePseudoAttribute(const ENCODING *enc, c = toAscii(enc, ptr, end); if (c == open) break; - if (!('a' <= c && c <= 'z') - && !('A' <= c && c <= 'Z') - && !('0' <= c && c <= '9') - && c != '.' - && c != '-' - && c != '_') { + if (!(ASCII_a <= c && c <= ASCII_z) + && !(ASCII_A <= c && c <= ASCII_Z) + && !(ASCII_0 <= c && c <= ASCII_9) + && c != ASCII_PERIOD + && c != ASCII_MINUS + && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1013,6 +1014,26 @@ int parsePseudoAttribute(const ENCODING *enc, return 1; } +static const char KW_version[] = { + ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' +}; + +static const char KW_encoding[] = { + ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' +}; + +static const char KW_standalone[] = { + ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0' +}; + +static const char KW_yes[] = { + ASCII_y, ASCII_e, ASCII_s, '\0' +}; + +static const char KW_no[] = { + ASCII_n, ASCII_o, '\0' +}; + static int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, @@ -1036,7 +1057,7 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, *badPtr = ptr; return 0; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, "version")) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { if (!isGeneralTextEntity) { *badPtr = name; return 0; @@ -1058,9 +1079,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, return 1; } } - if (XmlNameMatchesAscii(enc, name, nameEnd, "encoding")) { + if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); - if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) { + if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } @@ -1075,15 +1096,15 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, if (!name) return 1; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, "standalone") || isGeneralTextEntity) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { *badPtr = name; return 0; } - if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, "yes")) { + if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; } - else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, "no")) { + else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 0; } @@ -1360,16 +1381,35 @@ enum { NO_ENC }; +static const char KW_ISO_8859_1[] = { + ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0' +}; +static const char KW_US_ASCII[] = { + ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0' +}; +static const char KW_UTF_8[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' +}; +static const char KW_UTF_16[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' +}; +static const char KW_UTF_16BE[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0' +}; +static const char KW_UTF_16LE[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0' +}; + static int getEncodingIndex(const char *name) { static const char *encodingNames[] = { - "ISO-8859-1", - "US-ASCII", - "UTF-8", - "UTF-16", - "UTF-16BE" - "UTF-16LE", + KW_ISO_8859_1, + KW_US_ASCII, + KW_UTF_8, + KW_UTF_16, + KW_UTF_16BE, + KW_UTF_16LE, }; int i; if (name == 0) @@ -1535,7 +1575,7 @@ XmlInitUnknownEncodingNS(void *mem, { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) - ((struct normal_encoding *)enc)->type[':'] = BT_COLON; + ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; return enc; } diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c index b3d358dd..4348c840 100755 --- a/expat/xmltok/xmltok_impl.c +++ b/expat/xmltok/xmltok_impl.c @@ -117,7 +117,7 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr != end) { - if (!CHAR_MATCHES(enc, ptr, '-')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -128,10 +128,10 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, case BT_MINUS: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '-')) { + if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -205,9 +205,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { - case 'x': + case ASCII_x: break; - case 'X': + case ASCII_X: upper = 1; break; default: @@ -215,9 +215,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { - case 'm': + case ASCII_m: break; - case 'M': + case ASCII_M: upper = 1; break; default: @@ -225,9 +225,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { - case 'l': + case ASCII_l: break; - case 'L': + case ASCII_L: upper = 1; break; default: @@ -271,7 +271,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '>')) { + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } @@ -290,7 +290,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '>')) { + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } @@ -308,12 +308,13 @@ static int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { + static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { - if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { + if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -342,12 +343,12 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ']')) + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -485,7 +486,7 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr != end) { - if (CHAR_MATCHES(enc, ptr, 'x')) + if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: @@ -678,7 +679,7 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -787,7 +788,7 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -836,12 +837,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ']')) + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -866,12 +867,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, #undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) { + if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) { + if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; } @@ -1067,10 +1068,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return -XML_TOK_CLOSE_BRACKET; - if (CHAR_MATCHES(enc, ptr, ']')) { + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (ptr + MINBPC(enc) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) { + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } @@ -1346,10 +1347,10 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e case BT_LT: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '!')) { + if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '[')) { + if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); } @@ -1358,10 +1359,10 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e case BT_RSQB: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ']')) { + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '>')) { + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { *nextTokPtr = ptr; @@ -1412,7 +1413,7 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, #endif break; case BT_S: - if (CHAR_MATCHES(enc, ptr, '\t')) { + if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { *badPtr = ptr; return 0; } @@ -1508,8 +1509,8 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr - || BYTE_TO_ASCII(enc, ptr) != ' ' - || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' ' + || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE + || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; @@ -1539,22 +1540,22 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'x')) { - for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { + if (CHAR_MATCHES(enc, ptr, ASCII_x)) { + for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: + case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: result <<= 4; - result |= (c - '0'); + result |= (c - ASCII_0); break; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: result <<= 4; - result += 10 + (c - 'A'); + result += 10 + (c - ASCII_A); break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: result <<= 4; - result += 10 + (c - 'a'); + result += 10 + (c - ASCII_a); break; } if (result >= 0x110000) @@ -1562,10 +1563,10 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) } } else { - for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { + for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; - result += (c - '0'); + result += (c - ASCII_0); if (result >= 0x110000) return -1; } @@ -1578,46 +1579,46 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha { switch ((end - ptr)/MINBPC(enc)) { case 2: - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) { + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { - case 'l': - return '<'; - case 'g': - return '>'; + case ASCII_l: + return ASCII_LT; + case ASCII_g: + return ASCII_GT; } } break; case 3: - if (CHAR_MATCHES(enc, ptr, 'a')) { + if (CHAR_MATCHES(enc, ptr, ASCII_a)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'm')) { + if (CHAR_MATCHES(enc, ptr, ASCII_m)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'p')) - return '&'; + if (CHAR_MATCHES(enc, ptr, ASCII_p)) + return ASCII_AMP; } } break; case 4: switch (BYTE_TO_ASCII(enc, ptr)) { - case 'q': + case ASCII_q: ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'u')) { + if (CHAR_MATCHES(enc, ptr, ASCII_u)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'o')) { + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 't')) - return '"'; + if (CHAR_MATCHES(enc, ptr, ASCII_t)) + return ASCII_QUOT; } } break; - case 'a': + case ASCII_a: ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'p')) { + if (CHAR_MATCHES(enc, ptr, ASCII_p)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'o')) { + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 's')) - return '\''; + if (CHAR_MATCHES(enc, ptr, ASCII_s)) + return ASCII_APOS; } } break; diff --git a/expat/xmltok/xmltok_ns.c b/expat/xmltok/xmltok_ns.c index 77d0e739..24278989 100755 --- a/expat/xmltok/xmltok_ns.c +++ b/expat/xmltok/xmltok_ns.c @@ -65,7 +65,7 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha if (ptr != end) return 0; *p = 0; - if (streqci(buf, "UTF-16") && enc->minBytesPerChar == 2) + if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC)