Don't assume ASCII execution character set

This commit is contained in:
James Clark 1999-10-13 03:58:17 +00:00
parent 3eac12050f
commit 55ec9989e5
6 changed files with 297 additions and 119 deletions

View File

@ -1,4 +1,5 @@
expat.html
xmltok/ascii.h
xmltok/asciitab.h
xmltok/dllmain.c
gennmtab/gennmtab.c

111
expat/xmltok/ascii.h Executable file
View File

@ -0,0 +1,111 @@
/*
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS"
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific language governing rights and limitations
under the License.
The Original Code is expat.
The Initial Developer of the Original Code is James Clark.
Portions created by James Clark are Copyright (C) 1998, 1999
James Clark. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms
of the GNU General Public License (the "GPL"), in which case the
provisions of the GPL are applicable instead of those above. If you
wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under
the MPL, indicate your decision by deleting the provisions above and
replace them with the notice and other provisions required by the
GPL. If you do not delete the provisions above, a recipient may use
your version of this file under either the MPL or the GPL.
*/
/*
Numeric ASCII code points for the characters the tokenizer needs to
recognize. Using these constants instead of character literals such as
'A' keeps comparisons against ASCII-encoded input correct even when the
compiler's execution character set is not ASCII.
*/

/* Upper-case letters A-Z (0x41-0x5A). */
#define ASCII_A 0x41
#define ASCII_B 0x42
#define ASCII_C 0x43
#define ASCII_D 0x44
#define ASCII_E 0x45
#define ASCII_F 0x46
#define ASCII_G 0x47
#define ASCII_H 0x48
#define ASCII_I 0x49
#define ASCII_J 0x4A
#define ASCII_K 0x4B
#define ASCII_L 0x4C
#define ASCII_M 0x4D
#define ASCII_N 0x4E
#define ASCII_O 0x4F
#define ASCII_P 0x50
#define ASCII_Q 0x51
#define ASCII_R 0x52
#define ASCII_S 0x53
#define ASCII_T 0x54
#define ASCII_U 0x55
#define ASCII_V 0x56
#define ASCII_W 0x57
#define ASCII_X 0x58
#define ASCII_Y 0x59
#define ASCII_Z 0x5A
/* Lower-case letters a-z (0x61-0x7A). */
#define ASCII_a 0x61
#define ASCII_b 0x62
#define ASCII_c 0x63
#define ASCII_d 0x64
#define ASCII_e 0x65
#define ASCII_f 0x66
#define ASCII_g 0x67
#define ASCII_h 0x68
#define ASCII_i 0x69
#define ASCII_j 0x6A
#define ASCII_k 0x6B
#define ASCII_l 0x6C
#define ASCII_m 0x6D
#define ASCII_n 0x6E
#define ASCII_o 0x6F
#define ASCII_p 0x70
#define ASCII_q 0x71
#define ASCII_r 0x72
#define ASCII_s 0x73
#define ASCII_t 0x74
#define ASCII_u 0x75
#define ASCII_v 0x76
#define ASCII_w 0x77
#define ASCII_x 0x78
#define ASCII_y 0x79
#define ASCII_z 0x7A
/* Decimal digits 0-9 (0x30-0x39). */
#define ASCII_0 0x30
#define ASCII_1 0x31
#define ASCII_2 0x32
#define ASCII_3 0x33
#define ASCII_4 0x34
#define ASCII_5 0x35
#define ASCII_6 0x36
#define ASCII_7 0x37
#define ASCII_8 0x38
#define ASCII_9 0x39
/* White space and punctuation, in ascending code order. */
#define ASCII_TAB 0x09
#define ASCII_SPACE 0x20
#define ASCII_EXCL 0x21
#define ASCII_QUOT 0x22
#define ASCII_AMP 0x26
#define ASCII_APOS 0x27
#define ASCII_MINUS 0x2D
#define ASCII_PERIOD 0x2E
#define ASCII_COLON 0x3A
#define ASCII_SEMI 0x3B
#define ASCII_LT 0x3C
#define ASCII_EQUALS 0x3D
#define ASCII_GT 0x3E
#define ASCII_LSQB 0x5B
#define ASCII_RSQB 0x5D
#define ASCII_UNDERSCORE 0x5F

View File

@ -30,6 +30,7 @@ your version of this file under either the MPL or the GPL.
#include "xmldef.h"
#include "xmlrole.h"
#include "ascii.h"
/* Doesn't check:
@ -38,6 +39,30 @@ your version of this file under either the MPL or the GPL.
*/
/*
Prolog keywords spelled out with ASCII_* code constants rather than
string literals, so each array holds ASCII bytes regardless of the
compiler's execution character set. They are passed as the expected
name to XmlNameMatchesAscii().
*/
static const char KW_ANY[] = {
  ASCII_A, ASCII_N, ASCII_Y, '\0'
};
static const char KW_ATTLIST[] = {
  ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'
};
static const char KW_CDATA[] = {
  ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'
};
static const char KW_DOCTYPE[] = {
  ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'
};
static const char KW_ELEMENT[] = {
  ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'
};
static const char KW_EMPTY[] = {
  ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'
};
static const char KW_ENTITIES[] = {
  ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0'
};
static const char KW_ENTITY[] = {
  ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'
};
static const char KW_FIXED[] = {
  ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'
};
static const char KW_ID[] = {
  ASCII_I, ASCII_D, '\0'
};
static const char KW_IDREF[] = {
  ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'
};
static const char KW_IDREFS[] = {
  ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'
};
static const char KW_IGNORE[] = {
  ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'
};
static const char KW_IMPLIED[] = {
  ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'
};
static const char KW_INCLUDE[] = {
  ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'
};
static const char KW_NDATA[] = {
  ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'
};
static const char KW_NMTOKEN[] = {
  ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'
};
static const char KW_NMTOKENS[] = {
  ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0'
};
static const char KW_NOTATION[] = {
  ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0'
};
static const char KW_PCDATA[] = {
  ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'
};
static const char KW_PUBLIC[] = {
  ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'
};
static const char KW_REQUIRED[] = {
  ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0'
};
static const char KW_SYSTEM[] = {
  ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'
};
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
@ -103,7 +128,7 @@ int prolog0(PROLOG_STATE *state,
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
"DOCTYPE"))
KW_DOCTYPE))
break;
state->handler = doctype0;
return XML_ROLE_NONE;
@ -132,7 +157,7 @@ int prolog1(PROLOG_STATE *state,
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
"DOCTYPE"))
KW_DOCTYPE))
break;
state->handler = doctype0;
return XML_ROLE_NONE;
@ -198,11 +223,11 @@ int doctype1(PROLOG_STATE *state,
state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = doctype3;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = doctype2;
return XML_ROLE_NONE;
}
@ -296,28 +321,28 @@ int internalSubset(PROLOG_STATE *state,
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
"ENTITY")) {
KW_ENTITY)) {
state->handler = entity0;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
"ATTLIST")) {
KW_ATTLIST)) {
state->handler = attlist0;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
"ELEMENT")) {
KW_ELEMENT)) {
state->handler = element0;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
"NOTATION")) {
KW_NOTATION)) {
state->handler = notation0;
return XML_ROLE_NONE;
}
@ -429,11 +454,11 @@ int entity2(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = entity4;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = entity3;
return XML_ROLE_NONE;
}
@ -494,7 +519,7 @@ int entity5(PROLOG_STATE *state,
setTopLevel(state);
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "NDATA")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
state->handler = entity6;
return XML_ROLE_NONE;
}
@ -531,11 +556,11 @@ int entity7(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = entity9;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = entity8;
return XML_ROLE_NONE;
}
@ -609,11 +634,11 @@ int notation1(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "SYSTEM")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = notation3;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, "PUBLIC")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = notation2;
return XML_ROLE_NONE;
}
@ -728,14 +753,14 @@ int attlist2(PROLOG_STATE *state,
case XML_TOK_NAME:
{
static const char *types[] = {
"CDATA",
"ID",
"IDREF",
"IDREFS",
"ENTITY",
"ENTITIES",
"NMTOKEN",
"NMTOKENS",
KW_CDATA,
KW_ID,
KW_IDREF,
KW_IDREFS,
KW_ENTITY,
KW_ENTITIES,
KW_NMTOKEN,
KW_NMTOKENS,
};
int i;
for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
@ -744,7 +769,7 @@ int attlist2(PROLOG_STATE *state,
return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
}
}
if (XmlNameMatchesAscii(enc, ptr, end, "NOTATION")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
state->handler = attlist5;
return XML_ROLE_NONE;
}
@ -865,21 +890,21 @@ int attlist8(PROLOG_STATE *state,
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
"IMPLIED")) {
KW_IMPLIED)) {
state->handler = attlist1;
return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
}
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
"REQUIRED")) {
KW_REQUIRED)) {
state->handler = attlist1;
return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
}
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
"FIXED")) {
KW_FIXED)) {
state->handler = attlist9;
return XML_ROLE_NONE;
}
@ -937,11 +962,11 @@ int element1(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "EMPTY")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
state->handler = declClose;
return XML_ROLE_CONTENT_EMPTY;
}
if (XmlNameMatchesAscii(enc, ptr, end, "ANY")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
state->handler = declClose;
return XML_ROLE_CONTENT_ANY;
}
@ -968,7 +993,7 @@ int element2(PROLOG_STATE *state,
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
"PCDATA")) {
KW_PCDATA)) {
state->handler = element3;
return XML_ROLE_CONTENT_PCDATA;
}
@ -1136,11 +1161,11 @@ int condSect0(PROLOG_STATE *state,
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, "INCLUDE")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
state->handler = condSect1;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, "IGNORE")) {
if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
state->handler = condSect2;
return XML_ROLE_NONE;
}

View File

@ -193,6 +193,7 @@ struct normal_encoding {
static int checkCharRefNumber(int);
#include "xmltok_impl.h"
#include "ascii.h"
#ifdef XML_MIN_SIZE
#define sb_isNameMin isNever
@ -882,10 +883,10 @@ int streqci(const char *s1, const char *s2)
for (;;) {
char c1 = *s1++;
char c2 = *s2++;
if ('a' <= c1 && c1 <= 'z')
c1 += 'A' - 'a';
if ('a' <= c2 && c2 <= 'z')
c2 += 'A' - 'a';
if (ASCII_a <= c1 && c1 <= ASCII_z)
c1 += ASCII_A - ASCII_a;
if (ASCII_a <= c2 && c2 <= ASCII_z)
c2 += ASCII_A - ASCII_a;
if (c1 != c2)
return 0;
if (!c1)
@ -961,7 +962,7 @@ int parsePseudoAttribute(const ENCODING *enc,
*nextTokPtr = ptr;
return 0;
}
if (c == '=') {
if (c == ASCII_EQUALS) {
*nameEndPtr = ptr;
break;
}
@ -970,7 +971,7 @@ int parsePseudoAttribute(const ENCODING *enc,
do {
ptr += enc->minBytesPerChar;
} while (isSpace(c = toAscii(enc, ptr, end)));
if (c != '=') {
if (c != ASCII_EQUALS) {
*nextTokPtr = ptr;
return 0;
}
@ -988,7 +989,7 @@ int parsePseudoAttribute(const ENCODING *enc,
ptr += enc->minBytesPerChar;
c = toAscii(enc, ptr, end);
}
if (c != '"' && c != '\'') {
if (c != ASCII_QUOT && c != ASCII_APOS) {
*nextTokPtr = ptr;
return 0;
}
@ -999,12 +1000,12 @@ int parsePseudoAttribute(const ENCODING *enc,
c = toAscii(enc, ptr, end);
if (c == open)
break;
if (!('a' <= c && c <= 'z')
&& !('A' <= c && c <= 'Z')
&& !('0' <= c && c <= '9')
&& c != '.'
&& c != '-'
&& c != '_') {
if (!(ASCII_a <= c && c <= ASCII_z)
&& !(ASCII_A <= c && c <= ASCII_Z)
&& !(ASCII_0 <= c && c <= ASCII_9)
&& c != ASCII_PERIOD
&& c != ASCII_MINUS
&& c != ASCII_UNDERSCORE) {
*nextTokPtr = ptr;
return 0;
}
@ -1013,6 +1014,26 @@ int parsePseudoAttribute(const ENCODING *enc,
return 1;
}
/*
Pseudo-attribute names and values of the XML/text declaration, built
from ASCII_* code constants so the stored bytes are ASCII whatever the
compiler's execution character set is.
*/

/* "version" */
static const char KW_version[] = {
  ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n,
  '\0'
};

/* "encoding" */
static const char KW_encoding[] = {
  ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g,
  '\0'
};

/* "standalone" */
static const char KW_standalone[] = {
  ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d,
  ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e,
  '\0'
};

/* "yes" */
static const char KW_yes[] = {
  ASCII_y, ASCII_e, ASCII_s,
  '\0'
};

/* "no" */
static const char KW_no[] = {
  ASCII_n, ASCII_o,
  '\0'
};
static
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *,
@ -1036,7 +1057,7 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
*badPtr = ptr;
return 0;
}
if (!XmlNameMatchesAscii(enc, name, nameEnd, "version")) {
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
if (!isGeneralTextEntity) {
*badPtr = name;
return 0;
@ -1058,9 +1079,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
return 1;
}
}
if (XmlNameMatchesAscii(enc, name, nameEnd, "encoding")) {
if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
int c = toAscii(enc, val, end);
if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
*badPtr = val;
return 0;
}
@ -1075,15 +1096,15 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
if (!name)
return 1;
}
if (!XmlNameMatchesAscii(enc, name, nameEnd, "standalone") || isGeneralTextEntity) {
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) {
*badPtr = name;
return 0;
}
if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, "yes")) {
if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
if (standalone)
*standalone = 1;
}
else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, "no")) {
else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
if (standalone)
*standalone = 0;
}
@ -1360,16 +1381,35 @@ enum {
NO_ENC
};
/*
Names of the built-in encodings, built from ASCII_* code constants so
the stored bytes are ASCII whatever the compiler's execution character
set is. Each initializer is split at the hyphens of the name it spells.
*/

/* "ISO-8859-1" */
static const char KW_ISO_8859_1[] = {
  ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS,
  ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS,
  ASCII_1,
  '\0'
};

/* "US-ASCII" */
static const char KW_US_ASCII[] = {
  ASCII_U, ASCII_S, ASCII_MINUS,
  ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
  '\0'
};

/* "UTF-8" */
static const char KW_UTF_8[] = {
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS,
  ASCII_8,
  '\0'
};

/* "UTF-16" */
static const char KW_UTF_16[] = {
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS,
  ASCII_1, ASCII_6,
  '\0'
};

/* "UTF-16BE" */
static const char KW_UTF_16BE[] = {
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS,
  ASCII_1, ASCII_6, ASCII_B, ASCII_E,
  '\0'
};

/* "UTF-16LE" */
static const char KW_UTF_16LE[] = {
  ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS,
  ASCII_1, ASCII_6, ASCII_L, ASCII_E,
  '\0'
};
static
int getEncodingIndex(const char *name)
{
static const char *encodingNames[] = {
"ISO-8859-1",
"US-ASCII",
"UTF-8",
"UTF-16",
"UTF-16BE"
"UTF-16LE",
KW_ISO_8859_1,
KW_US_ASCII,
KW_UTF_8,
KW_UTF_16,
KW_UTF_16BE,
KW_UTF_16LE,
};
int i;
if (name == 0)
@ -1535,7 +1575,7 @@ XmlInitUnknownEncodingNS(void *mem,
{
ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
if (enc)
((struct normal_encoding *)enc)->type[':'] = BT_COLON;
((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
return enc;
}

View File

@ -117,7 +117,7 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr != end) {
if (!CHAR_MATCHES(enc, ptr, '-')) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
@ -128,10 +128,10 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
case BT_MINUS:
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '-')) {
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
@ -205,9 +205,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
if (end - ptr != MINBPC(enc)*3)
return 1;
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'x':
case ASCII_x:
break;
case 'X':
case ASCII_X:
upper = 1;
break;
default:
@ -215,9 +215,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
}
ptr += MINBPC(enc);
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'm':
case ASCII_m:
break;
case 'M':
case ASCII_M:
upper = 1;
break;
default:
@ -225,9 +225,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
}
ptr += MINBPC(enc);
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'l':
case ASCII_l:
break;
case 'L':
case ASCII_L:
upper = 1;
break;
default:
@ -271,7 +271,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
}
@ -290,7 +290,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
}
@ -308,12 +308,13 @@ static
int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
int i;
/* CDATA[ */
if (end - ptr < 6 * MINBPC(enc))
return XML_TOK_PARTIAL;
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
@ -342,12 +343,12 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ']'))
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
}
@ -485,7 +486,7 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr != end) {
if (CHAR_MATCHES(enc, ptr, 'x'))
if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
@ -678,7 +679,7 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
@ -787,7 +788,7 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
@ -836,12 +837,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ']'))
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, '>')) {
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
}
@ -866,12 +867,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
#undef LEAD_CASE
case BT_RSQB:
if (ptr + MINBPC(enc) != end) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
if (ptr + 2*MINBPC(enc) != end) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
}
@ -1067,10 +1068,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (ptr == end)
return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ']')) {
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if (ptr + MINBPC(enc) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) {
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
}
@ -1346,10 +1347,10 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
case BT_LT:
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '!')) {
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '[')) {
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level;
ptr += MINBPC(enc);
}
@ -1358,10 +1359,10 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
case BT_RSQB:
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ']')) {
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc);
if (level == 0) {
*nextTokPtr = ptr;
@ -1412,7 +1413,7 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
#endif
break;
case BT_S:
if (CHAR_MATCHES(enc, ptr, '\t')) {
if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
*badPtr = ptr;
return 0;
}
@ -1508,8 +1509,8 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
&& nAtts < attsMax
&& atts[nAtts].normalized
&& (ptr == atts[nAtts].valuePtr
|| BYTE_TO_ASCII(enc, ptr) != ' '
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' '
|| BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
|| BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
atts[nAtts].normalized = 0;
break;
@ -1539,22 +1540,22 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
int result = 0;
/* skip &# */
ptr += 2*MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'x')) {
for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr);
switch (c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
result <<= 4;
result |= (c - '0');
result |= (c - ASCII_0);
break;
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
result <<= 4;
result += 10 + (c - 'A');
result += 10 + (c - ASCII_A);
break;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
result <<= 4;
result += 10 + (c - 'a');
result += 10 + (c - ASCII_a);
break;
}
if (result >= 0x110000)
@ -1562,10 +1563,10 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
}
}
else {
for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) {
for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr);
result *= 10;
result += (c - '0');
result += (c - ASCII_0);
if (result >= 0x110000)
return -1;
}
@ -1578,46 +1579,46 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
{
switch ((end - ptr)/MINBPC(enc)) {
case 2:
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) {
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'l':
return '<';
case 'g':
return '>';
case ASCII_l:
return ASCII_LT;
case ASCII_g:
return ASCII_GT;
}
}
break;
case 3:
if (CHAR_MATCHES(enc, ptr, 'a')) {
if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'm')) {
if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'p'))
return '&';
if (CHAR_MATCHES(enc, ptr, ASCII_p))
return ASCII_AMP;
}
}
break;
case 4:
switch (BYTE_TO_ASCII(enc, ptr)) {
case 'q':
case ASCII_q:
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'u')) {
if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'o')) {
if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 't'))
return '"';
if (CHAR_MATCHES(enc, ptr, ASCII_t))
return ASCII_QUOT;
}
}
break;
case 'a':
case ASCII_a:
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'p')) {
if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 'o')) {
if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, 's'))
return '\'';
if (CHAR_MATCHES(enc, ptr, ASCII_s))
return ASCII_APOS;
}
}
break;

View File

@ -65,7 +65,7 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha
if (ptr != end)
return 0;
*p = 0;
if (streqci(buf, "UTF-16") && enc->minBytesPerChar == 2)
if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2)
return enc;
i = getEncodingIndex(buf);
if (i == UNKNOWN_ENC)