Initial Revision

This commit is contained in:
James Clark 1997-11-10 06:10:11 +00:00
parent 49946f3510
commit 7834fbbf8a
5 changed files with 865 additions and 0 deletions

32
expat/xmltok/asciitab.h Executable file
View File

@ -0,0 +1,32 @@
/* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_S, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_OTHER, BT_AMP, BT_APOS,
/* 0x28 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x2C */ BT_OTHER, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,

32
expat/xmltok/latin1tab.h Executable file
View File

@ -0,0 +1,32 @@
/* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME,
/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,

33
expat/xmltok/utf8tab.h Executable file
View File

@ -0,0 +1,33 @@
/* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
/* 0xF4 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
/* 0xF8 */ BT_LEAD5, BT_LEAD5, BT_LEAD5, BT_LEAD5,
/* 0xFC */ BT_LEAD6, BT_LEAD6, BT_MALFORM, BT_MALFORM,

735
expat/xmltok/xmltok_impl.c Executable file
View File

@ -0,0 +1,735 @@
#define DO_LEAD_CASE(n, ptr, end, ret) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return ret; \
ptr += n; \
break;
#define MULTIBYTE_CASES(ptr, end, ret) \
DO_LEAD_CASE(2, ptr, end, ret) \
DO_LEAD_CASE(3, ptr, end, ret) \
DO_LEAD_CASE(4, ptr, end, ret) \
DO_LEAD_CASE(5, ptr, end, ret) \
DO_LEAD_CASE(6, ptr, end, ret)
#define INVALID_CASES(ptr, nextTokPtr) \
case BT_NONXML: \
case BT_MALFORM: \
case BT_TRAIL: \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID;
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (!IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
case BT_NONASCII: \
if (!IS_NAME_CHAR(enc, ptr, MINBPC)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
case BT_NMSTRT: \
case BT_HEX: \
case BT_DIGIT: \
case BT_NAME: \
case BT_MINUS: \
ptr += MINBPC; \
break; \
CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(5, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(6, enc, ptr, end, nextTokPtr)
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
case BT_NONASCII: \
if (!IS_NMSTRT_CHAR(enc, ptr, MINBPC)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
case BT_NMSTRT: \
case BT_HEX: \
ptr += MINBPC; \
break; \
CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(5, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(6, enc, ptr, end, nextTokPtr)
#ifndef PREFIX
#define PREFIX(ident) ident
#endif
/* ptr points to character following "<!-" */
static
int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr != end) {
if (!CHAR_MATCHES(enc, ptr, '-')) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC;
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS:
if ((ptr += MINBPC) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '-')) {
if ((ptr += MINBPC) == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC;
return XML_TOK_COMMENT;
}
/* fall through */
default:
ptr += MINBPC;
break;
}
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "<!" */
static
int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr != end) {
if (BYTE_TYPE(enc, ptr) == BT_MINUS)
return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr);
do {
switch (BYTE_TYPE(enc, ptr)) {
MULTIBYTE_CASES(ptr, end, (*nextTokPtr = ptr, XML_TOK_PROLOG_CHARS))
INVALID_CASES(ptr, nextTokPtr)
case BT_APOS:
case BT_QUOT:
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_PROLOG_CHARS;
}
} while ((ptr += MINBPC) != end);
*nextTokPtr = ptr;
return XML_TOK_PROLOG_CHARS;
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "<?" */
static
int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
ptr += MINBPC;
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST:
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_PI;
}
break;
default:
ptr += MINBPC;
break;
}
}
return XML_TOK_PARTIAL;
case BT_QUEST:
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_PI;
}
/* fall through */
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "<![" */
static
int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
int i;
/* CDATA[]]> */
if (end - ptr < 9 * MINBPC)
return XML_TOK_PARTIAL;
for (i = 0; i < 6; i++, ptr += MINBPC) {
if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
end -= 2 * MINBPC;
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
INVALID_CASES(ptr, nextTokPtr)
case BT_RSQB:
if (CHAR_MATCHES(enc, ptr + MINBPC, ']')
&& CHAR_MATCHES(enc, ptr + 2 * MINBPC, '>')) {
*nextTokPtr = ptr + 3 * MINBPC;
return XML_TOK_CDATA_SECTION;
}
/* fall through */
default:
ptr += MINBPC;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "</" */
static
int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
break;
case BT_GT:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_END_TAG;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
case BT_GT:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_END_TAG;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "&#X" */
static
int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
break;
case BT_SEMI:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_CHAR_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "&#" */
static
int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr != end) {
if (CHAR_MATCHES(enc, ptr, 'x'))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC, end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
break;
case BT_SEMI:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_CHAR_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "&" */
static
int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(end, ptr, end, nextTokPtr)
case BT_NUM:
return PREFIX(scanCharRef)(enc, ptr + MINBPC, end, nextTokPtr);
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_ENTITY_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following first character of attribute name */
static
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
for (;;) {
int t;
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS)
break;
if (t != BT_S) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
/* fall through */
case BT_EQUALS:
{
int open;
for (;;) {
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
if (open != BT_S) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
ptr += MINBPC;
/* in attribute value */
for (;;) {
int t;
if (ptr == end)
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
case BT_AMP:
{
int tok = PREFIX(scanRef)(enc, ptr + MINBPC, end, &ptr);
if (tok <= 0) {
if (tok == XML_TOK_INVALID)
*nextTokPtr = ptr;
return tok;
}
break;
}
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
ptr += MINBPC;
break;
}
}
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
continue;
case BT_GT:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_START_TAG;
case BT_SOL:
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC;
return XML_TOK_EMPTY_ELEMENT;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
}
break;
}
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "<" */
static
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL:
if ((ptr += MINBPC) == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_LSQB:
return PREFIX(scanCdataSection)(enc, ptr + MINBPC, end, nextTokPtr);
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
case BT_QUEST:
return PREFIX(scanPi)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_SOL:
return PREFIX(scanEndTag)(enc, ptr + MINBPC, end, nextTokPtr);
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* we have a start-tag */
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
{
ptr += MINBPC;
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
goto gt;
case BT_SOL:
goto sol;
case BT_S:
ptr += MINBPC;
continue;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
}
return XML_TOK_PARTIAL;
}
case BT_GT:
gt:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_START_TAG;
case BT_SOL:
sol:
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>')) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC;
return XML_TOK_EMPTY_ELEMENT;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
static
int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_NONE;
#if MINBPC > 1
{
size_t n = end - ptr;
if (n & (MINBPC - 1)) {
n &= ~(MINBPC - 1);
if (n == 0)
return XML_TOK_PARTIAL;
end = ptr + n;
}
}
#endif
switch (BYTE_TYPE(enc, ptr)) {
case BT_LT:
return PREFIX(scanLt)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_AMP:
return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_RSQB:
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ']'))
break;
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, '>'))
break;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
INVALID_CASES(ptr, nextTokPtr)
MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
default:
ptr += MINBPC;
break;
}
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
MULTIBYTE_CASES(ptr, end, (*nextTokPtr = ptr, XML_TOK_DATA_CHARS))
case BT_RSQB:
if (ptr + MINBPC != end) {
if (!CHAR_MATCHES(enc, ptr + MINBPC, ']')) {
ptr += MINBPC;
break;
}
if (ptr + 2*MINBPC != end) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC, '>')) {
ptr += MINBPC;
break;
}
*nextTokPtr = ptr + 2*MINBPC;
return XML_TOK_INVALID;
}
}
/* fall through */
case BT_AMP:
case BT_LT:
case BT_NONXML:
case BT_MALFORM:
case BT_TRAIL:
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default:
ptr += MINBPC;
break;
}
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
}
static
int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_NONE;
#if MINBPC > 1
{
size_t n = end - ptr;
if (n & (MINBPC - 1)) {
n &= ~(MINBPC - 1);
if (n == 0)
return XML_TOK_PARTIAL;
end = ptr + n;
}
}
#endif
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
case BT_QUOT:
{
for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
if (BYTE_TYPE(enc, ptr) == BT_QUOT) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_LITERAL;
}
}
return XML_TOK_PARTIAL;
}
case BT_APOS:
{
for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
if (BYTE_TYPE(enc, ptr) == BT_APOS) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_LITERAL;
}
}
return XML_TOK_PARTIAL;
}
case BT_LT:
{
ptr += MINBPC;
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_QUEST:
return PREFIX(scanPi)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_NMSTRT:
case BT_HEX:
case BT_NONASCII:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
case BT_LEAD5:
return PREFIX(contentTok)(enc, ptr - MINBPC, end, nextTokPtr);
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_S:
do {
ptr += MINBPC;
} while (ptr != end && BYTE_TYPE(enc, ptr) == BT_S);
*nextTokPtr = ptr;
return XML_TOK_PROLOG_S;
default:
ptr += MINBPC;
break;
}
for (; ptr != end;) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_LT:
case BT_QUOT:
case BT_APOS:
case BT_NONXML:
case BT_MALFORM:
case BT_TRAIL:
case BT_S:
*nextTokPtr = ptr;
return XML_TOK_PROLOG_CHARS;
MULTIBYTE_CASES(ptr, end, (*nextTokPtr = ptr, XML_TOK_PROLOG_CHARS))
default:
ptr += MINBPC;
break;
}
}
*nextTokPtr = ptr;
return XML_TOK_PROLOG_CHARS;
}
#undef DO_LEAD_CASE
#undef MULTIBYTE_CASES
#undef INVALID_CASES
#undef CHECK_NAME_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES

33
expat/xmltok/xmltok_impl.h Executable file
View File

@ -0,0 +1,33 @@
enum {
BT_NONXML,
BT_MALFORM,
BT_LT,
BT_AMP,
BT_LEAD2,
BT_LEAD3,
BT_LEAD4,
BT_LEAD5,
BT_LEAD6,
BT_TRAIL,
BT_GT,
BT_QUOT,
BT_APOS,
BT_EQUALS,
BT_QUEST,
BT_EXCL,
BT_SOL,
BT_SEMI,
BT_NUM,
BT_LSQB,
BT_RSQB,
BT_S,
BT_NMSTRT,
BT_HEX,
BT_DIGIT,
BT_NAME,
BT_MINUS,
BT_OTHER, /* known not to be a name or name start character */
BT_NONASCII /* might be a name or name start character */
};
#include <stddef.h>