From 0a8e3db8c7411c197e93f9b8e89dcad820243a0d Mon Sep 17 00:00:00 2001 From: James Clark Date: Sat, 7 Feb 1998 04:53:44 +0000 Subject: [PATCH] Deal with externally specified encodings --- expat/xmlparse/xmlparse.c | 15 ++++++++++- expat/xmltok/xmltok.c | 53 ++++++++++++++++++++++++++------------- expat/xmltok/xmltok.h | 2 +- expat/xmlwf/xmlwf.c | 13 +++++++++- 4 files changed, 63 insertions(+), 20 deletions(-) diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c index bc767e10..75a0dca6 100755 --- a/expat/xmlparse/xmlparse.c +++ b/expat/xmlparse/xmlparse.c @@ -84,6 +84,7 @@ typedef enum XML_Error Processor(XML_Parser parser, static Processor prologProcessor; static Processor contentProcessor; static Processor epilogProcessor; +static Processor errorProcessor; static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, @@ -210,7 +211,6 @@ XML_Parser XML_ParserCreate(const char *encodingName) if (!parser) return parser; processor = prologProcessor; - XmlInitEncoding(&initEncoding, &encoding); XmlPrologStateInit(&prologState); userData = 0; startElementHandler = 0; @@ -245,6 +245,10 @@ XML_Parser XML_ParserCreate(const char *encodingName) return 0; } dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; + if (!XmlInitEncoding(&initEncoding, &encoding, encodingName)) { + errorCode = XML_ERROR_UNKNOWN_ENCODING; + processor = errorProcessor; + } return parser; } @@ -1147,6 +1151,15 @@ enum XML_Error epilogProcessor(XML_Parser parser, } } +static +enum XML_Error errorProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + return errorCode; +} + static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, const char *ptr, const char *end, diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index 2ec85619..96ee85f4 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -393,6 +393,24 @@ static const struct encoding big2_encoding = { VTABLE, 2 }; #undef PREFIX +static +int streqci(const char *s1, const char *s2) +{ + for (;;) { + char c1 = *s1++; + char c2 = *s2++; + if ('a' <= c1 && c1 <= 'z') + c1 += 'A' - 'a'; + if ('a' <= c2 && c2 <= 'z') + c2 += 'A' - 'a'; + if (c1 != c2) + return 0; + if (!c1) + break; + } + return 1; +} + static int initScan(const ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr) @@ -463,14 +481,27 @@ const ENCODING *XmlGetInternalEncoding(int e) return 0; } -void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr) +int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) { + if (name) { + if (streqci(name, "ISO-8859-1")) { + *encPtr = &latin1_encoding.enc; + return 1; + } + if (streqci(name, "UTF-8")) { + *encPtr = &utf8_encoding.enc; + return 1; + } + if (!streqci(name, "UTF-16")) + return 0; + } p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog; p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent; p->initEnc.updatePosition = initUpdatePosition; p->initEnc.minBytesPerChar = 1; p->encPtr = encPtr; *encPtr = &(p->initEnc); + return 1; } static @@ -581,15 +612,6 @@ int parsePseudoAttribute(const ENCODING *enc, return 1; } -static -int streq(const char *s1, const char *s2) -{ - for (; *s1 == *s2; s1++, s2++) - if (!*s1) - return 1; - return 0; -} - static const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end) { @@ -605,18 +627,15 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e if ('a' <= buf[i] && buf[i] <= 'z') buf[i] += 'A' - 'a'; } - if (streq(buf, "UTF-8")) + if (streqci(buf, "UTF-8")) return &utf8_encoding.enc; - if (streq(buf, "ISO-8859-1")) + if (streqci(buf, "ISO-8859-1")) return &latin1_encoding.enc; - if (streq(buf, "UTF-16")) { + if (streqci(buf, "UTF-16")) { static const unsigned short n = 1; if (enc->minBytesPerChar == 2) return enc; - if (*(const char *)&n) - return &little2_encoding; - else - return &big2_encoding; + return &big2_encoding; } return 0; } diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index 4dc4d439..2e1dcccf 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -222,7 +222,7 @@ int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING **namedEncodingPtr, int *standalonePtr); -void XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **); +int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING XMLTOKAPI *XmlGetInternalEncoding(int); #ifdef __cplusplus diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index ce52457c..e3a5b22b 100755 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -184,6 +184,7 @@ int main(int argc, char **argv) { int i; const char *outputDir = 0; + const char *encoding = 0; int useFilemap = 1; #ifdef _MSC_VER @@ -212,6 +213,16 @@ int main(int argc, char **argv) outputDir = argv[i] + j + 1; i++; } + if (argv[i][j] == 'e') { + if (argv[i][j + 1] == '\0') { + if (++i == argc) + usage(argv[0]); + encoding = argv[i]; + } + else + encoding = argv[i] + j + 1; + i++; + } else if (argv[i][j] == '\0' && j > 1) i++; else @@ -223,7 +234,7 @@ int main(int argc, char **argv) FILE *fp = 0; char *outName = 0; int result; - XML_Parser parser = XML_ParserCreate(0); + XML_Parser parser = XML_ParserCreate(encoding); if (outputDir) { outName = malloc(strlen(outputDir) + strlen(argv[i]) + 2); strcpy(outName, outputDir);