Deal with externally specified encodings

This commit is contained in:
James Clark 1998-02-07 04:53:44 +00:00
parent da063a16cf
commit 0a8e3db8c7
4 changed files with 63 additions and 20 deletions

View File

@ -84,6 +84,7 @@ typedef enum XML_Error Processor(XML_Parser parser,
static Processor prologProcessor; static Processor prologProcessor;
static Processor contentProcessor; static Processor contentProcessor;
static Processor epilogProcessor; static Processor epilogProcessor;
static Processor errorProcessor;
static enum XML_Error static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
@ -210,7 +211,6 @@ XML_Parser XML_ParserCreate(const char *encodingName)
if (!parser) if (!parser)
return parser; return parser;
processor = prologProcessor; processor = prologProcessor;
XmlInitEncoding(&initEncoding, &encoding);
XmlPrologStateInit(&prologState); XmlPrologStateInit(&prologState);
userData = 0; userData = 0;
startElementHandler = 0; startElementHandler = 0;
@ -245,6 +245,10 @@ XML_Parser XML_ParserCreate(const char *encodingName)
return 0; return 0;
} }
dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
if (!XmlInitEncoding(&initEncoding, &encoding, encodingName)) {
errorCode = XML_ERROR_UNKNOWN_ENCODING;
processor = errorProcessor;
}
return parser; return parser;
} }
@ -1147,6 +1151,15 @@ enum XML_Error epilogProcessor(XML_Parser parser,
} }
} }
/* Processor that performs no parsing: it ignores the input range
   [s, end), leaves *nextPtr untouched, and simply reports errorCode.
   NOTE(review): errorCode is not declared in this block; presumably it
   is a macro or field holding the parser's recorded error -- confirm
   against its definition. */
static
enum XML_Error errorProcessor(XML_Parser parser,
const char *s,
const char *end,
const char **nextPtr)
{
return errorCode;
}
static enum XML_Error static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
const char *ptr, const char *end, const char *ptr, const char *end,

View File

@ -393,6 +393,24 @@ static const struct encoding big2_encoding = { VTABLE, 2 };
#undef PREFIX #undef PREFIX
/* Case-insensitive equality test for two NUL-terminated strings.
   Only the letters 'a'..'z' are folded to upper case before comparing;
   every other character must match exactly.
   Returns 1 when the strings are equal under that folding, 0 otherwise. */
static
int streqci(const char *s1, const char *s2)
{
  char left;
  char right;

  do {
    left = *s1++;
    right = *s2++;
    /* Fold lower-case letters onto their upper-case counterparts. */
    if (left >= 'a' && left <= 'z')
      left += 'A' - 'a';
    if (right >= 'a' && right <= 'z')
      right += 'A' - 'a';
    if (left != right)
      return 0;
    /* Both characters are equal here, so one terminator check suffices. */
  } while (left != '\0');

  return 1;
}
static static
int initScan(const ENCODING *enc, int state, const char *ptr, const char *end, int initScan(const ENCODING *enc, int state, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
@ -463,14 +481,27 @@ const ENCODING *XmlGetInternalEncoding(int e)
return 0; return 0;
} }
void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr) int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
{ {
if (name) {
if (streqci(name, "ISO-8859-1")) {
*encPtr = &latin1_encoding.enc;
return 1;
}
if (streqci(name, "UTF-8")) {
*encPtr = &utf8_encoding.enc;
return 1;
}
if (!streqci(name, "UTF-16"))
return 0;
}
p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog; p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog;
p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent; p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent;
p->initEnc.updatePosition = initUpdatePosition; p->initEnc.updatePosition = initUpdatePosition;
p->initEnc.minBytesPerChar = 1; p->initEnc.minBytesPerChar = 1;
p->encPtr = encPtr; p->encPtr = encPtr;
*encPtr = &(p->initEnc); *encPtr = &(p->initEnc);
return 1;
} }
static static
@ -581,15 +612,6 @@ int parsePseudoAttribute(const ENCODING *enc,
return 1; return 1;
} }
/* Exact (case-sensitive) equality test for two NUL-terminated strings.
   Returns 1 when both strings are identical, 0 otherwise. */
static
int streq(const char *s1, const char *s2)
{
  while (*s1 == *s2) {
    /* Matching terminators mean every preceding character matched too. */
    if (*s1 == '\0')
      return 1;
    ++s1;
    ++s2;
  }
  return 0;
}
static static
const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end) const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end)
{ {
@ -605,18 +627,15 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e
if ('a' <= buf[i] && buf[i] <= 'z') if ('a' <= buf[i] && buf[i] <= 'z')
buf[i] += 'A' - 'a'; buf[i] += 'A' - 'a';
} }
if (streq(buf, "UTF-8")) if (streqci(buf, "UTF-8"))
return &utf8_encoding.enc; return &utf8_encoding.enc;
if (streq(buf, "ISO-8859-1")) if (streqci(buf, "ISO-8859-1"))
return &latin1_encoding.enc; return &latin1_encoding.enc;
if (streq(buf, "UTF-16")) { if (streqci(buf, "UTF-16")) {
static const unsigned short n = 1; static const unsigned short n = 1;
if (enc->minBytesPerChar == 2) if (enc->minBytesPerChar == 2)
return enc; return enc;
if (*(const char *)&n) return &big2_encoding;
return &little2_encoding;
else
return &big2_encoding;
} }
return 0; return 0;
} }

View File

@ -222,7 +222,7 @@ int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING **namedEncodingPtr, const ENCODING **namedEncodingPtr,
int *standalonePtr); int *standalonePtr);
void XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **); int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING XMLTOKAPI *XmlGetInternalEncoding(int); const ENCODING XMLTOKAPI *XmlGetInternalEncoding(int);
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -184,6 +184,7 @@ int main(int argc, char **argv)
{ {
int i; int i;
const char *outputDir = 0; const char *outputDir = 0;
const char *encoding = 0;
int useFilemap = 1; int useFilemap = 1;
#ifdef _MSC_VER #ifdef _MSC_VER
@ -212,6 +213,16 @@ int main(int argc, char **argv)
outputDir = argv[i] + j + 1; outputDir = argv[i] + j + 1;
i++; i++;
} }
if (argv[i][j] == 'e') {
if (argv[i][j + 1] == '\0') {
if (++i == argc)
usage(argv[0]);
encoding = argv[i];
}
else
encoding = argv[i] + j + 1;
i++;
}
else if (argv[i][j] == '\0' && j > 1) else if (argv[i][j] == '\0' && j > 1)
i++; i++;
else else
@ -223,7 +234,7 @@ int main(int argc, char **argv)
FILE *fp = 0; FILE *fp = 0;
char *outName = 0; char *outName = 0;
int result; int result;
XML_Parser parser = XML_ParserCreate(0); XML_Parser parser = XML_ParserCreate(encoding);
if (outputDir) { if (outputDir) {
outName = malloc(strlen(outputDir) + strlen(argv[i]) + 2); outName = malloc(strlen(outputDir) + strlen(argv[i]) + 2);
strcpy(outName, outputDir); strcpy(outName, outputDir);