/* Listing metadata: 2122 lines, 54 KiB, C, executable file. */
/*
The contents of this file are subject to the Mozilla Public License
Version 1.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS"
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific language governing rights and limitations
under the License.

The Original Code is expat.

The Initial Developer of the Original Code is James Clark.
Portions created by James Clark are Copyright (C) 1998
James Clark. All Rights Reserved.

Contributor(s):
*/
|
|
|
|
#include "xmldef.h"
|
|
#include "xmlparse.h"
|
|
#include "xmltok.h"
|
|
#include "xmlrole.h"
|
|
#include "hashtable.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Initial allocation sizes. */
#define INIT_TAG_BUF_SIZE   32    /* bytes in a new TAG's name buffer */
#define INIT_DATA_BUF_SIZE  1024  /* bytes in the character-data scratch buffer */
#define INIT_ATTS_SIZE      16    /* ATTRIBUTE slots in the attribute array */
#define INIT_BLOCK_SIZE     1024  /* presumably the string-pool block size -- TODO confirm */
#define INIT_BUFFER_SIZE    1024  /* base size (doubled before first use) of the parse buffer */
|
|
|
|
/* One open element.  Entries are chained through `parent`, both on the
   stack of open elements and on the parser's list of recycled entries. */
typedef struct tag {
  struct tag *parent;    /* enclosing element, or next recycled entry */
  const char *rawName;   /* element name as it appears in the input encoding */
  int rawNameLength;     /* length of rawName in bytes */
  const char *name;      /* converted, null-terminated name; 0 if not converted */
  char *buf;             /* heap buffer owned by this entry */
  char *bufEnd;          /* one past the last byte of buf */
} TAG;
|
|
|
|
/* A general entity known to the DTD. */
typedef struct {
  const char *name;
  const char *textPtr;   /* replacement text of an internal entity; 0 otherwise */
  int textLen;           /* length of the replacement text in bytes */
  const char *systemId;  /* passed to the external entity reference handler */
  const char *base;      /* presumably the base in effect at declaration -- TODO confirm */
  const char *publicId;  /* passed to the external entity reference handler */
  const char *notation;  /* non-null makes a reference in content a binary-entity error */
  char open;             /* set while the entity is being expanded (recursion check) */
  char magic;            /* non-zero: text goes to the character data handler unparsed */
} ENTITY;
|
|
|
|
/* A chunk of string-pool storage, linked into a list through `next`. */
typedef struct block {
  struct block *next;
  int size;
  char s[1];   /* presumably over-allocated so that s holds `size` bytes -- TODO confirm */
} BLOCK;
|
|
|
|
typedef struct {
|
|
BLOCK *blocks;
|
|
BLOCK *freeBlocks;
|
|
const char *end;
|
|
char *ptr;
|
|
char *start;
|
|
} STRING_POOL;
|
|
|
|
/* Identity of an attribute name.  name[-1] is a scratch byte that records
   whether the attribute has already been specified on the element
   currently being processed. */
typedef struct {
  char *name;
  char maybeTokenized;   /* non-zero: consult the element's declarations for isCdata */
} ATTRIBUTE_ID;
|
|
|
|
typedef struct {
|
|
const ATTRIBUTE_ID *id;
|
|
char isCdata;
|
|
const char *value;
|
|
} DEFAULT_ATTRIBUTE;
|
|
|
|
typedef struct {
|
|
const char *name;
|
|
int nDefaultAtts;
|
|
int allocDefaultAtts;
|
|
DEFAULT_ATTRIBUTE *defaultAtts;
|
|
} ELEMENT_TYPE;
|
|
|
|
typedef struct {
|
|
HASH_TABLE generalEntities;
|
|
HASH_TABLE elementTypes;
|
|
HASH_TABLE attributeIds;
|
|
STRING_POOL pool;
|
|
int complete;
|
|
int standalone;
|
|
const char *base;
|
|
} DTD;
|
|
|
|
typedef enum XML_Error Processor(XML_Parser parser,
|
|
const char *start,
|
|
const char *end,
|
|
const char **endPtr);
|
|
|
|
static Processor prologProcessor;
|
|
static Processor contentProcessor;
|
|
static Processor cdataSectionProcessor;
|
|
static Processor epilogProcessor;
|
|
static Processor errorProcessor;
|
|
static Processor externalEntityInitProcessor;
|
|
static Processor externalEntityContentProcessor;
|
|
|
|
static enum XML_Error
|
|
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
|
|
const char *start, const char *end, const char **endPtr);
|
|
static enum XML_Error
|
|
doCdataSection(XML_Parser parser, const char **startPtr, const char *end, const char **nextPtr);
|
|
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *tagName, const char *s);
|
|
static int
|
|
defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const char *dfltValue);
|
|
static enum XML_Error
|
|
storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
|
|
STRING_POOL *);
|
|
static enum XML_Error
|
|
appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
|
|
STRING_POOL *);
|
|
static ATTRIBUTE_ID *
|
|
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
|
|
static enum XML_Error
|
|
storeEntityValue(XML_Parser parser, const char *start, const char *end);
|
|
static int
|
|
reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
|
|
|
|
static const char *getOpenEntityNames(XML_Parser parser);
|
|
static int setOpenEntityNames(XML_Parser parser, const char *openEntityNames);
|
|
static void normalizePublicId(char *s);
|
|
static int dtdInit(DTD *);
|
|
static void dtdDestroy(DTD *);
|
|
static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
|
|
static void poolInit(STRING_POOL *);
|
|
static void poolClear(STRING_POOL *);
|
|
static void poolDestroy(STRING_POOL *);
|
|
static char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
|
|
const char *ptr, const char *end);
|
|
static char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
|
|
const char *ptr, const char *end);
|
|
static int poolGrow(STRING_POOL *pool);
|
|
static const char *poolCopyString(STRING_POOL *pool, const char *s);
|
|
static const char *poolCopyStringN(STRING_POOL *pool, const char *s, int n);
|
|
|
|
/* Accessors for a STRING_POOL.  Each macro takes a pointer to the pool and
   may evaluate that argument more than once, so pass an expression that
   has no side effects.  Every use of an argument is parenthesized so that
   expressions such as &somePool expand correctly. */

/* First byte of the string under construction. */
#define poolStart(pool) ((pool)->start)
/* One past the last byte written so far. */
#define poolEnd(pool) ((pool)->ptr)
/* Number of bytes in the string under construction. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drops the most recently appended byte. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The most recently appended byte (an lvalue). */
#define poolLastByte(pool) (((pool)->ptr)[-1])
/* Abandons the string under construction. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Keeps the string under construction and begins a new one after it. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Appends one byte, calling poolGrow first when the storage is full.
   Evaluates to 0 if poolGrow reports failure and to 1 otherwise. */
#define poolAppendByte(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
|
|
|
|
typedef struct {
|
|
char *buffer;
|
|
/* first character to be parsed */
|
|
const char *bufferPtr;
|
|
/* past last character to be parsed */
|
|
char *bufferEnd;
|
|
/* allocated end of buffer */
|
|
const char *bufferLim;
|
|
long bufferEndByteIndex;
|
|
char *dataBuf;
|
|
char *dataBufEnd;
|
|
void *userData;
|
|
XML_StartElementHandler startElementHandler;
|
|
XML_EndElementHandler endElementHandler;
|
|
XML_CharacterDataHandler characterDataHandler;
|
|
XML_ProcessingInstructionHandler processingInstructionHandler;
|
|
XML_ExternalEntityRefHandler externalEntityRefHandler;
|
|
const ENCODING *encoding;
|
|
INIT_ENCODING initEncoding;
|
|
PROLOG_STATE prologState;
|
|
Processor *processor;
|
|
enum XML_Error errorCode;
|
|
const char *errorPtr;
|
|
int tagLevel;
|
|
ENTITY *declEntity;
|
|
ELEMENT_TYPE *declElementType;
|
|
ATTRIBUTE_ID *declAttributeId;
|
|
char declAttributeIsCdata;
|
|
DTD dtd;
|
|
TAG *tagStack;
|
|
TAG *freeTagList;
|
|
int attsSize;
|
|
ATTRIBUTE *atts;
|
|
POSITION position;
|
|
long errorByteIndex;
|
|
STRING_POOL tempPool;
|
|
STRING_POOL temp2Pool;
|
|
char *groupConnector;
|
|
unsigned groupSize;
|
|
int hadExternalDoctype;
|
|
} Parser;
|
|
|
|
/* Field accessors.  Each macro below turns a bare field name into a member
   access on the Parser behind the in-scope variable `parser`.  As a
   consequence, none of these names can be used for a local variable or
   parameter anywhere after this point. */
#define userData                      (((Parser *)parser)->userData)
#define startElementHandler           (((Parser *)parser)->startElementHandler)
#define endElementHandler             (((Parser *)parser)->endElementHandler)
#define characterDataHandler          (((Parser *)parser)->characterDataHandler)
#define processingInstructionHandler  (((Parser *)parser)->processingInstructionHandler)
#define externalEntityRefHandler      (((Parser *)parser)->externalEntityRefHandler)
#define encoding                      (((Parser *)parser)->encoding)
#define initEncoding                  (((Parser *)parser)->initEncoding)
#define prologState                   (((Parser *)parser)->prologState)
#define processor                     (((Parser *)parser)->processor)
#define errorCode                     (((Parser *)parser)->errorCode)
#define errorPtr                      (((Parser *)parser)->errorPtr)
#define errorByteIndex                (((Parser *)parser)->errorByteIndex)
#define position                      (((Parser *)parser)->position)
#define tagLevel                      (((Parser *)parser)->tagLevel)
#define buffer                        (((Parser *)parser)->buffer)
#define bufferPtr                     (((Parser *)parser)->bufferPtr)
#define bufferEnd                     (((Parser *)parser)->bufferEnd)
#define bufferEndByteIndex            (((Parser *)parser)->bufferEndByteIndex)
#define bufferLim                     (((Parser *)parser)->bufferLim)
#define dataBuf                       (((Parser *)parser)->dataBuf)
#define dataBufEnd                    (((Parser *)parser)->dataBufEnd)
#define dtd                           (((Parser *)parser)->dtd)
#define declEntity                    (((Parser *)parser)->declEntity)
#define declElementType               (((Parser *)parser)->declElementType)
#define declAttributeId               (((Parser *)parser)->declAttributeId)
#define declAttributeIsCdata          (((Parser *)parser)->declAttributeIsCdata)
#define freeTagList                   (((Parser *)parser)->freeTagList)
#define tagStack                      (((Parser *)parser)->tagStack)
#define atts                          (((Parser *)parser)->atts)
#define attsSize                      (((Parser *)parser)->attsSize)
#define tempPool                      (((Parser *)parser)->tempPool)
#define temp2Pool                     (((Parser *)parser)->temp2Pool)
#define groupConnector                (((Parser *)parser)->groupConnector)
#define groupSize                     (((Parser *)parser)->groupSize)
#define hadExternalDoctype            (((Parser *)parser)->hadExternalDoctype)
|
|
|
|
XML_Parser XML_ParserCreate(const char *encodingName)
|
|
{
|
|
XML_Parser parser = malloc(sizeof(Parser));
|
|
if (!parser)
|
|
return parser;
|
|
processor = prologProcessor;
|
|
XmlPrologStateInit(&prologState);
|
|
userData = 0;
|
|
startElementHandler = 0;
|
|
endElementHandler = 0;
|
|
characterDataHandler = 0;
|
|
processingInstructionHandler = 0;
|
|
externalEntityRefHandler = 0;
|
|
buffer = 0;
|
|
bufferPtr = 0;
|
|
bufferEnd = 0;
|
|
bufferEndByteIndex = 0;
|
|
bufferLim = 0;
|
|
declElementType = 0;
|
|
declAttributeId = 0;
|
|
declEntity = 0;
|
|
memset(&position, 0, sizeof(POSITION));
|
|
errorCode = XML_ERROR_NONE;
|
|
errorByteIndex = 0;
|
|
errorPtr = 0;
|
|
tagLevel = 0;
|
|
tagStack = 0;
|
|
freeTagList = 0;
|
|
attsSize = INIT_ATTS_SIZE;
|
|
atts = malloc(attsSize * sizeof(ATTRIBUTE));
|
|
dataBuf = malloc(INIT_DATA_BUF_SIZE);
|
|
groupSize = 0;
|
|
groupConnector = 0;
|
|
hadExternalDoctype = 0;
|
|
poolInit(&tempPool);
|
|
poolInit(&temp2Pool);
|
|
if (!dtdInit(&dtd) || !atts || !dataBuf) {
|
|
XML_ParserFree(parser);
|
|
return 0;
|
|
}
|
|
dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
|
|
if (!XmlInitEncoding(&initEncoding, &encoding, encodingName)) {
|
|
errorCode = XML_ERROR_UNKNOWN_ENCODING;
|
|
processor = errorProcessor;
|
|
}
|
|
return parser;
|
|
}
|
|
|
|
/* Creates a parser for an external entity referenced from oldParser's
   document.  The new parser receives oldParser's handlers and user data,
   a copy of its DTD and the given open-entity names, and starts in
   externalEntityInitProcessor.
   Returns 0 if the parser cannot be created or set up. */
XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
                                          const char *openEntityNames,
                                          const char *encodingName)
{
  /* The field macros act on `parser`, so point it at the old parser while
     its settings are captured. */
  XML_Parser parser = oldParser;
  DTD *parentDtd = &dtd;
  void *parentUserData = userData;
  XML_StartElementHandler parentStart = startElementHandler;
  XML_EndElementHandler parentEnd = endElementHandler;
  XML_CharacterDataHandler parentCharData = characterDataHandler;
  XML_ProcessingInstructionHandler parentPi = processingInstructionHandler;
  XML_ExternalEntityRefHandler parentExtRef = externalEntityRefHandler;

  parser = XML_ParserCreate(encodingName);
  if (!parser)
    return 0;

  /* From here on the field macros refer to the new parser. */
  userData = parentUserData;
  startElementHandler = parentStart;
  endElementHandler = parentEnd;
  characterDataHandler = parentCharData;
  processingInstructionHandler = parentPi;
  externalEntityRefHandler = parentExtRef;

  if (dtdCopy(&dtd, parentDtd) && setOpenEntityNames(parser, openEntityNames)) {
    processor = externalEntityInitProcessor;
    return parser;
  }

  XML_ParserFree(parser);
  return 0;
}
|
|
|
|
/* Releases a parser and everything it owns: the TAG entries on the open
   element stack and on the recycle list, both temporary string pools, the
   DTD, the attribute array, the group connector array and the input and
   data buffers. */
void XML_ParserFree(XML_Parser parser)
{
  TAG *entry;

  /* Entries for elements that are still open. */
  entry = tagStack;
  while (entry) {
    TAG *dead = entry;
    entry = entry->parent;
    free(dead->buf);
    free(dead);
  }

  /* Entries waiting to be reused. */
  entry = freeTagList;
  while (entry) {
    TAG *dead = entry;
    entry = entry->parent;
    free(dead->buf);
    free(dead);
  }

  poolDestroy(&tempPool);
  poolDestroy(&temp2Pool);
  dtdDestroy(&dtd);
  free((void *)atts);
  free(groupConnector);
  free(buffer);
  free(dataBuf);
  free(parser);
}
|
|
|
|
/* Stores the application pointer that is later passed as the first
   argument to the element and character data handlers. */
void XML_SetUserData(XML_Parser parser, void *p)
{
  userData = p;
}
|
|
|
|
/* Returns the pointer most recently stored with XML_SetUserData
   (0 for a freshly created parser). */
void *XML_GetUserData(XML_Parser parser)
{
  return userData;
}
|
|
|
|
/* Sets the base string that is passed to the external entity reference
   handler.  A non-null p is copied into the DTD's string pool; a null p
   clears the base.
   Returns 1 on success and 0 if the copy could not be made, in which case
   the previous base is left in place. */
int XML_SetBase(XML_Parser parser, const char *p)
{
  const char *copy = 0;

  if (p) {
    copy = poolCopyString(&dtd.pool, p);
    if (!copy)
      return 0;
  }
  dtd.base = copy;
  return 1;
}
|
|
|
|
void XML_SetElementHandler(XML_Parser parser,
|
|
XML_StartElementHandler start,
|
|
XML_EndElementHandler end)
|
|
{
|
|
startElementHandler = start;
|
|
endElementHandler = end;
|
|
}
|
|
|
|
void XML_SetCharacterDataHandler(XML_Parser parser,
|
|
XML_CharacterDataHandler handler)
|
|
{
|
|
characterDataHandler = handler;
|
|
}
|
|
|
|
void XML_SetProcessingInstructionHandler(XML_Parser parser,
|
|
XML_ProcessingInstructionHandler handler)
|
|
{
|
|
processingInstructionHandler = handler;
|
|
}
|
|
|
|
void XML_SetExternalEntityRefHandler(XML_Parser parser,
|
|
XML_ExternalEntityRefHandler handler)
|
|
{
|
|
externalEntityRefHandler = handler;
|
|
}
|
|
|
|
/* Parses the next len bytes of the document, found at s.
   isFinal must be non-zero for the last piece of input; calling with
   len == 0 and isFinal set just finishes the parse.
   Returns 1 on success.  Returns 0 on error, with errorCode set and, when
   the failing position is known, the error byte index and line/column
   position updated. */
int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
{
  bufferEndByteIndex += len;
  if (len == 0) {
    if (!isFinal)
      return 1;
    /* Flush whatever is still buffered as the end of the document. */
    errorCode = processor(parser, bufferPtr, bufferEnd, 0);
    return errorCode == XML_ERROR_NONE;
  }
  else if (bufferPtr == bufferEnd) {
    /* Nothing is buffered, so parse directly from the caller's memory and
       copy only the unconsumed tail. */
    const char *end;
    int nLeftOver;
    errorCode = processor(parser, s, s + len,
                          isFinal ? (const char **)0 : &end);
    if (errorCode != XML_ERROR_NONE) {
      if (errorPtr) {
        errorByteIndex = bufferEndByteIndex - (s + len - errorPtr);
        XmlUpdatePosition(encoding, s, errorPtr, &position);
      }
      return 0;
    }
    if (isFinal)
      return 1;
    XmlUpdatePosition(encoding, s, end, &position);
    nLeftOver = s + len - end;
    if (nLeftOver) {
      if (buffer == 0 || nLeftOver > bufferLim - buffer) {
        /* Compute the size as size_t so that doubling a large len cannot
           overflow int, and keep the old buffer if realloc fails. */
        size_t newSize = (size_t)len * 2;
        char *newBuf = realloc(buffer, newSize);
        if (!newBuf) {
          errorCode = XML_ERROR_NO_MEMORY;
          return 0;
        }
        buffer = newBuf;
        bufferLim = newBuf + newSize;
      }
      memcpy(buffer, end, nLeftOver);
      bufferPtr = buffer;
      bufferEnd = buffer + nLeftOver;
    }
    return 1;
  }
  else {
    /* Earlier input is still pending: append to it and parse the buffer. */
    void *dest = XML_GetBuffer(parser, len);
    if (!dest)
      return 0;   /* XML_GetBuffer has already set errorCode */
    memcpy(dest, s, len);
    return XML_ParseBuffer(parser, len, isFinal);
  }
}
|
|
|
|
/* Parses len bytes that the caller has placed at the address returned by
   XML_GetBuffer.  isFinal has the same meaning as for XML_Parse.
   Returns 1 on success and 0 on error; on error the error byte index and
   position are updated when the failing position is known. */
int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
{
  const char *parseStart = bufferPtr;
  /* For non-final input the stage moves bufferPtr past what it consumed. */
  const char **resumeAt = isFinal ? (const char **)0 : &bufferPtr;

  bufferEnd += len;
  errorCode = processor(parser, bufferPtr, bufferEnd, resumeAt);

  if (errorCode != XML_ERROR_NONE) {
    if (errorPtr) {
      errorByteIndex = bufferEndByteIndex - (bufferEnd - errorPtr);
      XmlUpdatePosition(encoding, parseStart, errorPtr, &position);
    }
    return 0;
  }

  if (!isFinal)
    XmlUpdatePosition(encoding, parseStart, bufferPtr, &position);
  return 1;
}
|
|
|
|
/* Returns the address at which the caller may store up to len bytes of
   input before calling XML_ParseBuffer.  Unparsed data already in the
   buffer is preserved: it is moved to the front of the buffer, or copied
   into a larger one, when there is not enough room after it.
   Returns 0 with errorCode set to XML_ERROR_NO_MEMORY if len is negative,
   if the required size cannot be represented, or if allocation fails. */
void *XML_GetBuffer(XML_Parser parser, int len)
{
  if (len < 0) {
    errorCode = XML_ERROR_NO_MEMORY;
    return 0;
  }
  if (len > bufferLim - bufferEnd) {
    /* Sizes are computed as size_t, with explicit checks, so that large
       requests cannot overflow. */
    size_t keep = (size_t)(bufferEnd - bufferPtr);
    size_t neededSize;
    if ((size_t)len > (size_t)-1 - keep) {
      errorCode = XML_ERROR_NO_MEMORY;
      return 0;
    }
    neededSize = keep + (size_t)len;
    if (neededSize <= (size_t)(bufferLim - buffer)) {
      /* Enough total room: slide the pending data to the front. */
      memmove(buffer, bufferPtr, keep);
      bufferEnd = buffer + keep;
      bufferPtr = buffer;
    }
    else {
      char *newBuf;
      size_t bufferSize = (size_t)(bufferLim - bufferPtr);
      if (bufferSize == 0)
        bufferSize = INIT_BUFFER_SIZE;
      do {
        if (bufferSize > (size_t)-1 / 2) {
          errorCode = XML_ERROR_NO_MEMORY;
          return 0;
        }
        bufferSize *= 2;
      } while (bufferSize < neededSize);
      newBuf = malloc(bufferSize);
      if (newBuf == 0) {
        errorCode = XML_ERROR_NO_MEMORY;
        return 0;
      }
      bufferLim = newBuf + bufferSize;
      if (bufferPtr) {
        memcpy(newBuf, bufferPtr, keep);
        free(buffer);
      }
      bufferEnd = newBuf + keep;
      bufferPtr = buffer = newBuf;
    }
  }
  return bufferEnd;
}
|
|
|
|
/* Returns the parser's current error code (an enum XML_Error value);
   XML_ERROR_NONE means no error has been recorded. */
int XML_GetErrorCode(XML_Parser parser)
{
  return errorCode;
}
|
|
|
|
/* Returns the tracked line number plus one, i.e. the line of the last
   recorded position counted from 1. */
int XML_GetErrorLineNumber(XML_Parser parser)
{
  return 1 + position.lineNumber;
}
|
|
|
|
/* Returns the tracked column number unchanged (unlike the line number,
   no offset is added). */
int XML_GetErrorColumnNumber(XML_Parser parser)
{
  return position.columnNumber;
}
|
|
|
|
/* Returns the byte offset recorded for the last error; it is 0 until an
   error with a known position has been reported. */
long XML_GetErrorByteIndex(XML_Parser parser)
{
  return errorByteIndex;
}
|
|
|
|
/* Returns a human-readable description of an error code, or 0 when code
   is XML_ERROR_NONE (0) or outside the range of known codes.  The table
   is indexed by the numeric value of enum XML_Error, so its order must
   match the declaration of that enum. */
const char *XML_ErrorString(int code)
{
  static const char *const message[] = {
    0,
    "out of memory",
    "syntax error",
    "no element found",
    "not well-formed",
    "unclosed token",
    /* This slot used to repeat "unclosed token". */
    "partial character",
    "mismatched tag",
    "duplicate attribute",
    "junk after document element",
    "illegal parameter entity reference",
    "undefined entity",
    "recursive entity reference",
    "asynchronous entity",
    "reference to invalid character number",
    "reference to binary entity",
    "reference to external entity in attribute",
    "xml processing instruction not at start of external entity",
    "unknown encoding",
    "encoding specified in XML declaration is incorrect",
    "unclosed CDATA section",
    "error in processing external entity reference"
  };
  /* code is known to be positive before it is converted to unsigned. */
  if (code > 0 && (unsigned)code < sizeof(message)/sizeof(message[0]))
    return message[code];
  return 0;
}
|
|
|
|
static
|
|
enum XML_Error contentProcessor(XML_Parser parser,
|
|
const char *start,
|
|
const char *end,
|
|
const char **endPtr)
|
|
{
|
|
return doContent(parser, 0, encoding, start, end, endPtr);
|
|
}
|
|
|
|
static
|
|
enum XML_Error externalEntityInitProcessor(XML_Parser parser,
|
|
const char *start,
|
|
const char *end,
|
|
const char **endPtr)
|
|
{
|
|
const char *next;
|
|
int tok = XmlContentTok(encoding, start, end, &next);
|
|
switch (tok) {
|
|
case XML_TOK_BOM:
|
|
start = next;
|
|
tok = XmlContentTok(encoding, start, end, &next);
|
|
if (tok != XML_TOK_XML_DECL)
|
|
break;
|
|
/* fall through */
|
|
case XML_TOK_XML_DECL:
|
|
{
|
|
const char *encodingName = 0;
|
|
const ENCODING *newEncoding = 0;
|
|
const char *version;
|
|
int standalone;
|
|
if (!XmlParseXmlDecl(1,
|
|
encoding,
|
|
start,
|
|
next,
|
|
&errorPtr,
|
|
&version,
|
|
&encodingName,
|
|
&newEncoding,
|
|
&standalone))
|
|
return XML_ERROR_SYNTAX;
|
|
if (newEncoding) {
|
|
if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
|
|
errorPtr = encodingName;
|
|
return XML_ERROR_INCORRECT_ENCODING;
|
|
}
|
|
encoding = newEncoding;
|
|
}
|
|
else if (encodingName) {
|
|
errorPtr = encodingName;
|
|
return XML_ERROR_UNKNOWN_ENCODING;
|
|
}
|
|
start = next;
|
|
break;
|
|
}
|
|
case XML_TOK_PARTIAL:
|
|
if (endPtr) {
|
|
*endPtr = start;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = start;
|
|
return XML_ERROR_UNCLOSED_TOKEN;
|
|
case XML_TOK_PARTIAL_CHAR:
|
|
if (endPtr) {
|
|
*endPtr = start;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = start;
|
|
return XML_ERROR_PARTIAL_CHAR;
|
|
}
|
|
processor = externalEntityContentProcessor;
|
|
tagLevel = 1;
|
|
return doContent(parser, 1, encoding, start, end, endPtr);
|
|
}
|
|
|
|
static
|
|
enum XML_Error externalEntityContentProcessor(XML_Parser parser,
|
|
const char *start,
|
|
const char *end,
|
|
const char **endPtr)
|
|
{
|
|
return doContent(parser, 1, encoding, start, end, endPtr);
|
|
}
|
|
|
|
static enum XML_Error
|
|
doContent(XML_Parser parser,
|
|
int startTagLevel,
|
|
const ENCODING *enc,
|
|
const char *s,
|
|
const char *end,
|
|
const char **nextPtr)
|
|
{
|
|
const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
for (;;) {
|
|
const char *next;
|
|
int tok = XmlContentTok(enc, s, end, &next);
|
|
switch (tok) {
|
|
case XML_TOK_TRAILING_CR:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
if (characterDataHandler) {
|
|
char c = '\n';
|
|
characterDataHandler(userData, &c, 1);
|
|
}
|
|
if (startTagLevel == 0) {
|
|
errorPtr = s;
|
|
return XML_ERROR_NO_ELEMENTS;
|
|
|
|
}
|
|
if (tagLevel != startTagLevel) {
|
|
errorPtr = s;
|
|
return XML_ERROR_ASYNC_ENTITY;
|
|
}
|
|
return XML_ERROR_NONE;
|
|
case XML_TOK_NONE:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
if (startTagLevel > 0) {
|
|
if (tagLevel != startTagLevel) {
|
|
errorPtr = s;
|
|
return XML_ERROR_ASYNC_ENTITY;
|
|
}
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_NO_ELEMENTS;
|
|
case XML_TOK_INVALID:
|
|
errorPtr = next;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_PARTIAL:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_UNCLOSED_TOKEN;
|
|
case XML_TOK_PARTIAL_CHAR:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_PARTIAL_CHAR;
|
|
case XML_TOK_ENTITY_REF:
|
|
{
|
|
const char *name = poolStoreString(&dtd.pool, enc,
|
|
s + enc->minBytesPerChar,
|
|
next - enc->minBytesPerChar);
|
|
ENTITY *entity;
|
|
if (!name)
|
|
return XML_ERROR_NO_MEMORY;
|
|
entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
|
|
poolDiscard(&dtd.pool);
|
|
if (!entity) {
|
|
if (dtd.complete || dtd.standalone) {
|
|
errorPtr = s;
|
|
return XML_ERROR_UNDEFINED_ENTITY;
|
|
}
|
|
break;
|
|
}
|
|
if (entity->magic) {
|
|
if (characterDataHandler)
|
|
characterDataHandler(userData, entity->textPtr, entity->textLen);
|
|
break;
|
|
}
|
|
if (entity->open) {
|
|
errorPtr = s;
|
|
return XML_ERROR_RECURSIVE_ENTITY_REF;
|
|
}
|
|
if (entity->notation) {
|
|
errorPtr = s;
|
|
return XML_ERROR_BINARY_ENTITY_REF;
|
|
}
|
|
if (entity) {
|
|
if (entity->textPtr) {
|
|
enum XML_Error result;
|
|
entity->open = 1;
|
|
result = doContent(parser,
|
|
tagLevel,
|
|
utf8,
|
|
entity->textPtr,
|
|
entity->textPtr + entity->textLen,
|
|
0);
|
|
entity->open = 0;
|
|
if (result) {
|
|
errorPtr = s;
|
|
return result;
|
|
}
|
|
}
|
|
else if (externalEntityRefHandler) {
|
|
const char *openEntityNames;
|
|
entity->open = 1;
|
|
openEntityNames = getOpenEntityNames(parser);
|
|
entity->open = 0;
|
|
if (!openEntityNames)
|
|
return XML_ERROR_NO_MEMORY;
|
|
if (!externalEntityRefHandler(parser, openEntityNames, dtd.base, entity->systemId, entity->publicId)) {
|
|
errorPtr = s;
|
|
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case XML_TOK_START_TAG_WITH_ATTS:
|
|
if (!startElementHandler) {
|
|
enum XML_Error result = storeAtts(parser, enc, 0, s);
|
|
if (result)
|
|
return result;
|
|
}
|
|
/* fall through */
|
|
case XML_TOK_START_TAG_NO_ATTS:
|
|
{
|
|
TAG *tag;
|
|
if (freeTagList) {
|
|
tag = freeTagList;
|
|
freeTagList = freeTagList->parent;
|
|
}
|
|
else {
|
|
tag = malloc(sizeof(TAG));
|
|
if (!tag)
|
|
return XML_ERROR_NO_MEMORY;
|
|
tag->buf = malloc(INIT_TAG_BUF_SIZE);
|
|
if (!tag->buf)
|
|
return XML_ERROR_NO_MEMORY;
|
|
tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
|
|
}
|
|
tag->parent = tagStack;
|
|
tagStack = tag;
|
|
tag->rawName = s + enc->minBytesPerChar;
|
|
tag->rawNameLength = XmlNameLength(enc, tag->rawName);
|
|
if (nextPtr) {
|
|
if (tag->rawNameLength > tag->bufEnd - tag->buf) {
|
|
int bufSize = tag->rawNameLength * 4;
|
|
tag->buf = realloc(tag->buf, bufSize);
|
|
if (!tag->buf)
|
|
return XML_ERROR_NO_MEMORY;
|
|
tag->bufEnd = tag->buf + bufSize;
|
|
}
|
|
memcpy(tag->buf, tag->rawName, tag->rawNameLength);
|
|
tag->rawName = tag->buf;
|
|
}
|
|
++tagLevel;
|
|
if (startElementHandler) {
|
|
enum XML_Error result;
|
|
char *toPtr;
|
|
for (;;) {
|
|
const char *rawNameEnd = tag->rawName + tag->rawNameLength;
|
|
const char *fromPtr = tag->rawName;
|
|
int bufSize;
|
|
toPtr = tag->buf;
|
|
if (nextPtr)
|
|
toPtr += tag->rawNameLength;
|
|
tag->name = toPtr;
|
|
XmlConvert(enc, XML_UTF8_ENCODING,
|
|
&fromPtr, rawNameEnd,
|
|
&toPtr, tag->bufEnd - 1);
|
|
if (fromPtr == rawNameEnd)
|
|
break;
|
|
bufSize = (tag->bufEnd - tag->buf) << 1;
|
|
tag->buf = realloc(tag->buf, bufSize);
|
|
if (!tag->buf)
|
|
return XML_ERROR_NO_MEMORY;
|
|
tag->bufEnd = tag->buf + bufSize;
|
|
if (nextPtr)
|
|
tag->rawName = tag->buf;
|
|
}
|
|
*toPtr = 0;
|
|
result = storeAtts(parser, enc, tag->name, s);
|
|
if (result)
|
|
return result;
|
|
startElementHandler(userData, tag->name, (const char **)atts);
|
|
poolClear(&tempPool);
|
|
}
|
|
else
|
|
tag->name = 0;
|
|
break;
|
|
}
|
|
case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
|
|
if (!startElementHandler) {
|
|
enum XML_Error result = storeAtts(parser, enc, 0, s);
|
|
if (result)
|
|
return result;
|
|
}
|
|
/* fall through */
|
|
case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
|
|
if (startElementHandler || endElementHandler) {
|
|
const char *rawName = s + enc->minBytesPerChar;
|
|
const char *name = poolStoreString(&tempPool, enc, rawName,
|
|
rawName
|
|
+ XmlNameLength(enc, rawName));
|
|
if (!name)
|
|
return XML_ERROR_NO_MEMORY;
|
|
poolFinish(&tempPool);
|
|
if (startElementHandler) {
|
|
enum XML_Error result = storeAtts(parser, enc, name, s);
|
|
if (result)
|
|
return result;
|
|
startElementHandler(userData, name, (const char **)atts);
|
|
}
|
|
if (endElementHandler)
|
|
endElementHandler(userData, name);
|
|
poolClear(&tempPool);
|
|
}
|
|
if (tagLevel == 0)
|
|
return epilogProcessor(parser, next, end, nextPtr);
|
|
break;
|
|
case XML_TOK_END_TAG:
|
|
if (tagLevel == startTagLevel) {
|
|
errorPtr = s;
|
|
return XML_ERROR_ASYNC_ENTITY;
|
|
}
|
|
else {
|
|
int len;
|
|
const char *rawName;
|
|
TAG *tag = tagStack;
|
|
tagStack = tag->parent;
|
|
tag->parent = freeTagList;
|
|
freeTagList = tag;
|
|
rawName = s + enc->minBytesPerChar*2;
|
|
len = XmlNameLength(enc, rawName);
|
|
if (len != tag->rawNameLength
|
|
|| memcmp(tag->rawName, rawName, len) != 0) {
|
|
errorPtr = rawName;
|
|
return XML_ERROR_TAG_MISMATCH;
|
|
}
|
|
--tagLevel;
|
|
if (endElementHandler) {
|
|
if (tag->name)
|
|
endElementHandler(userData, tag->name);
|
|
else {
|
|
const char *name = poolStoreString(&tempPool, enc, rawName,
|
|
rawName + len);
|
|
if (!name)
|
|
return XML_ERROR_NO_MEMORY;
|
|
endElementHandler(userData, name);
|
|
poolClear(&tempPool);
|
|
}
|
|
}
|
|
if (tagLevel == 0)
|
|
return epilogProcessor(parser, next, end, nextPtr);
|
|
}
|
|
break;
|
|
case XML_TOK_CHAR_REF:
|
|
{
|
|
int n = XmlCharRefNumber(enc, s);
|
|
if (n < 0) {
|
|
errorPtr = s;
|
|
return XML_ERROR_BAD_CHAR_REF;
|
|
}
|
|
if (characterDataHandler) {
|
|
char buf[XML_MAX_BYTES_PER_CHAR];
|
|
characterDataHandler(userData, buf, XmlEncode(utf8, n, buf));
|
|
}
|
|
}
|
|
break;
|
|
case XML_TOK_XML_DECL:
|
|
errorPtr = s;
|
|
return XML_ERROR_MISPLACED_XML_PI;
|
|
case XML_TOK_DATA_NEWLINE:
|
|
if (characterDataHandler) {
|
|
char c = '\n';
|
|
characterDataHandler(userData, &c, 1);
|
|
}
|
|
break;
|
|
case XML_TOK_CDATA_SECT_OPEN:
|
|
{
|
|
enum XML_Error result = doCdataSection(parser, &next, end, nextPtr);
|
|
if (!next) {
|
|
processor = cdataSectionProcessor;
|
|
return result;
|
|
}
|
|
}
|
|
break;
|
|
case XML_TOK_TRAILING_RSQB:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
if (characterDataHandler) {
|
|
char *dataPtr = dataBuf;
|
|
XmlConvert(enc, XML_UTF8_ENCODING, &s, end, &dataPtr, dataBufEnd);
|
|
characterDataHandler(userData, dataBuf, dataPtr - dataBuf);
|
|
}
|
|
if (startTagLevel == 0) {
|
|
errorPtr = end;
|
|
return XML_ERROR_NO_ELEMENTS;
|
|
}
|
|
if (tagLevel != startTagLevel) {
|
|
errorPtr = end;
|
|
return XML_ERROR_ASYNC_ENTITY;
|
|
}
|
|
return XML_ERROR_NONE;
|
|
case XML_TOK_DATA_CHARS:
|
|
if (characterDataHandler) {
|
|
do {
|
|
char *dataPtr = dataBuf;
|
|
XmlConvert(enc, XML_UTF8_ENCODING, &s, next, &dataPtr, dataBufEnd);
|
|
characterDataHandler(userData, dataBuf, dataPtr - dataBuf);
|
|
} while (s != next);
|
|
}
|
|
break;
|
|
case XML_TOK_PI:
|
|
if (!reportProcessingInstruction(parser, enc, s, next))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
}
|
|
s = next;
|
|
}
|
|
/* not reached */
|
|
}
|
|
|
|
/* If tagName is non-null, build a real list of attributes,
|
|
otherwise just check the attributes for well-formedness. */
|
|
|
|
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
|
|
const char *tagName, const char *s)
|
|
{
|
|
ELEMENT_TYPE *elementType = 0;
|
|
int nDefaultAtts = 0;
|
|
const char **appAtts;
|
|
int i;
|
|
int n;
|
|
|
|
if (tagName) {
|
|
elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagName, 0);
|
|
if (elementType)
|
|
nDefaultAtts = elementType->nDefaultAtts;
|
|
}
|
|
|
|
n = XmlGetAttributes(enc, s, attsSize, atts);
|
|
if (n + nDefaultAtts > attsSize) {
|
|
int oldAttsSize = attsSize;
|
|
attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
|
|
atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
|
|
if (!atts)
|
|
return XML_ERROR_NO_MEMORY;
|
|
if (n > oldAttsSize)
|
|
XmlGetAttributes(enc, s, n, atts);
|
|
}
|
|
appAtts = (const char **)atts;
|
|
for (i = 0; i < n; i++) {
|
|
ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
|
|
atts[i].name
|
|
+ XmlNameLength(enc, atts[i].name));
|
|
if (!attId)
|
|
return XML_ERROR_NO_MEMORY;
|
|
if ((attId->name)[-1]) {
|
|
errorPtr = atts[i].name;
|
|
return XML_ERROR_DUPLICATE_ATTRIBUTE;
|
|
}
|
|
(attId->name)[-1] = 1;
|
|
appAtts[i << 1] = attId->name;
|
|
if (!atts[i].normalized) {
|
|
enum XML_Error result;
|
|
int isCdata = 1;
|
|
|
|
if (attId->maybeTokenized) {
|
|
int j;
|
|
for (j = 0; j < nDefaultAtts; j++) {
|
|
if (attId == elementType->defaultAtts[j].id) {
|
|
isCdata = elementType->defaultAtts[j].isCdata;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
result = storeAttributeValue(parser, enc, isCdata,
|
|
atts[i].valuePtr, atts[i].valueEnd,
|
|
&tempPool);
|
|
if (result)
|
|
return result;
|
|
if (tagName) {
|
|
appAtts[(i << 1) + 1] = poolStart(&tempPool);
|
|
poolFinish(&tempPool);
|
|
}
|
|
else
|
|
poolDiscard(&tempPool);
|
|
}
|
|
else if (tagName) {
|
|
appAtts[(i << 1) + 1] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
|
|
if (appAtts[(i << 1) + 1] == 0)
|
|
return XML_ERROR_NO_MEMORY;
|
|
poolFinish(&tempPool);
|
|
}
|
|
}
|
|
if (tagName) {
|
|
int j;
|
|
for (j = 0; j < nDefaultAtts; j++) {
|
|
const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
|
|
if (!(da->id->name)[-1] && da->value) {
|
|
(da->id->name)[-1] = 1;
|
|
appAtts[i << 1] = da->id->name;
|
|
appAtts[(i << 1) + 1] = da->value;
|
|
i++;
|
|
}
|
|
}
|
|
appAtts[i << 1] = 0;
|
|
}
|
|
while (i-- > 0)
|
|
((char *)appAtts[i << 1])[-1] = 0;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
|
|
/* The idea here is to avoid using stack for each CDATA section when
|
|
the whole file is parsed with one call. */
|
|
|
|
static
|
|
enum XML_Error cdataSectionProcessor(XML_Parser parser,
|
|
const char *start,
|
|
const char *end,
|
|
const char **endPtr)
|
|
{
|
|
enum XML_Error result = doCdataSection(parser, &start, end, endPtr);
|
|
if (start) {
|
|
processor = contentProcessor;
|
|
return contentProcessor(parser, start, end, endPtr);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/* startPtr gets set to non-null is the section is closed, and to null if
|
|
the section is not yet closed. */
|
|
|
|
static
|
|
enum XML_Error doCdataSection(XML_Parser parser,
|
|
const char **startPtr,
|
|
const char *end,
|
|
const char **nextPtr)
|
|
{
|
|
const char *s = *startPtr;
|
|
*startPtr = 0;
|
|
for (;;) {
|
|
const char *next;
|
|
int tok = XmlCdataSectionTok(encoding, s, end, &next);
|
|
switch (tok) {
|
|
case XML_TOK_CDATA_SECT_CLOSE:
|
|
*startPtr = next;
|
|
return XML_ERROR_NONE;
|
|
case XML_TOK_DATA_NEWLINE:
|
|
if (characterDataHandler) {
|
|
char c = '\n';
|
|
characterDataHandler(userData, &c, 1);
|
|
}
|
|
break;
|
|
case XML_TOK_DATA_CHARS:
|
|
if (characterDataHandler) {
|
|
do {
|
|
char *dataPtr = dataBuf;
|
|
XmlConvert(encoding, XML_UTF8_ENCODING, &s, next, &dataPtr, dataBufEnd);
|
|
characterDataHandler(userData, dataBuf, dataPtr - dataBuf);
|
|
} while (s != next);
|
|
}
|
|
break;
|
|
case XML_TOK_INVALID:
|
|
errorPtr = next;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_PARTIAL_CHAR:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_PARTIAL_CHAR;
|
|
case XML_TOK_PARTIAL:
|
|
case XML_TOK_NONE:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_UNCLOSED_CDATA_SECTION;
|
|
default:
|
|
abort();
|
|
}
|
|
s = next;
|
|
}
|
|
/* not reached */
|
|
}
|
|
|
|
|
|
static enum XML_Error
|
|
prologProcessor(XML_Parser parser,
|
|
const char *s,
|
|
const char *end,
|
|
const char **nextPtr)
|
|
{
|
|
for (;;) {
|
|
const char *next;
|
|
int tok = XmlPrologTok(encoding, s, end, &next);
|
|
if (tok <= 0) {
|
|
if (nextPtr != 0 && tok != XML_TOK_INVALID) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
switch (tok) {
|
|
case XML_TOK_INVALID:
|
|
errorPtr = next;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_NONE:
|
|
return XML_ERROR_NO_ELEMENTS;
|
|
case XML_TOK_PARTIAL:
|
|
return XML_ERROR_UNCLOSED_TOKEN;
|
|
case XML_TOK_PARTIAL_CHAR:
|
|
return XML_ERROR_PARTIAL_CHAR;
|
|
case XML_TOK_TRAILING_CR:
|
|
errorPtr = s + encoding->minBytesPerChar;
|
|
return XML_ERROR_NO_ELEMENTS;
|
|
default:
|
|
abort();
|
|
}
|
|
}
|
|
switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
|
|
case XML_ROLE_XML_DECL:
|
|
{
|
|
const char *encodingName = 0;
|
|
const ENCODING *newEncoding = 0;
|
|
const char *version;
|
|
int standalone = -1;
|
|
if (!XmlParseXmlDecl(0,
|
|
encoding,
|
|
s,
|
|
next,
|
|
&errorPtr,
|
|
&version,
|
|
&encodingName,
|
|
&newEncoding,
|
|
&standalone))
|
|
return XML_ERROR_SYNTAX;
|
|
if (newEncoding) {
|
|
if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
|
|
errorPtr = encodingName;
|
|
return XML_ERROR_INCORRECT_ENCODING;
|
|
}
|
|
encoding = newEncoding;
|
|
}
|
|
else if (encodingName) {
|
|
errorPtr = encodingName;
|
|
return XML_ERROR_UNKNOWN_ENCODING;
|
|
}
|
|
if (standalone == 1)
|
|
dtd.standalone = 1;
|
|
break;
|
|
}
|
|
case XML_ROLE_DOCTYPE_SYSTEM_ID:
|
|
hadExternalDoctype = 1;
|
|
break;
|
|
case XML_ROLE_ENTITY_PUBLIC_ID:
|
|
if (!XmlIsPublicId(encoding, s, next, &errorPtr))
|
|
return XML_ERROR_SYNTAX;
|
|
if (declEntity) {
|
|
char *tem = poolStoreString(&dtd.pool,
|
|
encoding,
|
|
s + encoding->minBytesPerChar,
|
|
next - encoding->minBytesPerChar);
|
|
if (!tem)
|
|
return XML_ERROR_NO_MEMORY;
|
|
normalizePublicId(tem);
|
|
declEntity->publicId = tem;
|
|
poolFinish(&dtd.pool);
|
|
}
|
|
break;
|
|
case XML_ROLE_DOCTYPE_PUBLIC_ID:
|
|
case XML_ROLE_NOTATION_PUBLIC_ID:
|
|
if (!XmlIsPublicId(encoding, s, next, &errorPtr))
|
|
return XML_ERROR_SYNTAX;
|
|
break;
|
|
case XML_ROLE_INSTANCE_START:
|
|
processor = contentProcessor;
|
|
if (hadExternalDoctype)
|
|
dtd.complete = 0;
|
|
return contentProcessor(parser, s, end, nextPtr);
|
|
case XML_ROLE_ATTLIST_ELEMENT_NAME:
|
|
{
|
|
const char *name = poolStoreString(&dtd.pool, encoding, s, next);
|
|
if (!name)
|
|
return XML_ERROR_NO_MEMORY;
|
|
declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
|
|
if (!declElementType)
|
|
return XML_ERROR_NO_MEMORY;
|
|
if (declElementType->name != name)
|
|
poolDiscard(&dtd.pool);
|
|
else
|
|
poolFinish(&dtd.pool);
|
|
break;
|
|
}
|
|
case XML_ROLE_ATTRIBUTE_NAME:
|
|
declAttributeId = getAttributeId(parser, encoding, s, next);
|
|
if (!declAttributeId)
|
|
return XML_ERROR_NO_MEMORY;
|
|
declAttributeIsCdata = 0;
|
|
break;
|
|
case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
|
|
declAttributeIsCdata = 1;
|
|
break;
|
|
case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
|
|
case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
|
|
if (dtd.complete
|
|
&& !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
|
|
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
|
|
{
|
|
const char *attVal;
|
|
enum XML_Error result
|
|
= storeAttributeValue(parser, encoding, declAttributeIsCdata,
|
|
s + encoding->minBytesPerChar,
|
|
next - encoding->minBytesPerChar,
|
|
&dtd.pool);
|
|
if (result)
|
|
return result;
|
|
attVal = poolStart(&dtd.pool);
|
|
poolFinish(&dtd.pool);
|
|
if (dtd.complete
|
|
&& !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
}
|
|
case XML_ROLE_ENTITY_VALUE:
|
|
{
|
|
enum XML_Error result = storeEntityValue(parser, s, next);
|
|
if (result != XML_ERROR_NONE)
|
|
return result;
|
|
}
|
|
break;
|
|
case XML_ROLE_ENTITY_SYSTEM_ID:
|
|
if (declEntity) {
|
|
declEntity->systemId = poolStoreString(&dtd.pool, encoding,
|
|
s + encoding->minBytesPerChar,
|
|
next - encoding->minBytesPerChar);
|
|
if (!declEntity->systemId)
|
|
return XML_ERROR_NO_MEMORY;
|
|
declEntity->base = dtd.base;
|
|
poolFinish(&dtd.pool);
|
|
}
|
|
break;
|
|
case XML_ROLE_ENTITY_NOTATION_NAME:
|
|
if (declEntity) {
|
|
declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next);
|
|
if (!declEntity->notation)
|
|
return XML_ERROR_NO_MEMORY;
|
|
poolFinish(&dtd.pool);
|
|
}
|
|
break;
|
|
case XML_ROLE_GENERAL_ENTITY_NAME:
|
|
{
|
|
const char *name = poolStoreString(&dtd.pool, encoding, s, next);
|
|
if (!name)
|
|
return XML_ERROR_NO_MEMORY;
|
|
if (dtd.complete) {
|
|
declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
|
|
if (!declEntity)
|
|
return XML_ERROR_NO_MEMORY;
|
|
if (declEntity->name != name) {
|
|
poolDiscard(&dtd.pool);
|
|
declEntity = 0;
|
|
}
|
|
else
|
|
poolFinish(&dtd.pool);
|
|
}
|
|
else {
|
|
poolDiscard(&dtd.pool);
|
|
declEntity = 0;
|
|
}
|
|
}
|
|
break;
|
|
case XML_ROLE_PARAM_ENTITY_NAME:
|
|
declEntity = 0;
|
|
break;
|
|
case XML_ROLE_ERROR:
|
|
errorPtr = s;
|
|
switch (tok) {
|
|
case XML_TOK_PARAM_ENTITY_REF:
|
|
return XML_ERROR_PARAM_ENTITY_REF;
|
|
case XML_TOK_XML_DECL:
|
|
return XML_ERROR_MISPLACED_XML_PI;
|
|
default:
|
|
return XML_ERROR_SYNTAX;
|
|
}
|
|
case XML_ROLE_GROUP_OPEN:
|
|
if (prologState.level >= groupSize) {
|
|
if (groupSize)
|
|
groupConnector = realloc(groupConnector, groupSize *= 2);
|
|
else
|
|
groupConnector = malloc(groupSize = 32);
|
|
if (!groupConnector)
|
|
return XML_ERROR_NO_MEMORY;
|
|
}
|
|
groupConnector[prologState.level] = 0;
|
|
break;
|
|
case XML_ROLE_GROUP_SEQUENCE:
|
|
if (groupConnector[prologState.level] == '|') {
|
|
errorPtr = s;
|
|
return XML_ERROR_SYNTAX;
|
|
}
|
|
groupConnector[prologState.level] = ',';
|
|
break;
|
|
case XML_ROLE_GROUP_CHOICE:
|
|
if (groupConnector[prologState.level] == ',') {
|
|
errorPtr = s;
|
|
return XML_ERROR_SYNTAX;
|
|
}
|
|
groupConnector[prologState.level] = '|';
|
|
break;
|
|
case XML_ROLE_PARAM_ENTITY_REF:
|
|
dtd.complete = 0;
|
|
break;
|
|
case XML_ROLE_NONE:
|
|
switch (tok) {
|
|
case XML_TOK_PI:
|
|
if (!reportProcessingInstruction(parser, encoding, s, next))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
s = next;
|
|
}
|
|
/* not reached */
|
|
}
|
|
|
|
static
|
|
enum XML_Error epilogProcessor(XML_Parser parser,
|
|
const char *s,
|
|
const char *end,
|
|
const char **nextPtr)
|
|
{
|
|
processor = epilogProcessor;
|
|
for (;;) {
|
|
const char *next;
|
|
int tok = XmlPrologTok(encoding, s, end, &next);
|
|
switch (tok) {
|
|
case XML_TOK_TRAILING_CR:
|
|
case XML_TOK_NONE:
|
|
if (nextPtr)
|
|
*nextPtr = end;
|
|
return XML_ERROR_NONE;
|
|
case XML_TOK_PROLOG_S:
|
|
case XML_TOK_COMMENT:
|
|
break;
|
|
case XML_TOK_PI:
|
|
if (!reportProcessingInstruction(parser, encoding, s, next))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
case XML_TOK_INVALID:
|
|
errorPtr = next;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_PARTIAL:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_UNCLOSED_TOKEN;
|
|
case XML_TOK_PARTIAL_CHAR:
|
|
if (nextPtr) {
|
|
*nextPtr = s;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
errorPtr = s;
|
|
return XML_ERROR_PARTIAL_CHAR;
|
|
default:
|
|
errorPtr = s;
|
|
return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
|
|
}
|
|
s = next;
|
|
}
|
|
}
|
|
|
|
static
|
|
enum XML_Error errorProcessor(XML_Parser parser,
|
|
const char *s,
|
|
const char *end,
|
|
const char **nextPtr)
|
|
{
|
|
return errorCode;
|
|
}
|
|
|
|
static enum XML_Error
|
|
storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
|
|
const char *ptr, const char *end,
|
|
STRING_POOL *pool)
|
|
{
|
|
enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
|
|
if (result)
|
|
return result;
|
|
if (!isCdata && poolLength(pool) && poolLastByte(pool) == ' ')
|
|
poolChop(pool);
|
|
if (!poolAppendByte(pool, 0))
|
|
return XML_ERROR_NO_MEMORY;
|
|
return XML_ERROR_NONE;
|
|
}
|
|
|
|
static enum XML_Error
|
|
appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
|
|
const char *ptr, const char *end,
|
|
STRING_POOL *pool)
|
|
{
|
|
const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
for (;;) {
|
|
const char *next;
|
|
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
|
|
switch (tok) {
|
|
case XML_TOK_NONE:
|
|
return XML_ERROR_NONE;
|
|
case XML_TOK_INVALID:
|
|
errorPtr = next;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_PARTIAL:
|
|
errorPtr = ptr;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_CHAR_REF:
|
|
{
|
|
char buf[XML_MAX_BYTES_PER_CHAR];
|
|
int i;
|
|
int n = XmlCharRefNumber(enc, ptr);
|
|
if (n < 0) {
|
|
errorPtr = ptr;
|
|
return XML_ERROR_BAD_CHAR_REF;
|
|
}
|
|
if (!isCdata
|
|
&& n == ' '
|
|
&& (poolLength(pool) == 0 || poolLastByte(pool) == ' '))
|
|
break;
|
|
n = XmlEncode(utf8, n, buf);
|
|
if (!n) {
|
|
errorPtr = ptr;
|
|
return XML_ERROR_BAD_CHAR_REF;
|
|
}
|
|
for (i = 0; i < n; i++) {
|
|
if (!poolAppendByte(pool, buf[i]))
|
|
return XML_ERROR_NO_MEMORY;
|
|
}
|
|
}
|
|
break;
|
|
case XML_TOK_DATA_CHARS:
|
|
if (!poolAppend(pool, enc, ptr, next))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
break;
|
|
case XML_TOK_TRAILING_CR:
|
|
next = ptr + enc->minBytesPerChar;
|
|
/* fall through */
|
|
case XML_TOK_ATTRIBUTE_VALUE_S:
|
|
case XML_TOK_DATA_NEWLINE:
|
|
if (!isCdata && (poolLength(pool) == 0 || poolLastByte(pool) == ' '))
|
|
break;
|
|
if (!poolAppendByte(pool, ' '))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
case XML_TOK_ENTITY_REF:
|
|
{
|
|
const char *name = poolStoreString(&temp2Pool, enc,
|
|
ptr + enc->minBytesPerChar,
|
|
next - enc->minBytesPerChar);
|
|
ENTITY *entity;
|
|
if (!name)
|
|
return XML_ERROR_NO_MEMORY;
|
|
entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
|
|
poolDiscard(&temp2Pool);
|
|
if (!entity) {
|
|
if (dtd.complete) {
|
|
errorPtr = ptr;
|
|
return XML_ERROR_UNDEFINED_ENTITY;
|
|
}
|
|
}
|
|
else if (entity->open) {
|
|
errorPtr = ptr;
|
|
return XML_ERROR_RECURSIVE_ENTITY_REF;
|
|
}
|
|
else if (entity->notation) {
|
|
errorPtr = ptr;
|
|
return XML_ERROR_BINARY_ENTITY_REF;
|
|
}
|
|
else if (entity->magic) {
|
|
int i;
|
|
for (i = 0; i < entity->textLen; i++)
|
|
if (!poolAppendByte(pool, entity->textPtr[i]))
|
|
return XML_ERROR_NO_MEMORY;
|
|
}
|
|
else if (!entity->textPtr) {
|
|
errorPtr = ptr;
|
|
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
|
|
}
|
|
else {
|
|
enum XML_Error result;
|
|
const char *textEnd = entity->textPtr + entity->textLen;
|
|
entity->open = 1;
|
|
result = appendAttributeValue(parser, utf8, isCdata, entity->textPtr, textEnd, pool);
|
|
entity->open = 0;
|
|
if (result) {
|
|
errorPtr = ptr;
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
ptr = next;
|
|
}
|
|
/* not reached */
|
|
}
|
|
|
|
static
|
|
enum XML_Error storeEntityValue(XML_Parser parser,
|
|
const char *entityTextPtr,
|
|
const char *entityTextEnd)
|
|
{
|
|
const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
STRING_POOL *pool = &(dtd.pool);
|
|
entityTextPtr += encoding->minBytesPerChar;
|
|
entityTextEnd -= encoding->minBytesPerChar;
|
|
for (;;) {
|
|
const char *next;
|
|
int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
|
|
switch (tok) {
|
|
case XML_TOK_PARAM_ENTITY_REF:
|
|
errorPtr = entityTextPtr;
|
|
return XML_ERROR_SYNTAX;
|
|
case XML_TOK_NONE:
|
|
if (declEntity) {
|
|
declEntity->textPtr = pool->start;
|
|
declEntity->textLen = pool->ptr - pool->start;
|
|
poolFinish(pool);
|
|
}
|
|
else
|
|
poolDiscard(pool);
|
|
return XML_ERROR_NONE;
|
|
case XML_TOK_ENTITY_REF:
|
|
case XML_TOK_DATA_CHARS:
|
|
if (!poolAppend(pool, encoding, entityTextPtr, next))
|
|
return XML_ERROR_NO_MEMORY;
|
|
break;
|
|
case XML_TOK_TRAILING_CR:
|
|
next = entityTextPtr + encoding->minBytesPerChar;
|
|
/* fall through */
|
|
case XML_TOK_DATA_NEWLINE:
|
|
if (pool->end == pool->ptr && !poolGrow(pool))
|
|
return XML_ERROR_NO_MEMORY;
|
|
*(pool->ptr)++ = '\n';
|
|
break;
|
|
case XML_TOK_CHAR_REF:
|
|
{
|
|
char buf[XML_MAX_BYTES_PER_CHAR];
|
|
int i;
|
|
int n = XmlCharRefNumber(encoding, entityTextPtr);
|
|
if (n < 0) {
|
|
errorPtr = entityTextPtr;
|
|
return XML_ERROR_BAD_CHAR_REF;
|
|
}
|
|
n = XmlEncode(utf8, n, buf);
|
|
if (!n) {
|
|
errorPtr = entityTextPtr;
|
|
return XML_ERROR_BAD_CHAR_REF;
|
|
}
|
|
for (i = 0; i < n; i++) {
|
|
if (pool->end == pool->ptr && !poolGrow(pool))
|
|
return XML_ERROR_NO_MEMORY;
|
|
*(pool->ptr)++ = buf[i];
|
|
}
|
|
}
|
|
break;
|
|
case XML_TOK_PARTIAL:
|
|
errorPtr = entityTextPtr;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
case XML_TOK_INVALID:
|
|
errorPtr = next;
|
|
return XML_ERROR_INVALID_TOKEN;
|
|
default:
|
|
abort();
|
|
}
|
|
entityTextPtr = next;
|
|
}
|
|
/* not reached */
|
|
}
|
|
|
|
/* Normalize line ends in the zero-terminated string s in place: every
   "\r\n" pair and every lone '\r' becomes a single '\n'.  A string without
   any '\r' is left untouched. */
static void
normalizeLines(char *s)
{
  char *src = strchr(s, '\r');
  char *dst;

  if (!src)
    return;

  /* everything before the first '\r' is already in its final place */
  for (dst = src; *src != '\0'; dst++) {
    if (*src != '\r') {
      *dst = *src++;
      continue;
    }
    *dst = '\n';
    src++;
    if (*src == '\n')
      src++;
  }
  *dst = '\0';
}
|
|
|
|
static int
|
|
reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
|
|
{
|
|
const char *target;
|
|
char *data;
|
|
const char *tem;
|
|
if (!processingInstructionHandler)
|
|
return 1;
|
|
target = start + enc->minBytesPerChar * 2;
|
|
tem = target + XmlNameLength(enc, target);
|
|
target = poolStoreString(&tempPool, enc, target, tem);
|
|
if (!target)
|
|
return 0;
|
|
poolFinish(&tempPool);
|
|
data = poolStoreString(&tempPool, enc,
|
|
XmlSkipS(enc, tem),
|
|
end - enc->minBytesPerChar*2);
|
|
if (!data)
|
|
return 0;
|
|
normalizeLines(data);
|
|
processingInstructionHandler(userData, target, data);
|
|
poolClear(&tempPool);
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const char *value)
|
|
{
|
|
DEFAULT_ATTRIBUTE *att;
|
|
if (type->nDefaultAtts == type->allocDefaultAtts) {
|
|
if (type->allocDefaultAtts == 0)
|
|
type->allocDefaultAtts = 8;
|
|
else
|
|
type->allocDefaultAtts *= 2;
|
|
type->defaultAtts = realloc(type->defaultAtts,
|
|
type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
|
|
if (!type->defaultAtts)
|
|
return 0;
|
|
}
|
|
att = type->defaultAtts + type->nDefaultAtts;
|
|
att->id = attId;
|
|
att->value = value;
|
|
att->isCdata = isCdata;
|
|
if (!isCdata)
|
|
attId->maybeTokenized = 1;
|
|
type->nDefaultAtts += 1;
|
|
return 1;
|
|
}
|
|
|
|
static ATTRIBUTE_ID *
|
|
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
|
|
{
|
|
ATTRIBUTE_ID *id;
|
|
const char *name;
|
|
if (!poolAppendByte(&dtd.pool, 0))
|
|
return 0;
|
|
name = poolStoreString(&dtd.pool, enc, start, end);
|
|
if (!name)
|
|
return 0;
|
|
++name;
|
|
id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
|
|
if (!id)
|
|
return 0;
|
|
if (id->name != name)
|
|
poolDiscard(&dtd.pool);
|
|
else
|
|
poolFinish(&dtd.pool);
|
|
return id;
|
|
}
|
|
|
|
static
|
|
const char *getOpenEntityNames(XML_Parser parser)
|
|
{
|
|
HASH_TABLE_ITER iter;
|
|
|
|
hashTableIterInit(&iter, &(dtd.generalEntities));
|
|
for (;;) {
|
|
const char *s;
|
|
ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
|
|
if (!e)
|
|
break;
|
|
if (!e->open)
|
|
continue;
|
|
if (poolLength(&tempPool) > 0 && !poolAppendByte(&tempPool, ' '))
|
|
return 0;
|
|
for (s = e->name; *s; s++)
|
|
if (!poolAppendByte(&tempPool, *s))
|
|
return 0;
|
|
}
|
|
|
|
if (!poolAppendByte(&tempPool, '\0'))
|
|
return 0;
|
|
return tempPool.start;
|
|
}
|
|
|
|
static
|
|
int setOpenEntityNames(XML_Parser parser, const char *openEntityNames)
|
|
{
|
|
const char *s = openEntityNames;
|
|
while (*openEntityNames != '\0') {
|
|
if (*s == ' ' || *s == '\0') {
|
|
ENTITY *e;
|
|
if (!poolAppendByte(&tempPool, '\0'))
|
|
return 0;
|
|
e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
|
|
if (e)
|
|
e->open = 1;
|
|
if (*s == ' ')
|
|
s++;
|
|
openEntityNames = s;
|
|
poolDiscard(&tempPool);
|
|
}
|
|
else {
|
|
if (!poolAppendByte(&tempPool, *s))
|
|
return 0;
|
|
s++;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
/* Normalize a public identifier in place: every run of spaces, carriage
   returns and newlines is reduced to a single space, and leading and
   trailing white space is removed. */
static
void normalizePublicId(char *publicId)
{
  const char *in = publicId;
  char *out = publicId;

  while (*in != '\0') {
    char c = *in++;

    if (c == ' ' || c == '\r' || c == '\n') {
      /* emit a space only after a non-space character */
      if (out != publicId && out[-1] != ' ')
        *out++ = ' ';
    }
    else
      *out++ = c;
  }

  /* at most one trailing space can be left over */
  if (out != publicId && out[-1] == ' ')
    out--;
  *out = '\0';
}
|
|
|
|
/* Initialize a DTD: empty string pool and hash tables, complete = 1,
   standalone = 0, no base, and the five predefined entities (lt, amp, gt,
   quot, apos) entered into generalEntities as "magic" one-character
   entities.

   All plain fields and all three hash tables are initialized before the
   first allocation, so the DTD is fully initialized even when 0 is
   returned because an entity could not be created.

   Returns 1 on success, 0 if memory runs out. */
static int dtdInit(DTD *p)
{
  static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
  static const char chars[] = { '<', '&', '>', '"', '\'' };
  int i;

  poolInit(&(p->pool));
  hashTableInit(&(p->generalEntities));
  hashTableInit(&(p->elementTypes));
  hashTableInit(&(p->attributeIds));
  p->complete = 1;
  p->standalone = 0;
  p->base = 0;

  for (i = 0; i < (int)(sizeof(names) / sizeof(names[0])); i++) {
    ENTITY *entity = (ENTITY *)lookup(&(p->generalEntities), names[i], sizeof(ENTITY));
    if (!entity)
      return 0;
    /* the replacement text is the single character in chars[] */
    entity->textPtr = chars + i;
    entity->textLen = 1;
    entity->magic = 1;
  }
  return 1;
}
|
|
|
|
/* Release everything owned by a DTD: the defaultAtts array of every element
   type, the three hash tables, and the string pool. */
static void dtdDestroy(DTD *p)
{
  HASH_TABLE_ITER iter;
  ELEMENT_TYPE *e;

  /* the arrays must be freed while the table can still be iterated */
  hashTableIterInit(&iter, &(p->elementTypes));
  while ((e = (ELEMENT_TYPE *)hashTableIterNext(&iter)) != 0)
    free(e->defaultAtts);

  hashTableDestroy(&(p->generalEntities));
  hashTableDestroy(&(p->elementTypes));
  hashTableDestroy(&(p->attributeIds));
  poolDestroy(&(p->pool));
}
|
|
|
|
/* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
|
|
The new DTD has already been initialized. */
|
|
|
|
static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
|
|
{
|
|
HASH_TABLE_ITER iter;
|
|
|
|
if (oldDtd->base) {
|
|
const char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
|
|
if (!tem)
|
|
return 0;
|
|
newDtd->base = tem;
|
|
}
|
|
|
|
hashTableIterInit(&iter, &(oldDtd->attributeIds));
|
|
|
|
/* Copy the attribute id table. */
|
|
|
|
for (;;) {
|
|
ATTRIBUTE_ID *newA;
|
|
const char *name;
|
|
const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
|
|
|
|
if (!oldA)
|
|
break;
|
|
/* Remember to allocate the scratch byte before the name. */
|
|
if (!poolAppendByte(&(newDtd->pool), 0))
|
|
return 0;
|
|
name = poolCopyString(&(newDtd->pool), oldA->name);
|
|
if (!name)
|
|
return 0;
|
|
++name;
|
|
newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
|
|
if (!newA)
|
|
return 0;
|
|
newA->maybeTokenized = oldA->maybeTokenized;
|
|
}
|
|
|
|
/* Copy the element type table. */
|
|
|
|
hashTableIterInit(&iter, &(oldDtd->elementTypes));
|
|
|
|
for (;;) {
|
|
int i;
|
|
ELEMENT_TYPE *newE;
|
|
const char *name;
|
|
const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
|
|
if (!oldE)
|
|
break;
|
|
name = poolCopyString(&(newDtd->pool), oldE->name);
|
|
if (!name)
|
|
return 0;
|
|
newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
|
|
if (!newE)
|
|
return 0;
|
|
newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
|
|
if (!newE->defaultAtts)
|
|
return 0;
|
|
newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
|
|
for (i = 0; i < newE->nDefaultAtts; i++) {
|
|
newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
|
|
newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
|
|
newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
|
|
if (!newE->defaultAtts[i].value)
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* Copy the entity table. */
|
|
|
|
hashTableIterInit(&iter, &(oldDtd->generalEntities));
|
|
|
|
for (;;) {
|
|
ENTITY *newE;
|
|
const char *name;
|
|
const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
|
|
if (!oldE)
|
|
break;
|
|
if (oldE->magic)
|
|
continue;
|
|
name = poolCopyString(&(newDtd->pool), oldE->name);
|
|
if (!name)
|
|
return 0;
|
|
newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
|
|
if (!newE)
|
|
return 0;
|
|
if (oldE->systemId) {
|
|
const char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
|
|
if (!tem)
|
|
return 0;
|
|
newE->systemId = tem;
|
|
if (oldE->base) {
|
|
if (oldE->base == oldDtd->base)
|
|
newE->base = newDtd->base;
|
|
tem = poolCopyString(&(newDtd->pool), oldE->base);
|
|
if (!tem)
|
|
return 0;
|
|
newE->base = tem;
|
|
}
|
|
}
|
|
else {
|
|
const char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
|
|
if (!tem)
|
|
return 0;
|
|
newE->textPtr = tem;
|
|
newE->textLen = oldE->textLen;
|
|
}
|
|
if (oldE->notation) {
|
|
const char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
|
|
if (!tem)
|
|
return 0;
|
|
newE->notation = tem;
|
|
}
|
|
}
|
|
|
|
newDtd->complete = oldDtd->complete;
|
|
newDtd->standalone = oldDtd->standalone;
|
|
return 1;
|
|
}
|
|
|
|
static
|
|
void poolInit(STRING_POOL *pool)
|
|
{
|
|
pool->blocks = 0;
|
|
pool->freeBlocks = 0;
|
|
pool->start = 0;
|
|
pool->ptr = 0;
|
|
pool->end = 0;
|
|
}
|
|
|
|
static
|
|
void poolClear(STRING_POOL *pool)
|
|
{
|
|
if (!pool->freeBlocks)
|
|
pool->freeBlocks = pool->blocks;
|
|
else {
|
|
BLOCK *p = pool->blocks;
|
|
while (p) {
|
|
BLOCK *tem = p->next;
|
|
p->next = pool->freeBlocks;
|
|
pool->freeBlocks = p;
|
|
p = tem;
|
|
}
|
|
}
|
|
pool->blocks = 0;
|
|
pool->start = 0;
|
|
pool->ptr = 0;
|
|
pool->end = 0;
|
|
}
|
|
|
|
static
|
|
void poolDestroy(STRING_POOL *pool)
|
|
{
|
|
BLOCK *p = pool->blocks;
|
|
while (p) {
|
|
BLOCK *tem = p->next;
|
|
free(p);
|
|
p = tem;
|
|
}
|
|
pool->blocks = 0;
|
|
p = pool->freeBlocks;
|
|
while (p) {
|
|
BLOCK *tem = p->next;
|
|
free(p);
|
|
p = tem;
|
|
}
|
|
pool->freeBlocks = 0;
|
|
pool->ptr = 0;
|
|
pool->start = 0;
|
|
pool->end = 0;
|
|
}
|
|
|
|
static
|
|
char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
|
|
const char *ptr, const char *end)
|
|
{
|
|
if (!pool->ptr && !poolGrow(pool))
|
|
return 0;
|
|
for (;;) {
|
|
XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end);
|
|
if (ptr == end)
|
|
break;
|
|
if (!poolGrow(pool))
|
|
return 0;
|
|
}
|
|
return pool->start;
|
|
}
|
|
|
|
/* Append the zero-terminated string s, including its terminator, to the
   string being built in the pool and finish that string.  Returns the start
   of the finished string (which includes any bytes that were already
   pending in the pool), or 0 if memory runs out. */
static const char *poolCopyString(STRING_POOL *pool, const char *s)
{
  const char *copied;

  for (;;) {
    char c = *s++;

    if (!poolAppendByte(pool, c))
      return 0;
    if (c == '\0')
      break;
  }

  copied = pool->start;
  poolFinish(pool);
  return copied;
}
|
|
|
|
/* Append exactly n bytes from s to the string being built in the pool (no
   terminator is added) and finish that string.  Returns the start of the
   finished string, or 0 if memory runs out. */
static const char *poolCopyStringN(STRING_POOL *pool, const char *s, int n)
{
  const char *copied;
  int i;

  /* make sure the pool has a block even when n is 0 */
  if (pool->ptr == 0) {
    if (!poolGrow(pool))
      return 0;
  }

  for (i = 0; i < n; i++) {
    if (!poolAppendByte(pool, s[i]))
      return 0;
  }

  copied = pool->start;
  poolFinish(pool);
  return copied;
}
|
|
|
|
static
|
|
char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
|
|
const char *ptr, const char *end)
|
|
{
|
|
if (!poolAppend(pool, enc, ptr, end))
|
|
return 0;
|
|
if (pool->ptr == pool->end && !poolGrow(pool))
|
|
return 0;
|
|
*(pool->ptr)++ = 0;
|
|
return pool->start;
|
|
}
|
|
|
|
static
|
|
int poolGrow(STRING_POOL *pool)
|
|
{
|
|
if (pool->freeBlocks) {
|
|
if (pool->start == 0) {
|
|
pool->blocks = pool->freeBlocks;
|
|
pool->freeBlocks = pool->freeBlocks->next;
|
|
pool->blocks->next = 0;
|
|
pool->start = pool->blocks->s;
|
|
pool->end = pool->start + pool->blocks->size;
|
|
pool->ptr = pool->start;
|
|
return 1;
|
|
}
|
|
if (pool->end - pool->start < pool->freeBlocks->size) {
|
|
BLOCK *tem = pool->freeBlocks->next;
|
|
pool->freeBlocks->next = pool->blocks;
|
|
pool->blocks = pool->freeBlocks;
|
|
pool->freeBlocks = tem;
|
|
memcpy(pool->blocks->s, pool->start, pool->end - pool->start);
|
|
pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
|
|
pool->start = pool->blocks->s;
|
|
pool->end = pool->start + pool->blocks->size;
|
|
return 1;
|
|
}
|
|
}
|
|
if (pool->blocks && pool->start == pool->blocks->s) {
|
|
int blockSize = (pool->end - pool->start)*2;
|
|
pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize);
|
|
if (!pool->blocks)
|
|
return 0;
|
|
pool->blocks->size = blockSize;
|
|
pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
|
|
pool->start = pool->blocks->s;
|
|
pool->end = pool->start + blockSize;
|
|
}
|
|
else {
|
|
BLOCK *tem;
|
|
int blockSize = pool->end - pool->start;
|
|
if (blockSize < INIT_BLOCK_SIZE)
|
|
blockSize = INIT_BLOCK_SIZE;
|
|
else
|
|
blockSize *= 2;
|
|
tem = malloc(offsetof(BLOCK, s) + blockSize);
|
|
if (!tem)
|
|
return 0;
|
|
tem->size = blockSize;
|
|
tem->next = pool->blocks;
|
|
pool->blocks = tem;
|
|
memcpy(tem->s, pool->start, pool->ptr - pool->start);
|
|
pool->ptr = tem->s + (pool->ptr - pool->start);
|
|
pool->start = tem->s;
|
|
pool->end = tem->s + blockSize;
|
|
}
|
|
return 1;
|
|
}
|