libexpat/expat/xmlwf/wfcheck.c

954 lines
23 KiB
C
Raw Normal View History

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "wfcheck.h"
#include "hashtable.h"
#include "xmltok.h"
#include "xmlrole.h"
/* One declared entity (general or parameter), stored in a HASH_TABLE
   and looked up by name. */
typedef struct {
  /* Key string; storeEntity/checkProlog compare it with the freshly
     pooled name to tell a first declaration from a redeclaration. */
  const char *name;
  /* Replacement text of an internal entity (not NUL-terminated), or 0. */
  const char *textPtr;
  /* Number of bytes at textPtr. */
  size_t textLen;
  /* Start of the entity's literal value in the document; used as the
     error position when the replacement text is not well-formed. */
  const char *docTextPtr;
  /* System identifier, set for externally declared entities. */
  const char *systemId;
  /* Not assigned anywhere in the visible code. */
  const char *publicId;
  /* Notation name; non-null makes a reference report binaryEntityRef. */
  const char *notation;
  /* Non-zero while the replacement text is being checked; a reference
     seen in that state is reported as recursive. */
  char open;
  /* Replacement text already verified in element content. */
  char wfInContent;
  /* Replacement text already verified in an attribute value. */
  char wfInAttribute;
  /* Set for the five predefined entities created by dtdInit. */
  char magic;
} ENTITY;
1997-12-10 02:44:19 -05:00
/* Minimum payload size, in bytes, of a newly allocated pool block. */
#define INIT_BLOCK_SIZE 1024

/* A chunk of string-pool storage.  Blocks are chained through `next`;
   `s` is the first byte of the payload, which is over-allocated using
   offsetof(BLOCK, s) + payload size. */
struct block {
  struct block *next;
  char s[1];
};

typedef struct block BLOCK;
/* Growable arena for strings.  A string is built between `start` and
   `ptr`, then either kept (poolFinish) or dropped (poolDiscard). */
typedef struct {
  /* Head of the list of allocated blocks; the newest block is first. */
  BLOCK *blocks;
  /* One past the last usable byte of the current block. */
  const char *end;
  /* Next byte to be written. */
  char *ptr;
  /* First byte of the string currently under construction. */
  char *start;
} STRING_POOL;
typedef struct {
HASH_TABLE generalEntities;
HASH_TABLE paramEntities;
1997-12-10 02:44:19 -05:00
STRING_POOL pool;
int containsRef;
int standalone;
char *groupConnector;
size_t groupSize;
} DTD;
1997-12-10 02:44:19 -05:00
/* Working state shared by the checks of a single wfCheck call. */
typedef struct {
  /* Entity tables and related prolog information. */
  DTD dtd;
  /* Capacity of startName, in elements. */
  size_t stackSize;
  /* Stack of pointers to the names of the currently open start-tags. */
  const char **startName;
  /* Capacity of atts, in elements. */
  int attsSize;
  /* Scratch array that XmlGetAttributes fills for each tag. */
  ATTRIBUTE *atts;
} CONTEXT;
/* String pool primitives. */
static void poolInit(STRING_POOL *);
static void poolDestroy(STRING_POOL *);
static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
                              const char *ptr, const char *end);
static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
                                   const char *ptr, const char *end);
static int poolGrow(STRING_POOL *);

/* Lifetime management of the per-call state. */
static int dtdInit(DTD *);
static void dtdDestroy(DTD *);
static int contextInit(CONTEXT *);
static void contextDestroy(CONTEXT *);

/* poolStart:   the string currently being built.
   poolDiscard: drop the string currently being built.
   poolFinish:  keep the string currently being built and begin a new one. */
#define poolStart(pool) ((pool)->start)
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)

/* The individual well-formedness checks. */
static enum WfCheckResult
checkProlog(DTD *, const char *s, const char *end, const char **, const ENCODING **enc);
static enum WfCheckResult
checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
             const char *s, const char *end, const char **badPtr);
static enum WfCheckResult
checkGeneralTextEntity(CONTEXT *context,
                       const char *s, const char *end,
                       const char **nextPtr,
                       const ENCODING **enc);
static enum WfCheckResult
checkAttributeValue(DTD *, const ENCODING *, const char *, const char *, const char **);
static enum WfCheckResult
checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
                         const char **badPtr);
static enum WfCheckResult
checkParsedEntities(CONTEXT *context, const char **badPtr);

static
enum WfCheckResult storeEntity(DTD *dtd,
                               const ENCODING *enc,
                               int isParam,
                               const char *entityNamePtr,
                               const char *entityNameEnd,
                               const char *entityTextPtr,
                               const char *entityTextEnd,
                               const char **badPtr);
1997-11-11 00:52:10 -05:00
enum WfCheckResult
1997-12-10 02:44:19 -05:00
wfCheck(enum EntityType entityType, const char *s, size_t n,
1997-11-12 05:38:58 -05:00
const char **badPtr, unsigned long *badLine, unsigned long *badCol)
1997-11-11 00:52:10 -05:00
{
1997-12-10 02:44:19 -05:00
CONTEXT context;
const ENCODING *enc;
1997-11-11 00:52:10 -05:00
const char *start = s;
const char *end = s + n;
1997-12-10 02:44:19 -05:00
const char *next = 0;
enum WfCheckResult result;
if (!contextInit(&context)) {
contextDestroy(&context);
1997-11-11 00:52:10 -05:00
return noMemory;
}
1997-12-10 02:44:19 -05:00
if (entityType == documentEntity) {
result = checkProlog(&context.dtd, s, end, &next, &enc);
s = next;
if (!result) {
1997-12-10 19:00:08 -05:00
result = checkParsedEntities(&context, &next);
1997-12-10 02:44:19 -05:00
s = next;
1997-12-10 19:00:08 -05:00
if (!result) {
result = checkContent(0, &context, enc, s, end, &next);
s = next;
}
1997-12-10 02:44:19 -05:00
}
}
else {
result = checkGeneralTextEntity(&context, s, end, &next, &enc);
s = next;
}
if (result && s) {
POSITION pos;
memset(&pos, 0, sizeof(POSITION));
XmlUpdatePosition(enc, start, s, &pos);
*badPtr = s;
*badLine = pos.lineNumber;
*badCol = pos.columnNumber;
}
contextDestroy(&context);
return result;
}
/* Set up a CONTEXT: allocate the start-tag stack and the attribute
   scratch array (1024 entries each) and initialise the DTD.
   Returns 1 on success and 0 if any of the three steps failed; the
   context may be passed to contextDestroy in both cases. */
static
int contextInit(CONTEXT *p)
{
  p->stackSize = 1024;
  p->startName = malloc(p->stackSize * sizeof(char *));
  p->attsSize = 1024;
  p->atts = malloc(p->attsSize * sizeof(ATTRIBUTE));
  /* dtdInit is always run, even when an allocation above failed. */
  if (!dtdInit(&p->dtd))
    return 0;
  if (!p->atts)
    return 0;
  return p->startName != 0;
}
/* Release everything owned by a CONTEXT: the DTD, the start-tag stack
   and the attribute scratch array. */
static
void contextDestroy(CONTEXT *p)
{
  DTD *dtd = &p->dtd;

  dtdDestroy(dtd);
  free((void *)p->startName);
  free((void *)p->atts);
}
/* Record `name` as the start-tag open at depth `level`, doubling the
   stack when it is full.  Returns 0 if the stack cannot be enlarged; in
   that case the existing stack and its recorded size are left intact. */
static
int pushStartName(CONTEXT *context, size_t level, const char *name)
{
  if (level == context->stackSize) {
    size_t newSize = context->stackSize * 2;
    const char **tem = realloc((void *)context->startName,
                               newSize * sizeof(char *));
    if (!tem)
      return 0;
    context->startName = tem;
    context->stackSize = newSize;
  }
  context->startName[level] = name;
  return 1;
}

/* Check element content in [s, end).

   level    number of start-tags already open on entry.  With level 0 the
            text must be a document instance: once the root element has
            been closed only whitespace, comments and processing
            instructions may follow.  With a non-zero level the text is a
            parsed entity and has to leave the nesting depth unchanged.
   badPtr   receives the error position for every error except noMemory.

   Returns wellFormed or the error code. */
static enum WfCheckResult
checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
             const char *s, const char *end, const char **badPtr)
{
  size_t startLevel = level;
  const char *next;
  int tok = XmlContentTok(enc, s, end, &next);
  for (;;) {
    switch (tok) {
    case XML_TOK_TRAILING_CR:
    case XML_TOK_NONE:
      if (startLevel > 0) {
        /* End of an entity: every tag it opened must have been closed. */
        if (level != startLevel) {
          *badPtr = s;
          return asyncEntity;
        }
        return wellFormed;
      }
      *badPtr = s;
      return noElements;
    case XML_TOK_INVALID:
      *badPtr = next;
      return invalidToken;
    case XML_TOK_PARTIAL:
      *badPtr = s;
      return unclosedToken;
    case XML_TOK_PARTIAL_CHAR:
      *badPtr = s;
      return partialChar;
    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
      break;
    case XML_TOK_ENTITY_REF:
      {
        /* The name lies between the leading '&' and the trailing ';'. */
        const char *name = poolStoreString(&context->dtd.pool, enc,
                                           s + enc->minBytesPerChar,
                                           next - enc->minBytesPerChar);
        ENTITY *entity;
        if (!name) {
          poolDiscard(&context->dtd.pool);
          return noMemory;
        }
        entity = (ENTITY *)lookup(&context->dtd.generalEntities, name, 0);
        poolDiscard(&context->dtd.pool);
        if (!entity) {
          /* An undeclared entity is an error only when no declaration
             could have been missed. */
          if (!context->dtd.containsRef || context->dtd.standalone) {
            *badPtr = s;
            return undefinedEntity;
          }
          break;
        }
        if (entity->wfInContent)
          break;
        if (entity->open) {
          *badPtr = s;
          return recursiveEntityRef;
        }
        if (entity->notation) {
          *badPtr = s;
          return binaryEntityRef;
        }
        if (entity->textPtr) {
          enum WfCheckResult result;
          const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
          entity->open = 1;
          result = checkContent(level, context, internalEnc,
                                entity->textPtr, entity->textPtr + entity->textLen,
                                badPtr);
          entity->open = 0;
          if (result) {
            /* Report the error at the reference rather than inside the
               replacement text.  A failure that carries no position is
               still propagated instead of being treated as success. */
            if (*badPtr)
              *badPtr = s;
            return result;
          }
          entity->wfInContent = 1;
        }
        break;
      }
    case XML_TOK_START_TAG_NO_ATTS:
      if (!pushStartName(context, level, s + enc->minBytesPerChar))
        return noMemory;
      level++;
      break;
    case XML_TOK_START_TAG_WITH_ATTS:
      if (!pushStartName(context, level, s + enc->minBytesPerChar))
        return noMemory;
      level++;
      /* fall through */
    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
      {
        int i;
        int n = XmlGetAttributes(enc, s, context->attsSize, context->atts);
        if (n > context->attsSize) {
          /* The tag has more attributes than fit: enlarge and re-scan. */
          int newSize = 2*n;
          ATTRIBUTE *tem = realloc((void *)context->atts,
                                   (size_t)newSize * sizeof(ATTRIBUTE));
          if (!tem)
            return noMemory;
          context->atts = tem;
          context->attsSize = newSize;
          XmlGetAttributes(enc, s, n, context->atts);
        }
        for (i = 0; i < n; i++) {
          if (!context->atts[i].normalized) {
            enum WfCheckResult result
              = checkAttributeValue(&context->dtd, enc,
                                    context->atts[i].valuePtr,
                                    context->atts[i].valueEnd,
                                    badPtr);
            if (result)
              return result;
          }
        }
        if (n > 1) {
          enum WfCheckResult result = checkAttributeUniqueness(context, enc, n, badPtr);
          if (result)
            return result;
        }
      }
      break;
    case XML_TOK_END_TAG:
      if (level == startLevel) {
        /* This end-tag would close an element opened outside the entity. */
        *badPtr = s;
        return asyncEntity;
      }
      --level;
      if (!XmlSameName(enc, context->startName[level], s + enc->minBytesPerChar * 2)) {
        *badPtr = s;
        return tagMismatch;
      }
      break;
    case XML_TOK_CHAR_REF:
      if (XmlCharRefNumber(enc, s) < 0) {
        *badPtr = s;
        return badCharRef;
      }
      break;
    case XML_TOK_XML_DECL:
      *badPtr = s;
      return misplacedXmlPi;
    default:
      break;
    }
    s = next;
    if (level == 0) {
      /* The root element is closed: scan what follows with the prolog
         tokenizer.  A non-positive token other than NONE/TRAILING_CR
         leaves the loop and is reported by the switch above. */
      do {
        tok = XmlPrologTok(enc, s, end, &next);
        switch (tok) {
        case XML_TOK_TRAILING_CR:
        case XML_TOK_NONE:
          return wellFormed;
        case XML_TOK_PROLOG_S:
        case XML_TOK_COMMENT:
        case XML_TOK_PI:
          s = next;
          break;
        default:
          if (tok > 0) {
            *badPtr = s;
            return junkAfterDocElement;
          }
          break;
        }
      } while (tok > 0);
    }
    else
      tok = XmlContentTok(enc, s, end, &next);
  }
  /* not reached */
}
static
1997-12-10 02:44:19 -05:00
int attcmp(const void *p1, const void *p2)
{
const ATTRIBUTE *a1 = p1;
const ATTRIBUTE *a2 = p2;
size_t n1 = a1->valuePtr - a1->name;
size_t n2 = a2->valuePtr - a2->name;
if (n1 == n2) {
int n = memcmp(a1->name, a2->name, n1);
if (n)
return n;
/* Sort identical attribute names by position, so that we always
report the first duplicate attribute. */
if (a1->name < a2->name)
return -1;
else if (a1->name > a2->name)
return 1;
else
return 0;
}
else if (n1 < n2)
return -1;
else
return 1;
}
/* Verify that no attribute name occurs twice among the first nAtts
   entries of context->atts.  On failure *badPtr is set to the name of a
   repeated attribute and duplicateAttribute is returned.

   Note that this trashes the attribute values: for QSORT_MIN_ATTS or
   more attributes the array is reordered and every valuePtr is
   overwritten with the end of the attribute's name. */
static enum WfCheckResult
checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
                         const char **badPtr)
{
#define QSORT_MIN_ATTS 10
  if (nAtts < QSORT_MIN_ATTS) {
    /* Few attributes: compare every pair directly. */
    int i;
    for (i = 1; i < nAtts; i++) {
      int j;
      for (j = 0; j < i; j++) {
        if (XmlSameName(enc, context->atts[i].name, context->atts[j].name)) {
          *badPtr = context->atts[i].name;
          return duplicateAttribute;
        }
      }
    }
  }
  else {
    int i;
    const char *dup = 0;
    /* Store the end of the name in valuePtr */
    for (i = 0; i < nAtts; i++) {
      ATTRIBUTE *a = context->atts + i;
      a->valuePtr = a->name + XmlNameLength(enc, a->name);
    }
    qsort(context->atts, nAtts, sizeof(ATTRIBUTE), attcmp);
    /* Equal names are now adjacent; remember the repeat that comes
       earliest in the document. */
    for (i = 1; i < nAtts; i++) {
      ATTRIBUTE *a = context->atts + i;
      if (XmlSameName(enc, a->name, a[-1].name)) {
        if (!dup || a->name < dup)
          dup = a->name;
      }
    }
    if (dup) {
      *badPtr = dup;
      return duplicateAttribute;
    }
  }
  return wellFormed;
}
/* Check the prolog (XML declaration and document type declaration) that
   starts at s, recording entity declarations in *dtd.

   enc      out: the encoding selected by XmlInitEncoding, replaced by
            the declared encoding when the XML declaration names one.
   nextPtr  out: on success, the start of the document instance; on
            error, the error position when one is known.

   Returns wellFormed once the instance starts, otherwise the error code
   (noMemory when a string or table entry cannot be allocated). */
static enum WfCheckResult
checkProlog(DTD *dtd, const char *s, const char *end,
            const char **nextPtr, const ENCODING **enc)
{
  /* Name of the entity whose declaration is currently being read. */
  const char *entityNamePtr = 0, *entityNameEnd = 0;
  int entityIsParam = 0;
  PROLOG_STATE state;
  /* Entity created for the most recent system identifier, or 0 when
     that declaration was a redeclaration. */
  ENTITY *entity = 0;
  INIT_ENCODING initEnc;
  XmlInitEncoding(&initEnc, enc);
  XmlPrologStateInit(&state);
  for (;;) {
    const char *next;
    int tok = XmlPrologTok(*enc, s, end, &next);
    switch (XmlTokenRole(&state, tok, s, next, *enc)) {
    case XML_ROLE_XML_DECL:
      {
        const char *encodingName = 0;
        const ENCODING *encoding = 0;
        const char *version;
        int standalone = -1;
        if (!XmlParseXmlDecl(0,
                             *enc,
                             s,
                             next,
                             nextPtr,
                             &version,
                             &encodingName,
                             &encoding,
                             &standalone))
          return syntaxError;
        if (encoding) {
          /* The declared encoding must use the same code unit size as
             the one the document is actually being read with. */
          if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
            *nextPtr = encodingName;
            return incorrectEncoding;
          }
          *enc = encoding;
        }
        else if (encodingName) {
          *nextPtr = encodingName;
          return unknownEncoding;
        }
        if (standalone == 1)
          dtd->standalone = 1;
        break;
      }
    case XML_ROLE_DOCTYPE_SYSTEM_ID:
      dtd->containsRef = 1;
      break;
    case XML_ROLE_DOCTYPE_PUBLIC_ID:
    case XML_ROLE_ENTITY_PUBLIC_ID:
    case XML_ROLE_NOTATION_PUBLIC_ID:
      if (!XmlIsPublicId(*enc, s, next, nextPtr))
        return syntaxError;
      break;
    case XML_ROLE_INSTANCE_START:
      *nextPtr = s;
      return wellFormed;
    case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
    case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
      {
        /* Check the literal without its surrounding quotes. */
        const char *tem = 0;
        enum WfCheckResult result
          = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
                                next - (*enc)->minBytesPerChar,
                                &tem);
        if (result) {
          if (tem)
            *nextPtr = tem;
          return result;
        }
        break;
      }
    case XML_ROLE_ENTITY_VALUE:
      {
        enum WfCheckResult result
          = storeEntity(dtd,
                        *enc,
                        entityIsParam,
                        entityNamePtr,
                        entityNameEnd,
                        s,
                        next,
                        nextPtr);
        if (result != wellFormed)
          return result;
      }
      break;
    case XML_ROLE_ENTITY_SYSTEM_ID:
      {
        const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
        if (!name) {
          poolDiscard(&dtd->pool);
          return noMemory;
        }
        entity = (ENTITY *)lookup(entityIsParam ? &dtd->paramEntities : &dtd->generalEntities,
                                  name, sizeof(ENTITY));
        if (!entity) {
          poolDiscard(&dtd->pool);
          return noMemory;
        }
        if (entity->name != name) {
          /* Already declared: the first declaration stays in force. */
          poolDiscard(&dtd->pool);
          entity = 0;
        }
        else {
          poolFinish(&dtd->pool);
          entity->systemId = poolStoreString(&dtd->pool, *enc,
                                             s + (*enc)->minBytesPerChar,
                                             next - (*enc)->minBytesPerChar);
          if (!entity->systemId) {
            poolDiscard(&dtd->pool);
            return noMemory;
          }
          poolFinish(&dtd->pool);
        }
      }
      break;
    case XML_ROLE_PARAM_ENTITY_REF:
      {
        const char *name = poolStoreString(&dtd->pool, *enc,
                                           s + (*enc)->minBytesPerChar,
                                           next - (*enc)->minBytesPerChar);
        ENTITY *ref;
        if (!name) {
          poolDiscard(&dtd->pool);
          return noMemory;
        }
        ref = (ENTITY *)lookup(&dtd->paramEntities, name, 0);
        poolDiscard(&dtd->pool);
        if (!ref) {
          if (!dtd->containsRef || dtd->standalone) {
            *nextPtr = s;
            return undefinedEntity;
          }
        }
      }
      break;
    case XML_ROLE_ENTITY_NOTATION_NAME:
      if (entity) {
        entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
        if (!entity->notation) {
          poolDiscard(&dtd->pool);
          return noMemory;
        }
        poolFinish(&dtd->pool);
      }
      break;
    case XML_ROLE_GENERAL_ENTITY_NAME:
      entityNamePtr = s;
      entityNameEnd = next;
      entityIsParam = 0;
      break;
    case XML_ROLE_PARAM_ENTITY_NAME:
      entityNamePtr = s;
      entityNameEnd = next;
      entityIsParam = 1;
      break;
    case XML_ROLE_ERROR:
      *nextPtr = s;
      switch (tok) {
      case XML_TOK_PARAM_ENTITY_REF:
        return paramEntityRef;
      case XML_TOK_INVALID:
        *nextPtr = next;
        return invalidToken;
      case XML_TOK_NONE:
        return noElements;
      case XML_TOK_PARTIAL:
        return unclosedToken;
      case XML_TOK_PARTIAL_CHAR:
        return partialChar;
      case XML_TOK_TRAILING_CR:
        *nextPtr = s + (*enc)->minBytesPerChar;
        return noElements;
      case XML_TOK_XML_DECL:
        return misplacedXmlPi;
      default:
        return syntaxError;
      }
    case XML_ROLE_GROUP_OPEN:
      if (state.level >= dtd->groupSize) {
        /* Start with 32 slots, then double.  The old array and its size
           are kept if the allocation fails. */
        size_t newSize = dtd->groupSize ? dtd->groupSize * 2 : 32;
        char *tem = realloc(dtd->groupConnector, newSize);
        if (!tem)
          return noMemory;
        dtd->groupConnector = tem;
        dtd->groupSize = newSize;
      }
      dtd->groupConnector[state.level] = 0;
      break;
    case XML_ROLE_GROUP_SEQUENCE:
      /* ',' and '|' may not be mixed within one group. */
      if (dtd->groupConnector[state.level] == '|') {
        *nextPtr = s;
        return syntaxError;
      }
      dtd->groupConnector[state.level] = ',';
      break;
    case XML_ROLE_GROUP_CHOICE:
      if (dtd->groupConnector[state.level] == ',') {
        *nextPtr = s;
        return syntaxError;
      }
      dtd->groupConnector[state.level] = '|';
      break;
    case XML_ROLE_NONE:
      if (tok == XML_TOK_PARAM_ENTITY_REF)
        dtd->containsRef = 1;
      break;
    default:
      break;
    }
    s = next;
  }
  /* not reached */
}
1997-12-10 19:00:08 -05:00
/* Check the replacement text of every internal general entity that has
   not already been verified in content, skipping the predefined ones.
   Each text is checked as content at nesting level 1.

   On failure *badPtr is moved to the entity's literal value in the
   document (when a position is known) and the error code is returned;
   otherwise wellFormed is returned. */
static enum WfCheckResult
checkParsedEntities(CONTEXT *context, const char **badPtr)
{
  HASH_TABLE_ITER iter;
  hashTableIterInit(&iter, &context->dtd.generalEntities);
  for (;;) {
    ENTITY *entity = (ENTITY *)hashTableIterNext(&iter);
    if (!entity)
      break;
    if (entity->textPtr && !entity->wfInContent && !entity->magic) {
      enum WfCheckResult result;
      const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
      /* Mark the entity open so that a self-reference is detected. */
      entity->open = 1;
      result = checkContent(1, context, internalEnc,
                            entity->textPtr, entity->textPtr + entity->textLen,
                            badPtr);
      entity->open = 0;
      if (result) {
        /* A failure without a position (out of memory) is propagated
           too, rather than marking the entity as well-formed. */
        if (*badPtr)
          *badPtr = entity->docTextPtr;
        return result;
      }
      entity->wfInContent = 1;
    }
  }
  return wellFormed;
}
1997-12-10 02:44:19 -05:00
/* Check a buffer that holds an external general parsed entity: an
   optional byte order mark, an optional text declaration, then content.

   enc      out: the encoding selected by XmlInitEncoding, replaced by
            the declared encoding when the text declaration names one.
   nextPtr  out: error position, when one is known.

   The content is checked at nesting level 1, and undeclared entity
   references are tolerated because containsRef is set first. */
static enum WfCheckResult
checkGeneralTextEntity(CONTEXT *context,
                       const char *s, const char *end,
                       const char **nextPtr,
                       const ENCODING **enc)
{
  INIT_ENCODING initEnc;
  const char *next;
  int tok;
  XmlInitEncoding(&initEnc, enc);
  tok = XmlContentTok(*enc, s, end, &next);
  if (tok == XML_TOK_BOM) {
    s = next;
    tok = XmlContentTok(*enc, s, end, &next);
  }
  if (tok == XML_TOK_XML_DECL) {
    const char *encodingName = 0;
    const ENCODING *encoding = 0;
    const char *version;
    if (!XmlParseXmlDecl(1,
                         *enc,
                         s,
                         next,
                         nextPtr,
                         &version,
                         &encodingName,
                         &encoding,
                         0))
      return syntaxError;
    if (encoding) {
      /* The declared encoding must use the same code unit size as the
         one the entity is actually being read with. */
      if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
        *nextPtr = encodingName;
        return incorrectEncoding;
      }
      *enc = encoding;
    }
    else if (encodingName) {
      *nextPtr = encodingName;
      return unknownEncoding;
    }
    s = next;
  }
  context->dtd.containsRef = 1;
  return checkContent(1, context, *enc, s, end, nextPtr);
}
/* Check the attribute value text in [ptr, end): character references
   must be valid, and every referenced general entity must be an internal
   entity whose replacement text is, recursively, a well-formed
   attribute value.

   badPtr   receives the error position for every error except noMemory.

   Returns wellFormed or the error code.  Calls abort() if the tokenizer
   yields a token kind this function does not expect. */
static enum WfCheckResult
checkAttributeValue(DTD *dtd, const ENCODING *enc,
                    const char *ptr, const char *end, const char **badPtr)
{
  for (;;) {
    const char *next;
    int tok = XmlAttributeValueTok(enc, ptr, end, &next);
    switch (tok) {
    case XML_TOK_TRAILING_CR:
    case XML_TOK_NONE:
      return wellFormed;
    case XML_TOK_INVALID:
      *badPtr = next;
      return invalidToken;
    case XML_TOK_PARTIAL:
      *badPtr = ptr;
      return invalidToken;
    case XML_TOK_CHAR_REF:
      if (XmlCharRefNumber(enc, ptr) < 0) {
        *badPtr = ptr;
        return badCharRef;
      }
      break;
    case XML_TOK_DATA_CHARS:
    case XML_TOK_DATA_NEWLINE:
      break;
    case XML_TOK_ENTITY_REF:
      {
        /* The name lies between the leading '&' and the trailing ';'. */
        const char *name = poolStoreString(&dtd->pool, enc,
                                           ptr + enc->minBytesPerChar,
                                           next - enc->minBytesPerChar);
        ENTITY *entity;
        if (!name) {
          poolDiscard(&dtd->pool);
          return noMemory;
        }
        entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0);
        poolDiscard(&dtd->pool);
        if (!entity) {
          if (!dtd->containsRef) {
            *badPtr = ptr;
            return undefinedEntity;
          }
          break;
        }
        if (entity->wfInAttribute)
          break;
        if (entity->open) {
          *badPtr = ptr;
          return recursiveEntityRef;
        }
        if (entity->notation) {
          *badPtr = ptr;
          return binaryEntityRef;
        }
        if (!entity->textPtr) {
          /* No replacement text is stored, i.e. the entity is external. */
          *badPtr = ptr;
          return attributeExternalEntityRef;
        }
        {
          enum WfCheckResult result;
          const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
          const char *textEnd = entity->textPtr + entity->textLen;
          entity->open = 1;
          result = checkAttributeValue(dtd, internalEnc, entity->textPtr, textEnd, badPtr);
          entity->open = 0;
          if (result) {
            /* Report the error at the reference rather than inside the
               replacement text; a failure without a position is still
               propagated. */
            if (*badPtr)
              *badPtr = ptr;
            return result;
          }
          entity->wfInAttribute = 1;
        }
      }
      break;
    default:
      abort();
    }
    ptr = next;
  }
  /* not reached */
}
1997-11-12 05:38:58 -05:00
static
1997-12-10 02:44:19 -05:00
void poolInit(STRING_POOL *pool)
{
pool->blocks = 0;
pool->start = 0;
pool->ptr = 0;
pool->end = 0;
}
/* Free every block of the pool and return it to the empty state. */
static
void poolDestroy(STRING_POOL *pool)
{
  BLOCK *cur;
  BLOCK *following;

  for (cur = pool->blocks; cur; cur = following) {
    /* Read the link before the block that holds it is released. */
    following = cur->next;
    free(cur);
  }
  pool->blocks = 0;
  pool->ptr = 0;
  pool->start = 0;
  pool->end = 0;
}
/* Convert [ptr, end) from `enc` to UTF-8 and append it to the string
   under construction, growing the pool as often as necessary.

   Returns the start of the string under construction, or 0 if memory
   ran out.  The result is never 0 on success: a pool that has no
   storage yet is grown first, so appending an empty range to a fresh
   pool is not mistaken for an allocation failure. */
static
const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
                       const char *ptr, const char *end)
{
  if (!pool->ptr && !poolGrow(pool))
    return 0;
  for (;;) {
    /* XmlConvert advances both ptr and pool->ptr; a ptr short of end
       afterwards is taken to mean that the output space ran out. */
    XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end);
    if (ptr == end)
      break;
    if (!poolGrow(pool))
      return 0;
  }
  return pool->start;
}
/* Append [ptr, end), converted to UTF-8, followed by a terminating NUL.
   Returns the start of the string under construction, or 0 on failure.
   The string stays "in progress" until the caller applies poolFinish or
   poolDiscard. */
static
const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
                            const char *ptr, const char *end)
{
  const char *appended = poolAppend(pool, enc, ptr, end);

  if (appended == 0)
    return 0;
  /* Make sure there is room for the terminator. */
  if (pool->ptr == pool->end) {
    if (!poolGrow(pool))
      return 0;
  }
  *pool->ptr = 0;
  pool->ptr++;
  return pool->start;
}
/* Make more room for the string under construction.

   When that string begins at the start of the newest block, the block is
   enlarged in place to twice its payload size.  Otherwise a new block is
   allocated (at least INIT_BLOCK_SIZE bytes, or twice the space that was
   left for the string) and the partial string is copied into it; the old
   block stays on the list.

   Returns 1 on success.  Returns 0 on allocation failure, in which case
   the pool is unchanged and still owns all of its blocks. */
static
int poolGrow(STRING_POOL *pool)
{
  /* Measure everything before any reallocation so that no arithmetic is
     done on a stale pointer, and avoid subtracting null pointers when
     the pool has no storage yet. */
  size_t used = pool->start ? (size_t)(pool->ptr - pool->start) : 0;
  size_t avail = pool->start ? (size_t)(pool->end - pool->start) : 0;

  if (pool->blocks && pool->start == pool->blocks->s) {
    size_t blockSize = avail*2;
    BLOCK *tem = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize);
    if (!tem)
      return 0;
    pool->blocks = tem;
    pool->ptr = tem->s + used;
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  else {
    BLOCK *tem;
    size_t blockSize = avail;
    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else
      blockSize *= 2;
    tem = malloc(offsetof(BLOCK, s) + blockSize);
    if (!tem)
      return 0;
    tem->next = pool->blocks;
    pool->blocks = tem;
    if (used)
      memcpy(tem->s, pool->start, used);
    pool->ptr = tem->s + used;
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return 1;
}
/* Initialise a DTD and register the five predefined entities (lt, amp,
   gt, quot, apos) as already-verified one-character internal entities.

   Every field is initialised before the first step that can fail, so
   the DTD can be handed to dtdDestroy even when this returns 0.
   Returns 1 on success, 0 if an entity could not be created. */
static int dtdInit(DTD *dtd)
{
  static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
  static const char chars[] = { '<', '&', '>', '"', '\'' };
  int i;
  poolInit(&(dtd->pool));
  hashTableInit(&(dtd->generalEntities));
  hashTableInit(&(dtd->paramEntities));
  dtd->containsRef = 0;
  /* Read when resolving entity references; only ever set to 1 by the
     XML declaration, so it needs an explicit default. */
  dtd->standalone = 0;
  dtd->groupSize = 0;
  dtd->groupConnector = 0;
  for (i = 0; i < 5; i++) {
    ENTITY *entity = (ENTITY *)lookup(&(dtd->generalEntities), names[i], sizeof(ENTITY));
    if (!entity)
      return 0;
    /* The replacement text is a single byte of `chars`; it is not
       NUL-terminated, hence the explicit length. */
    entity->textPtr = chars + i;
    entity->textLen = 1;
    entity->magic = 1;
    entity->wfInContent = 1;
    entity->wfInAttribute = 1;
  }
  return 1;
}
/* Release everything owned by a DTD: the string pool, both entity
   tables and the group-connector array. */
static void dtdDestroy(DTD *dtd)
{
  poolDestroy(&(dtd->pool));
  hashTableDestroy(&(dtd->generalEntities));
  hashTableDestroy(&(dtd->paramEntities));
  free(dtd->groupConnector);
}
/* Record an internal entity declaration.

   isParam                       selects the parameter or general table.
   entityNamePtr, entityNameEnd  the entity name in the document.
   entityTextPtr, entityTextEnd  the literal value including its quotes.
   badPtr                        receives the error position for every
                                 error except noMemory.

   The literal is always scanned, so errors in it are reported even for
   a redeclaration, but only the first declaration of a name has its
   replacement text and document position stored.  Character references
   are expanded and line ends become '\n'; general entity references are
   copied through unexpanded.  A parameter entity reference inside the
   value is reported as a syntax error.  Calls abort() if the tokenizer
   yields a token kind this function does not expect. */
static
enum WfCheckResult storeEntity(DTD *dtd,
                               const ENCODING *enc,
                               int isParam,
                               const char *entityNamePtr,
                               const char *entityNameEnd,
                               const char *entityTextPtr,
                               const char *entityTextEnd,
                               const char **badPtr)
{
  ENTITY *entity;
  int isNew;
  const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
  STRING_POOL *pool = &(dtd->pool);
  if (!poolStoreString(pool, enc, entityNamePtr, entityNameEnd)) {
    poolDiscard(pool);
    return noMemory;
  }
  entity = (ENTITY *)lookup(isParam ? &(dtd->paramEntities) : &(dtd->generalEntities),
                            pool->start,
                            sizeof(ENTITY));
  if (!entity) {
    poolDiscard(pool);
    return noMemory;
  }
  /* The entry's name is the string just pooled only if the entry was
     created by this lookup. */
  isNew = (entity->name == pool->start);
  if (isNew)
    poolFinish(pool);
  else
    poolDiscard(pool);
  /* Step over the opening and closing quotes. */
  entityTextPtr += enc->minBytesPerChar;
  entityTextEnd -= enc->minBytesPerChar;
  /* Keep the position of the declaration that is actually in force. */
  if (isNew)
    entity->docTextPtr = entityTextPtr;
  for (;;) {
    const char *next;
    int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
    switch (tok) {
    case XML_TOK_PARAM_ENTITY_REF:
      *badPtr = entityTextPtr;
      return syntaxError;
    case XML_TOK_NONE:
      if (isNew) {
        entity->textPtr = pool->start;
        entity->textLen = pool->ptr - pool->start;
        poolFinish(pool);
      }
      else
        poolDiscard(pool);
      return wellFormed;
    case XML_TOK_ENTITY_REF:
    case XML_TOK_DATA_CHARS:
      if (!poolAppend(pool, enc, entityTextPtr, next))
        return noMemory;
      break;
    case XML_TOK_TRAILING_CR:
      next = entityTextPtr + enc->minBytesPerChar;
      /* fall through */
    case XML_TOK_DATA_NEWLINE:
      if (pool->end == pool->ptr && !poolGrow(pool))
        return noMemory;
      *(pool->ptr)++ = '\n';
      break;
    case XML_TOK_CHAR_REF:
      {
        char buf[XML_MAX_BYTES_PER_CHAR];
        int i;
        int n = XmlCharRefNumber(enc, entityTextPtr);
        if (n < 0) {
          *badPtr = entityTextPtr;
          return badCharRef;
        }
        /* From here on n is the number of bytes placed in buf. */
        n = XmlEncode(utf8, n, buf);
        if (!n) {
          *badPtr = entityTextPtr;
          return badCharRef;
        }
        for (i = 0; i < n; i++) {
          if (pool->end == pool->ptr && !poolGrow(pool))
            return noMemory;
          *(pool->ptr)++ = buf[i];
        }
      }
      break;
    case XML_TOK_PARTIAL:
      *badPtr = entityTextPtr;
      return invalidToken;
    case XML_TOK_INVALID:
      *badPtr = next;
      return invalidToken;
    default:
      abort();
    }
    entityTextPtr = next;
  }
  /* not reached */
}