1997-11-11 00:52:10 -05:00
|
|
|
#include <stdlib.h>
|
1997-11-12 05:38:58 -05:00
|
|
|
#include <string.h>
|
1997-11-11 00:52:10 -05:00
|
|
|
|
1997-12-10 02:44:19 -05:00
|
|
|
#include "wfcheck.h"
|
|
|
|
#include "hashtable.h"
|
1997-11-11 00:52:10 -05:00
|
|
|
|
|
|
|
#include "xmltok.h"
|
1997-11-14 21:44:29 -05:00
|
|
|
#include "xmlrole.h"
|
|
|
|
|
|
|
|
/* One general entity, stored in DTD.generalEntities. */
typedef struct {
  /* Entity name.  Callers compare this pointer with the string they handed
     to lookup() to tell a freshly created entry from an existing one. */
  const char *name;
  /* Replacement text: set by storeEntity() for entities with a literal
     value and by dtdInit() for the predefined entities.  Tested against 0
     to detect entities that have no stored text. */
  const char *textPtr;
  /* Number of bytes at textPtr. */
  size_t textLen;
  /* Position in the document one character past the start of the entity
     value token; used as the error position by checkParsedEntities(). */
  const char *docTextPtr;
  /* System identifier, stored by checkProlog(). */
  const char *systemId;
  /* Not assigned by any code visible in this file. */
  const char *publicId;
  /* Notation name, stored by checkProlog(); when set, references to the
     entity are rejected with binaryEntityRef. */
  const char *notation;
  /* Non-zero while the entity's text is being checked, so that a reference
     to it from inside its own text is reported as recursiveEntityRef. */
  char open;
  /* Non-zero once the text has been checked as content. */
  char wfInContent;
  /* Non-zero once the text has been checked as an attribute value. */
  char wfInAttribute;
  /* Set by dtdInit() for the predefined entities; checkParsedEntities()
     skips entities that have it set. */
  char magic;
} ENTITY;
|
1997-11-14 21:44:29 -05:00
|
|
|
|
1997-12-10 02:44:19 -05:00
|
|
|
/* Smallest number of payload bytes poolGrow() allocates for a new block. */
#define INIT_BLOCK_SIZE 1024

/* One allocation owned by a STRING_POOL.  Blocks are chained through
   `next'; the payload starts at `s' and extends past the declared array
   (blocks are allocated as offsetof(BLOCK, s) plus the payload size). */
typedef struct block {
  struct block *next;
  char s[1];
} BLOCK;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
BLOCK *blocks;
|
|
|
|
const char *end;
|
1997-12-10 02:44:19 -05:00
|
|
|
char *ptr;
|
|
|
|
char *start;
|
1997-11-14 21:44:29 -05:00
|
|
|
} STRING_POOL;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
HASH_TABLE generalEntities;
|
1997-12-10 02:44:19 -05:00
|
|
|
STRING_POOL pool;
|
|
|
|
int containsRef;
|
|
|
|
int standalone;
|
|
|
|
char *groupConnector;
|
|
|
|
size_t groupSize;
|
1997-11-14 21:44:29 -05:00
|
|
|
} DTD;
|
|
|
|
|
1997-12-10 02:44:19 -05:00
|
|
|
typedef struct {
|
|
|
|
DTD dtd;
|
|
|
|
size_t stackSize;
|
|
|
|
const char **startName;
|
|
|
|
int attsSize;
|
|
|
|
ATTRIBUTE *atts;
|
|
|
|
} CONTEXT;
|
|
|
|
|
|
|
|
static void poolInit(STRING_POOL *);
|
|
|
|
static void poolDestroy(STRING_POOL *);
|
|
|
|
static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
|
|
|
|
const char *ptr, const char *end);
|
|
|
|
static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
|
|
|
|
const char *ptr, const char *end);
|
|
|
|
static int poolGrow(STRING_POOL *);
|
|
|
|
static int dtdInit(DTD *);
|
|
|
|
static void dtdDestroy(DTD *);
|
|
|
|
static int contextInit(CONTEXT *);
|
|
|
|
static void contextDestroy(CONTEXT *);
|
|
|
|
|
|
|
|
/* Start of the string currently being built in the pool. */
#define poolStart(pool) ((pool)->start)
/* Throw away the string being built: rewind the write position. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Keep the string being built: the next string starts after it. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
|
|
|
|
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkProlog(DTD *, const char *s, const char *end, const char **, const ENCODING **enc);
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
|
|
|
|
const char *s, const char *end, const char **badPtr);
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkGeneralTextEntity(CONTEXT *context,
|
|
|
|
const char *s, const char *end,
|
|
|
|
const char **nextPtr,
|
|
|
|
const ENCODING **enc);
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkAttributeValue(DTD *, const ENCODING *, const char *, const char *, const char **);
|
1997-11-14 21:44:29 -05:00
|
|
|
static enum WfCheckResult
|
1997-12-10 02:44:19 -05:00
|
|
|
checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
|
|
|
|
const char **badPtr);
|
1997-12-10 19:00:08 -05:00
|
|
|
static enum WfCheckResult
|
|
|
|
checkParsedEntities(CONTEXT *context, const char **badPtr);
|
1997-11-11 00:52:10 -05:00
|
|
|
|
1997-11-12 05:38:58 -05:00
|
|
|
static
|
1997-12-10 02:44:19 -05:00
|
|
|
enum WfCheckResult storeEntity(DTD *dtd,
|
|
|
|
const ENCODING *enc,
|
|
|
|
const char *entityNamePtr,
|
|
|
|
const char *entityNameEnd,
|
|
|
|
const char *entityTextPtr,
|
|
|
|
const char *entityTextEnd,
|
|
|
|
const char **badPtr);
|
|
|
|
|
1997-11-11 00:52:10 -05:00
|
|
|
|
|
|
|
enum WfCheckResult
|
1997-12-10 02:44:19 -05:00
|
|
|
wfCheck(enum EntityType entityType, const char *s, size_t n,
|
1997-11-12 05:38:58 -05:00
|
|
|
const char **badPtr, unsigned long *badLine, unsigned long *badCol)
|
1997-11-11 00:52:10 -05:00
|
|
|
{
|
1997-12-10 02:44:19 -05:00
|
|
|
CONTEXT context;
|
|
|
|
const ENCODING *enc;
|
1997-11-11 00:52:10 -05:00
|
|
|
const char *start = s;
|
|
|
|
const char *end = s + n;
|
1997-12-10 02:44:19 -05:00
|
|
|
const char *next = 0;
|
|
|
|
enum WfCheckResult result;
|
|
|
|
|
|
|
|
if (!contextInit(&context)) {
|
|
|
|
contextDestroy(&context);
|
1997-11-11 00:52:10 -05:00
|
|
|
return noMemory;
|
1997-11-14 21:44:29 -05:00
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
if (entityType == documentEntity) {
|
|
|
|
result = checkProlog(&context.dtd, s, end, &next, &enc);
|
|
|
|
s = next;
|
|
|
|
if (!result) {
|
1997-12-10 19:00:08 -05:00
|
|
|
result = checkParsedEntities(&context, &next);
|
1997-12-10 02:44:19 -05:00
|
|
|
s = next;
|
1997-12-10 19:00:08 -05:00
|
|
|
if (!result) {
|
|
|
|
result = checkContent(0, &context, enc, s, end, &next);
|
|
|
|
s = next;
|
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
result = checkGeneralTextEntity(&context, s, end, &next, &enc);
|
|
|
|
s = next;
|
|
|
|
}
|
|
|
|
if (result && s) {
|
|
|
|
POSITION pos;
|
|
|
|
memset(&pos, 0, sizeof(POSITION));
|
|
|
|
XmlUpdatePosition(enc, start, s, &pos);
|
|
|
|
*badPtr = s;
|
|
|
|
*badLine = pos.lineNumber;
|
|
|
|
*badCol = pos.columnNumber;
|
|
|
|
}
|
|
|
|
contextDestroy(&context);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int contextInit(CONTEXT *p)
|
|
|
|
{
|
|
|
|
p->stackSize = 1024;
|
|
|
|
p->startName = malloc(p->stackSize * sizeof(char *));
|
|
|
|
p->attsSize = 1024;
|
|
|
|
p->atts = malloc(p->attsSize * sizeof(ATTRIBUTE));
|
|
|
|
return dtdInit(&(p->dtd)) && p->atts && p->startName;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
void contextDestroy(CONTEXT *p)
|
|
|
|
{
|
|
|
|
dtdDestroy(&(p->dtd));
|
|
|
|
free((void *)p->startName);
|
|
|
|
free((void *)p->atts);
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
|
|
|
|
const char *s, const char *end, const char **badPtr)
|
|
|
|
{
|
|
|
|
size_t startLevel = level;
|
|
|
|
const char *next;
|
|
|
|
int tok = XmlContentTok(enc, s, end, &next);
|
1997-11-11 00:52:10 -05:00
|
|
|
for (;;) {
|
|
|
|
switch (tok) {
|
1997-12-10 02:44:19 -05:00
|
|
|
case XML_TOK_TRAILING_CR:
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_NONE:
|
1997-12-10 02:44:19 -05:00
|
|
|
if (startLevel > 0) {
|
|
|
|
if (level != startLevel) {
|
|
|
|
*badPtr = s;
|
|
|
|
return asyncEntity;
|
|
|
|
}
|
|
|
|
return wellFormed;
|
|
|
|
}
|
|
|
|
*badPtr = s;
|
|
|
|
return noElements;
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_INVALID:
|
1997-12-10 02:44:19 -05:00
|
|
|
*badPtr = next;
|
|
|
|
return invalidToken;
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_PARTIAL:
|
1997-12-10 02:44:19 -05:00
|
|
|
*badPtr = s;
|
|
|
|
return unclosedToken;
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_PARTIAL_CHAR:
|
1997-12-10 02:44:19 -05:00
|
|
|
*badPtr = s;
|
|
|
|
return partialChar;
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
|
|
|
|
break;
|
1997-12-10 02:44:19 -05:00
|
|
|
case XML_TOK_ENTITY_REF:
|
|
|
|
{
|
|
|
|
const char *name = poolStoreString(&context->dtd.pool, enc,
|
|
|
|
s + enc->minBytesPerChar,
|
|
|
|
next - enc->minBytesPerChar);
|
|
|
|
ENTITY *entity = (ENTITY *)lookup(&context->dtd.generalEntities, name, 0);
|
|
|
|
poolDiscard(&context->dtd.pool);
|
|
|
|
if (!entity) {
|
|
|
|
if (!context->dtd.containsRef || context->dtd.standalone) {
|
|
|
|
*badPtr = s;
|
|
|
|
return undefinedEntity;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (entity->wfInContent)
|
|
|
|
break;
|
|
|
|
if (entity->open) {
|
|
|
|
*badPtr = s;
|
|
|
|
return recursiveEntityRef;
|
|
|
|
}
|
|
|
|
if (entity->notation) {
|
|
|
|
*badPtr = s;
|
|
|
|
return binaryEntityRef;
|
|
|
|
}
|
|
|
|
if (entity) {
|
|
|
|
if (entity->textPtr) {
|
|
|
|
enum WfCheckResult result;
|
|
|
|
const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
|
|
entity->open = 1;
|
|
|
|
result = checkContent(level, context, internalEnc,
|
|
|
|
entity->textPtr, entity->textPtr + entity->textLen,
|
|
|
|
badPtr);
|
|
|
|
entity->open = 0;
|
|
|
|
if (result && *badPtr) {
|
|
|
|
*badPtr = s;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
entity->wfInContent = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_START_TAG_NO_ATTS:
|
1997-12-10 02:44:19 -05:00
|
|
|
if (level == context->stackSize) {
|
|
|
|
context->startName
|
|
|
|
= realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *));
|
|
|
|
if (!context->startName)
|
1997-11-11 00:52:10 -05:00
|
|
|
return noMemory;
|
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
context->startName[level++] = s + enc->minBytesPerChar;
|
1997-11-11 00:52:10 -05:00
|
|
|
break;
|
|
|
|
case XML_TOK_START_TAG_WITH_ATTS:
|
1997-12-10 02:44:19 -05:00
|
|
|
if (level == context->stackSize) {
|
|
|
|
context->startName = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *));
|
|
|
|
if (!context->startName)
|
1997-11-11 00:52:10 -05:00
|
|
|
return noMemory;
|
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
context->startName[level++] = s + enc->minBytesPerChar;
|
1997-11-11 00:52:10 -05:00
|
|
|
/* fall through */
|
|
|
|
case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
|
|
|
|
{
|
|
|
|
int i;
|
1997-12-10 02:44:19 -05:00
|
|
|
int n = XmlGetAttributes(enc, s, context->attsSize, context->atts);
|
|
|
|
if (n > context->attsSize) {
|
|
|
|
context->attsSize = 2*n;
|
|
|
|
context->atts = realloc((void *)context->atts, context->attsSize * sizeof(ATTRIBUTE));
|
|
|
|
if (!context->atts)
|
1997-11-11 00:52:10 -05:00
|
|
|
return noMemory;
|
1997-12-10 02:44:19 -05:00
|
|
|
XmlGetAttributes(enc, s, n, context->atts);
|
|
|
|
}
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
if (context->atts[i].containsRef) {
|
|
|
|
enum WfCheckResult result
|
|
|
|
= checkAttributeValue(&context->dtd, enc,
|
|
|
|
context->atts[i].valuePtr,
|
|
|
|
context->atts[i].valueEnd,
|
|
|
|
badPtr);
|
|
|
|
if (result)
|
|
|
|
return result;
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
}
|
|
|
|
if (i > 1) {
|
|
|
|
enum WfCheckResult result = checkAttributeUniqueness(context, enc, n, badPtr);
|
|
|
|
if (result)
|
|
|
|
return result;
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_TOK_END_TAG:
|
1997-12-10 02:44:19 -05:00
|
|
|
if (level == startLevel) {
|
|
|
|
*badPtr = s;
|
|
|
|
return asyncEntity;
|
|
|
|
}
|
1997-11-11 00:52:10 -05:00
|
|
|
--level;
|
1997-12-10 02:44:19 -05:00
|
|
|
if (!XmlSameName(enc, context->startName[level], s + enc->minBytesPerChar * 2)) {
|
|
|
|
*badPtr = s;
|
|
|
|
return tagMismatch;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_TOK_CHAR_REF:
|
|
|
|
if (XmlCharRefNumber(enc, s) < 0) {
|
|
|
|
*badPtr = s;
|
|
|
|
return badCharRef;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_TOK_PI:
|
|
|
|
if (XmlNameMatchesAscii(enc, s + 2 * enc->minBytesPerChar, "xml")) {
|
|
|
|
*badPtr = s;
|
|
|
|
return misplacedXmlPi;
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
s = next;
|
|
|
|
if (level == 0) {
|
|
|
|
do {
|
|
|
|
tok = XmlPrologTok(enc, s, end, &next);
|
|
|
|
switch (tok) {
|
1997-12-10 02:44:19 -05:00
|
|
|
case XML_TOK_TRAILING_CR:
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_NONE:
|
1997-12-10 02:44:19 -05:00
|
|
|
return wellFormed;
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_PROLOG_S:
|
|
|
|
case XML_TOK_COMMENT:
|
|
|
|
case XML_TOK_PI:
|
|
|
|
s = next;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (tok > 0) {
|
1997-12-10 02:44:19 -05:00
|
|
|
*badPtr = s;
|
|
|
|
return junkAfterDocElement;
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} while (tok > 0);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
tok = XmlContentTok(enc, s, end, &next);
|
|
|
|
}
|
|
|
|
/* not reached */
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
1997-12-10 02:44:19 -05:00
|
|
|
int attcmp(const void *p1, const void *p2)
|
|
|
|
{
|
|
|
|
const ATTRIBUTE *a1 = p1;
|
|
|
|
const ATTRIBUTE *a2 = p2;
|
|
|
|
size_t n1 = a1->valuePtr - a1->name;
|
|
|
|
size_t n2 = a2->valuePtr - a2->name;
|
|
|
|
|
|
|
|
if (n1 == n2) {
|
|
|
|
int n = memcmp(a1->name, a2->name, n1);
|
|
|
|
if (n)
|
|
|
|
return n;
|
|
|
|
/* Sort identical attribute names by position, so that we always
|
|
|
|
report the first duplicate attribute. */
|
|
|
|
if (a1->name < a2->name)
|
|
|
|
return -1;
|
|
|
|
else if (a1->name > a2->name)
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else if (n1 < n2)
|
|
|
|
return -1;
|
|
|
|
else
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Note that this trashes the attribute values. */
|
|
|
|
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
|
|
|
|
const char **badPtr)
|
1997-11-11 00:52:10 -05:00
|
|
|
{
|
1997-12-10 02:44:19 -05:00
|
|
|
#define QSORT_MIN_ATTS 10
|
|
|
|
if (nAtts < QSORT_MIN_ATTS) {
|
|
|
|
int i;
|
|
|
|
for (i = 1; i < nAtts; i++) {
|
|
|
|
int j;
|
|
|
|
for (j = 0; j < i; j++) {
|
|
|
|
if (XmlSameName(enc, context->atts[i].name, context->atts[j].name)) {
|
|
|
|
*badPtr = context->atts[i].name;
|
|
|
|
return duplicateAttribute;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int i;
|
|
|
|
const char *dup = 0;
|
|
|
|
/* Store the end of the name in valuePtr */
|
|
|
|
for (i = 0; i < nAtts; i++) {
|
|
|
|
ATTRIBUTE *a = context->atts + i;
|
|
|
|
a->valuePtr = a->name + XmlNameLength(enc, a->name);
|
|
|
|
}
|
|
|
|
qsort(context->atts, nAtts, sizeof(ATTRIBUTE), attcmp);
|
|
|
|
for (i = 1; i < nAtts; i++) {
|
|
|
|
ATTRIBUTE *a = context->atts + i;
|
|
|
|
if (XmlSameName(enc, a->name, a[-1].name)) {
|
|
|
|
if (!dup || a->name < dup)
|
|
|
|
dup = a->name;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (dup) {
|
|
|
|
*badPtr = dup;
|
|
|
|
return duplicateAttribute;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return wellFormed;
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkProlog(DTD *dtd, const char *s, const char *end,
|
|
|
|
const char **nextPtr, const ENCODING **enc)
|
|
|
|
{
|
|
|
|
const char *entityNamePtr, *entityNameEnd;
|
1997-11-14 21:44:29 -05:00
|
|
|
PROLOG_STATE state;
|
1997-12-10 02:44:19 -05:00
|
|
|
ENTITY *entity;
|
1997-11-11 00:52:10 -05:00
|
|
|
INIT_ENCODING initEnc;
|
|
|
|
XmlInitEncoding(&initEnc, enc);
|
1997-11-14 21:44:29 -05:00
|
|
|
XmlPrologStateInit(&state);
|
1997-11-11 00:52:10 -05:00
|
|
|
for (;;) {
|
1997-12-10 02:44:19 -05:00
|
|
|
const char *next;
|
|
|
|
int tok = XmlPrologTok(*enc, s, end, &next);
|
|
|
|
if (tok != XML_TOK_PROLOG_S) {
|
|
|
|
switch (XmlTokenRole(&state, tok, s, next, *enc)) {
|
|
|
|
case XML_ROLE_XML_DECL:
|
|
|
|
{
|
|
|
|
const char *encodingName = 0;
|
|
|
|
const ENCODING *encoding = 0;
|
|
|
|
const char *version;
|
|
|
|
int standalone = -1;
|
|
|
|
if (!XmlParseXmlDecl(0,
|
|
|
|
*enc,
|
|
|
|
s,
|
|
|
|
next,
|
|
|
|
nextPtr,
|
|
|
|
&version,
|
|
|
|
&encodingName,
|
|
|
|
&encoding,
|
|
|
|
&standalone))
|
|
|
|
return syntaxError;
|
|
|
|
if (encoding) {
|
|
|
|
if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
|
|
|
|
*nextPtr = encodingName;
|
|
|
|
return incorrectEncoding;
|
|
|
|
}
|
|
|
|
*enc = encoding;
|
|
|
|
}
|
|
|
|
else if (encodingName) {
|
|
|
|
*nextPtr = encodingName;
|
|
|
|
return unknownEncoding;
|
|
|
|
}
|
|
|
|
if (standalone == 1)
|
|
|
|
dtd->standalone = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case XML_ROLE_DOCTYPE_SYSTEM_ID:
|
|
|
|
dtd->containsRef = 1;
|
|
|
|
break;
|
|
|
|
case XML_ROLE_DOCTYPE_PUBLIC_ID:
|
|
|
|
case XML_ROLE_ENTITY_PUBLIC_ID:
|
|
|
|
case XML_ROLE_NOTATION_PUBLIC_ID:
|
|
|
|
if (!XmlIsPublicId(*enc, s, next, nextPtr))
|
|
|
|
return syntaxError;
|
|
|
|
break;
|
|
|
|
case XML_ROLE_INSTANCE_START:
|
|
|
|
*nextPtr = s;
|
|
|
|
return wellFormed;
|
|
|
|
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
|
|
|
|
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
|
|
|
|
{
|
|
|
|
const char *tem = 0;
|
|
|
|
enum WfCheckResult result
|
|
|
|
= checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
|
|
|
|
next - (*enc)->minBytesPerChar,
|
|
|
|
&tem);
|
|
|
|
if (result) {
|
|
|
|
if (tem)
|
|
|
|
*nextPtr = tem;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case XML_ROLE_ENTITY_VALUE:
|
|
|
|
{
|
|
|
|
enum WfCheckResult result
|
|
|
|
= storeEntity(dtd,
|
|
|
|
*enc,
|
|
|
|
entityNamePtr,
|
|
|
|
entityNameEnd,
|
|
|
|
s,
|
|
|
|
next,
|
|
|
|
nextPtr);
|
|
|
|
if (result != wellFormed)
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_ROLE_ENTITY_SYSTEM_ID:
|
|
|
|
if (entityNamePtr) {
|
|
|
|
const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
|
|
|
|
entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY));
|
|
|
|
if (entity->name != name) {
|
|
|
|
poolDiscard(&dtd->pool);
|
|
|
|
entity = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
poolFinish(&dtd->pool);
|
|
|
|
entity->systemId = poolStoreString(&dtd->pool, *enc,
|
|
|
|
s + (*enc)->minBytesPerChar,
|
|
|
|
next - (*enc)->minBytesPerChar);
|
|
|
|
poolFinish(&dtd->pool);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_ROLE_ENTITY_NOTATION_NAME:
|
|
|
|
if (entity) {
|
|
|
|
entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
|
|
|
|
poolFinish(&dtd->pool);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_ROLE_GENERAL_ENTITY_NAME:
|
|
|
|
entityNamePtr = s;
|
|
|
|
entityNameEnd = next;
|
|
|
|
break;
|
|
|
|
case XML_ROLE_PARAM_ENTITY_NAME:
|
|
|
|
entityNamePtr = 0;
|
|
|
|
entityNameEnd = 0;
|
|
|
|
break;
|
|
|
|
case XML_ROLE_ERROR:
|
|
|
|
*nextPtr = s;
|
|
|
|
switch (tok) {
|
|
|
|
case XML_TOK_COND_SECT_OPEN:
|
|
|
|
return condSect;
|
|
|
|
case XML_TOK_PARAM_ENTITY_REF:
|
|
|
|
return paramEntityRef;
|
|
|
|
case XML_TOK_INVALID:
|
|
|
|
*nextPtr = next;
|
|
|
|
return invalidToken;
|
|
|
|
case XML_TOK_NONE:
|
|
|
|
return noElements;
|
|
|
|
case XML_TOK_PARTIAL:
|
|
|
|
return unclosedToken;
|
|
|
|
case XML_TOK_PARTIAL_CHAR:
|
|
|
|
return partialChar;
|
|
|
|
case XML_TOK_TRAILING_CR:
|
|
|
|
*nextPtr = s + (*enc)->minBytesPerChar;
|
|
|
|
return noElements;
|
|
|
|
case XML_TOK_PI:
|
|
|
|
if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml"))
|
|
|
|
return misplacedXmlPi;
|
|
|
|
default:
|
|
|
|
return syntaxError;
|
|
|
|
}
|
|
|
|
case XML_ROLE_GROUP_OPEN:
|
|
|
|
if (state.level >= dtd->groupSize) {
|
|
|
|
if (dtd->groupSize)
|
|
|
|
dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2);
|
|
|
|
else
|
|
|
|
dtd->groupConnector = malloc(dtd->groupSize = 32);
|
|
|
|
if (!dtd->groupConnector)
|
|
|
|
return noMemory;
|
|
|
|
}
|
|
|
|
dtd->groupConnector[state.level] = 0;
|
|
|
|
break;
|
|
|
|
case XML_ROLE_GROUP_SEQUENCE:
|
|
|
|
if (dtd->groupConnector[state.level] == '|') {
|
|
|
|
*nextPtr = s;
|
|
|
|
return syntaxError;
|
|
|
|
}
|
|
|
|
dtd->groupConnector[state.level] = ',';
|
|
|
|
break;
|
|
|
|
case XML_ROLE_GROUP_CHOICE:
|
|
|
|
if (dtd->groupConnector[state.level] == ',') {
|
|
|
|
*nextPtr = s;
|
|
|
|
return syntaxError;
|
|
|
|
}
|
|
|
|
dtd->groupConnector[state.level] = '|';
|
|
|
|
break;
|
|
|
|
case XML_ROLE_NONE:
|
|
|
|
if (tok == XML_TOK_PARAM_ENTITY_REF)
|
|
|
|
dtd->containsRef = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s = next;
|
|
|
|
}
|
|
|
|
/* not reached */
|
|
|
|
}
|
|
|
|
|
1997-12-10 19:00:08 -05:00
|
|
|
static enum WfCheckResult
|
|
|
|
checkParsedEntities(CONTEXT *context, const char **badPtr)
|
|
|
|
{
|
|
|
|
HASH_TABLE_ITER iter;
|
|
|
|
hashTableIterInit(&iter, &context->dtd.generalEntities);
|
|
|
|
for (;;) {
|
|
|
|
ENTITY *entity = (ENTITY *)hashTableIterNext(&iter);
|
|
|
|
if (!entity)
|
|
|
|
break;
|
|
|
|
if (entity->textPtr && !entity->wfInContent && !entity->magic) {
|
|
|
|
enum WfCheckResult result;
|
|
|
|
const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
|
|
entity->open = 1;
|
|
|
|
result = checkContent(1, context, internalEnc,
|
|
|
|
entity->textPtr, entity->textPtr + entity->textLen,
|
|
|
|
badPtr);
|
|
|
|
entity->open = 0;
|
|
|
|
if (result && *badPtr) {
|
|
|
|
*badPtr = entity->docTextPtr;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
entity->wfInContent = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return wellFormed;
|
|
|
|
}
|
|
|
|
|
1997-12-10 02:44:19 -05:00
|
|
|
static enum WfCheckResult
|
|
|
|
checkGeneralTextEntity(CONTEXT *context,
|
|
|
|
const char *s, const char *end,
|
|
|
|
const char **nextPtr,
|
|
|
|
const ENCODING **enc)
|
|
|
|
{
|
|
|
|
INIT_ENCODING initEnc;
|
|
|
|
const char *next;
|
|
|
|
int tok;
|
|
|
|
|
|
|
|
XmlInitEncoding(&initEnc, enc);
|
|
|
|
tok = XmlContentTok(*enc, s, end, &next);
|
|
|
|
|
|
|
|
if (tok == XML_TOK_BOM) {
|
|
|
|
s = next;
|
|
|
|
tok = XmlContentTok(*enc, s, end, &next);
|
|
|
|
}
|
|
|
|
if (tok == XML_TOK_PI
|
|
|
|
&& XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) {
|
|
|
|
const char *encodingName = 0;
|
|
|
|
const ENCODING *encoding = 0;
|
|
|
|
const char *version;
|
|
|
|
if (!XmlParseXmlDecl(1,
|
|
|
|
*enc,
|
|
|
|
s,
|
|
|
|
next,
|
|
|
|
nextPtr,
|
|
|
|
&version,
|
|
|
|
&encodingName,
|
|
|
|
&encoding,
|
|
|
|
0))
|
|
|
|
return syntaxError;
|
|
|
|
if (encoding) {
|
|
|
|
if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
|
|
|
|
*nextPtr = encodingName;
|
|
|
|
return incorrectEncoding;
|
|
|
|
}
|
|
|
|
*enc = encoding;
|
|
|
|
}
|
|
|
|
else if (encodingName) {
|
|
|
|
*nextPtr = encodingName;
|
|
|
|
return unknownEncoding;
|
|
|
|
}
|
|
|
|
s = next;
|
|
|
|
}
|
|
|
|
context->dtd.containsRef = 1;
|
|
|
|
return checkContent(1, context, *enc, s, end, nextPtr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum WfCheckResult
|
|
|
|
checkAttributeValue(DTD *dtd, const ENCODING *enc,
|
|
|
|
const char *ptr, const char *end, const char **badPtr)
|
|
|
|
{
|
|
|
|
for (;;) {
|
|
|
|
const char *next;
|
|
|
|
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
|
1997-11-11 00:52:10 -05:00
|
|
|
switch (tok) {
|
1997-12-10 02:44:19 -05:00
|
|
|
case XML_TOK_TRAILING_CR:
|
1997-11-11 00:52:10 -05:00
|
|
|
case XML_TOK_NONE:
|
1997-11-14 21:44:29 -05:00
|
|
|
return wellFormed;
|
1997-12-10 02:44:19 -05:00
|
|
|
case XML_TOK_INVALID:
|
|
|
|
*badPtr = next;
|
|
|
|
return invalidToken;
|
|
|
|
case XML_TOK_PARTIAL:
|
|
|
|
*badPtr = ptr;
|
|
|
|
return invalidToken;
|
|
|
|
case XML_TOK_CHAR_REF:
|
|
|
|
if (XmlCharRefNumber(enc, ptr) < 0) {
|
|
|
|
*badPtr = ptr;
|
|
|
|
return badCharRef;
|
|
|
|
}
|
1997-11-13 04:05:46 -05:00
|
|
|
break;
|
1997-12-10 02:44:19 -05:00
|
|
|
case XML_TOK_DATA_CHARS:
|
|
|
|
case XML_TOK_DATA_NEWLINE:
|
|
|
|
break;
|
|
|
|
case XML_TOK_ENTITY_REF:
|
|
|
|
{
|
|
|
|
const char *name = poolStoreString(&dtd->pool, enc,
|
|
|
|
ptr + enc->minBytesPerChar,
|
|
|
|
next - enc->minBytesPerChar);
|
|
|
|
ENTITY *entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0);
|
|
|
|
poolDiscard(&dtd->pool);
|
|
|
|
if (!entity) {
|
|
|
|
if (!dtd->containsRef) {
|
|
|
|
*badPtr = ptr;
|
|
|
|
return undefinedEntity;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (entity->wfInAttribute)
|
|
|
|
break;
|
|
|
|
if (entity->open) {
|
|
|
|
*badPtr = ptr;
|
|
|
|
return recursiveEntityRef;
|
|
|
|
}
|
|
|
|
if (entity->notation) {
|
|
|
|
*badPtr = ptr;
|
|
|
|
return binaryEntityRef;
|
|
|
|
}
|
|
|
|
if (entity) {
|
|
|
|
if (entity->textPtr) {
|
|
|
|
enum WfCheckResult result;
|
|
|
|
const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
|
|
const char *textEnd = entity->textPtr + entity->textLen;
|
|
|
|
entity->open = 1;
|
|
|
|
result = checkAttributeValue(dtd, internalEnc, entity->textPtr, textEnd, badPtr);
|
|
|
|
entity->open = 0;
|
|
|
|
if (result && *badPtr) {
|
|
|
|
*badPtr = ptr;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
entity->wfInAttribute = 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*badPtr = ptr;
|
|
|
|
return attributeExternalEntityRef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
1997-11-13 04:05:46 -05:00
|
|
|
}
|
1997-11-11 00:52:10 -05:00
|
|
|
break;
|
1997-12-10 02:44:19 -05:00
|
|
|
default:
|
|
|
|
abort();
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
ptr = next;
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
|
|
|
/* not reached */
|
|
|
|
}
|
1997-11-12 05:38:58 -05:00
|
|
|
|
|
|
|
static
|
1997-12-10 02:44:19 -05:00
|
|
|
void poolInit(STRING_POOL *pool)
|
|
|
|
{
|
|
|
|
pool->blocks = 0;
|
|
|
|
pool->start = 0;
|
|
|
|
pool->ptr = 0;
|
|
|
|
pool->end = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
void poolDestroy(STRING_POOL *pool)
|
|
|
|
{
|
|
|
|
BLOCK *p = pool->blocks;
|
|
|
|
while (p) {
|
|
|
|
BLOCK *tem = p->next;
|
|
|
|
free(p);
|
|
|
|
p = tem;
|
|
|
|
}
|
|
|
|
pool->blocks = 0;
|
|
|
|
pool->ptr = 0;
|
|
|
|
pool->start = 0;
|
|
|
|
pool->end = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
|
|
|
|
const char *ptr, const char *end)
|
|
|
|
{
|
|
|
|
for (;;) {
|
|
|
|
XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end);
|
|
|
|
if (ptr == end)
|
|
|
|
break;
|
|
|
|
if (!poolGrow(pool))
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return pool->start;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
|
|
|
|
const char *ptr, const char *end)
|
|
|
|
{
|
|
|
|
if (!poolAppend(pool, enc, ptr, end))
|
|
|
|
return 0;
|
|
|
|
if (pool->ptr == pool->end && !poolGrow(pool))
|
|
|
|
return 0;
|
|
|
|
*(pool->ptr)++ = 0;
|
|
|
|
return pool->start;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int poolGrow(STRING_POOL *pool)
|
|
|
|
{
|
|
|
|
if (pool->blocks && pool->start == pool->blocks->s) {
|
|
|
|
size_t blockSize = (pool->end - pool->start)*2;
|
|
|
|
pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize);
|
|
|
|
if (!pool->blocks)
|
|
|
|
return 0;
|
|
|
|
pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
|
|
|
|
pool->start = pool->blocks->s;
|
|
|
|
pool->end = pool->start + blockSize;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
BLOCK *tem;
|
|
|
|
size_t blockSize = pool->end - pool->start;
|
|
|
|
if (blockSize < INIT_BLOCK_SIZE)
|
|
|
|
blockSize = INIT_BLOCK_SIZE;
|
|
|
|
else
|
|
|
|
blockSize *= 2;
|
|
|
|
tem = malloc(offsetof(BLOCK, s) + blockSize);
|
|
|
|
if (!tem)
|
|
|
|
return 0;
|
|
|
|
tem->next = pool->blocks;
|
|
|
|
pool->blocks = tem;
|
|
|
|
memcpy(tem->s, pool->start, pool->ptr - pool->start);
|
|
|
|
pool->ptr = tem->s + (pool->ptr - pool->start);
|
|
|
|
pool->start = tem->s;
|
|
|
|
pool->end = tem->s + blockSize;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialise *dtd and declare the five predefined entities (lt, amp, gt,
   quot, apos), each with its one-character replacement text and marked as
   already checked.  Returns 1 on success, 0 if an entity could not be
   created.  Every member is given a defined value before anything can
   fail, so dtdDestroy() is safe to call after a failure. */
static int dtdInit(DTD *dtd)
{
  static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
  static const char chars[] = { '<', '&', '>', '"', '\'' };
  int i;

  dtd->containsRef = 0;
  /* Only ever set to 1 by checkProlog(), so it must start out as 0. */
  dtd->standalone = 0;
  dtd->groupSize = 0;
  dtd->groupConnector = 0;
  poolInit(&(dtd->pool));
  hashTableInit(&(dtd->generalEntities));
  for (i = 0; i < (int)(sizeof(names)/sizeof(names[0])); i++) {
    ENTITY *entity = (ENTITY *)lookup(&(dtd->generalEntities), names[i], sizeof(ENTITY));
    if (!entity)
      return 0;
    /* The text is a single character of the static table; it is not
       zero-terminated, textLen gives its extent. */
    entity->textPtr = chars + i;
    entity->textLen = 1;
    entity->magic = 1;
    entity->wfInContent = 1;
    entity->wfInAttribute = 1;
  }
  return 1;
}
|
|
|
|
|
|
|
|
/* Release what the DTD owns: the string pool, the entity table and the
   group connector array. */
static void dtdDestroy(DTD *dtd)
{
  STRING_POOL *pool = &dtd->pool;
  HASH_TABLE *entities = &dtd->generalEntities;

  poolDestroy(pool);
  hashTableDestroy(entities);
  free(dtd->groupConnector);
}
|
|
|
|
|
|
|
|
static
|
|
|
|
enum WfCheckResult storeEntity(DTD *dtd,
|
|
|
|
const ENCODING *enc,
|
|
|
|
const char *entityNamePtr,
|
|
|
|
const char *entityNameEnd,
|
|
|
|
const char *entityTextPtr,
|
|
|
|
const char *entityTextEnd,
|
|
|
|
const char **badPtr)
|
|
|
|
{
|
|
|
|
ENTITY *entity;
|
|
|
|
const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
|
|
|
|
STRING_POOL *pool = &(dtd->pool);
|
|
|
|
if (entityNamePtr) {
|
|
|
|
if (!poolStoreString(pool, enc, entityNamePtr, entityNameEnd))
|
|
|
|
return noMemory;
|
|
|
|
entity = (ENTITY *)lookup(&(dtd->generalEntities), pool->start, sizeof(ENTITY));
|
|
|
|
if (entity->name != pool->start) {
|
|
|
|
poolDiscard(pool);
|
|
|
|
entityNamePtr = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
poolFinish(pool);
|
|
|
|
}
|
|
|
|
entityTextPtr += enc->minBytesPerChar;
|
|
|
|
entityTextEnd -= enc->minBytesPerChar;
|
1997-12-10 19:00:08 -05:00
|
|
|
if (entityNamePtr)
|
|
|
|
entity->docTextPtr = entityTextPtr;
|
1997-12-10 02:44:19 -05:00
|
|
|
for (;;) {
|
|
|
|
const char *next;
|
|
|
|
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
|
|
|
|
switch (tok) {
|
|
|
|
case XML_TOK_PARAM_ENTITY_REF:
|
|
|
|
*badPtr = entityTextPtr;
|
|
|
|
return syntaxError;
|
|
|
|
case XML_TOK_NONE:
|
|
|
|
if (entityNamePtr) {
|
|
|
|
entity->textPtr = pool->start;
|
|
|
|
entity->textLen = pool->ptr - pool->start;
|
|
|
|
poolFinish(pool);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
poolDiscard(pool);
|
|
|
|
return wellFormed;
|
|
|
|
case XML_TOK_ENTITY_REF:
|
|
|
|
case XML_TOK_DATA_CHARS:
|
|
|
|
if (!poolAppend(pool, enc, entityTextPtr, next))
|
|
|
|
return noMemory;
|
|
|
|
break;
|
|
|
|
case XML_TOK_TRAILING_CR:
|
|
|
|
next = entityTextPtr + enc->minBytesPerChar;
|
|
|
|
/* fall through */
|
|
|
|
case XML_TOK_DATA_NEWLINE:
|
|
|
|
if (pool->end == pool->ptr && !poolGrow(pool))
|
|
|
|
return noMemory;
|
|
|
|
*(pool->ptr)++ = '\n';
|
|
|
|
break;
|
|
|
|
case XML_TOK_CHAR_REF:
|
|
|
|
{
|
|
|
|
char buf[XML_MAX_BYTES_PER_CHAR];
|
|
|
|
int i;
|
|
|
|
int n = XmlCharRefNumber(enc, entityTextPtr);
|
|
|
|
if (n < 0) {
|
|
|
|
*badPtr = entityTextPtr;
|
|
|
|
return badCharRef;
|
|
|
|
}
|
|
|
|
n = XmlEncode(utf8, n, buf);
|
|
|
|
if (!n) {
|
|
|
|
*badPtr = entityTextPtr;
|
|
|
|
return badCharRef;
|
|
|
|
}
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
if (pool->end == pool->ptr && !poolGrow(pool))
|
|
|
|
return noMemory;
|
|
|
|
*(pool->ptr)++ = buf[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XML_TOK_PARTIAL:
|
|
|
|
*badPtr = entityTextPtr;
|
|
|
|
return invalidToken;
|
|
|
|
case XML_TOK_INVALID:
|
|
|
|
*badPtr = next;
|
|
|
|
return invalidToken;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
entityTextPtr = next;
|
|
|
|
}
|
|
|
|
/* not reached */
|
1997-11-12 05:38:58 -05:00
|
|
|
}
|