Normalize attribute values.

Attribute bug fixes.
This commit is contained in:
James Clark 1998-02-03 09:09:48 +00:00
parent 316f90012d
commit 2f74758c1f
3 changed files with 115 additions and 31 deletions

View File

@ -1,5 +1,3 @@
/* FIXME Normalize tokenized attribute values. */
#include "xmlparse.h" #include "xmlparse.h"
#include "xmltok.h" #include "xmltok.h"
#include "xmlrole.h" #include "xmlrole.h"
@ -39,10 +37,12 @@ typedef struct {
an attribute has been specified. */ an attribute has been specified. */
typedef struct { typedef struct {
char *name; char *name;
char maybeTokenized;
} ATTRIBUTE_ID; } ATTRIBUTE_ID;
typedef struct { typedef struct {
const ATTRIBUTE_ID *id; const ATTRIBUTE_ID *id;
char isCdata;
const char *value; const char *value;
} DEFAULT_ATTRIBUTE; } DEFAULT_ATTRIBUTE;
@ -87,9 +87,12 @@ checkGeneralTextEntity(XML_Parser parser,
const ENCODING **enc); const ENCODING **enc);
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *tagName, const char *s); static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *tagName, const char *s);
static int static int
addDefaultAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, const char *value); defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const char *dfltValue);
static enum XML_Error static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *, const char *, const char *, storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
STRING_POOL *);
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
STRING_POOL *); STRING_POOL *);
static ATTRIBUTE_ID * static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
@ -115,6 +118,8 @@ static int poolGrow(STRING_POOL *pool);
#define poolStart(pool) ((pool)->start) #define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr) #define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Remove the last byte from the pool by stepping ptr back one position.
   Does no bounds check: use only when poolLength(pool) is non-zero.
   The parameter is parenthesized so that any pointer expression
   (e.g. &somePool) expands correctly, matching the other pool macros. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The byte just before ptr, i.e. the most recently appended byte.
   Only meaningful when poolLength(pool) is non-zero. */
#define poolLastByte(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr) #define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendByte(pool, c) \ #define poolAppendByte(pool, c) \
@ -146,6 +151,7 @@ typedef struct {
ENTITY *declEntity; ENTITY *declEntity;
ELEMENT_TYPE *declElementType; ELEMENT_TYPE *declElementType;
ATTRIBUTE_ID *declAttributeId; ATTRIBUTE_ID *declAttributeId;
char declAttributeIsCdata;
DTD dtd; DTD dtd;
char *tagStack; char *tagStack;
char *tagStackPtr; char *tagStackPtr;
@ -155,6 +161,7 @@ typedef struct {
POSITION position; POSITION position;
long errorByteIndex; long errorByteIndex;
STRING_POOL tempPool; STRING_POOL tempPool;
STRING_POOL temp2Pool;
char *groupConnector; char *groupConnector;
size_t groupSize; size_t groupSize;
} Parser; } Parser;
@ -182,12 +189,14 @@ typedef struct {
#define declEntity (((Parser *)parser)->declEntity) #define declEntity (((Parser *)parser)->declEntity)
#define declElementType (((Parser *)parser)->declElementType) #define declElementType (((Parser *)parser)->declElementType)
#define declAttributeId (((Parser *)parser)->declAttributeId) #define declAttributeId (((Parser *)parser)->declAttributeId)
#define declAttributeIsCdata (((Parser *)parser)->declAttributeIsCdata)
#define tagStackEnd (((Parser *)parser)->tagStackEnd) #define tagStackEnd (((Parser *)parser)->tagStackEnd)
#define tagStackPtr (((Parser *)parser)->tagStackPtr) #define tagStackPtr (((Parser *)parser)->tagStackPtr)
#define tagStack (((Parser *)parser)->tagStack) #define tagStack (((Parser *)parser)->tagStack)
#define atts (((Parser *)parser)->atts) #define atts (((Parser *)parser)->atts)
#define attsSize (((Parser *)parser)->attsSize) #define attsSize (((Parser *)parser)->attsSize)
#define tempPool (((Parser *)parser)->tempPool) #define tempPool (((Parser *)parser)->tempPool)
#define temp2Pool (((Parser *)parser)->temp2Pool)
#define groupConnector (((Parser *)parser)->groupConnector) #define groupConnector (((Parser *)parser)->groupConnector)
#define groupSize (((Parser *)parser)->groupSize) #define groupSize (((Parser *)parser)->groupSize)
@ -224,6 +233,7 @@ XML_Parser XML_ParserCreate(const char *encodingName)
groupSize = 0; groupSize = 0;
groupConnector = 0; groupConnector = 0;
poolInit(&tempPool); poolInit(&tempPool);
poolInit(&temp2Pool);
if (!dtdInit(&dtd) || !atts || !tagStack) { if (!dtdInit(&dtd) || !atts || !tagStack) {
XML_ParserFree(parser); XML_ParserFree(parser);
return 0; return 0;
@ -236,6 +246,7 @@ XML_Parser XML_ParserCreate(const char *encodingName)
void XML_ParserFree(XML_Parser parser) void XML_ParserFree(XML_Parser parser)
{ {
poolDestroy(&tempPool); poolDestroy(&tempPool);
poolDestroy(&temp2Pool);
dtdDestroy(&dtd); dtdDestroy(&dtd);
free((void *)tagStack); free((void *)tagStack);
free((void *)atts); free((void *)atts);
@ -367,7 +378,10 @@ void *XML_GetBuffer(XML_Parser parser, size_t len)
return 0; return 0;
} }
bufferLim = newBuf + bufferSize; bufferLim = newBuf + bufferSize;
memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); if (bufferPtr) {
memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
free(buffer);
}
bufferEnd = newBuf + (bufferEnd - bufferPtr); bufferEnd = newBuf + (bufferEnd - bufferPtr);
bufferPtr = buffer = newBuf; bufferPtr = buffer = newBuf;
} }
@ -736,22 +750,31 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
+ XmlNameLength(enc, atts[i].name)); + XmlNameLength(enc, atts[i].name));
if (!attId) if (!attId)
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
if (attId->name[-1]) { if ((attId->name)[-1]) {
errorPtr = atts[i].name; errorPtr = atts[i].name;
return XML_ERROR_DUPLICATE_ATTRIBUTE; return XML_ERROR_DUPLICATE_ATTRIBUTE;
} }
attId->name[-1] = 1; (attId->name)[-1] = 1;
appAtts[i << 1] = attId->name; appAtts[i << 1] = attId->name;
if (!atts[i].normalized) { if (!atts[i].normalized) {
enum XML_Error result enum XML_Error result;
= storeAttributeValue(parser, enc, int isCdata = 1;
atts[i].valuePtr,
atts[i].valueEnd, if (attId->maybeTokenized) {
&tempPool); int j;
for (j = 0; j < nDefaultAtts; j++) {
if (attId == elementType->defaultAtts[j].id) {
isCdata = elementType->defaultAtts[j].isCdata;
break;
}
}
}
result = storeAttributeValue(parser, enc, isCdata,
atts[i].valuePtr, atts[i].valueEnd,
&tempPool);
if (result) if (result)
return result; return result;
if (!poolAppendByte(&tempPool, '\0'))
return XML_ERROR_NO_MEMORY;
if (tagName) { if (tagName) {
appAtts[(i << 1) + 1] = poolStart(&tempPool); appAtts[(i << 1) + 1] = poolStart(&tempPool);
poolFinish(&tempPool); poolFinish(&tempPool);
@ -770,8 +793,8 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
int j; int j;
for (j = 0; j < nDefaultAtts; j++) { for (j = 0; j < nDefaultAtts; j++) {
const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j; const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
if (!da->id->name[-1]) { if (!(da->id->name)[-1] && da->value) {
da->id->name[-1] = 1; (da->id->name)[-1] = 1;
appAtts[i << 1] = da->id->name; appAtts[i << 1] = da->id->name;
appAtts[(i << 1) + 1] = da->value; appAtts[(i << 1) + 1] = da->value;
i++; i++;
@ -779,7 +802,7 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
} }
appAtts[i << 1] = 0; appAtts[i << 1] = 0;
} }
for (i = 0; i < n; i++) while (i-- > 0)
((char *)appAtts[i << 1])[-1] = 0; ((char *)appAtts[i << 1])[-1] = 0;
return XML_ERROR_NONE; return XML_ERROR_NONE;
} }
@ -877,21 +900,31 @@ prologProcessor(XML_Parser parser,
declAttributeId = getAttributeId(parser, encoding, s, next); declAttributeId = getAttributeId(parser, encoding, s, next);
if (!declAttributeId) if (!declAttributeId)
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
declAttributeIsCdata = 0;
break;
case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
declAttributeIsCdata = 1;
break;
case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
if (!defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
return XML_ERROR_NO_MEMORY;
break; break;
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
case XML_ROLE_FIXED_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
{ {
const char *attVal;
enum XML_Error result enum XML_Error result
= storeAttributeValue(parser, encoding, s + encoding->minBytesPerChar, = storeAttributeValue(parser, encoding, declAttributeIsCdata,
s + encoding->minBytesPerChar,
next - encoding->minBytesPerChar, next - encoding->minBytesPerChar,
&dtd.pool); &dtd.pool);
if (result) if (result)
return result; return result;
if (!poolAppendByte(&dtd.pool, 0)) attVal = poolStart(&dtd.pool);
return XML_ERROR_NO_MEMORY;
if (!addDefaultAttribute(declElementType, declAttributeId, poolStart(&dtd.pool)))
return XML_ERROR_NO_MEMORY;
poolFinish(&dtd.pool); poolFinish(&dtd.pool);
if (!defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
return XML_ERROR_NO_MEMORY;
break; break;
} }
case XML_ROLE_ENTITY_VALUE: case XML_ROLE_ENTITY_VALUE:
@ -1070,9 +1103,24 @@ enum XML_Error epilogProcessor(XML_Parser parser,
} }
static enum XML_Error static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
const char *ptr, const char *end, const char *ptr, const char *end,
STRING_POOL *pool) STRING_POOL *pool)
{
enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
if (result)
return result;
if (!isCdata && poolLength(pool) && poolLastByte(pool) == ' ')
poolChop(pool);
if (!poolAppendByte(pool, 0))
return XML_ERROR_NO_MEMORY;
return XML_ERROR_NONE;
}
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
const char *ptr, const char *end,
STRING_POOL *pool)
{ {
const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING); const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
for (;;) { for (;;) {
@ -1119,20 +1167,23 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc,
case XML_TOK_TRAILING_CR: case XML_TOK_TRAILING_CR:
next = ptr + enc->minBytesPerChar; next = ptr + enc->minBytesPerChar;
/* fall through */ /* fall through */
case XML_TOK_ATTRIBUTE_VALUE_S:
case XML_TOK_DATA_NEWLINE: case XML_TOK_DATA_NEWLINE:
if (!isCdata && (poolLength(pool) == 0 || poolLastByte(pool) == ' '))
break;
if (!poolAppendByte(pool, ' ')) if (!poolAppendByte(pool, ' '))
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
break; break;
case XML_TOK_ENTITY_REF: case XML_TOK_ENTITY_REF:
{ {
const char *name = poolStoreString(&dtd.pool, enc, const char *name = poolStoreString(&temp2Pool, enc,
ptr + enc->minBytesPerChar, ptr + enc->minBytesPerChar,
next - enc->minBytesPerChar); next - enc->minBytesPerChar);
ENTITY *entity; ENTITY *entity;
if (!name) if (!name)
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
poolDiscard(&dtd.pool); poolDiscard(&temp2Pool);
if (!entity) { if (!entity) {
if (!dtd.containsRef) { if (!dtd.containsRef) {
errorPtr = ptr; errorPtr = ptr;
@ -1161,7 +1212,7 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc,
enum XML_Error result; enum XML_Error result;
const char *textEnd = entity->textPtr + entity->textLen; const char *textEnd = entity->textPtr + entity->textLen;
entity->open = 1; entity->open = 1;
result = storeAttributeValue(parser, utf8, entity->textPtr, textEnd, pool); result = appendAttributeValue(parser, utf8, isCdata, entity->textPtr, textEnd, pool);
entity->open = 0; entity->open = 0;
if (result) { if (result) {
errorPtr = ptr; errorPtr = ptr;
@ -1275,7 +1326,7 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *
} }
static int static int
addDefaultAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, const char *value) defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const char *value)
{ {
DEFAULT_ATTRIBUTE *att; DEFAULT_ATTRIBUTE *att;
if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->nDefaultAtts == type->allocDefaultAtts) {
@ -1283,13 +1334,17 @@ addDefaultAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, const char *value)
type->allocDefaultAtts = 8; type->allocDefaultAtts = 8;
else else
type->allocDefaultAtts *= 2; type->allocDefaultAtts *= 2;
type->defaultAtts = realloc(type->defaultAtts, type->allocDefaultAtts); type->defaultAtts = realloc(type->defaultAtts,
type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
if (!type->defaultAtts) if (!type->defaultAtts)
return 0; return 0;
} }
att = type->defaultAtts + type->nDefaultAtts; att = type->defaultAtts + type->nDefaultAtts;
att->id = attId; att->id = attId;
att->value = value; att->value = value;
att->isCdata = isCdata;
if (!isCdata)
attId->maybeTokenized = 1;
type->nDefaultAtts += 1; type->nDefaultAtts += 1;
return 1; return 1;
} }
@ -1340,11 +1395,19 @@ static int dtdInit(DTD *p)
static void dtdDestroy(DTD *p) static void dtdDestroy(DTD *p)
{ {
poolDestroy(&(p->pool)); HASH_TABLE_ITER iter;
hashTableIterInit(&iter, &(p->elementTypes));
for (;;) {
ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
if (!e)
break;
free(e->defaultAtts);
}
hashTableDestroy(&(p->generalEntities)); hashTableDestroy(&(p->generalEntities));
hashTableDestroy(&(p->paramEntities)); hashTableDestroy(&(p->paramEntities));
hashTableDestroy(&(p->elementTypes)); hashTableDestroy(&(p->elementTypes));
hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->attributeIds));
poolDestroy(&(p->pool));
} }
static static

View File

@ -65,6 +65,9 @@ extern "C" {
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_COMMA 38 #define XML_TOK_COMMA 38
/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S 39
#define XML_N_STATES 2 #define XML_N_STATES 2
#define XML_PROLOG_STATE 0 #define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1 #define XML_CONTENT_STATE 1

View File

@ -1066,6 +1066,13 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
} }
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_DATA_CHARS; return XML_TOK_DATA_CHARS;
case BT_S:
if (ptr == start) {
*nextTokPtr = ptr + MINBPC;
return XML_TOK_ATTRIBUTE_VALUE_S;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default: default:
ptr += MINBPC; ptr += MINBPC;
break; break;
@ -1225,12 +1232,23 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
case BT_AMP: case BT_AMP:
atts[nAtts].normalized = 0; atts[nAtts].normalized = 0;
break; break;
case BT_S: case BT_CR: case BT_LF: case BT_S:
if (state == inName)
state = other;
else if (state == inValue
&& atts[nAtts].normalized
&& (ptr == atts[nAtts].valuePtr
|| BYTE_TO_ASCII(enc, ptr) != ' '
|| BYTE_TO_ASCII(enc, ptr + MINBPC) == ' '
|| BYTE_TYPE(enc, ptr + MINBPC) == open))
atts[nAtts].normalized = 0;
break;
case BT_CR: case BT_LF:
/* This case ensures that the first attribute name is counted /* This case ensures that the first attribute name is counted
Apart from that we could just change state on the quote. */ Apart from that we could just change state on the quote. */
if (state == inName) if (state == inName)
state = other; state = other;
if (state == inValue) else if (state == inValue)
atts[nAtts].normalized = 0; atts[nAtts].normalized = 0;
break; break;
case BT_GT: case BT_GT: