Give a separate message for syntax errors

This commit is contained in:
James Clark 1997-11-15 02:44:29 +00:00
parent 80b9565677
commit 6a9cec2f7a
3 changed files with 61 additions and 37 deletions

View File

@ -7,26 +7,62 @@
#endif #endif
#include "xmltok.h" #include "xmltok.h"
#include "xmlrole.h"
/* Record carrying a name string; HASH_TABLE stores pointers to these.
   NOTE(review): presumably embedded as the leading member of larger,
   name-keyed records -- confirm against the table's users. */
typedef struct {
/* The name string; who owns the storage is not visible in this view. */
const char *name;
} NAMED;
/* Table of NAMED pointers plus three size_t bookkeeping counters.
   The code that fills and searches the table is not visible here, so the
   per-field notes below are assumptions to be confirmed. */
typedef struct {
/* Pointer to NAMED* entries -- presumably the slot array. */
NAMED **v;
/* presumably the number of slots allocated in v -- confirm */
size_t size;
/* presumably the number of slots currently occupied -- confirm */
size_t used;
/* presumably the occupancy limit at which the table grows -- confirm */
size_t usedLim;
} HASH_TABLE;
/* NOTE(review): presumably the character capacity of one pool block;
   its use is not visible in this view -- confirm at the allocation site. */
#define BLOCK_SIZE 1024
/* Node of a singly linked list of character storage blocks (STRING_POOL
   holds a pointer to one of these). */
typedef struct block {
/* Next block in the list. */
struct block *next;
/* Declared with one element; presumably the start of a larger buffer
   allocated past the end of the struct -- confirm at the allocation site. */
char s[1];
} BLOCK;
/* String pool state: a BLOCK list plus three cursors into character data.
   The pool routines are not visible here, so the cursor roles below are
   assumptions to be confirmed. */
typedef struct {
/* BLOCK list -- presumably the head of the chain of allocated blocks. */
BLOCK *blocks;
/* presumably one past the last usable character of the current block */
const char *end;
/* presumably the next free position in the current block */
const char *ptr;
/* presumably the start of the string currently being built */
const char *start;
} STRING_POOL;
/* Groups one STRING_POOL with two HASH_TABLEs.
   NOTE(review): going by the field names, the tables hold parameter
   entities and general entities respectively, and the pool presumably
   backs their name strings -- confirm against the code that fills them. */
typedef struct {
STRING_POOL pool;
HASH_TABLE paramEntities;
HASH_TABLE generalEntities;
} DTD;
/* Forward declaration of the prolog checker. wfCheck calls it before its
   token loop and returns early when the result is nonzero; the token code
   is handed back through tok rather than as the return value. */
static enum WfCheckResult
checkProlog(int *tok, const char **s, const char *end, const char **nextTokP,
const ENCODING **enc);
/* Forward declaration of the prolog skipper: takes the scan position by
   reference, the end of the input, and out-parameters for the next-token
   position, the encoding, and the position of a DOCTYPE declaration.
   NOTE(review): the int result appears to be a token code -- confirm
   against the definition. */
static
int skipProlog(const char **s, const char *end, const char **nextTokP,
const ENCODING **enc, const char **doctypeP);
static static
void setPosition(const ENCODING *enc, void setPosition(const ENCODING *enc,
const char *start, const char *end, const char *start,
const char **badPtr, unsigned long *badLine, unsigned long *badCol); const char *end,
const char **badPtr,
unsigned long *badLine,
unsigned long *badCol);
enum WfCheckResult enum WfCheckResult
wfCheck(const char *s, size_t n, wfCheck(const char *s, size_t n,
const char **badPtr, unsigned long *badLine, unsigned long *badCol) const char **badPtr, unsigned long *badLine, unsigned long *badCol)
{ {
enum WfCheckResult result;
unsigned nElements = 0; unsigned nElements = 0;
unsigned nAtts = 0; unsigned nAtts = 0;
const char *start = s; const char *start = s;
const char *end = s + n; const char *end = s + n;
const char *next; const char *next;
const ENCODING *enc; const ENCODING *enc;
const char *doctype = 0;
size_t stackSize = 1024; size_t stackSize = 1024;
size_t level = 0; size_t level = 0;
int tok; int tok;
@ -36,7 +72,11 @@ wfCheck(const char *s, size_t n,
#define RETURN_CLEANUP(n) return (free((void *)startName), free((void *)atts), (n)) #define RETURN_CLEANUP(n) return (free((void *)startName), free((void *)atts), (n))
if (!startName) if (!startName)
return noMemory; return noMemory;
tok = skipProlog(&s, end, &next, &enc, &doctype); result = checkProlog(&tok, &s, end, &next, &enc);
if (result) {
setPosition(enc, start, s, badPtr, badLine, badCol);
RETURN_CLEANUP(result);
}
for (;;) { for (;;) {
switch (tok) { switch (tok) {
case XML_TOK_NONE: case XML_TOK_NONE:
@ -115,10 +155,6 @@ wfCheck(const char *s, size_t n,
tok = XmlPrologTok(enc, s, end, &next); tok = XmlPrologTok(enc, s, end, &next);
switch (tok) { switch (tok) {
case XML_TOK_NONE: case XML_TOK_NONE:
if (doctype) {
setPosition(enc, start, doctype, badPtr, badLine, badCol);
RETURN_CLEANUP(wellFormedOutsideDtd);
}
RETURN_CLEANUP(wellFormed); RETURN_CLEANUP(wellFormed);
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
case XML_TOK_COMMENT: case XML_TOK_COMMENT:
@ -142,14 +178,15 @@ wfCheck(const char *s, size_t n,
} }
static static
int skipProlog(const char **startp, const char *end, int checkProlog(int *tokp,
const char **nextTokP, const ENCODING **enc, const char **startp, const char *end,
const char **doctypeP) const char **nextTokP, const ENCODING **enc)
{ {
PROLOG_STATE state;
const char *s = *startp; const char *s = *startp;
INIT_ENCODING initEnc; INIT_ENCODING initEnc;
XmlInitEncoding(&initEnc, enc); XmlInitEncoding(&initEnc, enc);
*doctypeP = 0; XmlPrologStateInit(&state);
for (;;) { for (;;) {
int tok = XmlPrologTok(*enc, s, end, nextTokP); int tok = XmlPrologTok(*enc, s, end, nextTokP);
switch (tok) { switch (tok) {
@ -160,30 +197,17 @@ int skipProlog(const char **startp, const char *end,
case XML_TOK_INVALID: case XML_TOK_INVALID:
case XML_TOK_NONE: case XML_TOK_NONE:
case XML_TOK_PARTIAL: case XML_TOK_PARTIAL:
*tokp = tok;
*startp = s; *startp = s;
return tok; return wellFormed;
case XML_TOK_DECL_OPEN: case XML_TOK_BOM:
if (!*doctypeP) {
if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "DOCTYPE"))
*doctypeP = s;
else {
*startp = s;
return XML_TOK_INVALID;
}
}
break;
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
case XML_TOK_LITERAL:
case XML_TOK_COMMENT:
case XML_TOK_PI:
break; break;
case XML_TOK_COND_SECT_OPEN:
*startp = s;
return XML_TOK_INVALID;
default: default:
if (!*doctypeP) { switch (XmlTokenRole(&state, tok, s, *nextTokP, *enc)) {
case XML_ROLE_ERROR:
*startp = s; *startp = s;
return XML_TOK_INVALID; return syntaxError;
} }
break; break;
} }

View File

@ -3,8 +3,8 @@
enum WfCheckResult { enum WfCheckResult {
wellFormed, wellFormed,
wellFormedOutsideDtd,
noMemory, noMemory,
syntaxError,
noElements, noElements,
invalidToken, invalidToken,
unclosedToken, unclosedToken,

View File

@ -15,8 +15,8 @@ void processFile(const void *data, size_t size, const char *filename, void *arg)
if (result) { if (result) {
static const char *message[] = { static const char *message[] = {
0, 0,
"DOCTYPE declaration ignored",
"out of memory", "out of memory",
"syntax error",
"no element found", "no element found",
"invalid token", "invalid token",
"unclosed token", "unclosed token",
@ -28,7 +28,7 @@ void processFile(const void *data, size_t size, const char *filename, void *arg)
fprintf(stderr, "%s:", filename); fprintf(stderr, "%s:", filename);
if (badPtr != 0) if (badPtr != 0)
fprintf(stderr, "%lu:%lu:", badLine+1, badCol); fprintf(stderr, "%lu:%lu:", badLine+1, badCol);
fprintf(stderr, "%c: %s", (result == wellFormedOutsideDtd ? 'W' : 'E'), message[result]); fprintf(stderr, "E: %s", message[result]);
putc('\n', stderr); putc('\n', stderr);
if (!*ret) if (!*ret)
*ret = 1; *ret = 1;