Give a separate message for syntax errors
This commit is contained in:
parent
80b9565677
commit
6a9cec2f7a
@ -7,26 +7,62 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "xmltok.h"
|
#include "xmltok.h"
|
||||||
|
#include "xmlrole.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *name;
|
||||||
|
} NAMED;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
NAMED **v;
|
||||||
|
size_t size;
|
||||||
|
size_t used;
|
||||||
|
size_t usedLim;
|
||||||
|
} HASH_TABLE;
|
||||||
|
|
||||||
|
#define BLOCK_SIZE 1024
|
||||||
|
|
||||||
|
typedef struct block {
|
||||||
|
struct block *next;
|
||||||
|
char s[1];
|
||||||
|
} BLOCK;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
BLOCK *blocks;
|
||||||
|
const char *end;
|
||||||
|
const char *ptr;
|
||||||
|
const char *start;
|
||||||
|
} STRING_POOL;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
STRING_POOL pool;
|
||||||
|
HASH_TABLE paramEntities;
|
||||||
|
HASH_TABLE generalEntities;
|
||||||
|
} DTD;
|
||||||
|
|
||||||
|
static enum WfCheckResult
|
||||||
|
checkProlog(int *tok, const char **s, const char *end, const char **nextTokP,
|
||||||
|
const ENCODING **enc);
|
||||||
|
|
||||||
static
|
|
||||||
int skipProlog(const char **s, const char *end, const char **nextTokP,
|
|
||||||
const ENCODING **enc, const char **doctypeP);
|
|
||||||
static
|
static
|
||||||
void setPosition(const ENCODING *enc,
|
void setPosition(const ENCODING *enc,
|
||||||
const char *start, const char *end,
|
const char *start,
|
||||||
const char **badPtr, unsigned long *badLine, unsigned long *badCol);
|
const char *end,
|
||||||
|
const char **badPtr,
|
||||||
|
unsigned long *badLine,
|
||||||
|
unsigned long *badCol);
|
||||||
|
|
||||||
enum WfCheckResult
|
enum WfCheckResult
|
||||||
wfCheck(const char *s, size_t n,
|
wfCheck(const char *s, size_t n,
|
||||||
const char **badPtr, unsigned long *badLine, unsigned long *badCol)
|
const char **badPtr, unsigned long *badLine, unsigned long *badCol)
|
||||||
{
|
{
|
||||||
|
enum WfCheckResult result;
|
||||||
unsigned nElements = 0;
|
unsigned nElements = 0;
|
||||||
unsigned nAtts = 0;
|
unsigned nAtts = 0;
|
||||||
const char *start = s;
|
const char *start = s;
|
||||||
const char *end = s + n;
|
const char *end = s + n;
|
||||||
const char *next;
|
const char *next;
|
||||||
const ENCODING *enc;
|
const ENCODING *enc;
|
||||||
const char *doctype = 0;
|
|
||||||
size_t stackSize = 1024;
|
size_t stackSize = 1024;
|
||||||
size_t level = 0;
|
size_t level = 0;
|
||||||
int tok;
|
int tok;
|
||||||
@ -36,7 +72,11 @@ wfCheck(const char *s, size_t n,
|
|||||||
#define RETURN_CLEANUP(n) return (free((void *)startName), free((void *)atts), (n))
|
#define RETURN_CLEANUP(n) return (free((void *)startName), free((void *)atts), (n))
|
||||||
if (!startName)
|
if (!startName)
|
||||||
return noMemory;
|
return noMemory;
|
||||||
tok = skipProlog(&s, end, &next, &enc, &doctype);
|
result = checkProlog(&tok, &s, end, &next, &enc);
|
||||||
|
if (result) {
|
||||||
|
setPosition(enc, start, s, badPtr, badLine, badCol);
|
||||||
|
RETURN_CLEANUP(result);
|
||||||
|
}
|
||||||
for (;;) {
|
for (;;) {
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_NONE:
|
case XML_TOK_NONE:
|
||||||
@ -115,10 +155,6 @@ wfCheck(const char *s, size_t n,
|
|||||||
tok = XmlPrologTok(enc, s, end, &next);
|
tok = XmlPrologTok(enc, s, end, &next);
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_NONE:
|
case XML_TOK_NONE:
|
||||||
if (doctype) {
|
|
||||||
setPosition(enc, start, doctype, badPtr, badLine, badCol);
|
|
||||||
RETURN_CLEANUP(wellFormedOutsideDtd);
|
|
||||||
}
|
|
||||||
RETURN_CLEANUP(wellFormed);
|
RETURN_CLEANUP(wellFormed);
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
case XML_TOK_COMMENT:
|
case XML_TOK_COMMENT:
|
||||||
@ -142,14 +178,15 @@ wfCheck(const char *s, size_t n,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
int skipProlog(const char **startp, const char *end,
|
int checkProlog(int *tokp,
|
||||||
const char **nextTokP, const ENCODING **enc,
|
const char **startp, const char *end,
|
||||||
const char **doctypeP)
|
const char **nextTokP, const ENCODING **enc)
|
||||||
{
|
{
|
||||||
|
PROLOG_STATE state;
|
||||||
const char *s = *startp;
|
const char *s = *startp;
|
||||||
INIT_ENCODING initEnc;
|
INIT_ENCODING initEnc;
|
||||||
XmlInitEncoding(&initEnc, enc);
|
XmlInitEncoding(&initEnc, enc);
|
||||||
*doctypeP = 0;
|
XmlPrologStateInit(&state);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int tok = XmlPrologTok(*enc, s, end, nextTokP);
|
int tok = XmlPrologTok(*enc, s, end, nextTokP);
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
@ -160,30 +197,17 @@ int skipProlog(const char **startp, const char *end,
|
|||||||
case XML_TOK_INVALID:
|
case XML_TOK_INVALID:
|
||||||
case XML_TOK_NONE:
|
case XML_TOK_NONE:
|
||||||
case XML_TOK_PARTIAL:
|
case XML_TOK_PARTIAL:
|
||||||
|
*tokp = tok;
|
||||||
*startp = s;
|
*startp = s;
|
||||||
return tok;
|
return wellFormed;
|
||||||
case XML_TOK_DECL_OPEN:
|
case XML_TOK_BOM:
|
||||||
if (!*doctypeP) {
|
|
||||||
if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "DOCTYPE"))
|
|
||||||
*doctypeP = s;
|
|
||||||
else {
|
|
||||||
*startp = s;
|
|
||||||
return XML_TOK_INVALID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
case XML_TOK_LITERAL:
|
|
||||||
case XML_TOK_COMMENT:
|
|
||||||
case XML_TOK_PI:
|
|
||||||
break;
|
break;
|
||||||
case XML_TOK_COND_SECT_OPEN:
|
|
||||||
*startp = s;
|
|
||||||
return XML_TOK_INVALID;
|
|
||||||
default:
|
default:
|
||||||
if (!*doctypeP) {
|
switch (XmlTokenRole(&state, tok, s, *nextTokP, *enc)) {
|
||||||
|
case XML_ROLE_ERROR:
|
||||||
*startp = s;
|
*startp = s;
|
||||||
return XML_TOK_INVALID;
|
return syntaxError;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
|
|
||||||
enum WfCheckResult {
|
enum WfCheckResult {
|
||||||
wellFormed,
|
wellFormed,
|
||||||
wellFormedOutsideDtd,
|
|
||||||
noMemory,
|
noMemory,
|
||||||
|
syntaxError,
|
||||||
noElements,
|
noElements,
|
||||||
invalidToken,
|
invalidToken,
|
||||||
unclosedToken,
|
unclosedToken,
|
||||||
|
@ -15,8 +15,8 @@ void processFile(const void *data, size_t size, const char *filename, void *arg)
|
|||||||
if (result) {
|
if (result) {
|
||||||
static const char *message[] = {
|
static const char *message[] = {
|
||||||
0,
|
0,
|
||||||
"DOCTYPE declaration ignored",
|
|
||||||
"out of memory",
|
"out of memory",
|
||||||
|
"syntax error",
|
||||||
"no element found",
|
"no element found",
|
||||||
"invalid token",
|
"invalid token",
|
||||||
"unclosed token",
|
"unclosed token",
|
||||||
@ -28,7 +28,7 @@ void processFile(const void *data, size_t size, const char *filename, void *arg)
|
|||||||
fprintf(stderr, "%s:", filename);
|
fprintf(stderr, "%s:", filename);
|
||||||
if (badPtr != 0)
|
if (badPtr != 0)
|
||||||
fprintf(stderr, "%lu:%lu:", badLine+1, badCol);
|
fprintf(stderr, "%lu:%lu:", badLine+1, badCol);
|
||||||
fprintf(stderr, "%c: %s", (result == wellFormedOutsideDtd ? 'W' : 'E'), message[result]);
|
fprintf(stderr, "E: %s", message[result]);
|
||||||
putc('\n', stderr);
|
putc('\n', stderr);
|
||||||
if (!*ret)
|
if (!*ret)
|
||||||
*ret = 1;
|
*ret = 1;
|
||||||
|
Loading…
Reference in New Issue
Block a user