1998-04-05 11:11:29 -04:00
|
|
|
/*
|
|
|
|
The contents of this file are subject to the Mozilla Public License
|
|
|
|
Version 1.0 (the "License"); you may not use this file except in
|
|
|
|
compliance with the License. You may obtain a copy of the License at
|
|
|
|
http://www.mozilla.org/MPL/
|
|
|
|
|
|
|
|
Software distributed under the License is distributed on an "AS IS"
|
|
|
|
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific language governing rights and limitations
|
|
|
|
under the License.
|
|
|
|
|
|
|
|
The Original Code is expat.
|
|
|
|
|
|
|
|
The Initial Developer of the Original Code is James Clark.
|
|
|
|
Portions created by James Clark are Copyright (C) 1998
|
|
|
|
James Clark. All Rights Reserved.
|
|
|
|
|
|
|
|
Contributor(s):
|
|
|
|
*/
|
|
|
|
|
1998-02-04 00:25:27 -05:00
|
|
|
#include "xmlparse.h"
|
|
|
|
#include "filemap.h"
|
1998-06-01 08:12:35 -04:00
|
|
|
#include "codepage.h"
|
1998-02-04 00:25:27 -05:00
|
|
|
|
1997-11-11 00:52:10 -05:00
|
|
|
#include <stdio.h>
|
1998-02-04 00:25:27 -05:00
|
|
|
#include <stdlib.h>
|
1998-02-04 02:20:46 -05:00
|
|
|
#include <stddef.h>
|
1997-12-10 02:44:19 -05:00
|
|
|
#include <string.h>
|
1998-02-04 00:25:27 -05:00
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
|
|
#ifdef _MSC_VER
|
|
|
|
#include <io.h>
|
|
|
|
#endif
|
|
|
|
|
1998-04-10 02:30:05 -04:00
|
|
|
#ifdef _POSIX_SOURCE
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
|
1998-02-04 00:25:27 -05:00
|
|
|
/* Make sure O_BINARY exists.  When <fcntl.h> does not supply it, reuse
   _O_BINARY if that is defined; otherwise fall back to 0 so that OR-ing
   it into the flags given to open() changes nothing. */
#if !defined(O_BINARY)
#  if defined(_O_BINARY)
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
|
|
|
|
|
1997-12-13 04:11:15 -05:00
|
|
|
#ifdef _MSC_VER
|
|
|
|
#include <crtdbg.h>
|
|
|
|
#endif
|
1997-11-11 00:52:10 -05:00
|
|
|
|
1998-02-04 00:25:27 -05:00
|
|
|
/* Number of bytes requested from the parser buffer and read from the
   input per iteration in processStream().  Builds with _DEBUG defined
   use a much smaller chunk (presumably to exercise chunk-boundary
   handling in the parser -- confirm with the maintainers). */
#ifndef _DEBUG
#define READ_SIZE (1024*8)
#else
#define READ_SIZE 16
#endif
|
|
|
|
|
1998-05-31 07:53:28 -04:00
|
|
|
/* Character-width abstraction.  The rest of this file is written against
   the names below so that the same source builds with either narrow
   (char) or wide (wchar_t) XML_Char:
     T(x)      - spell a character or string literal of the right width
     ftprintf, fputts, puttc - formatted / string / character output
     tfopen, topen, tremove, tperror - file and error-reporting routines
     tcs*      - string routines
     tmain     - program entry point */
#ifndef XML_UNICODE

#define T(x) x
#define ftprintf fprintf
#define tfopen fopen
#define fputts fputs
#define puttc putc
#define tcscmp strcmp
#define tcscpy strcpy
#define tcscat strcat
#define tcschr strchr
#define tcsrchr strrchr
#define tcslen strlen
#define tperror perror
#define topen open
#define tmain main
#define tremove remove

#else /* XML_UNICODE */

/* The wide build is only supported when XML_Char is wchar_t. */
#ifndef XML_UNICODE_WCHAR_T
#error xmlwf requires a 16-bit Unicode-compatible wchar_t
#endif

#define T(x) L ## x
#define ftprintf fwprintf
#define tfopen _wfopen
#define fputts fputws
#define puttc putwc
#define tcscmp wcscmp
#define tcscpy wcscpy
#define tcscat wcscat
#define tcschr wcschr
#define tcsrchr wcsrchr
#define tcslen wcslen
#define tperror _wperror
#define topen _wopen
#define tmain wmain
#define tremove _wremove

#endif /* XML_UNICODE */
|
|
|
|
|
1998-08-26 05:10:50 -04:00
|
|
|
/* Separator character passed to XML_ParserCreateNS() in tmain(); the
   namespace-aware element handlers look for it with tcsrchr() to split a
   reported name into the part before it and the part after it. */
#define NSSEP T('\001')
|
|
|
|
|
1998-05-31 07:53:28 -04:00
|
|
|
static void characterData(void *userData, const XML_Char *s, int len)
|
1998-02-04 00:25:27 -05:00
|
|
|
{
|
|
|
|
FILE *fp = userData;
|
|
|
|
for (; len > 0; --len, ++s) {
|
|
|
|
switch (*s) {
|
1998-05-31 07:53:28 -04:00
|
|
|
case T('&'):
|
|
|
|
fputts(T("&"), fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
break;
|
1998-05-31 07:53:28 -04:00
|
|
|
case T('<'):
|
|
|
|
fputts(T("<"), fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
break;
|
1998-05-31 07:53:28 -04:00
|
|
|
case T('>'):
|
|
|
|
fputts(T(">"), fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
break;
|
1998-05-31 07:53:28 -04:00
|
|
|
case T('"'):
|
|
|
|
fputts(T("""), fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
break;
|
1998-02-09 20:44:29 -05:00
|
|
|
case 9:
|
|
|
|
case 10:
|
|
|
|
case 13:
|
1998-05-31 07:53:28 -04:00
|
|
|
ftprintf(fp, T("&#%d;"), *s);
|
1998-02-04 00:25:27 -05:00
|
|
|
break;
|
|
|
|
default:
|
1998-05-31 07:53:28 -04:00
|
|
|
puttc(*s, fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Lexicographically comparing UTF-8 encoded attribute values,
|
|
|
|
is equivalent to lexicographically comparing based on the character number. */
|
|
|
|
|
|
|
|
static int attcmp(const void *att1, const void *att2)
|
|
|
|
{
|
1998-05-31 07:53:28 -04:00
|
|
|
return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
|
1998-02-04 00:25:27 -05:00
|
|
|
}
|
|
|
|
|
1998-05-31 07:53:28 -04:00
|
|
|
static void startElement(void *userData, const XML_Char *name, const XML_Char **atts)
|
1998-02-04 00:25:27 -05:00
|
|
|
{
|
|
|
|
int nAtts;
|
1998-05-31 07:53:28 -04:00
|
|
|
const XML_Char **p;
|
1998-02-04 00:25:27 -05:00
|
|
|
FILE *fp = userData;
|
1998-05-31 07:53:28 -04:00
|
|
|
puttc(T('<'), fp);
|
|
|
|
fputts(name, fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
|
|
|
|
p = atts;
|
|
|
|
while (*p)
|
|
|
|
++p;
|
|
|
|
nAtts = (p - atts) >> 1;
|
|
|
|
if (nAtts > 1)
|
1998-05-31 07:53:28 -04:00
|
|
|
qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
|
1998-02-04 00:25:27 -05:00
|
|
|
while (*atts) {
|
1998-05-31 07:53:28 -04:00
|
|
|
puttc(T(' '), fp);
|
|
|
|
fputts(*atts++, fp);
|
|
|
|
puttc(T('='), fp);
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
characterData(userData, *atts, tcslen(*atts));
|
|
|
|
puttc(T('"'), fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
atts++;
|
|
|
|
}
|
1998-05-31 07:53:28 -04:00
|
|
|
puttc(T('>'), fp);
|
1998-02-04 00:25:27 -05:00
|
|
|
}
|
|
|
|
|
1998-05-31 07:53:28 -04:00
|
|
|
/* End-tag handler for the default output mode: writes </name> to the
   FILE passed as userData. */
static void endElement(void *userData, const XML_Char *name)
{
  FILE *out = userData;

  puttc(T('<'), out);
  puttc(T('/'), out);
  fputts(name, out);
  puttc(T('>'), out);
}
|
|
|
|
|
1998-08-26 05:10:50 -04:00
|
|
|
static void startElementNS(void *userData, const XML_Char *name, const XML_Char **atts)
|
|
|
|
{
|
|
|
|
int nAtts;
|
|
|
|
int nsi;
|
|
|
|
const XML_Char **p;
|
|
|
|
FILE *fp = userData;
|
|
|
|
const XML_Char *sep;
|
|
|
|
puttc(T('<'), fp);
|
|
|
|
|
|
|
|
sep = tcsrchr(name, NSSEP);
|
|
|
|
if (sep) {
|
|
|
|
fputts(T("ns0:"), fp);
|
|
|
|
fputts(sep + 1, fp);
|
|
|
|
fputts(T(" xmlns:ns0=\""), fp);
|
|
|
|
characterData(userData, name, sep - name);
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
nsi = 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
fputts(name, fp);
|
|
|
|
nsi = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
p = atts;
|
|
|
|
while (*p)
|
|
|
|
++p;
|
|
|
|
nAtts = (p - atts) >> 1;
|
|
|
|
if (nAtts > 1)
|
|
|
|
qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
|
|
|
|
while (*atts) {
|
|
|
|
name = *atts++;
|
|
|
|
sep = tcsrchr(name, NSSEP);
|
|
|
|
if (sep) {
|
|
|
|
ftprintf(fp, T(" xmlns:ns%d=\""), nsi);
|
|
|
|
characterData(userData, name, sep - name);
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
name = sep + 1;
|
|
|
|
ftprintf(fp, T(" ns%d:"), nsi++);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
puttc(T(' '), fp);
|
|
|
|
fputts(name, fp);
|
|
|
|
puttc(T('='), fp);
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
characterData(userData, *atts, tcslen(*atts));
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
atts++;
|
|
|
|
}
|
|
|
|
puttc(T('>'), fp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void endElementNS(void *userData, const XML_Char *name)
|
|
|
|
{
|
|
|
|
FILE *fp = userData;
|
|
|
|
const XML_Char *sep;
|
|
|
|
puttc(T('<'), fp);
|
|
|
|
puttc(T('/'), fp);
|
|
|
|
sep = tcsrchr(name, NSSEP);
|
|
|
|
if (sep) {
|
|
|
|
fputts(T("ns0:"), fp);
|
|
|
|
fputts(sep + 1, fp);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
fputts(name, fp);
|
|
|
|
puttc(T('>'), fp);
|
|
|
|
}
|
|
|
|
|
1998-05-31 07:53:28 -04:00
|
|
|
/* Processing-instruction handler for the default output mode: writes
   <?target data?> to the FILE passed as userData.  Neither string is
   escaped. */
static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data)
{
  FILE *out = userData;

  puttc(T('<'), out);
  puttc(T('?'), out);
  fputts(target, out);
  puttc(T(' '), out);
  fputts(data, out);
  puttc(T('?'), out);
  puttc(T('>'), out);
}
|
|
|
|
|
1998-06-21 02:53:13 -04:00
|
|
|
/* Character-data handler for output type 'c'.  Ignores the data it is
   given and calls XML_DefaultCurrent() on the parser (presumably handing
   the current markup to the default handler, markup() -- confirm against
   the expat API). */
static void defaultCharacterData(XML_Parser parser, const XML_Char *s, int len)
{
  (void)s;
  (void)len;
  XML_DefaultCurrent(parser);
}
|
|
|
|
|
|
|
|
/* Start-tag handler for output type 'c'.  Ignores name and atts and
   calls XML_DefaultCurrent() on the parser (presumably handing the
   current markup to the default handler -- confirm against the expat
   API). */
static void defaultStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
{
  (void)name;
  (void)atts;
  XML_DefaultCurrent(parser);
}
|
|
|
|
|
|
|
|
/* End-tag handler for output type 'c'.  Ignores name and calls
   XML_DefaultCurrent() on the parser (presumably handing the current
   markup to the default handler -- confirm against the expat API). */
static void defaultEndElement(XML_Parser parser, const XML_Char *name)
{
  (void)name;
  XML_DefaultCurrent(parser);
}
|
|
|
|
|
|
|
|
/* Processing-instruction handler for output type 'c'.  Ignores target
   and data and calls XML_DefaultCurrent() on the parser (presumably
   handing the current markup to the default handler -- confirm against
   the expat API). */
static void defaultProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
{
  (void)target;
  (void)data;
  XML_DefaultCurrent(parser);
}
|
|
|
|
|
|
|
|
/* Default handler for output type 'c': copies the len characters at s,
   unescaped, to the FILE stored as the parser's user data.  Writes
   nothing when len <= 0. */
static void markup(XML_Parser parser, const XML_Char *s, int len)
{
  FILE *out = XML_GetUserData(parser);
  int i;

  for (i = 0; i < len; i++)
    puttc(s[i], out);
}
|
1998-06-01 00:33:10 -04:00
|
|
|
|
1998-06-03 06:16:20 -04:00
|
|
|
static
|
|
|
|
void metaLocation(XML_Parser parser)
|
1998-06-01 00:33:10 -04:00
|
|
|
{
|
1998-06-03 06:16:20 -04:00
|
|
|
const XML_Char *uri = XML_GetBase(parser);
|
|
|
|
if (uri)
|
|
|
|
ftprintf(XML_GetUserData(parser), T(" uri=\"%s\""), uri);
|
|
|
|
ftprintf(XML_GetUserData(parser),
|
|
|
|
T(" byte=\"%ld\" line=\"%d\" col=\"%d\""),
|
|
|
|
XML_GetCurrentByteIndex(parser),
|
|
|
|
XML_GetCurrentLineNumber(parser),
|
|
|
|
XML_GetCurrentColumnNumber(parser));
|
1998-06-01 00:33:10 -04:00
|
|
|
}
|
|
|
|
|
1998-06-03 06:16:20 -04:00
|
|
|
static
|
|
|
|
void metaStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
|
1998-06-01 00:33:10 -04:00
|
|
|
{
|
1998-06-03 06:16:20 -04:00
|
|
|
FILE *fp = XML_GetUserData(parser);
|
|
|
|
ftprintf(fp, T("<starttag name=\"%s\""), name);
|
|
|
|
metaLocation(parser);
|
|
|
|
if (*atts) {
|
|
|
|
fputts(T(">\n"), fp);
|
|
|
|
do {
|
1998-06-03 06:55:52 -04:00
|
|
|
ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
|
1998-06-03 06:16:20 -04:00
|
|
|
characterData(fp, atts[1], tcslen(atts[1]));
|
|
|
|
fputts(T("\"/>\n"), fp);
|
|
|
|
} while (*(atts += 2));
|
|
|
|
fputts(T("</starttag>\n"), fp);
|
1998-06-01 00:33:10 -04:00
|
|
|
}
|
|
|
|
else
|
1998-06-03 06:16:20 -04:00
|
|
|
fputts(T("/>\n"), fp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
void metaEndElement(XML_Parser parser, const XML_Char *name)
|
|
|
|
{
|
|
|
|
FILE *fp = XML_GetUserData(parser);
|
|
|
|
ftprintf(fp, T("<endtag name=\"%s\""), name);
|
|
|
|
metaLocation(parser);
|
|
|
|
fputts(T("/>\n"), fp);
|
1998-06-01 00:33:10 -04:00
|
|
|
}
|
|
|
|
|
1998-06-03 06:16:20 -04:00
|
|
|
static
|
|
|
|
void metaProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
|
|
|
|
{
|
|
|
|
FILE *fp = XML_GetUserData(parser);
|
|
|
|
ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
|
|
|
|
characterData(fp, data, tcslen(data));
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
metaLocation(parser);
|
|
|
|
fputts(T("/>\n"), fp);
|
|
|
|
}
|
1998-06-01 00:33:10 -04:00
|
|
|
|
1998-09-24 00:12:12 -04:00
|
|
|
static
|
|
|
|
void metaComment(XML_Parser parser, const XML_Char *data)
|
|
|
|
{
|
|
|
|
FILE *fp = XML_GetUserData(parser);
|
|
|
|
fputts(T("<comment data=\""), fp);
|
|
|
|
characterData(fp, data, tcslen(data));
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
metaLocation(parser);
|
|
|
|
fputts(T("/>\n"), fp);
|
|
|
|
}
|
|
|
|
|
1998-06-03 03:04:44 -04:00
|
|
|
static
|
1998-06-03 06:16:20 -04:00
|
|
|
void metaCharacterData(XML_Parser parser, const XML_Char *s, int len)
|
1998-06-03 03:04:44 -04:00
|
|
|
{
|
1998-06-03 06:16:20 -04:00
|
|
|
FILE *fp = XML_GetUserData(parser);
|
|
|
|
fputts(T("<chars str=\""), fp);
|
|
|
|
characterData(fp, s, len);
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
metaLocation(parser);
|
|
|
|
fputts(T("/>\n"), fp);
|
1998-06-03 03:04:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static
|
1998-06-03 06:16:20 -04:00
|
|
|
void metaUnparsedEntityDecl(XML_Parser parser,
|
|
|
|
const XML_Char *entityName,
|
|
|
|
const XML_Char *base,
|
|
|
|
const XML_Char *systemId,
|
|
|
|
const XML_Char *publicId,
|
|
|
|
const XML_Char *notationName)
|
1998-06-03 03:04:44 -04:00
|
|
|
{
|
|
|
|
FILE *fp = XML_GetUserData(parser);
|
1998-06-03 06:16:20 -04:00
|
|
|
ftprintf(fp, T("<entity name=\"%s\""), entityName);
|
|
|
|
if (publicId)
|
|
|
|
ftprintf(fp, T(" public=\"%s\""), publicId);
|
|
|
|
fputts(T(" system=\""), fp);
|
|
|
|
characterData(fp, systemId, tcslen(systemId));
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
ftprintf(fp, T(" notation=\"%s\""), notationName);
|
1998-06-03 03:04:44 -04:00
|
|
|
metaLocation(parser);
|
|
|
|
fputts(T("/>\n"), fp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
1998-06-03 06:16:20 -04:00
|
|
|
void metaNotationDecl(XML_Parser parser,
|
|
|
|
const XML_Char *notationName,
|
|
|
|
const XML_Char *base,
|
|
|
|
const XML_Char *systemId,
|
|
|
|
const XML_Char *publicId)
|
1998-06-03 03:04:44 -04:00
|
|
|
{
|
|
|
|
FILE *fp = XML_GetUserData(parser);
|
1998-06-03 06:16:20 -04:00
|
|
|
ftprintf(fp, T("<notation name=\"%s\""), notationName);
|
|
|
|
if (publicId)
|
|
|
|
ftprintf(fp, T(" public=\"%s\""), publicId);
|
|
|
|
if (systemId) {
|
|
|
|
fputts(T(" system=\""), fp);
|
|
|
|
characterData(fp, systemId, tcslen(systemId));
|
|
|
|
puttc(T('"'), fp);
|
|
|
|
}
|
1998-06-03 03:04:44 -04:00
|
|
|
metaLocation(parser);
|
|
|
|
fputts(T("/>\n"), fp);
|
|
|
|
}
|
|
|
|
|
1998-02-04 00:25:27 -05:00
|
|
|
typedef struct {
|
|
|
|
XML_Parser parser;
|
|
|
|
int *retPtr;
|
|
|
|
} PROCESS_ARGS;
|
|
|
|
|
|
|
|
static
|
1998-05-31 07:53:28 -04:00
|
|
|
void reportError(XML_Parser parser, const XML_Char *filename)
|
1998-02-04 00:25:27 -05:00
|
|
|
{
|
|
|
|
int code = XML_GetErrorCode(parser);
|
1998-05-31 07:53:28 -04:00
|
|
|
const XML_Char *message = XML_ErrorString(code);
|
1998-02-04 00:25:27 -05:00
|
|
|
if (message)
|
1998-05-31 07:53:28 -04:00
|
|
|
ftprintf(stdout, T("%s:%d:%ld: %s\n"),
|
|
|
|
filename,
|
|
|
|
XML_GetErrorLineNumber(parser),
|
|
|
|
XML_GetErrorColumnNumber(parser),
|
|
|
|
message);
|
1998-02-04 00:25:27 -05:00
|
|
|
else
|
1998-05-31 07:53:28 -04:00
|
|
|
ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
|
1998-02-04 00:25:27 -05:00
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
|
1997-11-11 00:52:10 -05:00
|
|
|
static
|
1998-05-31 07:53:28 -04:00
|
|
|
void processFile(const void *data, size_t size, const XML_Char *filename, void *args)
|
1997-11-11 00:52:10 -05:00
|
|
|
{
|
1998-02-04 00:25:27 -05:00
|
|
|
XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
|
|
|
|
int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
|
|
|
|
if (!XML_Parse(parser, data, size, 1)) {
|
|
|
|
reportError(parser, filename);
|
|
|
|
*retPtr = 0;
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
1997-12-10 02:44:19 -05:00
|
|
|
else
|
1998-02-04 00:25:27 -05:00
|
|
|
*retPtr = 1;
|
|
|
|
}
|
|
|
|
|
1998-05-10 06:18:15 -04:00
|
|
|
static
|
1998-05-31 07:53:28 -04:00
|
|
|
int isAsciiLetter(XML_Char c)
|
1998-05-10 06:18:15 -04:00
|
|
|
{
|
1998-05-31 07:53:28 -04:00
|
|
|
return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
|
1998-05-10 06:18:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static
|
1998-05-31 07:53:28 -04:00
|
|
|
const XML_Char *resolveSystemId(const XML_Char *base, const XML_Char *systemId, XML_Char **toFree)
|
1998-05-10 06:18:15 -04:00
|
|
|
{
|
1998-05-31 07:53:28 -04:00
|
|
|
XML_Char *s;
|
1998-05-10 06:18:15 -04:00
|
|
|
*toFree = 0;
|
|
|
|
if (!base
|
1998-05-31 07:53:28 -04:00
|
|
|
|| *systemId == T('/')
|
1998-05-10 06:18:15 -04:00
|
|
|
#ifdef WIN32
|
1998-05-31 07:53:28 -04:00
|
|
|
|| *systemId == T('\\')
|
|
|
|
|| (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
|
1998-05-10 06:18:15 -04:00
|
|
|
#endif
|
|
|
|
)
|
|
|
|
return systemId;
|
1998-05-31 07:53:28 -04:00
|
|
|
*toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)*sizeof(XML_Char));
|
1998-05-10 06:18:15 -04:00
|
|
|
if (!*toFree)
|
|
|
|
return systemId;
|
1998-05-31 07:53:28 -04:00
|
|
|
tcscpy(*toFree, base);
|
1998-05-10 06:18:15 -04:00
|
|
|
s = *toFree;
|
1998-05-31 07:53:28 -04:00
|
|
|
if (tcsrchr(s, T('/')))
|
|
|
|
s = tcsrchr(s, T('/')) + 1;
|
1998-05-10 06:18:15 -04:00
|
|
|
#ifdef WIN32
|
1998-05-31 07:53:28 -04:00
|
|
|
if (tcsrchr(s, T('\\')))
|
|
|
|
s = tcsrchr(s, T('\\')) + 1;
|
1998-05-10 06:18:15 -04:00
|
|
|
#endif
|
1998-05-31 07:53:28 -04:00
|
|
|
tcscpy(s, systemId);
|
1998-05-10 06:18:15 -04:00
|
|
|
return *toFree;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int externalEntityRefFilemap(XML_Parser parser,
|
1998-08-26 05:10:50 -04:00
|
|
|
const XML_Char *context,
|
1998-05-31 07:53:28 -04:00
|
|
|
const XML_Char *base,
|
|
|
|
const XML_Char *systemId,
|
|
|
|
const XML_Char *publicId)
|
1998-05-10 06:18:15 -04:00
|
|
|
{
|
|
|
|
int result;
|
1998-05-31 07:53:28 -04:00
|
|
|
XML_Char *s;
|
1998-06-03 03:04:44 -04:00
|
|
|
const XML_Char *filename;
|
1998-08-26 05:10:50 -04:00
|
|
|
XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
|
1998-05-10 06:18:15 -04:00
|
|
|
PROCESS_ARGS args;
|
|
|
|
args.retPtr = &result;
|
|
|
|
args.parser = entParser;
|
1998-06-03 03:04:44 -04:00
|
|
|
filename = resolveSystemId(base, systemId, &s);
|
|
|
|
XML_SetBase(entParser, filename);
|
|
|
|
if (!filemap(filename, processFile, &args))
|
1998-05-10 06:18:15 -04:00
|
|
|
result = 0;
|
|
|
|
free(s);
|
|
|
|
XML_ParserFree(entParser);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
1998-02-04 00:25:27 -05:00
|
|
|
static
|
1998-05-31 07:53:28 -04:00
|
|
|
int processStream(const XML_Char *filename, XML_Parser parser)
|
1998-02-04 00:25:27 -05:00
|
|
|
{
|
1998-05-31 07:53:28 -04:00
|
|
|
int fd = topen(filename, O_BINARY|O_RDONLY);
|
1998-02-04 00:25:27 -05:00
|
|
|
if (fd < 0) {
|
1998-05-31 07:53:28 -04:00
|
|
|
tperror(filename);
|
1998-02-04 00:25:27 -05:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
for (;;) {
|
|
|
|
int nread;
|
|
|
|
char *buf = XML_GetBuffer(parser, READ_SIZE);
|
|
|
|
if (!buf) {
|
|
|
|
close(fd);
|
1998-05-31 07:53:28 -04:00
|
|
|
ftprintf(stderr, T("%s: out of memory\n"), filename);
|
1998-02-04 00:25:27 -05:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
nread = read(fd, buf, READ_SIZE);
|
|
|
|
if (nread < 0) {
|
1998-05-31 07:53:28 -04:00
|
|
|
tperror(filename);
|
1998-02-04 00:25:27 -05:00
|
|
|
close(fd);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!XML_ParseBuffer(parser, nread, nread == 0)) {
|
|
|
|
reportError(parser, filename);
|
|
|
|
close(fd);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (nread == 0) {
|
|
|
|
close(fd);
|
|
|
|
break;;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
1998-05-10 06:18:15 -04:00
|
|
|
static
|
|
|
|
int externalEntityRefStream(XML_Parser parser,
|
1998-08-26 05:10:50 -04:00
|
|
|
const XML_Char *context,
|
1998-05-31 07:53:28 -04:00
|
|
|
const XML_Char *base,
|
|
|
|
const XML_Char *systemId,
|
|
|
|
const XML_Char *publicId)
|
1998-05-10 06:18:15 -04:00
|
|
|
{
|
1998-05-31 07:53:28 -04:00
|
|
|
XML_Char *s;
|
1998-06-03 03:04:44 -04:00
|
|
|
const XML_Char *filename;
|
|
|
|
int ret;
|
1998-08-26 05:10:50 -04:00
|
|
|
XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
|
1998-06-03 03:04:44 -04:00
|
|
|
filename = resolveSystemId(base, systemId, &s);
|
|
|
|
XML_SetBase(entParser, filename);
|
|
|
|
ret = processStream(filename, entParser);
|
1998-05-10 06:18:15 -04:00
|
|
|
free(s);
|
|
|
|
XML_ParserFree(entParser);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
1998-06-01 08:12:35 -04:00
|
|
|
/* Convert callback installed by unknownEncoding(): data points to the
   int code page number stored there; the work is delegated to
   codepageConvert(). */
static
int unknownEncodingConvert(void *data, const char *p)
{
  const int *codePage = data;
  return codepageConvert(*codePage, p);
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int unknownEncoding(void *userData,
|
|
|
|
const XML_Char *name,
|
|
|
|
XML_Encoding *info)
|
1998-06-01 08:12:35 -04:00
|
|
|
{
|
|
|
|
int cp;
|
|
|
|
static const XML_Char prefixL[] = T("windows-");
|
|
|
|
static const XML_Char prefixU[] = T("WINDOWS-");
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; prefixU[i]; i++)
|
1998-06-02 04:57:14 -04:00
|
|
|
if (name[i] != prefixU[i] && name[i] != prefixL[i])
|
1998-06-01 08:12:35 -04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
cp = 0;
|
1998-06-02 04:57:14 -04:00
|
|
|
for (; name[i]; i++) {
|
1998-06-01 08:12:35 -04:00
|
|
|
static const XML_Char digits[] = T("0123456789");
|
1998-06-02 04:57:14 -04:00
|
|
|
const XML_Char *s = tcschr(digits, name[i]);
|
1998-06-01 08:12:35 -04:00
|
|
|
if (!s)
|
|
|
|
return 0;
|
|
|
|
cp *= 10;
|
|
|
|
cp += s - digits;
|
|
|
|
if (cp >= 0x10000)
|
|
|
|
return 0;
|
|
|
|
}
|
1998-06-02 04:57:14 -04:00
|
|
|
if (!codepageMap(cp, info->map))
|
|
|
|
return 0;
|
|
|
|
info->convert = unknownEncodingConvert;
|
|
|
|
/* We could just cast the code page integer to a void *,
|
|
|
|
and avoid the use of release. */
|
|
|
|
info->release = free;
|
|
|
|
info->data = malloc(sizeof(int));
|
|
|
|
if (!info->data)
|
|
|
|
return 0;
|
|
|
|
*(int *)info->data = cp;
|
|
|
|
return 1;
|
1998-06-01 08:12:35 -04:00
|
|
|
}
|
|
|
|
|
1998-02-04 00:25:27 -05:00
|
|
|
static
|
1998-05-31 07:53:28 -04:00
|
|
|
void usage(const XML_Char *prog)
|
1998-02-04 00:25:27 -05:00
|
|
|
{
|
1998-08-26 05:10:50 -04:00
|
|
|
ftprintf(stderr, T("usage: %s [-n] [-r] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog);
|
1998-02-04 00:25:27 -05:00
|
|
|
exit(1);
|
1997-11-11 00:52:10 -05:00
|
|
|
}
|
|
|
|
|
1998-05-31 07:53:28 -04:00
|
|
|
/* Program entry point.

   Options (single letters may be combined in one argument in the order
   r n x w m c, optionally followed by d or e):
     -r        read files with processStream() instead of filemap()
     -n        namespace processing (selects the default output type)
     -x        process external entities
     -w        enable the "windows-NNNN" unknown-encoding handler
     -m        output type 'm' (meta representation; turns -n off)
     -c        output type 'c' (copy of the input markup; turns -n off)
     -d dir    write one output file per input file into dir
     -e enc    encoding name passed to the parser constructor
     --        end of options
   Every remaining argument is an input file.  Without -d no handlers are
   installed and each file is only parsed.  With -d the output file is
   removed again if parsing its input failed.

   Failure to create a parser, to allocate the output file name, to open
   the output file, or to set the parser base terminates the program with
   status 1.  Otherwise the function returns 0, including when some input
   file failed to parse. */
int tmain(int argc, XML_Char **argv)
{
  int i;
  const XML_Char *outputDir = 0;
  const XML_Char *encoding = 0;
  int useFilemap = 1;
  int processExternalEntities = 0;
  int windowsCodePages = 0;
  int outputType = 0;
  int useNamespaces = 0;

#ifdef _MSC_VER
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
#endif

  i = 1;
  while (i < argc && argv[i][0] == T('-')) {
    int j;
    if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
      i++;
      break;
    }
    j = 1;
    if (argv[i][j] == T('r')) {
      useFilemap = 0;
      j++;
    }
    if (argv[i][j] == T('n')) {
      useNamespaces = 1;
      outputType = 0;
      j++;
    }
    if (argv[i][j] == T('x')) {
      processExternalEntities = 1;
      j++;
    }
    if (argv[i][j] == T('w')) {
      windowsCodePages = 1;
      j++;
    }
    if (argv[i][j] == T('m')) {
      outputType = 'm';
      useNamespaces = 0;
      j++;
    }
    if (argv[i][j] == T('c')) {
      outputType = 'c';
      useNamespaces = 0;
      j++;
    }
    if (argv[i][j] == T('d')) {
      /* The value is either the rest of this argument or the next one. */
      if (argv[i][j + 1] == T('\0')) {
        if (++i == argc)
          usage(argv[0]);
        outputDir = argv[i];
      }
      else
        outputDir = argv[i] + j + 1;
      i++;
    }
    else if (argv[i][j] == T('e')) {
      if (argv[i][j + 1] == T('\0')) {
        if (++i == argc)
          usage(argv[0]);
        encoding = argv[i];
      }
      else
        encoding = argv[i] + j + 1;
      i++;
    }
    else if (argv[i][j] == T('\0') && j > 1)
      /* At least one flag letter was consumed and nothing is left over. */
      i++;
    else
      usage(argv[0]);
  }
  if (i == argc)
    usage(argv[0]);

  for (; i < argc; i++) {
    FILE *fp = 0;
    XML_Char *outName = 0;
    int result;
    XML_Parser parser;

    if (useNamespaces)
      parser = XML_ParserCreateNS(encoding, NSSEP);
    else
      parser = XML_ParserCreate(encoding);
    if (!parser) {
      ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
      exit(1);
    }

    if (outputDir) {
      /* The output file is named after the last path component of the
         input file. */
      const XML_Char *file = argv[i];
      if (tcsrchr(file, T('/')))
        file = tcsrchr(file, T('/')) + 1;
#ifdef WIN32
      if (tcsrchr(file, T('\\')))
        file = tcsrchr(file, T('\\')) + 1;
#endif
      outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char));
      if (!outName) {
        ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
        exit(1);
      }
      tcscpy(outName, outputDir);
      tcscat(outName, T("/"));
      tcscat(outName, file);
      fp = tfopen(outName, T("wb"));
      if (!fp) {
        tperror(outName);
        exit(1);
      }
      setvbuf(fp, NULL, _IOFBF, 16384);
#ifdef XML_UNICODE
      /* Start wide output with U+FEFF. */
      puttc(0xFEFF, fp);
#endif
      XML_SetUserData(parser, fp);
      switch (outputType) {
      case 'm':
        XML_UseParserAsHandlerArg(parser);
        fputts(T("<document>\n"), fp);
        XML_SetElementHandler(parser, metaStartElement, metaEndElement);
        XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
        XML_SetCommentHandler(parser, metaComment);
        XML_SetCharacterDataHandler(parser, metaCharacterData);
        XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl);
        XML_SetNotationDeclHandler(parser, metaNotationDecl);
        break;
      case 'c':
        XML_UseParserAsHandlerArg(parser);
        XML_SetDefaultHandler(parser, markup);
        XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
        XML_SetCharacterDataHandler(parser, defaultCharacterData);
        XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction);
        break;
      default:
        if (useNamespaces)
          XML_SetElementHandler(parser, startElementNS, endElementNS);
        else
          XML_SetElementHandler(parser, startElement, endElement);
        XML_SetCharacterDataHandler(parser, characterData);
        XML_SetProcessingInstructionHandler(parser, processingInstruction);
        break;
      }
    }

    if (windowsCodePages)
      XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
    if (!XML_SetBase(parser, argv[i])) {
      ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
      exit(1);
    }
    if (processExternalEntities)
      XML_SetExternalEntityRefHandler(parser,
                                      useFilemap
                                      ? externalEntityRefFilemap
                                      : externalEntityRefStream);

    if (useFilemap) {
      PROCESS_ARGS args;
      args.retPtr = &result;
      args.parser = parser;
      if (!filemap(argv[i], processFile, &args))
        result = 0;
    }
    else
      result = processStream(argv[i], parser);

    if (outputDir) {
      if (outputType == 'm')
        fputts(T("</document>\n"), fp);
      fclose(fp);
      /* Do not leave partial output behind for an input that failed. */
      if (!result)
        tremove(outName);
      free(outName);
    }
    XML_ParserFree(parser);
  }
  return 0;
}
|