libexpat/expat/xmlwf/xmlwf.c

531 lines
12 KiB
C
Raw Normal View History

1998-04-05 11:11:29 -04:00
/*
The contents of this file are subject to the Mozilla Public License
Version 1.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS"
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific language governing rights and limitations
under the License.
The Original Code is expat.
The Initial Developer of the Original Code is James Clark.
Portions created by James Clark are Copyright (C) 1998
James Clark. All Rights Reserved.
Contributor(s):
*/
1998-02-04 00:25:27 -05:00
#include "xmlparse.h"
#include "filemap.h"
#include "codepage.h"
1998-02-04 00:25:27 -05:00
1997-11-11 00:52:10 -05:00
#include <stdio.h>
1998-02-04 00:25:27 -05:00
#include <stdlib.h>
1998-02-04 02:20:46 -05:00
#include <stddef.h>
1997-12-10 02:44:19 -05:00
#include <string.h>
1998-02-04 00:25:27 -05:00
#include <fcntl.h>
#ifdef _MSC_VER
#include <io.h>
#endif
1998-04-10 02:30:05 -04:00
#ifdef _POSIX_SOURCE
#include <unistd.h>
#endif
1998-02-04 00:25:27 -05:00
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0
#endif
#endif
#ifdef _MSC_VER
#include <crtdbg.h>
#endif
1997-11-11 00:52:10 -05:00
1998-02-04 00:25:27 -05:00
#ifdef _DEBUG
#define READ_SIZE 16
#else
#define READ_SIZE (1024*8)
#endif
1998-05-31 07:53:28 -04:00
#ifdef XML_UNICODE
#define T(x) L ## x
#define ftprintf fwprintf
#define stscanf swscanf
1998-05-31 07:53:28 -04:00
#define tfopen _wfopen
#define fputts fputws
#define puttc putwc
#define tcscmp wcscmp
#define tcscpy wcscpy
#define tcscat wcscat
#define tcschr wcschr
#define tcsrchr wcsrchr
#define tcslen wcslen
#define tperror _wperror
#define topen _wopen
#define tmain wmain
#define tremove _wremove
#else /* not XML_UNICODE */
#define T(x) x
#define ftprintf fprintf
#define stscanf sscanf
1998-05-31 07:53:28 -04:00
#define tfopen fopen
#define fputts fputs
#define puttc putc
#define tcscmp strcmp
#define tcscpy strcpy
#define tcscat strcat
#define tcschr strchr
#define tcsrchr strrchr
#define tcslen strlen
#define tperror perror
#define topen open
#define tmain main
#define tremove remove
#endif /* not XML_UNICODE */
/* Character-data handler.  Writes the len characters starting at s to the
   FILE passed as userData.  '&', '<', '>' and '"' are written as the entity
   references &amp; &lt; &gt; &quot;, the characters 9, 10 and 13 are written
   as decimal character references, and everything else is copied through
   unchanged. */
static void characterData(void *userData, const XML_Char *s, int len)
{
  FILE *out = userData;
  const XML_Char *end;

  if (len <= 0)
    return;
  for (end = s + len; s != end; ++s) {
    if (*s == T('&'))
      fputts(T("&amp;"), out);
    else if (*s == T('<'))
      fputts(T("&lt;"), out);
    else if (*s == T('>'))
      fputts(T("&gt;"), out);
    else if (*s == T('"'))
      fputts(T("&quot;"), out);
    else if (*s == 9 || *s == 10 || *s == 13)
      ftprintf(out, T("&#%d;"), *s);
    else
      puttc(*s, out);
  }
}
/* Lexicographically comparing UTF-8 encoded attribute values,
is equivalent to lexicographically comparing based on the character number. */
static int attcmp(const void *att1, const void *att2)
{
1998-05-31 07:53:28 -04:00
return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
1998-02-04 00:25:27 -05:00
}
1998-05-31 07:53:28 -04:00
static void startElement(void *userData, const XML_Char *name, const XML_Char **atts)
1998-02-04 00:25:27 -05:00
{
int nAtts;
1998-05-31 07:53:28 -04:00
const XML_Char **p;
1998-02-04 00:25:27 -05:00
FILE *fp = userData;
1998-05-31 07:53:28 -04:00
puttc(T('<'), fp);
fputts(name, fp);
1998-02-04 00:25:27 -05:00
p = atts;
while (*p)
++p;
nAtts = (p - atts) >> 1;
if (nAtts > 1)
1998-05-31 07:53:28 -04:00
qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
1998-02-04 00:25:27 -05:00
while (*atts) {
1998-05-31 07:53:28 -04:00
puttc(T(' '), fp);
fputts(*atts++, fp);
puttc(T('='), fp);
puttc(T('"'), fp);
characterData(userData, *atts, tcslen(*atts));
puttc(T('"'), fp);
1998-02-04 00:25:27 -05:00
atts++;
}
1998-05-31 07:53:28 -04:00
puttc(T('>'), fp);
1998-02-04 00:25:27 -05:00
}
1998-05-31 07:53:28 -04:00
/* End-tag handler.  Writes "</name>" to the FILE passed as userData. */
static void endElement(void *userData, const XML_Char *name)
{
  FILE *out = userData;

  fputts(T("</"), out);
  fputts(name, out);
  puttc(T('>'), out);
}
1998-05-31 07:53:28 -04:00
/* Processing-instruction handler.  Writes "<?target data?>" to the FILE
   passed as userData; target and data are copied through unescaped. */
static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data)
{
  FILE *out = userData;

  fputts(T("<?"), out);
  fputts(target, out);
  puttc(T(' '), out);
  fputts(data, out);
  fputts(T("?>"), out);
}
#ifdef DEBUG_UNPARSED_ENTITIES
/* Unparsed-entity declaration handler (only compiled when
   DEBUG_UNPARSED_ENTITIES is defined).  Writes an <!ENTITY ...> declaration
   for the entity to the FILE passed as userData.  The system identifier is
   quoted with ' if it contains a ", and with " otherwise.  base is not
   used. */
static void unparsedEntityDecl(void *userData,
			       const XML_Char *entityName,
			       const XML_Char *base,
			       const XML_Char *systemId,
			       const XML_Char *publicId,
			       const XML_Char *notationName)
{
  FILE *out = userData;
  XML_Char quote = '"';

  if (tcschr(systemId, T('"')))
    quote = '\'';

  fputts(T("<!ENTITY "), out);
  fputts(entityName, out);
  if (!publicId)
    fputts(T(" SYSTEM "), out);
  else {
    fputts(T(" PUBLIC \""), out);
    fputts(publicId, out);
    puttc(T('"'), out);
    puttc(T(' '), out);
  }
  puttc(quote, out);
  fputts(systemId, out);
  puttc(quote, out);
  fputts(T(" NDATA "), out);
  fputts(notationName, out);
  puttc(T('>'), out);
}
/* Notation declaration handler (only compiled when DEBUG_UNPARSED_ENTITIES
   is defined).  Writes a <!NOTATION ...> declaration to the FILE passed as
   userData.  With a public identifier the output is PUBLIC "publicId",
   otherwise SYSTEM; a non-null system identifier follows, quoted with ' if
   it contains a " and with " otherwise.  base is not used. */
static void notationDecl(void *userData,
			 const XML_Char *notationName,
			 const XML_Char *base,
			 const XML_Char *systemId,
			 const XML_Char *publicId)
{
  FILE *out = userData;

  fputts(T("<!NOTATION "), out);
  fputts(notationName, out);
  if (!publicId)
    fputts(T(" SYSTEM"), out);
  else {
    fputts(T(" PUBLIC \""), out);
    fputts(publicId, out);
    puttc(T('"'), out);
  }
  if (systemId) {
    XML_Char quote = '"';
    if (tcschr(systemId, T('"')))
      quote = '\'';
    puttc(T(' '), out);
    puttc(quote, out);
    fputts(systemId, out);
    puttc(quote, out);
  }
  puttc(T('>'), out);
}
#endif /* DEBUG_UNPARSED_ENTITIES */
1998-02-04 00:25:27 -05:00
/* Argument bundle that processFile receives through its void *args
   parameter. */
typedef struct {
/* Parser that processFile feeds the file contents to. */
XML_Parser parser;
/* Where processFile stores its outcome: 1 if XML_Parse succeeded,
   0 otherwise. */
int *retPtr;
} PROCESS_ARGS;
static
1998-05-31 07:53:28 -04:00
void reportError(XML_Parser parser, const XML_Char *filename)
1998-02-04 00:25:27 -05:00
{
int code = XML_GetErrorCode(parser);
1998-05-31 07:53:28 -04:00
const XML_Char *message = XML_ErrorString(code);
1998-02-04 00:25:27 -05:00
if (message)
1998-05-31 07:53:28 -04:00
ftprintf(stdout, T("%s:%d:%ld: %s\n"),
filename,
XML_GetErrorLineNumber(parser),
XML_GetErrorColumnNumber(parser),
message);
1998-02-04 00:25:27 -05:00
else
1998-05-31 07:53:28 -04:00
ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
1998-02-04 00:25:27 -05:00
}
1997-12-10 02:44:19 -05:00
1997-11-11 00:52:10 -05:00
static
1998-05-31 07:53:28 -04:00
void processFile(const void *data, size_t size, const XML_Char *filename, void *args)
1997-11-11 00:52:10 -05:00
{
1998-02-04 00:25:27 -05:00
XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
if (!XML_Parse(parser, data, size, 1)) {
reportError(parser, filename);
*retPtr = 0;
1997-11-11 00:52:10 -05:00
}
1997-12-10 02:44:19 -05:00
else
1998-02-04 00:25:27 -05:00
*retPtr = 1;
}
1998-05-10 06:18:15 -04:00
static
1998-05-31 07:53:28 -04:00
int isAsciiLetter(XML_Char c)
1998-05-10 06:18:15 -04:00
{
1998-05-31 07:53:28 -04:00
return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
1998-05-10 06:18:15 -04:00
}
static
1998-05-31 07:53:28 -04:00
const XML_Char *resolveSystemId(const XML_Char *base, const XML_Char *systemId, XML_Char **toFree)
1998-05-10 06:18:15 -04:00
{
1998-05-31 07:53:28 -04:00
XML_Char *s;
1998-05-10 06:18:15 -04:00
*toFree = 0;
if (!base
1998-05-31 07:53:28 -04:00
|| *systemId == T('/')
1998-05-10 06:18:15 -04:00
#ifdef WIN32
1998-05-31 07:53:28 -04:00
|| *systemId == T('\\')
|| (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
1998-05-10 06:18:15 -04:00
#endif
)
return systemId;
1998-05-31 07:53:28 -04:00
*toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)*sizeof(XML_Char));
1998-05-10 06:18:15 -04:00
if (!*toFree)
return systemId;
1998-05-31 07:53:28 -04:00
tcscpy(*toFree, base);
1998-05-10 06:18:15 -04:00
s = *toFree;
1998-05-31 07:53:28 -04:00
if (tcsrchr(s, T('/')))
s = tcsrchr(s, T('/')) + 1;
1998-05-10 06:18:15 -04:00
#ifdef WIN32
1998-05-31 07:53:28 -04:00
if (tcsrchr(s, T('\\')))
s = tcsrchr(s, T('\\')) + 1;
1998-05-10 06:18:15 -04:00
#endif
1998-05-31 07:53:28 -04:00
tcscpy(s, systemId);
1998-05-10 06:18:15 -04:00
return *toFree;
}
/* External-entity handler used when files are processed through filemap.
   Creates a parser for the entity, resolves systemId against base, and lets
   filemap pass the file to processFile.  Returns the status processFile
   stored (1 on success, 0 on a parse error), or 0 when filemap fails or the
   entity parser cannot be created.  publicId is not used. */
static
int externalEntityRefFilemap(XML_Parser parser,
			     const XML_Char *openEntityNames,
			     const XML_Char *base,
			     const XML_Char *systemId,
			     const XML_Char *publicId)
{
  int result = 0;
  XML_Char *s;
  PROCESS_ARGS args;
  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, openEntityNames, 0);

  /* Parser creation can fail; report the entity as unprocessable rather
     than handing a null parser to XML_Parse and XML_ParserFree. */
  if (!entParser)
    return 0;

  args.retPtr = &result;
  args.parser = entParser;
  if (!filemap(resolveSystemId(base, systemId, &s), processFile, &args))
    result = 0;
  /* s is null when resolveSystemId did not allocate. */
  free(s);
  XML_ParserFree(entParser);
  return result;
}
1998-02-04 00:25:27 -05:00
static
1998-05-31 07:53:28 -04:00
int processStream(const XML_Char *filename, XML_Parser parser)
1998-02-04 00:25:27 -05:00
{
1998-05-31 07:53:28 -04:00
int fd = topen(filename, O_BINARY|O_RDONLY);
1998-02-04 00:25:27 -05:00
if (fd < 0) {
1998-05-31 07:53:28 -04:00
tperror(filename);
1998-02-04 00:25:27 -05:00
return 0;
}
for (;;) {
int nread;
char *buf = XML_GetBuffer(parser, READ_SIZE);
if (!buf) {
close(fd);
1998-05-31 07:53:28 -04:00
ftprintf(stderr, T("%s: out of memory\n"), filename);
1998-02-04 00:25:27 -05:00
return 0;
}
nread = read(fd, buf, READ_SIZE);
if (nread < 0) {
1998-05-31 07:53:28 -04:00
tperror(filename);
1998-02-04 00:25:27 -05:00
close(fd);
return 0;
}
if (!XML_ParseBuffer(parser, nread, nread == 0)) {
reportError(parser, filename);
close(fd);
return 0;
}
if (nread == 0) {
close(fd);
break;;
}
}
return 1;
}
1998-05-10 06:18:15 -04:00
/* External-entity handler used when files are processed with processStream.
   Creates a parser for the entity, resolves systemId against base, and
   parses the resulting file.  Returns processStream's result (1 on success,
   0 on failure), or 0 when the entity parser cannot be created.  publicId is
   not used. */
static
int externalEntityRefStream(XML_Parser parser,
			    const XML_Char *openEntityNames,
			    const XML_Char *base,
			    const XML_Char *systemId,
			    const XML_Char *publicId)
{
  XML_Char *s;
  int ret;
  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, openEntityNames, 0);

  /* Parser creation can fail; report the entity as unprocessable rather
     than handing a null parser to processStream and XML_ParserFree. */
  if (!entParser)
    return 0;

  ret = processStream(resolveSystemId(base, systemId, &s), entParser);
  /* s is null when resolveSystemId did not allocate. */
  free(s);
  XML_ParserFree(entParser);
  return ret;
}
/* Single-byte encoding handler.  Accepts encoding names made of the prefix
   "windows-" or "WINDOWS-" (each character may be either case) followed only
   by decimal digits.  The digits are read as a code page number, which is
   passed to codepage together with table; codepage's result is returned.
   Returns 0 when the name does not have that form or the number reaches
   0x10000.  userData is not used. */
static
int singleByteEncoding(void *userData,
		       const XML_Char *encoding,
		       unsigned short *table)
{
  static const XML_Char lower[] = T("windows-");
  static const XML_Char upper[] = T("WINDOWS-");
  static const XML_Char digits[] = T("0123456789");
  const XML_Char *p = encoding;
  int k;
  int cp = 0;

  for (k = 0; upper[k]; k++, p++) {
    if (*p != upper[k] && *p != lower[k])
      return 0;
  }
  while (*p) {
    const XML_Char *hit = tcschr(digits, *p);
    if (!hit)
      return 0;
    cp = cp * 10 + (hit - digits);
    if (cp >= 0x10000)
      return 0;
    p++;
  }
  return codepage(cp, table);
}
1998-02-04 00:25:27 -05:00
static
1998-05-31 07:53:28 -04:00
void usage(const XML_Char *prog)
1998-02-04 00:25:27 -05:00
{
ftprintf(stderr, T("usage: %s [-r] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog);
1998-02-04 00:25:27 -05:00
exit(1);
1997-11-11 00:52:10 -05:00
}
1998-05-31 07:53:28 -04:00
/* Program entry point (main, or wmain when XML_UNICODE is defined).

   Options, which must precede the file names:
     -r            read files with processStream instead of filemap
     -x            process external entities
     -w            install singleByteEncoding as the single-byte encoding
                   handler
     -d output-dir write a copy of each parsed file into output-dir
     -e encoding   encoding name passed to XML_ParserCreate
     --            end of options
   -r, -x and -w may be combined in one argument in any order, optionally
   followed by -d or -e, whose value is either the rest of that argument or
   the next argument.

   Each remaining argument is parsed as a file.  With -d the output file is
   removed again when parsing fails.  Returns 0 once every file has been
   processed; exits with status 1 on a usage error, when an output file
   cannot be opened, or when memory runs out. */
int tmain(int argc, XML_Char **argv)
{
  int i;
  const XML_Char *outputDir = 0;
  const XML_Char *encoding = 0;
  int useFilemap = 1;
  int processExternalEntities = 0;
  int windowsCodePages = 0;

#ifdef _MSC_VER
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
#endif

  i = 1;
  while (i < argc && argv[i][0] == T('-')) {
    int j;
    if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
      i++;
      break;
    }
    j = 1;
    /* Consume the argument-less flags in whatever order they were given. */
    for (;;) {
      if (argv[i][j] == T('r'))
	useFilemap = 0;
      else if (argv[i][j] == T('x'))
	processExternalEntities = 1;
      else if (argv[i][j] == T('w'))
	windowsCodePages = 1;
      else
	break;
      j++;
    }
    if (argv[i][j] == T('d')) {
      if (argv[i][j + 1] == T('\0')) {
	if (++i == argc)
	  usage(argv[0]);
	outputDir = argv[i];
      }
      else
	outputDir = argv[i] + j + 1;
      i++;
    }
    else if (argv[i][j] == T('e')) {
      if (argv[i][j + 1] == T('\0')) {
	if (++i == argc)
	  usage(argv[0]);
	encoding = argv[i];
      }
      else
	encoding = argv[i] + j + 1;
      i++;
    }
    /* j > 1 rejects a lone "-". */
    else if (argv[i][j] == T('\0') && j > 1)
      i++;
    else
      usage(argv[0]);
  }
  if (i == argc)
    usage(argv[0]);

  for (; i < argc; i++) {
    FILE *fp = 0;
    XML_Char *outName = 0;
    int result;
    XML_Parser parser = XML_ParserCreate(encoding);

    if (!parser) {
      ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
      exit(1);
    }
    if (outputDir) {
      /* The output goes to output-dir/<last path component of the input>. */
      const XML_Char *file = argv[i];
      if (tcsrchr(file, T('/')))
	file = tcsrchr(file, T('/')) + 1;
#ifdef WIN32
      if (tcsrchr(file, T('\\')))
	file = tcsrchr(file, T('\\')) + 1;
#endif
      outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char));
      if (!outName) {
	ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
	exit(1);
      }
      tcscpy(outName, outputDir);
      tcscat(outName, T("/"));
      tcscat(outName, file);
      fp = tfopen(outName, T("wb"));
      if (!fp) {
	tperror(outName);
	exit(1);
      }
#ifdef XML_UNICODE
      /* Start wide-character output with a byte order mark. */
      puttc(0xFEFF, fp);
#endif
      XML_SetUserData(parser, fp);
      XML_SetElementHandler(parser, startElement, endElement);
      XML_SetCharacterDataHandler(parser, characterData);
      XML_SetProcessingInstructionHandler(parser, processingInstruction);
#ifdef DEBUG_UNPARSED_ENTITIES
      XML_SetUnparsedEntityDeclHandler(parser, unparsedEntityDecl);
      XML_SetNotationDeclHandler(parser, notationDecl);
#endif
    }
    if (windowsCodePages)
      XML_SetSingleByteEncodingHandler(parser, singleByteEncoding);
    if (processExternalEntities) {
      /* The input file name is the base that the external-entity handlers
         resolve system identifiers against. */
      if (!XML_SetBase(parser, argv[i])) {
	ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
	exit(1);
      }
      XML_SetExternalEntityRefHandler(parser,
	                              useFilemap
				      ? externalEntityRefFilemap
				      : externalEntityRefStream);
    }
    if (useFilemap) {
      PROCESS_ARGS args;
      args.retPtr = &result;
      args.parser = parser;
      if (!filemap(argv[i], processFile, &args))
	result = 0;
    }
    else
      result = processStream(argv[i], parser);
    if (outputDir) {
      fclose(fp);
      /* Do not leave partial output behind for a file that failed. */
      if (!result)
	tremove(outName);
      free(outName);
    }
    XML_ParserFree(parser);
  }
  return 0;
}