2017-08-12 14:56:14 -04:00
|
|
|
/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net>
   Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
|
1998-11-25 03:32:06 -05:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <fcntl.h>
|
2005-12-25 12:05:10 -05:00
|
|
|
|
2017-05-25 09:06:28 -04:00
|
|
|
#ifdef _WIN32
|
2019-08-03 14:34:54 -04:00
|
|
|
# include "winconfig.h"
|
2021-04-21 17:35:17 -04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <expat_config.h>
|
2005-12-25 12:05:10 -05:00
|
|
|
|
2000-09-18 12:26:23 -04:00
|
|
|
#include "expat.h"
|
2019-08-03 14:34:54 -04:00
|
|
|
#include "internal.h" /* for UNUSED_P only */
|
1998-11-25 03:32:06 -05:00
|
|
|
#include "xmlfile.h"
|
|
|
|
#include "xmltchar.h"
|
|
|
|
#include "filemap.h"
|
|
|
|
|
2017-04-30 19:13:40 -04:00
|
|
|
#if defined(_MSC_VER)
|
2019-08-03 14:34:54 -04:00
|
|
|
# include <io.h>
|
1998-11-25 03:32:06 -05:00
|
|
|
#endif
|
|
|
|
|
2002-06-12 22:52:09 -04:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2019-08-03 14:34:54 -04:00
|
|
|
# include <unistd.h>
|
1998-11-25 03:32:06 -05:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Make sure O_BINARY exists so it can always be OR-ed into open() flags:
   reuse _O_BINARY where the platform spells it that way, otherwise define
   it as 0 so that it has no effect. */
#if ! defined(O_BINARY)
#  if defined(_O_BINARY)
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
|
|
|
|
|
|
|
|
/* Number of bytes requested from the parser buffer and read per iteration
   when streaming.  Builds with _DEBUG defined use a deliberately tiny
   chunk. */
#if defined(_DEBUG)
#  define READ_SIZE 16
#else
#  define READ_SIZE (1024 * 8)
#endif
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
XML_Parser parser;
|
|
|
|
int *retPtr;
|
|
|
|
} PROCESS_ARGS;
|
|
|
|
|
2019-08-03 14:34:54 -04:00
|
|
|
static int processStream(const XML_Char *filename, XML_Parser parser);
|
2017-05-22 19:48:52 -04:00
|
|
|
|
2002-04-19 17:41:54 -04:00
|
|
|
static void
|
2019-08-03 14:34:54 -04:00
|
|
|
reportError(XML_Parser parser, const XML_Char *filename) {
|
2004-08-10 14:18:25 -04:00
|
|
|
enum XML_Error code = XML_GetErrorCode(parser);
|
1998-11-25 03:32:06 -05:00
|
|
|
const XML_Char *message = XML_ErrorString(code);
|
|
|
|
if (message)
|
2017-10-04 15:16:21 -04:00
|
|
|
ftprintf(stdout,
|
2019-08-03 14:34:54 -04:00
|
|
|
T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
|
|
|
|
T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
|
|
|
|
filename, XML_GetErrorLineNumber(parser),
|
|
|
|
XML_GetErrorColumnNumber(parser), message);
|
1998-11-25 03:32:06 -05:00
|
|
|
else
|
|
|
|
ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
|
|
|
|
}
|
2019-08-03 14:34:54 -04:00
|
|
|
|
2006-04-14 14:02:08 -04:00
|
|
|
/* This implementation will give problems on files larger than INT_MAX. */
|
2002-04-19 17:41:54 -04:00
|
|
|
static void
|
2019-08-03 14:34:54 -04:00
|
|
|
processFile(const void *data, size_t size, const XML_Char *filename,
|
|
|
|
void *args) {
|
1998-11-25 03:32:06 -05:00
|
|
|
XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
|
|
|
|
int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
|
2006-04-14 14:02:08 -04:00
|
|
|
if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
|
1998-11-25 03:32:06 -05:00
|
|
|
reportError(parser, filename);
|
|
|
|
*retPtr = 0;
|
2019-08-03 14:34:54 -04:00
|
|
|
} else
|
1998-11-25 03:32:06 -05:00
|
|
|
*retPtr = 1;
|
|
|
|
}
|
|
|
|
|
2017-05-25 09:06:28 -04:00
|
|
|
#if defined(_WIN32)

/* Return 1 if c is one of the ASCII letters 'a'-'z' or 'A'-'Z', else 0.
   Only compiled on Windows, where resolveSystemId uses it to recognise
   drive-letter prefixes such as "C:". */
static int
isAsciiLetter(XML_Char c) {
  if (T('a') <= c && c <= T('z'))
    return 1;
  if (T('A') <= c && c <= T('Z'))
    return 1;
  return 0;
}

#endif /* _WIN32 */
|
2000-05-09 23:13:58 -04:00
|
|
|
|
2002-04-19 17:41:54 -04:00
|
|
|
/* Turn an entity's system identifier into a file name to open.

   systemId is returned unchanged (and *toFree is set to 0) when there is no
   base or when systemId already looks absolute: it starts with '/', or, on
   Windows, with '\\' or a drive letter followed by ':'.

   Otherwise a new string is allocated in which everything after the last
   directory separator of base is replaced by systemId.  That string is
   returned and also stored in *toFree; the caller releases it with free().
   If the allocation fails, systemId is returned as a fallback and *toFree
   is 0. */
static const XML_Char *
resolveSystemId(const XML_Char *base, const XML_Char *systemId,
                XML_Char **toFree) {
  XML_Char *resolved;
  XML_Char *tail;
  XML_Char *separator;

  *toFree = 0;

  if (! base)
    return systemId;
  if (*systemId == T('/'))
    return systemId;
#if defined(_WIN32)
  if (*systemId == T('\\'))
    return systemId;
  if (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
    return systemId;
#endif

  /* Room for both strings in full plus terminator(s); more than enough,
     because only a prefix of base is kept. */
  resolved = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
                                * sizeof(XML_Char));
  *toFree = resolved;
  if (! resolved)
    return systemId;

  tcscpy(resolved, base);

  /* Locate the position just past the last directory separator of base. */
  tail = resolved;
  separator = tcsrchr(tail, T('/'));
  if (separator)
    tail = separator + 1;
#if defined(_WIN32)
  separator = tcsrchr(tail, T('\\'));
  if (separator)
    tail = separator + 1;
#endif

  /* Overwrite the file-name part of base with the system identifier. */
  tcscpy(tail, systemId);
  return resolved;
}
|
|
|
|
|
2002-04-19 17:41:54 -04:00
|
|
|
static int
|
2019-08-03 14:34:54 -04:00
|
|
|
externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
|
|
|
|
const XML_Char *base, const XML_Char *systemId,
|
2019-08-16 12:20:00 -04:00
|
|
|
const XML_Char *publicId) {
|
1998-11-25 03:32:06 -05:00
|
|
|
int result;
|
|
|
|
XML_Char *s;
|
|
|
|
const XML_Char *filename;
|
|
|
|
XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
|
2017-05-22 19:48:52 -04:00
|
|
|
int filemapRes;
|
1998-11-25 03:32:06 -05:00
|
|
|
PROCESS_ARGS args;
|
2019-08-16 12:20:00 -04:00
|
|
|
UNUSED_P(publicId);
|
1998-11-25 03:32:06 -05:00
|
|
|
args.retPtr = &result;
|
|
|
|
args.parser = entParser;
|
|
|
|
filename = resolveSystemId(base, systemId, &s);
|
|
|
|
XML_SetBase(entParser, filename);
|
2017-05-22 19:48:52 -04:00
|
|
|
filemapRes = filemap(filename, processFile, &args);
|
|
|
|
switch (filemapRes) {
|
|
|
|
case 0:
|
1998-11-25 03:32:06 -05:00
|
|
|
result = 0;
|
2017-05-22 19:48:52 -04:00
|
|
|
break;
|
|
|
|
case 2:
|
2019-08-03 14:34:54 -04:00
|
|
|
ftprintf(stderr,
|
|
|
|
T("%s: file too large for memory-mapping")
|
|
|
|
T(", switching to streaming\n"),
|
|
|
|
filename);
|
2017-05-22 19:48:52 -04:00
|
|
|
result = processStream(filename, entParser);
|
|
|
|
break;
|
|
|
|
}
|
1998-11-25 03:32:06 -05:00
|
|
|
free(s);
|
|
|
|
XML_ParserFree(entParser);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2002-04-19 17:41:54 -04:00
|
|
|
static int
|
2019-08-03 14:34:54 -04:00
|
|
|
processStream(const XML_Char *filename, XML_Parser parser) {
|
2019-11-04 07:52:31 -05:00
|
|
|
/* passing NULL for filename means read input from stdin */
|
2019-08-03 14:34:54 -04:00
|
|
|
int fd = 0; /* 0 is the fileno for stdin */
|
2002-04-19 17:41:54 -04:00
|
|
|
|
|
|
|
if (filename != NULL) {
|
2019-08-03 14:34:54 -04:00
|
|
|
fd = topen(filename, O_BINARY | O_RDONLY);
|
2002-04-19 17:41:54 -04:00
|
|
|
if (fd < 0) {
|
|
|
|
tperror(filename);
|
|
|
|
return 0;
|
|
|
|
}
|
1998-11-25 03:32:06 -05:00
|
|
|
}
|
|
|
|
for (;;) {
|
|
|
|
int nread;
|
2004-08-10 14:18:25 -04:00
|
|
|
char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
|
2019-08-03 14:34:54 -04:00
|
|
|
if (! buf) {
|
2002-04-19 17:41:54 -04:00
|
|
|
if (filename != NULL)
|
2002-07-01 11:13:02 -04:00
|
|
|
close(fd);
|
2002-04-19 17:41:54 -04:00
|
|
|
ftprintf(stderr, T("%s: out of memory\n"),
|
2017-08-25 12:26:57 -04:00
|
|
|
filename != NULL ? filename : T("xmlwf"));
|
1998-11-25 03:32:06 -05:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
nread = read(fd, buf, READ_SIZE);
|
|
|
|
if (nread < 0) {
|
2017-08-25 12:26:57 -04:00
|
|
|
tperror(filename != NULL ? filename : T("STDIN"));
|
2002-04-19 17:41:54 -04:00
|
|
|
if (filename != NULL)
|
2002-07-01 11:13:02 -04:00
|
|
|
close(fd);
|
1998-11-25 03:32:06 -05:00
|
|
|
return 0;
|
|
|
|
}
|
2002-08-02 15:40:09 -04:00
|
|
|
if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
|
2019-08-03 14:34:54 -04:00
|
|
|
reportError(parser, filename != NULL ? filename : T("STDIN"));
|
2002-04-19 17:41:54 -04:00
|
|
|
if (filename != NULL)
|
2002-07-01 11:13:02 -04:00
|
|
|
close(fd);
|
1998-11-25 03:32:06 -05:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (nread == 0) {
|
2002-04-19 17:41:54 -04:00
|
|
|
if (filename != NULL)
|
2002-07-01 11:13:02 -04:00
|
|
|
close(fd);
|
2019-08-03 14:34:54 -04:00
|
|
|
break;
|
|
|
|
;
|
1998-11-25 03:32:06 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2002-04-19 17:41:54 -04:00
|
|
|
static int
|
2019-08-03 14:34:54 -04:00
|
|
|
externalEntityRefStream(XML_Parser parser, const XML_Char *context,
|
|
|
|
const XML_Char *base, const XML_Char *systemId,
|
2019-08-16 12:20:00 -04:00
|
|
|
const XML_Char *publicId) {
|
1998-11-25 03:32:06 -05:00
|
|
|
XML_Char *s;
|
|
|
|
const XML_Char *filename;
|
|
|
|
int ret;
|
|
|
|
XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
|
2019-08-16 12:20:00 -04:00
|
|
|
UNUSED_P(publicId);
|
1998-11-25 03:32:06 -05:00
|
|
|
filename = resolveSystemId(base, systemId, &s);
|
|
|
|
XML_SetBase(entParser, filename);
|
|
|
|
ret = processStream(filename, entParser);
|
|
|
|
free(s);
|
|
|
|
XML_ParserFree(entParser);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2002-04-19 17:41:54 -04:00
|
|
|
int
|
2019-08-03 14:34:54 -04:00
|
|
|
XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
|
1998-11-25 03:32:06 -05:00
|
|
|
int result;
|
|
|
|
|
2019-08-03 14:34:54 -04:00
|
|
|
if (! XML_SetBase(parser, filename)) {
|
1998-11-25 03:32:06 -05:00
|
|
|
ftprintf(stderr, T("%s: out of memory"), filename);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & XML_EXTERNAL_ENTITIES)
|
2019-08-03 14:34:54 -04:00
|
|
|
XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
|
|
|
|
? externalEntityRefFilemap
|
|
|
|
: externalEntityRefStream);
|
1998-11-25 03:32:06 -05:00
|
|
|
if (flags & XML_MAP_FILE) {
|
2017-05-22 19:48:52 -04:00
|
|
|
int filemapRes;
|
1998-11-25 03:32:06 -05:00
|
|
|
PROCESS_ARGS args;
|
|
|
|
args.retPtr = &result;
|
|
|
|
args.parser = parser;
|
2017-05-22 19:48:52 -04:00
|
|
|
filemapRes = filemap(filename, processFile, &args);
|
|
|
|
switch (filemapRes) {
|
|
|
|
case 0:
|
1998-11-25 03:32:06 -05:00
|
|
|
result = 0;
|
2017-05-22 19:48:52 -04:00
|
|
|
break;
|
|
|
|
case 2:
|
2019-08-03 14:34:54 -04:00
|
|
|
ftprintf(stderr,
|
|
|
|
T("%s: file too large for memory-mapping")
|
|
|
|
T(", switching to streaming\n"),
|
|
|
|
filename);
|
2017-05-22 19:48:52 -04:00
|
|
|
result = processStream(filename, parser);
|
|
|
|
break;
|
|
|
|
}
|
2019-08-03 14:34:54 -04:00
|
|
|
} else
|
1998-11-25 03:32:06 -05:00
|
|
|
result = processStream(filename, parser);
|
|
|
|
return result;
|
|
|
|
}
|