Added a test to make sure that whitespace in ENTITIES, IDREFS, and NMTOKENS

attributes is properly collapsed according to Section 3.3.3 of the spec.
This is the first even slightly complicated test; boy are these painful in
C!  Had to add a test of a helper routine as well; that just uses assertions
since the test framework should not be dealing with tests of the tester,
just of Expat.

Added a helper to make the failure messages more useful when Expat produces
an unexpected error code; we now include the error message and location from
Expat.  This is mostly useful when developing a new test.
This commit is contained in:
Fred L. Drake, Jr. 2001-11-16 20:19:39 +00:00
parent b1447b1ba3
commit 01012061bc

View File

@ -1,5 +1,7 @@
#include <assert.h>
#include <check.h>
#include <stdlib.h>
#include <stdio.h>
#include "expat.h"
@ -18,21 +20,34 @@ basic_setup(void)
/* Fixture teardown: free the shared parser when it is non-NULL. */
static void
basic_teardown(void)
{
    /* A single NULL check is sufficient; the nested duplicate was redundant. */
    if (parser != NULL) {
        XML_ParserFree(parser);
    }
}
/* Generate a failure using the parser state to create an error message;
 * this should be used when the parser reports an error we weren't
 * expecting.
 *
 * The message has the form "<error string> (line N, offset M)", built from
 * XML_ErrorString(), XML_GetCurrentLineNumber() and
 * XML_GetCurrentColumnNumber() applied to the shared `parser`, and is then
 * handed to fail().
 */
static void
xml_failure(void)
{
    char buffer[256];

    /* snprintf() bounds the write to the buffer size, so an unexpectedly
     * long message is truncated instead of overrunning the stack buffer.
     */
    snprintf(buffer, sizeof(buffer), "%s (line %d, offset %d)",
             XML_ErrorString(XML_GetErrorCode(parser)),
             XML_GetCurrentLineNumber(parser),
             XML_GetCurrentColumnNumber(parser));
    fail(buffer);
}
/* A NUL byte embedded in US-ASCII character data must be rejected with
 * XML_ERROR_INVALID_TOKEN.
 */
START_TEST(test_nul_byte)
{
    /* An array (not a pointer) so that sizeof() yields the full length,
     * including the bytes after the embedded NUL.
     */
    char text[] = "<doc>\0</doc>";

    /* test that a NUL byte (in US-ASCII data) is an error */
    if (XML_Parse(parser, text, sizeof(text) - 1, 1))
        fail("Parser did not report error on NUL-byte.");
    /* Any other error code is reported with Expat's own message/location. */
    if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
        xml_failure();
}
END_TEST
@ -44,8 +59,8 @@ START_TEST(test_u0000_char)
/* test that a NUL byte (in US-ASCII data) is an error */
if (XML_Parse(parser, text, strlen(text), 1))
fail("Parser did not report error on NUL-byte.");
fail_unless(XML_GetErrorCode(parser) == XML_ERROR_BAD_CHAR_REF,
"Got wrong error code for &#0;.");
if (XML_GetErrorCode(parser) != XML_ERROR_BAD_CHAR_REF)
xml_failure();
}
END_TEST
@ -58,8 +73,8 @@ START_TEST(test_xmldecl_misplaced)
"<a>&eee;</a>";
if (!XML_Parse(parser, text, strlen(text), 1)) {
fail_unless(XML_GetErrorCode(parser) == XML_ERROR_MISPLACED_XML_PI,
"wrong error when XML declaration is misplaced");
if (XML_GetErrorCode(parser) != XML_ERROR_MISPLACED_XML_PI)
xml_failure();
}
else {
fail("expected XML_ERROR_MISPLACED_XML_PI with misplaced XML decl");
@ -73,7 +88,7 @@ START_TEST(test_bom_utf8)
char *text = "\357\273\277<e/>";
if (!XML_Parse(parser, text, strlen(text), 1))
fail("false error reported for UTF-8 BOM");
xml_failure();
}
END_TEST
@ -82,7 +97,7 @@ START_TEST(test_bom_utf16_be)
char text[] = "\376\377\0<\0e\0/\0>";
if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
fail("false error reported for UTF-16-BE BOM");
xml_failure();
}
END_TEST
@ -91,15 +106,126 @@ START_TEST(test_bom_utf16_le)
char text[] = "\377\376<\0e\0/\0>\0";
if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
fail("false error reported for UTF-16-LE BOM");
xml_failure();
}
END_TEST
/* Helpers used by the following test; this checks any "attr", "ents", and
* "refs" attributes to make sure whitespace has been normalized.
*/
/* Report whether the string `s` already looks like a normalized attribute
 * value.  Returns 1 when it does and 0 when it does not.
 *
 * A tab, newline, or carriage return anywhere in the string is always a
 * failure.  When `is_cdata` is zero the stricter tokenized-type rules
 * apply as well: no leading blanks, no trailing blanks, and no run of
 * more than one blank between tokens.  With `is_cdata` non-zero, blanks
 * are accepted wherever they occur.  (Section 3.3.3 of the recommendation.)
 */
static int
is_whitespace_normalized(const XML_Char *s, int is_cdata)
{
    const XML_Char *cursor;
    int space_run = 0;      /* length of the current run of ' ' characters */
    int seen_text = 0;      /* becomes 1 after the first non-blank character */

    for (cursor = s; *cursor != 0; ++cursor) {
        switch (*cursor) {
        case ' ':
            ++space_run;
            break;
        case '\t':
        case '\n':
        case '\r':
            /* these must have been replaced by blanks already */
            return 0;
        default:
            if (!is_cdata) {
                /* leading blanks before the first token */
                if (!seen_text && space_run > 0)
                    return 0;
                /* more than one blank between two tokens */
                if (seen_text && space_run > 1)
                    return 0;
            }
            seen_text = 1;
            space_run = 0;
            break;
        }
    }
    /* whatever is left in space_run is trailing blanks */
    return (is_cdata || space_run == 0) ? 1 : 0;
}
/* Check the attribute whitespace checker.
 *
 * Plain assert() is used here because this exercises a helper of the test
 * program itself.  Each input is tried with is_cdata == 0 (tokenized
 * rules) and is_cdata == 1 (CDATA rules).
 */
static void
testhelper_is_whitespace_normalized(void)
{
    /* empty string: nothing to object to under either rule set */
    assert(is_whitespace_normalized("", 0));
    assert(is_whitespace_normalized("", 1));
    /* no blanks at all */
    assert(is_whitespace_normalized("abc", 0));
    assert(is_whitespace_normalized("abc", 1));
    /* single blanks between tokens */
    assert(is_whitespace_normalized("abc def ghi", 0));
    assert(is_whitespace_normalized("abc def ghi", 1));
    /* leading blank */
    assert(!is_whitespace_normalized(" abc def ghi", 0));
    assert(is_whitespace_normalized(" abc def ghi", 1));
    /* run of two blanks between tokens; must differ from the single-blank
     * case above, otherwise this contradicts it */
    assert(!is_whitespace_normalized("abc  def ghi", 0));
    assert(is_whitespace_normalized("abc  def ghi", 1));
    /* trailing blank */
    assert(!is_whitespace_normalized("abc def ghi ", 0));
    assert(is_whitespace_normalized("abc def ghi ", 1));
    /* blanks only */
    assert(!is_whitespace_normalized(" ", 0));
    assert(is_whitespace_normalized(" ", 1));
    /* tab, newline and carriage return are rejected under both rule sets */
    assert(!is_whitespace_normalized("\t", 0));
    assert(!is_whitespace_normalized("\t", 1));
    assert(!is_whitespace_normalized("\n", 0));
    assert(!is_whitespace_normalized("\n", 1));
    assert(!is_whitespace_normalized("\r", 0));
    assert(!is_whitespace_normalized("\r", 1));
    assert(!is_whitespace_normalized("abc\t def", 1));
}
/* Start-element handler: verify that the value of every "attr", "ents" or
 * "refs" attribute has had its whitespace normalized.
 *
 * userdata and name are not used.  atts is walked as a NULL-terminated
 * sequence of name/value pairs.  A value rejected by
 * is_whitespace_normalized() under the non-CDATA rules is reported
 * through fail() with the attribute name and value in the message.
 */
static void
check_attr_contains_normalized_whitespace(void *userdata,
                                          const XML_Char *name,
                                          const XML_Char **atts)
{
    int i;

    (void)userdata;
    (void)name;

    for (i = 0; atts[i] != NULL; i += 2) {
        const XML_Char *attrname = atts[i];
        const XML_Char *value = atts[i + 1];

        if (strcmp("attr", attrname) == 0
            || strcmp("ents", attrname) == 0
            || strcmp("refs", attrname) == 0) {
            if (!is_whitespace_normalized(value, 0)) {
                char buffer[256];

                /* The value comes from the parsed document and has no
                 * length limit, so bound the write to the buffer size;
                 * an over-long message is truncated.
                 */
                snprintf(buffer, sizeof(buffer),
                         "attribute value not normalized: %s='%s'",
                         attrname, value);
                fail(buffer);
            }
        }
    }
}
/* Parse a document whose NMTOKENS, ENTITIES and IDREFS attribute values
 * contain leading, trailing and embedded whitespace (blanks, tabs, CR, LF).
 * The start-element handler installed below inspects the "attr", "ents"
 * and "refs" values and calls fail() for any value that is not normalized;
 * a parse error is reported through xml_failure().
 */
START_TEST(test_attr_whitespace_normalization)
{
char *text =
"<!DOCTYPE doc [\n"
" <!ATTLIST doc\n"
" attr NMTOKENS #REQUIRED\n"
" ents ENTITIES #REQUIRED\n"
" refs IDREFS #REQUIRED>\n"
"]>\n"
"<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
" ents=' ent-1 \t\r\n"
" ent-2 ' >\n"
" <e id='id-1'/>\n"
" <e id='id-2'/>\n"
"</doc>";
/* The handler performs the actual checks as each start tag is reported. */
XML_SetStartElementHandler(parser,
check_attr_contains_normalized_whitespace);
/* A zero return from XML_Parse() is treated as an unexpected error. */
if (!XML_Parse(parser, text, strlen(text), 1))
xml_failure();
}
END_TEST
static Suite *
make_basic_suite(void)
{
Suite *s = suite_create("basic");
TCase *tc_chars = tcase_create("character tests");
TCase *tc_attrs = tcase_create("attributes");
TCase *tc_xmldecl = tcase_create("XML declaration");
suite_add_tcase(s, tc_chars);
@ -110,6 +236,10 @@ make_basic_suite(void)
tcase_add_test(tc_chars, test_bom_utf16_be);
tcase_add_test(tc_chars, test_bom_utf16_le);
suite_add_tcase(s, tc_attrs);
tcase_add_checked_fixture(tc_attrs, basic_setup, basic_teardown);
tcase_add_test(tc_attrs, test_attr_whitespace_normalization);
suite_add_tcase(s, tc_xmldecl);
tcase_add_checked_fixture(tc_xmldecl, basic_setup, basic_teardown);
tcase_add_test(tc_xmldecl, test_xmldecl_misplaced);
@ -127,6 +257,9 @@ main(int argc, char *argv[])
Suite *s = make_basic_suite();
SRunner *sr = srunner_create(s);
/* run the tests for internal helper functions */
testhelper_is_whitespace_normalized();
for (i = 1; i < argc; ++i) {
char *opt = argv[i];
if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0)