Added a test to make sure that whitespace in ENTITIES, IDREFS, and NMTOKENS
attributes is properly collapsed according to Section 3.3.3 of the spec.

This is the first even slightly complicated test; boy are these painful in C! Had to add a test of a helper routine as well; that just uses assertions since the test framework should not be dealing with tests of the tester, just of Expat. Added a helper to make the failure messages more useful when Expat produces an unexpected error code; we now include the error message and location from Expat. This is mostly useful when developing a new test.
2001-11-16 20:19:39 +00:00 · 2001-11-16 20:19:39 +00:00 · 01012061bc
commit 01012061bc
parent b1447b1ba3
1 changed files with 146 additions and 13 deletions
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@ -1,5 +1,7 @@
+#include <assert.h>
 #include <check.h>
 #include <stdlib.h>
+#include <stdio.h>

 #include "expat.h"

@ -18,21 +20,34 @@ basic_setup(void)
 static void
 basic_teardown(void)
 {
-    if (parser != NULL) {
+    if (parser != NULL)  /* only call XML_ParserFree() when a parser object exists */
        XML_ParserFree(parser);
-    }
 }

+/* Fail the current test with a message built from the parser's state.
+ * This should be used when the parser reports an error we weren't
+ * expecting: the message combines XML_ErrorString() for the current
+ * error code with the values of XML_GetCurrentLineNumber() and
+ * XML_GetCurrentColumnNumber().
+ *
+ * Note that the number labelled "offset" in the message is the value
+ * returned by XML_GetCurrentColumnNumber().
+ *
+ * NOTE(review): the message is formatted with sprintf() into a 256-byte
+ * buffer with no length check; this assumes Expat's error strings are
+ * short enough to fit -- confirm, or switch to a bounded formatter.
+ */
+static void
+xml_failure(void)
+{
+    char buffer[256];
+    sprintf(buffer, "%s (line %d, offset %d)",
+            XML_ErrorString(XML_GetErrorCode(parser)),
+            XML_GetCurrentLineNumber(parser),
+            XML_GetCurrentColumnNumber(parser));
+    fail(buffer);
+}

 START_TEST(test_nul_byte)
 {
-    char *text = "<doc>\0</doc>";
+    char text[] = "<doc>\0</doc>";  /* an array, so sizeof below counts past the embedded NUL */

    /* test that a NUL byte (in US-ASCII data) is an error */
-    if (XML_Parse(parser, text, 12, 1))
+    if (XML_Parse(parser, text, sizeof(text) - 1, 1))  /* - 1 drops the string terminator */
        fail("Parser did not report error on NUL-byte.");
-    fail_unless(XML_GetErrorCode(parser) == XML_ERROR_INVALID_TOKEN,
-                "Got wrong error code for NUL-byte in US-ASCII encoding.");
+    if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
+        xml_failure();  /* wrong error code: report Expat's own message and position */
 }
 END_TEST

@ -44,8 +59,8 @@ START_TEST(test_u0000_char)
    /* test that a NUL byte (in US-ASCII data) is an error */
    if (XML_Parse(parser, text, strlen(text), 1))
        fail("Parser did not report error on NUL-byte.");
-    fail_unless(XML_GetErrorCode(parser) == XML_ERROR_BAD_CHAR_REF,
-                "Got wrong error code for &#0;.");
+    if (XML_GetErrorCode(parser) != XML_ERROR_BAD_CHAR_REF)
+        xml_failure();
 }
 END_TEST

@ -58,8 +73,8 @@ START_TEST(test_xmldecl_misplaced)
        "<a>&eee;</a>";

    if (!XML_Parse(parser, text, strlen(text), 1)) {
-        fail_unless(XML_GetErrorCode(parser) == XML_ERROR_MISPLACED_XML_PI,
-                    "wrong error when XML declaration is misplaced");
+        if (XML_GetErrorCode(parser) != XML_ERROR_MISPLACED_XML_PI)
+            xml_failure();
    }
    else {
        fail("expected XML_ERROR_MISPLACED_XML_PI with misplaced XML decl");
@ -73,7 +88,7 @@ START_TEST(test_bom_utf8)
    char *text = "\357\273\277<e/>";

    if (!XML_Parse(parser, text, strlen(text), 1))
-        fail("false error reported for UTF-8 BOM");
+        xml_failure();
 }
 END_TEST

@ -82,7 +97,7 @@ START_TEST(test_bom_utf16_be)
    char text[] = "\376\377\0<\0e\0/\0>";

    if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
-        fail("false error reported for UTF-16-BE BOM");
+        xml_failure();
 }
 END_TEST

@ -91,15 +106,126 @@ START_TEST(test_bom_utf16_le)
    char text[] = "\377\376<\0e\0/\0>\0";

    if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
-        fail("false error reported for UTF-16-LE BOM");
+        xml_failure();
 }
 END_TEST

+
+/* Helpers used by the following test, which checks any "attr", "ents",
+ * and "refs" attributes to make sure whitespace has been normalized.
+ */
+
+/* Return true (1) if whitespace has been normalized in the string 's',
+ * using the rules for attribute value normalization, and false (0)
+ * otherwise.  The 'is_cdata' flag is needed since CDATA attributes
+ * don't need to have multiple whitespace characters collapsed to a
+ * single space, while other attribute data types do.  (Section 3.3.3
+ * of the recommendation.)
+ *
+ * For every value of 'is_cdata', any tab, newline, or carriage return
+ * in 's' makes the result false.  When 'is_cdata' is zero, the result
+ * is also false if 's' has leading spaces, trailing spaces, or more
+ * than one consecutive space between non-blank characters; a string
+ * consisting only of spaces therefore fails as well.  The empty string
+ * is always reported as normalized.
+ */
+static int
+is_whitespace_normalized(const XML_Char *s, int is_cdata)
+{
+    int blanks = 0;      /* length of the current run of spaces */
+    int at_start = 1;    /* true until the first non-blank character is seen */
+    while (*s) {
+        if (*s == ' ')
+            ++blanks;
+        else if (*s == '\t' || *s == '\n' || *s == '\r')
+            return 0;    /* these never survive normalization, CDATA or not */
+        else {
+            if (at_start) {
+                at_start = 0;
+                if (blanks && !is_cdata)
+                    /* illegal leading blanks */
+                    return 0;
+            }
+            else if (blanks > 1 && !is_cdata)
+                return 0;    /* run of spaces inside the value was not collapsed */
+            blanks = 0;
+        }
+        ++s;
+    }
+    if (blanks && !is_cdata)    /* spaces left over at the end: trailing blanks */
+        return 0;
+    return 1;
+}
+
+/* Check the attribute whitespace checker.  Most inputs are tried with
+ * 'is_cdata' set to both 0 and 1, since the flag changes the expected
+ * answer for values with leading, trailing, or doubled spaces.  Plain
+ * assert() is used here rather than the test framework's failure
+ * reporting, so a wrong answer from the helper trips an assertion.
+ */
+static void
+testhelper_is_whitespace_normalized(void)
+{
+    assert(is_whitespace_normalized("abc", 0));
+    assert(is_whitespace_normalized("abc", 1));
+    assert(is_whitespace_normalized("abc def ghi", 0));
+    assert(is_whitespace_normalized("abc def ghi", 1));
+    assert(!is_whitespace_normalized(" abc def ghi", 0));   /* leading space */
+    assert(is_whitespace_normalized(" abc def ghi", 1));
+    assert(!is_whitespace_normalized("abc  def ghi", 0));   /* doubled space */
+    assert(is_whitespace_normalized("abc  def ghi", 1));
+    assert(!is_whitespace_normalized("abc def ghi ", 0));   /* trailing space */
+    assert(is_whitespace_normalized("abc def ghi ", 1));
+    assert(!is_whitespace_normalized(" ", 0));              /* nothing but a space */
+    assert(is_whitespace_normalized(" ", 1));
+    assert(!is_whitespace_normalized("\t", 0));             /* tab, LF, and CR fail for both flag values */
+    assert(!is_whitespace_normalized("\t", 1));
+    assert(!is_whitespace_normalized("\n", 0));
+    assert(!is_whitespace_normalized("\n", 1));
+    assert(!is_whitespace_normalized("\r", 0));
+    assert(!is_whitespace_normalized("\r", 1));
+    assert(!is_whitespace_normalized("abc\t def", 1));      /* tab in the middle of a CDATA value */
+}
+/* Start-element handler that fails the test if the value of an "attr",
+ * "ents", or "refs" attribute is not whitespace-normalized under the
+ * non-CDATA rules (is_cdata == 0).  'atts' is walked as name/value
+ * pairs until a NULL name is reached; 'userdata' and 'name' are not
+ * used.
+ *
+ * NOTE(review): the failure message is built with sprintf() into a
+ * 256-byte buffer with no length check, so a long attribute value
+ * would overflow it; the values in the visible test are short.
+ * NOTE(review): strcmp() is called here, but <string.h> is not among
+ * the includes visible in this diff -- confirm it is included.
+ */
+static void
+check_attr_contains_normalized_whitespace(void *userdata,
+                                          const XML_Char *name,
+                                          const XML_Char **atts)
+{
+    int i;
+    for (i = 0; atts[i] != NULL; i += 2) {    /* step over one name/value pair per pass */
+        const XML_Char *attrname = atts[i];
+        const XML_Char *value = atts[i + 1];
+        if (strcmp("attr", attrname) == 0
+            || strcmp("ents", attrname) == 0
+            || strcmp("refs", attrname) == 0) {
+            if (!is_whitespace_normalized(value, 0)) {
+                char buffer[256];
+                sprintf(buffer, "attribute value not normalized: %s='%s'",
+                        attrname, value);
+                fail(buffer);
+            }
+        }
+    }
+}
+/* Parse a document whose internal DTD subset declares 'attr' as
+ * NMTOKENS, 'ents' as ENTITIES, and 'refs' as IDREFS, and whose <doc>
+ * element gives those attributes values padded with runs of spaces,
+ * tabs, and a CR LF pair.  The start-element handler installed below
+ * fails the test if a value is delivered without normalized
+ * whitespace; a parse error is reported through xml_failure().
+ */
+START_TEST(test_attr_whitespace_normalization)
+{
+    char *text =
+        "<!DOCTYPE doc [\n"
+        "  <!ATTLIST doc\n"
+        "            attr NMTOKENS #REQUIRED\n"
+        "            ents ENTITIES #REQUIRED\n"
+        "            refs IDREFS   #REQUIRED>\n"
+        "]>\n"
+        "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
+        "     ents=' ent-1   \t\r\n"
+        "            ent-2  ' >\n"
+        "  <e id='id-1'/>\n"
+        "  <e id='id-2'/>\n"
+        "</doc>";
+
+    XML_SetStartElementHandler(parser,
+                               check_attr_contains_normalized_whitespace);
+    if (!XML_Parse(parser, text, strlen(text), 1))    /* the whole document in one final call */
+        xml_failure();
+}
+END_TEST
+
+
 static Suite *
 make_basic_suite(void)
 {
    Suite *s = suite_create("basic");
    TCase *tc_chars = tcase_create("character tests");
+    TCase *tc_attrs = tcase_create("attributes");
    TCase *tc_xmldecl = tcase_create("XML declaration");

    suite_add_tcase(s, tc_chars);
@ -110,6 +236,10 @@ make_basic_suite(void)
    tcase_add_test(tc_chars, test_bom_utf16_be);
    tcase_add_test(tc_chars, test_bom_utf16_le);

+    suite_add_tcase(s, tc_attrs);
+    tcase_add_checked_fixture(tc_attrs, basic_setup, basic_teardown);
+    tcase_add_test(tc_attrs, test_attr_whitespace_normalization);
+
    suite_add_tcase(s, tc_xmldecl);
    tcase_add_checked_fixture(tc_xmldecl, basic_setup, basic_teardown);
    tcase_add_test(tc_xmldecl, test_xmldecl_misplaced);
@ -127,6 +257,9 @@ main(int argc, char *argv[])
    Suite *s = make_basic_suite();
    SRunner *sr = srunner_create(s);

+    /* run the tests for internal helper functions */
+    testhelper_is_whitespace_normalized();
+
    for (i = 1; i < argc; ++i) {
        char *opt = argv[i];
        if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0)