1999-06-24 05:39:06 -04:00
|
|
|
#define CHARSET_MAX 41
|
|
|
|
|
2002-07-01 11:13:02 -04:00
|
|
|
/*
 * Scan the next token of a MIME header value starting at *pp.
 *
 * Whitespace and parenthesised comments (which may nest) are skipped.
 * Three kinds of token are recognised:
 *   - an atom: a run of characters ended by whitespace, '(', '"', one of
 *     the separators ';' '/' '=', or the end of the input.  *pp is left
 *     on the terminating character, which is not consumed;
 *   - a quoted string: the returned pointer addresses the opening '"' and
 *     *pp is left just past the closing '"';
 *   - a single separator character ';', '/' or '=': the returned pointer
 *     addresses it and *pp is advanced past it.
 * A backslash makes the scanner skip the character that follows it.
 *
 * Returns a pointer to the first character of the token (the token ends
 * at the updated *pp), or 0 when no token can be produced: the input
 * ended with no atom in progress (including inside an unterminated string
 * or comment), a ')' was seen outside any comment or string, or a
 * backslash was the last character of the input.
 */
static const char *
getTok(const char **pp)
{
  /* The order matters: every value above init is a comment nesting depth
     (inComment is one level, inComment + 1 is two levels, and so on), so
     '(' and ')' can simply increment and decrement the state. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is a complete token;
         *pp stays on the NUL so the following call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      /* Ends an atom without being consumed; otherwise opens (or nests)
         a comment unless we are inside a quoted string. */
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* leave one level of comment nesting */
      else if (state != inString)
        return 0; /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++; /* the separator itself is a one-character token */
      break;
    case '\\':
      /* Skip the escaped character; the ++*pp after the switch steps
         over it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp; /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* inside a comment: a quote is ordinary comment text */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
|
|
|
|
|
2000-04-21 00:20:31 -04:00
|
|
|
/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * accepting either case for each letter of the token.
 *
 * key must be lowercase ASCII: each key character is matched against
 * itself and against its uppercase form, computed as 'A' + (c - 'a').
 *
 * Returns 1 when the token has exactly the length of key and every
 * character matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than the key.  Stop here: comparing against the
       terminator could spuriously succeed (the "uppercase" of '\0' works
       out to -32, a possible value of a signed char) and key would then be
       advanced past the end of its string. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  return *key == '\0';
}
|
|
|
|
|
2002-07-01 11:13:02 -04:00
|
|
|
void
|
|
|
|
getXMLCharset(const char *buf, char *charset)
|
1999-06-24 05:39:06 -04:00
|
|
|
{
|
|
|
|
const char *next, *p;
|
|
|
|
|
|
|
|
charset[0] = '\0';
|
|
|
|
next = buf;
|
|
|
|
p = getTok(&next);
|
|
|
|
if (matchkey(p, next, "text"))
|
|
|
|
strcpy(charset, "us-ascii");
|
|
|
|
else if (!matchkey(p, next, "application"))
|
|
|
|
return;
|
|
|
|
p = getTok(&next);
|
|
|
|
if (!p || *p != '/')
|
|
|
|
return;
|
|
|
|
p = getTok(&next);
|
|
|
|
if (matchkey(p, next, "xml"))
|
|
|
|
isXml = 1;
|
|
|
|
p = getTok(&next);
|
|
|
|
while (p) {
|
|
|
|
if (*p == ';') {
|
|
|
|
p = getTok(&next);
|
|
|
|
if (matchkey(p, next, "charset")) {
|
2002-07-01 11:13:02 -04:00
|
|
|
p = getTok(&next);
|
|
|
|
if (p && *p == '=') {
|
|
|
|
p = getTok(&next);
|
|
|
|
if (p) {
|
|
|
|
char *s = charset;
|
|
|
|
if (*p == '"') {
|
|
|
|
while (++p != next - 1) {
|
|
|
|
if (*p == '\\')
|
|
|
|
++p;
|
|
|
|
if (s == charset + CHARSET_MAX - 1) {
|
|
|
|
charset[0] = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
*s++ = *p;
|
|
|
|
}
|
|
|
|
*s++ = '\0';
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (next - p > CHARSET_MAX - 1)
|
|
|
|
break;
|
|
|
|
while (p != next)
|
|
|
|
*s++ = *p++;
|
|
|
|
*s = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
1999-06-24 05:39:06 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
p = getTok(&next);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-07-01 11:13:02 -04:00
|
|
|
int
|
|
|
|
main(int argc, char **argv)
|
1999-06-24 05:39:06 -04:00
|
|
|
{
|
|
|
|
char buf[CHARSET_MAX];
|
|
|
|
getXMLCharset(argv[1], buf);
|
|
|
|
printf("charset = \"%s\"\n", buf);
|
|
|
|
return 0;
|
|
|
|
}
|