541 lines
17 KiB
C
541 lines
17 KiB
C
/* $Id: genperf.c 1959 2007-09-18 05:38:06Z peter $
|
|
*
|
|
* Generate Minimal Perfect Hash (genperf)
|
|
*
|
|
* Copyright (C) 2006-2007 Peter Johnson
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
#include "tools/genperf/perfect.h"
|
|
#include "libyasm/compat-queue.h"
|
|
#include "libyasm/coretype.h"
|
|
#include "libyasm/errwarn.h"
|
|
|
|
typedef STAILQ_HEAD(slist, sval) slist;
|
|
typedef struct sval {
|
|
STAILQ_ENTRY(sval) link;
|
|
char *str;
|
|
} sval;
|
|
|
|
typedef STAILQ_HEAD(keyword_list, keyword) keyword_list;
|
|
typedef struct keyword {
|
|
STAILQ_ENTRY(keyword) link;
|
|
char *name;
|
|
char *args;
|
|
unsigned int line;
|
|
} keyword;
|
|
|
|
static unsigned int cur_line = 1;
|
|
static int errors = 0;
|
|
|
|
static void
|
|
report_error(const char *fmt, ...)
|
|
{
|
|
va_list ap;
|
|
|
|
fprintf(stderr, "%u: ", cur_line);
|
|
va_start(ap, fmt);
|
|
vfprintf(stderr, fmt, ap);
|
|
va_end(ap);
|
|
fputc('\n', stderr);
|
|
errors++;
|
|
}
|
|
|
|
void
|
|
yasm__fatal(const char *message, ...)
|
|
{
|
|
abort();
|
|
}
|
|
|
|
/* make the c output for the perfect hash tab array */
|
|
static void
|
|
make_c_tab(
|
|
FILE *f,
|
|
bstuff *tab, /* table indexed by b */
|
|
ub4 smax, /* range of scramble[] */
|
|
ub4 blen, /* b in 0..blen-1, power of 2 */
|
|
ub4 *scramble) /* used in final hash */
|
|
{
|
|
ub4 i;
|
|
/* table for the mapping for the perfect hash */
|
|
if (blen >= USE_SCRAMBLE) {
|
|
/* A way to make the 1-byte values in tab bigger */
|
|
if (smax > UB2MAXVAL+1) {
|
|
fprintf(f, " static const unsigned long scramble[] = {\n");
|
|
for (i=0; i<=UB1MAXVAL; i+=4)
|
|
fprintf(f, " 0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
|
|
scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
|
|
} else {
|
|
fprintf(f, " static const unsigned short scramble[] = {\n");
|
|
for (i=0; i<=UB1MAXVAL; i+=8)
|
|
fprintf(f,
|
|
" 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
|
|
scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
|
|
scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
|
|
}
|
|
fprintf(f, " };\n");
|
|
fprintf(f, "\n");
|
|
}
|
|
|
|
if (blen > 0) {
|
|
/* small adjustments to _a_ to make values distinct */
|
|
if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
|
|
fprintf(f, " static const unsigned char ");
|
|
else
|
|
fprintf(f, " static const unsigned short ");
|
|
fprintf(f, "tab[] = {\n");
|
|
|
|
if (blen < 16) {
|
|
for (i=0; i<blen; ++i)
|
|
fprintf(f, "%3ld,", scramble[tab[i].val_b]);
|
|
} else if (blen <= 1024) {
|
|
for (i=0; i<blen; i+=16)
|
|
fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
|
|
scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
|
|
scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
|
|
scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
|
|
scramble[tab[i+6].val_b], scramble[tab[i+7].val_b],
|
|
scramble[tab[i+8].val_b], scramble[tab[i+9].val_b],
|
|
scramble[tab[i+10].val_b], scramble[tab[i+11].val_b],
|
|
scramble[tab[i+12].val_b], scramble[tab[i+13].val_b],
|
|
scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]);
|
|
} else if (blen < USE_SCRAMBLE) {
|
|
for (i=0; i<blen; i+=8)
|
|
fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
|
|
scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
|
|
scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
|
|
scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
|
|
scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]);
|
|
} else {
|
|
for (i=0; i<blen; i+=16)
|
|
fprintf(f, " %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n",
|
|
tab[i+0].val_b, tab[i+1].val_b,
|
|
tab[i+2].val_b, tab[i+3].val_b,
|
|
tab[i+4].val_b, tab[i+5].val_b,
|
|
tab[i+6].val_b, tab[i+7].val_b,
|
|
tab[i+8].val_b, tab[i+9].val_b,
|
|
tab[i+10].val_b, tab[i+11].val_b,
|
|
tab[i+12].val_b, tab[i+13].val_b,
|
|
tab[i+14].val_b, tab[i+15].val_b);
|
|
}
|
|
fprintf(f, " };\n");
|
|
fprintf(f, "\n");
|
|
}
|
|
}
|
|
|
|
static void
|
|
perfect_gen(FILE *out, const char *lookup_function_name,
|
|
const char *struct_name, keyword_list *kws,
|
|
const char *filename)
|
|
{
|
|
ub4 nkeys;
|
|
key *keys;
|
|
hashform form;
|
|
bstuff *tab; /* table indexed by b */
|
|
hstuff *tabh; /* table indexed by hash value */
|
|
ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */
|
|
ub4 alen; /* a in 0..alen-1, a power of 2 */
|
|
ub4 blen; /* b in 0..blen-1, a power of 2 */
|
|
ub4 salt; /* a parameter to the hash function */
|
|
gencode final; /* code for final hash */
|
|
ub4 i;
|
|
ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
|
|
char buf[10][80]; /* buffer for generated code */
|
|
char *buf2[10]; /* also for generated code */
|
|
keyword *kw;
|
|
|
|
/* perfect hash configuration */
|
|
form.mode = NORMAL_HM;
|
|
form.hashtype = STRING_HT;
|
|
form.perfect = MINIMAL_HP;
|
|
form.speed = SLOW_HS;
|
|
|
|
/* set up code for final hash */
|
|
final.line = buf2;
|
|
final.used = 0;
|
|
final.len = 10;
|
|
for (i=0; i<10; i++)
|
|
final.line[i] = buf[i];
|
|
|
|
/* build list of keys */
|
|
nkeys = 0;
|
|
keys = NULL;
|
|
STAILQ_FOREACH(kw, kws, link) {
|
|
key *k = yasm_xmalloc(sizeof(key));
|
|
|
|
k->name_k = yasm__xstrdup(kw->name);
|
|
k->len_k = (ub4)strlen(kw->name);
|
|
k->next_k = keys;
|
|
keys = k;
|
|
nkeys++;
|
|
}
|
|
|
|
/* find the hash */
|
|
findhash(&tab, &tabh, &alen, &blen, &salt, &final,
|
|
scramble, &smax, keys, nkeys, &form);
|
|
|
|
/* The hash function beginning */
|
|
fprintf(out, "static const struct %s *\n", struct_name);
|
|
fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name);
|
|
fprintf(out, "{\n");
|
|
|
|
/* output the dir table: this should loop up to smax for NORMAL_HP,
|
|
* or up to pakd.nkeys for MINIMAL_HP.
|
|
*/
|
|
fprintf(out, " static const struct %s pd[%lu] = {\n", struct_name, nkeys);
|
|
for (i=0; i<nkeys; i++) {
|
|
if (tabh[i].key_h) {
|
|
STAILQ_FOREACH(kw, kws, link) {
|
|
if (strcmp(kw->name, tabh[i].key_h->name_k) == 0)
|
|
break;
|
|
}
|
|
if (!kw) {
|
|
report_error("internal error: could not find `%s'",
|
|
tabh[i].key_h->name_k);
|
|
break;
|
|
}
|
|
fprintf(out, "#line %u \"%s\"\n", kw->line, filename);
|
|
fprintf(out, " {\"%s\"%s}", kw->name, kw->args);
|
|
} else
|
|
fprintf(out, " { NULL }");
|
|
|
|
if (i < nkeys-1)
|
|
fprintf(out, ",");
|
|
fprintf(out, "\n");
|
|
}
|
|
fprintf(out, " };\n");
|
|
|
|
/* output the hash tab[] array */
|
|
make_c_tab(out, tab, smax, blen, scramble);
|
|
|
|
/* The hash function body */
|
|
fprintf(out, " const struct %s *ret;\n", struct_name);
|
|
for (i=0; i<final.used; ++i)
|
|
fprintf(out, final.line[i]);
|
|
fprintf(out, " if (rsl >= %lu) return NULL;\n", nkeys);
|
|
fprintf(out, " ret = &pd[rsl];\n");
|
|
fprintf(out, " if (strcmp(key, ret->name) != 0) return NULL;\n");
|
|
fprintf(out, " return ret;\n");
|
|
fprintf(out, "}\n");
|
|
fprintf(out, "\n");
|
|
|
|
free(tab);
|
|
free(tabh);
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
FILE *in, *out;
|
|
size_t i;
|
|
char *ch;
|
|
static char line[1024], tmp[1024];
|
|
static char struct_name[128] = "";
|
|
static char lookup_function_name[128] = "in_word_set";
|
|
static char language[16] = "";
|
|
static char delimiters[16] = ",\r\n";
|
|
static char name[128];
|
|
static char filename[768];
|
|
int need_struct = 0;
|
|
int have_struct = 0;
|
|
int go_keywords = 0;
|
|
int ignore_case = 0;
|
|
int compare_strncmp = 0;
|
|
int readonly_tables = 0;
|
|
slist usercode, usercode2;
|
|
keyword_list keywords;
|
|
sval *sv;
|
|
keyword *kw;
|
|
|
|
if (argc != 3) {
|
|
fprintf(stderr, "Usage: genperf <in> <out>\n");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
in = fopen(argv[1], "rt");
|
|
if (!in) {
|
|
fprintf(stderr, "Could not open `%s' for reading\n", argv[1]);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
ch = argv[1];
|
|
i = 0;
|
|
while (*ch && i < 767) {
|
|
if (*ch == '\\') {
|
|
filename[i++] = '/';
|
|
ch++;
|
|
} else
|
|
filename[i++] = *ch++;
|
|
}
|
|
filename[i] = '\0';
|
|
|
|
STAILQ_INIT(&usercode);
|
|
STAILQ_INIT(&usercode2);
|
|
STAILQ_INIT(&keywords);
|
|
|
|
/* Parse declarations section */
|
|
while (fgets(line, 1024, in)) {
|
|
/* Comments start with # as the first thing on a line */
|
|
if (line[0] == '#') {
|
|
cur_line++;
|
|
continue;
|
|
}
|
|
|
|
/* Handle structure declaration */
|
|
if (strncmp(line, "struct", 6) == 0) {
|
|
int braces;
|
|
|
|
if (!need_struct) {
|
|
report_error("struct without %%struct-type declaration");
|
|
return EXIT_FAILURE;
|
|
}
|
|
if (have_struct) {
|
|
report_error("more than one struct declaration");
|
|
return EXIT_FAILURE;
|
|
}
|
|
have_struct = 1;
|
|
|
|
/* copy struct name */
|
|
ch = &line[6];
|
|
while (isspace(*ch))
|
|
ch++;
|
|
i = 0;
|
|
while ((isalnum(*ch) || *ch == '_') && i < 127)
|
|
struct_name[i++] = *ch++;
|
|
if (i == 127) {
|
|
report_error("struct name too long");
|
|
return EXIT_FAILURE;
|
|
}
|
|
struct_name[i] = '\0';
|
|
|
|
sv = yasm_xmalloc(sizeof(sval));
|
|
sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename);
|
|
sv->str = yasm__xstrdup(tmp);
|
|
STAILQ_INSERT_TAIL(&usercode, sv, link);
|
|
|
|
braces = 0;
|
|
do {
|
|
/* count braces to determine when we're done */
|
|
ch = line;
|
|
while (*ch != '\0') {
|
|
if (*ch == '{')
|
|
braces++;
|
|
if (*ch == '}')
|
|
braces--;
|
|
ch++;
|
|
}
|
|
sv = yasm_xmalloc(sizeof(sval));
|
|
sv->str = yasm__xstrdup(line);
|
|
STAILQ_INSERT_TAIL(&usercode, sv, link);
|
|
cur_line++;
|
|
if (braces <= 0)
|
|
break;
|
|
} while (fgets(line, 1024, in));
|
|
cur_line++;
|
|
continue;
|
|
}
|
|
|
|
/* Ignore non-declaration lines */
|
|
if (line[0] != '%') {
|
|
cur_line++;
|
|
continue;
|
|
}
|
|
|
|
/* %% terminates declarations section */
|
|
if (line[1] == '%') {
|
|
if (need_struct && !have_struct) {
|
|
report_error("%%struct-type declaration, but no struct found");
|
|
return EXIT_FAILURE;
|
|
}
|
|
go_keywords = 1;
|
|
break; /* move on to keywords section */
|
|
}
|
|
|
|
/* %{ begins a verbatim code section that ends with %} */
|
|
if (line[1] == '{') {
|
|
sv = yasm_xmalloc(sizeof(sval));
|
|
sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
|
|
sv->str = yasm__xstrdup(tmp);
|
|
STAILQ_INSERT_TAIL(&usercode, sv, link);
|
|
|
|
while (fgets(line, 1024, in)) {
|
|
cur_line++;
|
|
if (line[0] == '%' && line[1] == '}')
|
|
break;
|
|
sv = yasm_xmalloc(sizeof(sval));
|
|
sv->str = yasm__xstrdup(line);
|
|
STAILQ_INSERT_TAIL(&usercode, sv, link);
|
|
}
|
|
cur_line++;
|
|
continue;
|
|
}
|
|
|
|
if (strncmp(&line[1], "ignore-case", 11) == 0) {
|
|
ignore_case = 1;
|
|
} else if (strncmp(&line[1], "compare-strncmp", 15) == 0) {
|
|
compare_strncmp = 1;
|
|
} else if (strncmp(&line[1], "readonly-tables", 15) == 0) {
|
|
readonly_tables = 1;
|
|
} else if (strncmp(&line[1], "language=", 9) == 0) {
|
|
ch = &line[10];
|
|
i = 0;
|
|
while (*ch != '\n' && i<15)
|
|
language[i++] = *ch++;
|
|
language[i] = '\0';
|
|
} else if (strncmp(&line[1], "delimiters=", 11) == 0) {
|
|
ch = &line[12];
|
|
i = 0;
|
|
while (i<15)
|
|
delimiters[i++] = *ch++;
|
|
delimiters[i] = '\0';
|
|
} else if (strncmp(&line[1], "enum", 4) == 0) {
|
|
/* unused */
|
|
} else if (strncmp(&line[1], "struct-type", 11) == 0) {
|
|
need_struct = 1;
|
|
} else if (strncmp(&line[1], "define", 6) == 0) {
|
|
/* Several different defines we need to handle */
|
|
ch = &line[7];
|
|
while (isspace(*ch))
|
|
ch++;
|
|
|
|
if (strncmp(ch, "hash-function-name", 18) == 0) {
|
|
/* unused */
|
|
} else if (strncmp(ch, "lookup-function-name", 20) == 0) {
|
|
ch = &line[7+20+1];
|
|
while (isspace(*ch))
|
|
ch++;
|
|
i = 0;
|
|
while ((isalnum(*ch) || *ch == '_') && i < 127)
|
|
lookup_function_name[i++] = *ch++;
|
|
if (i == 127) {
|
|
report_error("struct name too long");
|
|
return EXIT_FAILURE;
|
|
}
|
|
lookup_function_name[i] = '\0';
|
|
} else {
|
|
fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line,
|
|
line);
|
|
}
|
|
} else {
|
|
fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line,
|
|
line);
|
|
}
|
|
|
|
cur_line++;
|
|
}
|
|
|
|
if (!go_keywords) {
|
|
report_error("no keywords section found");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
/* Parse keywords section */
|
|
while (fgets(line, 1024, in)) {
|
|
char *d;
|
|
|
|
/* Comments start with # as the first thing on a line */
|
|
if (line[0] == '#') {
|
|
cur_line++;
|
|
continue;
|
|
}
|
|
|
|
/* Keywords section terminated with %% */
|
|
if (line[0] == '%' && line[1] == '%')
|
|
break;
|
|
|
|
/* Look for name */
|
|
ch = &line[0];
|
|
i = 0;
|
|
while (strchr(delimiters, *ch) == NULL && i < 127)
|
|
name[i++] = *ch++;
|
|
if (i == 127) {
|
|
report_error("keyword name too long");
|
|
return EXIT_FAILURE;
|
|
}
|
|
name[i] = '\0';
|
|
|
|
/* Strip EOL */
|
|
d = strrchr(ch, '\n');
|
|
if (d)
|
|
*d = '\0';
|
|
d = strrchr(ch, '\r');
|
|
if (d)
|
|
*d = '\0';
|
|
kw = yasm_xmalloc(sizeof(keyword));
|
|
kw->name = yasm__xstrdup(name);
|
|
kw->args = yasm__xstrdup(ch);
|
|
kw->line = cur_line;
|
|
STAILQ_INSERT_TAIL(&keywords, kw, link);
|
|
cur_line++;
|
|
}
|
|
|
|
if (errors > 0)
|
|
return EXIT_FAILURE;
|
|
|
|
/* Pull in any end code */
|
|
if (!feof(in)) {
|
|
sv = yasm_xmalloc(sizeof(sval));
|
|
sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
|
|
sv->str = yasm__xstrdup(tmp);
|
|
STAILQ_INSERT_TAIL(&usercode2, sv, link);
|
|
|
|
while (fgets(line, 1024, in)) {
|
|
sv = yasm_xmalloc(sizeof(sval));
|
|
sv->str = yasm__xstrdup(line);
|
|
STAILQ_INSERT_TAIL(&usercode2, sv, link);
|
|
}
|
|
}
|
|
|
|
/* output code */
|
|
out = fopen(argv[2], "wt");
|
|
if (!out) {
|
|
fprintf(stderr, "Could not open `%s' for writing\n", argv[2]);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
fprintf(out, "/* %s code produced by genperf */\n", language);
|
|
fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]);
|
|
|
|
STAILQ_FOREACH(sv, &usercode, link)
|
|
fprintf(out, "%s", sv->str);
|
|
|
|
/* Get perfect hash */
|
|
perfect_gen(out, lookup_function_name, struct_name, &keywords, filename);
|
|
|
|
STAILQ_FOREACH(sv, &usercode2, link)
|
|
fprintf(out, "%s", sv->str);
|
|
|
|
fclose(out);
|
|
|
|
if (errors > 0) {
|
|
remove(argv[2]);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|