diff --git a/En_words b/stringHash4MCU_/En_words similarity index 100% rename from En_words rename to stringHash4MCU_/En_words diff --git a/stringHash4MCU_/Readme b/stringHash4MCU_/Readme new file mode 100644 index 0000000..4e7cf15 --- /dev/null +++ b/stringHash4MCU_/Readme @@ -0,0 +1,6 @@ +hashtest.c allows to test different hash functions on your dictionary +hashgen.c will generate two files by dictionary: source and header +use test.c to test generated files + +Compile: gcc -lusefull_macro file.c -o file +gcc -lusefull_macro test.c hash.c -o test diff --git a/stringHash4MCU_/hashgen.c b/stringHash4MCU_/hashgen.c new file mode 100644 index 0000000..c4f2a7f --- /dev/null +++ b/stringHash4MCU_/hashgen.c @@ -0,0 +1,301 @@ +/* + * Copyright 2022 Edward V. Emelianov . + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#define ALLOCSZ (5000) +#define DJB2 + +typedef struct{ + char *dict; + char *headerfile; + char *sourcefile; + int genfunc; +} glob_pars; + +static glob_pars G = {.headerfile = "hash.h", .sourcefile = "hash.c"}; +static int help = 0; +static myoption cmdlnopts[] = { + {"help", NO_ARGS, NULL, 'h', arg_int, APTR(&help), "show this help"}, + {"dict", NEED_ARG, NULL, 'd', arg_string, APTR(&G.dict), "dictionary file"}, + {"header", NEED_ARG, NULL, 'H', arg_string, APTR(&G.headerfile),"output header filename"}, + {"source", NEED_ARG, NULL, 'S', arg_string, APTR(&G.sourcefile),"output source filename"}, + {"genfunc", NO_ARGS, NULL, 'F', arg_int, APTR(&G.genfunc), "generate function bodys"}, + end_option +}; +static void parse_args(int argc, char **argv){ + parseargs(&argc, &argv, cmdlnopts); + if(help) showhelp(-1, cmdlnopts); + if(argc > 0){ + red("Unused arguments:\n"); + for(int i = 0; i < argc; ++i) + printf("%s ", argv[i]); + printf("\n"); + showhelp(-1, cmdlnopts); + } +} + +#define HASHFNO (3) +// djb2 & sdbm: http://www.cse.yorku.ca/~oz/hash.html +static uint32_t djb2(const char *str){ + uint32_t hash = 5381; + uint32_t c; + while((c = (uint32_t)*str++)) + hash = ((hash << 7) + hash) + c; + //hash = hash * 31 + c; + //hash = hash * 33 + c; + return hash; +} +static uint32_t sdbm(const char *str){ + uint32_t hash = 5381; + uint32_t c; + while((c = (uint32_t)*str++)) + hash = c + (hash << 6) + (hash << 16) - hash; + return hash; +} +// jenkins: https://en.wikipedia.org/wiki/Jenkins_hash_function +static uint32_t jenkins(const char *str){ + uint32_t hash = 0, c; + while((c = (uint32_t)*str++)){ + hash += c; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +static const char *hashsources[HASHFNO] = { +"static uint32_t hashf(const char *str){\n\ + uint32_t hash = 5381;\n\ + uint32_t c;\n\ + while((c = (uint32_t)*str++))\n\ + hash = ((hash << 7) + hash) + c;\n\ + return hash;\n\ +}\n", +"static uint32_t hashf(const char *str){\n\ + uint32_t hash = 5381;\n\ + uint32_t c;\n\ + while((c = (uint32_t)*str++))\n\ + hash = c + (hash << 6) + (hash << 16) - hash;\n\ + return hash;\n\ +}\n", +"static uint32_t hashf(const char *str){\n\ + uint32_t hash = 0, c;\n\ + while((c = (uint32_t)*str++)){\n\ + hash += c;\n\ + hash += (hash << 10);\n\ + hash ^= (hash >> 6);\n\ + }\n\ + hash += (hash << 3);\n\ + hash ^= (hash >> 11);\n\ + hash += (hash << 15);\n\ + return hash;\n\ +}\n" +}; + +static uint32_t (*hash[HASHFNO])(const char *str) = {djb2, sdbm, jenkins}; +static const char *hashnames[HASHFNO] = {"DJB2", "SDBM", "Jenkins"}; + +typedef struct{ + char str[32]; + uint32_t hash; +} strhash; + +static int sorthashesH(const void *a, const void *b){ + register uint32_t h1 = ((strhash*)a)->hash, h2 = ((strhash*)b)->hash; + if(h1 > h2) return 1; + else if(h1 < h2) return -1; + return 0; +} +static int sorthashesS(const void *a, const void *b){ + char *s1 = ((strhash*)a)->str, *s2 = ((strhash*)b)->str; + return strcmp(s1, s2); +} + +static FILE *openoutp(const char *name){ + FILE *f = fopen(name, "w"); + if(!f) ERR("Can't open file %s", name); + return f; +} + +static char *macroname(const char *cmd){ + static char macro[32]; + int i = 0; + while(i < 31 && *cmd){ + char c = *cmd++; + if(!isalnum(c)) c = '_'; + if(islower(c)) c = toupper(c); + macro[i++] = c; + } + macro[i] = 0; + return macro; +} + +static char *fnname(const char *cmd){ + static char fn[32]; + int i = 0; + while(i < 31 && *cmd){ + char c = *cmd++; + if(!isalpha(c)) c = '_'; + if(isupper(c)) c = tolower(c); + fn[i++] = c; + } + fn[i] = 0; + return fn; +} + +static const char *fhdr = +"int parsecmd(char *cmdwargs){\n\ + if(!cmdwargs || !*cmdwargs) return 0;\n\ + char cmd[32];\n\ + int i = 0;\n\ + char *args = cmdwargs;\n\ + while(*args && *args < 33) ++args;\n\ + if(!args || !*args) return 0;\n\ + while(*args > 33 && i < 31){\n\ + cmd[i++] = *args++;\n\ + }\n\ + cmd[i] = 0;\n\ + if(i == 31) args = NULL;\n\ + if(args){\n\ + while(*args && *args < 33) ++args;\n\ + if(!*args) args = NULL;\n\ + }\n\ + uint32_t h = hashf(cmd);\n\ + switch(h){\n" +; +static const char *ffooter = +" default: return 0;\n\ + }\n\ + return 0;\n\ +}\n\n" +; +static const char *fns = +"TRUE_INLINE int fn_%s(_U_ uint32_t hash, _U_ char *args){ // %s (%u)\n\ + return 1;\n\ +}\n\n" +; +static const char *fproto = "int parsecmd(char *cmdwargs);\n\n"; +static const char *sw = +" case CMD_%s:\n\ + return fn_%s(h, args);\n\ + break;\n"; +static const char *srchdr = +"#include \n\ +#include \n\ +#include \"%s\"\n\n\ +#ifndef _U_\n\ +#define _U_ __attribute__((__unused__))\n\ +#endif\n\n\ +#ifndef TRUE_INLINE\n\ +#define TRUE_INLINE __attribute__((always_inline)) static inline\n\ +#endif\n\n" +; + +static void build(strhash *H, int hno, int hlen){ + green("Generate files for hash function '%s'\n", hashnames[hno]); + int lmax = 1; + for(int i = 0; i < hlen; ++i){ + int l = strlen(H[i].str); + if(l > lmax){ + lmax = l; + } + } + lmax = (lmax + 3)/4; + lmax *= 4; + // resort H by strings + qsort(H, hlen, sizeof(strhash), sorthashesS); + FILE *source = openoutp(G.sourcefile), *header = openoutp(G.headerfile); + fprintf(source, srchdr, G.headerfile); + if(G.genfunc){ + for(int i = 0; i < hlen; ++i){ + //fprintf(source, fns, "popo", "lolo", 12); + fprintf(source, fns, fnname(H[i].str), H[i].str, H[i].hash); + } + } + fprintf(header, "%s", fproto); + fprintf(source, "%s\n", hashsources[hno]); + fprintf(source, "%s", fhdr); + for(int i = 0; i < hlen; ++i){ + char *m = macroname(H[i].str); + fprintf(source, sw, m, fnname(H[i].str)); + fprintf(header, "#define CMD_%-*s (%u)\n", lmax, m, H[i].hash); + } + fprintf(source, "%s", ffooter); + fclose(source); + fclose(header); +} + +int main(int argc, char **argv){ + initial_setup(); + parse_args(argc, argv); + if(!G.dict) ERRX("point dictionary file"); + if(!G.headerfile) ERRX("point header source file"); + if(!G.sourcefile) ERRX("point c source file"); + mmapbuf *b = My_mmap(G.dict); + if(!b) ERRX("Can't open %s", G.dict); + char *word = b->data; + strhash *H = MALLOC(strhash, ALLOCSZ); + int l = ALLOCSZ, idx = 0; + while(*word){ + if(idx >= l){ + l += ALLOCSZ; + H = realloc(H, sizeof(strhash) * l); + if(!H) ERR("realloc()"); + } + while(*word && *word < 33) ++word; + if(!*word) break; + char *nxt = strchr(word, '\n'); + if(nxt){ + int len = nxt - word; + if(len > 31) len = 31; + strncpy(H[idx].str, word, len); + H[idx].str[len] = 0; + }else{ + snprintf(H[idx].str, 31, "%s", word); + } + ++idx; + if(!nxt) break; + word = nxt + 1; + } + int imax1 = idx - 1, hno = 0; + for(; hno < HASHFNO; ++hno){ + for(int i = 0; i < idx; ++i) + H[i].hash = hash[hno](H[i].str); + qsort(H, idx, sizeof(strhash), sorthashesH); + strhash *p = H; + int nmatches = 0; + for(int i = 0; i < imax1; ++i, ++p){ + if(p->hash == p[1].hash) ++nmatches; + } + if(nmatches == 0){ + build(H, hno, idx); + break; + } + WARNX("Function '%s' have %d matches", hashnames[hno], nmatches); + } + if(hno == HASHFNO) WARNX("Can't find proper hash function"); + FREE(H); + My_munmap(b); + return 0; +} diff --git a/hashtest.c b/stringHash4MCU_/hashtest.c similarity index 100% rename from hashtest.c rename to stringHash4MCU_/hashtest.c diff --git a/stringHash4MCU_/test.c b/stringHash4MCU_/test.c new file mode 100644 index 0000000..f50870b --- /dev/null +++ b/stringHash4MCU_/test.c @@ -0,0 +1,18 @@ +#include +#include +#include + +#include "hash.h" + +/* +static int f(uint32_t h, const char *args){ + printf("%u -> '%s'\n", h, args); + return 1; +}*/ + +int main(int argc, char **argv){ + if(argc != 2) return 1; + if(!parsecmd(argv[1])) printf("%s not found\n", argv[1]); + else printf("All OK\n"); + return 0; +} diff --git a/stringHash4MCU_/testdic b/stringHash4MCU_/testdic new file mode 100644 index 0000000..20f8994 --- /dev/null +++ b/stringHash4MCU_/testdic @@ -0,0 +1,11 @@ +hello +world +what +put +change +set +clear +reset +get +out +in