fixed wrong words in En_words & bug in sorting in hashtest.c

This commit is contained in:
Edward Emelianov 2022-12-08 09:22:46 +03:00
parent 0079aa2551
commit 65c17d0ff1
2 changed files with 21 additions and 124 deletions

116
En_words
View File

@ -1181,7 +1181,6 @@ aftermarket
aftermath aftermath
aftermost aftermost
afternoon afternoon
afternoon's
afternoons afternoons
after-sale after-sale
aftersale aftersale
@ -1307,7 +1306,6 @@ agressive
agribusiness agribusiness
agribusinesses agribusinesses
agric agric
agric.
agricultural agricultural
agriculturalist agriculturalist
agriculturally agriculturally
@ -1472,7 +1470,6 @@ akc
akimbo akimbo
akin akin
al al
al.
alabama alabama
alabamian alabamian
alabaster alabaster
@ -1858,13 +1855,10 @@ alveolar
alveolus alveolus
always always
aly aly
a.m
a.m.
am am
ama ama
amah amah
amain amain
amalg.
amalgam amalgam
amalgamate amalgamate
amalgamated amalgamated
@ -2826,7 +2820,6 @@ appetizing
appetizingly appetizingly
appian appian
appl appl
appl.
applaud applaud
applauder applauder
applauders applauders
@ -2920,7 +2913,6 @@ approvement
approving approving
approvingly approvingly
approx approx
approx.
approximate approximate
approximately approximately
approximating approximating
@ -3195,7 +3187,6 @@ armour
armoured armoured
armpit armpit
armrest armrest
arm's
arms arms
armstrong armstrong
army army
@ -3962,7 +3953,6 @@ avast
avatar avatar
avaunt avaunt
ave ave
ave.
avenge avenge
avenged avenged
avenger avenger
@ -6783,12 +6773,9 @@ churl
churlish churlish
churn churn
chute chute
c.i.a.
cia cia
cicada cicada
cider cider
c.i.f
c.i.f.
cif cif
cigar cigar
cigarette cigarette
@ -6843,8 +6830,6 @@ circumvent
circumvention circumvention
circus circus
cirrhosis cirrhosis
c.i.s
c.i.s.
cis cis
cistern cistern
citadel citadel
@ -7070,7 +7055,6 @@ clutch
clutter clutter
cnn cnn
co co
co.
coach coach
coachman coachman
coacting coacting
@ -7138,7 +7122,6 @@ coco
cocoa cocoa
coconut coconut
cocoon cocoon
c.o.d.
cod cod
coddle coddle
code code
@ -7867,7 +7850,6 @@ conjuncture
conjurer conjurer
conk conk
conn conn
conn.
connate connate
connatural connatural
connect connect
@ -8335,7 +8317,6 @@ coronet
coroutine coroutine
coroutining coroutining
corp corp
corp.
corporal corporal
corporate corporate
corporation corporation
@ -8343,7 +8324,6 @@ corporative
corporeal corporeal
corporeality corporeality
corposant corposant
corp.'s
corps corps
corpse corpse
corpsman corpsman
@ -9884,7 +9864,6 @@ depriving
deprocedure deprocedure
deproceduring deproceduring
dept dept
dept.
depth depth
depth-first depth-first
deputation deputation
@ -10857,11 +10836,8 @@ dj
dlimly dlimly
dmv dmv
do do
d.o.b
d.o.b.
dob dob
doc doc
doc.
docile docile
dock dock
docked docked
@ -10869,7 +10845,6 @@ docker
docket docket
docking docking
dockyard dockyard
docs.
doctor doctor
doctoral doctoral
doctorate doctorate
@ -11059,7 +11034,6 @@ doze
dozen dozen
dp dp
dpb dpb
dr.
drab drab
drabness drabness
drachma drachma
@ -11203,7 +11177,6 @@ dry-dock
dryer dryer
dryly dryly
dryness dryness
d.t.'s
dual dual
dual-head dual-head
dual-headed dual-headed
@ -11361,7 +11334,6 @@ dysprosium
dystrophy dystrophy
dysurea dysurea
ea ea
ea.
each each
eager eager
eagerly eagerly
@ -11539,8 +11511,6 @@ effrontery
effulgent effulgent
effusion effusion
effusive effusive
e.g
e.g.
eg eg
egalitarian egalitarian
egg egg
@ -11850,7 +11820,6 @@ encircle
encircled encircled
encirclement encirclement
encl encl
encl.
enclave enclave
enclose enclose
enclosed enclosed
@ -12172,7 +12141,6 @@ equiprobable
equitable equitable
equitably equitably
equity equity
equiv.
equivalence equivalence
equivalent equivalent
equivalent-to equivalent-to
@ -12255,7 +12223,7 @@ espouse
espoused espoused
espousing espousing
espy espy
esq. esq
esquire esquire
essay essay
essayist essayist
@ -12290,7 +12258,6 @@ estuary
et et
eta eta
etc etc
etc.
etcetera etcetera
etch etch
etched etched
@ -13081,7 +13048,6 @@ feces
feckless feckless
fecund fecund
fed fed
fed.
federal federal
federalism federalism
federalist federalist
@ -13242,7 +13208,6 @@ fifty
fifty-fifty fifty-fifty
fifty/fifty fifty/fifty
fig fig
fig.
fight fight
fighter fighter
fighter-bomber fighter-bomber
@ -13250,7 +13215,6 @@ fighter-pilot
fighting fighting
figment figment
figs figs
figs.
figurative figurative
figuratively figuratively
figure figure
@ -13653,8 +13617,6 @@ fmt
foal foal
foam foam
foamy foamy
f.o.b
f.o.b.
fob fob
focal focal
foci foci
@ -14131,7 +14093,6 @@ frying
frying-pan frying-pan
fsb fsb
ft ft
ft.
ftc ftc
fuchsia fuchsia
fuchsin fuchsin
@ -14703,7 +14664,6 @@ glycerin
glycerine glycerine
glycogen glycogen
glycol glycol
g.m.
gmt gmt
gnarl gnarl
gnarled gnarled
@ -16168,9 +16128,7 @@ however
howitzer howitzer
howl howl
howsoever howsoever
h.p.
hq hq
hrs.
hub hub
hubbub hubbub
hubcap hubcap
@ -16420,8 +16378,6 @@ idol
idolator idolator
idolize idolize
idyllic idyllic
i.e
i.e.
ie ie
ier ier
if if
@ -16474,7 +16430,6 @@ illumination
illusion illusion
illusive illusive
illusory illusory
illust.
illustrate illustrate
illustrated illustrated
illustrating illustrating
@ -16733,7 +16688,6 @@ inauguration
inauspicious inauspicious
inborn inborn
inc inc
inc.
incalculable incalculable
incandescence incandescence
incandescent incandescent
@ -16849,7 +16803,6 @@ inconvenient
inconvertible inconvertible
inconvincible inconvincible
incoordination incoordination
incorpor.
incorporate incorporate
incorporated incorporated
incorporation incorporation
@ -16876,7 +16829,6 @@ incriminate
incriminated incriminated
incriminating incriminating
incriminatory incriminatory
inc.'s
incubate incubate
incubated incubated
incubating incubating
@ -17074,7 +17026,6 @@ influential
influenza influenza
influx influx
info info
info.
infological infological
inform inform
informal informal
@ -17918,7 +17869,6 @@ jowl
joy joy
joystick joystick
jr jr
jr.
jubilant jubilant
jubilee jubilee
judaic judaic
@ -18234,7 +18184,6 @@ labors
labour labour
labourite labourite
labours labours
labs.
labware labware
labyrinth labyrinth
labyrinthine labyrinthine
@ -19149,7 +19098,6 @@ loyal
loyalty loyalty
lozenge lozenge
ltd ltd
ltd.
lubricant lubricant
lubricate lubricate
lubricated lubricated
@ -19531,7 +19479,6 @@ maple
mapped mapped
mapping mapping
mar mar
mar.
marabou marabou
maraschino maraschino
marasmus marasmus
@ -19719,7 +19666,6 @@ mayonnaise
mayor mayor
maze maze
mazurka mazurka
m.b.a.
mbo mbo
mbp mbp
mbyte mbyte
@ -20007,7 +19953,6 @@ mexico
mezzanine mezzanine
mezzo-soprano mezzo-soprano
mfg mfg
mfg.
mg mg
mgm mgm
mgmt mgmt
@ -20373,7 +20318,6 @@ mockingbird
mock-up mock-up
mockup mockup
mod mod
mod.
modal modal
modality modality
mode mode
@ -20712,16 +20656,12 @@ mozambique
mpg mpg
mph mph
mr mr
mr.
mrs mrs
mrs.
ms ms
ms.
msc msc
msdos msdos
msds msds
mtn mtn
mtn.
much much
much-needed much-needed
mucilage mucilage
@ -21055,7 +20995,6 @@ nazi
nazism nazism
nba nba
ndearment ndearment
n.e.
neal neal
neanderthal neanderthal
near near
@ -21315,7 +21254,6 @@ nixon
nizhniy nizhniy
nlrb nlrb
no no
no.
noah noah
noahcian noahcian
noahcic noahcic
@ -21662,9 +21600,6 @@ nuts
nutter nutter
nutty nutty
nuzzle nuzzle
n.w.
n.y
n.y.
ny ny
nylon nylon
nymph nymph
@ -21823,7 +21758,6 @@ octopus
octree octree
ocular ocular
oculist oculist
o.d.
odd odd
oddest oddest
oddity oddity
@ -21921,8 +21855,6 @@ oilseed
oilspill oilspill
oily oily
ointment ointment
o.k
o.k.
okapi okapi
okay okay
oklahoma oklahoma
@ -22597,7 +22529,6 @@ oxygenation
oy oy
oyster oyster
oz oz
oz.
ozocerite ozocerite
ozone ozone
pa pa
@ -23988,7 +23919,6 @@ phasemeter
phaseout phaseout
phasic phasic
phasing phasing
ph.d
pheasant pheasant
phenol phenol
phenomena phenomena
@ -24447,9 +24377,6 @@ pizzeria
pizzicato pizzicato
pkg pkg
pkwy pkwy
p.&l.
p.l.
p/l
placable placable
placard placard
placarder placarder
@ -24678,7 +24605,6 @@ plowing
plowman plowman
plowshare plowshare
ploy ploy
p.&l.s.
pluck pluck
pluckily pluckily
pluckiness pluckiness
@ -24734,21 +24660,17 @@ pluvial
ply ply
plymouth plymouth
plywood plywood
p.m
p.m.
pm pm
pneumatic pneumatic
pneumatically pneumatically
pneumatically-operated pneumatically-operated
pneumonia pneumonia
pneumonitis pneumonitis
p.o.
poa poa
poach poach
poached poached
poacher poacher
poaching poaching
p.o.box
pochard pochard
pock pock
pocked pocked
@ -25253,7 +25175,6 @@ pouter
pouting pouting
poverty poverty
poverty-stricken poverty-stricken
p.o.w.
pow pow
powder powder
powdered powdered
@ -25936,7 +25857,6 @@ productive
productiveness productiveness
productivity productivity
prof prof
prof.
profanation profanation
profanatory profanatory
profane profane
@ -26440,7 +26360,6 @@ psychotically
psychotropic psychotropic
psychrophyl psychrophyl
pt pt
pt.
ptarmigan ptarmigan
pterodactyl pterodactyl
ptolemaic ptolemaic
@ -27865,7 +27784,6 @@ refutable
refutation refutation
refute refute
reg reg
reg.
regain regain
regal regal
regale regale
@ -28700,7 +28618,6 @@ rio
riot riot
rioter rioter
riotous riotous
r.i.p.
rip rip
ripcord ripcord
ripe ripe
@ -28796,7 +28713,6 @@ rogers
roget roget
rogue rogue
roguish roguish
r.o.i.
roi roi
roil roil
role role
@ -30114,7 +30030,6 @@ scythe
scythian scythian
sdcl sdcl
sdr sdr
s.e.
sea sea
seabed seabed
seabird seabird
@ -32681,7 +32596,6 @@ smut
smuts smuts
smuttiness smuttiness
smutty smutty
s.n.
snack snack
snack-bar snack-bar
snaffle snaffle
@ -33186,7 +33100,6 @@ soothingly
soothsayer soothsayer
soothsaying soothsaying
sooty sooty
s.o.p
sop sop
soph soph
sophia sophia
@ -33494,7 +33407,6 @@ spearmint
spearmints spearmints
speary speary
spec spec
spec.
special special
specialist specialist
speciality speciality
@ -33899,7 +33811,6 @@ spotty
spousal spousal
spouse spouse
spout spout
spp.
sprain sprain
sprained sprained
spraining spraining
@ -33984,7 +33895,6 @@ spy
spyglass spyglass
spying spying
sq sq
sq.
squab squab
squabble squabble
squabbler squabbler
@ -34084,8 +33994,6 @@ ssgt
ssts ssts
ssw ssw
st st
st.
st.-
stab stab
stabbed stabbed
stabbing stabbing
@ -34702,7 +34610,6 @@ stitch
stitched stitched
stitchery stitchery
stitching stitching
st.-john's-wort
stoa stoa
stoat stoat
stochastic stochastic
@ -34840,9 +34747,6 @@ stowaway
stowed stowed
stower stower
stowing stowing
st.peterburg
st.-petersburg
st.petersburg
str str
strabismus strabismus
strabotomy strabotomy
@ -36208,8 +36112,6 @@ suzerainty
svelte svelte
sverige sverige
svga svga
s.w
s.w.
swab swab
swaddle swaddle
swaddling swaddling
@ -36889,7 +36791,6 @@ teaspoon
teaspoonful teaspoonful
teat teat
tech tech
tech.
techie techie
technetium technetium
technic technic
@ -37341,7 +37242,6 @@ theosophical
theosophically theosophically
theosophist theosophist
theosophy theosophy
ther.
therapeutic therapeutic
therapeutical therapeutical
therapeutically therapeutically
@ -39212,7 +39112,6 @@ ubiquity
ucla ucla
uconn uconn
udder udder
u.f.o.
ufo ufo
uganda uganda
ugandan ugandan
@ -39287,7 +39186,6 @@ umlaut
umpire umpire
umpteen umpteen
umpteenth umpteenth
u.n.
unabashed unabashed
unabashedly unabashedly
unabated unabated
@ -40705,11 +40603,7 @@ ursa
ursine ursine
urticaria urticaria
uruguay uruguay
u.s
u.s.
us us
u.s.a
u.s.a.
usa usa
usability usability
usable usable
@ -40732,13 +40626,10 @@ user-friendly
username username
users users
uses uses
u.s.gal
usher usher
usherette usherette
using using
usm usm
u.s.-soviet
u.s.s.r
ussr ussr
usual usual
usually usually
@ -40750,7 +40641,6 @@ usurpation
usurped usurped
usurper usurper
usury usury
u.s.w.s
utah utah
utensil utensil
utensils utensils
@ -40778,7 +40668,6 @@ uxorious
uxoriousness uxoriousness
uzbek uzbek
va va
va.
vacancy vacancy
vacant vacant
vacate vacate
@ -41312,8 +41201,6 @@ vow
vowel vowel
voxel voxel
voyage voyage
v.p
v.p.
vs vs
vtvm vtvm
vulcan vulcan
@ -41781,7 +41668,6 @@ webs
web-server web-server
webster webster
wed wed
wed.
wedding wedding
wedge wedge
wedlock wedlock

View File

@ -15,28 +15,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
// RUN: gcc -lusefull_macros hashtest.c -o hashtest && time ./hashtest En_words
// check hashes for all words in given dictionary and show words with similar hashes
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <usefull_macros.h> #include <usefull_macros.h>
#define ALLOCSZ (5000) #define ALLOCSZ (5000)
#define DJB2
//#if 0 #if defined DJB2
// djb2 http://www.cse.yorku.ca/~oz/hash.html // djb2 http://www.cse.yorku.ca/~oz/hash.html
/**
 * djb2 string hash (http://www.cse.yorku.ca/~oz/hash.html).
 *
 * Starts from 5381 and folds every byte in as hash = hash * 33 + byte,
 * with arithmetic wrapping modulo 2^32.
 *
 * @param str - NUL-terminated string to hash (must not be NULL)
 * @return 32-bit hash value; 5381 for an empty string
 */
static uint32_t hash(const char *str){
    // Read bytes as unsigned char: with a signed plain char, bytes >= 0x80
    // would be sign-extended to 0xFFFFFFxx and the result would depend on
    // the platform's char signedness.
    const unsigned char *p = (const unsigned char *)str;
    uint32_t h = 5381;
    uint32_t c;
    while((c = *p++) != 0){
        h = ((h << 5) + h) + c; // h * 33 + c
    }
    return h;
}
//#endif #elif defined SDBM
#if 0
static uint32_t hash(const char *str){ // sdbm static uint32_t hash(const char *str){ // sdbm
uint32_t hash = 5381; uint32_t hash = 5381;
uint32_t c; uint32_t c;
@ -44,6 +41,19 @@ static uint32_t hash(const char *str){ // sdbm
hash = c + (hash << 6) + (hash << 16) - hash; hash = c + (hash << 6) + (hash << 16) - hash;
return hash; return hash;
} }
#elif defined JENKINS
/**
 * Bob Jenkins' one-at-a-time string hash.
 *
 * Every byte is added to the state and mixed with a shift-add and a
 * shift-xor; three final avalanche steps are applied after the last byte.
 *
 * @param str - NUL-terminated string to hash (must not be NULL)
 * @return 32-bit hash value; 0 for an empty string
 */
uint32_t hash(const char *str){
    // Read bytes as unsigned char: with a signed plain char, bytes >= 0x80
    // would be sign-extended to 0xFFFFFFxx and the result would depend on
    // the platform's char signedness.
    const unsigned char *p = (const unsigned char *)str;
    uint32_t h = 0;
    uint32_t c;
    while((c = *p++) != 0){
        h += c;
        h += (h << 10);
        h ^= (h >> 6);
    }
    // final avalanche
    h += (h << 3);
    h ^= (h >> 11);
    h += (h << 15);
    return h;
}
#endif #endif
@ -53,8 +63,9 @@ typedef struct{
} strhash; } strhash;
/**
 * qsort() comparator for strhash records, ordering by the `hash` field
 * from the largest value to the smallest.
 *
 * @param a, b - pointers to the two strhash elements being compared
 * @return positive if a's hash is smaller than b's, negative if it is
 *         larger, 0 if both hashes are equal
 */
static int sorthashes(const void *a, const void *b){
    const uint32_t h1 = ((const strhash*)a)->hash;
    const uint32_t h2 = ((const strhash*)b)->hash;
    // Compare instead of subtracting: a uint32_t difference above INT_MAX
    // changes sign when converted to int, which makes the ordering
    // inconsistent and violates the qsort() comparator contract.
    return (h2 > h1) - (h2 < h1);
}
int main(int argc, char **argv){ int main(int argc, char **argv){