/* * mapping.cpp -- associative array implementations * by pts@fazekas.hu at Fri Mar 15 21:13:47 CET 2002 */ #ifdef __GNUC__ #ifndef __clang__ #pragma implementation #endif #endif #include "mapping.hpp" #include "error.hpp" #include #include /* abort() */ /* --- */ bool Mapping::Gen::update(char const*key, slen_t keylen, char const*data) { char *found_data=get(key, keylen); if (found_data==NULLP) return true; memcpy(found_data, data, datalen); return false; } /* --- */ bool Mapping::DoubleHash::obj_assert() { assert(len <= used); assert(minlen <= len); assert(used <= maxused); assert(maxused < alloced); assert(alloced < (slen_t)-2); /* Imp: re-count used and len */ return true; } bool Mapping::DoubleHash::set (char const* key, slen_t keylen, char const*data) { assert(obj_assert()); slen_t h1=vi_h1(key,keylen); assert(h1keylen==NEVER_USED) { added: /* Dat: we shouldn't stop on p->keylen==DELETED */ /* This will be deleted by `delete [] (pp->keydata-datalen);' called from * Mapping::DoubleHash::clear() called from the destructor of * Mapping::DoubleHash15, also known as Mapping::H. */ memcpy(p->keydata=new char[keylen+datalen], data, datalen); memcpy(p->keydata+=datalen, key, p->keylen=keylen); len++; //if (p->keylen==NEVER_USED && used++==maxused) { p->keylen=keylen; rehash(); } // else { p->keylen=keylen; } if (used++==maxused) rehash(); assert(obj_assert()); return true; /* new value added */ } if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { updated: memcpy(p->keydata-datalen, data, datalen); return false; /* value updated */ } /* Not found for the 1st try. We have to walk. */ slen_t h2=vi_h2(key,keylen); assert(1<=h2 && h1=h2) { h1-=h2; p-=h2; } else { h1+=alloced-h2; p+=alloced-h2; } if (p->keylen==NEVER_USED) goto added; if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto updated; } } char*Mapping::DoubleHash::get (char const* key, slen_t keylen) { assert(obj_assert()); slen_t h1=vi_h1(key,keylen); assert(h1keylen==NEVER_USED) return (char*)NULLP; if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen; /* Not found for the 1st try. We have to walk. */ slen_t h2=vi_h2(key,keylen); assert(1<=h2 && h1=h2) { h1-=h2; p-=h2; } else { h1+=alloced-h2; p+=alloced-h2; } if (p->keylen==NEVER_USED) return (char*)NULLP; if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen; } } bool Mapping::DoubleHash::deletee (char const* key, slen_t keylen) { /* We must not set `p->keylen=NEVER_USED' in this method because this would * ruin searches jumping on `p', but not coming from `p+h2'. So we just set * `p->keylen=DELETED'. */ assert(obj_assert()); slen_t h1=vi_h1(key,keylen); assert(h1keylen==NEVER_USED) return true; /* key to be deleted does not exist */ if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { found: vi_dtor(p->keydata-datalen); delete [] (p->keydata-datalen); p->keylen=(slen_t)DELETED; // p->keydata=(char*)NULLP; if (len--==minlen) rehash(); assert(obj_assert()); return false; /* key-value pair deleted */ } /* Not found for the 1st try. We have to walk. */ slen_t h2=vi_h2(key,keylen); assert(1<=h2 && h1=h2) { h1-=h2; p-=h2; } else { h1+=alloced-h2; p+=alloced-h2; } if (p->keylen==(slen_t)NEVER_USED) return true; /* key to be deleted does not exist */ if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto found; } } void Mapping::DoubleHash::getFirst(char const*const*& key, slen_t &keylen, char *& data) { assert(obj_assert()); Ary *p=ary, *pend=p+alloced; while (p!=pend && p->keylen>=(slen_t)-2) p++; if (p==pend) { key=(char const*const*)NULLP; } else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; } } void Mapping::DoubleHash::getNext (char const*const*& key, slen_t &keylen, char *& data) { assert(obj_assert()); /* vvv Dat: this operation is safe despite of the fact that it increases * signedness off pointer target type ((char*) -> (Ary*)). */ Ary *p=PTS_align_cast(Ary*, ( (char*)const_cast(key) - ((char*)&((Ary*)0)->keydata-(char*)0) )), *pend=ary+alloced; p++; while (p!=pend && p->keylen>=(slen_t)-2) p++; if (p==pend) { key=(char const*const*)NULLP; } else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; } } void Mapping::DoubleHash::rehash() { unsigned old_scale=scale; slen_t old_alloced=alloced; #ifndef NDEBUG slen_t old_len=len; slen_t old_used=used; #endif Ary *old_ary=ary; // printf("rehash minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced); vi_scale(); // printf("scaled minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced); // assert( minlen <= len); if (scale==old_scale && used<=maxused) { assert(obj_assert()); return; } Ary *pp=old_ary, *ppend=pp+old_alloced; slen_t calclen=0, calcused=0; ary=new Ary[alloced]; // printf("alloced=%u\n", alloced); memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */ /* insert the old values to the new hash */ for (; pp!=ppend; pp++) { if (pp->keylen!=NEVER_USED) calcused++; if (pp->keylen<(slen_t)-2) { // printf("%u %u\n", (unsigned char)pp->keydata[0], (unsigned char)pp->keydata[1]); calclen++; slen_t h1=vi_h1(pp->keydata,pp->keylen); assert(h1keylen!=DELETED); if (p->keylen==NEVER_USED) { found: p->keylen=pp->keylen; p->keydata=pp->keydata; continue; } assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key"); /* Not found for the 1st try. We have to walk. */ slen_t h2=vi_h2(pp->keydata,pp->keylen); assert(1<=h2 && h1=h2) { h1-=h2; p-=h2; } else { h1+=alloced-h2; p+=alloced-h2; } assert(p->keylen!=DELETED); if (p->keylen==NEVER_USED) goto found; assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key"); } /* WHILE */ } } /* NEXT */ used=len=calclen; assert(calclen==old_len); assert(calcused==old_used); assert(old_len==len); assert(old_len==used); assert(obj_assert()); delete [] old_ary; } void Mapping::DoubleHash::clear() { assert(obj_assert()); Ary *pp=ary, *ppend=pp+alloced; for (; pp!=ppend; pp++) if (pp->keylen<(slen_t)-2) { vi_dtor(pp->keydata-datalen); /* this is quite fast */ delete [] (pp->keydata-datalen); /* this is really slow for millions of values */ pp->keylen=(slen_t)DELETED; len--; } assert(len==0); if (minlen!=0) rehash(); } /* --- */ static slen_t const d15_primes[]= { 0, 10, 13, 23, 37, 59, 89, 137, 211, 317, 479, 719, 1087, 1637, 2459, 3691, 5557, 8353, 12539, 18839, 28277, 42433, 63659, 95507UL, 143261UL, 214913UL, 322397UL, 483611UL, 725423UL, 1088159UL, 1632259UL, 2448389UL, 3672593UL, 5508913UL, 8263373UL, 12395069UL, 18592631UL, 27888947UL, 41833427UL, 62750147UL, 94125247UL, 141187901UL, 211781873UL, 317672813UL, 476509223UL, 714763843UL, 1072145771UL, 1608218669UL, 2412328031UL, 3618492049UL, #if SIZEOF_SLEN_T>=8 5427738097UL, 8141607167UL, 12212410753UL, 18318616157UL, 27477924239UL, 41216886467UL, 61825329719UL, 92737994593UL, 139106991917UL, 208660487887UL, 312990731839UL, 469486097807UL, 704229146717UL, 1056343720093UL, 1584515580187UL, 2376773370313UL, 3565160055487UL, 5347740083243UL, 8021610124867UL, 12032415187319UL, 18048622780987UL, 27072934171487UL, 40609401257291UL, 60914101885951UL, 91371152828947UL, 137056729243439UL, 205585093865189UL, 308377640797829UL, 462566461196803UL, 693849691795229UL, 1040774537692907UL, 1561161806539393UL, 2341742709809177UL, 3512614064713777UL, 5268921097070759UL, 7903381645606193UL, 11855072468409349UL, 17782608702614033UL, 26673913053921061UL, 40010869580881603UL, 60016304371322423UL, 90024456556983643UL, 135036684835475519UL, 202555027253213279UL, 303832540879819943UL, 455748811319729951UL, 683623216979594929UL, 1025434825469392417UL, 1538152238204088631UL, 2307228357306132983UL, 3460842535959199481UL, 5191263803938799269UL, #endif 0 }; void Mapping::DoubleHash15::vi_scale() { #if 0 // =1); alloced=(d15_primes+2)[--scale]; if (scale>1) minlen=(d15_primes+2)[scale-2]; else if (scale==1) minlen=10; else minlen=0; if (lenmaxused) { assert(used==maxused+1); if ((alloced=(d15_primes+2)[++scale])==0) abort(); /* Imp: better overflow reporting */ if (scale>1) minlen=(d15_primes+2)[scale-2]; else if (scale==1) minlen=10; else minlen=0; // if (len>4; else maxused=(alloced>>4)*15; if (used>maxused) printf("alloced=%u used=%u maxused=%u\n", alloced, used, maxused); #endif /* Dat: we respect only `len', not used. */ /* Imp: make calculation of scale relative to previous one */ scale=0; while (1) { // printf("xscale=%u\n", scale); if (0==(alloced=(d15_primes+2)[scale])) { assert(0); abort(); } /* Imp: better overflow reporting */ if (alloced<=4000U) maxused=(alloced*15)>>4; else maxused=(alloced>>4)*15; /* avoid overflows */ if (len<=maxused) break; /* _not_ used<=maxused, because of rehash() */ scale++; } minlen=(d15_primes)[scale]; // printf("ret alloced=%u used=%u maxused=%u\n", alloced, used, maxused); } slen_t Mapping::DoubleHash15::vi_h1(register char const*key, register slen_t keylen) { register slen_t hval=0, m=alloced; while (keylen--!=0) hval=((hval<<8)+*key++)%m; /* Imp: make this accurate and faster (the `%' operation is rather slow) */ return hval; } slen_t Mapping::DoubleHash15::vi_h2(char const*key, slen_t keylen) { register slen_t hval=0, m=alloced-1; while (keylen--!=0) hval=((hval<<8)+*key++)%m; /* Imp: make this accurate and faster (the `%' operation is rather slow) */ return hval+1; } void Mapping::DoubleHash15::vi_dtor(char *) {} Mapping::DoubleHash15::DoubleHash15(slen_t datalen_) { ary=new Ary[alloced=(d15_primes+2)[scale=0]]; memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */ datalen=datalen_; len=used=minlen=0; maxused=(alloced>>4)*15; } Mapping::DoubleHash15::~DoubleHash15() { minlen=0; clear(); delete [] ary; } /* __END__ */