| Deutsch English Français Italiano |
|
<20240605005916.00001b33@yahoo.com> View for Bookmarking (what is this?) Look up another Usenet article |
Path: ...!eternal-september.org!feeder3.eternal-september.org!news.eternal-september.org!.POSTED!not-for-mail
From: Michael S <already5chosen@yahoo.com>
Newsgroups: comp.lang.c
Subject: Re: Good hash for pointers
Date: Wed, 5 Jun 2024 00:59:16 +0300
Organization: A noiseless patient Spider
Lines: 241
Message-ID: <20240605005916.00001b33@yahoo.com>
References: <v2n88p$1nlcc$1@dont-email.me>
<v2qm8m$2el55$1@raubtier-asyl.eternal-september.org>
<v2qnue$2evlu$1@dont-email.me>
<v2r9br$2hva2$1@dont-email.me>
<86fru6gsqr.fsf@linuxsc.com>
<v2sudq$2trh1$1@raubtier-asyl.eternal-september.org>
<8634q5hjsp.fsf@linuxsc.com>
<v2vmhr$3ffjk$1@raubtier-asyl.eternal-september.org>
<86le3wfsmd.fsf@linuxsc.com>
<v2voe7$3fr50$1@raubtier-asyl.eternal-september.org>
<86ed9ofq14.fsf@linuxsc.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Injection-Date: Tue, 04 Jun 2024 23:59:20 +0200 (CEST)
Injection-Info: dont-email.me; posting-host="ba293a898abc9f7139ad072c906a26d7";
logging-data="632028"; mail-complaints-to="abuse@eternal-september.org"; posting-account="U2FsdGVkX18fOIDMRhi+hMn35bnvD317hEfLJuvY3Lc="
Cancel-Lock: sha1:poV4GyHNwZm6sNCm53pe5Jn14b0=
X-Newsreader: Claws Mail 4.1.1 (GTK 3.24.34; x86_64-w64-mingw32)
Bytes: 7941
On Sun, 26 May 2024 10:20:55 -0700
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:
> Bonita Montero <Bonita.Montero@gmail.com> writes:
>
> > Am 26.05.2024 um 18:24 schrieb Tim Rentsch:
> >
> >> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>
> >>> Am 25.05.2024 um 19:40 schrieb Tim Rentsch:
> >>>
> >>>> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>>>
> >>>>> Am 25.05.2024 um 11:12 schrieb Tim Rentsch:
> >>>>>
> >>>>>> Your hash function is expensive to compute, moreso even
> >>>>>> than the "FNV" function shown earlier. In a case like
> >>>>>> this one where the compares are cheap, it's better to
> >>>>>> have a dumb-but-fast hash function that might need a
> >>>>>> few more looks to find an open slot, because the cost
> >>>>>> of looking is so cheap compared to computing the hash
> >>>>>> function.
> >>>>>
> >>>>> A (size_t)pointer * LARGE_PRIME can be sufficient,
> >>>>> ignoring the overflow.
> >>>>
> >>>> Plenty fast but the output quality is poor. ...
> >>>
> >>> If the prime is large enough there'e no regular distribution.
> >>
> >> Your conjecture is contradicted by empirical facts.
> >
> > There are no empirival facts for that since two times the
> > taken prime is beyond the 64 bit address space.
>
> You don't listen very well do you?
>
> I say the output quality is poor because I have run tests that
> show the poor output quality. I've done that with a prime of my
> own choosing and also with 18446744073709551557, the value you
> suggested. In both cases the test runs show results that are
> clearly worse than all the other hash functions tested, including
> bart's and malcolm's. Furthermore the results for your suggested
> calculation are worse across the board, on every variety of
> dynamic workload in my test suite.
>
> Your proposed hash function is too weak to be taken as a serious
> candidate.
18446744073709551557 is indeed very bad (too close to 2**64).
But I tried other prime and at least in my simple tests it performs
rather well.
May be my tests are to simplistic, I don't pretend to understand much
about it.
// hash_aes4.c
// Reference. Hopefully behaves similarly to crypto hash
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>
uint64_t hash_ref(void* key)
{
uint64_t ukey = (uint64_t)key;
__m128i xkey = _mm_set_epi64x(ukey, 42);
static const uint64_t bar[8] = {
0xBB09BBCC90B24BF2, 0x825C622FF2792A01,
0x94F0535CB06D4060, 0x939C756246DBFD1D,
0x5B835E01A7E14CA1, 0xAC2BDAFC023CDD06,
0xE0B6A4735B774AEC, 0x9CAFB43E7DDE494C,
};
for (int i = 0; i < 4; ++i) {
__m128i rkey;
memcpy(&rkey, &bar[i*2], sizeof(rkey));
xkey = _mm_aesenc_si128(xkey, rkey);
}
memcpy(&ukey, &xkey, sizeof(ukey));
return ukey;
}
// hash_bonita.c
// Bonita's with different prime
#include <stdint.h>
uint64_t hash_foo(void* key)
{
uint64_t ukey = (uint64_t)key;
static const uint64_t LARGE_PRIME = 0xbb09bbcc90b24bcdull;
return ukey * LARGE_PRIME;
}
// ptr_hash.c
// test bench
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
uint64_t hash_foo(void*);
uint64_t hash_ref(void*);
static void test(
void** pointers, size_t n_pointers,
size_t* table, size_t tab_sz,
uint64_t (*hash_function)(void*));
static const char UsageStr[] =
"ptr_hash - examine performance of pointer hash function\n"
"Usage\n"
"ptr_hash n m sz1 [sz2]\n"
"where\n"
" n - the size of hash table\n"
" m - # of pointers to put in the table\n"
" sz1 - minimal size of allocated blocks\n"
" sz2 - [optional] maximal size of allocated blocks. Default = sz1\n"
;
int main(int argz, char** argv)
{
if (argz < 4) {
fprintf(stderr, "%s", UsageStr);
return 1;
}
size_t params[4];
for (int arg_i = 1; arg_i < 5 && arg_i < argz; ++arg_i) {
long val = strtol(argv[arg_i], NULL, 0);
if (val <= 0) {
fprintf(stderr
,"Bad parameter %s. Please specify positive number\n"
,argv[arg_i]);
return 1;
}
params[arg_i-1] = val;
}
size_t n = params[0];
size_t m = params[1];
size_t sz1 = params[2];
size_t sz2 = params[3];
if (argz == 4)
sz2 = sz1;
if (sz1 > sz2) {
size_t tmp = sz1; sz1 = sz2; sz2 = tmp;
}
int ret = 1;
void **pointers = malloc(m * sizeof(*pointers));
if (pointers) {
size_t* table = malloc(n * sizeof(*table));
if (table) {
ret = 0;
srand(1);
const unsigned __int128 RAND_SCALE = (unsigned __int128)RAND_MAX
+ 1;
for (size_t i = 0; i < m; ++i) {
size_t r = rand();
size_t sz = (unsigned __int128)(sz2-sz1+1)*r / RAND_SCALE + sz1;
void* block = malloc(sz);
if (!block) {
ret = 1;
perror("malloc()");
for (size_t k = 0; k < i; ++k)
free(pointers[k]);
break;
}
pointers[i] = block;
}
if (ret == 0) {
for (size_t i = 0; i < m; ++i)
free(pointers[i]); // we don't need allocated blocks for a
test
========== REMAINDER OF ARTICLE TRUNCATED ==========