Deutsch   English   Français   Italiano  
<20240605005916.00001b33@yahoo.com>

View for Bookmarking (what is this?)
Look up another Usenet article

Path: ...!eternal-september.org!feeder3.eternal-september.org!news.eternal-september.org!.POSTED!not-for-mail
From: Michael S <already5chosen@yahoo.com>
Newsgroups: comp.lang.c
Subject: Re: Good hash for pointers
Date: Wed, 5 Jun 2024 00:59:16 +0300
Organization: A noiseless patient Spider
Lines: 241
Message-ID: <20240605005916.00001b33@yahoo.com>
References: <v2n88p$1nlcc$1@dont-email.me>
	<v2qm8m$2el55$1@raubtier-asyl.eternal-september.org>
	<v2qnue$2evlu$1@dont-email.me>
	<v2r9br$2hva2$1@dont-email.me>
	<86fru6gsqr.fsf@linuxsc.com>
	<v2sudq$2trh1$1@raubtier-asyl.eternal-september.org>
	<8634q5hjsp.fsf@linuxsc.com>
	<v2vmhr$3ffjk$1@raubtier-asyl.eternal-september.org>
	<86le3wfsmd.fsf@linuxsc.com>
	<v2voe7$3fr50$1@raubtier-asyl.eternal-september.org>
	<86ed9ofq14.fsf@linuxsc.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Injection-Date: Tue, 04 Jun 2024 23:59:20 +0200 (CEST)
Injection-Info: dont-email.me; posting-host="ba293a898abc9f7139ad072c906a26d7";
	logging-data="632028"; mail-complaints-to="abuse@eternal-september.org";	posting-account="U2FsdGVkX18fOIDMRhi+hMn35bnvD317hEfLJuvY3Lc="
Cancel-Lock: sha1:poV4GyHNwZm6sNCm53pe5Jn14b0=
X-Newsreader: Claws Mail 4.1.1 (GTK 3.24.34; x86_64-w64-mingw32)
Bytes: 7941

On Sun, 26 May 2024 10:20:55 -0700
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:

> Bonita Montero <Bonita.Montero@gmail.com> writes:
> 
> > Am 26.05.2024 um 18:24 schrieb Tim Rentsch:
> >  
> >> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>  
> >>> Am 25.05.2024 um 19:40 schrieb Tim Rentsch:
> >>>  
> >>>> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>>>  
> >>>>> Am 25.05.2024 um 11:12 schrieb Tim Rentsch:
> >>>>>  
> >>>>>> Your hash function is expensive to compute, moreso even
> >>>>>> than the "FNV" function shown earlier.  In a case like
> >>>>>> this one where the compares are cheap, it's better to
> >>>>>> have a dumb-but-fast hash function that might need a
> >>>>>> few more looks to find an open slot, because the cost
> >>>>>> of looking is so cheap compared to computing the hash
> >>>>>> function.  
> >>>>>
> >>>>> A (size_t)pointer * LARGE_PRIME can be sufficient,
> >>>>> ignoring the overflow.  
> >>>>
> >>>> Plenty fast but the output quality is poor. ...  
> >>>
> >>> If the prime is large enough there'e no regular distribution.  
> >>
> >> Your conjecture is contradicted by empirical facts.  
> >
> > There are no empirival facts for that since two times the
> > taken prime is beyond the 64 bit address space.  
> 
> You don't listen very well do you?
> 
> I say the output quality is poor because I have run tests that
> show the poor output quality.  I've done that with a prime of my
> own choosing and also with 18446744073709551557, the value you
> suggested.  In both cases the test runs show results that are
> clearly worse than all the other hash functions tested, including
> bart's and malcolm's.  Furthermore the results for your suggested
> calculation are worse across the board, on every variety of
> dynamic workload in my test suite.
> 
> Your proposed hash function is too weak to be taken as a serious
> candidate.

18446744073709551557 is indeed very bad (too close to 2**64).
But I tried other prime and at least in my simple tests it performs
rather well.
May be my tests are to simplistic, I don't pretend to understand much
about it.

// hash_aes4.c
// Reference. Hopefully behaves similarly to crypto hash
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>

uint64_t hash_ref(void* key)
{
  uint64_t ukey = (uint64_t)key;
  __m128i xkey = _mm_set_epi64x(ukey, 42);
  static const uint64_t bar[8] = {
    0xBB09BBCC90B24BF2, 0x825C622FF2792A01,
    0x94F0535CB06D4060, 0x939C756246DBFD1D,
    0x5B835E01A7E14CA1, 0xAC2BDAFC023CDD06,
    0xE0B6A4735B774AEC, 0x9CAFB43E7DDE494C,
  };
  for (int i = 0; i < 4; ++i) {
    __m128i rkey;
    memcpy(&rkey, &bar[i*2], sizeof(rkey));
    xkey = _mm_aesenc_si128(xkey, rkey);
  }
  memcpy(&ukey, &xkey, sizeof(ukey));
  return ukey;
}

// hash_bonita.c 
// Bonita's with different prime
#include <stdint.h>

uint64_t hash_foo(void* key)
{
  uint64_t ukey = (uint64_t)key;
  static const uint64_t LARGE_PRIME = 0xbb09bbcc90b24bcdull;
  return ukey * LARGE_PRIME;
}


// ptr_hash.c 
// test bench
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

uint64_t hash_foo(void*);
uint64_t hash_ref(void*);

static void test(
 void** pointers, size_t n_pointers,
 size_t* table,   size_t tab_sz,
 uint64_t (*hash_function)(void*));

static const char UsageStr[] =
"ptr_hash - examine performance of pointer hash function\n"
"Usage\n"
"ptr_hash n m sz1 [sz2]\n"
"where\n"
" n   - the size of hash table\n"
" m   - # of pointers to put in the table\n"
" sz1 - minimal size of allocated blocks\n"
" sz2 - [optional] maximal size of allocated blocks. Default = sz1\n"
;
int main(int argz, char** argv)
{
  if (argz < 4) {
    fprintf(stderr, "%s", UsageStr);
    return 1;
  }

  size_t params[4];
  for (int arg_i = 1; arg_i < 5 && arg_i < argz; ++arg_i) {
    long val = strtol(argv[arg_i], NULL, 0);
    if (val <= 0) {
      fprintf(stderr
        ,"Bad parameter %s. Please specify positive number\n"
        ,argv[arg_i]);
      return 1;
    }
    params[arg_i-1] = val;
  }
  size_t n = params[0];
  size_t m = params[1];
  size_t sz1 = params[2];
  size_t sz2 = params[3];
  if (argz == 4)
    sz2 = sz1;
  if (sz1 > sz2) {
    size_t tmp = sz1; sz1 = sz2; sz2 = tmp;
  }

  int ret = 1;
  void **pointers = malloc(m * sizeof(*pointers));
  if (pointers) {
    size_t* table = malloc(n * sizeof(*table));
    if (table) {
      ret = 0;
      srand(1);
      const unsigned __int128 RAND_SCALE = (unsigned __int128)RAND_MAX
  + 1;
      for (size_t i = 0; i < m; ++i) {
        size_t r = rand();
        size_t sz = (unsigned __int128)(sz2-sz1+1)*r / RAND_SCALE + sz1;
        void* block = malloc(sz);
        if (!block) {
          ret = 1;
          perror("malloc()");
          for (size_t k = 0; k < i; ++k)
            free(pointers[k]);
          break;
        }
        pointers[i] = block;
      }
      if (ret == 0) {
        for (size_t i = 0; i < m; ++i)
          free(pointers[i]); // we don't need allocated blocks for a
  test
========== REMAINDER OF ARTICLE TRUNCATED ==========