Skip to content

Commit b012cde

Browse files
committed
Add hashlittle2() and ensure the hash is never 0
It's probably time for a faster hash algorithm, but this gives us the free 64-bit hashing that things like the xattr code can use.
1 parent 464555e commit b012cde

File tree

1 file changed

+168
-4
lines changed

1 file changed

+168
-4
lines changed

hashtable.c

+168-4
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ void *hashtable_find(struct hashtable *tbl, int64 key, void *data_when_new)
350350
-------------------------------------------------------------------------------
351351
*/
352352

353+
/* Yield x unchanged unless it is 0, in which case yield (uint32_t)1, so a
 * hash built with this macro is never 0.  The argument is evaluated twice. */
#define NON_ZERO_32(x) ((x) != 0 ? (x) : (uint32_t)1)
354+
/* Join two 32-bit values into one int64 with x in the high 32 bits and y in
 * the low 32 bits; if both are 0, yield (int64)1 so the result is never 0.
 * The shift and OR are done in uint64_t: left-shifting a signed 64-bit value
 * by 32 is undefined behaviour whenever bit 31 of x is set.  The single
 * conversion back to int64 is implementation-defined rather than undefined.
 * Each argument may be evaluated twice. */
#define NON_ZERO_64(x, y) (((x) || (y)) ? (int64)(((uint64_t)(x) << 32) | (uint64_t)(y)) : (int64)1)
355+
353356
uint32_t hashlittle(const void *key, size_t length)
354357
{
355358
uint32_t a,b,c; /* internal state */
@@ -390,7 +393,7 @@ uint32_t hashlittle(const void *key, size_t length)
390393
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
391394
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
392395
case 1 : a+=k8[0]; break;
393-
case 0 : return c;
396+
case 0 : return NON_ZERO_32(c);
394397
}
395398
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
396399
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
@@ -436,7 +439,7 @@ uint32_t hashlittle(const void *key, size_t length)
436439
break;
437440
case 1 : a+=k8[0];
438441
break;
439-
case 0 : return c; /* zero length requires no mixing */
442+
case 0 : return NON_ZERO_32(c); /* zero length requires no mixing */
440443
}
441444

442445
} else { /* need to read the key one byte at a time */
@@ -489,10 +492,171 @@ uint32_t hashlittle(const void *key, size_t length)
489492
/* FALLTHROUGH */
490493
case 1 : a+=k[0];
491494
break;
492-
case 0 : return c;
495+
case 0 : return NON_ZERO_32(c);
493496
}
494497
}
495498

496499
final(a,b,c);
497-
return c;
500+
return NON_ZERO_32(c);
498501
}
502+
503+
#if SIZEOF_INT64 >= 8
504+
/*
505+
* hashlittle2: return 2 32-bit hash values joined into an int64.
506+
*
507+
* This is identical to hashlittle(), except it returns two 32-bit hash
508+
* values instead of just one. This is good enough for hash table
509+
* lookup with 2^64 buckets, or if you want a second hash if you're not
510+
* happy with the first, or if you want a probably-unique 64-bit ID for
511+
* the key. c is better mixed than b. The returned int64 carries b in its
512+
* high 32 bits and c in its low 32 bits; an all-zero result is returned as 1.
513+
*/
514+
int64 hashlittle2(const void *key, size_t length)
515+
{
516+
uint32_t a,b,c; /* internal state */
517+
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
518+
519+
/* Set up the internal state */
520+
a = b = c = 0xdeadbeef + ((uint32_t)length);
521+
522+
u.ptr = key;
523+
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
524+
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
525+
const uint8_t *k8;
526+
527+
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
528+
while (length > 12)
529+
{
530+
a += k[0];
531+
b += k[1];
532+
c += k[2];
533+
mix(a,b,c);
534+
length -= 12;
535+
k += 3;
536+
}
537+
538+
/*----------------------------- handle the last (probably partial) block */
539+
k8 = (const uint8_t *)k;
540+
switch(length)
541+
{
542+
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
543+
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
544+
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
545+
case 9 : c+=k8[8]; /* fall through */
546+
case 8 : b+=k[1]; a+=k[0]; break;
547+
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
548+
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
549+
case 5 : b+=k8[4]; /* fall through */
550+
case 4 : a+=k[0]; break;
551+
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
552+
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
553+
case 1 : a+=k8[0]; break;
554+
case 0 : return NON_ZERO_64(b, c);
555+
}
556+
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
557+
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
558+
const uint8_t *k8;
559+
560+
/*--------------- all but last block: aligned reads and different mixing */
561+
while (length > 12)
562+
{
563+
a += k[0] + (((uint32_t)k[1])<<16);
564+
b += k[2] + (((uint32_t)k[3])<<16);
565+
c += k[4] + (((uint32_t)k[5])<<16);
566+
mix(a,b,c);
567+
length -= 12;
568+
k += 6;
569+
}
570+
571+
/*----------------------------- handle the last (probably partial) block */
572+
k8 = (const uint8_t *)k;
573+
switch(length)
574+
{
575+
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
576+
b+=k[2]+(((uint32_t)k[3])<<16);
577+
a+=k[0]+(((uint32_t)k[1])<<16);
578+
break;
579+
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
580+
case 10: c+=k[4];
581+
b+=k[2]+(((uint32_t)k[3])<<16);
582+
a+=k[0]+(((uint32_t)k[1])<<16);
583+
break;
584+
case 9 : c+=k8[8]; /* fall through */
585+
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
586+
a+=k[0]+(((uint32_t)k[1])<<16);
587+
break;
588+
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
589+
case 6 : b+=k[2];
590+
a+=k[0]+(((uint32_t)k[1])<<16);
591+
break;
592+
case 5 : b+=k8[4]; /* fall through */
593+
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
594+
break;
595+
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
596+
case 2 : a+=k[0];
597+
break;
598+
case 1 : a+=k8[0];
599+
break;
600+
case 0 : return NON_ZERO_64(b, c); /* zero length strings require no mixing */
601+
}
602+
603+
} else { /* need to read the key one byte at a time */
604+
const uint8_t *k = (const uint8_t *)key;
605+
606+
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
607+
while (length > 12)
608+
{
609+
a += k[0];
610+
a += ((uint32_t)k[1])<<8;
611+
a += ((uint32_t)k[2])<<16;
612+
a += ((uint32_t)k[3])<<24;
613+
b += k[4];
614+
b += ((uint32_t)k[5])<<8;
615+
b += ((uint32_t)k[6])<<16;
616+
b += ((uint32_t)k[7])<<24;
617+
c += k[8];
618+
c += ((uint32_t)k[9])<<8;
619+
c += ((uint32_t)k[10])<<16;
620+
c += ((uint32_t)k[11])<<24;
621+
mix(a,b,c);
622+
length -= 12;
623+
k += 12;
624+
}
625+
626+
/*-------------------------------- last block: affect all 32 bits of (c) */
627+
switch(length) /* all the case statements fall through */
628+
{
629+
case 12: c+=((uint32_t)k[11])<<24;
630+
/* FALLTHROUGH */
631+
case 11: c+=((uint32_t)k[10])<<16;
632+
/* FALLTHROUGH */
633+
case 10: c+=((uint32_t)k[9])<<8;
634+
/* FALLTHROUGH */
635+
case 9 : c+=k[8];
636+
/* FALLTHROUGH */
637+
case 8 : b+=((uint32_t)k[7])<<24;
638+
/* FALLTHROUGH */
639+
case 7 : b+=((uint32_t)k[6])<<16;
640+
/* FALLTHROUGH */
641+
case 6 : b+=((uint32_t)k[5])<<8;
642+
/* FALLTHROUGH */
643+
case 5 : b+=k[4];
644+
/* FALLTHROUGH */
645+
case 4 : a+=((uint32_t)k[3])<<24;
646+
/* FALLTHROUGH */
647+
case 3 : a+=((uint32_t)k[2])<<16;
648+
/* FALLTHROUGH */
649+
case 2 : a+=((uint32_t)k[1])<<8;
650+
/* FALLTHROUGH */
651+
case 1 : a+=k[0];
652+
break;
653+
case 0 : return NON_ZERO_64(b, c);
654+
}
655+
}
656+
657+
final(a,b,c);
658+
return NON_ZERO_64(b, c);
659+
}
660+
#else
661+
#define hashlittle2(key, len) hashlittle(key, len)
662+
#endif

0 commit comments

Comments
 (0)