Skip to content

Commit 692bd18

Browse files
committedSep 18, 2018
benchmarking HashMaps
·
v1.0.01.0.0
1 parent b678be9 commit 692bd18

File tree

11 files changed

+2543
-1681
lines changed

11 files changed

+2543
-1681
lines changed
 

‎benchmarks/hashmap.spec.js

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
const HashMap = require('../src/data-structures/hash-maps/hash-map');
2+
const fs = require('fs');
3+
const readline = require('readline');
4+
const stream = require('stream');
5+
6+
/**
 * Benchmark-style spec: feeds every line of `words.txt` (which sits next to
 * this spec file) into a HashMap and checks the collision counter afterwards.
 */
describe('HashMap Performance Test', () => {
  describe('#set', () => {
    it('set lots of values', (done) => {
      // `const` avoids leaking an implicit global into other specs.
      const hashMap = new HashMap();

      // Resolve relative to this file, not to the directory jest was started from.
      const input = fs.createReadStream(`${__dirname}/words.txt`);
      const rl = readline.createInterface({ input });

      let lineNumber = 0;
      rl.on('line', (word) => {
        // Actually populate the map; otherwise the assertion below is vacuous.
        hashMap.set(word, lineNumber);
        lineNumber += 1;
      });

      rl.on('close', () => {
        expect(hashMap.collisions).toBeLessThan(1);
        done();
      });
    });
  });
});

‎benchmarks/words.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
the
2+
of
3+
and
4+
to
5+
a
6+
in
7+
for
8+
is
9+
on
10+
that
11+
by
12+
this
13+
with
14+
i
15+
you
16+
it
17+
not
18+
or
19+
be
20+
are
21+
from
22+
at
23+
as
24+
your

‎package-lock.json

Lines changed: 2062 additions & 1679 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
"lodash": "4.17.10"
88
},
99
"devDependencies": {
10+
"asciichart": "1.5.7",
1011
"eslint": "4.19.1",
1112
"eslint-config-airbnb-base": "12.1.0",
1213
"eslint-plugin-import": "2.12.0",
1314
"eslint-plugin-jest": "21.17.0",
14-
"jest": "23.0.1"
15+
"jest": "23.6.0",
16+
"numbers": "0.7.0",
17+
"pandas-js": "0.2.4"
1518
},
1619
"scripts": {
1720
"test": "jest src/data-structures # jest # mocha src/**/*spec.js # jasmine JASMINE_CONFIG_PATH=jasmine.json # node jasmine-runner.js",

‎sortedArray1e4.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
const assert = require('assert');
2+
3+
/**
 * Calculates a polynomial hash code that maps a key (value) to an unbounded integer.
 *
 * The key is stringified and suffixed with its `typeof`, so values with the
 * same text but different types (e.g. `10` and `'10'`) get different codes.
 * Every code point is placed in its own 21-bit slot: Unicode code points go
 * up to U+10FFFF, which needs 21 bits, so a narrower slot would let a large
 * code point spill into the slot of the next position and cause collisions.
 *
 * @param {any} key
 * @returns {BigInt} big integer (unbounded) that maps to the key
 */
function hashCode(key) {
  const bitsPerCodePoint = 21n;
  const codePoints = Array.from(`${key}${typeof key}`);

  return codePoints.reduce((hash, char, position) => {
    const slotOffset = BigInt(position) * bitsPerCodePoint;
    return hash + (BigInt(char.codePointAt(0)) << slotOffset);
  }, 0n);
}
15+
16+
/**
 * Compression function: maps an arbitrary integer to an integer in the range [0… size - 1].
 * @param {BigInt} hashCode
 * @param {Number} size bucket size
 * @returns {Number} array index
 */
function compressToIndex(hashCode, size = 10) {
  // Number() converts the BigInt remainder directly; parseInt() would first
  // turn it into a string and then parse that string back.
  return Number(hashCode % BigInt(size));
}
25+
26+
/**
 * Hash function: computes the hash code of a key and compresses it into a
 * bucket index.
 * @param {*} key value to hash
 * @param {Number} size bucket size (defaults to 10)
 * @returns {Number} array index
 */
function hashFunction(key, size = 10) {
  const code = hashCode(key);
  const index = compressToIndex(code, size);
  return index;
}
33+
34+
35+
/**
 * Builds a printable row (for console.table) describing how a value hashes.
 * @param {*} el value to hash
 * @returns {{s: *, v: string, hex: string, hashFn: *}} the value, its hash code
 *   formatted for the locale and as hexadecimal, and its compressed index
 */
function printHash(el) {
  const code = hashCode(el);
  const row = { s: el };
  row.v = code.toLocaleString();
  row.hex = code.toString(16);
  row.hashFn = compressToIndex(code);
  return row;
}
39+
40+
// similar ending
console.table(['00', '10', '20', '30', '40', '50', '60', '70', '80', '90'].map(printHash));
// similar start
console.table(['10', '11', '12', '13', '14', '15', '16', '17', '18', '19'].map(printHash));

console.table(['@', '#', '#!', 'stop', 'pots', 'Ca', 'DB'].map(printHash));

// all different
// console.table(['cat', 'dog', 'rat', 'art', 10, '10', {a:1}, '😸', '🐶', '😸🐶', '🐶😸'].map(printHash));
// console.log(hashCode(Array(1500).fill('😁').join('')));
// console.log(hashFunction(Array(1500).fill('😁').join('')));

// Sanity checks. These used to be bare `===` expressions whose result was
// thrown away; as assertions they actually fail when the expectation breaks.
assert.notStrictEqual(hashCode(10), hashCode('10'), 'Hash code should be different with different types');
assert.notStrictEqual(hashCode('10string'), hashCode('10'), 'Hash code should be different when the key ends with a type name');
assert.notStrictEqual(hashCode('art'), hashCode('rat'), 'Hash code should depend on character order');
assert.strictEqual(hashCode('😄'), hashCode('😄'), 'Hash code should be the same for equal keys');
assert.notStrictEqual(hashCode('😄'), hashCode('😸'), 'Hash code should be different for different emojis');
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
const assert = require('assert');
2+
3+
/**
 * Polynomial rolling hash reduced modulo the bucket count at every step, so
 * the intermediate value never grows beyond `size * multiplier`.
 *
 * @param {*} key value to hash; it is converted with String(), so null and
 *   undefined are accepted as well
 * @param {Number} size number of buckets (defaults to 10)
 * @returns {Number} bucket index in the range [0, size - 1]
 */
function hashFunction(key, size = 10) {
  const multiplier = 1327144003n; // fixed multiplier of the polynomial
  const buckets = BigInt(size); // hoisted: does not change per character

  const hashCode = Array.from(String(key)).reduce((hash, char) => {
    return (hash * multiplier + BigInt(char.codePointAt(0))) % buckets;
  }, 0n);

  return Number(hashCode);
}
16+
17+
// function hashCodeJava(key) {
18+
// let h = 0;
19+
// const value = key.toString();
20+
// const length = value.length >> 1;
21+
22+
// for (let i = 0; i < length; i++) {
23+
// h = 31 * h + value.codePointAt(i);
24+
// }
25+
// return h;
26+
// }
27+
28+
/**
 * Pairs a key with the bucket index hashFunction assigns to it.
 * @param {*} key value to hash
 * @returns {{s: *, hashFn: *}} the key and its hash function result
 */
function printHash(key) {
  const hashFn = hashFunction(key);
  return { s: key, hashFn };
}
31+
32+
// similar ending
33+
// console.table(['00', '10', '20', '30', '40', '50', '60', '70', '80', '90'].map(printHash));
34+
// similar start
35+
// console.table(['10', '11', '12', '13', '14', '15', '16', '17', '18', '19'].map(printHash));
36+
37+
// console.table(['@', '#', '#!', 'stop', 'pots', 'Ca', 'DB', 'polygenelubricants', 'Pneumonoultramicroscopicsilicovolcanoconiosis'].map(printHash));
38+
39+
// Hash very long keys (5100 repetitions of one symbol) and print the index.
const size = 5100;
for (const symbol of ['😁', '1', 'A']) {
  const longKey = symbol.repeat(size);
  console.log(printHash(longKey).hashFn);
}
43+
44+
45+
// all different
46+
// console.table(['cat', 'dog', 'rat', 'art', 10, '10', {a:1}, '😸', '🐶', '😸🐶', '🐶😸'].map(printHash));
47+
// console.log(hashFunction(Array(1500).fill('😁').join('')));
48+
49+
50+
// function test(){
51+
// return 1n + 2n;
52+
// }
53+
54+
// test();
55+
56+
57+
// hashCode(10); //=> 97
58+
// hashCode('10'); //=> 97
59+
60+
// assert.notEqual(hashCode(10), hashCode('10'), 'Hash code should be different with different types');
61+
// assert.notEqual(hashCode('10string'), hashCode('10'), 'Hash code should be different with different types');
62+
63+
// hashCode(10) === hashCode('10'); //=> false
64+
// hashCode('10') === hashCode('10string'); //=> false
65+
// hashCode('art') === hashCode('rat'); //=> false
66+
// hashCode('😄') === hashCode('😄'); //=> true
67+
// hashCode('😄') === hashCode('😸'); //=> false
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Constants collected for the hashing experiments. The array itself is not
// referenced by the visible code; 33n, 8191n and 524287n reappear as literals
// in hashString and hashIndex below.
// NOTE(review): despite the name, 33n (3 * 11) and 39n (3 * 13) are not prime.
const primes = [31n, 33n, 37n, 39n, 41n, 101n, 8191n, 131071n, 524287n, 6700417n, 1327144003n, 9007199254740881n];
2+
3+
/**
 * Reinterprets the 64 bits of a double as a signed 64-bit integer.
 * @param {Number} number value written as float64
 * @returns {BigInt} the same 8 bytes read back as a signed BigInt
 */
function doubleToLongBits(number) {
  const view = new DataView(new ArrayBuffer(Float64Array.BYTES_PER_ELEMENT));
  view.setFloat64(0, number);
  return view.getBigInt64(0);
}
9+
10+
/**
 * Hash code for numbers, derived from the bit pattern of the double.
 * Positive bit patterns are returned unchanged; zero and negative ones are
 * mapped to 2^63 minus the pattern, so the result is never negative.
 * @param {Number} number
 * @returns {BigInt}
 */
function hashNumber(number) {
  const bits = doubleToLongBits(number);
  if (bits > 0) {
    return bits;
  }
  return (2n ** 63n) - bits;
}
14+
15+
/**
 * Polynomial hash code: folds the code points of the key's string form with
 * multiplier 33.
 * @param {any} key converted with String(), so null and undefined hash like
 *   the strings 'null' and 'undefined' instead of throwing
 * @returns {BigInt} unbounded hash code (0n for the empty string)
 */
function hashString(key) {
  return Array.from(String(key)).reduce((hash, char) => {
    return (hash * 33n) + BigInt(char.codePointAt(0));
  }, 0n);
}
24+
25+
/**
 * Hash code for any key: numbers are hashed from their bit pattern, every
 * other key through its string form offset by 2^64.
 * @param {any} key
 * @returns {BigInt}
 */
function hashCode(key) {
  const isNumber = typeof key === 'number';
  return isNumber ? hashNumber(key) : (2n ** 64n) + hashString(key);
}
31+
32+
/**
 * Compresses the hash code of a key into a bucket index using
 * Multiply-Add-Divide (MAD) compression: ((a * hashCode + b) % p) % size.
 * A plain `hashCode(key) % BigInt(size)` is the simpler alternative.
 * @param {Object} [options]
 * @param {any} options.key key to hash
 * @param {Number} [options.size=16] number of buckets
 * @returns {BigInt} index in the range [0, size - 1]
 */
function hashIndex({key, size = 16} = {}) {
  const prime = 524287n; // p: prime number larger than size
  const scale = 8191n; // a: random [1..p-1]
  const shift = 0n; // b: random [0..p-1]

  const mixed = (scale * hashCode(key) + shift) % prime;
  return mixed % BigInt(size);
}
41+
42+
module.exports = {
43+
hashCode,
44+
hashIndex
45+
}
46+
47+
/**
48+
49+
function prepareToPrint(key){
50+
return { key: key.substring ? `${key.substring(0, 10)} (${key.length})` : key, hashCode: hashCode(key), hashIndex10: hashIndex({key, size: 10}) };
51+
}
52+
53+
const res = [-2, -1, 0.5, 1, 2, 3, Math.PI, Number.MAX_VALUE, 2.7976931348623157e+308, 17.976931348623156e+400,
54+
'😁',
55+
'hola',
56+
'@', '#', '#!', 'stop', 'pots', 'Ca', 'DB', 'polygenelubricants',
57+
'Aa',
58+
'BB',
59+
'aoffckzdaoffckzdatafwjsh',
60+
'aoffckzdaoffckzdbhlijevx',
61+
Array(50).fill('1').join(''),
62+
// types
63+
{a:1},
64+
1n,
65+
1,
66+
'1',
67+
function a() {return;}
68+
]
69+
.map(prepareToPrint);
70+
71+
console.table(res);
72+
73+
const res1 = [
74+
Array(1500).fill('1').join(''),
75+
Array(1500).fill('😁').join(''),
76+
// Array(1500).fill('z').join(''),
77+
]
78+
.map(prepareToPrint);
79+
80+
console.log(res1);
81+
82+
// */
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
const HashTable = require('./hash-map');
2+
// const asciichart = require ('asciichart');
3+
// const { Series } = require('pandas-js');
4+
5+
const dict = new HashTable();

// Random keys and integer values used by both timing loops.
const keys = [];
const values = [];
for (let i = 0; i < 1e1; i++) {
  keys.push(makeid());
  // Math.floor instead of parseInt: parseInt stringifies the float first and
  // misreads very small numbers printed in exponent notation (e.g. "9e-7").
  values.push(Math.floor(Math.random() * 1e2));
}

console.time('SET');
for (let i = 0; i < keys.length; i++) {
  dict.set(keys[i], values[i]);
}
console.timeEnd('SET');

console.time('GET');
for (let i = 0; i < keys.length; i++) {
  dict.get(keys[i]);
}
console.timeEnd('GET');

// distribution: number of entries held by each bucket
const usage = dict.buckets.reduce((array, el, index) => {
  array[index] = el.length;
  return array;
}, Array(dict.buckets.length).fill(0));

console.log(usage, dict.buckets);

// Summary statistics computed inline: the pandas-js `Series` import is
// commented out at the top of this file, so `new Series(...)` would throw a
// ReferenceError. Variance is the sample variance (n - 1 denominator).
const mean = usage.reduce((sum, count) => sum + count, 0) / usage.length;
const squaredDeviations = usage.reduce((sum, count) => sum + ((count - mean) ** 2), 0);
const variance = squaredDeviations / (usage.length - 1);

console.log({
  length: usage.length,
  max: Math.max(...usage),
  min: Math.min(...usage),
  count: usage.length,
  mean,
  std: Math.sqrt(variance),
  variance,
});
44+
45+
// console.log({elementsByBucket});
46+
// console.log (asciichart.plot(elementsByBucket));
47+
48+
// Helpers
49+
50+
/**
 * Generates a random 5-symbol identifier from a mixed alphabet (ASCII letters,
 * digits, accented letters, emoji and punctuation).
 *
 * The alphabet is split into code points first: indexing the string with
 * charAt() works on UTF-16 units and would cut the emoji into lone surrogate
 * halves, producing malformed keys.
 *
 * @returns {string} identifier made of 5 symbols (code points)
 */
function makeid() {
  const possible = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ñåé😁🖖1234567890!@#$%^&*()_+{}';<>?";
  const symbols = Array.from(possible);
  const text = [];

  for (let i = 0; i < 5; i++) {
    text.push(symbols[Math.floor(Math.random() * symbols.length)]);
  }

  return text.join('');
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/**
 * Descriptive statistics (count, sum, mean, median, quartiles, spread) over a
 * series of numbers.
 *
 * Related:
 * https://github.com/numbers/numbers.js/blob/master/test/statistic.test.js
 */
class Stats {
  /**
   * @param {number[]} serie numbers to analyze; the array itself is never modified
   */
  constructor(serie = []) {
    this.serie = serie;
  }

  /**
   * Stores the series and derives the cached helpers (sorted copy, parity,
   * length, middle index).
   * @param {number[]} serie
   */
  set serie(serie) {
    this.originalArray = serie;
    // Sort a copy in ascending order: sorting in place would reorder the
    // caller's array, and quartile() needs the lower values first.
    this.sortedArray = [...serie].sort((a, b) => a - b);
    this.length = serie.length;
    // Parity of the number of elements (not of the array itself).
    this.evenSet = this.length % 2 === 0;
    this.oddSet = !this.evenSet;
    this.middleIndex = Math.floor(this.length / 2);
  }

  /**
   * @returns {number[]} the series sorted in ascending order
   */
  get serie() {
    return this.sortedArray;
  }

  /**
   * @returns {number} how many values the series holds
   */
  count() {
    return this.serie.length;
  }

  /**
   * @returns {number} sum of all values (0 for an empty series)
   */
  sum() {
    return this.serie.reduce((sum, el) => sum + el, 0);
  }

  /**
   * The mean is the average of the numbers: a calculated "central" value of a set of numbers.
   *
   * add up all the numbers, then divide by how many numbers there are.
   * @returns {number} the average (NaN for an empty series)
   */
  mean() {
    return this.sum() / this.count();
  }

  /**
   * "middle" value
   * @param {number[]} array values to inspect (defaults to the whole series); not modified
   * @returns {{median: number, medianIndex: ?number}} the median and, for an
   *   odd count, its index in the sorted values (null for an even count,
   *   where the median is the average of the two middle values)
   */
  median(array = this.serie) {
    const sortedArray = [...array].sort((a, b) => a - b);
    const count = sortedArray.length;
    const middle = Math.floor(count / 2);
    if (count % 2 === 0) {
      // even
      return {
        median: (sortedArray[middle] + sortedArray[middle - 1]) / 2,
        medianIndex: null,
      };
    }
    // odd
    return {
      median: sortedArray[middle],
      medianIndex: middle,
    };
  }

  /**
   * A quartile is a type of quantile.
   * The first quartile (Q1) is defined as the middle number between the smallest number and the median of the data set.
   * The second quartile (Q2) is the median of the data.
   * The third quartile (Q3) is the middle value between the median and the highest value of the data set.
   *
   * Use the median to divide the ordered data set into two halves.
   * If there are an odd number of data points in the original ordered data set, include the median (the central value in the ordered list) in both halves.
   * If there are an even number of data points in the original ordered data set, split this data set exactly in half.
   * The lower quartile value is the median of the lower half of the data. The upper quartile value is the median of the upper half of the data.
   * @returns {{'25%': number, '50%': number, '75%': number}}
   */
  quartile() {
    const { median } = this.median();
    // even - split half; odd - the lower half also includes the median
    const lowerHalfEnd = this.evenSet ? this.middleIndex : this.middleIndex + 1;
    const q1 = this.median(this.serie.slice(0, lowerHalfEnd)).median;
    const q3 = this.median(this.serie.slice(this.middleIndex)).median;

    return {
      '25%': q1,
      '50%': median,
      '75%': q3,
    };
  }

  /**
   * Sample variance (n - 1 denominator).
   * @returns {number} NaN when the series has fewer than two values
   */
  variance() {
    const mean = this.mean();
    const squaredDeviations = this.serie.reduce((sum, el) => sum + ((el - mean) ** 2), 0);
    return squaredDeviations / (this.count() - 1);
  }

  /**
   * Sample standard deviation: square root of variance().
   * @returns {number}
   */
  std() {
    return Math.sqrt(this.variance());
  }

  /**
   * Summary of the series: count, mean, standard deviation, extremes and quartiles.
   * @returns {{count: number, mean: number, std: number, min: number,
   *   '25%': number, '50%': number, '75%': number, max: number}}
   */
  describe() {
    const quartiles = this.quartile();
    return {
      count: this.serie.length,
      mean: this.mean(),
      std: this.std(),
      min: this.serie[0],
      '25%': quartiles['25%'],
      '50%': quartiles['50%'],
      '75%': quartiles['75%'],
      max: this.serie[this.serie.length - 1],
    };
  }
}
107+
108+
module.exports = Stats;
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
const Serie = require('./stats');
2+
3+
describe('Stats', () => {
  describe('#median', () => {
    it('should work with odd array', () => {
      const s = new Serie([1, 3, 3, 6, 7, 8, 9]); // 7
      expect(s.median()).toEqual({
        median: 6,
        medianIndex: 3,
      });
    });

    it('should work with even array', () => {
      const s = new Serie([1, 2, 3, 4, 5, 6, 8, 9]); // 8
      expect(s.median()).toEqual({
        median: 4.5,
        medianIndex: null,
      });
    });
  });

  // Still skipped (xdescribe), as in the original spec. The cases below now
  // pair each data set with the quartiles obtained by the method documented
  // on Stats#quartile (median included in both halves for odd counts).
  xdescribe('#quartile', () => {
    it('should get quartile with even data set', () => {
      const s = new Serie([7, 15, 36, 39, 40, 41]); // 6
      expect(s.quartile()).toEqual({
        '25%': 15,
        '50%': 37.5,
        '75%': 40,
      });
    });

    it('should get quartile with odd data set', () => {
      const s = new Serie([6, 7, 15, 36, 39, 40, 41, 42, 43, 47, 49]); // 11
      expect(s.quartile()).toEqual({
        '25%': 25.5,
        '50%': 40,
        '75%': 42.5,
      });
    });
  });
});

0 commit comments

Comments
 (0)
Please sign in to comment.