Skip to content

Commit addf1bf

Browse files
author
Amos Brocco
committed
Remove unneeded include, improve Memento code, add Jump CH
1 parent 252f15b commit addf1bf

File tree

6 files changed

+134
-107
lines changed

6 files changed

+134
-107
lines changed

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ add_executable(speed_test speed_test.cpp
3232
anchor/AnchorHashQre.cpp anchor/AnchorHashQre.hpp anchor/misc/crc32c_sse42_u64.h
3333
anchor/anchorengine.h
3434
memento/mashtable.h
35+
jump/jumpengine.h
3536
)
3637

3738
add_executable(balance balance.cpp
@@ -41,12 +42,15 @@ add_executable(balance balance.cpp
4142
anchor/AnchorHashQre.cpp anchor/AnchorHashQre.hpp anchor/misc/crc32c_sse42_u64.h
4243
anchor/anchorengine.h
4344
memento/mashtable.h
45+
jump/jumpengine.h
4446
)
4547

4648
add_executable(mashtable_test mashtable_test.cpp memento/mashtable.h)
4749

48-
target_include_directories(speed_test PRIVATE ${PCG_INCLUDE_DIRS})
49-
target_include_directories(balance PRIVATE ${PCG_INCLUDE_DIRS})
50+
if(WITH_PCG32)
51+
target_include_directories(speed_test PRIVATE ${PCG_INCLUDE_DIRS})
52+
target_include_directories(balance PRIVATE ${PCG_INCLUDE_DIRS})
53+
endif()
5054
target_link_libraries(speed_test PRIVATE xxHash::xxhash fmt::fmt cxxopts::cxxopts)
5155
target_link_libraries(balance PRIVATE xxHash::xxhash fmt::fmt cxxopts::cxxopts)
5256

balance.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "anchor/anchorengine.h"
2525
#include "memento/mashtable.h"
2626
#include "memento/mementoengine.h"
27+
#include "jump/jumpengine.h"
2728
#include <fmt/core.h>
2829
#include <fstream>
2930
#include <unordered_map>
@@ -124,7 +125,7 @@ int main(int argc, char *argv[]) {
124125
cxxopts::Options options("speed_test", "MementoHash vs AnchorHash benchmark");
125126
options.add_options()(
126127
"Algorithm",
127-
"Algorithm (null|baseline|anchor|memento|mementoboost|mementomash)",
128+
"Algorithm (null|baseline|anchor|memento|mementoboost|mementomash|jump)",
128129
cxxopts::value<std::string>())(
129130
"AnchorSet", "Size of the AnchorSet (ignored by Memento)",
130131
cxxopts::value<int>())("WorkingSet", "Size of the WorkingSet",
@@ -195,6 +196,10 @@ int main(int argc, char *argv[]) {
195196
return bench<MementoEngine<MashTable>>("Memento<MashTable>", filename,
196197
anchor_set, working_set,
197198
num_removals, num_keys);
199+
} else if (algorithm == "jump") {
200+
return bench<JumpEngine>("JumpEngine", filename,
201+
anchor_set, working_set,
202+
num_removals, num_keys);
198203
} else {
199204
fmt::println("Unknown algorithm {}", algorithm);
200205
return 2;

jump/jumpengine.h

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright (c) 2023 Amos Brocco.
3+
*
4+
* This program is free software: you can redistribute it and/or modify
5+
* it under the terms of the GNU General Public License as published by
6+
* the Free Software Foundation, version 3.
7+
*
8+
* This program is distributed in the hope that it will be useful, but
9+
* WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11+
* General Public License for more details.
12+
*
13+
* You should have received a copy of the GNU General Public License
14+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
15+
*/
16+
#ifndef JUMPENGINE_H
17+
#define JUMPENGINE_H
18+
#include <cstdint>
19+
20+
/**
 * Consistent-hash engine based on the Jump Consistent Hash algorithm.
 *
 * The engine only tracks the number of buckets: keys are mapped onto
 * the range [0, m_num_buckets - 1], buckets are appended at the end and
 * removed from the end.
 */
class JumpEngine final {
public:
    /**
     * Creates an engine with the given number of buckets.
     *
     * The first (unnamed) argument is ignored.
     * NOTE(review): it presumably exists so the constructor has the same
     * (uint32_t, uint32_t) shape as the other engines - confirm against bench<>.
     *
     * @param working_set the initial number of buckets
     */
    JumpEngine(uint32_t, uint32_t working_set)
        : m_num_buckets{working_set}
    {}

    /**
     * Computes a CRC32c of the key using the x86-64 crc32q instruction.
     * From AnchorHash.
     *
     * @param key the 64-bit value to hash
     * @param seed the initial CRC value
     * @return the low 32 bits of the register holding the resulting CRC
     */
    static uint32_t crc32c_sse42_u64(uint64_t key, uint64_t seed) {
        __asm__ volatile("crc32q %[key], %[seed];"
                         : [seed] "+r"(seed)
                         : [key] "rm"(key));
        return static_cast<uint32_t>(seed);
    }

    /**
     * Returns the bucket where the given key should be mapped.
     * This implementation is the same as the one provided by the Jump authors.
     *
     * @param key the key to map
     * @param seed the initial seed for CRC32c
     * @return the related bucket
     */
    uint32_t getBucketCRC32c(uint64_t key, uint64_t seed) const noexcept
    {
        uint64_t hash = crc32c_sse42_u64(key, seed);
        // Widen once so the loop condition compares two signed 64-bit values.
        const int64_t num_buckets = m_num_buckets;
        // The initial value of b is only observable when there are no buckets,
        // because the loop below overwrites it on its first iteration.
        int64_t b = 1;
        int64_t j = 0;
        while (j < num_buckets) {
            b = j;
            // 64-bit linear congruential step driving the pseudo-random jumps.
            hash = hash * 2862933555777941757ULL + 1;
            j = static_cast<int64_t>(
                (b + 1) * (double(1LL << 31) / double((hash >> 33) + 1)));
        }
        return static_cast<uint32_t>(b);
    }

    /**
     * Adds a new bucket to the engine.
     *
     * @return the added bucket
     */
    uint32_t addBucket() noexcept { return m_num_buckets++; }

    /**
     * Removes a bucket from the engine.
     * Since Jump does not support random removals, the argument is ignored
     * and the last bucket is always the one removed.
     *
     * @return the removed bucket
     */
    uint32_t removeBucket(uint32_t) noexcept
    {
        return --m_num_buckets;
    }

private:
    uint32_t m_num_buckets;  // current number of buckets
};
76+
77+
#endif // JUMPENGINE_H

memento/memento.h

Lines changed: 29 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
* You should have received a copy of the GNU General Public License
1717
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1818
*/
19-
#include <optional>
20-
#include <boost/unordered/unordered_flat_map.hpp>
2119

2220
template<template <typename...> class MementoMap, typename... Args>
2321
class Memento final
@@ -36,23 +34,24 @@ class Memento final
3634
};
3735

3836
MementoMap<uint32_t, Entry> m_table;
37+
uint32_t m_table_size;
3938

4039
public:
41-
Memento();
40+
Memento() {}
4241

4342
/**
4443
* Returns the size of the replacement set.
4544
*
4645
* @return the size of the replacement set
4746
*/
48-
int32_t size() const noexcept;
47+
// Reports the tracked element count, converted to the signed return type.
int32_t size() const noexcept { return static_cast<int32_t>(m_table_size); }
4948

5049
/**
5150
* Returns {@code true} if the replacement set is empty.
5251
*
5352
* @return {@code true} if empty, {@code false} otherwise
5453
*/
55-
bool isEmpty() const noexcept;
54+
// True exactly when the tracked element count is zero.
bool isEmpty() const noexcept { return !m_table_size; }
5655

5756
/**
5857
* Remembers that the given bucket has been removed
@@ -67,7 +66,11 @@ class Memento final
6766
* @param prevRemoved the previous removed bucket
6867
* @return the value of the new last removed bucket
6968
*/
70-
int32_t remember(uint32_t bucket, uint32_t replacer, uint32_t prevRemoved ) noexcept;
69+
int32_t remember(uint32_t bucket, uint32_t replacer, uint32_t prevRemoved ) noexcept {
    // Store the replacement info for the removed bucket, keyed by that bucket.
    m_table.emplace(bucket, Entry{replacer, prevRemoved});
    // The element counter is bumped unconditionally after the insertion.
    m_table_size += 1;
    // The bucket just removed becomes the new "last removed" bucket.
    return bucket;
}
7174

7275
/**
7376
* Restores the given bucket by removing it
@@ -79,7 +82,15 @@ class Memento final
7982
* @param bucket the bucket to restore
8083
* @return the new last removed bucket
8184
*/
82-
int32_t restore(uint32_t bucket) noexcept;
85+
// Forgets the removal of `bucket` and returns the bucket removed before it;
// with an empty memory the result is bucket + 1.
int32_t restore(uint32_t bucket) noexcept {
    if (m_table_size == 0) {
        return bucket + 1;
    }
    // NOTE(review): assumes `bucket` is present in the table; the result of
    // find() is not checked against end() - confirm callers guarantee this.
    auto e = m_table.find(bucket);
    // Copy the value out BEFORE erasing: erase() invalidates the iterator,
    // so reading e->second afterwards would be a use-after-erase.
    const auto prevRemoved = e->second.prevRemoved;
    m_table.erase(e);
    --m_table_size;
    return prevRemoved;
}
8394

8495
/**
8596
* Returns the replacer of the bucket if it
@@ -93,87 +104,16 @@ class Memento final
93104
* @param bucket the bucket to search for
94105
* @return the replacing bucket if any, {@code -1} otherwise
95106
*/
96-
std::optional<int32_t> replacer(int32_t bucket ) const noexcept;
107+
int32_t replacer(int32_t bucket ) const noexcept {
108+
if (m_table_size == 0) {
109+
return -1;
110+
}
111+
auto e = m_table.find(bucket);
112+
if (e != m_table.end()) {
113+
return e->second.replacer;
114+
} else {
115+
return -1;
116+
}
117+
}
97118
};
98-
99-
template<template <typename...> class MementoMap, typename... Args>
100-
Memento<MementoMap,Args...>::Memento() {}
101-
102-
/**
103-
* Returns the size of the replacement set.
104-
*
105-
* @return the size of the replacement set
106-
*/
107-
template<template <typename...> class MementoMap, typename... Args>
108-
inline int32_t Memento<MementoMap,Args...>::size() const noexcept { return m_table.size(); }
109-
110-
/**
111-
* Returns {@code true} if the replacement set is empty.
112-
*
113-
* @return {@code true} if empty, {@code false} otherwise
114-
*/
115-
template<template <typename...> class MementoMap, typename... Args>
116-
inline bool Memento<MementoMap,Args...>::isEmpty() const noexcept { return m_table.empty(); }
117-
118-
/**
119-
* Remembers that the given bucket has been removed
120-
* and that was replaced by the given replacer.
121-
* <p>
122-
* This method also stores the last removed bucket
123-
* (before the current one) to create the sequence
124-
* of removals.
125-
*
126-
* @param bucket the removed bucket
127-
* @param replacer the replacing bucket
128-
* @param prevRemoved the previous removed bucket
129-
* @return the value of the new last removed bucket
130-
*/
131-
template<template <typename...> class MementoMap, typename... Args>
132-
inline int32_t Memento<MementoMap,Args...>::remember(uint32_t bucket, uint32_t replacer,
133-
uint32_t prevRemoved) noexcept {
134-
m_table.emplace(bucket, Entry{replacer, prevRemoved});
135-
return bucket;
136-
}
137-
138-
/**
139-
* Restores the given bucket by removing it
140-
* from the memory.
141-
* <p>
142-
* If the memory is empty the last removed bucket
143-
* becomes the given bucket + 1.
144-
*
145-
* @param bucket the bucket to restore
146-
* @return the new last removed bucket
147-
*/
148-
template<template <typename...> class MementoMap, typename... Args>
149-
inline int32_t Memento<MementoMap,Args...>::restore(uint32_t bucket) noexcept {
150-
if (m_table.empty()) {
151-
return bucket + 1;
152-
}
153-
auto e = m_table.find(bucket);
154-
m_table.erase(e);
155-
return e->second.prevRemoved;
156-
}
157-
158-
/**
159-
* Returns the replacer of the bucket if it
160-
* was removed, otherwise returns {@code -1}.
161-
* <p>
162-
* The value returned by this method represents
163-
* both the bucket that replaced the given one
164-
* and the size of the working set after removing
165-
* the given bucket.
166-
*
167-
* @param bucket the bucket to search for
168-
* @return the replacing bucket if any, {@code std::nullopt} otherwise
169-
*/
170-
template<template <typename...> class MementoMap, typename... Args>
171-
inline std::optional<int32_t> Memento<MementoMap,Args...>::replacer(int32_t bucket) const noexcept {
172-
auto e = m_table.find(bucket);
173-
if (e != m_table.end()) {
174-
return e->second.replacer;
175-
} else {
176-
return std::nullopt;
177-
}
178-
}
179119
#endif // MEMENTO_H

memento/mementoengine.h

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#ifndef MEMENTOENGINE_H
1717
#define MEMENTOENGINE_H
1818
#include "memento.h"
19-
#include <cassert>
2019
#include <string_view>
2120
#include <xxhash.h>
2221

@@ -116,20 +115,19 @@ MementoEngine<MementoMap, Args...>::MementoEngine(uint32_t, uint32_t size)
116115
template <template <typename...> class MementoMap, typename... Args>
117116
uint32_t MementoEngine<MementoMap, Args...>::getBucket(
118117
std::string_view key) const noexcept {
119-
assert(m_bArraySize > 0);
120118
/*
121119
* We invoke JumpHash to get a bucket
122120
* in the range [0,bArraySize-1].
123121
*/
124-
auto hash{XXH64(key.data(), key.size(), 0)};
122+
const auto hash{XXH64(key.data(), key.size(), 0)};
125123
auto b = JumpConsistentHash(hash, m_bArraySize);
126124

127125
/*
128126
* We check if the bucket was removed, if not we are done.
129127
* If the bucket was removed the replacing bucket is >= 0,
130128
* otherwise it is -1.
131129
*/
132-
auto replacer = m_memento.replacer(b).value_or(-1);
130+
auto replacer = m_memento.replacer(b);
133131
while (replacer >= 0) {
134132

135133
/*
@@ -139,18 +137,18 @@ uint32_t MementoEngine<MementoMap, Args...>::getBucket(
139137
* represents the size of the working set when the bucket
140138
* was removed and get a new bucket in [0,replacer-1].
141139
*/
142-
auto h = XXH64(key.data(), key.size(), b);
140+
const auto h = XXH64(key.data(), key.size(), b);
143141
b = h % replacer;
144142

145143
/*
146144
* If we hit a removed bucket we follow the replacements
147145
* until we get a working bucket or a bucket in the range
148146
* [0,replacer-1]
149147
*/
150-
auto r = m_memento.replacer(b).value_or(-1);
148+
auto r = m_memento.replacer(b);
151149
while (r >= replacer) {
152150
b = r;
153-
r = m_memento.replacer(b).value_or(-1);
151+
r = m_memento.replacer(b);
154152
}
155153

156154
/* Finally we update the entry of the external loop. */
@@ -163,9 +161,7 @@ uint32_t MementoEngine<MementoMap, Args...>::getBucket(
163161
template <template <typename...> class MementoMap, typename... Args>
164162
uint32_t MementoEngine<MementoMap, Args...>::getBucketCRC32c(
165163
uint64_t key, uint64_t seed) const noexcept {
166-
assert(m_bArraySize > 0);
167-
168-
uint32_t hash = crc32c_sse42_u64(key, seed);
164+
const auto hash = crc32c_sse42_u64(key, seed);
169165
/*
170166
* We invoke JumpHash to get a bucket
171167
* in the range [0,bArraySize-1].
@@ -177,7 +173,7 @@ uint32_t MementoEngine<MementoMap, Args...>::getBucketCRC32c(
177173
* If the bucket was removed the replacing bucket is >= 0,
178174
* otherwise it is -1.
179175
*/
180-
auto replacer = m_memento.replacer(b).value_or(-1);
176+
auto replacer = m_memento.replacer(b);
181177
while (replacer >= 0) {
182178

183179
/*
@@ -187,18 +183,18 @@ uint32_t MementoEngine<MementoMap, Args...>::getBucketCRC32c(
187183
* represents the size of the working set when the bucket
188184
* was removed and get a new bucket in [0,replacer-1].
189185
*/
190-
auto h = crc32c_sse42_u64(key, b);
186+
const auto h = crc32c_sse42_u64(key, b);
191187
b = h % replacer;
192188

193189
/*
194190
* If we hit a removed bucket we follow the replacements
195191
* until we get a working bucket or a bucket in the range
196192
* [0,replacer-1]
197193
*/
198-
auto r = m_memento.replacer(b).value_or(-1);
194+
auto r = m_memento.replacer(b);
199195
while (r >= replacer) {
200196
b = r;
201-
r = m_memento.replacer(b).value_or(-1);
197+
r = m_memento.replacer(b);
202198
}
203199

204200
/* Finally we update the entry of the external loop. */

0 commit comments

Comments
 (0)