Skip to content

Commit abfe165

Browse files
committed
[NFC] Add FixedBitSet
I wanted a bit vector that I could use as a compact sorted set of enum values: an inline-allocated, fixed-size array of bits supporting efficient and convenient set operations and iteration. The C++ standard library offers std::bitset, but the API is far from ideal for this purpose. It's positioned as an abstract bit-vector rather than as a set. To use it as a set, you have to turn your values into indices, which for enums means explicitly casting them all in the caller. There's also no iteration operation, so to find the elements of the set, you have to iterate over all possible indices, test whether they're in the set, and (if so) cast the current index back to the enum. Not only is that much more awkward than normal iteration, but it's also substantially less efficient than what you can get by counting trailing zeroes in a mask. LLVM and Swift offer a number of other bit vectors, but they're all dynamically allocated because they're meant to track arbitrary sets. That's not a non-starter for my use case, which is in textual serialization and so rather slow anyway, but it's also not very hard to whip together the optimal data structure here. I have committed the cardinal sin of C++ data structure design and provided the operations as ordinary methods instead of operators.
1 parent 352b3a2 commit abfe165

File tree

3 files changed

+547
-0
lines changed

3 files changed

+547
-0
lines changed

Diff for: include/swift/Basic/FixedBitSet.h

+257
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
//===- swift/Basic/FixedBitSet.h - Fixed-length bitset ----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines the FixedBitSet template, which is basically
10+
// just std::bitset (a fixed-size inline-allocated bit vector) but with:
11+
//
12+
// - a cleaner interface for using the type to implement a set,
13+
// especially a set of an enum type
14+
//
15+
// - a more useful set of operations, such as the ability to iterate
16+
// over the set instead of scanning over all possible elements
17+
//
18+
//===----------------------------------------------------------------------===//
19+
20+
#ifndef LLVM_ADT_FIXEDBITSET_H
21+
#define LLVM_ADT_FIXEDBITSET_H
22+
23+
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stddef.h>
#include <initializer_list>
#include <iterator>
#include <type_traits>
#include "llvm/Support/MathExtras.h"
27+
28+
namespace swift {
29+
30+
namespace detail {
31+
// Selects the unsigned integer type used for one chunk of a bit set
// that can hold numElements distinct values.
//
// Chunks are never wider than 32 bits.  In principle, long sets would
// be happier with pointer-sized chunks, but we expect this to be used
// with relatively short sets, where larger chunks would introduce more
// padding.
//
// The two boolean parameters are computed from numElements by their
// default arguments; they exist only so that the partial
// specializations below can dispatch on them.
template <size_t numElements,
          bool fitsInUInt8 = (numElements <= 8),
          bool fitsInUInt16 = (numElements <= 16)>
struct FixedBitSetStorageType;

// More than 16 elements: 32-bit chunks.
template <size_t count>
struct FixedBitSetStorageType<count, false, false> {
  using type = uint32_t;
};

// From 9 to 16 elements: a single 16-bit chunk is enough.
template <size_t count>
struct FixedBitSetStorageType<count, false, true> {
  using type = uint16_t;
};

// At most 8 elements: a single 8-bit chunk is enough.
template <size_t count>
struct FixedBitSetStorageType<count, true, true> {
  using type = uint8_t;
};
54+
55+
} // end namespace detail
56+
57+
/// A set of integral elements, all of which must be less than
58+
/// numElements. Iteration produces elements in a sorted
59+
/// (numerically increasing) order.
60+
template <size_t numElements, class ValueType = size_t>
61+
class FixedBitSet {
62+
static_assert(std::is_integral<ValueType>::value ||
63+
std::is_enum<ValueType>::value,
64+
"value type is not an integer or enum type");
65+
66+
using ChunkType = typename detail::FixedBitSetStorageType<numElements>::type;
67+
68+
static constexpr size_t chunkSize = CHAR_BIT * sizeof(ChunkType);
69+
static constexpr size_t numChunks =
70+
(numElements + chunkSize - 1) / chunkSize;
71+
72+
/// We represent the elements as an inline array of chunks, with
73+
/// earlier chunks representing lower indices. Any padding bits
74+
/// in the last chunk (if the number of elements isn't an even
75+
/// multiple of the chunk size) are always clear.
76+
ChunkType chunks[numChunks] = {};
77+
78+
static size_t chunkIndex(ValueType i) {
79+
return size_t(i) / chunkSize;
80+
}
81+
static size_t chunkMask(ValueType i) {
82+
return ChunkType(1) << (size_t(i) % chunkSize);
83+
}
84+
85+
public:
86+
/// Build an empty set.
87+
FixedBitSet() {}
88+
89+
/// Build a set containing the given elements.
90+
FixedBitSet(std::initializer_list<ValueType> elements) {
91+
for (const auto &elt : elements)
92+
insert(elt);
93+
}
94+
95+
/// Return true if the set is empty.
96+
bool empty() const {
97+
for (auto chunk : chunks)
98+
if (chunk != 0) return false;
99+
return true;
100+
}
101+
102+
/// Return whether the given element is present in the set.
103+
bool contains(ValueType i) const {
104+
assert(size_t(i) < numElements);
105+
return chunks[chunkIndex(i)] & chunkMask(i);
106+
}
107+
108+
/// Either insert or remove the given element.
109+
void insertOrRemove(ValueType i, bool shouldInsert) {
110+
if (shouldInsert)
111+
insert(i);
112+
else
113+
remove(i);
114+
}
115+
116+
/// Insert the given element.
117+
void insert(ValueType i) {
118+
assert(size_t(i) < numElements);
119+
chunks[chunkIndex(i)] |= chunkMask(i);
120+
}
121+
122+
/// Remove the given element from the set.
123+
void remove(ValueType i) {
124+
assert(size_t(i) < numElements);
125+
chunks[chunkIndex(i)] &= ~chunkMask(i);
126+
}
127+
128+
/// Add every element in the range to this set.
129+
void insertAll() {
130+
// Our invariant is that any padding bits are clear, so
131+
// we need to set bits in the most significant chunk only
132+
// for the bits that are set.
133+
constexpr size_t partialBits = (numElements % chunkSize);
134+
constexpr size_t firstIncompleteChunk =
135+
partialBits == 0 ? numChunks : numChunks - 1;
136+
137+
for (size_t i = 0; i != firstIncompleteChunk; ++i)
138+
chunks[i] = ~ChunkType(0);
139+
140+
if (partialBits != 0)
141+
chunks[numChunks - 1] = (ChunkType(1) << partialBits) - 1;
142+
}
143+
144+
/// Remove all of the elements in this set.
145+
void removeAll() {
146+
for (size_t i = 0; i != numChunks; ++i)
147+
chunks[i] = 0;
148+
}
149+
150+
/// Add all of the elements in the given set.
151+
void insertAll(const FixedBitSet &other) {
152+
for (size_t i = 0; i != numChunks; ++i) {
153+
chunks[i] |= other.chunks[i];
154+
}
155+
}
156+
157+
/// Remove all of the elements that aren't in the given set.
158+
void removeAllExcept(const FixedBitSet &other) {
159+
for (size_t i = 0; i != numChunks; ++i) {
160+
chunks[i] &= other.chunks[i];
161+
}
162+
}
163+
164+
/// Remove all of the elements that are also in the given set.
165+
void removeAll(const FixedBitSet &other) {
166+
for (size_t i = 0; i != numChunks; ++i) {
167+
chunks[i] &= ~other.chunks[i];
168+
}
169+
}
170+
171+
class iterator {
172+
const ChunkType *chunks;
173+
size_t chunkIndex;
174+
175+
/// Our possibly-edited copy of the current chunk. As we iterate
176+
/// past elements, we clear the corresponding bit here and then find
177+
/// the next chunk that has a bit set. The invariant is that either
178+
/// this is non-zero or chunkIndex == numChunks.
179+
size_t remainingChunk;
180+
181+
friend class FixedBitSet;
182+
183+
// Constructor for begin().
184+
iterator(const ChunkType *chunks, size_t chunkIndex,
185+
size_t remainingChunk)
186+
: chunks(chunks), chunkIndex(chunkIndex),
187+
remainingChunk(remainingChunk) {
188+
advance();
189+
}
190+
191+
/// Constructor for end().
192+
iterator(const ChunkType *chunks)
193+
: chunks(chunks), chunkIndex(numChunks), remainingChunk(0) {}
194+
195+
/// Find the next element, if any, or else set chunkIndex to numChunks.
196+
void advance() {
197+
while (!remainingChunk) {
198+
assert(chunkIndex < numChunks);
199+
if (++chunkIndex == numChunks) break;
200+
remainingChunk = chunks[chunkIndex];
201+
}
202+
}
203+
204+
public:
205+
iterator &operator++() {
206+
assert(remainingChunk && "incrementing a completed iterator");
207+
// rc = aaaaaaaa100
208+
// rc - 1 = aaaaaaaa011
209+
remainingChunk &= (remainingChunk - 1);
210+
advance();
211+
return *this;
212+
}
213+
iterator operator++(int) {
214+
iterator copy = *this;
215+
++*this;
216+
return copy;
217+
}
218+
219+
ValueType operator*() const {
220+
assert(remainingChunk && "dereferencing a completed iterator");
221+
return ValueType(chunkIndex * chunkSize
222+
+ llvm::findFirstSet(remainingChunk,
223+
llvm::ZB_Undefined));
224+
}
225+
226+
bool operator==(const iterator &other) const {
227+
assert(chunks == other.chunks &&
228+
"comparing iterators from different bit Sets");
229+
return chunkIndex == other.chunkIndex
230+
&& remainingChunk == other.remainingChunk;
231+
}
232+
bool operator!=(const iterator &other) const {
233+
return !(*this == other);
234+
}
235+
};
236+
237+
iterator begin() const {
238+
return iterator(chunks, 0, chunks[0]);
239+
}
240+
iterator end() const {
241+
return iterator(chunks);
242+
}
243+
244+
bool operator==(const FixedBitSet &other) const {
245+
for (size_t i = 0; i != numChunks; ++i)
246+
if (chunks[i] != other.chunks[i])
247+
return false;
248+
return true;
249+
}
250+
bool operator!=(const FixedBitSet &other) const {
251+
return !(*this == other);
252+
}
253+
};
254+
255+
} // end namespace swift
256+
257+
#endif

Diff for: unittests/Basic/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ add_swift_unittest(SwiftBasicTests
1616
EncodedSequenceTest.cpp
1717
ExponentialGrowthAppendingBinaryByteStreamTests.cpp
1818
FileSystemTest.cpp
19+
FixedBitSetTest.cpp
1920
FrozenMultiMapTest.cpp
2021
ImmutablePointerSetTest.cpp
2122
JSONSerialization.cpp

0 commit comments

Comments
 (0)