diff --git a/llvm/include/llvm/ADT/HashMappedTrie.h b/llvm/include/llvm/ADT/HashMappedTrie.h new file mode 100644 index 0000000000000..7131e668d0366 --- /dev/null +++ b/llvm/include/llvm/ADT/HashMappedTrie.h @@ -0,0 +1,339 @@ +//===- HashMappedTrie.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_HASHMAPPEDTRIE_H +#define LLVM_ADT_HASHMAPPEDTRIE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include + +namespace llvm { + +/// Base class for a lock-free thread-safe hash-mapped trie. +class ThreadSafeHashMappedTrieBase { +public: + enum : size_t { TrieContentBaseSize = 4 }; + +private: + template struct AllocValueType { + char Base[TrieContentBaseSize]; + std::aligned_union_t Content; + }; + +protected: + template static constexpr size_t getContentAllocSize() { + return sizeof(AllocValueType); + } + template static constexpr size_t getContentAllocAlign() { + return alignof(AllocValueType); + } + template static constexpr size_t getContentOffset() { + return offsetof(AllocValueType, Content); + } + +public: + void operator delete(void *Ptr) { ::free(Ptr); } + + static constexpr size_t DefaultNumRootBits = 6; + static constexpr size_t DefaultNumSubtrieBits = 4; + + LLVM_DUMP_METHOD void dump() const; + void print(raw_ostream &OS) const; + +protected: + /// Result of a lookup. Suitable for an insertion hint. Maybe could be + /// expanded into an iterator of sorts, but likely not useful (visiting + /// everything in the trie should probably be done some way other than + /// through an iterator pattern). + class PointerBase { + protected: + void *get() const { return I == -2u ? P : nullptr; } + + public: + PointerBase() noexcept {} + PointerBase(PointerBase &&) = default; + PointerBase(const PointerBase &) = default; + PointerBase &operator=(PointerBase &&) = default; + PointerBase &operator=(const PointerBase &) = default; + + private: + friend class ThreadSafeHashMappedTrieBase; + explicit PointerBase(void *Content) : P(Content), I(-2u) {} + PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {} + + bool isHint() const { return I != -1u && I != -2u; } + + void *P = nullptr; + unsigned I = -1u; + unsigned B = 0; + }; + + PointerBase find(ArrayRef Hash) const; + + /// Insert and return the stored content. + PointerBase + insert(PointerBase Hint, ArrayRef Hash, + function_ref Hash)> + Constructor); + + ThreadSafeHashMappedTrieBase() = delete; + + ThreadSafeHashMappedTrieBase(size_t ContentAllocSize, + size_t ContentAllocAlign, size_t ContentOffset, + Optional NumRootBits = None, + Optional NumSubtrieBits = None); + + /// Destructor, which asserts if there's anything to do. Subclasses should + /// call \a destroyImpl(). + /// + /// \pre \a destroyImpl() was already called. + ~ThreadSafeHashMappedTrieBase(); + void destroyImpl(function_ref Destructor); + + ThreadSafeHashMappedTrieBase(ThreadSafeHashMappedTrieBase &&RHS); + + // Move assignment can be implemented in a thread-safe way if NumRootBits and + // NumSubtrieBits are stored inside the Root. + ThreadSafeHashMappedTrieBase & + operator=(ThreadSafeHashMappedTrieBase &&RHS) = delete; + + // No copy. 
+ ThreadSafeHashMappedTrieBase(const ThreadSafeHashMappedTrieBase &) = delete; + ThreadSafeHashMappedTrieBase & + operator=(const ThreadSafeHashMappedTrieBase &) = delete; + +private: + const unsigned short ContentAllocSize; + const unsigned short ContentAllocAlign; + const unsigned short ContentOffset; + unsigned short NumRootBits; + unsigned short NumSubtrieBits; + struct ImplType; + // ImplPtr is owned by ThreadSafeHashMappedTrieBase and needs to be freed in + // destoryImpl. + std::atomic ImplPtr; + ImplType &getOrCreateImpl(); + ImplType *getImpl() const; +}; + +/// Lock-free thread-safe hash-mapped trie. +template +class ThreadSafeHashMappedTrie : ThreadSafeHashMappedTrieBase { +public: + using HashT = std::array; + + class LazyValueConstructor; + struct value_type { + const HashT Hash; + T Data; + + value_type(value_type &&) = default; + value_type(const value_type &) = default; + + value_type(ArrayRef Hash, const T &Data) + : Hash(makeHash(Hash)), Data(Data) {} + value_type(ArrayRef Hash, T &&Data) + : Hash(makeHash(Hash)), Data(std::move(Data)) {} + + private: + friend class LazyValueConstructor; + + struct EmplaceTag {}; + template + value_type(ArrayRef Hash, EmplaceTag, ArgsT &&...Args) + : Hash(makeHash(Hash)), Data(std::forward(Args)...) {} + + static HashT makeHash(ArrayRef HashRef) { + HashT Hash; + std::copy(HashRef.begin(), HashRef.end(), Hash.data()); + return Hash; + } + }; + + using ThreadSafeHashMappedTrieBase::operator delete; + using HashType = HashT; + + using ThreadSafeHashMappedTrieBase::dump; + using ThreadSafeHashMappedTrieBase::print; + +private: + template class PointerImpl : PointerBase { + friend class ThreadSafeHashMappedTrie; + + ValueT *get() const { + if (void *B = PointerBase::get()) + return reinterpret_cast(B); + return nullptr; + } + + public: + ValueT &operator*() const { + assert(get()); + return *get(); + } + ValueT *operator->() const { + assert(get()); + return get(); + } + explicit operator bool() const { return get(); } + + PointerImpl() = default; + PointerImpl(PointerImpl &&) = default; + PointerImpl(const PointerImpl &) = default; + PointerImpl &operator=(PointerImpl &&) = default; + PointerImpl &operator=(const PointerImpl &) = default; + + protected: + PointerImpl(PointerBase Result) : PointerBase(Result) {} + }; + +public: + class pointer; + class const_pointer; + class pointer : public PointerImpl { + friend class ThreadSafeHashMappedTrie; + friend class const_pointer; + + public: + pointer() = default; + pointer(pointer &&) = default; + pointer(const pointer &) = default; + pointer &operator=(pointer &&) = default; + pointer &operator=(const pointer &) = default; + + private: + pointer(PointerBase Result) : pointer::PointerImpl(Result) {} + }; + + class const_pointer : public PointerImpl { + friend class ThreadSafeHashMappedTrie; + + public: + const_pointer() = default; + const_pointer(const_pointer &&) = default; + const_pointer(const const_pointer &) = default; + const_pointer &operator=(const_pointer &&) = default; + const_pointer &operator=(const const_pointer &) = default; + + const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {} + + private: + const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {} + }; + + class LazyValueConstructor { + public: + value_type &operator()(T &&RHS) { + assert(Mem && "Constructor already called, or moved away"); + return assign(::new (Mem) value_type(Hash, std::move(RHS))); + } + value_type &operator()(const T &RHS) { + assert(Mem && "Constructor already called, or 
moved away"); + return assign(::new (Mem) value_type(Hash, RHS)); + } + template value_type &emplace(ArgsT &&...Args) { + assert(Mem && "Constructor already called, or moved away"); + return assign(::new (Mem) + value_type(Hash, typename value_type::EmplaceTag{}, + std::forward(Args)...)); + } + + LazyValueConstructor(LazyValueConstructor &&RHS) + : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) { + RHS.Mem = nullptr; // Moved away, cannot call. + } + ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); } + + private: + value_type &assign(value_type *V) { + Mem = nullptr; + Result = V; + return *V; + } + friend class ThreadSafeHashMappedTrie; + LazyValueConstructor() = delete; + LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef Hash) + : Mem(Mem), Result(Result), Hash(Hash) { + assert(Hash.size() == sizeof(HashT) && "Invalid hash"); + assert(Mem && "Invalid memory for construction"); + } + void *Mem; + value_type *&Result; + ArrayRef Hash; + }; + + /// Insert with a hint. Default-constructed hint will work, but it's + /// recommended to start with a lookup to avoid overhead in object creation + /// if it already exists. + pointer insertLazy(const_pointer Hint, ArrayRef Hash, + function_ref OnConstruct) { + return pointer(ThreadSafeHashMappedTrieBase::insert( + Hint, Hash, [&](void *Mem, ArrayRef Hash) { + value_type *Result = nullptr; + OnConstruct(LazyValueConstructor(Mem, Result, Hash)); + return Result->Hash.data(); + })); + } + + pointer insertLazy(ArrayRef Hash, + function_ref OnConstruct) { + return insertLazy(const_pointer(), Hash, OnConstruct); + } + + pointer insert(const_pointer Hint, value_type &&HashedData) { + return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) { + C(std::move(HashedData.Data)); + }); + } + + pointer insert(const_pointer Hint, const value_type &HashedData) { + return insertLazy(Hint, HashedData.Hash, + [&](LazyValueConstructor C) { C(HashedData.Data); }); + } + + pointer find(ArrayRef Hash) { + assert(Hash.size() == std::tuple_size::value); + return ThreadSafeHashMappedTrieBase::find(Hash); + } + + const_pointer find(ArrayRef Hash) const { + assert(Hash.size() == std::tuple_size::value); + return ThreadSafeHashMappedTrieBase::find(Hash); + } + + ThreadSafeHashMappedTrie(Optional NumRootBits = None, + Optional NumSubtrieBits = None) + : ThreadSafeHashMappedTrieBase(getContentAllocSize(), + getContentAllocAlign(), + getContentOffset(), + NumRootBits, NumSubtrieBits) {} + + ~ThreadSafeHashMappedTrie() { + if (std::is_trivially_destructible::value) + this->destroyImpl(nullptr); + else + this->destroyImpl( + [](void *P) { static_cast(P)->~value_type(); }); + } + + // Move constructor okay. + ThreadSafeHashMappedTrie(ThreadSafeHashMappedTrie &&) = default; + + // No move assignment or any copy. + ThreadSafeHashMappedTrie &operator=(ThreadSafeHashMappedTrie &&) = delete; + ThreadSafeHashMappedTrie(const ThreadSafeHashMappedTrie &) = delete; + ThreadSafeHashMappedTrie & + operator=(const ThreadSafeHashMappedTrie &) = delete; +}; + +} // namespace llvm + +#endif // LLVM_ADT_HASHMAPPEDTRIE_H diff --git a/llvm/include/llvm/ADT/LazyAtomicPointer.h b/llvm/include/llvm/ADT/LazyAtomicPointer.h new file mode 100644 index 0000000000000..890584746220d --- /dev/null +++ b/llvm/include/llvm/ADT/LazyAtomicPointer.h @@ -0,0 +1,166 @@ +//===- LazyAtomicPointer.----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_LAZYATOMICPOINTER_H +#define LLVM_ADT_LAZYATOMICPOINTER_H + +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/Support/Compiler.h" +#include +#include + +namespace llvm { + +/// Atomic pointer that's lock-free, but that can coordinate concurrent writes +/// from a lazy generator. Should be reserved for cases where concurrent uses of +/// a generator for the same storage is unlikely. +/// +/// The laziness comes in with \a loadOrGenerate(), which lazily calls the +/// provided generator ONLY when the value is currently \c nullptr. With +/// concurrent calls, only one generator is called and the rest see that value. +/// +/// Most other APIs treat an in-flight \a loadOrGenerate() as if \c nullptr +/// were stored. APIs that are required to write a value will spin. +/// +/// The underlying storage is \a std::atomic. +/// +/// TODO: In C++20, use std::atomic::wait() instead of spinning and call +/// std::atomic::notify_all() in \a loadOrGenerate(). +template class LazyAtomicPointer { + static constexpr uintptr_t getNull() { return 0; } + static constexpr uintptr_t getBusy() { return -1ULL; } + + static T *makePointer(uintptr_t Value) { + assert(Value != getBusy()); + return Value ? reinterpret_cast(Value) : nullptr; + } + static uintptr_t makeRaw(T *Value) { + uintptr_t Raw = Value ? reinterpret_cast(Value) : getNull(); + assert(Raw != getBusy()); + return Raw; + } + +public: + /// Store a value. Waits for concurrent \a loadOrGenerate() calls. + void store(T *Value) { return (void)exchange(Value); } + + /// Set a value. Return the old value. Waits for concurrent \a + /// loadOrGenerate() calls. + T *exchange(T *Value) { + // Note: the call to compare_exchange_weak() fails "spuriously" if the + // current value is \a getBusy(), causing the loop to spin. + T *Old = nullptr; + while (!compare_exchange_weak(Old, Value)) { + } + return Old; + } + + /// Compare-exchange. Returns \c false if there is a concurrent \a + /// loadOrGenerate() call, setting \p ExistingValue to \c nullptr. + bool compare_exchange_weak(T *&ExistingValue, T *NewValue) { + uintptr_t RawExistingValue = makeRaw(ExistingValue); + if (Storage.compare_exchange_weak(RawExistingValue, makeRaw(NewValue))) + return true; + + /// Report the existing value as "None" if busy. + if (RawExistingValue == getBusy()) + ExistingValue = nullptr; + else + ExistingValue = makePointer(RawExistingValue); + return false; + } + + /// Compare-exchange. Keeps trying if there is a concurrent + /// \a loadOrGenerate() call. + bool compare_exchange_strong(T *&ExistingValue, T *NewValue) { + uintptr_t RawExistingValue = makeRaw(ExistingValue); + const uintptr_t OriginalRawExistingValue = RawExistingValue; + if (Storage.compare_exchange_strong(RawExistingValue, makeRaw(NewValue))) + return true; + + /// Keep trying as long as it's busy. + if (LLVM_UNLIKELY(RawExistingValue == getBusy())) { + while (RawExistingValue == getBusy()) { + RawExistingValue = OriginalRawExistingValue; + if (Storage.compare_exchange_weak(RawExistingValue, makeRaw(NewValue))) + return true; + } + } + ExistingValue = makePointer(RawExistingValue); + return false; + } + + /// Return the current stored value. Returns \a None if there is a concurrent + /// \a loadOrGenerate() in flight. 
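+  ///
+  /// Illustrative usage sketch of the lazy-generation API (the `Node` type
+  /// and `createNode()` factory below are hypothetical):
+  ///
+  ///   LazyAtomicPointer<Node> Ptr;
+  ///   // Only one thread's generator runs; it must not return nullptr.
+  ///   Node &N = Ptr.loadOrGenerate([&] { return createNode(); });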
+ T *load() const { + uintptr_t RawValue = Storage.load(); + return RawValue == getBusy() ? nullptr : makePointer(RawValue); + } + + /// Get the current value, or call \p Generator to generate a value. + /// Guarantees that only one thread's \p Generator will run. + /// + /// \pre \p Generator doesn't return \c nullptr. + T &loadOrGenerate(function_ref Generator) { + // Return existing value, if already set. + uintptr_t Raw = Storage.load(); + if (Raw != getNull() && Raw != getBusy()) + return *makePointer(Raw); + + // Try to mark as busy, then generate and store a new value. + if (LLVM_LIKELY(Raw == getNull() && + Storage.compare_exchange_strong(Raw, getBusy()))) { + Raw = makeRaw(Generator()); + assert(Raw != getNull() && "Expected non-null from generator"); + Storage.store(Raw); + return *makePointer(Raw); + } + + // Contended with another generator. Wait for it to complete. + while (Raw == getBusy()) + Raw = Storage.load(); + assert(Raw != getNull() && "Expected non-null from competing generator"); + return *makePointer(Raw); + } + + explicit operator bool() const { return load(); } + operator T *() const { return load(); } + + T &operator*() const { + T *P = load(); + assert(P && "Unexpected null dereference"); + return *P; + } + T *operator->() const { return &operator*(); } + + LazyAtomicPointer() : Storage(0) {} + LazyAtomicPointer(std::nullptr_t) : Storage(0) {} + LazyAtomicPointer(T *Value) : Storage(makeRaw(Value)) {} + LazyAtomicPointer(const LazyAtomicPointer &RHS) + : Storage(makeRaw(RHS.load())) {} + + LazyAtomicPointer &operator=(std::nullptr_t) { + store(nullptr); + return *this; + } + LazyAtomicPointer &operator=(T *RHS) { + store(RHS); + return *this; + } + LazyAtomicPointer &operator=(const LazyAtomicPointer &RHS) { + store(RHS.load()); + return *this; + } + +private: + std::atomic Storage; +}; + +} // end namespace llvm + +#endif // LLVM_ADT_LAZYATOMICPOINTER_H diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index ee6c33924e963..591ae6ed66f99 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -58,10 +58,19 @@ inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); } inline StringRef toStringRef(ArrayRef Input) { return StringRef(reinterpret_cast(Input.begin()), Input.size()); } +inline StringRef toStringRef(ArrayRef Input) { + return StringRef(Input.begin(), Input.size()); +} /// Construct a string ref from an array ref of unsigned chars. -inline ArrayRef arrayRefFromStringRef(StringRef Input) { - return {Input.bytes_begin(), Input.bytes_end()}; +template +inline ArrayRef arrayRefFromStringRef(StringRef Input) { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value, + "Expected byte type"); + return ArrayRef(reinterpret_cast(Input.data()), + Input.size()); } /// Interpret the given character \p C as a hexadecimal digit and return its diff --git a/llvm/include/llvm/CAS/CASID.h b/llvm/include/llvm/CAS/CASID.h new file mode 100644 index 0000000000000..8566a92700d95 --- /dev/null +++ b/llvm/include/llvm/CAS/CASID.h @@ -0,0 +1,149 @@ +//===- llvm/CAS/CASID.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_CASID_H
+#define LLVM_CAS_CASID_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace cas {
+
+class CASID;
+
+/// Context for CAS identifiers.
+class CASIDContext {
+  virtual void anchor();
+
+public:
+  virtual ~CASIDContext() = default;
+
+  /// Get an identifier for the schema used by this CAS context. Two CAS
+  /// instances should return \c true for this identifier if and only if their
+  /// CASIDs are safe to compare by hash. This is used by \a
+  /// CASID::equalsImpl().
+  virtual StringRef getHashSchemaIdentifier() const = 0;
+
+protected:
+  /// Get the hash for \p ID. Implementation for \a CASID::getHash().
+  virtual ArrayRef<uint8_t> getHashImpl(const CASID &ID) const = 0;
+
+  /// Print \p ID to \p OS.
+  virtual void printIDImpl(raw_ostream &OS, const CASID &ID) const = 0;
+
+  friend class CASID;
+};
+
+/// Unique identifier for a CAS object.
+///
+/// Locally, stores an internal CAS identifier that's specific to a single CAS
+/// instance. It's guaranteed not to change across the view of that CAS, but
+/// might change between runs.
+///
+/// It also has a \a CASIDContext pointer to allow comparison of these
+/// identifiers. If two CASIDs are from the same CASIDContext, they can be
+/// compared directly. Otherwise, \a
+/// CASIDContext::getHashSchemaIdentifier() is compared to see if they can be
+/// compared by hash, in which case the result of \a getHash() is compared.
+class CASID {
+public:
+  void dump() const;
+  void print(raw_ostream &OS) const {
+    return getContext().printIDImpl(OS, *this);
+  }
+  friend raw_ostream &operator<<(raw_ostream &OS, const CASID &ID) {
+    ID.print(OS);
+    return OS;
+  }
+  std::string toString() const;
+
+  ArrayRef<uint8_t> getHash() const { return getContext().getHashImpl(*this); }
+
+  friend bool operator==(CASID LHS, CASID RHS) {
+    // If it's the same CAS (or both nullptr), then the IDs are directly
+    // comparable.
+    if (LHS.Context == RHS.Context)
+      return LHS.InternalID == RHS.InternalID;
+
+    // Check if one CAS is nullptr, indicating a tombstone or empty key for
+    // DenseMap, and return false if so.
+    if (!LHS.Context || !RHS.Context)
+      return false;
+
+    // Check if the schemas match.
+    if (LHS.Context->getHashSchemaIdentifier() !=
+        RHS.Context->getHashSchemaIdentifier())
+      return false;
+
+    // Compare the hashes.
+    return LHS.getHash() == RHS.getHash();
+  }
+
+  friend bool operator!=(CASID LHS, CASID RHS) { return !(LHS == RHS); }
+
+  friend hash_code hash_value(CASID ID) {
+    ArrayRef<uint8_t> Hash = ID.getHash();
+    return hash_combine_range(Hash.begin(), Hash.end());
+  }
+
+  const CASIDContext &getContext() const {
+    assert(Context && "Tombstone or empty key for DenseMap?");
+    return *Context;
+  }
+
+  /// Get the internal ID. Asserts that \p ExpectedContext is the Context that
+  /// this ID comes from, to help catch usage errors.
+ uint64_t getInternalID(const CASIDContext &ExpectedContext) const { + assert(&ExpectedContext == Context); + return InternalID; + } + + static CASID getDenseMapEmptyKey() { return CASID(-1ULL, nullptr); } + static CASID getDenseMapTombstoneKey() { return CASID(-2ULL, nullptr); } + + static CASID getFromInternalID(const CASIDContext &Context, + uint64_t InternalID) { + return CASID(InternalID, &Context); + } + + CASID() = delete; + +private: + CASID(uint64_t InternalID, const CASIDContext *Context) + : InternalID(InternalID), Context(Context) {} + + bool equalsImpl(CASID RHS) const; + + uint64_t InternalID = 0; + const CASIDContext *Context = nullptr; +}; + +} // namespace cas + +template <> struct DenseMapInfo { + static cas::CASID getEmptyKey() { return cas::CASID::getDenseMapEmptyKey(); } + + static cas::CASID getTombstoneKey() { + return cas::CASID::getDenseMapTombstoneKey(); + } + + static unsigned getHashValue(cas::CASID ID) { + return (unsigned)hash_value(ID); + } + + static bool isEqual(cas::CASID LHS, cas::CASID RHS) { return LHS == RHS; } +}; + +} // namespace llvm + +#endif // LLVM_CAS_CASID_H diff --git a/llvm/include/llvm/CAS/CASReference.h b/llvm/include/llvm/CAS/CASReference.h new file mode 100644 index 0000000000000..1f435cf306c4c --- /dev/null +++ b/llvm/include/llvm/CAS/CASReference.h @@ -0,0 +1,207 @@ +//===- llvm/CAS/CASReference.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_CASREFERENCE_H +#define LLVM_CAS_CASREFERENCE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +class raw_ostream; + +namespace cas { + +class ObjectStore; + +class ObjectHandle; +class ObjectRef; + +/// Base class for references to things in \a ObjectStore. +class ReferenceBase { +protected: + struct DenseMapEmptyTag {}; + struct DenseMapTombstoneTag {}; + static constexpr uint64_t getDenseMapEmptyRef() { return -1ULL; } + static constexpr uint64_t getDenseMapTombstoneRef() { return -2ULL; } + +public: + /// Get an internal reference. 
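+  ///
+  /// With ABI-breaking checks enabled, asserts that \p ExpectedCAS is the
+  /// ObjectStore this reference was created by.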
+ uint64_t getInternalRef(const ObjectStore &ExpectedCAS) const { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + assert(CAS == &ExpectedCAS && "Extracting reference for the wrong CAS"); +#endif + return InternalRef; + } + + unsigned getDenseMapHash() const { + return (unsigned)llvm::hash_value(InternalRef); + } + bool isDenseMapEmpty() const { return InternalRef == getDenseMapEmptyRef(); } + bool isDenseMapTombstone() const { + return InternalRef == getDenseMapTombstoneRef(); + } + bool isDenseMapSentinel() const { + return isDenseMapEmpty() || isDenseMapTombstone(); + } + +protected: + void print(raw_ostream &OS, const ObjectHandle &This) const; + void print(raw_ostream &OS, const ObjectRef &This) const; + + bool hasSameInternalRef(const ReferenceBase &RHS) const { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + assert( + (isDenseMapSentinel() || RHS.isDenseMapSentinel() || CAS == RHS.CAS) && + "Cannot compare across CAS instances"); +#endif + return InternalRef == RHS.InternalRef; + } + +protected: + friend class ObjectStore; + ReferenceBase(const ObjectStore *CAS, uint64_t InternalRef, bool IsHandle) + : InternalRef(InternalRef) { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + this->CAS = CAS; +#endif + assert(InternalRef != getDenseMapEmptyRef() && "Reserved for DenseMapInfo"); + assert(InternalRef != getDenseMapTombstoneRef() && + "Reserved for DenseMapInfo"); + } + explicit ReferenceBase(DenseMapEmptyTag) + : InternalRef(getDenseMapEmptyRef()) {} + explicit ReferenceBase(DenseMapTombstoneTag) + : InternalRef(getDenseMapTombstoneRef()) {} + +private: + uint64_t InternalRef; + +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + const ObjectStore *CAS = nullptr; +#endif +}; + +/// Reference to an object in a \a ObjectStore instance. +/// +/// If you have an ObjectRef, you know the object exists, and you can point at +/// it from new nodes with \a ObjectStore::store(), but you don't know anything +/// about it. "Loading" the object is a separate step that may not have +/// happened yet, and which can fail (due to filesystem corruption) or +/// introduce latency (if downloading from a remote store). +/// +/// \a ObjectStore::store() takes a list of these, and these are returned by \a +/// ObjectStore::forEachRef() and \a ObjectStore::readRef(), which are accessors +/// for nodes, and \a ObjectStore::getReference(). +/// +/// \a ObjectStore::load() will load the referenced object, and returns \a +/// ObjectHandle, a variant that knows what kind of entity it is. \a +/// ObjectStore::getReferenceKind() can expect the type of reference without +/// asking for unloaded objects to be loaded. +/// +/// This is a wrapper around a \c uint64_t (and a \a ObjectStore instance when +/// assertions are on). If necessary, it can be deconstructed and reconstructed +/// using \a Reference::getInternalRef() and \a +/// Reference::getFromInternalRef(), but clients aren't expected to need to do +/// this. These both require the right \a ObjectStore instance. +class ObjectRef : public ReferenceBase { + struct DenseMapTag {}; + +public: + friend bool operator==(const ObjectRef &LHS, const ObjectRef &RHS) { + return LHS.hasSameInternalRef(RHS); + } + friend bool operator!=(const ObjectRef &LHS, const ObjectRef &RHS) { + return !(LHS == RHS); + } + + /// Allow a reference to be recreated after it's deconstructed. 
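+  ///
+  /// Illustrative round trip, assuming \c Ref came from the same \c CAS:
+  ///
+  ///   uint64_t Raw = Ref.getInternalRef(CAS);
+  ///   ObjectRef Same = ObjectRef::getFromInternalRef(CAS, Raw);
+  ///   assert(Same == Ref);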
+  static ObjectRef getFromInternalRef(const ObjectStore &CAS,
+                                      uint64_t InternalRef) {
+    return ObjectRef(CAS, InternalRef);
+  }
+
+  static ObjectRef getDenseMapEmptyKey() {
+    return ObjectRef(DenseMapEmptyTag{});
+  }
+  static ObjectRef getDenseMapTombstoneKey() {
+    return ObjectRef(DenseMapTombstoneTag{});
+  }
+
+  /// Print internal ref and/or CASID. Only suitable for debugging.
+  void print(raw_ostream &OS) const { return ReferenceBase::print(OS, *this); }
+
+  LLVM_DUMP_METHOD void dump() const;
+
+private:
+  friend class ObjectStore;
+  friend class ReferenceBase;
+  using ReferenceBase::ReferenceBase;
+  ObjectRef(const ObjectStore &CAS, uint64_t InternalRef)
+      : ReferenceBase(&CAS, InternalRef, /*IsHandle=*/false) {
+    assert(InternalRef != -1ULL && "Reserved for DenseMapInfo");
+    assert(InternalRef != -2ULL && "Reserved for DenseMapInfo");
+  }
+  explicit ObjectRef(DenseMapEmptyTag T) : ReferenceBase(T) {}
+  explicit ObjectRef(DenseMapTombstoneTag T) : ReferenceBase(T) {}
+  explicit ObjectRef(ReferenceBase) = delete;
+};
+
+/// Handle to a loaded object in an \a ObjectStore instance.
+///
+/// ObjectHandle encapsulates a *loaded* object in the CAS. You need one
+/// of these to inspect the content of an object: to look at its stored
+/// data and references.
+class ObjectHandle : public ReferenceBase {
+public:
+  friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) {
+    return LHS.hasSameInternalRef(RHS);
+  }
+  friend bool operator!=(const ObjectHandle &LHS, const ObjectHandle &RHS) {
+    return !(LHS == RHS);
+  }
+
+  /// Print internal ref and/or CASID. Only suitable for debugging.
+  void print(raw_ostream &OS) const { return ReferenceBase::print(OS, *this); }
+
+  LLVM_DUMP_METHOD void dump() const;
+
+private:
+  friend class ObjectStore;
+  friend class ReferenceBase;
+  using ReferenceBase::ReferenceBase;
+  explicit ObjectHandle(ReferenceBase) = delete;
+  ObjectHandle(const ObjectStore &CAS, uint64_t InternalRef)
+      : ReferenceBase(&CAS, InternalRef, /*IsHandle=*/true) {}
+};
+
+} // namespace cas
+
+template <> struct DenseMapInfo<cas::ObjectRef> {
+  static cas::ObjectRef getEmptyKey() {
+    return cas::ObjectRef::getDenseMapEmptyKey();
+  }
+
+  static cas::ObjectRef getTombstoneKey() {
+    return cas::ObjectRef::getDenseMapTombstoneKey();
+  }
+
+  static unsigned getHashValue(cas::ObjectRef Ref) {
+    return Ref.getDenseMapHash();
+  }
+
+  static bool isEqual(cas::ObjectRef LHS, cas::ObjectRef RHS) {
+    return LHS == RHS;
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CAS_CASREFERENCE_H
diff --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h
new file mode 100644
index 0000000000000..71746ddbb631d
--- /dev/null
+++ b/llvm/include/llvm/CAS/ObjectStore.h
@@ -0,0 +1,307 @@
+//===- llvm/CAS/ObjectStore.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_OBJECTSTORE_H
+#define LLVM_CAS_OBJECTSTORE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/CASID.h"
+#include "llvm/CAS/CASReference.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include <cstddef>
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace cas {
+
+class ObjectStore;
+
+class ObjectProxy;
+
+/// Content-addressable storage for objects.
+///
+/// Conceptually, objects are stored in a "unique set".
+///
+/// - Objects are immutable ("value objects") that are defined by their
+///   content. They are implicitly deduplicated by content.
+/// - Each object has a unique identifier (UID) that's derived from its
+///   content, called a \a CASID.
+/// - This UID is a fixed-size (strong) hash of the transitive content of a
+///   CAS object.
+/// - It's comparable between any two CAS instances that have the same \a
+///   CASIDContext::getHashSchemaIdentifier().
+/// - The UID can be printed (e.g., \a CASID::toString()) and it can be parsed
+///   by the same or a different CAS instance with \a ObjectStore::parseID().
+/// - An object can be looked up by content or by UID.
+/// - \a store() is a "get-or-create" method, writing an object if it
+///   doesn't exist yet, and returning a ref to it in any case.
+/// - \a loadObject(const CASID&) looks up an object by its UID.
+/// - Objects can reference other objects, forming an arbitrary DAG.
+///
+/// The \a ObjectStore interface has a few ways of referencing objects:
+///
+/// - \a ObjectRef encapsulates a reference to something in the CAS. It is an
+///   opaque type that references an object inside a specific CAS. It is
+///   implementation defined whether the underlying object exists for an
+///   ObjectRef, and it can be used to speed up CAS lookups as an
+///   implementation detail. However, you don't know anything about the
+///   underlying objects. "Loading" the object is a separate step that may not
+///   have happened yet, and which can fail (e.g. due to filesystem corruption)
+///   or introduce latency (if downloading from a remote store).
+/// - \a ObjectHandle encapsulates a *loaded* object in the CAS. You need one
+///   of these to inspect the content of an object: to look at its stored
+///   data and references. This is internal to the CAS implementation and not
+///   available from the public CAS APIs.
+/// - \a CASID: the UID for an object in the CAS, obtained through \a
+///   ObjectStore::getID() or \a ObjectStore::parseID(). This is a valid CAS
+///   identifier, but may reference an object that is unknown to this CAS
+///   instance.
+/// - \a ObjectProxy pairs an ObjectHandle (subclass) with an ObjectStore, and
+///   wraps access APIs to avoid having to pass extra parameters. It is the
+///   object used for accessing underlying data and refs by CAS users.
+///
+/// There are a few options for accessing the content of objects, with
+/// different lifetime tradeoffs:
+///
+/// - \a getData() accesses data without exposing lifetime at all.
+/// - \a loadIndependentDataBuffer() returns a \a MemoryBuffer whose lifetime
+///   is independent of the CAS (it can live longer).
+/// - \a getDataString() returns a StringRef whose lifetime is guaranteed to
+///   last as long as the \a ObjectStore.
+/// - \a readRef() and \a forEachRef() iterate through the references in an
+///   object. There is no lifetime assumption.
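+///
+/// Illustrative end-to-end sketch using the in-memory CAS (error handling
+/// elided for brevity):
+///
+///   std::unique_ptr<ObjectStore> CAS = createInMemoryCAS();
+///   ObjectProxy Leaf = cantFail(CAS->createProxy(None, "leaf data"));
+///   ObjectProxy Root = cantFail(CAS->createProxy({Leaf.getRef()}, "root"));
+///   CASID ID = Root.getID();
+///   ObjectProxy Again = cantFail(CAS->getProxy(ID));
+///   StringRef Data = Again.getData(); // Valid as long as the CAS is alive.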
+///
+/// Both ObjectRef and ObjectHandle are lightweight, wrapping a `uint64_t`.
+/// Doing anything with them requires an ObjectStore.
+///
+class ObjectStore : public CASIDContext {
+  friend class ObjectProxy;
+  void anchor() override;
+
+public:
+  /// Get a \a CASID from \p ID, which should have been generated by \a
+  /// CASID::print(). This succeeds as long as \a validateID() would pass. The
+  /// object may be unknown to this CAS instance.
+  virtual Expected<CASID> parseID(StringRef ID) = 0;
+
+  /// Store an object into the ObjectStore.
+  virtual Expected<ObjectRef> store(ArrayRef<ObjectRef> Refs,
+                                    ArrayRef<char> Data) = 0;
+  /// Get an ID for \p Ref.
+  virtual CASID getID(ObjectRef Ref) const = 0;
+  /// Get an ID for \p Handle.
+  virtual CASID getID(ObjectHandle Handle) const = 0;
+
+  /// Get a reference to the object called \p ID.
+  ///
+  /// Returns \c None if not stored in this CAS.
+  virtual Optional<ObjectRef> getReference(const CASID &ID) const = 0;
+
+  /// Get a reference to the object that has the hash value \p Hash.
+  virtual Optional<ObjectRef> getReference(ArrayRef<uint8_t> Hash) const = 0;
+
+  /// Validate the underlying object referred to by \p ID.
+  virtual Error validate(const CASID &ID) = 0;
+
+protected:
+  /// Get a Ref from Handle.
+  virtual ObjectRef getReference(ObjectHandle Handle) const = 0;
+
+  /// Load the object referenced by \p Ref.
+  ///
+  /// Errors if the object cannot be loaded.
+  virtual Expected<ObjectHandle> load(ObjectRef Ref) = 0;
+
+  /// Get the size of some data.
+  virtual uint64_t getDataSize(ObjectHandle Node) const = 0;
+
+  /// Methods for handling objects.
+  virtual Error forEachRef(ObjectHandle Node,
+                           function_ref<Error(ObjectRef)> Callback) const = 0;
+  virtual ObjectRef readRef(ObjectHandle Node, size_t I) const = 0;
+  virtual size_t getNumRefs(ObjectHandle Node) const = 0;
+  virtual ArrayRef<char>
+  getData(ObjectHandle Node, bool RequiresNullTerminator = false) const = 0;
+
+  /// Get ObjectRef from open file.
+  virtual Expected<ObjectRef>
+  storeFromOpenFileImpl(sys::fs::file_t FD,
+                        Optional<sys::fs::file_status> Status) = 0;
+
+  /// Get a lifetime-extended StringRef pointing at \p Node's data.
+  ///
+  /// Depending on the CAS implementation, this may involve in-memory storage
+  /// overhead.
+  StringRef getDataString(ObjectHandle Node) {
+    return toStringRef(getData(Node));
+  }
+
+  /// Get a lifetime-extended MemoryBuffer pointing at \p Node's data.
+  ///
+  /// Depending on the CAS implementation, this may involve in-memory storage
+  /// overhead.
+  std::unique_ptr<MemoryBuffer>
+  getMemoryBuffer(ObjectHandle Node, StringRef Name = "",
+                  bool RequiresNullTerminator = true);
+
+  /// Read all the refs from the object into a SmallVector.
+  virtual void readRefs(ObjectHandle Node,
+                        SmallVectorImpl<ObjectRef> &Refs) const;
+
+  Expected<ObjectProxy> getProxy(Expected<ObjectRef> Ref);
+
+  /// Allow ObjectStore implementations to create internal handles.
+#define MAKE_CAS_HANDLE_CONSTRUCTOR(HandleKind)                                \
+  HandleKind make##HandleKind(uint64_t InternalRef) const {                    \
+    return HandleKind(*this, InternalRef);                                     \
+  }
+  MAKE_CAS_HANDLE_CONSTRUCTOR(ObjectHandle)
+  MAKE_CAS_HANDLE_CONSTRUCTOR(ObjectRef)
+#undef MAKE_CAS_HANDLE_CONSTRUCTOR
+
+public:
+  /// Helper function to store an object and return an ObjectProxy.
+  Expected<ObjectProxy> createProxy(ArrayRef<ObjectRef> Refs, StringRef Data);
+
+  /// Store object from StringRef.
+  Expected<ObjectRef> storeFromString(ArrayRef<ObjectRef> Refs,
+                                      StringRef String) {
+    return store(Refs, arrayRefFromStringRef<char>(String));
+  }
+
+  /// Default implementation reads \p FD and calls \a storeNode(). Does not
+  /// take ownership of \p FD; the caller is responsible for closing it.
+  ///
+  /// If \p Status is provided, it is treated as a hint. Implementations
+  /// must protect against the file size potentially growing after the status
+  /// was taken (i.e., they cannot assume that an mmap will be null-terminated
+  /// where \p Status implies).
+  ///
+  /// Returns the \a ObjectRef of the stored object.
+  Expected<ObjectRef>
+  storeFromOpenFile(sys::fs::file_t FD,
+                    Optional<sys::fs::file_status> Status = None) {
+    return storeFromOpenFileImpl(FD, Status);
+  }
+
+  static Error createUnknownObjectError(CASID ID);
+
+  /// Create ObjectProxy from CASID. If the object doesn't exist, get an error.
+  Expected<ObjectProxy> getProxy(CASID ID);
+  /// Create ObjectProxy from CASID. If the object doesn't exist, get None.
+  Expected<Optional<ObjectProxy>> getProxyOrNone(CASID ID);
+  /// Create ObjectProxy from ObjectRef. If the object can't be loaded, get an
+  /// error.
+  Expected<ObjectProxy> getProxy(ObjectRef Ref);
+
+  /// Read the data from \p Node into \p OS.
+  uint64_t readData(ObjectHandle Node, raw_ostream &OS, uint64_t Offset = 0,
+                    uint64_t MaxBytes = -1ULL) const {
+    ArrayRef<char> Data = getData(Node);
+    assert(Offset < Data.size() && "Expected valid offset");
+    Data = Data.drop_front(Offset).take_front(MaxBytes);
+    OS << toStringRef(Data);
+    return Data.size();
+  }
+
+  /// Get a MemoryBuffer with the contents of \p Node whose lifetime is
+  /// independent of this CAS instance.
+  virtual Expected<std::unique_ptr<MemoryBuffer>>
+  loadIndependentDataBuffer(ObjectHandle Node, const Twine &Name = "",
+                            bool NullTerminate = true) const;
+
+  /// Print the ObjectStore internals for debugging purposes.
+  virtual void print(raw_ostream &) const {}
+  void dump() const;
+
+  virtual ~ObjectStore() = default;
+};
+
+/// Reference to an abstract hierarchical node, with data and references.
+/// Reference is passed by value and is expected to be valid as long as the \a
+/// ObjectStore is.
+///
+/// TODO: Expose \a ObjectStore::readData() and only call \a
+/// ObjectStore::getDataString() when asked.
+class ObjectProxy {
+public:
+  const ObjectStore &getCAS() const { return *CAS; }
+  ObjectStore &getCAS() { return *CAS; }
+  CASID getID() const { return CAS->getID(H); }
+  ObjectRef getRef() const { return CAS->getReference(H); }
+  size_t getNumReferences() const { return CAS->getNumRefs(H); }
+  ObjectRef getReference(size_t I) const { return CAS->readRef(H, I); }
+
+  operator CASID() const { return getID(); }
+  CASID getReferenceID(size_t I) const {
+    Optional<CASID> ID = getCAS().getID(getReference(I));
+    assert(ID && "Expected reference to be first-class object");
+    return *ID;
+  }
+
+  /// Visit each reference in order, returning an error from \p Callback to
+  /// stop early.
+  Error forEachReference(function_ref<Error(ObjectRef)> Callback) const {
+    return CAS->forEachRef(H, Callback);
+  }
+  Error forEachReferenceID(function_ref<Error(CASID)> Callback) const {
+    return CAS->forEachRef(H, [&](ObjectRef Ref) {
+      Optional<CASID> ID = getCAS().getID(Ref);
+      assert(ID && "Expected reference to be first-class object");
+      return Callback(*ID);
+    });
+  }
+
+  std::unique_ptr<MemoryBuffer>
+  getMemoryBuffer(StringRef Name = "",
+                  bool RequiresNullTerminator = true) const;
+
+  /// Get the content of the node. Valid as long as the CAS is valid.
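+  ///
+  /// Illustrative traversal sketch over a proxy's references:
+  ///
+  ///   Error E = Proxy.forEachReference([&](ObjectRef Ref) -> Error {
+  ///     Expected<ObjectProxy> Child = Proxy.getCAS().getProxy(Ref);
+  ///     if (!Child)
+  ///       return Child.takeError();
+  ///     // ... visit Child->getData() ...
+  ///     return Error::success();
+  ///   });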
+  StringRef getData() const { return CAS->getDataString(H); }
+
+  friend bool operator==(const ObjectProxy &Proxy, ObjectRef Ref) {
+    return Proxy.getRef() == Ref;
+  }
+  friend bool operator==(ObjectRef Ref, const ObjectProxy &Proxy) {
+    return Proxy.getRef() == Ref;
+  }
+  friend bool operator!=(const ObjectProxy &Proxy, ObjectRef Ref) {
+    return !(Proxy.getRef() == Ref);
+  }
+  friend bool operator!=(ObjectRef Ref, const ObjectProxy &Proxy) {
+    return !(Proxy.getRef() == Ref);
+  }
+
+public:
+  ObjectProxy() = delete;
+
+  static ObjectProxy load(ObjectStore &CAS, ObjectHandle Node) {
+    return ObjectProxy(CAS, Node);
+  }
+
+private:
+  ObjectProxy(ObjectStore &CAS, ObjectHandle H) : CAS(&CAS), H(H) {}
+
+  ObjectStore *CAS;
+  ObjectHandle H;
+};
+
+Expected<std::unique_ptr<ObjectStore>>
+createPluginCAS(StringRef PluginPath, ArrayRef<StringRef> PluginArgs = None);
+std::unique_ptr<ObjectStore> createInMemoryCAS();
+
+} // namespace cas
+} // namespace llvm
+
+#endif // LLVM_CAS_OBJECTSTORE_H
diff --git a/llvm/include/llvm/Support/ThreadSafeAllocator.h b/llvm/include/llvm/Support/ThreadSafeAllocator.h
new file mode 100644
index 0000000000000..d3d408f1989ab
--- /dev/null
+++ b/llvm/include/llvm/Support/ThreadSafeAllocator.h
@@ -0,0 +1,60 @@
+//===- ThreadSafeAllocator.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_THREADSAFEALLOCATOR_H
+#define LLVM_SUPPORT_THREADSAFEALLOCATOR_H
+
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/Support/Allocator.h"
+#include <atomic>
+
+namespace llvm {
+
+/// Thread-safe allocator adaptor. Uses an unfair lock on the assumption that
+/// contention here is extremely rare.
+///
+/// TODO: Using an unfair lock on every allocation can be quite expensive when
+/// contention is high. Since this is mainly used for BumpPtrAllocator and
+/// SpecificBumpPtrAllocator, it'd be better to have a specific thread-safe
+/// BumpPtrAllocator implementation that only takes a fair lock when allocating
+/// a new slab and is otherwise lock-free, using atomics.
+template <class AllocatorType> class ThreadSafeAllocator {
+  struct LockGuard {
+    LockGuard(std::atomic_flag &Flag) : Flag(Flag) {
+      if (LLVM_UNLIKELY(Flag.test_and_set(std::memory_order_acquire)))
+        while (Flag.test_and_set(std::memory_order_acquire)) {
+        }
+    }
+    ~LockGuard() { Flag.clear(std::memory_order_release); }
+    std::atomic_flag &Flag;
+  };
+
+public:
+  auto Allocate(size_t N = 1) {
+    LockGuard Lock(Flag);
+    return Alloc.Allocate(N);
+  }
+
+  auto Allocate(size_t Size, size_t Align) {
+    LockGuard Lock(Flag);
+    return Alloc.Allocate(Size, Align);
+  }
+
+  void applyLocked(llvm::function_ref<void(AllocatorType &Alloc)> Fn) {
+    LockGuard Lock(Flag);
+    Fn(Alloc);
+  }
+
+private:
+  AllocatorType Alloc;
+  std::atomic_flag Flag = ATOMIC_FLAG_INIT;
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_THREADSAFEALLOCATOR_H
diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp
new file mode 100644
index 0000000000000..892105ce4b41c
--- /dev/null
+++ b/llvm/lib/CAS/BuiltinCAS.cpp
@@ -0,0 +1,129 @@
+//===- BuiltinCAS.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BuiltinCAS.h" +#include "BuiltinObjectHasher.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Process.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::builtin; + +static StringRef getCASIDPrefix() { return "llvmcas://"; } + +Expected BuiltinCAS::parseID(StringRef Reference) { + if (!Reference.consume_front(getCASIDPrefix())) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "invalid cas-id '" + Reference + "'"); + + // FIXME: Allow shortened references? + if (Reference.size() != 2 * sizeof(HashType)) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "wrong size for cas-id hash '" + Reference + "'"); + + std::string Binary; + if (!tryGetFromHex(Reference, Binary)) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "invalid hash in cas-id '" + Reference + "'"); + + return parseIDImpl(arrayRefFromStringRef(Binary)); +} + +void BuiltinCAS::printIDImpl(raw_ostream &OS, const CASID &ID) const { + assert(&ID.getContext() == this); + assert(ID.getHash().size() == sizeof(HashType)); + + SmallString<64> Hash; + toHex(ID.getHash(), /*LowerCase=*/true, Hash); + OS << getCASIDPrefix() << Hash; +} + +static size_t getPageSize() { + static int PageSize = sys::Process::getPageSizeEstimate(); + return PageSize; +} + +Expected +BuiltinCAS::storeFromOpenFileImpl(sys::fs::file_t FD, + Optional Status) { + int PageSize = getPageSize(); + + if (!Status) { + Status.emplace(); + if (std::error_code EC = sys::fs::status(FD, *Status)) + return errorCodeToError(EC); + } + + constexpr size_t MinMappedSize = 4 * 4096; + auto readWithStream = [&]() -> Expected { + // FIXME: MSVC: SmallString + SmallString<4 * 4096 * 2> Data; + if (Error E = sys::fs::readNativeFileToEOF(FD, Data, MinMappedSize)) + return std::move(E); + return store(None, makeArrayRef(Data.data(), Data.size())); + }; + + // Check whether we can trust the size from stat. + if (Status->type() != sys::fs::file_type::regular_file && + Status->type() != sys::fs::file_type::block_file) + return readWithStream(); + + if (Status->getSize() < MinMappedSize) + return readWithStream(); + + std::error_code EC; + sys::fs::mapped_file_region Map(FD, sys::fs::mapped_file_region::readonly, + Status->getSize(), + /*offset=*/0, EC); + if (EC) + return errorCodeToError(EC); + + // If the file is guaranteed to be null-terminated, use it directly. Note + // that the file size may have changed from ::stat if this file is volatile, + // so we need to check for an actual null character at the end. 
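+  // A memory-mapped file is zero-filled out to the page boundary, so when the
+  // file size is not a multiple of the page size, Data.end()[0] is readable
+  // and tells us whether the content is already null-terminated.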
+ ArrayRef Data(Map.data(), Map.size()); + HashType ComputedHash = + BuiltinObjectHasher::hashObject(*this, None, Data); + if (!isAligned(Align(PageSize), Data.size()) && Data.end()[0] == 0) + return storeFromNullTerminatedRegion(ComputedHash, std::move(Map)); + return storeImpl(ComputedHash, None, Data); +} + +Expected BuiltinCAS::store(ArrayRef Refs, + ArrayRef Data) { + return storeImpl(BuiltinObjectHasher::hashObject(*this, Refs, Data), + Refs, Data); +} + +Error BuiltinCAS::validate(const CASID &ID) { + auto Ref = getReference(ID); + if (!Ref) + return createUnknownObjectError(ID); + + auto Handle = load(*Ref); + if (!Handle) + return Handle.takeError(); + + auto Proxy = ObjectProxy::load(*this, *Handle); + SmallVector Refs; + if (auto E = Proxy.forEachReference([&](ObjectRef Ref) -> Error { + Refs.push_back(Ref); + return Error::success(); + })) + return E; + + ArrayRef Data(Proxy.getData().data(), Proxy.getData().size()); + auto Hash = BuiltinObjectHasher::hashObject(*this, Refs, Data); + if (!ID.getHash().equals(Hash)) + return createCorruptObjectError(ID); + + return Error::success(); +} diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h new file mode 100644 index 0000000000000..d526beee13753 --- /dev/null +++ b/llvm/lib/CAS/BuiltinCAS.h @@ -0,0 +1,142 @@ +//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CAS_BUILTINCAS_H +#define LLVM_LIB_CAS_BUILTINCAS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/ObjectStore.h" +#include "llvm/Support/BLAKE3.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace cas { +namespace builtin { + +/// Current hash type for the internal CAS. +/// +/// FIXME: This should be configurable via an enum to allow configuring the hash +/// function. The enum should be sent into \a createInMemoryCAS() and \a +/// createOnDiskCAS(). +/// +/// This is important (at least) for future-proofing, when we want to make new +/// CAS instances use BLAKE7, but still know how to read/write BLAKE3. +/// +/// Even just for BLAKE3, it would be useful to have these values: +/// +/// BLAKE3 => 32B hash from BLAKE3 +/// BLAKE3_16B => 16B hash from BLAKE3 (truncated) +/// +/// ... where BLAKE3_16 uses \a TruncatedBLAKE3<16>. +/// +/// Motivation for a truncated hash is that it's cheaper to store. It's not +/// clear if we always (or ever) need the full 32B, and for an ephemeral +/// in-memory CAS, we almost certainly don't need it. +/// +/// Note that the cost is linear in the number of objects for the builtin CAS +/// and embedded action cache, since we're using internal offsets and/or +/// pointers as an optimization. +/// +/// However, it's possible we'll want to hook up a local builtin CAS to, e.g., +/// a distributed generic hash map to use as an ActionCache. In that scenario, +/// the transitive closure of the structured objects that are the results of +/// the cached actions would need to be serialized into the map, something +/// like: +/// +/// "action::" -> "0123" +/// "object::0123" -> "3,4567,89AB,CDEF,9,some data" +/// "object::4567" -> ... +/// "object::89AB" -> ... +/// "object::CDEF" -> ... +/// +/// These references would be full cost. 
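+///
+/// Illustrative sketch: a 16-byte truncated scheme would swap the hasher,
+/// e.g.
+///
+///   using HasherT = TruncatedBLAKE3<16>;
+///
+/// with HashType deduced from it as below.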
+using HasherT = BLAKE3; +using HashType = decltype(HasherT::hash(std::declval &>())); + +class BuiltinCAS : public ObjectStore { + void printIDImpl(raw_ostream &OS, const CASID &ID) const final; + +public: + /// Get the name of the hash for any table identifiers. + /// + /// FIXME: This should be configurable via an enum, with at the following + /// values: + /// + /// "BLAKE3" => 32B hash from BLAKE3 + /// "BLAKE3.16" => 16B hash from BLAKE3 (truncated) + /// + /// Enum can be sent into \a createInMemoryCAS() and \a createOnDiskCAS(). + static StringRef getHashName() { return "BLAKE3"; } + StringRef getHashSchemaIdentifier() const final { + static const std::string ID = + ("llvm.cas.builtin.v2[" + getHashName() + "]").str(); + return ID; + } + + Expected parseID(StringRef Reference) final; + + virtual Expected parseIDImpl(ArrayRef Hash) = 0; + + Expected store(ArrayRef Refs, + ArrayRef Data) final; + virtual Expected storeImpl(ArrayRef ComputedHash, + ArrayRef Refs, + ArrayRef Data) = 0; + + Expected + storeFromOpenFileImpl(sys::fs::file_t FD, + Optional Status) override; + virtual Expected + storeFromNullTerminatedRegion(ArrayRef ComputedHash, + sys::fs::mapped_file_region Map) { + return storeImpl(ComputedHash, None, makeArrayRef(Map.data(), Map.size())); + } + + /// Both builtin CAS implementations provide lifetime for free, so this can + /// be const, and readData() and getDataSize() can be implemented on top of + /// it. + virtual ArrayRef getDataConst(ObjectHandle Node) const = 0; + + ArrayRef getData(ObjectHandle Node, + bool RequiresNullTerminator) const final { + // BuiltinCAS Objects are always null terminated. + return getDataConst(Node); + } + uint64_t getDataSize(ObjectHandle Node) const final { + return getDataConst(Node).size(); + } + + Error createUnknownObjectError(CASID ID) const { + return createStringError(std::make_error_code(std::errc::invalid_argument), + "unknown object '" + ID.toString() + "'"); + } + + Error createCorruptObjectError(CASID ID) const { + return createStringError(std::make_error_code(std::errc::invalid_argument), + "corrupt object '" + ID.toString() + "'"); + } + + Error createCorruptStorageError() const { + return createStringError(std::make_error_code(std::errc::invalid_argument), + "corrupt storage"); + } + + Error validate(const CASID &ID) final; +}; + +// FIXME: Proxy not portable. Maybe also error-prone? +constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default"; +constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default"; + +} // end namespace builtin +} // end namespace cas +} // end namespace llvm + +#endif // LLVM_LIB_CAS_BUILTINCAS_H diff --git a/llvm/lib/CAS/BuiltinObjectHasher.h b/llvm/lib/CAS/BuiltinObjectHasher.h new file mode 100644 index 0000000000000..1f47e7df59776 --- /dev/null +++ b/llvm/lib/CAS/BuiltinObjectHasher.h @@ -0,0 +1,73 @@ +//===- BuiltinObjectHasher.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_BUILTINOBJECTHASHER_H +#define LLVM_CAS_BUILTINOBJECTHASHER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/ObjectStore.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace cas { + +template class BuiltinObjectHasher { +public: + using HashT = decltype(HasherT::hash(std::declval &>())); + + static HashT hashObject(const ObjectStore &CAS, ArrayRef Refs, + ArrayRef Data) { + BuiltinObjectHasher H; + H.updateSize(Refs.size()); + for (const ObjectRef &Ref : Refs) + H.updateRef(CAS, Ref); + H.updateArray(Data); + return H.finish(); + } + +private: + HashT finish() { return Hasher.final(); } + + void updateRef(const ObjectStore &CAS, ObjectRef Ref) { + updateID(CAS.getID(Ref)); + } + + void updateID(const CASID &ID) { + // NOTE: Does not hash the size of the hash. That's a CAS implementation + // detail that shouldn't leak into the UUID for an object. + ArrayRef Hash = ID.getHash(); + assert(Hash.size() == sizeof(HashT) && + "Expected object ref to match the hash size"); + Hasher.update(Hash); + } + + void updateArray(ArrayRef Bytes) { + updateSize(Bytes.size()); + Hasher.update(Bytes); + } + + void updateArray(ArrayRef Bytes) { + updateArray(makeArrayRef(reinterpret_cast(Bytes.data()), + Bytes.size())); + } + + void updateSize(uint64_t Size) { + Size = support::endian::byte_swap(Size, support::endianness::little); + Hasher.update( + makeArrayRef(reinterpret_cast(&Size), sizeof(Size))); + } + + BuiltinObjectHasher() = default; + ~BuiltinObjectHasher() = default; + HasherT Hasher; +}; + +} // namespace cas +} // namespace llvm + +#endif // LLVM_CAS_BUILTINOBJECTHASHER_H diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt new file mode 100644 index 0000000000000..a486ab66ae426 --- /dev/null +++ b/llvm/lib/CAS/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_component_library(LLVMCAS + BuiltinCAS.cpp + InMemoryCAS.cpp + ObjectStore.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS +) diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp new file mode 100644 index 0000000000000..aea4767a6485d --- /dev/null +++ b/llvm/lib/CAS/InMemoryCAS.cpp @@ -0,0 +1,353 @@ +//===- InMemoryCAS.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BuiltinCAS.h" +#include "BuiltinObjectHasher.h" +#include "llvm/ADT/HashMappedTrie.h" +#include "llvm/ADT/LazyAtomicPointer.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ThreadSafeAllocator.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::builtin; + +namespace { + +class InMemoryObject; + +/// Index of referenced IDs (map: Hash -> InMemoryObject*). Uses +/// LazyAtomicPointer to coordinate creation of objects. +using InMemoryIndexT = + ThreadSafeHashMappedTrie, + sizeof(HashType)>; + +/// Values in \a InMemoryIndexT. \a InMemoryObject's point at this to access +/// their hash. 
+using InMemoryIndexValueT = InMemoryIndexT::value_type; + +class InMemoryObject { +public: + enum class Kind { + /// Node with refs and data. + RefNode, + + /// Node with refs and data co-allocated. + InlineNode, + + Max = InlineNode, + }; + + Kind getKind() const { return IndexAndKind.getInt(); } + const InMemoryIndexValueT &getIndex() const { + assert(IndexAndKind.getPointer()); + return *IndexAndKind.getPointer(); + } + + ArrayRef getHash() const { return getIndex().Hash; } + + InMemoryObject() = delete; + InMemoryObject(InMemoryObject &&) = delete; + InMemoryObject(const InMemoryObject &) = delete; + +protected: + InMemoryObject(Kind K, const InMemoryIndexValueT &I) : IndexAndKind(&I, K) {} + +private: + enum Counts : int { + NumKindBits = 2, + }; + PointerIntPair IndexAndKind; + static_assert((1U << NumKindBits) <= alignof(InMemoryIndexValueT), + "Kind will clobber pointer"); + static_assert(((int)Kind::Max >> NumKindBits) == 0, "Kind will be truncated"); + +public: + inline ArrayRef getData() const; + + inline ArrayRef getRefs() const; +}; + +class InMemoryRefObject : public InMemoryObject { +public: + static constexpr Kind KindValue = Kind::RefNode; + static bool classof(const InMemoryObject *O) { + return O->getKind() == KindValue; + } + + ArrayRef getRefsImpl() const { return Refs; } + ArrayRef getRefs() const { return Refs; } + ArrayRef getDataImpl() const { return Data; } + ArrayRef getData() const { return Data; } + + static InMemoryRefObject &create(function_ref Allocate, + const InMemoryIndexValueT &I, + ArrayRef Refs, + ArrayRef Data) { + void *Mem = Allocate(sizeof(InMemoryRefObject)); + return *new (Mem) InMemoryRefObject(I, Refs, Data); + } + +private: + InMemoryRefObject(const InMemoryIndexValueT &I, + ArrayRef Refs, ArrayRef Data) + : InMemoryObject(KindValue, I), Refs(Refs), Data(Data) { + assert(isAddrAligned(Align(8), this) && "Expected 8-byte alignment"); + assert(isAddrAligned(Align(8), Data.data()) && "Expected 8-byte alignment"); + assert(*Data.end() == 0 && "Expected null-termination"); + } + + ArrayRef Refs; + ArrayRef Data; +}; + +class InMemoryInlineObject : public InMemoryObject { +public: + static constexpr Kind KindValue = Kind::InlineNode; + static bool classof(const InMemoryObject *O) { + return O->getKind() == KindValue; + } + + ArrayRef getRefs() const { return getRefsImpl(); } + ArrayRef getRefsImpl() const { + return makeArrayRef( + reinterpret_cast(this + 1), NumRefs); + } + + ArrayRef getData() const { return getDataImpl(); } + ArrayRef getDataImpl() const { + ArrayRef Refs = getRefs(); + return makeArrayRef( + reinterpret_cast(Refs.data() + Refs.size()), DataSize); + } + + static InMemoryInlineObject & + create(function_ref Allocate, + const InMemoryIndexValueT &I, ArrayRef Refs, + ArrayRef Data) { + void *Mem = Allocate(sizeof(InMemoryInlineObject) + + sizeof(uintptr_t) * Refs.size() + Data.size() + 1); + return *new (Mem) InMemoryInlineObject(I, Refs, Data); + } + +private: + InMemoryInlineObject(const InMemoryIndexValueT &I, + ArrayRef Refs, + ArrayRef Data) + : InMemoryObject(KindValue, I), NumRefs(Refs.size()), + DataSize(Data.size()) { + auto *BeginRefs = reinterpret_cast(this + 1); + llvm::copy(Refs, BeginRefs); + auto *BeginData = reinterpret_cast(BeginRefs + NumRefs); + llvm::copy(Data, BeginData); + BeginData[Data.size()] = 0; + } + uint32_t NumRefs; + uint32_t DataSize; +}; + +/// In-memory CAS database. 
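+///
+/// Objects are allocated from a thread-safe BumpPtrAllocator and indexed by
+/// hash in a lock-free ThreadSafeHashMappedTrie whose values are
+/// LazyAtomicPointers, so concurrent stores of the same content construct the
+/// object exactly once.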
+class InMemoryCAS : public BuiltinCAS { +public: + Expected parseIDImpl(ArrayRef Hash) final { + return getID(indexHash(Hash)); + } + + Expected storeImpl(ArrayRef ComputedHash, + ArrayRef Refs, + ArrayRef Data) final; + + Expected + storeFromNullTerminatedRegion(ArrayRef ComputedHash, + sys::fs::mapped_file_region Map) override; + + CASID getID(const InMemoryIndexValueT &I) const { + return CASID::getFromInternalID(*this, reinterpret_cast(&I)); + } + CASID getID(const InMemoryObject &O) const { return getID(O.getIndex()); } + const InMemoryIndexValueT &extractIndexFromID(CASID ID) const { + assert(&ID.getContext() == this); + return *reinterpret_cast( + (uintptr_t)ID.getInternalID(*this)); + } + InMemoryIndexValueT &extractIndexFromID(CASID ID) { + return const_cast( + const_cast(this)->extractIndexFromID(ID)); + } + + ArrayRef getHashImpl(const CASID &ID) const final { + return extractIndexFromID(ID).Hash; + } + + ObjectHandle getObjectHandle(const InMemoryObject &Node) const { + assert(!(reinterpret_cast(&Node) & 0x1ULL)); + return makeObjectHandle(reinterpret_cast(&Node)); + } + + Expected load(ObjectRef Ref) override { + return getObjectHandle(asInMemoryObject(Ref)); + } + + InMemoryIndexValueT &indexHash(ArrayRef Hash) { + return *Index.insertLazy( + Hash, [](auto ValueConstructor) { ValueConstructor.emplace(nullptr); }); + } + + /// TODO: Consider callers to actually do an insert and to return a handle to + /// the slot in the trie. + const InMemoryObject *getInMemoryObject(CASID ID) const { + if (&ID.getContext() == this) + return extractIndexFromID(ID).Data; + assert(ID.getContext().getHashSchemaIdentifier() == + getHashSchemaIdentifier() && + "Expected ID from same hash schema"); + if (InMemoryIndexT::const_pointer P = Index.find(ID.getHash())) + return P->Data; + return nullptr; + } + + const InMemoryObject &getInMemoryObject(ObjectHandle OH) const { + return *reinterpret_cast( + (uintptr_t)OH.getInternalRef(*this)); + } + + const InMemoryObject &asInMemoryObject(ReferenceBase Ref) const { + uintptr_t P = Ref.getInternalRef(*this); + return *reinterpret_cast(P); + } + ObjectRef toReference(const InMemoryObject &O) const { + return makeObjectRef(reinterpret_cast(&O)); + } + + CASID getID(ObjectRef Ref) const final { return getIDImpl(Ref); } + CASID getID(ObjectHandle Ref) const final { return getIDImpl(Ref); } + CASID getIDImpl(ReferenceBase Ref) const { + return getID(asInMemoryObject(Ref)); + } + + Optional getReference(const CASID &ID) const final { + if (const InMemoryObject *Object = getInMemoryObject(ID)) + return toReference(*Object); + return None; + } + Optional getReference(ArrayRef Hash) const final { + if (InMemoryIndexT::const_pointer P = Index.find(Hash)) + return toReference(*P->Data); + return None; + } + ObjectRef getReference(ObjectHandle Handle) const final { + return toReference(asInMemoryObject(Handle)); + } + + ArrayRef getDataConst(ObjectHandle Node) const final { + return cast(asInMemoryObject(Node)).getData(); + } + + void print(raw_ostream &OS) const final; + + InMemoryCAS() = default; + +private: + size_t getNumRefs(ObjectHandle Node) const final { + return getInMemoryObject(Node).getRefs().size(); + } + ObjectRef readRef(ObjectHandle Node, size_t I) const final { + return toReference(*getInMemoryObject(Node).getRefs()[I]); + } + Error forEachRef(ObjectHandle Node, + function_ref Callback) const final; + + /// Index of referenced IDs (map: Hash -> InMemoryObject*). Mapped to nullptr + /// as a convenient way to store hashes. 
+ /// + /// - Insert nullptr on lookups. + /// - InMemoryObject points back to here. + InMemoryIndexT Index; + + ThreadSafeAllocator Objects; + ThreadSafeAllocator> + MemoryMaps; +}; + +} // end anonymous namespace + +ArrayRef InMemoryObject::getData() const { + if (auto *Derived = dyn_cast(this)) + return Derived->getDataImpl(); + return cast(this)->getDataImpl(); +} + +ArrayRef InMemoryObject::getRefs() const { + if (auto *Derived = dyn_cast(this)) + return Derived->getRefsImpl(); + return cast(this)->getRefsImpl(); +} + +void InMemoryCAS::print(raw_ostream &OS) const { + OS << "index: "; + Index.print(OS); +} + +Expected +InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef ComputedHash, + sys::fs::mapped_file_region Map) { + // Look up the hash in the index, initializing to nullptr if it's new. + ArrayRef Data(Map.data(), Map.size()); + auto &I = indexHash(ComputedHash); + + // Load or generate. + auto Allocator = [&](size_t Size) -> void * { + return Objects.Allocate(Size, alignof(InMemoryObject)); + }; + auto Generator = [&]() -> const InMemoryObject * { + return &InMemoryRefObject::create(Allocator, I, None, Data); + }; + const InMemoryObject &Node = + cast(I.Data.loadOrGenerate(Generator)); + + // Save Map if the winning node uses it. + if (auto *RefNode = dyn_cast(&Node)) + if (RefNode->getData().data() == Map.data()) + new (MemoryMaps.Allocate()) sys::fs::mapped_file_region(std::move(Map)); + + return toReference(Node); +} + +Expected InMemoryCAS::storeImpl(ArrayRef ComputedHash, + ArrayRef Refs, + ArrayRef Data) { + // Look up the hash in the index, initializing to nullptr if it's new. + auto &I = indexHash(ComputedHash); + + // Create the node. + SmallVector InternalRefs; + for (ObjectRef Ref : Refs) + InternalRefs.push_back(&asInMemoryObject(Ref)); + auto Allocator = [&](size_t Size) -> void * { + return Objects.Allocate(Size, alignof(InMemoryObject)); + }; + auto Generator = [&]() -> const InMemoryObject * { + return &InMemoryInlineObject::create(Allocator, I, InternalRefs, Data); + }; + return toReference(cast(I.Data.loadOrGenerate(Generator))); +} + +Error InMemoryCAS::forEachRef(ObjectHandle Handle, + function_ref Callback) const { + auto &Node = getInMemoryObject(Handle); + for (const InMemoryObject *Ref : Node.getRefs()) + if (Error E = Callback(toReference(*Ref))) + return E; + return Error::success(); +} + +std::unique_ptr cas::createInMemoryCAS() { + return std::make_unique(); +} diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp new file mode 100644 index 0000000000000..c2e1bb9bfe2dd --- /dev/null +++ b/llvm/lib/CAS/ObjectStore.cpp @@ -0,0 +1,137 @@ +//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/ObjectStore.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" + +using namespace llvm; +using namespace llvm::cas; + +void CASIDContext::anchor() {} +void ObjectStore::anchor() {} + +LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); } + +std::string CASID::toString() const { + std::string S; + raw_string_ostream(S) << *this; + return S; +} + +static void printReferenceBase(raw_ostream &OS, StringRef Kind, + uint64_t InternalRef, Optional ID) { + OS << Kind << "=" << InternalRef; + if (ID) + OS << "[" << *ID << "]"; +} + +void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const { + assert(this == &This); + + Optional ID; +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (CAS) + ID = CAS->getID(This); +#endif + printReferenceBase(OS, "object-handle", InternalRef, ID); +} + +void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const { + assert(this == &This); + + Optional ID; +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (CAS) + ID = CAS->getID(This); +#endif + printReferenceBase(OS, "object-ref", InternalRef, ID); +} + +std::unique_ptr +ObjectStore::getMemoryBuffer(ObjectHandle Node, StringRef Name, + bool RequiresNullTerminator) { + return MemoryBuffer::getMemBuffer( + toStringRef(getData(Node, RequiresNullTerminator)), Name, + RequiresNullTerminator); +} + +void ObjectStore::readRefs(ObjectHandle Node, + SmallVectorImpl &Refs) const { + consumeError(forEachRef(Node, [&Refs](ObjectRef Ref) -> Error { + Refs.push_back(Ref); + return Error::success(); + })); +} + +Expected ObjectStore::getProxy(CASID ID) { + Optional Ref = getReference(ID); + if (!Ref) + return createUnknownObjectError(ID); + + Optional H; + if (Error E = load(*Ref).moveInto(H)) + return std::move(E); + + return ObjectProxy::load(*this, *H); +} + +Expected> ObjectStore::getProxyOrNone(CASID ID) { + Optional Ref = getReference(ID); + if (!Ref) + return None; + + Optional H; + if (Error E = load(*Ref).moveInto(H)) + return std::move(E); + + return ObjectProxy::load(*this, *H); +} + +Expected ObjectStore::getProxy(ObjectRef Ref) { + return getProxy(load(Ref)); +} + +Expected ObjectStore::getProxy(Expected H) { + if (!H) + return H.takeError(); + return ObjectProxy::load(*this, *H); +} + +Error ObjectStore::createUnknownObjectError(CASID ID) { + return createStringError(std::make_error_code(std::errc::invalid_argument), + "unknown object '" + ID.toString() + "'"); +} + +Expected ObjectStore::createProxy(ArrayRef Refs, + StringRef Data) { + Expected Ref = store(Refs, arrayRefFromStringRef(Data)); + if (!Ref) + return Ref.takeError(); + return getProxy(*Ref); +} + +Expected> +ObjectStore::loadIndependentDataBuffer(ObjectHandle Node, const Twine &Name, + bool NullTerminate) const { + SmallString<256> Bytes; + raw_svector_ostream OS(Bytes); + readData(Node, OS); + return std::make_unique(std::move(Bytes), Name.str(), + NullTerminate); +} + +std::unique_ptr +ObjectProxy::getMemoryBuffer(StringRef Name, + bool RequiresNullTerminator) const { + return CAS->getMemoryBuffer(H, Name, RequiresNullTerminator); +} diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt 
index 5ecdf5af956a3..5d4d9685f85e9 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(FuzzMutate) add_subdirectory(FileCheck) add_subdirectory(InterfaceStub) add_subdirectory(IRReader) +add_subdirectory(CAS) add_subdirectory(CodeGen) add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 5044b2639a0f7..71bc833c2fee8 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -167,6 +167,7 @@ add_llvm_component_library(LLVMSupport FormatVariadic.cpp GlobPattern.cpp GraphWriter.cpp + HashMappedTrie.cpp Hashing.cpp InitLLVM.cpp InstructionCost.cpp diff --git a/llvm/lib/Support/HashMappedTrie.cpp b/llvm/lib/Support/HashMappedTrie.cpp new file mode 100644 index 0000000000000..1fa4f92ba5e9f --- /dev/null +++ b/llvm/lib/Support/HashMappedTrie.cpp @@ -0,0 +1,476 @@ +//===- HashMappedTrie.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/HashMappedTrie.h" +#include "HashMappedTrieIndexGenerator.h" +#include "llvm/ADT/LazyAtomicPointer.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ThreadSafeAllocator.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace { +struct TrieNode { + const bool IsSubtrie = false; + + TrieNode(bool IsSubtrie) : IsSubtrie(IsSubtrie) {} + + static void *operator new(size_t Size) { return ::malloc(Size); } + void operator delete(void *Ptr) { ::free(Ptr); } +}; + +struct TrieContent final : public TrieNode { + const uint8_t ContentOffset; + const uint8_t HashSize; + const uint8_t HashOffset; + + void *getValuePointer() const { + auto Content = reinterpret_cast(this) + ContentOffset; + return const_cast(Content); + } + + ArrayRef getHash() const { + auto *Begin = reinterpret_cast(this) + HashOffset; + return makeArrayRef(Begin, Begin + HashSize); + } + + TrieContent(size_t ContentOffset, size_t HashSize, size_t HashOffset) + : TrieNode(/*IsSubtrie=*/false), ContentOffset(ContentOffset), + HashSize(HashSize), HashOffset(HashOffset) {} +}; +static_assert(sizeof(TrieContent) == + ThreadSafeHashMappedTrieBase::TrieContentBaseSize, + "Check header assumption!"); + +class TrieSubtrie final : public TrieNode { +public: + TrieNode *get(size_t I) const { return Slots[I].load(); } + + TrieSubtrie * + sink(size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, + function_ref)> Saver); + + void printHash(raw_ostream &OS, ArrayRef Bytes) const; + void print(raw_ostream &OS) const { print(OS, None); } + void print(raw_ostream &OS, Optional Prefix) const; + void dump() const { print(dbgs()); } + + static std::unique_ptr create(size_t StartBit, size_t NumBits); + + explicit TrieSubtrie(size_t StartBit, size_t NumBits); + +private: + // FIXME: Use a bitset to speed up access: + // + // std::array, NumSlots/64> IsSet; + // + // This will avoid needing to visit sparsely filled slots in + // \a ThreadSafeHashMappedTrieBase::destroyImpl() when there's a non-trivial + // destructor. + // + // It would also greatly speed up iteration, if we add that some day, and + // allow get() to return one level sooner. 
+ // + // This would be the algorithm for updating IsSet (after updating Slots): + // + // std::atomic &Bits = IsSet[I.High]; + // const uint64_t NewBit = 1ULL << I.Low; + // uint64_t Old = 0; + // while (!Bits.compare_exchange_weak(Old, Old | NewBit)) + // ; + + // For debugging. + unsigned StartBit = 0; + unsigned NumBits = 0; + +public: + /// Linked list for ownership of tries. The pointer is owned by TrieSubtrie. + std::atomic Next; + + /// The (co-allocated) slots of the subtrie. + MutableArrayRef> Slots; +}; +} // end namespace + +namespace llvm { +template <> struct isa_impl { + static inline bool doit(const TrieNode &TN) { return !TN.IsSubtrie; } +}; +template <> struct isa_impl { + static inline bool doit(const TrieNode &TN) { return TN.IsSubtrie; } +}; +} // end namespace llvm + +static size_t getTrieTailSize(size_t StartBit, size_t NumBits) { + assert(NumBits < 20 && "Tries should have fewer than ~1M slots"); + return sizeof(TrieNode *) * (1u << NumBits); +} + +std::unique_ptr TrieSubtrie::create(size_t StartBit, + size_t NumBits) { + size_t Size = sizeof(TrieSubtrie) + getTrieTailSize(StartBit, NumBits); + void *Memory = ::malloc(Size); + TrieSubtrie *S = ::new (Memory) TrieSubtrie(StartBit, NumBits); + return std::unique_ptr(S); +} + +TrieSubtrie::TrieSubtrie(size_t StartBit, size_t NumBits) + : TrieNode(true), StartBit(StartBit), NumBits(NumBits), Next(nullptr), + Slots(reinterpret_cast *>( + reinterpret_cast(this) + sizeof(TrieSubtrie)), + (1u << NumBits)) { + for (auto *I = Slots.begin(), *E = Slots.end(); I != E; ++I) + new (I) LazyAtomicPointer(nullptr); + + static_assert( + std::is_trivially_destructible>::value, + "Expected no work in destructor for TrieNode"); +} + +TrieSubtrie *TrieSubtrie::sink( + size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, + function_ref)> Saver) { + assert(NumSubtrieBits > 0); + std::unique_ptr S = create(StartBit + NumBits, NumSubtrieBits); + + assert(NewI < S->Slots.size()); + S->Slots[NewI].store(&Content); + + TrieNode *ExistingNode = &Content; + assert(I < Slots.size()); + if (Slots[I].compare_exchange_strong(ExistingNode, S.get())) + return Saver(std::move(S)); + + // Another thread created a subtrie already. Return it and let "S" be + // destructed. + return cast(ExistingNode); +} + +struct ThreadSafeHashMappedTrieBase::ImplType { + static ImplType *create(size_t StartBit, size_t NumBits) { + size_t Size = sizeof(ImplType) + getTrieTailSize(StartBit, NumBits); + void *Memory = ::malloc(Size); + return ::new (Memory) ImplType(StartBit, NumBits); + } + + TrieSubtrie *save(std::unique_ptr S) { + assert(!S->Next && "Expected S to a freshly-constructed leaf"); + + TrieSubtrie *CurrentHead = nullptr; + // Add ownership of "S" to front of the list, so that Root -> S -> + // Root.Next. This works by repeatedly setting S->Next to a candidate value + // of Root.Next (initially nullptr), then setting Root.Next to S once the + // candidate matches reality. + while (!Root.Next.compare_exchange_weak(CurrentHead, S.get())) + S->Next.exchange(CurrentHead); + + // Ownership transferred to subtrie. + return S.release(); + } + + static void *operator new(size_t Size) { return ::malloc(Size); } + void operator delete(void *Ptr) { ::free(Ptr); } + + /// FIXME: This should take a function that allocates and constructs the + /// content lazily (taking the hash as a separate parameter), in case of + /// collision. + ThreadSafeAllocator ContentAlloc; + TrieSubtrie Root; // Must be last! Tail-allocated. 
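ImplType::save above threads every newly created subtrie onto an intrusive list headed by Root.Next with a compare-exchange loop, so destroyImpl() can later reclaim them without taking a lock. The same push-front idiom in isolation (illustrative types, not the patch's):

  #include <atomic>

  struct Node {
    std::atomic<Node *> Next{nullptr};
  };

  // Lock-free push onto the front of a singly linked list: publish the current
  // head as our successor, then try to become the head; retry if another
  // thread installed a different head first.
  void pushFront(std::atomic<Node *> &Head, Node *N) {
    Node *Current = Head.load();
    do {
      N->Next.store(Current, std::memory_order_relaxed);
    } while (!Head.compare_exchange_weak(Current, N));
  }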
+ +private: + ImplType(size_t StartBit, size_t NumBits) : Root(StartBit, NumBits) {} +}; + +ThreadSafeHashMappedTrieBase::ImplType & +ThreadSafeHashMappedTrieBase::getOrCreateImpl() { + if (ImplType *Impl = ImplPtr.load()) + return *Impl; + + // Create a new ImplType and store it if another thread doesn't do so first. + // If another thread wins this one is destroyed locally. + std::unique_ptr Impl(ImplType::create(0, NumRootBits)); + ImplType *ExistingImpl = nullptr; + if (ImplPtr.compare_exchange_strong(ExistingImpl, Impl.get())) + return *Impl.release(); + + return *ExistingImpl; +} + +ThreadSafeHashMappedTrieBase::PointerBase +ThreadSafeHashMappedTrieBase::find(ArrayRef Hash) const { + assert(!Hash.empty() && "Uninitialized hash"); + + ImplType *Impl = ImplPtr.load(); + if (!Impl) + return PointerBase(); + + TrieSubtrie *S = &Impl->Root; + IndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash}; + size_t Index = IndexGen.next(); + for (;;) { + // Try to set the content. + TrieNode *Existing = S->get(Index); + if (!Existing) + return PointerBase(S, Index, *IndexGen.StartBit); + + // Check for an exact match. + if (auto *ExistingContent = dyn_cast(Existing)) + return ExistingContent->getHash() == Hash + ? PointerBase(ExistingContent->getValuePointer()) + : PointerBase(S, Index, *IndexGen.StartBit); + + Index = IndexGen.next(); + S = cast(Existing); + } +} + +ThreadSafeHashMappedTrieBase::PointerBase ThreadSafeHashMappedTrieBase::insert( + PointerBase Hint, ArrayRef Hash, + function_ref Hash)> + Constructor) { + assert(!Hash.empty() && "Uninitialized hash"); + + ImplType &Impl = getOrCreateImpl(); + TrieSubtrie *S = &Impl.Root; + IndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash}; + size_t Index; + if (Hint.isHint()) { + S = static_cast(Hint.P); + Index = IndexGen.hint(Hint.I, Hint.B); + } else { + Index = IndexGen.next(); + } + + for (;;) { + // Load the node from the slot, allocating and calling the constructor if + // the slot is empty. + bool Generated = false; + TrieNode &Existing = S->Slots[Index].loadOrGenerate([&]() { + Generated = true; + + // Construct the value itself at the tail. + uint8_t *Memory = reinterpret_cast( + Impl.ContentAlloc.Allocate(ContentAllocSize, ContentAllocAlign)); + const uint8_t *HashStorage = Constructor(Memory + ContentOffset, Hash); + + // Construct the TrieContent header, passing in the offset to the hash. + TrieContent *Content = ::new (Memory) + TrieContent(ContentOffset, Hash.size(), HashStorage - Memory); + assert(Hash == Content->getHash() && "Hash not properly initialized"); + return Content; + }); + // If we just generated it, return it! + if (Generated) + return PointerBase(cast(Existing).getValuePointer()); + + if (isa(Existing)) { + S = &cast(Existing); + Index = IndexGen.next(); + continue; + } + + // Return the existing content if it's an exact match! + auto &ExistingContent = cast(Existing); + if (ExistingContent.getHash() == Hash) + return PointerBase(ExistingContent.getValuePointer()); + + // Sink the existing content as long as the indexes match. + for (;;) { + size_t NextIndex = IndexGen.next(); + size_t NewIndexForExistingContent = + IndexGen.getCollidingBits(ExistingContent.getHash()); + S = S->sink(Index, ExistingContent, IndexGen.getNumBits(), + NewIndexForExistingContent, + [&Impl](std::unique_ptr S) { + return Impl.save(std::move(S)); + }); + Index = NextIndex; + + // Found the difference. 
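      // For example, with 1-bit levels and hashes 0b111... vs 0b110..., the
      // existing content is sunk one level per matching leading bit; at the
      // third bit NextIndex (0) differs from the existing content's new index
      // (1), so the check below breaks out and the outer loop stores the new
      // value in the now-empty sibling slot.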
+ if (NextIndex != NewIndexForExistingContent) + break; + } + } +} + +static void printHexDigit(raw_ostream &OS, uint8_t Digit) { + if (Digit < 10) + OS << char(Digit + '0'); + else + OS << char(Digit - 10 + 'a'); +} + +static void printHexDigits(raw_ostream &OS, ArrayRef Bytes, + size_t StartBit, size_t NumBits) { + assert(StartBit % 4 == 0); + assert(NumBits % 4 == 0); + for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) { + uint8_t HexPair = Bytes[I / 8]; + uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf; + printHexDigit(OS, HexDigit); + } +} + +static void printBits(raw_ostream &OS, ArrayRef Bytes, size_t StartBit, + size_t NumBits) { + assert(StartBit + NumBits <= Bytes.size() * 8u); + for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) { + uint8_t Byte = Bytes[I / 8]; + size_t ByteOffset = I % 8; + if (size_t ByteShift = 8 - ByteOffset - 1) + Byte >>= ByteShift; + OS << (Byte & 0x1 ? '1' : '0'); + } +} + +void TrieSubtrie::printHash(raw_ostream &OS, ArrayRef Bytes) const { + // afb[1c:00*01110*0]def + size_t EndBit = StartBit + NumBits; + size_t HashEndBit = Bytes.size() * 8u; + + size_t FirstBinaryBit = StartBit & ~0x3u; + printHexDigits(OS, Bytes, 0, FirstBinaryBit); + + size_t LastBinaryBit = (EndBit + 3u) & ~0x3u; + OS << "["; + printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit); + OS << "]"; + + printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit); +} + +static void appendIndexBits(std::string &Prefix, size_t Index, + size_t NumSlots) { + std::string Bits; + for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) { + Bits.push_back('0' + (Index & 0x1)); + Index >>= 1; + } + for (char Ch : llvm::reverse(Bits)) + Prefix += Ch; +} + +static void printPrefix(raw_ostream &OS, StringRef Prefix) { + while (Prefix.size() >= 4) { + uint8_t Digit; + bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit); + assert(!ErrorParsingBinary); + (void)ErrorParsingBinary; + printHexDigit(OS, Digit); + Prefix = Prefix.drop_front(4); + } + if (!Prefix.empty()) + OS << "[" << Prefix << "]"; +} + +void TrieSubtrie::print(raw_ostream &OS, Optional Prefix) const { + if (!Prefix) { + OS << "root"; + Prefix.emplace(); + } else { + OS << "subtrie="; + printPrefix(OS, *Prefix); + } + + OS << " num-slots=" << Slots.size() << "\n"; + SmallVector Subs; + SmallVector Prefixes; + for (size_t I = 0, E = Slots.size(); I != E; ++I) { + TrieNode *N = get(I); + if (!N) + continue; + OS << "- index=" << I << " "; + if (auto *S = dyn_cast(N)) { + std::string SubtriePrefix = *Prefix; + appendIndexBits(SubtriePrefix, I, Slots.size()); + OS << "subtrie="; + printPrefix(OS, SubtriePrefix); + OS << "\n"; + Subs.push_back(S); + Prefixes.push_back(SubtriePrefix); + continue; + } + auto *Content = cast(N); + OS << "content="; + printHash(OS, Content->getHash()); + OS << "\n"; + } + for (size_t I = 0, E = Subs.size(); I != E; ++I) + Subs[I]->print(OS, Prefixes[I]); +} + +void ThreadSafeHashMappedTrieBase::print(raw_ostream &OS) const { + OS << "root-bits=" << NumRootBits << " subtrie-bits=" << NumSubtrieBits + << "\n"; + if (ImplType *Impl = ImplPtr.load()) + Impl->Root.print(OS); + else + OS << "[no-root]\n"; +} + +LLVM_DUMP_METHOD void ThreadSafeHashMappedTrieBase::dump() const { + print(dbgs()); +} + +ThreadSafeHashMappedTrieBase::ThreadSafeHashMappedTrieBase( + size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, + Optional NumRootBits, Optional NumSubtrieBits) + : ContentAllocSize(ContentAllocSize), 
ContentAllocAlign(ContentAllocAlign), + ContentOffset(ContentOffset), + NumRootBits(NumRootBits ? *NumRootBits : DefaultNumRootBits), + NumSubtrieBits(NumSubtrieBits ? *NumSubtrieBits : DefaultNumSubtrieBits), + ImplPtr(nullptr) { + assert((!NumRootBits || *NumRootBits < 20) && + "Root should have fewer than ~1M slots"); + assert((!NumSubtrieBits || *NumSubtrieBits < 10) && + "Subtries should have fewer than ~1K slots"); +} + +ThreadSafeHashMappedTrieBase::ThreadSafeHashMappedTrieBase( + ThreadSafeHashMappedTrieBase &&RHS) + : ContentAllocSize(RHS.ContentAllocSize), + ContentAllocAlign(RHS.ContentAllocAlign), + ContentOffset(RHS.ContentOffset), NumRootBits(RHS.NumRootBits), + NumSubtrieBits(RHS.NumSubtrieBits) { + // Steal the root from RHS. + ImplPtr = RHS.ImplPtr.exchange(nullptr); +} + +ThreadSafeHashMappedTrieBase::~ThreadSafeHashMappedTrieBase() { + assert(!ImplPtr.load() && "Expected subclass to call destroyImpl()"); +} + +void ThreadSafeHashMappedTrieBase::destroyImpl( + function_ref Destructor) { + std::unique_ptr Impl(ImplPtr.exchange(nullptr)); + if (!Impl) + return; + + // Destroy content nodes throughout trie. Avoid destroying any subtries since + // we need TrieNode::classof() to find the content nodes. + // + // FIXME: Once we have bitsets (see FIXME in TrieSubtrie class), use them + // facilitate sparse iteration here. + if (Destructor) + for (TrieSubtrie *Trie = &Impl->Root; Trie; Trie = Trie->Next.load()) + for (auto &Slot : Trie->Slots) + if (auto *Content = dyn_cast_or_null(Slot.load())) + Destructor(Content->getValuePointer()); + + // Destroy the subtries. Incidentally, this destroys them in the reverse order + // of saving. + TrieSubtrie *Trie = Impl->Root.Next; + while (Trie) { + TrieSubtrie *Next = Trie->Next.exchange(nullptr); + delete Trie; + Trie = Next; + } +} diff --git a/llvm/lib/Support/HashMappedTrieIndexGenerator.h b/llvm/lib/Support/HashMappedTrieIndexGenerator.h new file mode 100644 index 0000000000000..f28978c78bb3a --- /dev/null +++ b/llvm/lib/Support/HashMappedTrieIndexGenerator.h @@ -0,0 +1,89 @@ +//===- HashMappedTrieIndexGenerator.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_SUPPORT_HASHMAPPEDTRIEINDEXGENERATOR_H +#define LLVM_LIB_SUPPORT_HASHMAPPEDTRIEINDEXGENERATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" + +namespace llvm { + +struct IndexGenerator { + size_t NumRootBits; + size_t NumSubtrieBits; + ArrayRef Bytes; + Optional StartBit = None; + + size_t getNumBits() const { + assert(StartBit); + size_t TotalNumBits = Bytes.size() * 8; + assert(*StartBit <= TotalNumBits); + return std::min(*StartBit ? NumSubtrieBits : NumRootBits, + TotalNumBits - *StartBit); + } + size_t next() { + size_t Index; + if (!StartBit) { + StartBit = 0; + Index = getIndex(Bytes, *StartBit, NumRootBits); + } else { + *StartBit += *StartBit ? 
NumSubtrieBits : NumRootBits; + assert((*StartBit - NumRootBits) % NumSubtrieBits == 0); + Index = getIndex(Bytes, *StartBit, NumSubtrieBits); + } + return Index; + } + + size_t hint(unsigned Index, unsigned Bit) { + assert(Index >= 0); + assert(Bit < Bytes.size() * 8); + assert(Bit == 0 || (Bit - NumRootBits) % NumSubtrieBits == 0); + StartBit = Bit; + return Index; + } + + size_t getCollidingBits(ArrayRef CollidingBits) const { + assert(StartBit); + return getIndex(CollidingBits, *StartBit, NumSubtrieBits); + } + + static size_t getIndex(ArrayRef Bytes, size_t StartBit, + size_t NumBits) { + assert(StartBit < Bytes.size() * 8); + + Bytes = Bytes.drop_front(StartBit / 8u); + StartBit %= 8u; + size_t Index = 0; + for (uint8_t Byte : Bytes) { + size_t ByteStart = 0, ByteEnd = 8; + if (StartBit) { + ByteStart = StartBit; + Byte &= (1u << (8 - StartBit)) - 1u; + StartBit = 0; + } + size_t CurrentNumBits = ByteEnd - ByteStart; + if (CurrentNumBits > NumBits) { + Byte >>= CurrentNumBits - NumBits; + CurrentNumBits = NumBits; + } + Index <<= CurrentNumBits; + Index |= Byte & ((1u << CurrentNumBits) - 1u); + + assert(NumBits >= CurrentNumBits); + NumBits -= CurrentNumBits; + if (!NumBits) + break; + } + return Index; + } +}; + +} // namespace llvm + +#endif // LLVM_LIB_SUPPORT_HASHMAPPEDTRIEINDEXGENERATOR_H diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index cac7280717496..f1876aee0c66a 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -31,6 +31,7 @@ add_llvm_unittest(ADTTests FoldingSet.cpp FunctionExtrasTest.cpp FunctionRefTest.cpp + HashMappedTrieTest.cpp HashingTest.cpp IListBaseTest.cpp IListIteratorTest.cpp @@ -45,6 +46,7 @@ add_llvm_unittest(ADTTests IntervalMapTest.cpp IntrusiveRefCntPtrTest.cpp IteratorTest.cpp + LazyAtomicPointerTest.cpp MappedIteratorTest.cpp MapVectorTest.cpp OptionalTest.cpp diff --git a/llvm/unittests/ADT/HashMappedTrieTest.cpp b/llvm/unittests/ADT/HashMappedTrieTest.cpp new file mode 100644 index 0000000000000..7e0b987f0fb02 --- /dev/null +++ b/llvm/unittests/ADT/HashMappedTrieTest.cpp @@ -0,0 +1,335 @@ +//===- HashMappedTrieTest.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/HashMappedTrie.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/SHA1.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static StringRef takeNextLine(StringRef &Lines) { + size_t Newline = Lines.find('\n'); + StringRef Line = Lines.take_front(Newline); + Lines = Lines.drop_front(Newline + 1); + return Line; +} + +namespace { + +TEST(HashMappedTrieTest, TrieStructure) { + using NumType = uint64_t; + using HashType = std::array; + using TrieType = ThreadSafeHashMappedTrie; + NumType Numbers[] = { + // Three numbers that will nest deeply to test (1) sinking subtries and + // (2) deep, non-trivial hints. + std::numeric_limits::max(), + std::numeric_limits::max() - 2u, + std::numeric_limits::max() - 3u, + // One number to stay at the top-level. + 0x37, + }; + + // Use the number itself as hash to test the pathological case. 
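The IndexGenerator above consumes the hash most-significant bit first: NumRootBits select the root slot, then NumSubtrieBits per level below it. A standalone sketch of that slicing (illustrative helper, not the patch's API), using the big-endian hashes this test constructs:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Read NumBits bits starting at StartBit, MSB of each byte first, and pack
  // them into a slot index -- the same slicing IndexGenerator::getIndex does.
  static size_t takeBits(const std::vector<uint8_t> &Bytes, size_t StartBit,
                         size_t NumBits) {
    size_t Index = 0;
    for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I)
      Index = (Index << 1) | ((Bytes[I / 8] >> (7 - I % 8)) & 1u);
    return Index;
  }

  // With root-bits=1, the big-endian hash of 0x37 (00 00 ... 37) gives
  // takeBits(Hash, 0, 1) == 0, so 0x37 stays in root slot 0, while the
  // all-ones hashes give 1 and chain down from root slot 1 -- matching the
  // dump assertions further down.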
+ auto hash = [](NumType Num) { + NumType HashN = llvm::support::endian::byte_swap(Num, llvm::support::big); + HashType Hash; + memcpy(&Hash[0], &HashN, sizeof(HashType)); + return Hash; + }; + + // Use root and subtrie sizes of 1 so this gets sunk quite deep. + TrieType Trie(1, 1); + for (NumType N : Numbers) { + // Lookup first to exercise hint code for deep tries. + TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_FALSE(Lookup); + + Trie.insert(Lookup, TrieType::value_type(hash(N), N)); + } + for (NumType N : Numbers) { + TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_TRUE(Lookup); + if (!Lookup) + continue; + EXPECT_EQ(hash(N), Lookup->Hash); + EXPECT_EQ(N, Lookup->Data); + + // Confirm a subsequent insertion fails to overwrite by trying to insert a + // bad value. + EXPECT_EQ(N, + Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1))->Data); + } + + // Dump out the trie so we can confirm the structure is correct. Each subtrie + // should have 2 slots. The root's index=0 should have the content for + // 0x37 directly, and index=1 should be a linked-list of subtries, finally + // ending with content for (max-2) and (max-3). + // + // Note: This structure is not exhaustive (too expensive to update tests), + // but it does test that the dump format is somewhat readable and that the + // basic structure is correct. + // + // Note: This test requires that the trie reads bytes starting from index 0 + // of the array of uint8_t, and then reads each byte's bits from high to low. + SmallString<128> Dump; + { + raw_svector_ostream OS(Dump); + Trie.print(OS); + } + + // Check the header. + StringRef DumpRef = Dump; + ASSERT_EQ("root-bits=1 subtrie-bits=1", takeNextLine(DumpRef)); + + // Check the root trie. + ASSERT_EQ("root num-slots=2", takeNextLine(DumpRef)); + ASSERT_EQ("- index=0 content=[0000]000000000000037", takeNextLine(DumpRef)); + ASSERT_EQ("- index=1 subtrie=[1]", takeNextLine(DumpRef)); + ASSERT_EQ("subtrie=[1] num-slots=2", takeNextLine(DumpRef)); + + // Check the last subtrie. + size_t LastSubtrie = DumpRef.rfind("\nsubtrie="); + ASSERT_NE(StringRef::npos, LastSubtrie); + DumpRef = DumpRef.substr(LastSubtrie + 1); + ASSERT_EQ("subtrie=fffffffffffffff[110] num-slots=2", takeNextLine(DumpRef)); + ASSERT_EQ("- index=0 content=fffffffffffffff[1100]", takeNextLine(DumpRef)); + ASSERT_EQ("- index=1 content=fffffffffffffff[1101]", takeNextLine(DumpRef)); + ASSERT_TRUE(DumpRef.empty()); +} + +TEST(HashMappedTrieTest, TrieStructureSmallFinalSubtrie) { + using NumType = uint64_t; + using HashType = std::array; + using TrieType = ThreadSafeHashMappedTrie; + NumType Numbers[] = { + // Three numbers that will nest deeply to test (1) sinking subtries and + // (2) deep, non-trivial hints. + std::numeric_limits::max(), + std::numeric_limits::max() - 2u, + std::numeric_limits::max() - 3u, + // One number to stay at the top-level. + 0x37, + }; + + // Use the number itself as hash to test the pathological case. + auto hash = [](NumType Num) { + NumType HashN = llvm::support::endian::byte_swap(Num, llvm::support::big); + HashType Hash; + memcpy(&Hash[0], &HashN, sizeof(HashType)); + return Hash; + }; + + // Use subtrie size of 7 to avoid hitting 64 evenly, making the final subtrie + // small. + TrieType Trie(8, 5); + for (NumType N : Numbers) { + // Lookup first to exercise hint code for deep tries. 
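    // (find() returns a usable insertion hint even on a miss: it records the
    // subtrie and slot where the search stopped, so the insert below can
    // resume from there instead of rewalking the trie from the root.)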
+ TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_FALSE(Lookup); + + Trie.insert(Lookup, TrieType::value_type(hash(N), N)); + } + for (NumType N : Numbers) { + TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_TRUE(Lookup); + if (!Lookup) + continue; + EXPECT_EQ(hash(N), Lookup->Hash); + EXPECT_EQ(N, Lookup->Data); + + // Confirm a subsequent insertion fails to overwrite by trying to insert a + // bad value. + EXPECT_EQ(N, + Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1))->Data); + } + + // Dump out the trie so we can confirm the structure is correct. The root + // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the + // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1). + // should have 2 slots. The root's index=0 should have the content for + // 0x37 directly, and index=1 should be a linked-list of subtries, finally + // ending with content for (max-2) and (max-3). + // + // Note: This structure is not exhaustive (too expensive to update tests), + // but it does test that the dump format is somewhat readable and that the + // basic structure is correct. + // + // Note: This test requires that the trie reads bytes starting from index 0 + // of the array of uint8_t, and then reads each byte's bits from high to low. + SmallString<128> Dump; + { + raw_svector_ostream OS(Dump); + Trie.print(OS); + } + + // Check the header. + StringRef DumpRef = Dump; + ASSERT_EQ("root-bits=8 subtrie-bits=5", takeNextLine(DumpRef)); + + // Check the root trie. + ASSERT_EQ("root num-slots=256", takeNextLine(DumpRef)); + ASSERT_EQ("- index=0 content=[00000000]00000000000037", + takeNextLine(DumpRef)); + ASSERT_EQ("- index=255 subtrie=ff", takeNextLine(DumpRef)); + ASSERT_EQ("subtrie=ff num-slots=32", takeNextLine(DumpRef)); + + // Check the last subtrie. + size_t LastSubtrie = DumpRef.rfind("\nsubtrie="); + ASSERT_NE(StringRef::npos, LastSubtrie); + DumpRef = DumpRef.substr(LastSubtrie + 1); + ASSERT_EQ("subtrie=fffffffffffffff[110] num-slots=2", takeNextLine(DumpRef)); + ASSERT_EQ("- index=0 content=fffffffffffffff[1100]", takeNextLine(DumpRef)); + ASSERT_EQ("- index=1 content=fffffffffffffff[1101]", takeNextLine(DumpRef)); + ASSERT_TRUE(DumpRef.empty()); +} + +TEST(HashMappedTrieTest, TrieDestructionLoop) { + using NumT = uint64_t; + struct NumWithDestructorT { + NumT Num; + operator NumT() const { return Num; } + ~NumWithDestructorT() {} + }; + + using HashT = std::array; + using TrieT = ThreadSafeHashMappedTrie; + using TrieWithDestructorT = + ThreadSafeHashMappedTrie; + + // Use the number itself in big-endian order as the hash. + auto hash = [](NumT Num) { + NumT HashN = llvm::support::endian::byte_swap(Num, llvm::support::big); + HashT Hash; + memcpy(&Hash[0], &HashN, sizeof(HashT)); + return Hash; + }; + + // Use optionals to control when destructors are called. + Optional Trie; + Optional TrieWithDestructor; + + // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. + Trie.emplace(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); + TrieWithDestructor.emplace(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); + + // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug + // builds. + static constexpr uint64_t MaxN = 100000; + for (uint64_t N = 0; N != MaxN; ++N) { + HashT Hash = hash(N); + Trie->insert(TrieT::pointer(), TrieT::value_type(Hash, N)); + TrieWithDestructor->insert( + TrieWithDestructorT::pointer(), + TrieWithDestructorT::value_type(Hash, NumWithDestructorT{N})); + } + + // Destroy tries. 
If destruction is recursive and MaxN is high enough, these + // will both fail. + Trie.reset(); + TrieWithDestructor.reset(); +} + +namespace { +using HasherT = SHA1; +using HashType = decltype(HasherT::hash(std::declval &>())); +template +class ThreadSafeHashMappedTrieSet + : ThreadSafeHashMappedTrie { +public: + using TrieType = + typename ThreadSafeHashMappedTrieSet::ThreadSafeHashMappedTrie; + using LazyValueConstructor = typename ThreadSafeHashMappedTrieSet:: + ThreadSafeHashMappedTrie::LazyValueConstructor; + + class pointer : public TrieType::const_pointer { + using BaseType = typename TrieType::const_pointer; + + public: + const T &operator*() const { + return TrieType::const_pointer::operator*().Data; + } + const T *operator->() const { return &operator*(); } + + pointer() = default; + pointer(pointer &&) = default; + pointer(const pointer &) = default; + pointer &operator=(pointer &&) = default; + pointer &operator=(const pointer &) = default; + + private: + pointer(BaseType Result) : BaseType(Result) {} + friend class ThreadSafeHashMappedTrieSet; + }; + + ThreadSafeHashMappedTrieSet(Optional NumRootBits = None, + Optional NumSubtrieBits = None) + : TrieType(NumRootBits, NumSubtrieBits) {} + + static HashType hash(const T &V) { + return HasherT::hash(ArrayRef( + reinterpret_cast(V.data()), V.size())); + } + pointer find(const T &Value) const { + return pointer(TrieType::find(hash(Value))); + } + pointer insert(pointer Hint, T &&Value) { + return pointer(TrieType::insertLazy( + typename pointer::BaseType(Hint), + [&](LazyValueConstructor C) { C(std::move(Value)); })); + } + pointer insert(pointer Hint, const T &Value) { + return pointer( + TrieType::insertLazy(typename pointer::BaseType(Hint), hash(Value), + [&](LazyValueConstructor C) { C(Value); })); + } + pointer insert(T &&Value) { return insert(pointer(), Value); } + pointer insert(const T &Value) { return insert(pointer(), Value); } +}; +} // end anonymous namespace + +TEST(HashMappedTrieTest, Strings) { + for (unsigned RootBits : {2, 3, 6, 10}) { + for (unsigned SubtrieBits : {2, 3, 4}) { + ThreadSafeHashMappedTrieSet Strings(RootBits, SubtrieBits); + const std::string &A1 = *Strings.insert("A"); + EXPECT_EQ(&A1, &*Strings.insert("A")); + std::string A2 = A1; + EXPECT_EQ(&A1, &*Strings.insert(A2)); + + const std::string &B1 = *Strings.insert("B"); + EXPECT_EQ(&B1, &*Strings.insert(B1)); + std::string B2 = B1; + EXPECT_EQ(&B1, &*Strings.insert(B2)); + + for (int I = 0, E = 1000; I != E; ++I) { + ThreadSafeHashMappedTrieSet::pointer Lookup; + std::string S = Twine(I).str(); + if (I & 1) + Lookup = Strings.find(S); + const std::string &S1 = *Strings.insert(Lookup, S); + EXPECT_EQ(&S1, &*Strings.insert(S1)); + std::string S2 = S1; + EXPECT_EQ(&S1, &*Strings.insert(S2)); + } + for (int I = 0, E = 1000; I != E; ++I) { + std::string S = Twine(I).str(); + ThreadSafeHashMappedTrieSet::pointer Lookup = + Strings.find(S); + EXPECT_TRUE(Lookup); + if (!Lookup) + continue; + EXPECT_EQ(S, *Lookup); + } + } + } +} + +} // namespace diff --git a/llvm/unittests/ADT/LazyAtomicPointerTest.cpp b/llvm/unittests/ADT/LazyAtomicPointerTest.cpp new file mode 100644 index 0000000000000..d6f640b18c4f4 --- /dev/null +++ b/llvm/unittests/ADT/LazyAtomicPointerTest.cpp @@ -0,0 +1,70 @@ +//===- LazyAtomicPointerTest.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/LazyAtomicPointer.h" +#include "llvm/Support/ThreadPool.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(LazyAtomicPointer, loadOrGenerate) { + int Value = 0; + LazyAtomicPointer Ptr; + ThreadPool Threads; + for (unsigned I = 0; I < 4; ++I) + Threads.async([&]() { + Ptr.loadOrGenerate([&]() { + // Make sure this is only called once. + static std::atomic Once(false); + bool Current = false; + EXPECT_TRUE(Once.compare_exchange_strong(Current, true)); + return &Value; + }); + }); + + Threads.wait(); + + EXPECT_EQ(Ptr.load(), &Value); +} + +TEST(LazyAtomicPointer, BusyState) { + int Value = 0; + LazyAtomicPointer Ptr; + ThreadPool Threads; + + std::mutex BusyStart, BusyEnd; + BusyStart.lock(); + BusyEnd.lock(); + Threads.async([&]() { + Ptr.loadOrGenerate([&]() { + BusyStart.unlock(); + while (!BusyEnd.try_lock()) { + // wait till the lock is unlocked. + } + return &Value; + }); + }); + + // Wait for busy state. + std::lock_guard BusyLockG(BusyStart); + int *ExistingValue = nullptr; + // Busy state will not exchange the value. + EXPECT_FALSE(Ptr.compare_exchange_weak(ExistingValue, nullptr)); + // Busy state return nullptr on load/compare_exchange_weak. + EXPECT_EQ(ExistingValue, nullptr); + EXPECT_EQ(Ptr.load(), nullptr); + + // End busy state. + BusyEnd.unlock(); + Threads.wait(); + EXPECT_EQ(Ptr.load(), &Value); +} + +} // namespace diff --git a/llvm/unittests/CAS/CASTestConfig.cpp b/llvm/unittests/CAS/CASTestConfig.cpp new file mode 100644 index 0000000000000..bb06ee5573134 --- /dev/null +++ b/llvm/unittests/CAS/CASTestConfig.cpp @@ -0,0 +1,22 @@ +//===- CASTestConfig.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CASTestConfig.h" +#include "llvm/CAS/ObjectStore.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; + +CASTestingEnv createInMemory(int I) { + std::unique_ptr CAS = createInMemoryCAS(); + return CASTestingEnv{std::move(CAS)}; +} + +INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest, + ::testing::Values(createInMemory)); diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h new file mode 100644 index 0000000000000..07bcb4ef08990 --- /dev/null +++ b/llvm/unittests/CAS/CASTestConfig.h @@ -0,0 +1,36 @@ +//===- CASTestConfig.h ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/ObjectStore.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +#ifndef LLVM_UNITTESTS_CASTESTCONFIG_H +#define LLVM_UNITTESTS_CASTESTCONFIG_H + +struct CASTestingEnv { + std::unique_ptr CAS; +}; + +class CASTest + : public testing::TestWithParam> { +protected: + llvm::Optional NextCASIndex; + + std::unique_ptr createObjectStore() { + auto TD = GetParam()(++(*NextCASIndex)); + return std::move(TD.CAS); + } + void SetUp() { NextCASIndex = 0; } + void TearDown() { NextCASIndex = llvm::None; } +}; + +#endif diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt new file mode 100644 index 0000000000000..39a2100c4909e --- /dev/null +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + Support + CAS + TestingSupport + ) + +add_llvm_unittest(CASTests + CASTestConfig.cpp + ObjectStoreTest.cpp + ) + +target_link_libraries(CASTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/CAS/ObjectStoreTest.cpp b/llvm/unittests/CAS/ObjectStoreTest.cpp new file mode 100644 index 0000000000000..d68b6ca699b55 --- /dev/null +++ b/llvm/unittests/CAS/ObjectStoreTest.cpp @@ -0,0 +1,278 @@ +//===- ObjectStoreTest.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/ObjectStore.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +#include "CASTestConfig.h" + +using namespace llvm; +using namespace llvm::cas; + +TEST_P(CASTest, PrintIDs) { + std::unique_ptr CAS = createObjectStore(); + + Optional ID1, ID2; + ASSERT_THAT_ERROR(CAS->createProxy(None, "1").moveInto(ID1), Succeeded()); + ASSERT_THAT_ERROR(CAS->createProxy(None, "2").moveInto(ID2), Succeeded()); + EXPECT_NE(ID1, ID2); + std::string PrintedID1 = ID1->toString(); + std::string PrintedID2 = ID2->toString(); + EXPECT_NE(PrintedID1, PrintedID2); + + Optional ParsedID1, ParsedID2; + ASSERT_THAT_ERROR(CAS->parseID(PrintedID1).moveInto(ParsedID1), Succeeded()); + ASSERT_THAT_ERROR(CAS->parseID(PrintedID2).moveInto(ParsedID2), Succeeded()); + EXPECT_EQ(ID1, ParsedID1); + EXPECT_EQ(ID2, ParsedID2); +} + +TEST_P(CASTest, Blobs) { + std::unique_ptr CAS1 = createObjectStore(); + StringRef ContentStrings[] = { + "word", + "some longer text std::string's local memory", + R"(multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text)", + }; + + SmallVector IDs; + for (StringRef Content : ContentStrings) { + // Use StringRef::str() to create a temporary std::string. 
This could cause + // problems if the CAS is storing references to the input string instead of + // copying it. + Optional Blob; + ASSERT_THAT_ERROR(CAS1->createProxy(None, Content).moveInto(Blob), + Succeeded()); + IDs.push_back(Blob->getID()); + + // Check basic printing of IDs. + EXPECT_EQ(IDs.back().toString(), IDs.back().toString()); + if (IDs.size() > 2) + EXPECT_NE(IDs.front().toString(), IDs.back().toString()); + } + + // Check that the blobs give the same IDs later. + for (int I = 0, E = IDs.size(); I != E; ++I) { + Optional Blob; + ASSERT_THAT_ERROR(CAS1->createProxy(None, ContentStrings[I]).moveInto(Blob), + Succeeded()); + EXPECT_EQ(IDs[I], Blob->getID()); + } + + // Run validation on all CASIDs. + for (int I = 0, E = IDs.size(); I != E; ++I) + ASSERT_THAT_ERROR(CAS1->validate(IDs[I]), Succeeded()); + + // Check that the blobs can be retrieved multiple times. + for (int I = 0, E = IDs.size(); I != E; ++I) { + for (int J = 0, JE = 3; J != JE; ++J) { + Optional Buffer; + ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Buffer), Succeeded()); + EXPECT_EQ(ContentStrings[I], Buffer->getData()); + } + } + + // Confirm these blobs don't exist in a fresh CAS instance. + std::unique_ptr CAS2 = createObjectStore(); + for (int I = 0, E = IDs.size(); I != E; ++I) { + Optional Proxy; + EXPECT_THAT_ERROR(CAS2->getProxyOrNone(IDs[I]).moveInto(Proxy), + Succeeded()); + ASSERT_FALSE(Proxy); + } + + // Insert into the second CAS and confirm the IDs are stable. Getting them + // should work now. + for (int I = IDs.size(), E = 0; I != E; --I) { + auto &ID = IDs[I - 1]; + auto &Content = ContentStrings[I - 1]; + Optional Blob; + ASSERT_THAT_ERROR(CAS2->createProxy(None, Content).moveInto(Blob), + Succeeded()); + EXPECT_EQ(ID, Blob->getID()); + + Optional Buffer; + ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Buffer), Succeeded()); + EXPECT_EQ(Content, Buffer->getData()); + } +} + +TEST_P(CASTest, BlobsBig) { + // A little bit of validation that bigger blobs are okay. Climb up to 1MB. + std::unique_ptr CAS = createObjectStore(); + SmallString<256> String1 = StringRef("a few words"); + SmallString<256> String2 = StringRef("others"); + while (String1.size() < 1024U * 1024U) { + Optional ID1; + Optional ID2; + ASSERT_THAT_ERROR(CAS->createProxy(None, String1).moveInto(ID1), + Succeeded()); + ASSERT_THAT_ERROR(CAS->createProxy(None, String1).moveInto(ID2), + Succeeded()); + ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded()); + ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded()); + ASSERT_EQ(ID1, ID2); + + String1.append(String2); + ASSERT_THAT_ERROR(CAS->createProxy(None, String2).moveInto(ID1), + Succeeded()); + ASSERT_THAT_ERROR(CAS->createProxy(None, String2).moveInto(ID2), + Succeeded()); + ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded()); + ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded()); + ASSERT_EQ(ID1, ID2); + String2.append(String1); + } + + // Specifically check near 1MB for objects large enough they're likely to be + // stored externally in an on-disk CAS and will be near a page boundary. 
+ SmallString<0> Storage; + const size_t InterestingSize = 1024U * 1024ULL; + const size_t SizeE = InterestingSize + 2; + if (Storage.size() < SizeE) + Storage.resize(SizeE, '\01'); + for (size_t Size = InterestingSize - 2; Size != SizeE; ++Size) { + StringRef Data(Storage.data(), Size); + Optional Blob; + ASSERT_THAT_ERROR(CAS->createProxy(None, Data).moveInto(Blob), Succeeded()); + ASSERT_EQ(Data, Blob->getData()); + ASSERT_EQ(0, Blob->getData().end()[0]); + } +} + +TEST_P(CASTest, LeafNodes) { + std::unique_ptr CAS1 = createObjectStore(); + StringRef ContentStrings[] = { + "word", + "some longer text std::string's local memory", + R"(multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text +multiline text multiline text multiline text multiline text multiline text)", + }; + + SmallVector Nodes; + SmallVector IDs; + for (StringRef Content : ContentStrings) { + // Use StringRef::str() to create a temporary std::string. This could cause + // problems if the CAS is storing references to the input string instead of + // copying it. + Optional Node; + ASSERT_THAT_ERROR( + CAS1->store(None, arrayRefFromStringRef(Content)).moveInto(Node), + Succeeded()); + Nodes.push_back(*Node); + + // Check basic printing of IDs. + IDs.push_back(CAS1->getID(*Node)); + EXPECT_EQ(IDs.back().toString(), IDs.back().toString()); + EXPECT_EQ(Nodes.front(), Nodes.front()); + EXPECT_EQ(Nodes.back(), Nodes.back()); + EXPECT_EQ(IDs.front(), IDs.front()); + EXPECT_EQ(IDs.back(), IDs.back()); + if (Nodes.size() <= 1) + continue; + EXPECT_NE(Nodes.front(), Nodes.back()); + EXPECT_NE(IDs.front(), IDs.back()); + } + + // Check that the blobs give the same IDs later. + for (int I = 0, E = IDs.size(); I != E; ++I) { + Optional Node; + ASSERT_THAT_ERROR( + CAS1->store(None, arrayRefFromStringRef(ContentStrings[I])) + .moveInto(Node), + Succeeded()); + EXPECT_EQ(IDs[I], CAS1->getID(*Node)); + } + + // Check that the blobs can be retrieved multiple times. + for (int I = 0, E = IDs.size(); I != E; ++I) { + for (int J = 0, JE = 3; J != JE; ++J) { + Optional Object; + ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Object), Succeeded()); + ASSERT_TRUE(Object); + EXPECT_EQ(ContentStrings[I], Object->getData()); + } + } + + // Confirm these blobs don't exist in a fresh CAS instance. + std::unique_ptr CAS2 = createObjectStore(); + for (int I = 0, E = IDs.size(); I != E; ++I) { + Optional Object; + EXPECT_THAT_ERROR(CAS2->getProxyOrNone(IDs[I]).moveInto(Object), + Succeeded()); + EXPECT_FALSE(Object); + } + + // Insert into the second CAS and confirm the IDs are stable. Getting them + // should work now. 
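  // (IDs remain stable across ObjectStore instances because they are derived
  // purely from the content hash -- see BuiltinObjectHasher -- rather than
  // from insertion order or internal pointers.)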
+ for (int I = IDs.size(), E = 0; I != E; --I) { + auto &ID = IDs[I - 1]; + auto &Content = ContentStrings[I - 1]; + Optional Node; + ASSERT_THAT_ERROR( + CAS2->store(None, arrayRefFromStringRef(Content)).moveInto(Node), + Succeeded()); + EXPECT_EQ(ID, CAS2->getID(*Node)); + + Optional Object; + ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Object), Succeeded()); + ASSERT_TRUE(Object); + EXPECT_EQ(Content, Object->getData()); + } +} + +TEST_P(CASTest, NodesBig) { + std::unique_ptr CAS = createObjectStore(); + + // Specifically check near 1MB for objects large enough they're likely to be + // stored externally in an on-disk CAS, and such that one of them will be + // near a page boundary. + SmallString<0> Storage; + constexpr size_t InterestingSize = 1024U * 1024ULL; + constexpr size_t WordSize = sizeof(void *); + + // Start much smaller to account for headers. + constexpr size_t SizeB = InterestingSize - 8 * WordSize; + constexpr size_t SizeE = InterestingSize + 1; + if (Storage.size() < SizeE) + Storage.resize(SizeE, '\01'); + + SmallVector CreatedNodes; + // Avoid checking every size because this is an expensive test. Just check + // for data that is 8B-word-aligned, and one less. Also appending the created + // nodes as the references in the next block to check references are created + // correctly. + for (size_t Size = SizeB; Size < SizeE; Size += WordSize) { + for (bool IsAligned : {false, true}) { + StringRef Data(Storage.data(), Size - (IsAligned ? 0 : 1)); + Optional Node; + ASSERT_THAT_ERROR(CAS->createProxy(CreatedNodes, Data).moveInto(Node), + Succeeded()); + ASSERT_EQ(Data, Node->getData()); + ASSERT_EQ(0, Node->getData().end()[0]); + ASSERT_EQ(Node->getNumReferences(), CreatedNodes.size()); + CreatedNodes.emplace_back(Node->getRef()); + } + } + + for (auto ID : CreatedNodes) + ASSERT_THAT_ERROR(CAS->validate(CAS->getID(ID)), Succeeded()); +} diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt index 95bc117757c09..3c1a969970691 100644 --- a/llvm/unittests/CMakeLists.txt +++ b/llvm/unittests/CMakeLists.txt @@ -20,6 +20,7 @@ add_subdirectory(AsmParser) add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) add_subdirectory(Bitstream) +add_subdirectory(CAS) add_subdirectory(CodeGen) add_subdirectory(DebugInfo) add_subdirectory(Debuginfod) diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 993d57e9ef7b2..8f902913ca328 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -84,6 +84,7 @@ add_llvm_unittest(SupportTests TaskQueueTest.cpp ThreadLocalTest.cpp ThreadPool.cpp + ThreadSafeAllocatorTest.cpp Threading.cpp TimerTest.cpp ToolOutputFileTest.cpp diff --git a/llvm/unittests/Support/ThreadSafeAllocatorTest.cpp b/llvm/unittests/Support/ThreadSafeAllocatorTest.cpp new file mode 100644 index 0000000000000..b0e19e4d92679 --- /dev/null +++ b/llvm/unittests/Support/ThreadSafeAllocatorTest.cpp @@ -0,0 +1,48 @@ +//===- llvm/unittest/Support/ThreadSafeAllocatorTest.cpp ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ThreadSafeAllocator.h" +#include "llvm/Support/ThreadPool.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; + +TEST(ThreadSafeAllocatorTest, AllocWithAlign) { + ThreadSafeAllocator Alloc; + ThreadPool Threads; + + for (unsigned Index = 1; Index < 100; ++Index) + Threads.async( + [&Alloc](unsigned I) { + int *P = (int *)Alloc.Allocate(sizeof(int) * I, alignof(int)); + P[I - 1] = I; + }, + Index); + + Threads.wait(); + + Alloc.applyLocked([](BumpPtrAllocator &Alloc) { + EXPECT_EQ(4950U * sizeof(int), Alloc.getBytesAllocated()); + }); +} + +TEST(ThreadSafeAllocatorTest, SpecificBumpPtrAllocator) { + ThreadSafeAllocator> Alloc; + ThreadPool Threads; + + for (unsigned Index = 1; Index < 100; ++Index) + Threads.async( + [&Alloc](unsigned I) { + int *P = Alloc.Allocate(I); + P[I - 1] = I; + }, + Index); + + Threads.wait(); +}
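Taken together, the new pieces compose as follows. A short usage sketch against the in-memory store, assuming the headers and the LLVMCAS library land as added in this patch (error handling kept minimal):

  #include "llvm/ADT/Optional.h"
  #include "llvm/CAS/ObjectStore.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;
  using namespace llvm::cas;

  static Error buildSmallTree() {
    std::unique_ptr<ObjectStore> CAS = createInMemoryCAS();

    // Store a leaf object (no references), then a node that refers to it.
    Expected<ObjectProxy> Leaf = CAS->createProxy(None, "leaf data");
    if (!Leaf)
      return Leaf.takeError();
    Expected<ObjectProxy> Node = CAS->createProxy({Leaf->getRef()}, "node data");
    if (!Node)
      return Node.takeError();

    // IDs are content-derived and stable; storing the same bytes again yields
    // the same ID, and stored data is guaranteed to be null-terminated.
    outs() << "leaf: " << Leaf->getID().toString() << "\n"
           << "node refs: " << Node->getNumReferences() << "\n";
    return Error::success();
  }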