Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve RangeSet initialization performance #75089

Merged
merged 3 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 48 additions & 16 deletions stdlib/public/core/RangeSetRanges.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,57 @@ extension RangeSet {
_storage.sort {
$0.lowerBound < $1.lowerBound
}
var i = 0
while i < _storage.count {
let current = _storage[i]
if i > 0 {
let previous = _storage[i - 1]
if previous.upperBound >= current.lowerBound {
let newUpper = Swift.max(previous.upperBound, current.upperBound)
_storage[i - 1] = previous.lowerBound ..< newUpper
_storage.remove(at: i)
continue
}
}

if current.isEmpty {
_storage.remove(at: i)

// Find the index of the first non-empty range. If all ranges are empty,
// the result is empty.
guard let firstNonEmpty = _storage.firstIndex(where: { $0.isEmpty == false }) else {
_storage = []
return
}

// Swap that non-empty range to be first. (This and the swap in the loop
// might be no-ops, if no empty or overlapping ranges have been found.)
_storage.swapAt(0, firstNonEmpty)

// That single range is now a valid range set, so we set up three sections
// of the storage array:
//
// 1: a processed, valid range set (0...lastValid)
// 2: ranges to discard (lastValid + 1 ..< current)
// 3: unprocessed ranges (current ..< _storage.count)
//
// Section 2 is made up of ranges that are either empty or that overlap
// with the ranges in section 1. By waiting to remove these ranges until
// we've processed the entire array, we avoid needing to constantly
// reshuffle the elements during processing.
var lastValid = 0
var current = firstNonEmpty + 1

while current < _storage.count {
defer { current += 1 }

// Skip over empty ranges.
if _storage[current].isEmpty { continue }

// If the last valid range overlaps with the current range, extend the
// last valid range to cover the current.
if _storage[lastValid].upperBound >= _storage[current].lowerBound {
let newUpper = Swift.max(
_storage[lastValid].upperBound,
_storage[current].upperBound)
_storage[lastValid] = Range(
uncheckedBounds: (_storage[lastValid].lowerBound, newUpper))
} else {
i += 1
// Otherwise, this is a valid new range to add to the range set:
// swap it into place at the end of the valid section.
lastValid += 1
_storage.swapAt(current, lastValid)
}
}

// Now that we've processed the whole array, remove anything left after
// the valid section.
_storage.removeSubrange((lastValid + 1) ..< _storage.count)
}
}
}
Expand Down
41 changes: 41 additions & 0 deletions validation-test/stdlib/RangeSet.swift
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,47 @@ if #available(SwiftStdlib 6.0, *) {
}
return set
}

RangeSetTests.test("initialization") {
  // Fixed inputs: initializing from a list of ranges must drop empty ranges
  // and merge ranges that overlap or touch.
  do {
    let emptyRanges = Array(repeating: 0..<0, count: 100)
    let expectedSingle: RangeSet<Int> = [0..<3]
    let expectedFull: RangeSet<Int> = [0..<100]

    // Nothing but empty ranges yields an empty set.
    expectTrue(RangeSet(emptyRanges).isEmpty)

    // One hundred copies of the same range collapse into one.
    let duplicates = Array(repeating: 0..<3, count: 100)
    expectEqual(RangeSet(duplicates), expectedSingle)

    // A lone non-empty range that follows a run of empty ranges survives.
    expectEqual(RangeSet(emptyRanges + [0..<3]), expectedSingle)

    // Unit-length ranges that abut one another merge into a single range,
    // whatever order they are supplied in.
    let unitRanges = (0..<100).map { start in start ..< (start + 1) }
    expectEqual(RangeSet(unitRanges), expectedFull)
    expectEqual(RangeSet(unitRanges.shuffled()), expectedFull)
  }

  // `buildRandomRangeSet()` produces a range set through additions and
  // removals; this helper instead produces a raw array of ranges, some of
  // which may be empty or overlap, for feeding straight into the initializer.
  func makeRandomRanges() -> [Range<Int>] {
    (0..<100).map { _ in
      let lowerBound = Int.random(in: 0...100)
      let length = Int.random(in: 0...20)
      return lowerBound ..< (lowerBound + length)
    }
  }

  // Randomized inputs: the initializer must agree with inserting every range
  // one at a time into an initially empty set.
  for _ in 0..<1000 {
    let input = makeRandomRanges()
    let initialized = RangeSet(input)

    let inserted = input.reduce(into: RangeSet<Int>()) { partial, range in
      partial.insert(contentsOf: range)
    }
    expectEqual(initialized, inserted)
  }
}

RangeSetTests.test("contains") {
expectFalse(source.contains(0))
Expand Down