Skip to content

Commit 0beef9b

Browse files
committed
Implement partial reindexes
1 parent 9d1b51c commit 0beef9b

File tree

1 file changed

+48
-14
lines changed

1 file changed

+48
-14
lines changed

src/database/indexer.rs

+48 -14
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,11 @@ use ini::Ini;
1212
use time::OffsetDateTime;
1313
use tracing::{error, info, info_span, instrument, warn};
1414

15-
use super::schema::tag::TagTree;
1615
use crate::database::schema::{
1716
commit::Commit,
17+
prefixes::TreePrefix,
1818
repository::{Repository, RepositoryId},
19-
tag::Tag,
19+
tag::{Tag, TagTree},
2020
};
2121

2222
pub fn run(scan_path: &Path, db: &sled::Db) {
@@ -154,6 +154,7 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) {
154154
db_repository.get(),
155155
db,
156156
&git_repository,
157+
false,
157158
) {
158159
error!(%error, "Failed to update reflog for {relative_path}@{reference_name}");
159160
}
@@ -169,39 +170,72 @@ fn branch_index_update(
169170
db_repository: &Repository<'_>,
170171
db: &sled::Db,
171172
git_repository: &git2::Repository,
173+
force_reindex: bool,
172174
) -> Result<(), anyhow::Error> {
173175
info!("Refreshing indexes");
174176

177+
if force_reindex {
178+
db.drop_tree(TreePrefix::commit_id(db_repository.id, reference_name))?;
179+
}
180+
181+
let commit = reference.peel_to_commit()?;
175182
let commit_tree = db_repository.commit_tree(db, reference_name)?;
176183

177-
if let (Some(latest_indexed), Ok(latest_commit)) =
178-
(commit_tree.fetch_latest_one(), reference.peel_to_commit())
179-
{
180-
if latest_commit.id().as_bytes() == &*latest_indexed.get().hash {
184+
let latest_indexed = if let Some(latest_indexed) = commit_tree.fetch_latest_one() {
185+
if commit.id().as_bytes() == &*latest_indexed.get().hash {
181186
info!("No commits since last index");
182187
return Ok(());
183188
}
184-
}
185189

186-
// TODO: only scan revs from the last time we looked
190+
Some(latest_indexed)
191+
} else {
192+
None
193+
};
194+
187195
let mut revwalk = git_repository.revwalk()?;
188196
revwalk.set_sorting(Sort::REVERSE)?;
189197
revwalk.push_ref(reference_name)?;
190198

199+
let tree_len = commit_tree.len();
200+
let mut seen = false;
191201
let mut i = 0;
192202
for rev in revwalk {
193-
let commit = git_repository.find_commit(rev?)?;
203+
let rev = rev?;
204+
205+
if let (false, Some(latest_indexed)) = (seen, &latest_indexed) {
206+
if rev.as_bytes() == &*latest_indexed.get().hash {
207+
seen = true;
208+
}
209+
210+
continue;
211+
}
212+
213+
seen = true;
214+
215+
if ((i + 1) % 25_000) == 0 {
216+
info!("{} commits ingested", i + 1);
217+
}
218+
219+
let commit = git_repository.find_commit(rev)?;
194220
let author = commit.author();
195221
let committer = commit.committer();
196222

197-
Commit::new(&commit, &author, &committer).insert(&commit_tree, i);
223+
Commit::new(&commit, &author, &committer).insert(&commit_tree, tree_len + i);
198224
i += 1;
199225
}
200226

201-
// a complete and utter hack to remove potentially dropped commits from our tree,
202-
// we'll need to add `clear()` to sled's tx api to remove this
203-
for to_remove in (i + 1)..(i + 100) {
204-
commit_tree.remove(to_remove.to_be_bytes())?;
227+
if !seen && !force_reindex {
228+
warn!("Detected converged history, forcing reindex");
229+
230+
return branch_index_update(
231+
reference,
232+
reference_name,
233+
relative_path,
234+
db_repository,
235+
db,
236+
git_repository,
237+
true,
238+
);
205239
}
206240

207241
Ok(())

0 commit comments

Comments
 (0)