Skip to content

Commit 0fad4f4

Browse files
committed
Use simdutf8 for interacting with files within Git
1 parent a3ead79 commit 0fad4f4

File tree

3 files changed

+33
-14
lines changed

3 files changed

+33
-14
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ rand = "0.8.5"
3737
rocksdb = { version = "0.22", default-features = false, features = ["snappy"] }
3838
rust-ini = "0.21.1"
3939
serde = { version = "1.0", features = ["derive", "rc"] }
40+
simdutf8 = "0.1.5"
4041
syntect = "5"
4142
tar = "0.4"
4243
time = { version = "0.3", features = ["serde"] }

src/git.rs

+25-14
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
use std::{
22
borrow::Cow,
3-
collections::{BTreeMap, VecDeque},
3+
collections::VecDeque,
44
ffi::OsStr,
55
fmt::{self, Arguments, Write},
66
io::ErrorKind,
@@ -157,15 +157,17 @@ impl OpenRepository {
157157
.or_else(|| path.file_name())
158158
.map_or_else(|| Cow::Borrowed(""), OsStr::to_string_lossy);
159159

160-
let content = match (formatted, String::from_utf8(blob.take_data())) {
160+
let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) {
161161
(true, Err(_)) => Content::Binary(vec![]),
162162
(true, Ok(data)) => Content::Text(Cow::Owned(format_file(
163-
&data,
163+
data,
164164
&extension,
165165
&self.git.syntax_set,
166166
)?)),
167-
(false, Err(e)) => Content::Binary(e.into_bytes()),
168-
(false, Ok(data)) => Content::Text(Cow::Owned(data)),
167+
(false, Err(_)) => Content::Binary(blob.take_data()),
168+
(false, Ok(_data)) => Content::Text(Cow::Owned(unsafe {
169+
String::from_utf8_unchecked(blob.take_data())
170+
})),
169171
};
170172

171173
return Ok(PathDestination::File(FileWithContent {
@@ -295,7 +297,7 @@ impl OpenRepository {
295297
continue;
296298
};
297299

298-
let Ok(content) = std::str::from_utf8(&blob.data) else {
300+
let Ok(content) = simdutf8::basic::from_utf8(&blob.data) else {
299301
continue;
300302
};
301303

@@ -757,7 +759,7 @@ fn fetch_diff_and_stats(
757759
.transpose()?
758760
.unwrap_or_else(|| repo.empty_tree());
759761

760-
let mut diffs = BTreeMap::<_, FileDiff>::new();
762+
let mut diffs = Vec::new();
761763
let mut diff_output = String::new();
762764

763765
let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?;
@@ -795,9 +797,9 @@ fn fetch_diff_and_stats(
795797
diffs.iter().fold(
796798
(0, 0, 0, 0, 0),
797799
|(max_file_name_length, max_change_length, files_changed, insertions, deletions),
798-
(f, stats)| {
800+
stats| {
799801
(
800-
max_file_name_length.max(f.len()),
802+
max_file_name_length.max(stats.path.len()),
801803
max_change_length
802804
.max(((stats.insertions + stats.deletions).ilog10() + 1) as usize),
803805
files_changed + 1,
@@ -811,7 +813,7 @@ fn fetch_diff_and_stats(
811813

812814
let total_changes = insertions + deletions;
813815

814-
for (file, diff) in &diffs {
816+
for diff in &diffs {
815817
let local_changes = diff.insertions + diff.deletions;
816818
let width = WIDTH.min(local_changes);
817819

@@ -829,6 +831,7 @@ fn fetch_diff_and_stats(
829831
let plus_str = "+".repeat(adjusted_addition_width);
830832
let minus_str = "-".repeat(adjusted_deletion_width);
831833

834+
let file = diff.path.as_str();
832835
writeln!(diff_stats, " {file:max_file_name_length$} | {local_changes:max_change_length$} {plus_str}{minus_str}").unwrap();
833836
}
834837

@@ -864,6 +867,7 @@ fn fetch_diff_and_stats(
864867

865868
#[derive(Default, Debug)]
866869
struct FileDiff {
870+
path: String,
867871
insertions: usize,
868872
deletions: usize,
869873
}
@@ -1039,11 +1043,12 @@ trait DiffFormatter {
10391043
struct DiffBuilder<'a, F> {
10401044
output: &'a mut String,
10411045
resource_cache: &'a mut gix::diff::blob::Platform,
1042-
diffs: &'a mut BTreeMap<String, FileDiff>,
1046+
diffs: &'a mut Vec<FileDiff>,
10431047
formatter: F,
10441048
}
10451049

10461050
impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
1051+
#[allow(clippy::too_many_lines)]
10471052
fn handle(
10481053
&mut self,
10491054
change: gix::object::tree::diff::Change<'_, '_, '_>,
@@ -1052,7 +1057,11 @@ impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
10521057
return Ok(gix::object::tree::diff::Action::Continue);
10531058
}
10541059

1055-
let diff = self.diffs.entry(change.location.to_string()).or_default();
1060+
let mut diff = FileDiff {
1061+
path: change.location.to_string(),
1062+
insertions: 0,
1063+
deletions: 0,
1064+
};
10561065
let change = change.diff(self.resource_cache)?;
10571066

10581067
let prep = change.resource_cache.prepare_diff()?;
@@ -1129,10 +1138,10 @@ impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
11291138
.file_header(self.output, format_args!("+++ {new_path}"));
11301139

11311140
let old_source = gix::diff::blob::sources::lines_with_terminator(
1132-
std::str::from_utf8(prep.old.data.as_slice().unwrap_or_default())?,
1141+
simdutf8::basic::from_utf8(prep.old.data.as_slice().unwrap_or_default())?,
11331142
);
11341143
let new_source = gix::diff::blob::sources::lines_with_terminator(
1135-
std::str::from_utf8(prep.new.data.as_slice().unwrap_or_default())?,
1144+
simdutf8::basic::from_utf8(prep.new.data.as_slice().unwrap_or_default())?,
11361145
);
11371146
let input = gix::diff::blob::intern::InternedInput::new(old_source, new_source);
11381147

@@ -1166,6 +1175,8 @@ impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
11661175
}
11671176
}
11681177

1178+
self.diffs.push(diff);
1179+
11691180
self.resource_cache.clear_resource_cache_keep_allocation();
11701181
Ok(gix::object::tree::diff::Action::Continue)
11711182
}

0 commit comments

Comments
 (0)