Skip to content

Commit 74e31cb

Browse files
committed
Avoid syntax highlighting overly long lines
1 parent 8aeb684 commit 74e31cb

File tree

1 file changed

+150
-21
lines changed

1 file changed

+150
-21
lines changed

src/git.rs

+150-21
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::{
22
borrow::Cow,
33
ffi::OsStr,
4+
fmt,
45
fmt::Write,
56
path::{Path, PathBuf},
67
sync::Arc,
@@ -18,8 +19,8 @@ use git2::{
1819
use moka::future::Cache;
1920
use parking_lot::Mutex;
2021
use syntect::{
21-
html::{ClassStyle, ClassedHTMLGenerator},
2222
parsing::SyntaxSet,
23+
parsing::{BasicScopeStackOp, ParseState, Scope, ScopeStack, SCOPE_REPO},
2324
util::LinesWithEndings,
2425
};
2526
use time::OffsetDateTime;
@@ -133,7 +134,12 @@ impl OpenRepository {
133134
let content = match (formatted, blob.is_binary()) {
134135
(true, true) => Content::Binary(vec![]),
135136
(true, false) => Content::Text(
136-
format_file(blob.content(), &extension, &self.git.syntax_set)?.into(),
137+
format_file(
138+
&String::from_utf8_lossy(blob.content()),
139+
&extension,
140+
&self.git.syntax_set,
141+
)?
142+
.into(),
137143
),
138144
(false, true) => Content::Binary(blob.content().to_vec()),
139145
(false, false) => Content::Text(
@@ -673,25 +679,151 @@ fn fetch_diff_and_stats(
673679
Ok((diff_plain.freeze(), diff_output, diff_stats))
674680
}
675681

676-
fn format_file(content: &[u8], extension: &str, syntax_set: &SyntaxSet) -> Result<String> {
677-
let content = String::from_utf8_lossy(content);
682+
/// Renders `content` as syntax-highlighted HTML and returns it as a new `String`.
///
/// The work is delegated to `format_file_inner` with `code_tag` set to `true`, so every
/// input line ends up wrapped in its own `<code>` element.
///
/// # Errors
///
/// Propagates any error returned by `format_file_inner`.
fn format_file(content: &str, extension: &str, syntax_set: &SyntaxSet) -> Result<String> {
    let mut html = String::new();
    match format_file_inner(&mut html, content, extension, syntax_set, true) {
        Ok(()) => Ok(html),
        Err(err) => Err(err),
    }
}
678687

688+
// TODO: this is in some serious need of refactoring
689+
fn format_file_inner(
690+
out: &mut String,
691+
content: &str,
692+
extension: &str,
693+
syntax_set: &SyntaxSet,
694+
code_tag: bool,
695+
) -> Result<()> {
679696
let syntax = syntax_set
680697
.find_syntax_by_extension(extension)
681698
.unwrap_or_else(|| syntax_set.find_syntax_plain_text());
682-
let mut html_generator =
683-
ClassedHTMLGenerator::new_with_class_style(syntax, syntax_set, ClassStyle::Spaced);
699+
let mut parse_state = ParseState::new(syntax);
700+
701+
let mut scope_stack = ScopeStack::new();
702+
let mut span_empty = false;
703+
let mut span_start = 0;
704+
let mut open_spans = Vec::new();
705+
706+
for line in LinesWithEndings::from(content) {
707+
if code_tag {
708+
out.push_str("<code>");
709+
}
710+
711+
if line.len() > 2048 {
712+
// avoid highlighting overly complex lines
713+
write!(out, "{}", Escape(line.trim_end()))?;
714+
} else {
715+
let mut cur_index = 0;
716+
let ops = parse_state.parse_line(line, syntax_set)?;
717+
out.reserve(line.len() + ops.len() * 8);
718+
719+
if code_tag {
720+
for scope in &open_spans {
721+
out.push_str("<span class=\"");
722+
scope_to_classes(out, *scope);
723+
out.push_str("\">");
724+
}
725+
}
726+
727+
// mostly copied from syntect, but slightly modified to keep track
728+
// of open spans, so we can open and close them for each line
729+
for &(i, ref op) in &ops {
730+
if i > cur_index {
731+
span_empty = false;
732+
write!(out, "{}", Escape(&line[cur_index..i]))?;
733+
cur_index = i;
734+
}
735+
736+
scope_stack.apply_with_hook(op, |basic_op, _| match basic_op {
737+
BasicScopeStackOp::Push(scope) => {
738+
span_start = out.len();
739+
span_empty = true;
740+
out.push_str("<span class=\"");
741+
open_spans.push(scope);
742+
scope_to_classes(out, scope);
743+
out.push_str("\">");
744+
}
745+
BasicScopeStackOp::Pop => {
746+
open_spans.pop();
747+
if span_empty {
748+
out.truncate(span_start);
749+
} else {
750+
out.push_str("</span>");
751+
}
752+
span_empty = false;
753+
}
754+
})?;
755+
}
756+
757+
let line = line.trim_end();
758+
if line.len() > cur_index {
759+
write!(out, "{}", Escape(&line[cur_index..]))?;
760+
}
761+
762+
if code_tag {
763+
for _scope in &open_spans {
764+
out.push_str("</span>");
765+
}
766+
}
767+
}
684768

685-
for line in LinesWithEndings::from(&content) {
686-
html_generator
687-
.parse_html_for_line_which_includes_newline(line)
688-
.context("Couldn't parse line of file")?;
769+
if code_tag {
770+
out.push_str("</code>\n");
771+
}
772+
}
773+
774+
if !code_tag {
775+
for _scope in &open_spans {
776+
out.push_str("</span>");
777+
}
689778
}
690779

691-
Ok(format!(
692-
"<code>{}</code>",
693-
html_generator.finalize().replace('\n', "</code>\n<code>")
694-
))
780+
Ok(())
781+
}
782+
783+
/// Appends the atoms of `scope` to `s`, separated by single spaces.
///
/// Callers in this file place the result inside a `class="..."` attribute.
///
/// # Panics
///
/// Panics if locking `SCOPE_REPO` returns an error (the lock result is unwrapped).
fn scope_to_classes(s: &mut String, scope: Scope) {
    let repo = SCOPE_REPO.lock().unwrap();
    let atom_count = scope.len() as usize;
    for index in 0..atom_count {
        // separator goes before every atom except the first
        if index > 0 {
            s.push(' ');
        }
        s.push_str(repo.atom_str(scope.atom_at(index)));
    }
}
794+
795+
// Copied from syntect as it isn't exposed from there.
/// Display adapter that writes the wrapped string with HTML-special characters escaped.
///
/// `<`, `>`, `&`, `'` and `"` are written as `&lt;`, `&gt;`, `&amp;`, `&#39;` and
/// `&quot;`; every other byte is passed through unchanged.
pub struct Escape<'a>(pub &'a str);

impl<'a> fmt::Display for Escape<'a> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Escape(s) = *self;
        // start of the pending run of bytes that need no escaping
        let mut last = 0;
        for (i, byte) in s.bytes().enumerate() {
            let entity = match byte {
                b'>' => "&gt;",
                b'<' => "&lt;",
                b'&' => "&amp;",
                b'\'' => "&#39;",
                b'"' => "&quot;",
                _ => continue,
            };
            // Every escaped character is a single ASCII byte, so both `i` and `i + 1`
            // are char boundaries and slicing here cannot split a multi-byte character.
            fmt.write_str(&s[last..i])?;
            fmt.write_str(entity)?;
            last = i + 1;
        }

        // flush the tail after the last escaped character (or the whole string)
        if last < s.len() {
            fmt.write_str(&s[last..])?;
        }
        Ok(())
    }
}
696828

697829
#[instrument(skip(diff, syntax_set))]
@@ -722,16 +854,13 @@ fn format_diff(diff: &git2::Diff<'_>, syntax_set: &SyntaxSet) -> Result<String>
722854
} else {
723855
Cow::Borrowed("patch")
724856
};
725-
let syntax = syntax_set
726-
.find_syntax_by_extension(&extension)
727-
.unwrap_or_else(|| syntax_set.find_syntax_plain_text());
728-
let mut html_generator =
729-
ClassedHTMLGenerator::new_with_class_style(syntax, syntax_set, ClassStyle::Spaced);
730-
let _res = html_generator.parse_html_for_line_which_includes_newline(&line);
857+
731858
if let Some(class) = class {
732859
let _ = write!(diff_output, r#"<span class="diff-{class}">"#);
733860
}
734-
diff_output.push_str(&html_generator.finalize());
861+
862+
let _res = format_file_inner(&mut diff_output, &line, &extension, syntax_set, false);
863+
735864
if class.is_some() {
736865
diff_output.push_str("</span>");
737866
}

0 commit comments

Comments
 (0)