-
Notifications
You must be signed in to change notification settings - Fork 463
/
Copy pathSemanticTokens.ml
469 lines (435 loc) · 17.2 KB
/
SemanticTokens.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
(*
Generally speaking, semantic highlighting here takes care of categorizing identifiers,
since the kind of an identifier is highly context-specific and hard to catch with a grammar.
The big exception is labels, whose location is not represented in the AST
E.g. function definition such as (~foo as _) =>, application (~foo=3) and prop <div foo=3>.
Labels are handled in the grammar, not here.
Punned labels such as (~foo) => are both labels and identifiers. They are overridden here.
There are 2 cases where the grammar and semantic highlighting work jointly.
The styles emitted in the grammar and here need to be kept in sync.
1) For jsx angled brackets, the grammar handles basic cases such as />
whose location is not in the AST.
Instead < and > are handled here. Those would be difficult to disambiguate in a grammar.
2) Most operators are handled in the grammar. Except < and > are handled here.
The reason is again that < and > would be difficult do disambiguate in a grammar.
*)
module Token = struct
(* This needs to stay synced with the same legend in `server.ts` *)
(* See https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocument_semanticTokens *)
type tokenType =
| Operator (** < and > *)
| Variable (** let x = *)
| Type (** type t = *)
| JsxTag (** the < and > in <div> *)
| Namespace (** module M = *)
| EnumMember (** variant A or poly variant #A *)
| Property (** {x:...} *)
| JsxLowercase (** div in <div> *)
let tokenTypeToString = function
| Operator -> "0"
| Variable -> "1"
| Type -> "2"
| JsxTag -> "3"
| Namespace -> "4"
| EnumMember -> "5"
| Property -> "6"
| JsxLowercase -> "7"
let tokenTypeDebug = function
| Operator -> "Operator"
| Variable -> "Variable"
| Type -> "Type"
| JsxTag -> "JsxTag"
| Namespace -> "Namespace"
| EnumMember -> "EnumMember"
| Property -> "Property"
| JsxLowercase -> "JsxLowercase"
let tokenModifiersString = "0" (* None at the moment *)
type token = int * int * int * tokenType
type emitter = {
mutable tokens: token list;
mutable lastLine: int;
mutable lastChar: int;
}
let createEmitter () = {tokens = []; lastLine = 0; lastChar = 0}
let add ~line ~char ~length ~type_ e =
e.tokens <- (line, char, length, type_) :: e.tokens
let emitToken buf (line, char, length, type_) e =
let deltaLine = line - e.lastLine in
let deltaChar = if deltaLine = 0 then char - e.lastChar else char in
e.lastLine <- line;
e.lastChar <- char;
if Buffer.length buf > 0 then Buffer.add_char buf ',';
if
deltaLine >= 0 && deltaChar >= 0 && length >= 0
(* Defensive programming *)
then
Buffer.add_string buf
(string_of_int deltaLine ^ "," ^ string_of_int deltaChar ^ ","
^ string_of_int length ^ "," ^ tokenTypeToString type_ ^ ","
^ tokenModifiersString)
let emit e =
let sortedTokens =
e.tokens
|> List.sort (fun (l1, c1, _, _) (l2, c2, _, _) ->
if l1 = l2 then compare c1 c2 else compare l1 l2)
in
let buf = Buffer.create 1 in
sortedTokens |> List.iter (fun t -> e |> emitToken buf t);
Buffer.contents buf
end
let isLowercaseId id =
id <> ""
&&
let c = id.[0] in
c == '_' || (c >= 'a' && c <= 'z')
let isUppercaseId id =
id <> ""
&&
let c = id.[0] in
c >= 'A' && c <= 'Z'
let emitFromRange (posStart, posEnd) ~type_ emitter =
let length =
if fst posStart = fst posEnd then snd posEnd - snd posStart else 0
in
if length > 0 then
emitter
|> Token.add ~line:(fst posStart) ~char:(snd posStart) ~length ~type_
let emitFromLoc ~loc ~type_ emitter =
emitter |> emitFromRange (Loc.range loc) ~type_
let emitLongident ?(backwards = false) ?(jsx = false)
?(lowerCaseToken = if jsx then Token.JsxLowercase else Token.Variable)
?(upperCaseToken = Token.Namespace) ?(lastToken = None) ?(posEnd = None)
~pos ~lid ~debug emitter =
let rec flatten acc lid =
match lid with
| Longident.Lident txt -> txt :: acc
| Ldot (lid, txt) ->
let acc = if jsx && txt = "createElement" then acc else txt :: acc in
flatten acc lid
| _ -> acc
in
let rec loop pos segments =
match segments with
| [id] when isUppercaseId id || isLowercaseId id ->
let type_ =
match lastToken with
| Some type_ -> type_
| None -> if isUppercaseId id then upperCaseToken else lowerCaseToken
in
let posAfter = (fst pos, snd pos + String.length id) in
let posEnd, lenMismatch =
(* There could be a length mismatch when ids are quoted
e.g. variable /"true" or object field {"x":...} *)
match posEnd with
| Some posEnd -> (posEnd, posEnd <> posAfter)
| None -> (posAfter, false)
in
if debug then
Printf.printf "Lident: %s %s%s %s\n" id (Pos.toString pos)
(if lenMismatch then "->" ^ Pos.toString posEnd else "")
(Token.tokenTypeDebug type_);
emitter |> emitFromRange (pos, posEnd) ~type_
| id :: segments when isUppercaseId id || isLowercaseId id ->
let type_ = if isUppercaseId id then upperCaseToken else lowerCaseToken in
if debug then
Printf.printf "Ldot: %s %s %s\n" id (Pos.toString pos)
(Token.tokenTypeDebug type_);
let length = String.length id in
emitter |> emitFromRange (pos, (fst pos, snd pos + length)) ~type_;
loop (fst pos, snd pos + length + 1) segments
| _ -> ()
in
let segments = flatten [] lid in
if backwards then (
let totalLength = segments |> String.concat "." |> String.length in
if snd pos >= totalLength then
loop (fst pos, snd pos - totalLength) segments)
else loop pos segments
let emitVariable ~id ~debug ~loc emitter =
if debug then Printf.printf "Variable: %s %s\n" id (Loc.toString loc);
emitter |> emitFromLoc ~loc ~type_:Variable
let emitJsxOpen ~lid ~debug ~(loc : Location.t) emitter =
if not loc.loc_ghost then
emitter |> emitLongident ~pos:(Loc.start loc) ~lid ~jsx:true ~debug
let emitJsxClose ~lid ~debug ~pos emitter =
emitter |> emitLongident ~backwards:true ~pos ~lid ~jsx:true ~debug
let emitJsxTag ~debug ~name ~pos emitter =
if debug then Printf.printf "JsxTag %s: %s\n" name (Pos.toString pos);
emitter |> emitFromRange (pos, (fst pos, snd pos + 1)) ~type_:Token.JsxTag
let emitType ~lid ~debug ~(loc : Location.t) emitter =
if not loc.loc_ghost then
emitter
|> emitLongident ~lowerCaseToken:Token.Type ~pos:(Loc.start loc) ~lid ~debug
let emitRecordLabel ~(label : Longident.t Location.loc) ~debug emitter =
if not label.loc.loc_ghost then
emitter
|> emitLongident ~lowerCaseToken:Token.Property ~pos:(Loc.start label.loc)
~posEnd:(Some (Loc.end_ label.loc))
~lid:label.txt ~debug
let emitVariant ~(name : Longident.t Location.loc) ~debug emitter =
if not name.loc.loc_ghost then
emitter
|> emitLongident ~lastToken:(Some Token.EnumMember)
~pos:(Loc.start name.loc) ~lid:name.txt ~debug
let command ~debug ~emitter ~path =
let processTypeArg (coreType : Parsetree.core_type) =
if debug then Printf.printf "TypeArg: %s\n" (Loc.toString coreType.ptyp_loc)
in
let typ (iterator : Ast_iterator.iterator) (coreType : Parsetree.core_type) =
match coreType.ptyp_desc with
| Ptyp_constr ({txt = lid; loc}, args) ->
emitter |> emitType ~lid ~debug ~loc;
args |> List.iter processTypeArg;
Ast_iterator.default_iterator.typ iterator coreType
| _ -> Ast_iterator.default_iterator.typ iterator coreType
in
let type_declaration (iterator : Ast_iterator.iterator)
(tydecl : Parsetree.type_declaration) =
emitter
|> emitType ~lid:(Lident tydecl.ptype_name.txt) ~debug
~loc:tydecl.ptype_name.loc;
Ast_iterator.default_iterator.type_declaration iterator tydecl
in
let pat (iterator : Ast_iterator.iterator) (p : Parsetree.pattern) =
match p.ppat_desc with
| Ppat_var {txt = id} ->
if isLowercaseId id then
emitter |> emitVariable ~id ~debug ~loc:p.ppat_loc;
Ast_iterator.default_iterator.pat iterator p
| Ppat_construct ({txt = Lident ("true" | "false")}, _) ->
(* Don't emit true or false *)
Ast_iterator.default_iterator.pat iterator p
| Ppat_record (cases, _) ->
cases
|> List.iter (fun (label, _, _) ->
emitter |> emitRecordLabel ~label ~debug);
Ast_iterator.default_iterator.pat iterator p
| Ppat_construct (name, _) ->
emitter |> emitVariant ~name ~debug;
Ast_iterator.default_iterator.pat iterator p
| Ppat_type {txt = lid; loc} ->
emitter |> emitType ~lid ~debug ~loc;
Ast_iterator.default_iterator.pat iterator p
| _ -> Ast_iterator.default_iterator.pat iterator p
in
let expr (iterator : Ast_iterator.iterator) (e : Parsetree.expression) =
match e.pexp_desc with
| Pexp_ident {txt = lid; loc} ->
if lid <> Lident "not" then
if not loc.loc_ghost then
emitter
|> emitLongident ~pos:(Loc.start loc)
~posEnd:(Some (Loc.end_ loc))
~lid ~debug;
Ast_iterator.default_iterator.expr iterator e
| Pexp_apply {funct = {pexp_desc = Pexp_ident lident; pexp_loc}; args}
when Res_parsetree_viewer.is_jsx_expression e ->
(*
Angled brackets:
- These are handled in the grammar: <> </> </ />
- Here we handle `<` and `>`
Component names:
- handled like other Longitent.t, except lowercase id is marked Token.JsxLowercase
*)
emitter (* --> <div... *)
|> emitJsxTag ~debug ~name:"<"
~pos:
(let pos = Loc.start e.pexp_loc in
(fst pos, snd pos - 1 (* the AST skips the loc of < somehow *)));
emitter |> emitJsxOpen ~lid:lident.txt ~debug ~loc:pexp_loc;
let posOfGreatherthanAfterProps =
let rec loop = function
| (Asttypes.Labelled {txt = "children"}, {Parsetree.pexp_loc}) :: _ ->
Loc.start pexp_loc
| _ :: args -> loop args
| [] -> (* should not happen *) (-1, -1)
in
loop args
in
let posOfFinalGreatherthan =
let pos = Loc.end_ e.pexp_loc in
(fst pos, snd pos - 1)
in
let selfClosing =
fst posOfGreatherthanAfterProps == fst posOfFinalGreatherthan
&& snd posOfGreatherthanAfterProps + 1 == snd posOfFinalGreatherthan
(* there's an off-by one somehow in the AST *)
in
(if not selfClosing then
let lineStart, colStart = Loc.start pexp_loc in
let lineEnd, colEnd = Loc.end_ pexp_loc in
let length = if lineStart = lineEnd then colEnd - colStart else 0 in
let lineEndWhole, colEndWhole = Loc.end_ e.pexp_loc in
if length > 0 && colEndWhole > length then (
emitter
|> emitJsxClose ~debug ~lid:lident.txt
~pos:(lineEndWhole, colEndWhole - 1);
emitter (* <foo ...props > <-- *)
|> emitJsxTag ~debug ~name:">" ~pos:posOfGreatherthanAfterProps;
emitter (* <foo> ... </foo> <-- *)
|> emitJsxTag ~debug ~name:">" ~pos:posOfFinalGreatherthan));
args |> List.iter (fun (_lbl, arg) -> iterator.expr iterator arg)
| Pexp_apply
{
funct =
{
pexp_desc =
Pexp_ident {txt = Longident.Lident (("<" | ">") as op); loc};
};
args = [_; _];
} ->
if debug then
Printf.printf "Binary operator %s %s\n" op (Loc.toString loc);
emitter |> emitFromLoc ~loc ~type_:Operator;
Ast_iterator.default_iterator.expr iterator e
| Pexp_record (cases, _) ->
cases
|> List.filter_map (fun ((label : Longident.t Location.loc), _, _) ->
match label.txt with
| Longident.Lident s when not (Utils.isFirstCharUppercase s) ->
Some label
| _ -> None)
|> List.iter (fun label -> emitter |> emitRecordLabel ~label ~debug);
Ast_iterator.default_iterator.expr iterator e
| Pexp_field (_, label) | Pexp_setfield (_, label, _) ->
emitter |> emitRecordLabel ~label ~debug;
Ast_iterator.default_iterator.expr iterator e
| Pexp_construct ({txt = Lident ("true" | "false")}, _) ->
(* Don't emit true or false *)
Ast_iterator.default_iterator.expr iterator e
| Pexp_construct (name, _) ->
emitter |> emitVariant ~name ~debug;
Ast_iterator.default_iterator.expr iterator e
| _ -> Ast_iterator.default_iterator.expr iterator e
in
let module_expr (iterator : Ast_iterator.iterator)
(me : Parsetree.module_expr) =
match me.pmod_desc with
| Pmod_ident {txt = lid; loc} ->
if not loc.loc_ghost then
emitter |> emitLongident ~pos:(Loc.start loc) ~lid ~debug;
Ast_iterator.default_iterator.module_expr iterator me
| _ -> Ast_iterator.default_iterator.module_expr iterator me
in
let module_binding (iterator : Ast_iterator.iterator)
(mb : Parsetree.module_binding) =
if not mb.pmb_name.loc.loc_ghost then
emitter
|> emitLongident
~pos:(Loc.start mb.pmb_name.loc)
~lid:(Longident.Lident mb.pmb_name.txt) ~debug;
Ast_iterator.default_iterator.module_binding iterator mb
in
let module_declaration (iterator : Ast_iterator.iterator)
(md : Parsetree.module_declaration) =
if not md.pmd_name.loc.loc_ghost then
emitter
|> emitLongident
~pos:(Loc.start md.pmd_name.loc)
~lid:(Longident.Lident md.pmd_name.txt) ~debug;
Ast_iterator.default_iterator.module_declaration iterator md
in
let module_type (iterator : Ast_iterator.iterator)
(mt : Parsetree.module_type) =
match mt.pmty_desc with
| Pmty_ident {txt = lid; loc} ->
if not loc.loc_ghost then
emitter
|> emitLongident ~upperCaseToken:Token.Type ~pos:(Loc.start loc) ~lid
~debug;
Ast_iterator.default_iterator.module_type iterator mt
| _ -> Ast_iterator.default_iterator.module_type iterator mt
in
let module_type_declaration (iterator : Ast_iterator.iterator)
(mtd : Parsetree.module_type_declaration) =
if not mtd.pmtd_name.loc.loc_ghost then
emitter
|> emitLongident ~upperCaseToken:Token.Type
~pos:(Loc.start mtd.pmtd_name.loc)
~lid:(Longident.Lident mtd.pmtd_name.txt) ~debug;
Ast_iterator.default_iterator.module_type_declaration iterator mtd
in
let open_description (iterator : Ast_iterator.iterator)
(od : Parsetree.open_description) =
if not od.popen_lid.loc.loc_ghost then
emitter
|> emitLongident
~pos:(Loc.start od.popen_lid.loc)
~lid:od.popen_lid.txt ~debug;
Ast_iterator.default_iterator.open_description iterator od
in
let label_declaration (iterator : Ast_iterator.iterator)
(ld : Parsetree.label_declaration) =
emitter
|> emitRecordLabel
~label:{loc = ld.pld_name.loc; txt = Longident.Lident ld.pld_name.txt}
~debug;
Ast_iterator.default_iterator.label_declaration iterator ld
in
let constructor_declaration (iterator : Ast_iterator.iterator)
(cd : Parsetree.constructor_declaration) =
emitter
|> emitVariant
~name:{loc = cd.pcd_name.loc; txt = Longident.Lident cd.pcd_name.txt}
~debug;
Ast_iterator.default_iterator.constructor_declaration iterator cd
in
let structure_item (iterator : Ast_iterator.iterator)
(item : Parsetree.structure_item) =
(match item.pstr_desc with
| Pstr_primitive {pval_name = {txt = id; loc}} ->
emitter |> emitVariable ~id ~debug ~loc
| _ -> ());
Ast_iterator.default_iterator.structure_item iterator item
in
let signature_item (iterator : Ast_iterator.iterator)
(item : Parsetree.signature_item) =
(match item.psig_desc with
| Psig_value {pval_name = {txt = id; loc}} ->
emitter |> emitVariable ~id ~debug ~loc
| _ -> ());
Ast_iterator.default_iterator.signature_item iterator item
in
let iterator =
{
Ast_iterator.default_iterator with
constructor_declaration;
expr;
label_declaration;
module_declaration;
module_binding;
module_expr;
module_type;
module_type_declaration;
open_description;
pat;
typ;
type_declaration;
structure_item;
signature_item;
}
in
if Files.classifySourceFile path = Res then (
let parser =
Res_driver.parsing_engine.parse_implementation ~for_printer:false
in
let {Res_driver.parsetree = structure; diagnostics} =
parser ~filename:path
in
if debug then
Printf.printf "structure items:%d diagnostics:%d \n"
(List.length structure) (List.length diagnostics);
iterator.structure iterator structure |> ignore)
else
let parser = Res_driver.parsing_engine.parse_interface ~for_printer:false in
let {Res_driver.parsetree = signature; diagnostics} =
parser ~filename:path
in
if debug then
Printf.printf "signature items:%d diagnostics:%d \n"
(List.length signature) (List.length diagnostics);
iterator.signature iterator signature |> ignore
let semanticTokens ~currentFile =
let emitter = Token.createEmitter () in
command ~emitter ~debug:false ~path:currentFile;
Printf.printf "{\"data\":[%s]}" (Token.emit emitter)