forked from rescript-lang/rescript
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathj.ml
402 lines (344 loc) · 12.3 KB
/
j.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
(* Copyright (C) 2015-2016 Bloomberg Finance L.P.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* In addition to the permissions granted to you by the LGPL, you may combine
* or link a "work that uses the Library" with a publicly distributed version
* of this file to produce a combined library or application, then distribute
* that combined work under the terms of your choosing, with no requirement
* to comply with the obligations normally placed on you by section 4 of the
* LGPL version 3 (or the corresponding section of a later version of the LGPL
* should you choose to use a later version).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *)
(** Javascript IR
It's a subset of Javascript AST specialized for OCaml lambda backend
Note it's not exactly the same as Javascript, the AST itself follows lexical
convention and [Block] is just a sequence of statements, which means it does
not introduce new scope
*)
type label = string
(* The aliases below re-export the corresponding [Js_op] types under short
   names so the rest of this recursive type definition can use them directly. *)
and binop = Js_op.binop
and int_op = Js_op.int_op
and kind = Js_op.kind
and property = Js_op.property
and number = Js_op.number
and mutable_flag = Js_op.mutable_flag
and ident_info = Js_op.ident_info
and exports = Js_op.exports
and tag_info = Js_op.tag_info
and required_modules = Js_op.required_modules
and code_info = Js_op.code_info
(** Key of an object literal. If the key were an identifier it might be
    renamed by the Google Closure optimizer, so currently the key is
    always quoted.
*)
and property_name = Js_op.property_name
and jsint = Js_op.jsint
(* Identifiers are plain [Ident.t] values. *)
and ident = Ident.t
and vident =
| Id of ident
| Qualified of ident * kind * string option
(* Since camldot is only available for toplevel module accessors,
   we don't need to print `A.length$2`;
   just print `A.length` - it's guaranteed to be unique.
   When the third component is [None], it denotes the whole module.
   TODO:
   invariant: when [kind] is [Runtime], we can ignore [ident],
   since all [runtime] functions are unique; when doing the
   pattern match we can ignore the first component for simplicity,
   for example
   {[
   Qualified (_, Runtime, Some "caml_int_compare")
   ]}
*)
(* Identifier bound by the handler part of [Try] (see [statement_desc]). *)
and exception_ident = ident
(* Loop identifier of [ForRange] (see [statement_desc]). *)
and for_ident = ident
and for_direction = Asttypes.direction_flag
(* Association list from property name to the expression giving its value;
   this is the payload of the [Object] constructor. *)
and property_map =
(property_name * expression) list
and length_object = Js_op.length_object
(* The shape of an expression node; the node itself is the [expression]
   record, which pairs this description with an optional comment. *)
and expression_desc =
| Math of string * expression list
| Length of expression * length_object
| Char_of_int of expression
| Char_to_int of expression
| Array_of_size of expression
(* used in the [#create_array] primitive; note that having an
   uninitialized array is not as bad as in OCaml,
   since the GC does not rely on it
*)
| Array_copy of expression (* shallow copy, like [x.slice] *)
| Array_append of expression * expression (* For [caml_array_append]*)
(* | Tag_ml_obj of expression *)
| String_append of expression * expression
| Int_of_boolean of expression
| Anything_to_number of expression
| Bool of bool (* js true/false*)
(* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
   [typeof] is an operator
*)
| Typeof of expression
| Caml_not of expression (* 1 - v *)
| Js_not of expression (* !v *)
| String_of_small_int_array of expression
(* String.fromCharCode.apply(null, args) *)
(* Convert JS boolean into OCaml boolean
   like [+true]; note this AST talks using JS
   terminology unless explicitly stated.
   NOTE(review): this remark does not obviously describe either neighbouring
   constructor; it may belong to [Int_of_boolean] - confirm.
*)
| Json_stringify of expression
(* TODO: in the future, it might make sense to group primitives by type,
   which makes optimizations easier
   {[ JSON.stringify(value, replacer[, space]) ]}
*)
| Anything_to_string of expression
(* for debugging utilities,
   TODO: [Dump] is not necessary with this primitive
   Note that the semantics is slightly different from [JSON.stringify]
   {[
   JSON.stringify("x")
   ]}
   {[
   ""x""
   ]}
   {[
   JSON.stringify(undefined)
   ]}
   {[
   undefined
   ]}
   {[ '' + undefined
   ]}
   {[ 'undefined'
   ]}
*)
| Dump of Js_op.level * expression list
(* TODO:
   add
   {[ Assert of bool * expression ]}
*)
(* to support
   val log1 : 'a -> unit
   val log2 : 'a -> 'b -> unit
   val log3 : 'a -> 'b -> 'c -> unit
*)
(* TODO: Add some primitives so that [js inliner] can do a better job *)
| Seq of expression * expression
| Cond of expression * expression * expression
| Bin of binop * expression * expression
(* [int_op] will guarantee return [int32] bits
   https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Operators/Bitwise_Operators *)
(* | Int32_bin of int_op * expression * expression *)
| FlatCall of expression * expression
(* f.apply(null,args) -- Fully applied guaranteed
   TODO: once we know the shape of args --
   if it's known at compile time, we can turn it into
   f(args[0], args[1], ... )
*)
| Bind of expression * expression
(* {[ Bind (a,b) ]}
   is literally
   {[ a.bind(b) ]}
*)
| Call of expression * expression list * Js_call_info.t
(* Analysis over J expressions is hard since
   some primitive calls are translated
   into plain calls; it's better to keep them
*)
| String_access of expression * expression
| Access of expression * expression
(* Invariant:
   The second argument has to be of type [int].
   This can be constructed either in a static way [E.index] or a dynamic way
   [E.access]
*)
| Dot of expression * string * bool
(* The third argument (bool) indicates whether we should
   print it as
   a["idd"] -- false
   or
   a.idd -- true
   There are several kinds of properties
   1. OCaml module dot (need to be escaped or not)
   All exported declarations have to be OCaml identifiers
   2. Javascript dot (need to be preserved/or using quote)
*)
| New of expression * expression list option (* TODO: option remove *)
| Var of vident
| Fun of bool * ident list * block * Js_fun_env.t
(* The first parameter is false by default;
   it will be true when it's a method
*)
| Str of bool * string
(* A string is UTF-8 encoded, the string may contain
   escape sequences.
   The first argument is used to mark it as non-pure, please
   don't optimize it, since it does have a side effect,
   examples like "use asm;" and our compiler may generate "error;..."
   which is better to leave alone
   The last argument is passed from as `j` from `{j||j}`
   NOTE(review): [Str] carries only two components here ([bool * string]),
   so the sentence about a "last argument" looks stale - confirm.
*)
| Unicode of string
(* It is escaped string, print delimited by '"'*)
| Raw_js_code of string * code_info
(* literally raw JS code
*)
| Array of expression list * mutable_flag
| Caml_block of expression list * mutable_flag * expression * tag_info
(* The third argument is [tag], the fourth is [tag_info] *)
| Caml_uninitialized_obj of expression * expression
(* [tag] and [size] tailed for [Obj.new_block] *)
(* For a setter, it still returns the value of the expression,
   we can not use
   {[
   type 'a access = Get | Set of 'a
   ]}
   in another module, since it will break our code generator
   [Caml_block_tag] can return [undefined],
   you have to use [E.tag] in a safe way
*)
| Caml_block_tag of expression
| Caml_block_set_tag of expression * expression
| Caml_block_set_length of expression * expression
(* It will just fetch the tag; to make it safe, when creating it,
   we need to apply "|0"; we don't do it in the
   last step since "|0" can potentially be optimized
*)
| Number of number
| Object of property_map
(* Both aliases are plain [expression]s; the "pure" annotation is the original
   author's stated expectation and is not enforced by the type.
   They are the first two components of [ForRange] in [statement_desc]. *)
and for_ident_expression = expression (* pure*)
and finish_ident_expression = expression (* pure *)
(* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/block
block can be nested, specified in ES3
*)
(* Delay some units like [primitive] into JS layer ,
benefit: better cross module inlining, and smaller IR size?
*)
(*
[closure] captured loop mutable values in the outer loop
check if it contains loop mutable values, happens in nested loop
when closured, it's no longer loop mutable value.
which means the outer loop mutable value can not peek into the inner loop
{[
var i = f ();
for(var finish = 32; i < finish; ++i){
}
]}
when [for_ident_expression] is [None], [var i] has to
be initialized outside, so
{[
var i = f ()
(function (xxx){
for(var finish = 32; i < finish; ++i)
}(..i))
]}
This happens rarely, so it's okay.
this is because [i] has to be initialized outside, if [j]
contains a block side effect
TODO: create such example
*)
(* Since in OCaml,
[for i = 0 to k end do done ]
k is only evaluated once , to encode this invariant in JS IR,
make sure [ident] is defined in the first b
TODO: currently we guarantee that [bound] is only
executed once; this should be encoded at the AST level
*)
(* Can be simplified to keep the semantics of OCaml
For (var i, e, ...){
let j = ...
}
if [i] or [j] is captured inside closure
for (var i , e, ...){
(function (){
})(i)
}
*)
(* A single return is good for inlining.
However, when you do tail-call optimization
you lose the expression-oriented semantics.
Block is useful for implementing goto
{[
xx:{
break xx;
}
]}
*)
(* The shape of a statement node; the node itself is the [statement] record,
   which pairs this description with an optional comment. *)
and statement_desc =
| Block of block
| Variable of variable_declaration
(* Function declaration and Variable declaration *)
| Exp of expression
| If of expression * block * block option
| While of label option * expression * block
* Js_closure.t (* check if it contains loop mutable values, happens in nested loop *)
| ForRange of for_ident_expression option * finish_ident_expression *
for_ident * for_direction * block
* Js_closure.t
| Continue of label
| Break (* only used when inlining a function *)
| Return of return_expression (* Here we need track back a bit ?, move Return to Function ...
Then we can only have one Return, which is not good *)
(* For both switches: the scrutinee, the case clauses, and an optional block
   (presumably the default branch - TODO confirm). *)
| Int_switch of expression * int case_clause list * block option
| String_switch of expression * string case_clause list * block option
| Throw of expression
(* The protected block, an optional (exception identifier, handler block) pair,
   and an optional block (presumably the [finally] part - TODO confirm). *)
| Try of block * (exception_ident * block) option * block option
| Debugger
and return_expression = {
(* Since OCaml is an expression-oriented language, [return] in
   general has no jumps; a jump only happens when we do
   tailcall conversion.
   However, currently a single [break] is good enough to cover
   our compilation strategy.
   Attention: we should not insert [break] arbitrarily, otherwise
   it would break the semantics.
   A more robust signature would be
   {[ goto : label option ; ]}
*)
return_value : expression
}
(* An expression node: its description plus an optional comment string
   (presumably carried through to the printed output - TODO confirm). *)
and expression = {
expression_desc : expression_desc;
comment : string option;
}
(* A statement node: its description plus an optional comment string
   (presumably carried through to the printed output - TODO confirm). *)
and statement = {
statement_desc : statement_desc;
comment : string option;
}
(* Payload of the [Variable] statement. *)
and variable_declaration = {
ident : ident ;
(* Optional initial value; [None] presumably means the variable is declared
   without an initializer - TODO confirm. *)
value : expression option;
property : property;
ident_info : ident_info;
}
(* One clause of a switch; ['a] is the type of the case label
   ([int] for [Int_switch], [string] for [String_switch]). *)
and 'a case_clause = {
case : 'a ;
body : block * bool ; (* the block of the clause; [true] means break *)
}
(* TODO: For efficiency, block should not be a list; it should be possible to
   concatenate it in both ways
*)
and block = statement list
(* A whole program: a name, its statements, and its export information. *)
and program = {
name : string;
block : block ;
exports : exports ;
(* The exports as an identifier set (presumably the same identifiers as
   [exports], kept for fast membership tests - TODO confirm). *)
export_set : Ident_set.t ;
}
(* A [program] together with the modules it requires. *)
and deps_program =
{
program : program ;
modules : required_modules ;
side_effect : string option (* [None]: no side effect; [Some reason]: has one, with the reason *)
}