Skip to content

Commit 2b510e0

Browse files
butterunderflowbobzhang
authored andcommitted
Change char payload (rescript-lang#5759)
* change Pconst_char payload (WIP) * tweak * tweak * representation of char for lambda * lib * bugfix: replace wrong pp * libs * bugfix: replace wrong print * use unsafe_chr to handle possible overflow char * safe print int as char * reduce duplication * (re)use encodeCodepoint to support string_of_int_as_char * some refactor * libs * changelog
1 parent 32a133d commit 2b510e0

38 files changed

+513
-175
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
- Add `loading`, `aria-*` DOM element attributes in `JsxDOM.domProps`: `ariaCurrent`, `ariaInvalid`, `ariaAutocomplete`, etc.
7474
- Change the internal representation of props for the lowercase components to record. https://github.com/rescript-lang/syntax/pull/665
7575
- Add `JsxPPXReactSupport` module to relocate the helper functions for JSX v4 from `rescript-react`
76+
- Change the payload of Pconst_char for type safety. https://github.com/rescript-lang/syntax/pull/709
7677

7778
# 10.1.0-alpha.2
7879

jscomp/core/js_dump.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ and expression_desc cxt ~(level : int) f x : cxt =
630630
match v with
631631
| Float { f } -> Js_number.caml_float_literal_to_js_string f
632632
(* attach string here for float constant folding?*)
633-
| Int { i; c = Some c } -> Format.asprintf "/* %C */%ld" c i
633+
| Int { i; c = Some c } -> Format.asprintf "/* %s */%ld" (Ext_util.string_of_int_as_char c) i
634634
| Int { i; c = None } ->
635635
Int32.to_string i
636636
(* check , js convention with ocaml lexical convention *)

jscomp/core/js_exp_make.mli

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ val method_ :
103103

104104
val econd : ?comment:string -> t -> t -> t -> t
105105

106-
val int : ?comment:string -> ?c:char -> int32 -> t
106+
val int : ?comment:string -> ?c:int -> int32 -> t
107107

108108
val uint32 : ?comment:string -> int32 -> t
109109

jscomp/core/js_of_lam_string.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ module E = Js_exp_make
2929
currently, it follows the same patten of ocaml, [char] is [int]
3030
*)
3131

32-
let const_char (i : char) = E.int ~c:i (Int32.of_int @@ Char.code i)
32+
let const_char (i : int) = E.int ~c:i (Int32.of_int @@ i)
3333

3434
(* string [s[i]] expects to return a [ocaml_char] *)
3535
let ref_string e e1 = E.string_index e e1

jscomp/core/js_of_lam_string.mli

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,6 @@ val ref_byte : J.expression -> J.expression -> J.expression
3434

3535
val set_byte : J.expression -> J.expression -> J.expression -> J.expression
3636

37-
val const_char : char -> J.expression
37+
val const_char : int -> J.expression
3838

3939
val bytes_to_string : J.expression -> J.expression

jscomp/core/js_op.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ type float_lit = { f : string } [@@unboxed]
126126

127127
type number =
128128
| Float of float_lit
129-
| Int of { i : int32; c : char option }
129+
| Int of { i : int32; c : int option }
130130
| Uint of int32
131131

132132
(* becareful when constant folding +/-,

jscomp/core/lam.ml

+2-2
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ let prim ~primitive:(prim : Lam_primitive.t) ~args loc : t =
562562
| ( (Pstringrefs | Pstringrefu),
563563
Const_string { s = a; unicode = false },
564564
Const_int { i = b } ) -> (
565-
try Lift.char (String.get a (Int32.to_int b)) with _ -> default ())
565+
try Lift.char (Char.code (String.get a (Int32.to_int b))) with _ -> default ())
566566
| _ -> default ())
567567
| _ -> (
568568
match prim with
@@ -633,7 +633,7 @@ let rec complete_range (sw_consts : (int * _) list) ~(start : int) ~finish =
633633
let rec eval_const_as_bool (v : Lam_constant.t) : bool =
634634
match v with
635635
| Const_int { i = x } -> x <> 0l
636-
| Const_char x -> Char.code x <> 0
636+
| Const_char x -> x <> 0
637637
| Const_int64 x -> x <> 0L
638638
| Const_js_false | Const_js_null | Const_module_alias | Const_js_undefined ->
639639
false

jscomp/core/lam_constant.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ type t =
4242
| Const_js_true
4343
| Const_js_false
4444
| Const_int of { i : int32; comment : pointer_info }
45-
| Const_char of char
45+
| Const_char of int
4646
| Const_string of { s : string; unicode : bool }
4747
| Const_float of string
4848
| Const_int64 of int64

jscomp/core/lam_constant.mli

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type t =
3838
| Const_js_true
3939
| Const_js_false
4040
| Const_int of { i : int32; comment : pointer_info }
41-
| Const_char of char
41+
| Const_char of int
4242
| Const_string of { s : string; unicode : bool }
4343
| Const_float of string
4444
| Const_int64 of int64

jscomp/core/lam_pass_lets_dce.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ let lets_helper (count_var : Ident.t -> Lam_pass_count.used_info) lam : Lam.t =
209209
|Lconst((Const_int {i})) ->
210210
let i = Int32.to_int i in
211211
if i < String.length l_s && i >= 0 then
212-
Lam.const ((Const_char l_s.[i]))
212+
Lam.const ((Const_char (Char.code l_s.[i])))
213213
else
214214
Lam.prim ~primitive ~args:[l';r'] loc
215215
| _ ->

jscomp/core/lam_pass_lets_dce.pp.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ let lets_helper (count_var : Ident.t -> Lam_pass_count.used_info) lam : Lam.t =
208208
|Lconst((Const_int {i})) ->
209209
let i = Int32.to_int i in
210210
if i < String.length l_s && i >= 0 then
211-
Lam.const ((Const_char l_s.[i]))
211+
Lam.const ((Const_char (Char.code l_s.[i])))
212212
else
213213
Lam.prim ~primitive ~args:[l';r'] loc
214214
| _ ->

jscomp/core/lam_print.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ let rec struct_const ppf (cst : Lam_constant.t) =
2121
| Const_module_alias -> fprintf ppf "#alias"
2222
| Const_js_undefined -> fprintf ppf "#undefined"
2323
| Const_int { i } -> fprintf ppf "%ld" i
24-
| Const_char c -> fprintf ppf "%C" c
24+
| Const_char i -> fprintf ppf "%s" (Ext_util.string_of_int_as_char i)
2525
| Const_string { s } -> fprintf ppf "%S" s
2626
| Const_float f -> fprintf ppf "%s" f
2727
| Const_int64 n -> fprintf ppf "%LiL" n

jscomp/ext/ext_utf8.ml

+37
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,40 @@ let decode_utf8_string s =
9292

9393
(* let verify s loc =
9494
assert false *)
95+
96+
let encode_codepoint c =
97+
(* reused from syntax/src/res_utf8.ml *)
98+
let h2 = 0b1100_0000 in
99+
let h3 = 0b1110_0000 in
100+
let h4 = 0b1111_0000 in
101+
let cont_mask = 0b0011_1111 in
102+
if c <= 127 then (
103+
let bytes = (Bytes.create [@doesNotRaise]) 1 in
104+
Bytes.unsafe_set bytes 0 (Char.unsafe_chr c);
105+
Bytes.unsafe_to_string bytes)
106+
else if c <= 2047 then (
107+
let bytes = (Bytes.create [@doesNotRaise]) 2 in
108+
Bytes.unsafe_set bytes 0 (Char.unsafe_chr (h2 lor (c lsr 6)));
109+
Bytes.unsafe_set bytes 1
110+
(Char.unsafe_chr (0b1000_0000 lor (c land cont_mask)));
111+
Bytes.unsafe_to_string bytes)
112+
else if c <= 65535 then (
113+
let bytes = (Bytes.create [@doesNotRaise]) 3 in
114+
Bytes.unsafe_set bytes 0 (Char.unsafe_chr (h3 lor (c lsr 12)));
115+
Bytes.unsafe_set bytes 1
116+
(Char.unsafe_chr (0b1000_0000 lor ((c lsr 6) land cont_mask)));
117+
Bytes.unsafe_set bytes 2
118+
(Char.unsafe_chr (0b1000_0000 lor (c land cont_mask)));
119+
Bytes.unsafe_to_string bytes)
120+
else
121+
(* if c <= max then *)
122+
let bytes = (Bytes.create [@doesNotRaise]) 4 in
123+
Bytes.unsafe_set bytes 0 (Char.unsafe_chr (h4 lor (c lsr 18)));
124+
Bytes.unsafe_set bytes 1
125+
(Char.unsafe_chr (0b1000_0000 lor ((c lsr 12) land cont_mask)));
126+
Bytes.unsafe_set bytes 2
127+
(Char.unsafe_chr (0b1000_0000 lor ((c lsr 6) land cont_mask)));
128+
Bytes.unsafe_set bytes 3
129+
(Char.unsafe_chr (0b1000_0000 lor (c land cont_mask)));
130+
Bytes.unsafe_to_string bytes
131+

jscomp/ext/ext_utf8.mli

+2
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,5 @@ val next : string -> remaining:int -> int -> int
3636
exception Invalid_utf8 of string
3737

3838
val decode_utf8_string : string -> int list
39+
40+
val encode_codepoint : int -> string

jscomp/ext/ext_util.ml

+17
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,20 @@ let stats_to_string
4040
num_buckets max_bucket_length
4141
(String.concat ","
4242
(Array.to_list (Array.map string_of_int bucket_histogram)))
43+
44+
let string_of_int_as_char i =
45+
let str = match Char.unsafe_chr i with
46+
| '\'' -> "\\'"
47+
| '\\' -> "\\\\"
48+
| '\n' -> "\\n"
49+
| '\t' -> "\\t"
50+
| '\r' -> "\\r"
51+
| '\b' -> "\\b"
52+
| ' ' .. '~' as c ->
53+
let s = (Bytes.create [@doesNotRaise]) 1 in
54+
Bytes.unsafe_set s 0 c;
55+
Bytes.unsafe_to_string s
56+
| _ -> Ext_utf8.encode_codepoint i
57+
in
58+
Printf.sprintf "\'%s\'" str
59+

jscomp/ext/ext_util.mli

+3
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,6 @@
2525
val power_2_above : int -> int -> int
2626

2727
val stats_to_string : Hashtbl.statistics -> string
28+
29+
val string_of_int_as_char : int -> string
30+

jscomp/ml/ast_helper.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ module Const = struct
3939
let int64 ?(suffix='L') i = integer ~suffix (Int64.to_string i)
4040
let nativeint ?(suffix='n') i = integer ~suffix (Nativeint.to_string i)
4141
let float ?suffix f = Pconst_float (f, suffix)
42-
let char c = Pconst_char c
42+
let char c = Pconst_char (Char.code c)
4343
let string ?quotation_delimiter s = Pconst_string (s, quotation_delimiter)
4444
end
4545

jscomp/ml/asttypes.ml

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
type constant =
1919
Const_int of int
20-
| Const_char of char
20+
| Const_char of int
2121
| Const_string of string * string option
2222
| Const_float of string
2323
| Const_int32 of int32
@@ -70,4 +70,4 @@ let same_arg_label (x : arg_label) y =
7070
begin match y with
7171
| Optional s0 -> s = s0
7272
| _ -> false
73-
end
73+
end

jscomp/ml/matching.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2202,7 +2202,7 @@ let combine_constant names loc arg cst partial ctx def
22022202
call_switcher loc fail arg min_int max_int int_lambda_list names
22032203
| Const_char _ ->
22042204
let int_lambda_list =
2205-
List.map (function Const_char c, l -> (Char.code c, l)
2205+
List.map (function Const_char c, l -> (c, l)
22062206
| _ -> assert false)
22072207
const_lambda_list in
22082208
call_switcher loc fail arg 0 max_int int_lambda_list names

jscomp/ml/parmatch.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ let is_cons = function
379379

380380
let pretty_const c = match c with
381381
| Const_int i -> Printf.sprintf "%d" i
382-
| Const_char c -> Printf.sprintf "%C" c
382+
| Const_char i -> Printf.sprintf "%s" (Pprintast.string_of_int_as_char i)
383383
| Const_string (s, _) -> Printf.sprintf "%S" s
384384
| Const_float f -> Printf.sprintf "%s" f
385385
| Const_int32 i -> Printf.sprintf "%ldl" i

jscomp/ml/parser.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11015,7 +11015,7 @@ let yyact = [|
1101511015
let _1 = (Parsing.peek_val __caml_parser_env 0 : char) in
1101611016
Obj.repr(
1101711017
# 2155 "ml/parser.mly"
11018-
( Pconst_char _1 )
11018+
( Pconst_char (Char.code _1) )
1101911019
# 11020 "ml/parser.ml"
1102011020
: 'constant))
1102111021
; (fun __caml_parser_env ->

jscomp/ml/parser.mly

+1-1
Original file line numberDiff line numberDiff line change
@@ -2152,7 +2152,7 @@ label:
21522152
21532153
constant:
21542154
| INT { let (n, m) = $1 in Pconst_integer (n, m) }
2155-
| CHAR { Pconst_char $1 }
2155+
| CHAR { Pconst_char (Char.code $1) }
21562156
| STRING { let (s, d) = $1 in Pconst_string (s, d) }
21572157
| FLOAT { let (f, m) = $1 in Pconst_float (f, m) }
21582158
;

jscomp/ml/parsetree.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ type constant =
2424
Suffixes [g-z][G-Z] are accepted by the parser.
2525
Suffixes except 'l', 'L' and 'n' are rejected by the typechecker
2626
*)
27-
| Pconst_char of char
27+
| Pconst_char of int
2828
(* 'c' *)
2929
| Pconst_string of string * string option
3030
(* "constant"

jscomp/ml/pprintast.ml

+5-3
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,10 @@ let rec longident f = function
192192

193193
let longident_loc f x = pp f "%a" longident x.txt
194194

195+
let string_of_int_as_char i = Ext_utf8.encode_codepoint i
196+
195197
let constant f = function
196-
| Pconst_char i -> pp f "%C" i
198+
| Pconst_char i -> pp f "%s" (string_of_int_as_char i)
197199
| Pconst_string (i, None) -> pp f "%S" i
198200
| Pconst_string (i, Some delim) -> pp f "{%s|%s|%s}" delim i delim
199201
| Pconst_integer (i, None) -> paren (i.[0]='-') (fun f -> pp f "%s") f i
@@ -770,7 +772,7 @@ and value_description ctxt f x =
770772
pp f "@[<hov2>%a%a@]" (core_type ctxt) x.pval_type
771773
(fun f x ->
772774

773-
# 772 "ml/pprintast.pp.ml"
775+
# 774 "ml/pprintast.pp.ml"
774776
match x.pval_prim with
775777
| first :: second :: _
776778
when Ext_string.first_marshal_char second
@@ -783,7 +785,7 @@ and value_description ctxt f x =
783785
pp f "@ =@ %a" (list constant_string) x.pval_prim
784786

785787

786-
# 787 "ml/pprintast.pp.ml"
788+
# 789 "ml/pprintast.pp.ml"
787789
) x
788790

789791
and extension ctxt f (s, e) =

jscomp/ml/pprintast.mli

+1
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ val pattern: Format.formatter -> Parsetree.pattern -> unit
2424
val signature: Format.formatter -> Parsetree.signature -> unit
2525
val structure: Format.formatter -> Parsetree.structure -> unit
2626
val string_of_structure: Parsetree.structure -> string
27+
val string_of_int_as_char: int -> string

jscomp/ml/pprintast.pp.ml

+3-1
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,10 @@ let rec longident f = function
191191

192192
let longident_loc f x = pp f "%a" longident x.txt
193193

194+
let string_of_int_as_char i = Ext_util.string_of_int_as_char i
195+
194196
let constant f = function
195-
| Pconst_char i -> pp f "%C" i
197+
| Pconst_char i -> pp f "%s" (string_of_int_as_char i)
196198
| Pconst_string (i, None) -> pp f "%S" i
197199
| Pconst_string (i, Some delim) -> pp f "{%s|%s|%s}" delim i delim
198200
| Pconst_integer (i, None) -> paren (i.[0]='-') (fun f -> pp f "%s") f i

jscomp/ml/printast.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ let fmt_char_option f = function
6060
let fmt_constant f x =
6161
match x with
6262
| Pconst_integer (i,m) -> fprintf f "PConst_int (%s,%a)" i fmt_char_option m;
63-
| Pconst_char (c) -> fprintf f "PConst_char %02x" (Char.code c);
63+
| Pconst_char (c) -> fprintf f "PConst_char %02x" c;
6464
| Pconst_string (s, None) -> fprintf f "PConst_string(%S,None)" s;
6565
| Pconst_string (s, Some delim) ->
6666
fprintf f "PConst_string (%S,Some %S)" s delim;

jscomp/ml/printlambda.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ open Lambda
2121

2222
let rec struct_const ppf = function
2323
| Const_base(Const_int n) -> fprintf ppf "%i" n
24-
| Const_base(Const_char c) -> fprintf ppf "%C" c
24+
| Const_base(Const_char i) -> fprintf ppf "%s" (Pprintast.string_of_int_as_char i)
2525
| Const_base(Const_string (s, _)) -> fprintf ppf "%S" s
2626
| Const_immstring s -> fprintf ppf "#%S" s
2727
| Const_base(Const_float f) -> fprintf ppf "%s" f

jscomp/ml/printtyped.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ let fmt_path f x = fprintf f "\"%a\"" fmt_path_aux x;;
5858
let fmt_constant f x =
5959
match x with
6060
| Const_int (i) -> fprintf f "Const_int %d" i;
61-
| Const_char (c) -> fprintf f "Const_char %02x" (Char.code c);
61+
| Const_char (c) -> fprintf f "Const_char %02x" c;
6262
| Const_string (s, None) -> fprintf f "Const_string(%S,None)" s;
6363
| Const_string (s, Some delim) ->
6464
fprintf f "Const_string (%S,Some %S)" s delim;

jscomp/ml/typecore.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,7 @@ and type_pat_aux ~constrs ~labels ~no_existentials ~mode ~explode ~env
10091009
else
10101010
or_ ~loc:gloc
10111011
(constant ~loc:gloc (Pconst_char c1))
1012-
(loop (Char.chr(Char.code c1 + 1)) c2)
1012+
(loop (c1 + 1) c2)
10131013
in
10141014
let p = if c1 <= c2 then loop c1 c2 else loop c2 c1 in
10151015
let p = {p with ppat_loc=loc} in

jscomp/test/gpr_5753.js

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
'use strict';
2+
3+
4+
console.log(/* '文' */25991);
5+
6+
/* Not a pure module */

jscomp/test/gpr_5753.res

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
@@config({
2+
flags : ["-w", "-8"]
3+
})
4+
5+
'文'-> Js.log

jscomp/test/res_debug.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ var v1 = {
7070
z: 3
7171
};
7272

73-
var h = /* '\522' */128522;
73+
var h = /* '😊' */128522;
7474

7575
var hey = "hello, 世界";
7676

jscomp/test/string_unicode_test.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ function f(x) {
4747

4848
eq("File \"string_unicode_test.ml\", line 27, characters 7-14", f(/* '{' */123), 0);
4949

50-
eq("File \"string_unicode_test.ml\", line 28, characters 7-14", f(/* '\333' */333), 2);
50+
eq("File \"string_unicode_test.ml\", line 28, characters 7-14", f(/* 'ō' */333), 2);
5151

52-
eq("File \"string_unicode_test.ml\", line 29, characters 7-14", f(/* '\444' */444), 3);
52+
eq("File \"string_unicode_test.ml\", line 29, characters 7-14", f(/* 'Ƽ' */444), 3);
5353

5454
Mt.from_pair_suites("string_unicode_test.ml", suites.contents);
5555

0 commit comments

Comments
 (0)