(* res_utf8_test.ml: tests for Res_utf8 code point decoding, encoding and
   validation. *)
(** A single UTF-8 fixture: a code point, a byte string, and the size value the
    decoder is expected to report for that byte string. *)
type utf8Test = {
  codepoint: int;  (** code point compared against the decoder's first result *)
  str: string;  (** raw bytes fed to the decoder / expected from the encoder *)
  size: int;  (** value compared against the decoder's second result *)
}
(** Fixtures shared by the decode and encode tests: each entry pairs a code
    point with its UTF-8 byte string and the length of that string. *)
let utf8CodePointTests =
  (* Positional constructor so each fixture fits on one short line. *)
  let case codepoint str size = {codepoint; str; size} in
  [|
    (* one-byte sequences *)
    case 0x00 "\x00" 1;
    case 0x01 "\x01" 1;
    case 0x7e "\x7e" 1;
    case 0x7f "\x7f" 1;
    (* two-byte sequences *)
    case 0x0080 "\xc2\x80" 2;
    case 0x0081 "\xc2\x81" 2;
    case 0x00bf "\xc2\xbf" 2;
    case 0x00c0 "\xc3\x80" 2;
    case 0x00c1 "\xc3\x81" 2;
    case 0x00c8 "\xc3\x88" 2;
    case 0x00d0 "\xc3\x90" 2;
    case 0x00e0 "\xc3\xa0" 2;
    case 0x00f0 "\xc3\xb0" 2;
    case 0x00f8 "\xc3\xb8" 2;
    case 0x00ff "\xc3\xbf" 2;
    case 0x0100 "\xc4\x80" 2;
    case 0x07ff "\xdf\xbf" 2;
    case 0x0400 "\xd0\x80" 2;
    (* three-byte sequences *)
    case 0x0800 "\xe0\xa0\x80" 3;
    case 0x0801 "\xe0\xa0\x81" 3;
    case 0x1000 "\xe1\x80\x80" 3;
    case 0xd000 "\xed\x80\x80" 3;
    case 0xd7ff "\xed\x9f\xbf" 3;
    case 0xe000 "\xee\x80\x80" 3;
    case 0xfffe "\xef\xbf\xbe" 3;
    case 0xffff "\xef\xbf\xbf" 3;
    (* four-byte sequences *)
    case 0x10000 "\xf0\x90\x80\x80" 4;
    case 0x10001 "\xf0\x90\x80\x81" 4;
    case 0x40000 "\xf1\x80\x80\x80" 4;
    case 0x10fffe "\xf4\x8f\xbf\xbe" 4;
    case 0x10ffff "\xf4\x8f\xbf\xbf" 4;
    (* 0xFFFD itself, encoded as three bytes *)
    case 0xFFFD "\xef\xbf\xbd" 3;
  |]
(** Decode-only fixtures: the three-byte forms of 0xD800 and 0xDFFF. For both,
    the expected decode result is code point 0xFFFD with a size of 1. *)
let surrogateRange =
  (* Every entry shares the same expected code point and size. *)
  let rejected str = {codepoint = 0xFFFD; str; size = 1} in
  [|rejected "\xed\xa0\x80"; rejected "\xed\xbf\xbf"|]
(** Decodes every string in [utf8CodePointTests] from offset 0 and asserts that
    both the returned code point and the returned size match the fixture.
    @raise Assert_failure on the first mismatch. *)
let testDecode () =
  let check {codepoint = wantCodepoint; str = bytes; size = wantSize} =
    let gotCodepoint, gotSize =
      Res_utf8.decodeCodePoint 0 bytes (String.length bytes)
    in
    assert (gotCodepoint = wantCodepoint);
    assert (gotSize = wantSize)
  in
  Array.iter check utf8CodePointTests
(** Decodes every string in [surrogateRange] from offset 0 and asserts that the
    returned code point and size equal the fixture's expected values.
    @raise Assert_failure on the first mismatch. *)
let testDecodeSurrogateRange () =
  for i = 0 to Array.length surrogateRange - 1 do
    let fixture = surrogateRange.(i) in
    let byteCount = String.length fixture.str in
    let decoded, consumed =
      Res_utf8.decodeCodePoint 0 fixture.str byteCount
    in
    assert (decoded = fixture.codepoint);
    assert (consumed = fixture.size)
  done
(** Encodes the code point of every entry in [utf8CodePointTests] and asserts
    the result equals the entry's byte string.
    @raise Assert_failure on the first mismatch. *)
let testEncode () =
  utf8CodePointTests
  |> Array.iter (fun {codepoint; str = expected; _} ->
         let actual = Res_utf8.encodeCodePoint codepoint in
         assert (actual = expected))
(** Pairs of (code point, expected result of [Res_utf8.isValidCodePoint]). *)
let validCodePointsTests =
  (* Tagging helpers make the expected verdict readable at a glance. *)
  let valid codePoint = (codePoint, true) in
  let invalid codePoint = (codePoint, false) in
  [|
    valid 0;
    valid (Char.code 'e');
    valid Res_utf8.max;
    (* boundaries around 0xD800-0xDFFF *)
    valid 0xD7FF;
    invalid 0xD800;
    invalid 0xDFFF;
    valid 0xE000;
    (* just outside [0, Res_utf8.max] on either side *)
    invalid (Res_utf8.max + 1);
    invalid (-1);
  |]
(** Asserts that [Res_utf8.isValidCodePoint] returns the expected boolean for
    every pair in [validCodePointsTests].
    @raise Assert_failure on the first mismatch. *)
let testIsValidCodePoint () =
  let check (candidate, expected) =
    let actual = Res_utf8.isValidCodePoint candidate in
    assert (actual = expected)
  in
  Array.iter check validCodePointsTests
(** Runs all UTF-8 test suites in order, then prints a success line. Any failed
    assertion propagates as an exception before the line is printed. *)
let run () =
  let suites =
    [testDecode; testDecodeSurrogateRange; testEncode; testIsValidCodePoint]
  in
  List.iter (fun suite -> suite ()) suites;
  print_endline "✅ utf8 tests"