-
Notifications
You must be signed in to change notification settings - Fork 10.5k
/
Copy pathUnicodeExtendedGraphemeClusters.cpp.gyb
121 lines (97 loc) · 4.1 KB
/
UnicodeExtendedGraphemeClusters.cpp.gyb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
%# -*- mode: C++ -*-
%# Ignore the following admonition; it applies to the resulting .cpp file only
//// Automatically Generated From UnicodeExtendedGraphemeClusters.cpp.gyb.
//// Do Not Edit Directly!
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
%{
import io
import re

# Grapheme_Cluster_Break property.  An array of tuples (startCodePoint,
# endCodePoint, value), in the order the entries appear in the data file.
# A single code point is stored as a range whose start and end are equal.
graphemeBreakProperty = []

# One data line, after comment removal, in either of these forms:
#   XXXX       ; Value
#   XXXX..YYYY ; Value
# The '..' range separator is escaped and optional, and the value may contain
# underscores in both forms.  With separate patterns where only the range form
# allowed '_' and the separator was an unescaped '..', a single code point
# line such as '200D ; Some_Value' was mis-parsed as the range (0x2, 0xD).
_graphemeBreakPropertyLine = re.compile(
    r'([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([a-zA-Z_]+)\s*$')

# Decode the file explicitly so that every line is text under both Python 2
# and Python 3; matching str patterns against bytes raises TypeError on
# Python 3.
with io.open(unicodeGraphemeBreakPropertyFile, encoding='utf-8') as f:
    for line in f:
        # Strip comments.
        line = re.sub('#.*', '', line)
        m = _graphemeBreakPropertyLine.match(line)
        if not m:
            # Blank line, comment-only line, or anything else that is not a
            # property assignment.
            continue
        startCodePoint = int(m.group(1), 16)
        # No '..YYYY' part means the entry covers a single code point.
        endCodePoint = int(m.group(2), 16) if m.group(2) else startCodePoint
        # str() keeps the value a native string on Python 2, where io.open
        # yields unicode objects.
        value = str(m.group(3))
        graphemeBreakProperty += [(startCodePoint, endCodePoint, value)]
}%
#include "swift/Basic/Unicode.h"
swift::unicode::GraphemeClusterBreakProperty
swift::unicode::getGraphemeClusterBreakProperty(uint32_t C) {
// FIXME: replace linear search with a trie lookup.
% for startCodePoint,endCodePoint,value in graphemeBreakProperty:
% if startCodePoint == 0:
if (C <= ${endCodePoint})
% else:
if (C >= ${startCodePoint} && C <= ${endCodePoint})
% end
return GraphemeClusterBreakProperty::${value};
% end
return GraphemeClusterBreakProperty::Other;
}
%{
# All Grapheme_Cluster_Break values.  The order should be consistent with the
# 'GraphemeClusterBreakProperty' enum.
anyGraphemePropertyValue = [
    'Other',
    'CR',
    'LF',
    'Control',
    'Extend',
    'Regional_Indicator',
    'Prepend',
    'SpacingMark',
    'L',
    'V',
    'T',
    'LV',
    'LVT',
]

# Rules to determine extended grapheme cluster boundaries, as defined in
# 'Grapheme Break Chart', ucd/auxiliary/GraphemeBreakTest.html, Unicode 6.3.0.
#
# Each rule is (values of the first code point, action, values of the second
# code point).  When several rules cover the same pair, the one listed first
# takes precedence.
extendedGraphemeClusterRules = [
    (['CR'], 'no_boundary', ['LF']),
    (['Control', 'CR', 'LF'], 'boundary', anyGraphemePropertyValue),
    (anyGraphemePropertyValue, 'boundary', ['Control', 'CR', 'LF']),
    (['L'], 'no_boundary', ['L', 'V', 'LV', 'LVT']),
    (['LV', 'V'], 'no_boundary', ['V', 'T']),
    (['LVT', 'T'], 'no_boundary', ['T']),
    (['Regional_Indicator'], 'no_boundary', ['Regional_Indicator']),
    (anyGraphemePropertyValue, 'no_boundary', ['Extend']),
    (anyGraphemePropertyValue, 'no_boundary', ['SpacingMark']),
    (['Prepend'], 'no_boundary', anyGraphemePropertyValue),
    (anyGraphemePropertyValue, 'boundary', anyGraphemePropertyValue),
]

# Expand the rules into a matrix indexed as matrix[first][second].  Every cell
# starts out unset (None).
extendedGraphemeClusterRulesMatrix = {
    first: {second: None for second in anyGraphemePropertyValue}
    for first in anyGraphemePropertyValue
}

# Walk the rules from highest to lowest precedence and fill only the cells
# that are still unset, so the first rule that covers a pair decides it.
for firstList, action, secondList in extendedGraphemeClusterRules:
    for first in firstList:
        row = extendedGraphemeClusterRulesMatrix[first]
        for second in secondList:
            if row[second] is None:
                row[second] = action

# Make sure we can pack one row of the matrix into a 'uint16_t'.
assert len(anyGraphemePropertyValue) <= 16
}%
%# Emits one packed integer per first-code-point property value, in the order
%# of 'anyGraphemePropertyValue'.  Bit 'i' of an entry is set when the rule
%# matrix says 'no_boundary' for the second-code-point value at index 'i'.
uint16_t swift::unicode::ExtendedGraphemeClusterNoBoundaryRulesMatrix[] = {
% for first in anyGraphemePropertyValue:
%   # Matrix row for pairs whose first code point has the property 'first'.
%   row = extendedGraphemeClusterRulesMatrix[first]
%   # Indices of the second-code-point values that do not start a new cluster.
%   noBoundaryIndices = [ i for i, second in enumerate(anyGraphemePropertyValue) if row[second] == 'no_boundary' ]
%   # Set one bit per collected index.
%   packed = sum([ 1 << i for i in noBoundaryIndices ])
  ${packed},
% end
};