// parse-word-break.js
'use strict';
const utils = require('./utils.js');
const regenerate = require('regenerate');
/**
 * Reads the `word-break` data file for a Unicode version and groups the
 * listed code points by their property value.
 *
 * Data lines are expected in the form `XXXX ; Value # comment` or
 * `XXXX..YYYY ; Value # comment`, with code points written in hexadecimal.
 * Blank lines, whitespace-only lines (including a lone `\r` left over from
 * CRLF line endings) and comment lines are ignored. Every code point that is
 * not listed keeps the value `Other`.
 *
 * @param {string} version - Forwarded to `utils.readDataFile`.
 * @returns {Object|undefined} An object mapping each property value to a
 *   `regenerate` set of its code points, or `undefined` when
 *   `utils.readDataFile` returns nothing.
 * @throws {Error} If a data line has no `;`-separated value field.
 */
const parseWordBreak = function(version) {
	const source = utils.readDataFile(version, 'word-break');
	if (!source) {
		return;
	}
	const map = {
		// All code points not explicitly listed have the value `Other` (`XX`).
		'Other': regenerate().addRange(0, 0x10FFFF)
	};
	for (const line of source.split('\n')) {
		// Strip the trailing `# comment` and surrounding whitespace before
		// deciding whether the line carries data; this also discards indented
		// comments and lines that only contain `\r` or spaces.
		const content = line.split('#')[0].trim();
		if (!content) {
			continue;
		}
		const fields = content.split(';');
		if (fields.length < 2) {
			throw new Error(
				`Unexpected line in word-break data: ${JSON.stringify(line)}`
			);
		}
		const charRange = fields[0].trim();
		const value = fields[1].trim();
		// Range bounds are separated by `..`; a single `-` is accepted as well.
		const rangeParts = charRange.replace('..', '-').split('-');
		map[value] ??= regenerate();
		if (rangeParts.length === 2) {
			const from = parseInt(rangeParts[0], 16);
			const to = parseInt(rangeParts[1], 16);
			// Move the whole range out of `Other` and into its own value.
			map['Other'].removeRange(from, to);
			map[value].addRange(from, to);
		} else {
			const codePoint = parseInt(charRange, 16);
			map['Other'].remove(codePoint);
			map[value].add(codePoint);
		}
	}
	return map;
};
// Expose the parser function as this module's export (CommonJS).
module.exports = parseWordBreak;