Skip to content

Commit d542555

Browse files
committed Feb 22, 2018
Add emoji data
We can do this now that the emoji data is versioned in the same way as the Unicode Standard itself.
1 parent b4c998a commit d542555

10 files changed

+2532
-3
lines changed
 

‎data/11.0.0-emoji-sequences.txt

+855
Large diffs are not rendered by default.

‎data/11.0.0-emoji-zwj-sequences.txt

+839
Large diffs are not rendered by default.

‎data/11.0.0-emoji.txt

+714
Large diffs are not rendered by default.

‎data/resources.js

+7-1
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,13 @@ var resources = [
273273
'bidi-mirroring': 'https://unicode.org/Public/11.0.0/ucd/BidiMirroring-11.0.0d3.txt',
274274
'bidi-brackets': 'https://unicode.org/Public/11.0.0/ucd/BidiBrackets-11.0.0d1.txt',
275275
'line-break': 'https://unicode.org/Public/11.0.0/ucd/LineBreak-11.0.0d10.txt',
276-
'word-break': 'https://unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty-11.0.0d20.txt'
276+
'word-break': 'https://unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty-11.0.0d20.txt',
277+
// Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base, Emoji_Component, Extended_Pictographic
278+
'emoji': 'https://unicode.org/Public/emoji/11.0/emoji-data.txt',
279+
// Emoji_Keycap_Sequence, Emoji_Flag_Sequence, Emoji_Tag_Sequence, Emoji_Modifier_Sequence
280+
'emoji-sequences': 'https://unicode.org/Public/emoji/11.0/emoji-sequences.txt',
281+
// Emoji_ZWJ_Sequence
282+
'emoji-zwj-sequences': 'https://unicode.org/Public/emoji/11.0/emoji-zwj-sequences.txt',
277283
},
278284
];
279285

‎index.js

+14
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ parsers.parseCompositionExclusions = require('./scripts/parse-composition-exclus
1111
parsers.parseLineBreak = require('./scripts/parse-line-break.js');
1212
parsers.parseScriptExtensions = require('./scripts/parse-script-extensions.js');
1313
parsers.parseWordBreak = require('./scripts/parse-word-break.js');
14+
parsers.parseEmoji = require('./scripts/parse-emoji.js');
15+
parsers.parseEmojiSequences = require('./scripts/parse-emoji-sequences.js');
1416
parsers.parseNames = require('./scripts/parse-names.js');
1517
const extend = utils.extend;
1618
const cp = require('cp');
@@ -119,6 +121,18 @@ const generateData = function(version) {
119121
'map': parsers.parseWordBreak(version),
120122
'type': 'Word_Break'
121123
}));
124+
console.log('Parsing Unicode v%s binary emoji properties…', version);
125+
extend(dirMap, utils.writeFiles({
126+
'version': version,
127+
'map': parsers.parseEmoji(version),
128+
'type': 'Binary_Property'
129+
}));
130+
console.log('Parsing Unicode v%s emoji sequence properties…', version);
131+
extend(dirMap, utils.writeFiles({
132+
'version': version,
133+
'map': parsers.parseEmojiSequences(version),
134+
'type': 'Sequence_Property'
135+
}));
122136
console.log('Parsing Unicode v%s `Names`…', version);
123137
extend(dirMap, utils.writeFiles({
124138
'version': version,

‎scripts/download.js

+4-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,10 @@ const TYPES = [
4040
'bidi-mirroring',
4141
'bidi-brackets',
4242
'line-break',
43-
'word-break'
43+
'word-break',
44+
'emoji',
45+
'emoji-sequences',
46+
'emoji-zwj-sequences'
4447
];
4548

4649
for (const resource of resources) {

‎scripts/parse-emoji-sequences.js

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
'use strict';
2+
3+
const utils = require('./utils.js');
4+
5+
/**
 * Comparator that orders two strings by their code points, position by
 * position; when one string is a prefix of the other, the shorter one sorts
 * first. Subtracting strings (`a - b`) yields `NaN`, which is not a usable
 * comparator result, hence this explicit comparison.
 *
 * @param {string} a - First sequence.
 * @param {string} b - Second sequence.
 * @returns {number} Negative, zero, or positive, as `Array#sort` expects.
 */
const compareByCodePoints = function(a, b) {
	const left = Array.from(a, (symbol) => symbol.codePointAt(0));
	const right = Array.from(b, (symbol) => symbol.codePointAt(0));
	const shared = Math.min(left.length, right.length);
	for (let index = 0; index < shared; index++) {
		if (left[index] !== right[index]) {
			return left[index] - right[index];
		}
	}
	return left.length - right.length;
};

/**
 * Parses the `emoji-sequences` data file for the given Unicode version.
 *
 * Each data line has the shape `code_point(s) ; property ; description # comment`.
 * The first field is either a space-separated list of hexadecimal code points
 * (one sequence) or a `XXXX..YYYY` range (one single-code-point sequence per
 * code point in the range).
 *
 * @param {string} version - Version identifier passed to `utils.readDataFile`.
 * @returns {Object<string, string[]>|undefined} Map from property name to the
 *   de-duplicated sequences for that property, sorted by code point; or
 *   `undefined` when `utils.readDataFile` returns nothing.
 * @throws {Error} When a data line lacks the `;`-separated property field.
 */
const parseEmojiSequences = function(version) {
	const source = utils.readDataFile(version, 'emoji-sequences');
	if (!source) {
		return;
	}
	const propertyMap = new Map();
	const addSequence = function(property, sequence) {
		if (propertyMap.has(property)) {
			propertyMap.get(property).add(sequence);
		} else {
			propertyMap.set(property, new Set([sequence]));
		}
	};
	for (const rawLine of source.split('\n')) {
		// Trim before testing so that whitespace-only lines (e.g. a lone `\r`
		// from CRLF line endings) and indented comments are skipped too.
		const line = rawLine.trim();
		if (!line || line.startsWith('#')) {
			continue;
		}
		const fields = line.split(';');
		if (fields.length < 2) {
			throw new Error(`Unexpected line in emoji-sequences data: ${ line }`);
		}
		const codePointField = fields[0].trim();
		// Anything after `#` is a trailing comment, not part of the property.
		const property = fields[1].split('#')[0].trim();
		if (codePointField.includes('..')) {
			// A range denotes a run of single-code-point sequences.
			const [start, end] = codePointField.split('..').map((hex) => {
				return parseInt(hex, 16);
			});
			for (let codePoint = start; codePoint <= end; codePoint++) {
				addSequence(property, String.fromCodePoint(codePoint));
			}
		} else {
			const codePoints = codePointField.split(/\s+/).map((hex) => {
				return parseInt(hex, 16);
			});
			addSequence(property, String.fromCodePoint(...codePoints));
		}
	}
	const plainObject = {};
	for (const [property, sequences] of propertyMap) {
		plainObject[property] = [...sequences].sort(compareByCodePoints);
	}
	return plainObject;
};
34+
35+
module.exports = parseEmojiSequences;

‎scripts/parse-emoji.js

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
'use strict';
2+
3+
const utils = require('./utils.js');
4+
5+
/**
 * Parses the `emoji` data file (binary emoji properties) for the given
 * Unicode version.
 *
 * Each data line has the shape `code_point_or_range ; property # comment`,
 * where the first field is a single hexadecimal code point or a `XXXX..YYYY`
 * range. Ranges are expanded through `utils.range(start, end)`.
 *
 * @param {string} version - Version identifier passed to `utils.readDataFile`.
 * @returns {Object<string, number[]>|undefined} Map from property name to the
 *   de-duplicated code points carrying that property, in ascending order; or
 *   `undefined` when `utils.readDataFile` returns nothing.
 * @throws {Error} When a data line lacks the `;`-separated property field.
 */
const parseEmoji = function(version) {
	const source = utils.readDataFile(version, 'emoji');
	if (!source) {
		return;
	}
	const propertyMap = new Map();
	const addCodePoint = function(property, codePoint) {
		if (propertyMap.has(property)) {
			propertyMap.get(property).add(codePoint);
		} else {
			propertyMap.set(property, new Set([codePoint]));
		}
	};
	for (const rawLine of source.split('\n')) {
		// Trim before testing so that whitespace-only lines (e.g. a lone `\r`
		// from CRLF line endings) and indented comments are skipped too.
		const line = rawLine.trim();
		if (!line || line.startsWith('#')) {
			continue;
		}
		const fields = line.split(';');
		if (fields.length < 2) {
			throw new Error(`Unexpected line in emoji data: ${ line }`);
		}
		const rangeParts = fields[0].trim().split('..');
		// Anything after `#` is a trailing comment, not part of the property.
		const property = fields[1].split('#')[0].trim();
		if (rangeParts.length === 2) {
			utils.range(
				parseInt(rangeParts[0], 16),
				parseInt(rangeParts[1], 16)
			).forEach(function(codePoint) {
				addCodePoint(property, codePoint);
			});
		} else {
			// Parse the string itself rather than handing the whole array to
			// `parseInt` and relying on implicit array-to-string coercion.
			addCodePoint(property, parseInt(rangeParts[0], 16));
		}
	}
	const plainObject = {};
	for (const [property, codePoints] of propertyMap) {
		plainObject[property] = [...codePoints].sort((a, b) => a - b);
	}
	return plainObject;
};
46+
47+
module.exports = parseEmoji;

‎scripts/utils.js

+13-1
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,20 @@ const writeFiles = function(options) {
8383
return;
8484
}
8585
append(dirMap, type, item);
86-
// Create the target directory if it doesn’t exist yet
86+
// Create the target directory if it doesn’t exist yet.
8787
mkdirp.sync(dir);
88+
89+
// Sequence properties are special.
90+
if (type == 'Sequence_Property') {
91+
const sequences = codePoints;
92+
const output = `module.exports=${ gzipInline(map[item]) }`;
93+
fs.writeFileSync(
94+
path.resolve(dir, 'index.js'),
95+
output
96+
);
97+
return;
98+
}
99+
88100
// Save the data to a file
89101
fs.writeFileSync(
90102
path.resolve(dir, 'code-points.js'),

‎templates/README.md

+4
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ require('unicode-<%= version %>/<%= type %>/<%= dir %>/code-points.js'); // look
7171
require('unicode-<%= version %>/<%= type %>/<%= dir %>/code-points.js').get(codePoint);
7272
require('unicode-<%= version %>/<%= type %>/<%= dir %>/symbols.js'); // lookup map from symbol to symbol(s)
7373
require('unicode-<%= version %>/<%= type %>/<%= dir %>/symbols.js').get(symbol);
74+
<%
75+
} else if ('Sequence_Property' == type) {
76+
%>
77+
require('unicode-<%= version %>/<%= type %>/<%= dir %>/index.js'); // array containing a string for each sequence
7478
<%
7579
} else {
7680
%>

0 commit comments

Comments
 (0)
Please sign in to comment.