|
55 | 55 | sre.CATEGORY_NOT_WORD: BYTES_ALL - BYTES_WORD,
|
56 | 56 | }
|
57 | 57 |
|
58 |
| -# On Python < 3.4 (including 2.7), the following unicode chars are weird. |
59 |
| -# They are matched by the \W, meaning 'not word', but unicodedata.category(c) |
60 |
| -# returns one of the word categories above. There's special handling below. |
61 |
| -HAS_WEIRD_WORD_CHARS = sys.version_info[:2] < (3, 4) |
| 58 | +# On Python 2, these unicode chars are matched by \W, meaning 'not word', |
| 59 | +# but unicodedata.category(c) returns one of the word categories above. |
62 | 60 | UNICODE_WEIRD_NONWORD_CHARS = set(u"\U00012432\U00012433\U00012456\U00012457")
|
63 | 61 |
|
64 | 62 |
|
@@ -164,16 +162,12 @@ def add_category(self, category):
|
164 | 162 | elif category == sre.CATEGORY_WORD:
|
165 | 163 | self._categories |= UNICODE_WORD_CATEGORIES
|
166 | 164 | self._whitelist_chars.add(u"_")
|
167 |
| - if HAS_WEIRD_WORD_CHARS and self._unicode: # pragma: no cover |
168 |
| - # This code is workaround of weird behavior in |
169 |
| - # specific Python versions and run only on those versions |
| 165 | + if self._unicode and not PY3: # pragma: no cover |
170 | 166 | self._blacklist_chars |= UNICODE_WEIRD_NONWORD_CHARS
|
171 | 167 | elif category == sre.CATEGORY_NOT_WORD:
|
172 | 168 | self._categories |= UNICODE_CATEGORIES - UNICODE_WORD_CATEGORIES
|
173 | 169 | self._blacklist_chars.add(u"_")
|
174 |
| - if HAS_WEIRD_WORD_CHARS and self._unicode: # pragma: no cover |
175 |
| - # This code is workaround of weird behavior in |
176 |
| - # specific Python versions and run only on those versions |
| 170 | + if self._unicode and not PY3: # pragma: no cover |
177 | 171 | self._whitelist_chars |= UNICODE_WEIRD_NONWORD_CHARS
|
178 | 172 | else: # pragma: no cover
|
179 | 173 | raise AssertionError("Unknown character category: %s" % category)
|
|
0 commit comments