|
33 | 33 |
|
34 | 34 | #include <limits>
|
35 | 35 |
|
36 |
| -// Regex lexing delivered via libSwift. |
37 |
| -#include "swift/Parse/ExperimentalRegexBridging.h" |
38 |
// Removed side of this diff: file-local callback used for regex literal lexing.
// It starts out null; tryLexRegexLiteral (removed further down in this diff)
// returns false without lexing anything while it is still null.
| -static RegexLiteralLexingFn regexLiteralLexingFn = nullptr; |
39 |
// Stores fn as the regex literal lexing callback, overwriting any value that
// was stored before. No validation is performed on fn.
| -void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn) {  |
40 |
| - regexLiteralLexingFn = fn; |
41 |
| -} |
42 |
| - |
43 | 36 | using namespace swift;
|
44 | 37 | using namespace swift::syntax;
|
45 | 38 |
|
@@ -1958,46 +1951,36 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
|
1958 | 1951 | }
|
1959 | 1952 | }
|
1960 | 1953 |
|
1961 |
// Removed side of this diff (tryLexRegexLiteral): returned false, leaving the
// caller to lex something else, when experimental string processing was off,
// when no lexing callback was registered, or when the callback made no
// progress (Ptr == TokStart). Otherwise it moved CurPtr to where the callback
// stopped, formed either tok::unknown or tok::regex_literal, and returned true.
| -bool Lexer::tryLexRegexLiteral(const char *TokStart) { |
/// Added side of this diff (lexRegexLiteral): scans forward from CurPtr until
/// an unescaped single quote closes the literal, then forms a
/// tok::regex_literal token starting at TokStart.
///
/// \param TokStart start of the token; asserted to point at a single quote.
///
/// Failure paths visible in this function:
///  - Buffer end, a line feed, or a carriage return is reached before the
///    closing quote: lex_unterminated_regex is diagnosed at TokStart and a
///    tok::unknown token is formed immediately.
///  - validateUTF8CharacterAndAdvance returns ~0U: lex_invalid_utf8 is
///    diagnosed at the start of that character, scanning continues, and a
///    tok::unknown token is formed once the closing quote is found.
///
/// NOTE(review): presumably CurPtr already points just past the opening quote
/// on entry. If it still pointed at TokStart, the first character read would
/// be the opening quote and the loop would stop at once. Confirm against the
/// caller.
| 1954 | +void Lexer::lexRegexLiteral(const char *TokStart) { |
1962 | 1955 | assert(*TokStart == '\'');
|
1963 | 1956 |
|
1964 |
| - // We need to have experimental string processing enabled, and have the |
1965 |
| - // parsing logic for regex literals available. |
1966 |
| - if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn) |
1967 |
| - return false; |
1968 |
| - |
1969 |
| - // Ask libswift to try and lex a regex literal. |
1970 |
| - // - Ptr will not be advanced if this is not for a regex literal. |
1971 |
| - // - ErrStr will be set if there is any error to emit. |
1972 |
| - // - CompletelyErroneous will be set if there was an error that cannot be |
1973 |
| - // recovered from. |
1974 |
| - auto *Ptr = TokStart; |
1975 |
| - const char *ErrStr = nullptr; |
1976 |
| - bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr); |
1977 |
| - if (ErrStr) |
1978 |
| - diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr); |
1979 |
| - |
1980 |
| - // If we didn't make any lexing progress, this isn't a regex literal and we |
1981 |
| - // should fallback to lexing as something else. |
1982 |
| - if (Ptr == TokStart) |
1983 |
| - return false; |
1984 |
| - |
1985 |
| - // Update to point to where we ended regex lexing. |
1986 |
| - assert(Ptr > TokStart && Ptr <= BufferEnd); |
1987 |
| - CurPtr = Ptr; |
// Set when invalid UTF-8 is seen; it downgrades the final token to
// tok::unknown without stopping the scan for the closing quote.
| 1957 | + bool HadError = false; |
| 1958 | + while (true) { |
| 1959 | + // Check if we reached the end of the literal without terminating. |
| 1960 | + if (CurPtr >= BufferEnd || *CurPtr == '\n' || *CurPtr == '\r') { |
| 1961 | + diagnose(TokStart, diag::lex_unterminated_regex); |
| 1962 | + return formToken(tok::unknown, TokStart); |
| 1963 | + } |
1988 | 1964 |
|
1989 |
| - // If the lexing was completely erroneous, form an unknown token. |
1990 |
| - if (CompletelyErroneous) { |
1991 |
| - assert(ErrStr); |
1992 |
| - formToken(tok::unknown, TokStart); |
1993 |
| - return true; |
// Remember where this character begins so that an invalid UTF-8 diagnostic
// can point at it after CurPtr has been advanced.
| 1965 | + const auto *CharStart = CurPtr; |
| 1966 | + uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd); |
| 1967 | + if (CharValue == ~0U) { |
| 1968 | + diagnose(CharStart, diag::lex_invalid_utf8); |
| 1969 | + HadError = true; |
| 1970 | + continue; |
| 1971 | + } |
// NOTE(review): *CurPtr is read below after the advance, with no new check
// against BufferEnd (the loop-top check ran before the advance). Presumably
// the byte at BufferEnd is always readable; confirm.
// Only a backslash followed by a single quote or another backslash consumes
// an extra byte here; a backslash before any other character is left for the
// next loop iteration to read as an ordinary character.
| 1972 | + if (CharValue == '\\' && (*CurPtr == '\'' || *CurPtr == '\\')) { |
| 1973 | + // Skip escaped delimiter or \. |
| 1974 | + CurPtr++; |
| 1975 | + } else if (CharValue == '\'') { |
| 1976 | + // End of literal, stop. |
| 1977 | + break; |
| 1978 | + } |
1994 | 1979 | }
|
// The closing quote was found, but an invalid UTF-8 sequence was diagnosed
// along the way, so the token is formed as tok::unknown.
| 1980 | + if (HadError) |
| 1981 | + return formToken(tok::unknown, TokStart); |
1995 | 1982 |
|
1996 |
| - // Otherwise, we either had a successful lex, or something that was |
1997 |
| - // recoverable. |
1998 |
| - assert(ErrStr || CurPtr[-1] == '\''); |
1999 | 1983 | formToken(tok::regex_literal, TokStart);
|
2000 |
| - return true; |
2001 | 1984 | }
|
2002 | 1985 |
|
2003 | 1986 | /// lexEscapedIdentifier:
|
@@ -2545,11 +2528,11 @@ void Lexer::lexImpl() {
|
2545 | 2528 |
|
2546 | 2529 | case '\'':
|
2547 | 2530 | // If we have experimental string processing enabled, and have the parsing
|
2548 |
| - // logic for regex literals, try to lex a single quoted string as a regex |
2549 |
| - // literal. |
2550 |
| - if (tryLexRegexLiteral(TokStart)) |
2551 |
| - return; |
2552 |
| - |
| 2531 | + // logic for regex literals, lex a single quoted string as a regex literal. |
| 2532 | + if (LangOpts.EnableExperimentalStringProcessing && |
| 2533 | + Parser_hasParseRegexStrawperson()) { |
| 2534 | + return lexRegexLiteral(TokStart); |
| 2535 | + } |
2553 | 2536 | // Otherwise lex as a string literal and emit a diagnostic.
|
2554 | 2537 | LLVM_FALLTHROUGH;
|
2555 | 2538 | case '"':
|
|
0 commit comments