Skip to content

Commit f40e666

Browse files
committed
Merge remote-tracking branch 'origin/main' into rebranch
2 parents 134e17c + a67a043 commit f40e666

34 files changed

+299
-181
lines changed

Diff for: cmake/modules/AddSwift.cmake

+7-2
Original file line numberDiff line numberDiff line change
@@ -634,8 +634,13 @@ function(add_libswift_module module)
634634
""
635635
"DEPENDS"
636636
${ARGN})
637-
set(sources ${ALSM_UNPARSED_ARGUMENTS})
638-
list(TRANSFORM sources PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")
637+
set(raw_sources ${ALSM_UNPARSED_ARGUMENTS})
638+
set(sources)
639+
foreach(raw_source ${raw_sources})
640+
get_filename_component(
641+
raw_source "${raw_source}" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
642+
list(APPEND sources "${raw_source}")
643+
endforeach()
639644

640645
set(target_name "LibSwift${module}")
641646

Diff for: include/swift/AST/ASTContext.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ class ASTContext final {
621621
KnownProtocolKind builtinProtocol,
622622
llvm::function_ref<DeclName (ASTContext &ctx)> initName) const;
623623

624-
/// Retrieve _StringProcessing.Regex.init(_regexString: String).
624+
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
625625
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
626626

627627
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.

Diff for: include/swift/AST/Expr.h

+8-3
Original file line numberDiff line numberDiff line change
@@ -966,18 +966,23 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
966966
class RegexLiteralExpr : public LiteralExpr {
967967
SourceLoc Loc;
968968
StringRef RegexText;
969+
unsigned Version;
969970

970-
RegexLiteralExpr(SourceLoc loc, StringRef regexText, bool isImplicit)
971+
RegexLiteralExpr(SourceLoc loc, StringRef regexText, unsigned version,
972+
bool isImplicit)
971973
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
972-
RegexText(regexText) {}
974+
RegexText(regexText), Version(version) {}
973975

974976
public:
975977
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
976-
StringRef regexText);
978+
StringRef regexText, unsigned version);
977979

978980
/// Retrieve the raw regex text.
979981
StringRef getRegexText() const { return RegexText; }
980982

983+
/// Retrieve the version of the regex string.
984+
unsigned getVersion() const { return Version; }
985+
981986
SourceRange getSourceRange() const { return Loc; }
982987

983988
static bool classof(const Expr *E) {

Diff for: include/swift/AST/KnownIdentifiers.def

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ IDENTIFIER(zero)
252252
// String processing
253253
IDENTIFIER(Regex)
254254
IDENTIFIER_(regexString)
255+
IDENTIFIER(version)
255256
IDENTIFIER_(StringProcessing)
256257

257258
// Distributed actors

Diff for: include/swift/Parse/ExperimentalRegexBridging.h

+34-8
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,43 @@
55
extern "C" {
66
#endif
77

8-
typedef const char *(* ParseRegexStrawperson)(const char *);
9-
10-
void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn);
11-
bool Parser_hasParseRegexStrawperson();
8+
/// Attempt to lex a regex literal string. Takes the following arguments:
9+
///
10+
/// - CurPtrPtr: A pointer to the lexer's current pointer, which should be the
11+
/// start of the literal. This will be advanced to the point at
12+
/// which the lexer should resume, or will remain the same if this
13+
/// is not a regex literal.
14+
/// - BufferEnd: A pointer to the end of the buffer, which should not be lexed
15+
/// past.
16+
/// - ErrorOut: If an error is encountered, this will be set to the error
17+
/// string.
18+
///
19+
/// Returns: true if lexing was completely erroneous and cannot be recovered
20+
/// from, or false if there was either no error or only a recoverable
21+
/// error.
22+
typedef bool(* RegexLiteralLexingFn)(/*CurPtrPtr*/ const char **,
23+
/*BufferEnd*/ const char *,
24+
/*ErrorOut*/ const char **);
25+
void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn);
26+
27+
/// Parse a regex literal string. Takes the following arguments:
28+
///
29+
/// - InputPtr: A null-terminated C string of the regex literal.
30+
/// - ErrorOut: A buffer accepting an error string upon error.
31+
/// - VersionOut: A buffer accepting a regex literal format version.
32+
/// - CaptureStructureOut: A buffer accepting a byte sequence representing the
33+
/// capture structure of the literal.
34+
/// - CaptureStructureSize: The size of the capture structure buffer. Must be
35+
/// greater than or equal to `strlen(InputPtr)`.
36+
typedef void(* RegexLiteralParsingFn)(/*InputPtr*/ const char *,
37+
/*ErrorOut*/ const char **,
38+
/*VersionOut*/ unsigned *,
39+
/*CaptureStructureOut*/ char *,
40+
/*CaptureStructureSize*/ unsigned);
41+
void Parser_registerRegexLiteralParsingFn(RegexLiteralParsingFn fn);
1242

1343
#ifdef __cplusplus
1444
} // extern "C"
1545
#endif
1646

1747
#endif // EXPERIMENTAL_REGEX_BRIDGING
18-
19-
20-
//const char* experimental_regex_strawperson(const char *in);
21-

Diff for: include/swift/Parse/Lexer.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,9 @@ class Lexer {
595595
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
596596
void lexEscapedIdentifier();
597597

598-
void lexRegexLiteral(const char *TokStart);
598+
/// Attempt to lex a regex literal, returning true if a regex literal (or an
599+
/// unknown token, on error) was formed, false if this is not a regex literal.
600+
bool tryLexRegexLiteral(const char *TokStart);
599601

600602
void tryLexEditorPlaceholder();
601603
const char *findEndOfCurlyQuoteStringLiteral(const char *,
@@ -612,9 +614,6 @@ class Lexer {
612614

613615
/// Emit diagnostics for single-quote string and suggest replacement
614616
/// with double-quoted equivalent.
615-
///
616-
/// Or, if we're in strawperson mode, we will emit a custom
617-
/// error message instead, determined by the Swift library.
618617
void diagnoseSingleQuoteStringLiteral(const char *TokStart,
619618
const char *TokEnd);
620619

Diff for: lib/AST/ASTContext.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1221,7 +1221,7 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
12211221
auto *spModule = getLoadedModule(Id_StringProcessing);
12221222
DeclName name(*const_cast<ASTContext *>(this),
12231223
DeclBaseName::createConstructor(),
1224-
{Id_regexString});
1224+
{Id_regexString, Id_version});
12251225
SmallVector<ValueDecl *, 1> results;
12261226
spModule->lookupQualified(getRegexType(), DeclNameRef(name),
12271227
NL_IncludeUsableFromInline, results);

Diff for: lib/AST/Expr.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -2246,8 +2246,9 @@ SourceLoc TapExpr::getEndLoc() const {
22462246

22472247
RegexLiteralExpr *
22482248
RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2249-
StringRef regexText) {
2250-
return new (ctx) RegexLiteralExpr(loc, regexText, /*implicit*/ false);
2249+
StringRef regexText, unsigned version) {
2250+
return new (ctx) RegexLiteralExpr(loc, regexText, version,
2251+
/*implicit*/ false);
22512252
}
22522253

22532254
void swift::simple_display(llvm::raw_ostream &out, const ClosureExpr *CE) {

Diff for: lib/Parse/Lexer.cpp

+46-29
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@
3333

3434
#include <limits>
3535

36+
// Regex lexing delivered via libSwift.
37+
#include "swift/Parse/ExperimentalRegexBridging.h"
38+
static RegexLiteralLexingFn regexLiteralLexingFn = nullptr;
39+
void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn) {
40+
regexLiteralLexingFn = fn;
41+
}
42+
3643
using namespace swift;
3744
using namespace swift::syntax;
3845

@@ -1951,36 +1958,46 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19511958
}
19521959
}
19531960

1954-
void Lexer::lexRegexLiteral(const char *TokStart) {
1961+
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
19551962
assert(*TokStart == '\'');
19561963

1957-
bool HadError = false;
1958-
while (true) {
1959-
// Check if we reached the end of the literal without terminating.
1960-
if (CurPtr >= BufferEnd || *CurPtr == '\n' || *CurPtr == '\r') {
1961-
diagnose(TokStart, diag::lex_unterminated_regex);
1962-
return formToken(tok::unknown, TokStart);
1963-
}
1964+
// We need to have experimental string processing enabled, and have the
1965+
// parsing logic for regex literals available.
1966+
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1967+
return false;
19641968

1965-
const auto *CharStart = CurPtr;
1966-
uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd);
1967-
if (CharValue == ~0U) {
1968-
diagnose(CharStart, diag::lex_invalid_utf8);
1969-
HadError = true;
1970-
continue;
1971-
}
1972-
if (CharValue == '\\' && (*CurPtr == '\'' || *CurPtr == '\\')) {
1973-
// Skip escaped delimiter or \.
1974-
CurPtr++;
1975-
} else if (CharValue == '\'') {
1976-
// End of literal, stop.
1977-
break;
1978-
}
1969+
// Ask libswift to try and lex a regex literal.
1970+
// - Ptr will not be advanced if this is not a regex literal.
1971+
// - ErrStr will be set if there is any error to emit.
1972+
// - CompletelyErroneous will be set if there was an error that cannot be
1973+
// recovered from.
1974+
auto *Ptr = TokStart;
1975+
const char *ErrStr = nullptr;
1976+
bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr);
1977+
if (ErrStr)
1978+
diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr);
1979+
1980+
// If we didn't make any lexing progress, this isn't a regex literal and we
1981+
// should fall back to lexing as something else.
1982+
if (Ptr == TokStart)
1983+
return false;
1984+
1985+
// Update to point to where we ended regex lexing.
1986+
assert(Ptr > TokStart && Ptr <= BufferEnd);
1987+
CurPtr = Ptr;
1988+
1989+
// If the lexing was completely erroneous, form an unknown token.
1990+
if (CompletelyErroneous) {
1991+
assert(ErrStr);
1992+
formToken(tok::unknown, TokStart);
1993+
return true;
19791994
}
1980-
if (HadError)
1981-
return formToken(tok::unknown, TokStart);
19821995

1996+
// Otherwise, we either had a successful lex, or something that was
1997+
// recoverable.
1998+
assert(ErrStr || CurPtr[-1] == '\'');
19831999
formToken(tok::regex_literal, TokStart);
2000+
return true;
19842001
}
19852002

19862003
/// lexEscapedIdentifier:
@@ -2528,11 +2545,11 @@ void Lexer::lexImpl() {
25282545

25292546
case '\'':
25302547
// If we have experimental string processing enabled, and have the parsing
2531-
// logic for regex literals, lex a single quoted string as a regex literal.
2532-
if (LangOpts.EnableExperimentalStringProcessing &&
2533-
Parser_hasParseRegexStrawperson()) {
2534-
return lexRegexLiteral(TokStart);
2535-
}
2548+
// logic for regex literals, try to lex a single quoted string as a regex
2549+
// literal.
2550+
if (tryLexRegexLiteral(TokStart))
2551+
return;
2552+
25362553
// Otherwise lex as a string literal and emit a diagnostic.
25372554
LLVM_FALLTHROUGH;
25382555
case '"':

Diff for: lib/Parse/ParseRegex.cpp

+12-14
Original file line numberDiff line numberDiff line change
@@ -22,38 +22,36 @@
2222

2323
// Regex parser delivered via libSwift
2424
#include "swift/Parse/ExperimentalRegexBridging.h"
25-
static ParseRegexStrawperson parseRegexStrawperson = nullptr;
26-
void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn) {
27-
parseRegexStrawperson = fn;
28-
}
29-
// Exposes the presence of the regex parsing function to the lexer.
30-
bool Parser_hasParseRegexStrawperson() {
31-
return parseRegexStrawperson != nullptr;
25+
static RegexLiteralParsingFn regexLiteralParsingFn = nullptr;
26+
void Parser_registerRegexLiteralParsingFn(RegexLiteralParsingFn fn) {
27+
regexLiteralParsingFn = fn;
3228
}
3329

3430
using namespace swift;
3531
using namespace swift::syntax;
3632

3733
ParserResult<Expr> Parser::parseExprRegexLiteral() {
3834
assert(Tok.is(tok::regex_literal));
39-
assert(parseRegexStrawperson);
35+
assert(regexLiteralParsingFn);
4036

4137
SyntaxParsingContext LocalContext(SyntaxContext,
4238
SyntaxKind::RegexLiteralExpr);
43-
// Strip off delimiters.
44-
auto rawText = Tok.getText();
45-
assert(rawText.front() == '\'' && rawText.back() == '\'');
46-
auto regexText = rawText.slice(1, rawText.size() - 1);
39+
40+
auto regexText = Tok.getText();
4741

4842
// Let the Swift library parse the contents, returning an error, or null if
4943
// successful.
5044
// TODO: We need to be able to pass back a source location to emit the error
5145
// at.
52-
auto *errorStr = parseRegexStrawperson(regexText.str().c_str());
46+
const char *errorStr = nullptr;
47+
unsigned version;
48+
regexLiteralParsingFn(regexText.str().c_str(), &errorStr, &version,
49+
/*captureStructureOut*/ nullptr,
50+
/*captureStructureSize*/ 0);
5351
if (errorStr)
5452
diagnose(Tok, diag::regex_literal_parsing_error, errorStr);
5553

5654
auto loc = consumeToken();
5755
return makeParserResult(
58-
RegexLiteralExpr::createParsed(Context, loc, regexText));
56+
RegexLiteralExpr::createParsed(Context, loc, regexText, version));
5957
}

Diff for: lib/SILGen/SILGenApply.cpp

+24-3
Original file line numberDiff line numberDiff line change
@@ -1910,10 +1910,31 @@ buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
19101910
RValue string = SGF.emitApplyAllocatingInitializer(
19111911
expr, strInitDecl, std::move(strLiteralArgs),
19121912
/*overriddenSelfType*/ Type(), SGFContext());
1913-
PreparedArguments args(
1914-
ArrayRef<AnyFunctionType::Param>({
1915-
AnyFunctionType::Param(ctx.getStringType())}));
1913+
1914+
// The version of the regex string.
1915+
// %3 = integer_literal $Builtin.IntLiteral <version>
1916+
auto versionIntLiteral =
1917+
ManagedValue::forUnmanaged(SGF.B.createIntegerLiteral(
1918+
expr, SILType::getBuiltinIntegerLiteralType(SGF.getASTContext()),
1919+
expr->getVersion()));
1920+
1921+
using Param = AnyFunctionType::Param;
1922+
auto builtinIntTy = versionIntLiteral.getType().getASTType();
1923+
PreparedArguments versionIntBuiltinArgs(ArrayRef<Param>{Param(builtinIntTy)});
1924+
versionIntBuiltinArgs.add(
1925+
expr, RValue(SGF, {versionIntLiteral}, builtinIntTy));
1926+
1927+
// %4 = function_ref Int.init(_builtinIntegerLiteral: Builtin.IntLiteral)
1928+
// %5 = apply %4(%3, ...) -> $Int
1929+
auto intLiteralInit = ctx.getIntBuiltinInitDecl(ctx.getIntDecl());
1930+
RValue versionInt = SGF.emitApplyAllocatingInitializer(
1931+
expr, intLiteralInit, std::move(versionIntBuiltinArgs),
1932+
/*overriddenSelfType*/ Type(), SGFContext());
1933+
1934+
PreparedArguments args(ArrayRef<Param>{Param(ctx.getStringType()),
1935+
Param(ctx.getIntType())});
19161936
args.add(expr, std::move(string));
1937+
args.add(expr, std::move(versionInt));
19171938
return args;
19181939
}
19191940

Diff for: lib/Sema/CSGen.cpp

+2-5
Original file line numberDiff line numberDiff line change
@@ -1266,18 +1266,15 @@ namespace {
12661266
ctx.Id_Regex.str());
12671267
return Type();
12681268
}
1269-
auto substringType = ctx.getSubstringType();
12701269
auto dynCapturesType = ctx.getDynamicCapturesType();
12711270
if (!dynCapturesType) {
12721271
ctx.Diags.diagnose(E->getLoc(),
12731272
diag::string_processing_lib_missing,
12741273
"DynamicCaptures");
12751274
return Type();
12761275
}
1277-
// TODO: Replace `(Substring, DynamicCaptures)` with type inferred from
1278-
// the regex.
1279-
auto matchType = TupleType::get({substringType, dynCapturesType}, ctx);
1280-
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
1276+
// TODO: Replace `DynamicCaptures` with type inferred from the regex.
1277+
return BoundGenericStructType::get(regexDecl, Type(), {dynCapturesType});
12811278
}
12821279

12831280
Type visitDeclRefExpr(DeclRefExpr *E) {

Diff for: libswift/Sources/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# See http://swift.org/LICENSE.txt for license information
77
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors
88

9-
add_subdirectory(ExperimentalRegex)
9+
if(SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING)
10+
add_subdirectory(ExperimentalRegex)
11+
endif()
1012
add_subdirectory(SIL)
1113
add_subdirectory(Optimizer)

Diff for: libswift/Sources/ExperimentalRegex/CMakeLists.txt

+11-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,16 @@
66
# See http://swift.org/LICENSE.txt for license information
77
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors
88

9+
file(GLOB_RECURSE _LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES
10+
"${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}/Sources/_MatchingEngine/*.swift")
11+
set(LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES)
12+
foreach(source ${_LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES})
13+
file(TO_CMAKE_PATH "${source}" source)
14+
list(APPEND LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES ${source})
15+
endforeach()
16+
message(STATUS "Using Experimental String Processing library for libswift ExperimentalRegex (${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}).")
17+
918
add_libswift_module(ExperimentalRegex
10-
Regex.swift
11-
)
19+
"${LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES}"
20+
Regex.swift)
1221

0 commit comments

Comments
 (0)