Skip to content

Commit 9511994

Browse files
committed
Revert "Merge pull request #40595 from hamishknight/straw-bales"
This reverts commit a67a043, reversing changes made to 9965df7. Either the reverted commit or the earlier commit it is based on (#40531) broke the incremental bot.
1 parent 296e38f commit 9511994

34 files changed

+181
-299
lines changed

cmake/modules/AddSwift.cmake

+2-7
Original file line numberDiff line numberDiff line change
@@ -634,13 +634,8 @@ function(add_libswift_module module)
634634
""
635635
"DEPENDS"
636636
${ARGN})
637-
set(raw_sources ${ALSM_UNPARSED_ARGUMENTS})
638-
set(sources)
639-
foreach(raw_source ${raw_sources})
640-
get_filename_component(
641-
raw_source "${raw_source}" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
642-
list(APPEND sources "${raw_source}")
643-
endforeach()
637+
set(sources ${ALSM_UNPARSED_ARGUMENTS})
638+
list(TRANSFORM sources PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")
644639

645640
set(target_name "LibSwift${module}")
646641

include/swift/AST/ASTContext.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ class ASTContext final {
621621
KnownProtocolKind builtinProtocol,
622622
llvm::function_ref<DeclName (ASTContext &ctx)> initName) const;
623623

624-
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
624+
/// Retrieve _StringProcessing.Regex.init(_regexString: String).
625625
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
626626

627627
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.

include/swift/AST/Expr.h

+3-8
Original file line numberDiff line numberDiff line change
@@ -966,23 +966,18 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
966966
class RegexLiteralExpr : public LiteralExpr {
967967
SourceLoc Loc;
968968
StringRef RegexText;
969-
unsigned Version;
970969

971-
RegexLiteralExpr(SourceLoc loc, StringRef regexText, unsigned version,
972-
bool isImplicit)
970+
RegexLiteralExpr(SourceLoc loc, StringRef regexText, bool isImplicit)
973971
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
974-
RegexText(regexText), Version(version) {}
972+
RegexText(regexText) {}
975973

976974
public:
977975
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
978-
StringRef regexText, unsigned version);
976+
StringRef regexText);
979977

980978
/// Retrieve the raw regex text.
981979
StringRef getRegexText() const { return RegexText; }
982980

983-
/// Retrieve the version of the regex string.
984-
unsigned getVersion() const { return Version; }
985-
986981
SourceRange getSourceRange() const { return Loc; }
987982

988983
static bool classof(const Expr *E) {

include/swift/AST/KnownIdentifiers.def

-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ IDENTIFIER(zero)
252252
// String processing
253253
IDENTIFIER(Regex)
254254
IDENTIFIER_(regexString)
255-
IDENTIFIER(version)
256255
IDENTIFIER_(StringProcessing)
257256

258257
// Distributed actors

include/swift/Parse/ExperimentalRegexBridging.h

+8-34
Original file line numberDiff line numberDiff line change
@@ -5,43 +5,17 @@
55
extern "C" {
66
#endif
77

8-
/// Attempt to lex a regex literal string. Takes the following arguments:
9-
///
10-
/// - CurPtrPtr: A pointer to the current pointer of lexer, which should be the
11-
/// start of the literal. This will be advanced to the point at
12-
/// which the lexer should resume, or will remain the same if this
13-
/// is not a regex literal.
14-
/// - BufferEnd: A pointer to the end of the buffer, which should not be lexed
15-
/// past.
16-
/// - ErrorOut: If an error is encountered, this will be set to the error
17-
/// string.
18-
///
19-
/// Returns: A bool indicating whether lexing was completely erroneous, and
20-
/// cannot be recovered from, or false if there either was no error,
21-
/// or there was a recoverable error.
22-
typedef bool(* RegexLiteralLexingFn)(/*CurPtrPtr*/ const char **,
23-
/*BufferEnd*/ const char *,
24-
/*ErrorOut*/ const char **);
25-
void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn);
26-
27-
/// Parse a regex literal string. Takes the following arguments:
28-
///
29-
/// - InputPtr: A null-terminated C string of the regex literal.
30-
/// - ErrorOut: A buffer accepting an error string upon error.
31-
/// - VersionOut: A buffer accepting a regex literal format version.
32-
/// - CaptureStructureOut: A buffer accepting a byte sequence representing the
33-
/// capture structure of the literal.
34-
/// - CaptureStructureSize: The size of the capture structure buffer. Must be
35-
/// greater than or equal to `strlen(InputPtr)`.
36-
typedef void(* RegexLiteralParsingFn)(/*InputPtr*/ const char *,
37-
/*ErrorOut*/ const char **,
38-
/*VersionOut*/ unsigned *,
39-
/*CaptureStructureOut*/ char *,
40-
/*CaptureStructureSize*/ unsigned);
41-
void Parser_registerRegexLiteralParsingFn(RegexLiteralParsingFn fn);
8+
typedef const char *(* ParseRegexStrawperson)(const char *);
9+
10+
void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn);
11+
bool Parser_hasParseRegexStrawperson();
4212

4313
#ifdef __cplusplus
4414
} // extern "C"
4515
#endif
4616

4717
#endif // EXPERIMENTAL_REGEX_BRIDGING
18+
19+
20+
//const char* experimental_regex_strawperson(const char *in);
21+

include/swift/Parse/Lexer.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -595,9 +595,7 @@ class Lexer {
595595
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
596596
void lexEscapedIdentifier();
597597

598-
/// Attempt to lex a regex literal, returning true if a regex literal was
599-
/// lexed, false if this is not a regex literal.
600-
bool tryLexRegexLiteral(const char *TokStart);
598+
void lexRegexLiteral(const char *TokStart);
601599

602600
void tryLexEditorPlaceholder();
603601
const char *findEndOfCurlyQuoteStringLiteral(const char *,
@@ -614,6 +612,9 @@ class Lexer {
614612

615613
/// Emit diagnostics for single-quote string and suggest replacement
616614
/// with double-quoted equivalent.
615+
///
616+
/// Or, if we're in strawperson mode, we will emit a custom
617+
/// error message instead, determined by the Swift library.
617618
void diagnoseSingleQuoteStringLiteral(const char *TokStart,
618619
const char *TokEnd);
619620

lib/AST/ASTContext.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1221,7 +1221,7 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
12211221
auto *spModule = getLoadedModule(Id_StringProcessing);
12221222
DeclName name(*const_cast<ASTContext *>(this),
12231223
DeclBaseName::createConstructor(),
1224-
{Id_regexString, Id_version});
1224+
{Id_regexString});
12251225
SmallVector<ValueDecl *, 1> results;
12261226
spModule->lookupQualified(getRegexType(), DeclNameRef(name),
12271227
NL_IncludeUsableFromInline, results);

lib/AST/Expr.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -2246,9 +2246,8 @@ SourceLoc TapExpr::getEndLoc() const {
22462246

22472247
RegexLiteralExpr *
22482248
RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2249-
StringRef regexText, unsigned version) {
2250-
return new (ctx) RegexLiteralExpr(loc, regexText, version,
2251-
/*implicit*/ false);
2249+
StringRef regexText) {
2250+
return new (ctx) RegexLiteralExpr(loc, regexText, /*implicit*/ false);
22522251
}
22532252

22542253
void swift::simple_display(llvm::raw_ostream &out, const ClosureExpr *CE) {

lib/Parse/Lexer.cpp

+29-46
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,6 @@
3333

3434
#include <limits>
3535

36-
// Regex lexing delivered via libSwift.
37-
#include "swift/Parse/ExperimentalRegexBridging.h"
38-
static RegexLiteralLexingFn regexLiteralLexingFn = nullptr;
39-
void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn) {
40-
regexLiteralLexingFn = fn;
41-
}
42-
4336
using namespace swift;
4437
using namespace swift::syntax;
4538

@@ -1958,46 +1951,36 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19581951
}
19591952
}
19601953

1961-
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
1954+
void Lexer::lexRegexLiteral(const char *TokStart) {
19621955
assert(*TokStart == '\'');
19631956

1964-
// We need to have experimental string processing enabled, and have the
1965-
// parsing logic for regex literals available.
1966-
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1967-
return false;
1968-
1969-
// Ask libswift to try and lex a regex literal.
1970-
// - Ptr will not be advanced if this is not for a regex literal.
1971-
// - ErrStr will be set if there is any error to emit.
1972-
// - CompletelyErroneous will be set if there was an error that cannot be
1973-
// recovered from.
1974-
auto *Ptr = TokStart;
1975-
const char *ErrStr = nullptr;
1976-
bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr);
1977-
if (ErrStr)
1978-
diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr);
1979-
1980-
// If we didn't make any lexing progress, this isn't a regex literal and we
1981-
// should fallback to lexing as something else.
1982-
if (Ptr == TokStart)
1983-
return false;
1984-
1985-
// Update to point to where we ended regex lexing.
1986-
assert(Ptr > TokStart && Ptr <= BufferEnd);
1987-
CurPtr = Ptr;
1957+
bool HadError = false;
1958+
while (true) {
1959+
// Check if we reached the end of the literal without terminating.
1960+
if (CurPtr >= BufferEnd || *CurPtr == '\n' || *CurPtr == '\r') {
1961+
diagnose(TokStart, diag::lex_unterminated_regex);
1962+
return formToken(tok::unknown, TokStart);
1963+
}
19881964

1989-
// If the lexing was completely erroneous, form an unknown token.
1990-
if (CompletelyErroneous) {
1991-
assert(ErrStr);
1992-
formToken(tok::unknown, TokStart);
1993-
return true;
1965+
const auto *CharStart = CurPtr;
1966+
uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd);
1967+
if (CharValue == ~0U) {
1968+
diagnose(CharStart, diag::lex_invalid_utf8);
1969+
HadError = true;
1970+
continue;
1971+
}
1972+
if (CharValue == '\\' && (*CurPtr == '\'' || *CurPtr == '\\')) {
1973+
// Skip escaped delimiter or \.
1974+
CurPtr++;
1975+
} else if (CharValue == '\'') {
1976+
// End of literal, stop.
1977+
break;
1978+
}
19941979
}
1980+
if (HadError)
1981+
return formToken(tok::unknown, TokStart);
19951982

1996-
// Otherwise, we either had a successful lex, or something that was
1997-
// recoverable.
1998-
assert(ErrStr || CurPtr[-1] == '\'');
19991983
formToken(tok::regex_literal, TokStart);
2000-
return true;
20011984
}
20021985

20031986
/// lexEscapedIdentifier:
@@ -2545,11 +2528,11 @@ void Lexer::lexImpl() {
25452528

25462529
case '\'':
25472530
// If we have experimental string processing enabled, and have the parsing
2548-
// logic for regex literals, try to lex a single quoted string as a regex
2549-
// literal.
2550-
if (tryLexRegexLiteral(TokStart))
2551-
return;
2552-
2531+
// logic for regex literals, lex a single quoted string as a regex literal.
2532+
if (LangOpts.EnableExperimentalStringProcessing &&
2533+
Parser_hasParseRegexStrawperson()) {
2534+
return lexRegexLiteral(TokStart);
2535+
}
25532536
// Otherwise lex as a string literal and emit a diagnostic.
25542537
LLVM_FALLTHROUGH;
25552538
case '"':

lib/Parse/ParseRegex.cpp

+14-12
Original file line numberDiff line numberDiff line change
@@ -22,36 +22,38 @@
2222

2323
// Regex parser delivered via libSwift
2424
#include "swift/Parse/ExperimentalRegexBridging.h"
25-
static RegexLiteralParsingFn regexLiteralParsingFn = nullptr;
26-
void Parser_registerRegexLiteralParsingFn(RegexLiteralParsingFn fn) {
27-
regexLiteralParsingFn = fn;
25+
static ParseRegexStrawperson parseRegexStrawperson = nullptr;
26+
void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn) {
27+
parseRegexStrawperson = fn;
28+
}
29+
// Exposes the presence of the regex parsing function to the lexer.
30+
bool Parser_hasParseRegexStrawperson() {
31+
return parseRegexStrawperson != nullptr;
2832
}
2933

3034
using namespace swift;
3135
using namespace swift::syntax;
3236

3337
ParserResult<Expr> Parser::parseExprRegexLiteral() {
3438
assert(Tok.is(tok::regex_literal));
35-
assert(regexLiteralParsingFn);
39+
assert(parseRegexStrawperson);
3640

3741
SyntaxParsingContext LocalContext(SyntaxContext,
3842
SyntaxKind::RegexLiteralExpr);
39-
40-
auto regexText = Tok.getText();
43+
// Strip off delimiters.
44+
auto rawText = Tok.getText();
45+
assert(rawText.front() == '\'' && rawText.back() == '\'');
46+
auto regexText = rawText.slice(1, rawText.size() - 1);
4147

4248
// Let the Swift library parse the contents, returning an error, or null if
4349
// successful.
4450
// TODO: We need to be able to pass back a source location to emit the error
4551
// at.
46-
const char *errorStr = nullptr;
47-
unsigned version;
48-
regexLiteralParsingFn(regexText.str().c_str(), &errorStr, &version,
49-
/*captureStructureOut*/ nullptr,
50-
/*captureStructureSize*/ 0);
52+
auto *errorStr = parseRegexStrawperson(regexText.str().c_str());
5153
if (errorStr)
5254
diagnose(Tok, diag::regex_literal_parsing_error, errorStr);
5355

5456
auto loc = consumeToken();
5557
return makeParserResult(
56-
RegexLiteralExpr::createParsed(Context, loc, regexText, version));
58+
RegexLiteralExpr::createParsed(Context, loc, regexText));
5759
}

lib/SILGen/SILGenApply.cpp

+3-24
Original file line numberDiff line numberDiff line change
@@ -1910,31 +1910,10 @@ buildBuiltinLiteralArgs(SILGenFunction &SGF, SGFContext C,
19101910
RValue string = SGF.emitApplyAllocatingInitializer(
19111911
expr, strInitDecl, std::move(strLiteralArgs),
19121912
/*overriddenSelfType*/ Type(), SGFContext());
1913-
1914-
// The version of the regex string.
1915-
// %3 = integer_literal $Builtin.IntLiteral <version>
1916-
auto versionIntLiteral =
1917-
ManagedValue::forUnmanaged(SGF.B.createIntegerLiteral(
1918-
expr, SILType::getBuiltinIntegerLiteralType(SGF.getASTContext()),
1919-
expr->getVersion()));
1920-
1921-
using Param = AnyFunctionType::Param;
1922-
auto builtinIntTy = versionIntLiteral.getType().getASTType();
1923-
PreparedArguments versionIntBuiltinArgs(ArrayRef<Param>{Param(builtinIntTy)});
1924-
versionIntBuiltinArgs.add(
1925-
expr, RValue(SGF, {versionIntLiteral}, builtinIntTy));
1926-
1927-
// %4 = function_ref Int.init(_builtinIntegerLiteral: Builtin.IntLiteral)
1928-
// %5 = apply %5(%3, ...) -> $Int
1929-
auto intLiteralInit = ctx.getIntBuiltinInitDecl(ctx.getIntDecl());
1930-
RValue versionInt = SGF.emitApplyAllocatingInitializer(
1931-
expr, intLiteralInit, std::move(versionIntBuiltinArgs),
1932-
/*overriddenSelfType*/ Type(), SGFContext());
1933-
1934-
PreparedArguments args(ArrayRef<Param>{Param(ctx.getStringType()),
1935-
Param(ctx.getIntType())});
1913+
PreparedArguments args(
1914+
ArrayRef<AnyFunctionType::Param>({
1915+
AnyFunctionType::Param(ctx.getStringType())}));
19361916
args.add(expr, std::move(string));
1937-
args.add(expr, std::move(versionInt));
19381917
return args;
19391918
}
19401919

lib/Sema/CSGen.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -1266,15 +1266,18 @@ namespace {
12661266
ctx.Id_Regex.str());
12671267
return Type();
12681268
}
1269+
auto substringType = ctx.getSubstringType();
12691270
auto dynCapturesType = ctx.getDynamicCapturesType();
12701271
if (!dynCapturesType) {
12711272
ctx.Diags.diagnose(E->getLoc(),
12721273
diag::string_processing_lib_missing,
12731274
"DynamicCaptures");
12741275
return Type();
12751276
}
1276-
// TODO: Replace `DynamicCaptures` with type inferred from the regex.
1277-
return BoundGenericStructType::get(regexDecl, Type(), {dynCapturesType});
1277+
// TODO: Replace `(Substring, DynamicCaptures)` with type inferred from
1278+
// the regex.
1279+
auto matchType = TupleType::get({substringType, dynCapturesType}, ctx);
1280+
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
12781281
}
12791282

12801283
Type visitDeclRefExpr(DeclRefExpr *E) {

libswift/Sources/CMakeLists.txt

+1-3
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
# See http://swift.org/LICENSE.txt for license information
77
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors
88

9-
if(SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING)
10-
add_subdirectory(ExperimentalRegex)
11-
endif()
9+
add_subdirectory(ExperimentalRegex)
1210
add_subdirectory(SIL)
1311
add_subdirectory(Optimizer)

libswift/Sources/ExperimentalRegex/CMakeLists.txt

+2-11
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,7 @@
66
# See http://swift.org/LICENSE.txt for license information
77
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors
88

9-
file(GLOB_RECURSE _LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES
10-
"${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}/Sources/_MatchingEngine/*.swift")
11-
set(LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES)
12-
foreach(source ${_LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES})
13-
file(TO_CMAKE_PATH "${source}" source)
14-
list(APPEND LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES ${source})
15-
endforeach()
16-
message(STATUS "Using Experimental String Processing library for libswift ExperimentalRegex (${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}).")
17-
189
add_libswift_module(ExperimentalRegex
19-
"${LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES}"
20-
Regex.swift)
10+
Regex.swift
11+
)
2112

0 commit comments

Comments
 (0)