Skip to content

Commit df57a53

Browse files
authored
Fix backtracking example (dotnet#36660)
1 parent a705d07 commit df57a53

24 files changed

+263
-224
lines changed

docs/standard/base-types/backtracking-in-regular-expressions.md

Lines changed: 36 additions & 38 deletions
Large diffs are not rendered by default.

docs/standard/base-types/regular-expression-options.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ The following example is identical to the previous example, except that the stat
390390

391391
## Nonbacktracking mode
392392

393-
By default, .NET's regex engine uses *backtracking* to try to find pattern matches. A backtracking engine is one that tries to match one pattern, and if that fails, goes backs and tries to match an alternate pattern, and so on. A backtracking engine is very fast for typical cases, but slows down as the number of pattern alternations increases, which can lead to *catastrophic backtracking*. The <xref:System.Text.RegularExpressions.RegexOptions.NonBacktracking?displayProperty=nameWithType> option doesn't use backtracking and avoids that worst-case scenario. Its goal is to provide consistently good behavior, regardless of the input being searched.
393+
By default, .NET's regex engine uses *backtracking* to try to find pattern matches. A backtracking engine is one that tries to match one pattern, and if that fails, goes back and tries to match an alternate pattern, and so on. A backtracking engine is very fast for typical cases, but slows down as the number of pattern alternations increases, which can lead to *catastrophic backtracking*. The <xref:System.Text.RegularExpressions.RegexOptions.NonBacktracking?displayProperty=nameWithType> option, which was introduced in .NET 7, doesn't use backtracking and avoids that worst-case scenario. Its goal is to provide consistently good behavior, regardless of the input being searched.
394394

395395
The <xref:System.Text.RegularExpressions.RegexOptions.NonBacktracking?displayProperty=nameWithType> option doesn't support everything the other built-in engines support. In particular, the option can't be used in conjunction with <xref:System.Text.RegularExpressions.RegexOptions.RightToLeft?displayProperty=nameWithType> or <xref:System.Text.RegularExpressions.RegexOptions.ECMAScript?displayProperty=nameWithType>. It also doesn't allow for the following constructs in the pattern:
396396

@@ -403,6 +403,8 @@ The <xref:System.Text.RegularExpressions.RegexOptions.NonBacktracking?displayPro
403403

404404
<xref:System.Text.RegularExpressions.RegexOptions.NonBacktracking?displayProperty=nameWithType> also has a subtle difference with regard to execution. If a capture group is in a loop, most (non-.NET) regex engines only provide the last matched value for that capture. However, .NET's regex engine tracks all values that are captured inside a loop and provides access to them. The <xref:System.Text.RegularExpressions.RegexOptions.NonBacktracking?displayProperty=nameWithType> option is like most other regex implementations and only supports providing the final capture.
405405

406+
For more information about backtracking, see [Backtracking in regular expressions](backtracking-in-regular-expressions.md).
407+
406408
## See also
407409

408410
- [Regular Expression Language - Quick Reference](regular-expression-language-quick-reference.md)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/// <summary>
/// Entry point for the sample project: runs each example in numeric order.
/// </summary>
internal class Program
{
    /// <summary>
    /// Invokes the <c>Run</c> method of Example1 through Example6, one after another.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // The table order is the execution order.
        System.Action[] examples =
        {
            Example1.Run,
            Example2.Run,
            Example3.Run,
            Example4.Run,
            Example5.Run,
            Example6.Run
        };

        foreach (System.Action runExample in examples)
        {
            runExample();
        }
    }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net7.0</TargetFramework>
6+
<Nullable>enable</Nullable>
7+
</PropertyGroup>
8+
9+
</Project>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// <Snippet1>
2+
using System;
3+
using System.Text.RegularExpressions;
4+
5+
/// <summary>
/// Searches a fixed sample string with a pattern that contains no optional
/// quantifiers or alternation constructs.
/// </summary>
public class Example1
{
    /// <summary>
    /// Finds every occurrence of two consecutive "e" characters followed by a
    /// word character that ends a word, and writes each match and its index
    /// to the console.
    /// </summary>
    public static void Run()
    {
        const string text = "needing a reed";
        const string twoEsAtWordEnd = @"e{2}\w\b";

        MatchCollection hits = Regex.Matches(text, twoEsAtWordEnd);
        foreach (Match hit in hits)
        {
            Console.WriteLine("{0} found at position {1}", hit.Value, hit.Index);
        }
    }
}
16+
// The example displays the following output:
17+
// eed found at position 11
18+
// </Snippet1>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// <Snippet2>
2+
using System;
3+
using System.Text.RegularExpressions;
4+
5+
/// <summary>
/// Matches a greedy <c>.*</c> followed by a captured "es" against a fixed
/// sentence, ignoring case.
/// </summary>
public class Example2
{
    /// <summary>
    /// Runs the match and, when it succeeds, writes the whole match with its
    /// index, followed by the index of the first capturing group.
    /// </summary>
    public static void Run()
    {
        const string sentence = "Essential services are provided by regular expressions.";
        const string greedyThenEs = ".*(es)";

        Match result = Regex.Match(sentence, greedyThenEs, RegexOptions.IgnoreCase);
        if (!result.Success)
        {
            return;
        }

        // Group 1 is the "(es)" capture.
        Group esGroup = result.Groups[1];
        Console.WriteLine("'{0}' found at position {1}", result.Value, result.Index);
        Console.WriteLine("'es' found at position {0}", esGroup.Index);
    }
}
21+
// 'Essential services are provided by regular expres' found at position 0
22+
// 'es' found at position 47
23+
// </Snippet2>
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// <Snippet3>
2+
using System;
3+
using System.Diagnostics;
4+
using System.Text.RegularExpressions;
5+
6+
/// <summary>
/// Times a pattern with nested quantifiers against one input that matches
/// and one that ends in a non-matching character.
/// </summary>
public class Example3
{
    /// <summary>
    /// Matches each input against <c>^(a+)+$</c> and writes either the matched
    /// text or a no-match message, together with the elapsed time.
    /// </summary>
    public static void Run()
    {
        Regex nestedQuantifier = new Regex("^(a+)+$");
        string[] candidates =
        {
            "aaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaa!"
        };

        for (int i = 0; i < candidates.Length; i++)
        {
            // Only the Match call itself is inside the timed region.
            Stopwatch sw = Stopwatch.StartNew();
            Match match = nestedQuantifier.Match(candidates[i]);
            sw.Stop();

            string report = match.Success
                ? $"Matched {match.Value} in {sw.Elapsed}"
                : $"No match found in {sw.Elapsed}";
            Console.WriteLine(report);
        }
    }
}
27+
// Matched aaaaaaaaaaaaaaaaaaaaaaaaaaa in 00:00:00.0018281
28+
// No match found in 00:00:05.1882144
29+
// </Snippet3>
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// <Snippet4>
2+
using System;
3+
using System.Diagnostics;
4+
using System.Text.RegularExpressions;
5+
6+
/// <summary>
/// Compares the time taken to reject an input using a pattern with nested
/// quantifiers against the same pattern rewritten with <c>(?&gt;...)</c> groups.
/// </summary>
public class Example4
{
    /// <summary>
    /// Runs <see cref="Regex.IsMatch(string, string)"/> once per pattern inside a
    /// timed region and writes the result and the elapsed time to the console.
    /// </summary>
    public static void Run()
    {
        string input = "b51:4:1DB:9EE1:5:27d60:f44:D4:cd:E:5:0A5:4a:D24:41Ad:";
        bool matched;
        Stopwatch sw;

        Console.WriteLine("With backtracking:");
        string backPattern = "^(([0-9a-fA-F]{1,4}:)*([0-9a-fA-F]{1,4}))*(::)$";
        sw = Stopwatch.StartNew();
        matched = Regex.IsMatch(input, backPattern);
        sw.Stop();
        // Print the result captured in the timed region. Calling IsMatch again
        // here would repeat the whole match outside the stopwatch.
        Console.WriteLine("Match: {0} in {1}", matched, sw.Elapsed);
        Console.WriteLine();

        Console.WriteLine("Without backtracking:");
        string noBackPattern = "^((?>[0-9a-fA-F]{1,4}:)*(?>[0-9a-fA-F]{1,4}))*(::)$";
        sw = Stopwatch.StartNew();
        matched = Regex.IsMatch(input, noBackPattern);
        sw.Stop();
        // Same as above: reuse the timed result rather than matching twice.
        Console.WriteLine("Match: {0} in {1}", matched, sw.Elapsed);
    }
}
30+
// The example displays output like the following:
31+
// With backtracking:
32+
// Match: False in 00:00:27.4282019
33+
//
34+
// Without backtracking:
35+
// Match: False in 00:00:00.0001391
36+
// </Snippet4>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// <Snippet5>
2+
using System;
3+
using System.Diagnostics;
4+
using System.Text.RegularExpressions;
5+
6+
/// <summary>
/// Times two patterns against the same e-mail-like input: one that uses an
/// optional group and one that uses a lookbehind assertion.
/// </summary>
public class Example5
{
    /// <summary>
    /// Matches the sample input against each pattern, ignoring case, and
    /// writes the result and the elapsed time for each.
    /// </summary>
    public static void Run()
    {
        const string address = "test@contoso.com";

        TimeAndReport("Match: {0} in {1}", address, @"^[0-9A-Z]([-.\w]*[0-9A-Z])?@");
        TimeAndReport("Match with Lookbehind: {0} in {1}", address, @"^[0-9A-Z][-.\w]*(?<=[0-9A-Z])@");
    }

    /// <summary>
    /// Times one case-insensitive <c>IsMatch</c> call and writes the outcome.
    /// </summary>
    /// <param name="format">Composite format string; {0} is the match result and {1} the elapsed time.</param>
    /// <param name="text">The string to search.</param>
    /// <param name="pattern">The regular expression pattern to apply.</param>
    private static void TimeAndReport(string format, string text, string pattern)
    {
        Stopwatch timer = Stopwatch.StartNew();
        bool isMatch = Regex.IsMatch(text, pattern, RegexOptions.IgnoreCase);
        timer.Stop();

        Console.WriteLine(format, isMatch, timer.Elapsed);
    }
}
27+
// The example displays output similar to the following:
28+
// Match: True in 00:00:00.0017549
29+
// Match with Lookbehind: True in 00:00:00.0000659
30+
// </Snippet5>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// <Snippet6>
2+
using System;
3+
using System.Diagnostics;
4+
using System.Text.RegularExpressions;
5+
6+
/// <summary>
/// Times two patterns against the same input: one with nested quantifiers
/// and one that uses a lookahead assertion instead.
/// </summary>
public class Example6
{
    /// <summary>
    /// Matches the sample input against each pattern in turn, ignoring case,
    /// and writes the result and the elapsed time for each.
    /// </summary>
    public static void Run()
    {
        const string subject = "aaaaaaaaaaaaaaaaaaaaaa.";

        // Nested-quantifier pattern first, then the lookahead version.
        string[] patterns =
        {
            @"^(([A-Z]\w*)+\.)*[A-Z]\w*$",
            @"^((?=[A-Z])\w+\.)*[A-Z]\w*$"
        };

        foreach (string candidate in patterns)
        {
            Stopwatch timer = Stopwatch.StartNew();
            bool isMatch = Regex.IsMatch(subject, candidate, RegexOptions.IgnoreCase);
            timer.Stop();

            Console.WriteLine("{0} in {1}", isMatch, timer.Elapsed);
        }
    }
}
27+
// The example displays the following output:
28+
// False in 00:00:03.8003793
29+
// False in 00:00:00.0000866
30+
// </Snippet6>

0 commit comments

Comments
 (0)