@@ -100,22 +100,45 @@ def is_safe_regex_pattern(pattern: str) -> bool:
100
100
Returns:
101
101
True if the pattern looks like a safe regex, False otherwise
102
102
"""
103
- # Allow basic regex operators that are commonly used and safe
104
- safe_regex_chars = ['|' , '(' , ')' , '[' , ']' , '^' , '$' ]
103
+ # Strong indicators of regex intent
104
+ strong_regex_indicators = ['|' , '(' , ')' , '[' , ']' , '^' , '$' ]
105
105
106
- # Check if pattern contains any regex metacharacters
107
- has_regex_chars = any ( char in pattern for char in safe_regex_chars )
106
+ # Weaker indicators that need context
107
+ weak_regex_indicators = [ '.' , '*' , '+' , '?' ]
108
108
109
- # Basic safety check - avoid obviously dangerous patterns
110
- dangerous_patterns = [
111
- r'(.+)+' , # Nested quantifiers
112
- r'(.*)*' , # Nested stars
113
- r'(.{0,})+' , # Potential ReDoS patterns
114
- ]
109
+ # Check for strong regex indicators
110
+ has_strong_regex = any (char in pattern for char in strong_regex_indicators )
115
111
116
- has_dangerous_patterns = any (dangerous in pattern for dangerous in dangerous_patterns )
112
+ # Check whether any weak indicators are present (their context is examined further below)
113
+ has_weak_regex = any (char in pattern for char in weak_regex_indicators )
117
114
118
- return has_regex_chars and not has_dangerous_patterns
115
+ # If the pattern has strong indicators, it is likely a regex
116
+ if has_strong_regex :
117
+ # Still check for dangerous patterns
118
+ dangerous_patterns = [
119
+ r'(.+)+' , # Nested quantifiers
120
+ r'(.*)*' , # Nested stars
121
+ r'(.{0,})+' , # Potential ReDoS patterns
122
+ ]
123
+
124
+ has_dangerous_patterns = any (dangerous in pattern for dangerous in dangerous_patterns )
125
+ return not has_dangerous_patterns
126
+
127
+ # If the pattern has only weak indicators, more context is needed to decide
128
+ if has_weak_regex :
129
+ # Patterns like ".*", ".+", "file.*py" look like regex
130
+ # But "file.txt", "test.py" look like literal filenames
131
+ regex_like_patterns = [
132
+ r'\.\*' , # .*
133
+ r'\.\+' , # .+
134
+ r'\.\w*\*' , # .something*
135
+ r'\*\.' , # *.
136
+ r'\w+\.\*\w*' , # word.*word
137
+ ]
138
+
139
+ return any (re .search (regex_pattern , pattern ) for regex_pattern in regex_like_patterns )
140
+
141
+ return False
119
142
120
143
121
144
class SearchStrategy (ABC ):
0 commit comments