Skip to content

Commit 3ae2f2b

Browse files
author
MomIsBestFriend
committed
CI: Add test case for unwanted patterns
1 parent 1593023 commit 3ae2f2b

File tree

2 files changed

+135
-0
lines changed

2 files changed

+135
-0
lines changed

ci/code_checks.sh

+4
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
212212
invgrep -R --include=*.{py,pyx} 'xrange' pandas
213213
RET=$(($RET + $?)) ; echo $MSG "DONE"
214214

215+
MSG='Check for use of not concatenated strings' ; echo $MSG
216+
python $BASE_DIR/scripts/validate_string_concatenation.py pandas
217+
RET=$(($RET + $?)) ; echo $MSG "DONE"
218+
215219
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
216220
INVGREP_APPEND=" <- trailing whitespaces found"
217221
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/env python
2+
"""
3+
GH #30454
4+
5+
Check where there is a string that needs to be concatenated.
6+
7+
This is necessary after black formatting,
8+
where for example black transforms this:
9+
10+
>>> foo = (
11+
... "bar "
12+
... "baz"
13+
... )
14+
15+
into this:
16+
17+
>>> foo = ("bar " "baz")
18+
19+
Black is not considering this as an
20+
issue (see https://github.com/psf/black/issues/1051), so we are checking
21+
it here.
22+
"""
23+
24+
import os
25+
import sys
26+
import token
27+
import tokenize
28+
29+
# Full extensions (as computed by full_ext) of the files that main() checks
# when it walks a directory.
# NOTE(review): ".pyx.ini" does not match the ".pxi.ini" spelling used in
# full_ext's docstring -- confirm which one is intended.
FILE_EXTENSIONS_TO_CHECK = [".pxd", ".py", ".pyx", ".pyx.ini"]
30+
31+
32+
def main():
    """
    Check a file, or every matching file under a directory, for strings
    that need to be concatenated.

    The path to check is read from the first command-line argument. When it
    points to a directory, the tree is walked and every file whose full
    extension is listed in ``FILE_EXTENSIONS_TO_CHECK`` is checked.

    The process exits with status 1 if any checked file needs a fix, and
    with status 0 otherwise.

    Raises
    ------
    ValueError
        If no path was given, or if the given path does not exist.
    """
    # A missing argument is reported the same way as an invalid path,
    # instead of surfacing as a bare IndexError.
    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
        raise ValueError("Please enter a valid path, to a file/directory.")

    path = sys.argv[1]

    if os.path.isfile(path):
        # Means that the given path is of a single file.
        sys.exit(is_concatenated(path))

    # Means that the given path is of a directory.
    needs_fix = False
    for subdir, _, files in os.walk(path):
        for file_name in files:
            file_path = os.path.join(subdir, file_name)
            if full_ext(file_path) not in FILE_EXTENSIONS_TO_CHECK:
                continue
            # Every matching file is checked (no short-circuiting), so that
            # all offending lines are printed in a single run.
            if is_concatenated(file_path):
                needs_fix = True

    sys.exit(int(needs_fix))
54+
55+
56+
def full_ext(path):
    """
    Get the full file extension name.

    Parameters
    ----------
    path : str
        File path.

    Returns
    -------
    str
        Full extension of the file: everything from the first dot of the
        file name onwards. Empty string if the file name contains no dot.

    Notes
    -----
    This function is needed only because of file extensions like
    `.pxi.ini` for example.

    Only the last path component is inspected, so a dot in a directory
    name (for example ``./pandas`` or ``pkg.egg-info/``) is not mistaken
    for the start of the extension.

    Examples
    --------

    With one suffix:

    >>> ext = full_ext('/full/path/to/file.py')
    >>> ext
    '.py'

    With two suffixes:

    >>> ext = full_ext('/full/path/to/file.pxi.ini')
    >>> ext
    '.pxi.ini'

    With a dot in a directory name:

    >>> full_ext('./path/to/file.py')
    '.py'
    """
    # Drop the directory part first; splitting the whole path on "." would
    # treat dots in directory names as the beginning of the extension.
    file_name = os.path.basename(path)
    _, dot, suffixes = file_name.partition(".")
    return dot + suffixes
92+
93+
94+
def is_concatenated(file_path):
    """
    Check whether the file contains strings that need to be concatenated.

    Two string tokens that directly follow each other (for example
    ``"bar " "baz"`` on one line) are reported; each occurrence is printed
    as ``<file_path>:<line_number>:<TAB><first string> and <second string>``.

    Parameters
    ----------
    file_path : str
        File path pointing to a single file.

    Returns
    -------
    int
        Status code representing if the file needs a fix.
        0 - All good.
        1 - Needs to be fixed.
    """
    # tokenize.open detects the source encoding (BOM / coding cookie,
    # UTF-8 otherwise) instead of relying on the locale's default encoding.
    with tokenize.open(file_path) as source_file:
        toks = list(tokenize.generate_tokens(source_file.readline))

    need_fix = False
    # Walk over every pair of consecutive tokens.
    for tok, next_tok in zip(toks, toks[1:]):
        if tok[0] == token.STRING and next_tok[0] == token.STRING:
            need_fix = True
            print(
                "{file_path}:{line_number}:\t{start} and {end}".format(
                    file_path=file_path,
                    line_number=tok[2][0],
                    start=tok[1],
                    end=next_tok[1],
                )
            )

    return int(need_fix)
128+
129+
130+
# Run the check only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)