CLN: Lint for lists instead of generators in built-in Python functions #18335

Merged · 4 commits · Nov 19, 2017
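Background for this cleanup: a list comprehension passed to a built-in that accepts any iterable materializes the whole list before the call runs, while a generator expression is consumed lazily. A minimal sketch of the difference (illustrative names, not code from this PR):

```python
# Both compute the same result; the generator form avoids building
# an intermediate list of one million ints before sum() starts.
nums = range(10**6)

total_from_list = sum([n * n for n in nums])  # allocates the full list first
total_from_gen = sum(n * n for n in nums)     # feeds sum() one value at a time

assert total_from_list == total_from_gen
```

The hunks below apply this change mechanically across the codebase, and a new lint rule in ci/lint.sh keeps the pattern from coming back.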
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/frame_ctor.py
@@ -132,7 +132,7 @@ def setup(self, offset, n_steps):
offset = getattr(offsets, offset)
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
- self.d = dict([(col, self.df[col]) for col in self.df.columns])
+ self.d = dict(self.df.items())

def time_frame_ctor(self, offset, n_steps):
DataFrame(self.d)
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io_bench.py
@@ -202,7 +202,7 @@ class read_json_lines(object):
def setup(self):
self.N = 100000
self.C = 5
- self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]))
+ self.df = DataFrame(dict(('float{0}'.format(i), randn(self.N)) for i in range(self.C)))
self.df.to_json(self.fname,orient="records",lines=True)

def teardown(self):
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/packers.py
@@ -17,7 +17,7 @@ def _setup(self):
self.N = 100000
self.C = 5
self.index = date_range('20000101', periods=self.N, freq='H')
- self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index)
+ self.df = DataFrame(dict(('float{0}'.format(i), randn(self.N)) for i in range(self.C)), index=self.index)
self.df2 = self.df.copy()
self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)]
self.remove(self.f)
2 changes: 1 addition & 1 deletion asv_bench/vbench_to_asv.py
@@ -69,7 +69,7 @@ def visit_ClassDef(self, node):
return node

def visit_TryExcept(self, node):
- if any([isinstance(x, (ast.Import, ast.ImportFrom)) for x in node.body]):
+ if any(isinstance(x, (ast.Import, ast.ImportFrom)) for x in node.body):
self.imports.append(node)
else:
self.generic_visit(node)
13 changes: 13 additions & 0 deletions ci/lint.sh
@@ -84,6 +84,19 @@ if [ "$LINT" ]; then
fi
echo "Check for invalid testing DONE"

echo "Check for use of lists instead of generators in built-in Python functions"

# Example: Avoid `any([i for i in some_iterator])` in favor of `any(i for i in some_iterator)`
#
# Check the following functions:
# any(), all(), sum(), max(), min(), list(), dict(), set(), frozenset(), tuple(), str.join()
grep -R --include="*.py*" -E "[^_](any|all|sum|max|min|list|dict|set|frozenset|tuple|join)\(\[.* for .* in .*\]\)"

if [ $? = "0" ]; then
RET=1
fi
echo "Check for use of lists instead of generators in built-in Python functions DONE"

else
echo "NOT Linting"
fi
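For `any()` and `all()` the generator form also short-circuits, which the bracketed form cannot: the list comprehension runs the predicate over every element before `any()` sees a single result. Note that the grep above is a single-line heuristic; multi-line calls and arguments that do not start with `[` slip through. A small sketch of the short-circuit behavior (hypothetical `is_even` helper, not part of this PR):

```python
# any() stops consuming a generator at the first truthy value;
# a list comprehension evaluates the predicate for every element first.
calls = 0

def is_even(n):
    global calls
    calls += 1
    return n % 2 == 0

any(is_even(n) for n in range(100))    # stops at n == 0
print(calls)                           # 1

calls = 0
any([is_even(n) for n in range(100)])  # builds all 100 results up front
print(calls)                           # 100
```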
4 changes: 2 additions & 2 deletions doc/source/conf.py
@@ -78,7 +78,7 @@
# JP: added from sphinxdocs
autosummary_generate = False

- if any([re.match("\s*api\s*", l) for l in index_rst_lines]):
+ if any(re.match("\s*api\s*", l) for l in index_rst_lines):
autosummary_generate = True

files_to_delete = []
@@ -89,7 +89,7 @@

_file_basename = os.path.splitext(f)[0]
_regex_to_match = "\s*{}\s*$".format(_file_basename)
- if not any([re.match(_regex_to_match, line) for line in index_rst_lines]):
+ if not any(re.match(_regex_to_match, line) for line in index_rst_lines):
files_to_delete.append(f)

if files_to_delete:
2 changes: 1 addition & 1 deletion doc/sphinxext/ipython_sphinxext/ipython_directive.py
@@ -522,7 +522,7 @@ def process_output(self, data, output_prompt,
source = self.directive.state.document.current_source
content = self.directive.content
# Add tabs and join into a single string.
- content = '\n'.join([TAB + line for line in content])
+ content = '\n'.join(TAB + line for line in content)

# Make sure the output contains the output prompt.
ind = found.find(output_prompt)
2 changes: 1 addition & 1 deletion doc/sphinxext/numpydoc/compiler_unparse.py
@@ -399,7 +399,7 @@ def _Return(self, t):
self._fill("return ")
if t.value:
if isinstance(t.value, Tuple):
- text = ', '.join([ name.name for name in t.value.asList() ])
+ text = ', '.join(name.name for name in t.value.asList())
self._write(text)
else:
self._dispatch(t.value)
4 changes: 2 additions & 2 deletions doc/sphinxext/numpydoc/docscrape.py
@@ -270,7 +270,7 @@ def _parse_summary(self):
# If several signatures present, take the last one
while True:
summary = self._doc.read_to_next_empty_line()
summary_str = " ".join([s.strip() for s in summary]).strip()
summary_str = " ".join(s.strip() for s in summary).strip()
if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
self['Signature'] = summary_str
if not self._is_at_section():
@@ -289,7 +289,7 @@ def _parse(self):

for (section,content) in self._read_sections():
if not section.startswith('..'):
- section = ' '.join([s.capitalize() for s in section.split(' ')])
+ section = ' '.join(s.capitalize() for s in section.split(' '))
if section in ('Parameters', 'Returns', 'Raises', 'Warns',
'Other Parameters', 'Attributes', 'Methods'):
self[section] = self._parse_param_list(content)
4 changes: 2 additions & 2 deletions doc/sphinxext/numpydoc/docscrape_sphinx.py
@@ -130,7 +130,7 @@ def _str_member_list(self, name):
out += [''] + autosum

if others:
- maxlen_0 = max(3, max([len(x[0]) for x in others]))
+ maxlen_0 = max(3, max(len(x[0]) for x in others))
hdr = sixu("=")*maxlen_0 + sixu(" ") + sixu("=")*10
fmt = sixu('%%%ds %%s ') % (maxlen_0,)
out += ['', hdr]
@@ -203,7 +203,7 @@ def _str_references(self):
m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I)
if m:
items.append(m.group(1))
out += [' ' + ", ".join(["[%s]_" % item for item in items]), '']
out += [' ' + ", ".join("[%s]_" % item for item in items), '']
return out

def _str_examples(self):
2 changes: 1 addition & 1 deletion doc/sphinxext/numpydoc/phantom_import.py
@@ -60,7 +60,7 @@ def import_phantom_module(xml_file):
# Sort items so that
# - Base classes come before classes inherited from them
# - Modules come before their contents
- all_nodes = dict([(n.attrib['id'], n) for n in root])
+ all_nodes = dict((n.attrib['id'], n) for n in root)

def _get_bases(node, recurse=False):
bases = [x.attrib['ref'] for x in node.findall('base')]
4 changes: 2 additions & 2 deletions pandas/_libs/parsers.pyx
@@ -770,7 +770,7 @@ cdef class TextReader:
msg = self.orig_header
if isinstance(msg, list):
msg = "[%s], len of %d," % (
- ','.join([ str(m) for m in msg ]), len(msg))
+ ','.join(str(m) for m in msg), len(msg))
raise ParserError(
'Passed header=%s but only %d lines in file'
% (msg, self.parser.lines))
@@ -2227,7 +2227,7 @@ def _concatenate_chunks(list chunks):
for name in names:
arrs = [chunk.pop(name) for chunk in chunks]
# Check each arr for consistent types.
- dtypes = set([a.dtype for a in arrs])
+ dtypes = set(a.dtype for a in arrs)
if len(dtypes) > 1:
common_type = np.find_common_type(dtypes, [])
if common_type == np.object:
2 changes: 1 addition & 1 deletion pandas/_libs/src/inference.pyx
@@ -1309,7 +1309,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,

# we try to coerce datetime w/tz but must all have the same tz
if seen.datetimetz_:
- if len(set([getattr(val, 'tzinfo', None) for val in objects])) == 1:
+ if len({getattr(val, 'tzinfo', None) for val in objects}) == 1:
from pandas import DatetimeIndex
return DatetimeIndex(objects)
seen.object_ = 1
5 changes: 2 additions & 3 deletions pandas/_libs/tslibs/resolution.pyx
@@ -218,7 +218,7 @@ class Resolution(object):
'U': 'N',
'N': None}

- _str_reso_map = dict([(v, k) for k, v in _reso_str_map.items()])
+ _str_reso_map = {v: k for k, v in _reso_str_map.items()}

_reso_freq_map = {
'year': 'A',
@@ -232,8 +232,7 @@
'microsecond': 'U',
'nanosecond': 'N'}

- _freq_reso_map = dict([(v, k)
- for k, v in _reso_freq_map.items()])
+ _freq_reso_map = {v: k for k, v in _reso_freq_map.items()}

@classmethod
def get_str(cls, reso):
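Worth noting in this hunk (and the inference.pyx one above): when the target is a `dict` or `set`, the PR prefers the dedicated comprehension syntax over a generator inside the constructor, which skips the intermediate iterable entirely. A comparison of the three spellings (illustrative only, not from the PR):

```python
# Three ways to invert a small mapping; the comprehension is the most direct.
pairs = {'year': 'A', 'day': 'D'}

inverted_list = dict([(v, k) for k, v in pairs.items()])  # builds a throwaway list
inverted_gen = dict((v, k) for k, v in pairs.items())     # lazy, but still indirect
inverted_comp = {v: k for k, v in pairs.items()}          # dict comprehension

assert inverted_list == inverted_gen == inverted_comp
```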
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
@@ -568,7 +568,7 @@ class TimeRE(dict):
break
else:
return ''
- regex = '|'.join([re.escape(stuff) for stuff in to_convert])
+ regex = '|'.join(re.escape(stuff) for stuff in to_convert)
regex = '(?P<%s>%s' % (directive, regex)
return '%s)' % regex
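One caveat for the `str.join` cases such as this one: in CPython, `join` makes two passes over its argument, so a generator is materialized into a sequence internally anyway, and the list-comprehension form often benchmarks as equal or slightly faster. The change is stylistic consistency rather than a performance win. A rough way to check on a given machine (illustrative, not from the PR):

```python
# Compare join over a list comprehension vs. a generator expression.
import timeit

words = ['w%d' % i for i in range(1000)]
t_list = timeit.timeit(lambda: ','.join([w.upper() for w in words]), number=2000)
t_gen = timeit.timeit(lambda: ','.join(w.upper() for w in words), number=2000)
print(t_list, t_gen)  # typically close; the list form often edges ahead
```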

6 changes: 3 additions & 3 deletions pandas/_version.py
@@ -141,11 +141,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
- refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ refs = set(r.strip() for r in refnames.strip("()").split(","))
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ tags = set(r[len(TAG):] for r in refs if r.startswith(TAG))
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -154,7 +154,7 @@
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)])
+ tags = set(r for r in refs if re.search(r'\d', r))
if verbose:
print("discarding '{}', no digits".format(",".join(refs - tags)))
if verbose:
2 changes: 1 addition & 1 deletion pandas/core/common.py
@@ -347,7 +347,7 @@ def map_indices_py(arr):
Returns a dictionary with (element, index) pairs for each element in the
given array/list
"""
- return dict([(x, i) for i, x in enumerate(arr)])
+ return dict((x, i) for i, x in enumerate(arr))


def union(*seqs):
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
@@ -459,7 +459,7 @@ def _concat_datetimetz(to_concat, name=None):
it is used in DatetimeIndex.append also
"""
# do not pass tz to set because tzlocal cannot be hashed
- if len(set([str(x.dtype) for x in to_concat])) != 1:
+ if len(set(str(x.dtype) for x in to_concat)) != 1:
raise ValueError('to_concat must have the same tz')
tz = to_concat[0].tz
# no need to localize because internal repr will not be changed
12 changes: 5 additions & 7 deletions pandas/core/frame.py
@@ -3895,7 +3895,7 @@ def f(col):
return self._constructor_sliced(r, index=new_index,
dtype=r.dtype)

- result = dict([(col, f(col)) for col in this])
+ result = dict((col, f(col)) for col in this)

# non-unique
else:
@@ -3906,9 +3906,7 @@ def f(i):
return self._constructor_sliced(r, index=new_index,
dtype=r.dtype)

- result = dict([
- (i, f(i)) for i, col in enumerate(this.columns)
- ])
+ result = dict((i, f(i)) for i, col in enumerate(this.columns))
result = self._constructor(result, index=new_index, copy=False)
result.columns = new_columns
return result
@@ -3986,7 +3984,7 @@ def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):
if self.columns.is_unique:

def _compare(a, b):
- return dict([(col, func(a[col], b[col])) for col in a.columns])
+ return dict((col, func(a[col], b[col])) for col in a.columns)

new_data = expressions.evaluate(_compare, str_rep, self, other)
return self._constructor(data=new_data, index=self.index,
@@ -3995,8 +3993,8 @@ def _compare(a, b):
else:

def _compare(a, b):
- return dict([(i, func(a.iloc[:, i], b.iloc[:, i]))
- for i, col in enumerate(a.columns)])
+ return dict((i, func(a.iloc[:, i], b.iloc[:, i]))
+ for i, col in enumerate(a.columns))

new_data = expressions.evaluate(_compare, str_rep, self, other)
result = self._constructor(data=new_data, index=self.index,
26 changes: 13 additions & 13 deletions pandas/core/generic.py
@@ -279,21 +279,21 @@ def set_axis(a, i):

def _construct_axes_dict(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
- d = dict([(a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS)])
+ d = dict((a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS))
d.update(kwargs)
return d

@staticmethod
def _construct_axes_dict_from(self, axes, **kwargs):
"""Return an axes dictionary for the passed axes."""
- d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)])
+ d = dict((a, ax) for a, ax in zip(self._AXIS_ORDERS, axes))
d.update(kwargs)
return d

def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
- d = dict([(self._AXIS_SLICEMAP[a], self._get_axis(a))
- for a in (axes or self._AXIS_ORDERS)])
+ d = dict((self._AXIS_SLICEMAP[a], self._get_axis(a))
+ for a in (axes or self._AXIS_ORDERS))
d.update(kwargs)
return d

@@ -329,7 +329,7 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
raise TypeError("not enough/duplicate arguments "
"specified!")

- axes = dict([(a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS])
+ axes = dict((a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS)
return axes, kwargs

@classmethod
@@ -586,10 +586,10 @@ def transpose(self, *args, **kwargs):
# construct the args
axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
require_all=True)
- axes_names = tuple([self._get_axis_name(axes[a])
- for a in self._AXIS_ORDERS])
- axes_numbers = tuple([self._get_axis_number(axes[a])
- for a in self._AXIS_ORDERS])
+ axes_names = tuple(self._get_axis_name(axes[a])
+ for a in self._AXIS_ORDERS)
+ axes_numbers = tuple(self._get_axis_number(axes[a])
+ for a in self._AXIS_ORDERS)

# we must have unique axes
if len(axes) != len(set(axes)):
@@ -699,8 +699,8 @@ def squeeze(self, axis=None):
(self._get_axis_number(axis),))
try:
return self.iloc[
- tuple([0 if i in axis and len(a) == 1 else slice(None)
- for i, a in enumerate(self.axes)])]
+ tuple(0 if i in axis and len(a) == 1 else slice(None)
+ for i, a in enumerate(self.axes))]
except Exception:
return self

@@ -4277,8 +4277,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
elif self.ndim == 3:

# fill in 2d chunks
- result = dict([(col, s.fillna(method=method, value=value))
- for col, s in self.iteritems()])
+ result = dict((col, s.fillna(method=method, value=value))
+ for col, s in self.iteritems())
new_obj = self._constructor.\
from_dict(result).__finalize__(self)
new_data = new_obj._data
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
@@ -471,7 +471,7 @@ def get_converter(s):
raise ValueError(msg)

converters = [get_converter(s) for s in index_sample]
- names = [tuple([f(n) for f, n in zip(converters, name)])
+ names = [tuple(f(n) for f, n in zip(converters, name))
for name in names]

else:
6 changes: 3 additions & 3 deletions pandas/core/indexes/api.py
@@ -101,7 +101,7 @@ def conv(i):


def _sanitize_and_check(indexes):
- kinds = list(set([type(index) for index in indexes]))
+ kinds = list({type(index) for index in indexes})

if list in kinds:
if len(kinds) > 1:
@@ -122,8 +122,8 @@ def _get_consensus_names(indexes):

# find the non-none names, need to tupleify to make
# the set hashable, then reverse on return
- consensus_names = set([tuple(i.names) for i in indexes
- if com._any_not_none(*i.names)])
+ consensus_names = set(tuple(i.names) for i in indexes
+ if com._any_not_none(*i.names))
if len(consensus_names) == 1:
return list(list(consensus_names)[0])
return [None] * indexes[0].nlevels