Skip to content

Commit aa85f02

Browse files
janosh and MarcoGorelli authored
Fix typos (pandas-dev#47275)
* fix codespell violations
* drop rule files: ^(pandas|doc)/ from codespell pre-commit config

  also fix typos fempto -> femto
* improve codespell ignore-regex in setup.cfg

Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
1 parent 11881ea commit aa85f02

18 files changed

+26
-28
lines changed

.pre-commit-config.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ repos:
2626
hooks:
2727
- id: codespell
2828
types_or: [python, rst, markdown]
29-
files: ^(pandas|doc)/
3029
- repo: https://github.com/pre-commit/pre-commit-hooks
3130
rev: v4.2.0
3231
hooks:

asv_bench/benchmarks/frame_ctor.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def setup(self):
3737
self.dict_list = frame.to_dict(orient="records")
3838
self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)}
3939

40-
# arrays which we wont consolidate
40+
# arrays which we won't consolidate
4141
self.dict_of_categoricals = {i: Categorical(np.arange(N)) for i in range(K)}
4242

4343
def time_list_of_dict(self):
@@ -60,7 +60,7 @@ def time_nested_dict_int64(self):
6060
DataFrame(self.data2)
6161

6262
def time_dict_of_categoricals(self):
63-
# dict of arrays that we wont consolidate
63+
# dict of arrays that we won't consolidate
6464
DataFrame(self.dict_of_categoricals)
6565

6666

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ def time_dtype_as_field(self, dtype, method, application, ncols):
527527

528528
class GroupByCythonAgg:
529529
"""
530-
Benchmarks specifically targetting our cython aggregation algorithms
530+
Benchmarks specifically targeting our cython aggregation algorithms
531531
(using a big enough dataframe with simple key, so a large part of the
532532
time is actually spent in the grouped aggregation).
533533
"""

asv_bench/benchmarks/libs.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Benchmarks for code in pandas/_libs, excluding pandas/_libs/tslibs,
33
which has its own directory.
44
5-
If a PR does not edit anything in _libs/, then it is unlikely that thes
5+
If a PR does not edit anything in _libs/, then it is unlikely that the
66
benchmarks will be affected.
77
"""
88
import numpy as np

asv_bench/benchmarks/replace.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def time_replace_list(self, inplace):
5050
self.df.replace([np.inf, -np.inf], np.nan, inplace=inplace)
5151

5252
def time_replace_list_one_match(self, inplace):
53-
# the 1 can be held in self._df.blocks[0], while the inf and -inf cant
53+
# the 1 can be held in self._df.blocks[0], while the inf and -inf can't
5454
self.df.replace([np.inf, -np.inf, 1], np.nan, inplace=inplace)
5555

5656

asv_bench/benchmarks/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def time_stack(self, dtype):
7878
self.df.stack()
7979

8080
def time_unstack_fast(self, dtype):
81-
# last level -> doesnt have to make copies
81+
# last level -> doesn't have to make copies
8282
self.ser.unstack("bar")
8383

8484
def time_unstack_slow(self, dtype):

asv_bench/benchmarks/tslibs/period.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""
2-
Period benchmarks that rely only on tslibs. See benchmarks.period for
3-
Period benchmarks that rely on other parts fo pandas.
2+
Period benchmarks that rely only on tslibs. See benchmarks.period for
3+
Period benchmarks that rely on other parts of pandas.
44
"""
55

66
import numpy as np

asv_bench/benchmarks/tslibs/timedelta.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""
2-
Timedelta benchmarks that rely only on tslibs. See benchmarks.timedeltas for
3-
Timedelta benchmarks that rely on other parts fo pandas.
2+
Timedelta benchmarks that rely only on tslibs. See benchmarks.timedeltas for
3+
Timedelta benchmarks that rely on other parts of pandas.
44
"""
55
import datetime
66

doc/source/user_guide/style.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1762,7 +1762,7 @@
17621762
"cell_type": "markdown",
17631763
"metadata": {},
17641764
"source": [
1765-
"In the above case the text is blue because the selector `#T_b_ .cls-1` is worth 110 (ID plus class), which takes precendence."
1765+
"In the above case the text is blue because the selector `#T_b_ .cls-1` is worth 110 (ID plus class), which takes precedence."
17661766
]
17671767
},
17681768
{

pandas/_libs/groupby.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,7 @@ cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):
10111011
elif numeric_t is int64_t and is_datetimelike:
10121012
na_val = NPY_NAT
10131013
else:
1014-
# Will not be used, but define to avoid unitialized warning.
1014+
# Will not be used, but define to avoid uninitialized warning.
10151015
na_val = 0
10161016
return na_val
10171017

pandas/_libs/tslibs/dtypes.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns)
349349

350350
if reso == NPY_DATETIMEUNIT.NPY_FR_ps:
351351
# pico is the smallest unit for which we don't overflow, so
352-
# we exclude fempto and atto
352+
# we exclude femto and atto
353353
day_units = 24 * 3600 * 1_000_000_000_000
354354
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns:
355355
day_units = 24 * 3600 * 1_000_000_000

pandas/_libs/tslibs/timedeltas.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def ints_to_pytimedelta(ndarray m8values, box=False):
202202
elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
203203
res_val = timedelta(weeks=value)
204204
else:
205-
# Month, Year, NPY_FR_GENERIC, pico, fempto, atto
205+
# Month, Year, NPY_FR_GENERIC, pico, femto, atto
206206
raise NotImplementedError(reso)
207207

208208
# Note: we can index result directly instead of using PyArray_MultiIter_DATA

pandas/io/common.py

-1
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,6 @@ def check_parent_directory(path: Path | str) -> None:
590590
----------
591591
path: Path or str
592592
Path to check parent directory of
593-
594593
"""
595594
parent = Path(path).parent
596595
if not parent.is_dir():

pandas/tests/tools/test_to_datetime.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ def test_to_datetime_YYYYMMDD(self):
610610
actual = to_datetime("20080115")
611611
assert actual == datetime(2008, 1, 15)
612612

613-
def test_to_datetime_unparseable_ignore(self):
613+
def test_to_datetime_unparsable_ignore(self):
614614
# unparsable
615615
ser = "Month 1, 1999"
616616
assert to_datetime(ser, errors="ignore") == ser

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ exclude =
160160

161161
[codespell]
162162
ignore-words-list = ba,blocs,coo,hist,nd,sav,ser
163-
ignore-regex = https://(\w+\.)+
163+
ignore-regex = https://([\w/\.])+
164164

165165
[coverage:run]
166166
branch = True

versioneer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -691,7 +691,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
691691
# TAG-NUM-gHEX
692692
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
693693
if not mo:
694-
# unparseable. Maybe git-describe is misbehaving?
694+
# unparsable. Maybe git-describe is misbehaving?
695695
pieces["error"] = ("unable to parse git-describe output: '%%s'"
696696
%% describe_out)
697697
return pieces
@@ -1105,7 +1105,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
11051105
# TAG-NUM-gHEX
11061106
mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
11071107
if not mo:
1108-
# unparseable. Maybe git-describe is misbehaving?
1108+
# unparsable. Maybe git-describe is misbehaving?
11091109
pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
11101110
return pieces
11111111

web/pandas/community/blog/2019-user-survey.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ This analysis and the raw data can be found [on GitHub](https://github.com/panda
2626
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/pandas-dev/pandas-user-surveys/master?filepath=2019.ipynb)
2727

2828

29-
We had about 1250 repsonses over the 15 days we ran the survey in the summer of 2019.
29+
We had about 1250 responses over the 15 days we ran the survey in the summer of 2019.
3030

3131
## About the Respondents
3232

33-
There was a fair amount of representation across pandas experience and frequeny of use, though the majority of respondents are on the more experienced side.
33+
There was a fair amount of representation across pandas experience and frequency of use, though the majority of respondents are on the more experienced side.
3434

3535

3636

@@ -101,15 +101,15 @@ CSV and Excel are (for better or worse) the most popular formats.
101101
![png]({{ base_url }}/static/img/blog/2019-user-survey/2019_18_0.png)
102102

103103

104-
In preperation for a possible refactor of pandas internals, we wanted to get a sense for
104+
In preparation for a possible refactor of pandas internals, we wanted to get a sense for
105105
how common wide (100s of columns or more) DataFrames are.
106106

107107

108108

109109
![png]({{ base_url }}/static/img/blog/2019-user-survey/2019_20_0.png)
110110

111111

112-
Pandas is slowly growing new exentension types. Categoricals are the most popular,
112+
Pandas is slowly growing new extension types. Categoricals are the most popular,
113113
and the nullable integer type is already almost as popular as datetime with timezone.
114114

115115

@@ -139,7 +139,7 @@ Of these, the clear standout is "scaling" to large datasets. A couple observatio
139139
1. Perhaps pandas' documentation should do a better job of promoting libraries that provide scalable dataframes (like [Dask](https://dask.org), [vaex](https://vaex.io), and [modin](https://modin.readthedocs.io/en/latest/))
140140
2. Memory efficiency (perhaps from a native string data type, fewer internal copies, etc.) is a valuable goal.
141141

142-
After that, the next-most critical improvement is integer missing values. Those were actually added in [Pandas 0.24](https://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.24.0.html#optional-integer-na-support), but they're not the default, and there's still some incompatibilites with the rest of pandas API.
142+
After that, the next-most critical improvement is integer missing values. Those were actually added in [Pandas 0.24](https://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.24.0.html#optional-integer-na-support), but they're not the default, and there's still some incompatibilities with the rest of pandas API.
143143

144144
Pandas is a less conservative library than, say, NumPy. We're approaching 1.0, but on the way we've made many deprecations and some outright API breaking changes. Fortunately, most people are OK with the tradeoff.
145145

web/pandas/community/ecosystem.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ users to view, manipulate and edit pandas `Index`, `Series`, and
177177
`DataFrame` objects like a "spreadsheet", including copying and
178178
modifying values, sorting, displaying a "heatmap", converting data
179179
types and more. Pandas objects can also be renamed, duplicated, new
180-
columns added, copyed/pasted to/from the clipboard (as TSV), and
180+
columns added, copied/pasted to/from the clipboard (as TSV), and
181181
saved/loaded to/from a file. Spyder can also import data from a variety
182182
of plain text and binary files or the clipboard into a new pandas
183183
DataFrame via a sophisticated import wizard.
@@ -379,8 +379,8 @@ A directory of projects providing
379379
`extension accessors <extending.register-accessors>`. This is for users to discover new accessors and for library
380380
authors to coordinate on the namespace.
381381

382-
| Library | Accessor | Classes |
383-
| ---------------------------------------------------------------------|------------|-----------------------|
382+
| Library | Accessor | Classes |
383+
| -------------------------------------------------------------------- | ---------- | --------------------- |
384384
| [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) | `ip` | `Series` |
385385
| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` |
386386
| [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` |

0 commit comments

Comments
 (0)