docs/client/overview.rst (4 changes: 2 additions & 2 deletions)
@@ -144,7 +144,7 @@ For example, to run a new job for a given spider with custom parameters::



-Geting job information
+Getting job information
^^^^^^^^^^^^^^^^^^^^^^

To select a specific job for a project, use ``.jobs.get(<jobKey>)``::
@@ -387,7 +387,7 @@ acts like a Python dictionary::
'5123a86-master'

To check what keys are available (they ultimately depend on the job),
-you can use its ``.iter()`` method (here, it's wrapped inside a dict for readibility)::
+you can use its ``.iter()`` method (here, it's wrapped inside a dict for readability)::

>>> dict(job.metadata.iter())
{...
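For context, the overview section these two doc fixes touch walks through fetching a job and reading its metadata. A minimal sketch of that flow, assuming SH_APIKEY is set in the environment and using a hypothetical job key:

    from scrapinghub import ScrapinghubClient

    client = ScrapinghubClient()  # picks up SH_APIKEY from the environment
    job = client.get_job('123456/1/56')  # hypothetical job key

    # job.metadata acts like a dictionary; .iter() yields its key/value pairs.
    metadata = dict(job.metadata.iter())
    print(sorted(metadata))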
docs/conf.py (1 change: 1 addition & 0 deletions)
@@ -172,6 +172,7 @@
html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

+
# disable cross-reference for ivar
# patch taken from http://stackoverflow.com/a/41184353/1932023
def patched_make_field(self, types, domain, items, env=None):
scrapinghub/client/jobs.py (8 changes: 4 additions & 4 deletions)
@@ -54,9 +54,9 @@ def count(self, spider=None, state=None, has_tag=None, lacks_tag=None,
:param lacks_tag: (optional) filter results by missing tag(s), a string
or a list of strings.
:param startts: (optional) UNIX timestamp at which to begin results,
-in millisecons.
+in milliseconds.
:param endts: (optional) UNIX timestamp at which to end results,
-in millisecons.
+in milliseconds.
:param \*\*params: (optional) other filter params.

:return: jobs count.
@@ -222,9 +222,9 @@ def list(self, count=None, start=None, spider=None, state=None,
:param lacks_tag: (optional) filter results by missing tag(s), a string
or a list of strings.
:param startts: (optional) UNIX timestamp at which to begin results,
-in millisecons.
+in milliseconds.
:param endts: (optional) UNIX timestamp at which to end results,
-in millisecons.
+in milliseconds.
:param meta: (optional) request for additional fields, a single
field name or a list of field names to return.
:param \*\*params: (optional) other filter params.
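The startts/endts parameters documented above take UNIX timestamps in milliseconds, not seconds. A short sketch of how a caller might use them, with a hypothetical project id and spider name and assuming SH_APIKEY is set:

    import time

    from scrapinghub import ScrapinghubClient

    client = ScrapinghubClient()  # reads SH_APIKEY from the environment
    spider = client.get_project(123456).spiders.get('myspider')  # hypothetical ids

    # Count jobs finished in the last 24 hours; startts/endts are in milliseconds.
    now_ms = int(time.time() * 1000)
    day_ms = 24 * 60 * 60 * 1000
    print(spider.jobs.count(state='finished', startts=now_ms - day_ms, endts=now_ms))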
scrapinghub/client/utils.py (8 changes: 4 additions & 4 deletions)
@@ -89,7 +89,7 @@ def update_kwargs(kwargs, **params):


def parse_auth(auth):
-"""Parse authentification token.
+"""Parse authentication token.

>>> os.environ['SH_APIKEY'] = 'apikey'
>>> parse_auth(None)
Expand All @@ -106,7 +106,7 @@ def parse_auth(auth):
if auth is None:
apikey = os.environ.get('SH_APIKEY')
if apikey:
-return (apikey, '')
+return apikey, ''

jobauth = os.environ.get('SHUB_JOBAUTH')
if jobauth:
@@ -131,7 +131,7 @@ def parse_auth(auth):
return jwt_auth

login, _, password = auth.partition(':')
-return (login, password)
+return login, password


def _search_for_jwt_credentials(auth):
Expand All @@ -144,6 +144,6 @@ def _search_for_jwt_credentials(auth):
decoded_auth = decoded_auth.decode('ascii')
login, _, password = decoded_auth.partition(':')
if password and parse_job_key(login):
-return (login, password)
+return login, password
except (UnicodeDecodeError, ValueError):
pass
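The return (x, y) -> return x, y changes in this file are purely cosmetic: in Python it is the comma, not the parentheses, that builds the tuple, so both spellings produce the same value (and the same bytecode). A quick illustration:

    def with_parens():
        return ('user', 'pass')

    def without_parens():
        return 'user', 'pass'

    # Both functions return the identical tuple.
    assert with_parens() == without_parens() == ('user', 'pass')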
scrapinghub/hubstorage/batchuploader.py (1 change: 1 addition & 0 deletions)
@@ -196,6 +196,7 @@ def _upload(self, batch):
headers=headers,
)

+
class ValueTooLarge(ValueError):
"""Raised when a serialized item is greater than 1MB"""

scrapinghub/hubstorage/collectionsrt.py (29 changes: 15 additions & 14 deletions)
@@ -66,21 +66,22 @@ def truncate(self, _name):
return self.apipost('delete', params={'name': _name}, is_idempotent=True)

def iter_json(self, _type, _name, requests_params=None, **apiparams):
-return DownloadableResource.iter_json(self, (_type, _name),
-requests_params=requests_params, **apiparams)
+return DownloadableResource.iter_json(
+self, (_type, _name), requests_params=requests_params, **apiparams
+)

def iter_msgpack(self, _type, _name, requests_params=None, **apiparams):
-return DownloadableResource.iter_msgpack(self, (_type, _name),
-requests_params=requests_params, **apiparams)
+return DownloadableResource.iter_msgpack(
+self, (_type, _name), requests_params=requests_params, **apiparams
+)

def create_writer(self, coltype, colname, **writer_kwargs):
self._validate_collection(coltype, colname)
kwargs = dict(writer_kwargs)
kwargs.setdefault('content_encoding', 'gzip')
kwargs.setdefault('auth', self.auth)
url = urlpathjoin(self.url, coltype, colname)
-return self.client.batchuploader.create_writer(url,
-**kwargs)
+return self.client.batchuploader.create_writer(url, **kwargs)

def new_collection(self, coltype, colname):
self._validate_collection(coltype, colname)
@@ -109,15 +110,14 @@ def _validate_collection(self, coltype, colname):
raise ValueError('Invalid collection name {!r}, only alphanumeric '
'characters'.format(colname))

-
def _batch(self, method, path, total_param, progress=None, **params):
total = 0
getparams = dict(params)
try:
while True:
r = next(self.apirequest(
path, method=method, params=getparams,
-is_idempotent=method=='GET',
+is_idempotent=method == 'GET',
))
total += r[total_param]
next_start = r.get('nextstart')
@@ -147,8 +147,7 @@ def create_writer(self, **kwargs):
kwargs are passed to batchuploader.create_writer, but auth and gzip
content encoding are specified if not provided
"""
-return self._collections.create_writer(self.coltype, self.colname,
-**kwargs)
+return self._collections.create_writer(self.coltype, self.colname, **kwargs)

def get(self, *args, **kwargs):
return self._collections.get(self.coltype, self.colname, *args, **kwargs)
@@ -166,9 +165,11 @@ def count(self, *args, **kwargs):
return self._collections.count(self.coltype, self.colname, *args, **kwargs)

def iter_json(self, requests_params=None, **apiparams):
-return self._collections.iter_json(self.coltype, self.colname,
-requests_params=requests_params, **apiparams)
+return self._collections.iter_json(
+self.coltype, self.colname, requests_params=requests_params, **apiparams
+)

def iter_values(self, requests_params=None, **apiparams):
-return self._collections.iter_values(self.coltype, self.colname,
-requests_params=requests_params, **apiparams)
+return self._collections.iter_values(
+self.coltype, self.colname, requests_params=requests_params, **apiparams
+)
scrapinghub/hubstorage/job.py (2 changes: 1 addition & 1 deletion)
@@ -1,6 +1,6 @@
import logging
from .resourcetype import (ItemsResourceType, DownloadableResource,
-MappingResourceType)
+MappingResourceType)
from .utils import millitime, urlpathjoin
from .jobq import JobQ

scrapinghub/hubstorage/project.py (3 changes: 2 additions & 1 deletion)
@@ -15,7 +15,7 @@ def __init__(self, client, projectid, auth=None):
self.client = client
self.projectid = urlpathjoin(projectid)
assert len(self.projectid.split('/')) == 1, \
-'projectkey must be just one id: %s' % projectid
+'projectkey must be just one id: %s' % projectid
self.auth = xauth(auth) or client.auth
self.jobs = Jobs(client, self.projectid, auth=auth)
self.items = Items(client, self.projectid, auth=auth)
@@ -68,6 +68,7 @@ class Jobs(ResourceType):
def list(self, _key=None, **params):
return self.apiget(_key, params=params)

+
class Items(ResourceType):

resource_type = 'items'
scrapinghub/hubstorage/utils.py (2 changes: 1 addition & 1 deletion)
@@ -45,7 +45,7 @@ def urlpathjoin(*parts):


def xauth(auth):
-"""Expand authentification token
+"""Expand authentication token

>>> xauth(None)
>>> xauth(('user', 'pass'))
scrapinghub/legacy.py (8 changes: 4 additions & 4 deletions)
@@ -61,7 +61,7 @@ def __init__(self, apikey=None, password='', _old_passwd='',
raise RuntimeError("No API key provided and SH_APIKEY environment variable not set")

assert not apikey.startswith('http://'), \
-"Instantiating scrapinghub.Connection with url as first argument is not supported"
+"Instantiating scrapinghub.Connection with url as first argument is not supported"
if password:
warnings.warn("A lot of endpoints support authentication only via apikey.")
self.apikey = apikey
@@ -77,7 +77,7 @@ def __repr__(self):
def auth(self):
warnings.warn("'auth' connection attribute is deprecated, "
"use 'apikey' attribute instead", stacklevel=2)
-return (self.apikey, self.password)
+return self.apikey, self.password

def _create_session(self):
from requests import session
@@ -169,8 +169,8 @@ def _decode_response(self, response, format, raw):
raise APIError("JSON response does not contain status")
else: # jl
return (json.loads(line.decode('utf-8')
-if isinstance(line, _BINARY_TYPE) else line)
-for line in response.iter_lines())
+if isinstance(line, _BINARY_TYPE) else line)
+for line in response.iter_lines())

##
## public methods
setup.py (6 changes: 3 additions & 3 deletions)
@@ -21,12 +21,12 @@
author='Scrapinghub',
author_email='info@scrapinghub.com',
url='http://github.com/scrapinghub/python-scrapinghub',
-platforms = ['Any'],
+platforms=['Any'],
packages=['scrapinghub', 'scrapinghub.client', 'scrapinghub.hubstorage'],
package_data={'scrapinghub': ['VERSION']},
install_requires=['requests>=1.0', 'retrying>=1.3.3', 'six>=1.10.0'],
-extras_require = {'msgpack': [mpack_required]},
-classifiers = [
+extras_require={'msgpack': [mpack_required]},
+classifiers=[
'Development Status :: 5 - Production/Stable',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
tests/client/test_job.py (2 changes: 1 addition & 1 deletion)
@@ -62,7 +62,7 @@ def test_cancel_jobs_validation(spider):
assert 'keys should be a list' in str(err)

with pytest.raises(ValueError) as err:
-spider.jobs.cancel(count=[1,2])
+spider.jobs.cancel(count=[1, 2])

assert 'count should be an int' in str(err)

tests/client/utils.py (2 changes: 1 addition & 1 deletion)
@@ -39,4 +39,4 @@ def normalize_job_for_tests(job):
existing snapshots.
"""
normalized_key = '{}/{}'.format(TEST_PROJECT_ID, job.key.split('/', 1)[1])
-return job._client.get_job(normalized_key)
\ No newline at end of file
+return job._client.get_job(normalized_key)
tests/conftest.py (2 changes: 1 addition & 1 deletion)
@@ -113,4 +113,4 @@ def _get_accept_header(request):
@pytest.fixture
def frontier_name(request):
"""Provide a name for test-unique HS frontier."""
-return re.sub('\W+', '-', request.node.nodeid)
+return re.sub(r'\W+', '-', request.node.nodeid)
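The raw-string fix above matters because \W is not a recognized escape sequence in an ordinary string literal; since Python 3.6 such escapes emit a DeprecationWarning, while a raw string passes the backslash through unchanged and the regex behaves exactly the same. A quick check:

    import re

    # r'\W+' matches runs of non-word characters, just as '\W+' did,
    # without relying on the deprecated unknown-escape fallback.
    nodeid = 'tests/test_foo.py::test_bar[param-1]'  # hypothetical pytest node id
    print(re.sub(r'\W+', '-', nodeid))  # tests-test_foo-py-test_bar-param-1-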
tests/hubstorage/test_activity.py (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
"""
-Test Activty
+Test Activity
"""
from six.moves import range

tests/hubstorage/test_collections.py (2 changes: 1 addition & 1 deletion)
@@ -72,7 +72,7 @@ def post_scan_test(hsproject, hscollection):

# combining with normal filters
result = list(hscollection.get(filter='["counter", ">", [5]]',
-prefix='post_scan_test1'))
+prefix='post_scan_test1'))
# 10-19
assert len(result) == 10

tests/hubstorage/test_retry.py (10 changes: 5 additions & 5 deletions)
@@ -71,12 +71,12 @@ def request_callback(request):
attempts[0] += 1

if attempts[0] <= timeout_count:
-return (http_error_status, {}, "Timeout")
+return http_error_status, {}, "Timeout"
else:
resp_body = dict(body_on_success)
-return (200, {}, json.dumps(resp_body))
+return 200, {}, json.dumps(resp_body)

-return (request_callback, attempts)
+return request_callback, attempts


def test_delete_on_hubstorage_api_does_not_404():
@@ -154,10 +154,10 @@ def request_callback(request):
if attempts_count[0] <= 2:
raise ConnectionError("Connection aborted.", BadStatusLine("''"))
if attempts_count[0] == 3:
-return (err_code, {}, u'')
+return err_code, {}, u''
else:
resp_body = dict(job_metadata)
-return (200, {}, json.dumps(resp_body))
+return 200, {}, json.dumps(resp_body)

mock_api(callback=request_callback)

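The now-unparenthesized tuples returned by these callbacks follow the (status, headers, body) contract of callback-based HTTP mocking. Assuming the tests register the callbacks through the responses library (the registration is outside this hunk), the pattern looks roughly like this:

    import json

    import responses

    def request_callback(request):
        # responses expects a (status, headers_dict, body_str) tuple.
        return 200, {}, json.dumps({'ok': True})

    responses.add_callback(
        responses.GET,
        'https://storage.scrapinghub.com/jobs/1/2/3',  # hypothetical endpoint
        callback=request_callback,
    )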