Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ dist

# documentation
docs/_build

.DS_Store
pytestdebug.log
44 changes: 11 additions & 33 deletions tests/client/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import os
import zlib
import base64
import pickle

import vcr
import pytest
Expand All @@ -12,42 +9,23 @@
from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE

from ..conftest import request_accept_header_matcher


TEST_PROJECT_ID = "2222222"
TEST_SPIDER_NAME = 'hs-test-spider'
TEST_FRONTIER_SLOT = 'site.com'
TEST_BOTGROUP = 'python-hubstorage-test'
TEST_COLLECTION_NAME = "test_collection_123"
TEST_ADMIN_AUTH = os.getenv('AUTH', 'f' * 32)
TEST_USER_AUTH = os.getenv('USER_AUTH', 'e' * 32)
TEST_DASH_ENDPOINT = os.getenv('DASH_ENDPOINT', 'http://33.33.33.51:8080/api/')
TEST_HS_ENDPOINT = os.getenv('HS_ENDPOINT',
'http://storage.vm.scrapinghub.com')
from ..conftest import VCRGzipSerializer
from ..conftest import (
TEST_SPIDER_NAME,
TEST_FRONTIER_SLOT,
TEST_COLLECTION_NAME,
TEST_ENDPOINT,
TEST_PROJECT_ID,
TEST_ADMIN_AUTH,
TEST_DASH_ENDPOINT,
)

# use some fixed timestamp to represent current time
TEST_TS = 1476803148638

# vcrpy creates the cassettes automatically under VCR_CASSETES_DIR
VCR_CASSETES_DIR = 'tests/client/cassetes'


class VCRGzipSerializer(object):
"""Custom ZIP serializer for VCR.py."""

def serialize(self, cassette_dict):
# receives a dict, must return a string
# there can be binary data inside some of the requests,
# so it's impossible to use json for serialization to string
compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2))
return base64.b64encode(compressed).decode('utf8')

def deserialize(self, cassette_string):
# receives a string, must return a dict
decoded = base64.b64decode(cassette_string.encode('utf8'))
return pickle.loads(zlib.decompress(decoded))


my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once')
my_vcr.register_serializer('gz', VCRGzipSerializer())
my_vcr.register_matcher('accept_header', request_accept_header_matcher)
Expand Down Expand Up @@ -79,7 +57,7 @@ def is_using_real_services(request):
@pytest.fixture(scope='session')
def client():
return ScrapinghubClient(auth=TEST_ADMIN_AUTH,
endpoint=TEST_HS_ENDPOINT,
endpoint=TEST_ENDPOINT,
dash_endpoint=TEST_DASH_ENDPOINT)


Expand Down
2 changes: 1 addition & 1 deletion tests/client/test_activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from .conftest import TEST_PROJECT_ID
from ..conftest import TEST_PROJECT_ID


def _add_test_activity(project):
Expand Down
2 changes: 1 addition & 1 deletion tests/client/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from scrapinghub.client.jobs import Job
from scrapinghub.client.projects import Projects, Project

from .conftest import TEST_PROJECT_ID
from ..conftest import TEST_PROJECT_ID


# ScrapinghubClient class tests
Expand Down
2 changes: 1 addition & 1 deletion tests/client/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from scrapinghub.client.exceptions import NotFound
from scrapinghub.client.exceptions import ValueTooLarge

from .conftest import TEST_COLLECTION_NAME
from ..conftest import TEST_COLLECTION_NAME


def _mkitem():
Expand Down
2 changes: 1 addition & 1 deletion tests/client/test_frontiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from six import string_types

from scrapinghub.client.frontiers import Frontiers, Frontier, FrontierSlot
from .conftest import TEST_FRONTIER_SLOT
from ..conftest import TEST_FRONTIER_SLOT


def _add_test_requests_to_frontier(frontier):
Expand Down
4 changes: 2 additions & 2 deletions tests/client/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from scrapinghub.client.requests import Requests
from scrapinghub.client.samples import Samples

from .conftest import TEST_PROJECT_ID
from .conftest import TEST_SPIDER_NAME
from ..conftest import TEST_PROJECT_ID
from ..conftest import TEST_SPIDER_NAME


def test_job_base(client, spider):
Expand Down
4 changes: 2 additions & 2 deletions tests/client/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from scrapinghub.hubstorage.utils import apipoll

from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from .conftest import TEST_USER_AUTH, TEST_DASH_ENDPOINT
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_USER_AUTH, TEST_DASH_ENDPOINT
from .utils import validate_default_meta


Expand Down
2 changes: 1 addition & 1 deletion tests/client/test_spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from scrapinghub.client.spiders import Spider
from scrapinghub.client.utils import JobKey

from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from .utils import validate_default_meta


Expand Down
4 changes: 2 additions & 2 deletions tests/client/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from .conftest import TEST_DASH_ENDPOINT
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_DASH_ENDPOINT


def validate_default_meta(meta, state='pending', units=1,
Expand Down
84 changes: 84 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,92 @@
# -*- coding: utf-8 -*-
import base64
import os
import pickle
import pytest
import re
import zlib

from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE
from scrapinghub import HubstorageClient
from scrapinghub.legacy import Connection


# Default values used to normalize recorded cassettes, so replayed
# requests always match regardless of the local environment.
DEFAULT_PROJECT_ID = "2222222"
DEFAULT_ENDPOINT = 'http://storage.vm.scrapinghub.com'
DEFAULT_DASH_ENDPOINT = 'http://33.33.33.51:8080/api/'
DEFAULT_ADMIN_AUTH = 'f' * 32
DEFAULT_USER_AUTH = 'e' * 32


# Test settings, overridable through environment variables so the suite
# can be pointed at real services (e.g. when re-recording cassettes).
TEST_PROJECT_ID = os.getenv('HS_PROJECT_ID', DEFAULT_PROJECT_ID)
TEST_SPIDER_NAME = 'hs-test-spider'
TEST_FRONTIER_SLOT = 'site.com'
TEST_BOTGROUP = 'python-hubstorage-test'
# NOTE: defined once here; the original diff declared it twice.
TEST_COLLECTION_NAME = "test_collection_123"
TEST_AUTH = os.getenv('HS_AUTH', DEFAULT_ADMIN_AUTH)
TEST_ENDPOINT = os.getenv('HS_ENDPOINT', DEFAULT_ENDPOINT)
TEST_ADMIN_AUTH = os.getenv('AUTH', DEFAULT_ADMIN_AUTH)
TEST_USER_AUTH = os.getenv('USER_AUTH', DEFAULT_USER_AUTH)
TEST_DASH_ENDPOINT = os.getenv('DASH_ENDPOINT', DEFAULT_DASH_ENDPOINT)


class VCRGzipSerializer(object):
    """Pickle-based, zlib-compressed serializer for VCR.py cassettes.

    Cassettes may contain binary request/response bodies, so JSON is not
    usable for serialization to a string; instead the cassette dict is
    pickled, compressed and base64-encoded into a plain ASCII string.
    """

    def serialize(self, cassette_dict):
        # Receives a dict, must return a string.
        # Normalize endpoints/credentials before persisting the cassette.
        normalized = normalize_cassette(cassette_dict)
        raw = pickle.dumps(normalized, protocol=2)
        return base64.b64encode(zlib.compress(raw)).decode('utf8')

    def deserialize(self, cassette_string):
        # Receives a string, must return a dict.
        compressed = base64.b64decode(cassette_string.encode('utf8'))
        return pickle.loads(zlib.decompress(compressed))


def normalize_endpoint(uri, endpoint, default_endpoint):
    """Return *uri* with *endpoint* replaced by *default_endpoint*.

    Trailing slashes are stripped from both endpoints first, so the
    substitution is insensitive to how each endpoint was configured.
    """
    actual = endpoint.rstrip('/')
    canonical = default_endpoint.rstrip('/')
    return uri.replace(actual, canonical)


def normalize_cassette(cassette_dict):
    """
    This function normalizes the cassette dict trying to make sure
    we are always making API requests with the same variables:
    - project id
    - endpoint
    - authentication header
    """
    interactions = []
    for interaction in cassette_dict['interactions']:
        uri = interaction['request']['uri']
        # Replace the real project id with the fixed default one.
        uri = uri.replace(TEST_PROJECT_ID, DEFAULT_PROJECT_ID)

        # Fall back to the client's built-in endpoint when the env var
        # driven TEST_* value is empty.
        hs_endpoint = TEST_ENDPOINT or HubstorageClient.DEFAULT_ENDPOINT
        uri = normalize_endpoint(uri, hs_endpoint, DEFAULT_ENDPOINT)

        dash_endpoint = TEST_DASH_ENDPOINT or Connection.DEFAULT_ENDPOINT
        uri = normalize_endpoint(uri, dash_endpoint, DEFAULT_DASH_ENDPOINT)

        interaction['request']['uri'] = uri

        # Strip the real credentials: replace any recorded Authorization
        # header with one built from the fixed default admin auth
        # ("<auth>:" basic-auth, matching the client's auth scheme).
        # NOTE(review): the `del` is redundant — the key is immediately
        # reassigned below; confirm before simplifying.
        if 'Authorization' in interaction['request']['headers']:
            del interaction['request']['headers']['Authorization']
            interaction['request']['headers']['Authorization'] = (
                'Basic {}'.format(
                    base64.b64encode(
                        '{}:'.format(DEFAULT_ADMIN_AUTH).encode('utf-8')
                    ).decode('utf-8')
                )
            )

        interactions.append(interaction)

    # Interactions are mutated in place; the dict is also returned for
    # convenience (used by VCRGzipSerializer.serialize).
    cassette_dict['interactions'] = interactions
    return cassette_dict


def pytest_addoption(parser):
Expand Down
38 changes: 9 additions & 29 deletions tests/hubstorage/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import os
import zlib
import base64
import pickle

import vcr
import pytest
Expand All @@ -14,36 +11,19 @@
from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE

from ..conftest import request_accept_header_matcher


TEST_PROJECT_ID = "2222222"
TEST_SPIDER_NAME = 'hs-test-spider'
TEST_FRONTIER_SLOT = 'site.com'
TEST_BOTGROUP = 'python-hubstorage-test'
TEST_COLLECTION_NAME = "test_collection_123"
TEST_AUTH = os.getenv('HS_AUTH', 'f' * 32)
TEST_ENDPOINT = os.getenv('HS_ENDPOINT', 'http://storage.vm.scrapinghub.com')
from ..conftest import VCRGzipSerializer
from ..conftest import (
TEST_PROJECT_ID,
TEST_ENDPOINT,
TEST_AUTH,
TEST_BOTGROUP,
TEST_COLLECTION_NAME,
TEST_SPIDER_NAME,
)

# vcrpy creates the cassettes automatically under VCR_CASSETES_DIR
VCR_CASSETES_DIR = 'tests/hubstorage/cassetes'


class VCRGzipSerializer(object):
"""Custom ZIP serializer for VCR.py."""

def serialize(self, cassette_dict):
# receives a dict, must return a string
# there can be binary data inside some of the requests,
# so it's impossible to use json for serialization to string
compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2))
return base64.b64encode(compressed).decode('utf8')

def deserialize(self, cassette_string):
# receives a string, must return a dict
decoded = base64.b64decode(cassette_string.encode('utf8'))
return pickle.loads(zlib.decompress(decoded))


my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once')
my_vcr.register_serializer('gz', VCRGzipSerializer())
my_vcr.register_matcher('accept_header', request_accept_header_matcher)
Expand Down
2 changes: 1 addition & 1 deletion tests/hubstorage/test_batchuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections import defaultdict

from scrapinghub.hubstorage import ValueTooLarge
from .conftest import TEST_SPIDER_NAME, TEST_AUTH
from ..conftest import TEST_SPIDER_NAME, TEST_AUTH
from .conftest import start_job


Expand Down
4 changes: 2 additions & 2 deletions tests/hubstorage/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from scrapinghub import HubstorageClient
from scrapinghub.hubstorage.utils import apipoll

from .conftest import TEST_AUTH, TEST_ENDPOINT
from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_AUTH, TEST_ENDPOINT
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from .conftest import start_job


Expand Down
2 changes: 1 addition & 1 deletion tests/hubstorage/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from scrapinghub import HubstorageClient
from six.moves import range

from .conftest import TEST_COLLECTION_NAME
from ..conftest import TEST_COLLECTION_NAME
from .testutil import failing_downloader


Expand Down
2 changes: 1 addition & 1 deletion tests/hubstorage/test_frontier.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import pytest

from .conftest import TEST_FRONTIER_SLOT
from ..conftest import TEST_FRONTIER_SLOT


@pytest.fixture(autouse=True)
Expand Down
2 changes: 1 addition & 1 deletion tests/hubstorage/test_jobq.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from scrapinghub.hubstorage.jobq import DuplicateJobError
from scrapinghub.hubstorage.utils import apipoll

from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from .conftest import hsspiderid


Expand Down
2 changes: 1 addition & 1 deletion tests/hubstorage/test_jobsmeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

System tests for operations on stored job metadata
"""
from .conftest import TEST_SPIDER_NAME
from ..conftest import TEST_SPIDER_NAME
from .conftest import start_job


Expand Down
2 changes: 1 addition & 1 deletion tests/hubstorage/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from scrapinghub import HubstorageClient

from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from .conftest import hsspiderid
from .conftest import start_job
from .conftest import set_testbotgroup, unset_testbotgroup
Expand Down
4 changes: 2 additions & 2 deletions tests/hubstorage/test_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from scrapinghub import HubstorageClient
from six.moves.http_client import BadStatusLine

from .conftest import TEST_AUTH, TEST_ENDPOINT
from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME
from ..conftest import TEST_AUTH, TEST_ENDPOINT
from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME


GET = responses.GET
Expand Down
4 changes: 2 additions & 2 deletions tests/hubstorage/test_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from scrapinghub import HubstorageClient
from scrapinghub.hubstorage.utils import millitime

from .conftest import TEST_ENDPOINT, TEST_SPIDER_NAME
from .conftest import TEST_PROJECT_ID, TEST_AUTH
from ..conftest import TEST_ENDPOINT, TEST_SPIDER_NAME
from ..conftest import TEST_PROJECT_ID, TEST_AUTH
from .conftest import start_job


Expand Down