# Patch summary (commentary only; everything from the "diff --git" line down
# is the patch itself -- patch tools are expected to skip this preamble, but
# confirm with your tooling before applying).
#
# Target file: scrapinghub/hubstorage/resourcetype.py
#
# Hunk 1 (imports): splits the combined "import logging, time, json, socket"
#   into one import per line and separates the import groups with blank
#   lines. No names are added or removed.
#
# Hunk 2 (new method apiget_json): builds the request kwargs itself
#   (method='GET', url joined from self.url and _path, default auth and
#   is_idempotent=True), calls self.client.request(**kwargs), raises on an
#   HTTP error status via raise_for_status(), and returns the decoded body
#   via r.json(). Unlike apiget, it does not go through self.apirequest.
#
# Hunk 3 (stats): stats() now returns self.apiget_json('stats') instead of
#   taking the first element of the self.apiget('stats') iterator.
#
# NOTE(review): "from collections import MutableMapping" is only moved by
#   this patch, but on Python 3 the ABC lives in collections.abc and the
#   alias in collections was removed in Python 3.10 -- a compatibility
#   import is probably needed in a follow-up; verify supported versions.
# NOTE(review): the line structure and indentation below were reconstructed
#   from the hunk headers' line counts -- confirm against the original patch.
diff --git a/scrapinghub/hubstorage/resourcetype.py b/scrapinghub/hubstorage/resourcetype.py
index 0fd029fd..53320f6d 100644
--- a/scrapinghub/hubstorage/resourcetype.py
+++ b/scrapinghub/hubstorage/resourcetype.py
@@ -1,8 +1,13 @@
+import time
+import json
+import socket
+import logging
+from collections import MutableMapping
+
 import six
 from six.moves import range
-import logging, time, json, socket
-from collections import MutableMapping
 import requests.exceptions as rexc
+
 from .utils import urlpathjoin, xauth
 from .serialization import jlencode, jldecode, mpdecode
 
@@ -78,6 +83,22 @@ def apiget(self, _path=None, **kwargs):
         kwargs.setdefault('is_idempotent', True)
         return self.apirequest(_path, method='GET', **kwargs)
 
+    def apiget_json(self, _path, **kwargs):
+        """Optimized GET logic for endpoints returning a single JSON line.
+
+        Some endpoints, like /items/stats, can return a large JSON line, and
+        due to chunking logic in _iter_lines() it can take double time to get
+        the chunks one by one, join and convert it to a single JSON line. This
+        method should be called for endpoints that always return a single JSON
+        line in the response.
+        """
+        kwargs.update(method='GET', url=urlpathjoin(self.url, _path))
+        kwargs.setdefault('auth', self.auth)
+        kwargs.setdefault('is_idempotent', True)
+        r = self.client.request(**kwargs)
+        r.raise_for_status()
+        return r.json()
+
     def apidelete(self, _path=None, **kwargs):
         kwargs.setdefault('is_idempotent', True)
         return self.apirequest(_path, method='DELETE', **kwargs)
@@ -217,7 +238,7 @@ def get(self, _key, **params):
         return o
 
     def stats(self):
-        return next(self.apiget('stats'))
+        return self.apiget_json('stats')
 
 
 class MappingResourceType(ResourceType, MutableMapping):