From 043b97644d804cdc5620e0955b430d3171e4fb4d Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Sat, 23 May 2020 16:28:32 +0200 Subject: [PATCH 1/4] ref: Refactor ASGI middleware and improve contextvars error message --- sentry_sdk/integrations/aiohttp.py | 5 +- sentry_sdk/integrations/asgi.py | 105 ++++++++++++++------- sentry_sdk/integrations/django/__init__.py | 18 ++-- sentry_sdk/integrations/django/asgi.py | 8 +- sentry_sdk/integrations/sanic.py | 3 +- sentry_sdk/integrations/tornado.py | 4 +- sentry_sdk/utils.py | 35 +++++-- 7 files changed, 120 insertions(+), 58 deletions(-) diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index c00a07d2b2..63bd827669 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -15,6 +15,7 @@ event_from_exception, transaction_from_function, HAS_REAL_CONTEXTVARS, + CONTEXTVARS_ERROR_MESSAGE, AnnotatedValue, ) @@ -60,9 +61,9 @@ def setup_once(): if not HAS_REAL_CONTEXTVARS: # We better have contextvars or we're going to leak state between # requests. - raise RuntimeError( + raise DidNotEnable( "The aiohttp integration for Sentry requires Python 3.7+ " - " or aiocontextvars package" + " or aiocontextvars package." + CONTEXTVARS_ERROR_MESSAGE ) ignore_logger("aiohttp.server") diff --git a/sentry_sdk/integrations/asgi.py b/sentry_sdk/integrations/asgi.py index 25201ccf31..acf6fcd56e 100644 --- a/sentry_sdk/integrations/asgi.py +++ b/sentry_sdk/integrations/asgi.py @@ -12,7 +12,13 @@ from sentry_sdk._types import MYPY from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.integrations._wsgi_common import _filter_headers -from sentry_sdk.utils import ContextVar, event_from_exception, transaction_from_function +from sentry_sdk.utils import ( + ContextVar, + event_from_exception, + transaction_from_function, + HAS_REAL_CONTEXTVARS, + CONTEXTVARS_ERROR_MESSAGE, +) from sentry_sdk.tracing import Span if MYPY: @@ -21,11 +27,15 @@ from typing import Optional from typing import Callable + from typing_extensions import Literal + from sentry_sdk._types import Event, Hint _asgi_middleware_applied = ContextVar("sentry_asgi_middleware_applied") +TRANSACTION_SENTINEL = "generic ASGI request" + def _capture_exception(hub, exc): # type: (Hub, Any) -> None @@ -59,8 +69,19 @@ def _looks_like_asgi3(app): class SentryAsgiMiddleware: __slots__ = ("app", "__call__") - def __init__(self, app): - # type: (Any) -> None + def __init__(self, app, unsafe_context_data=False): + # type: (Any, bool) -> None + + if not unsafe_context_data and not HAS_REAL_CONTEXTVARS: + # We better have contextvars or we're going to leak state between + # requests. + raise RuntimeError( + "The ASGI middleware for Sentry requires Python 3.7+ " + "or the aiocontextvars package." + + CONTEXTVARS_ERROR_MESSAGE + + "\nIf you know what you are doing you can disable this warning " + "with `SentryAsgiMiddleware(..., unsafe_context_data=True)`" + ) self.app = app if _looks_like_asgi3(app): @@ -95,15 +116,17 @@ async def _run_app(self, scope, callback): processor = partial(self.event_processor, asgi_scope=scope) sentry_scope.add_event_processor(processor) - if scope["type"] in ("http", "websocket"): + ty = scope["type"] + + if ty in ("http", "websocket"): span = Span.continue_from_headers(dict(scope["headers"])) - span.op = "{}.server".format(scope["type"]) + span.op = "{}.server".format(ty) else: span = Span() span.op = "asgi.server" - span.set_tag("asgi.type", scope["type"]) - span.transaction = "generic ASGI request" + span.set_tag("asgi.type", ty) + span.transaction = TRANSACTION_SENTINEL with hub.start_span(span) as span: # XXX: Would be cool to have correct span status, but we @@ -121,38 +144,52 @@ def event_processor(self, event, hint, asgi_scope): # type: (Event, Hint, Any) -> Optional[Event] request_info = event.get("request", {}) - if asgi_scope["type"] in ("http", "websocket"): - request_info["url"] = self.get_url(asgi_scope) - request_info["method"] = asgi_scope["method"] - request_info["headers"] = _filter_headers(self.get_headers(asgi_scope)) - request_info["query_string"] = self.get_query(asgi_scope) + ty = asgi_scope["type"] + if ty in ("http", "websocket"): + request_info["method"] = asgi_scope.get("method") + request_info["headers"] = headers = _filter_headers( + self._get_headers(asgi_scope) + ) + request_info["query_string"] = self._get_query(asgi_scope) - if asgi_scope.get("client") and _should_send_default_pii(): - request_info["env"] = {"REMOTE_ADDR": asgi_scope["client"][0]} + request_info["url"] = self._get_url( + asgi_scope, "http" if ty == "http" else "ws", headers.get("host") + ) - if asgi_scope.get("endpoint"): + client = asgi_scope.get("client") + if client and _should_send_default_pii(): + request_info["env"] = {"REMOTE_ADDR": client[0]} + + if event.get("transaction", TRANSACTION_SENTINEL) == TRANSACTION_SENTINEL: + endpoint = asgi_scope.get("endpoint") # Webframeworks like Starlette mutate the ASGI env once routing is # done, which is sometime after the request has started. If we have - # an endpoint, overwrite our path-based transaction name. - event["transaction"] = self.get_transaction(asgi_scope) + # an endpoint, overwrite our generic transaction name. + if endpoint: + event["transaction"] = transaction_from_function(endpoint) event["request"] = request_info return event - def get_url(self, scope): - # type: (Any) -> str + # Helper functions for extracting request data. + # + # Note: Those functions are not public API. If you want to mutate request + # data to your liking it's recommended to use the `before_send` callback + # for that. + + def _get_url(self, scope, default_scheme, host): + # type: (Dict[str, Any], Literal["ws", "http"], Optional[str]) -> str """ Extract URL from the ASGI scope, without also including the querystring. """ - scheme = scope.get("scheme", "http") + scheme = scope.get("scheme", default_scheme) + server = scope.get("server", None) - path = scope.get("root_path", "") + scope["path"] + path = scope.get("root_path", "") + scope.get("path", "") - for key, value in scope["headers"]: - if key == b"host": - host_header = value.decode("latin-1") - return "%s://%s%s" % (scheme, host_header, path) + if host: + return "%s://%s%s" % (scheme, host, path) if server is not None: host, port = server @@ -162,15 +199,18 @@ def get_url(self, scope): return "%s://%s%s" % (scheme, host, path) return path - def get_query(self, scope): + def _get_query(self, scope): # type: (Any) -> Any """ Extract querystring from the ASGI scope, in the format that the Sentry protocol expects. """ - return urllib.parse.unquote(scope["query_string"].decode("latin-1")) + qs = scope.get("query_string") + if not qs: + return None + return urllib.parse.unquote(qs.decode("latin-1")) - def get_headers(self, scope): - # type: (Any) -> Dict[str, Any] + def _get_headers(self, scope): + # type: (Any) -> Dict[str, str] """ Extract headers from the ASGI scope, in the format that the Sentry protocol expects. """ @@ -183,10 +223,3 @@ def get_headers(self, scope): else: headers[key] = value return headers - - def get_transaction(self, scope): - # type: (Any) -> Optional[str] - """ - Return a transaction string to identify the routed endpoint. - """ - return transaction_from_function(scope["endpoint"]) diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index 4e62fe3b74..a4869227e0 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -12,6 +12,7 @@ from sentry_sdk.tracing import record_sql_queries from sentry_sdk.utils import ( HAS_REAL_CONTEXTVARS, + CONTEXTVARS_ERROR_MESSAGE, logger, capture_internal_exceptions, event_from_exception, @@ -301,11 +302,12 @@ def _patch_channels(): # requests. # # We cannot hard-raise here because channels may not be used at all in - # the current process. + # the current process. That is the case when running traditional WSGI + # workers in gunicorn+gevent and the websocket stuff in a separate + # process. logger.warning( - "We detected that you are using Django channels 2.0. To get proper " - "instrumentation for ASGI requests, the Sentry SDK requires " - "Python 3.7+ or the aiocontextvars package from PyPI." + "We detected that you are using Django channels 2.0." + + CONTEXTVARS_ERROR_MESSAGE ) from sentry_sdk.integrations.django.asgi import patch_channels_asgi_handler_impl @@ -324,12 +326,10 @@ def _patch_django_asgi_handler(): # We better have contextvars or we're going to leak state between # requests. # - # We cannot hard-raise here because Django may not be used at all in - # the current process. + # We cannot hard-raise here because Django's ASGI stuff may not be used + # at all. logger.warning( - "We detected that you are using Django 3. To get proper " - "instrumentation for ASGI requests, the Sentry SDK requires " - "Python 3.7+ or the aiocontextvars package from PyPI." + "We detected that you are using Django 3." + CONTEXTVARS_ERROR_MESSAGE ) from sentry_sdk.integrations.django.asgi import patch_django_asgi_handler_impl diff --git a/sentry_sdk/integrations/django/asgi.py b/sentry_sdk/integrations/django/asgi.py index 96ae3e0809..b29abc209b 100644 --- a/sentry_sdk/integrations/django/asgi.py +++ b/sentry_sdk/integrations/django/asgi.py @@ -25,7 +25,9 @@ async def sentry_patched_asgi_handler(self, scope, receive, send): if Hub.current.get_integration(DjangoIntegration) is None: return await old_app(self, scope, receive, send) - middleware = SentryAsgiMiddleware(old_app.__get__(self, cls))._run_asgi3 + middleware = SentryAsgiMiddleware( + old_app.__get__(self, cls), unsafe_context_data=True + )._run_asgi3 return await middleware(scope, receive, send) cls.__call__ = sentry_patched_asgi_handler @@ -40,7 +42,9 @@ async def sentry_patched_asgi_handler(self, receive, send): if Hub.current.get_integration(DjangoIntegration) is None: return await old_app(self, receive, send) - middleware = SentryAsgiMiddleware(lambda _scope: old_app.__get__(self, cls)) + middleware = SentryAsgiMiddleware( + lambda _scope: old_app.__get__(self, cls), unsafe_context_data=True + ) return await middleware(self.scope)(receive, send) diff --git a/sentry_sdk/integrations/sanic.py b/sentry_sdk/integrations/sanic.py index e8fdca422a..eecb633a51 100644 --- a/sentry_sdk/integrations/sanic.py +++ b/sentry_sdk/integrations/sanic.py @@ -8,6 +8,7 @@ capture_internal_exceptions, event_from_exception, HAS_REAL_CONTEXTVARS, + CONTEXTVARS_ERROR_MESSAGE, ) from sentry_sdk.integrations import Integration, DidNotEnable from sentry_sdk.integrations._wsgi_common import RequestExtractor, _filter_headers @@ -55,7 +56,7 @@ def setup_once(): # requests. raise DidNotEnable( "The sanic integration for Sentry requires Python 3.7+ " - " or aiocontextvars package" + " or the aiocontextvars package." + CONTEXTVARS_ERROR_MESSAGE ) if SANIC_VERSION.startswith("0.8."): diff --git a/sentry_sdk/integrations/tornado.py b/sentry_sdk/integrations/tornado.py index d3ae065690..81fb872de9 100644 --- a/sentry_sdk/integrations/tornado.py +++ b/sentry_sdk/integrations/tornado.py @@ -4,6 +4,7 @@ from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.utils import ( HAS_REAL_CONTEXTVARS, + CONTEXTVARS_ERROR_MESSAGE, event_from_exception, capture_internal_exceptions, transaction_from_function, @@ -48,7 +49,8 @@ def setup_once(): # Tornado is async. We better have contextvars or we're going to leak # state between requests. raise DidNotEnable( - "The tornado integration for Sentry requires Python 3.6+ or the aiocontextvars package" + "The tornado integration for Sentry requires Python 3.7+ or the aiocontextvars package" + + CONTEXTVARS_ERROR_MESSAGE ) ignore_logger("tornado.access") diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 502e582e00..c1dbc2238e 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -760,20 +760,22 @@ def _get_contextvars(): # backport (also a PyPI package) works with asyncio under Python 3.6 # # Import it if available. - if not PY2 and sys.version_info < (3, 7): + if sys.version_info < (3, 7): + # `aiocontextvars` is absolutely required for functional + # contextvars on Python 3.6. try: from aiocontextvars import ContextVar # noqa return True, ContextVar except ImportError: pass + else: + try: + from contextvars import ContextVar - try: - from contextvars import ContextVar - - return True, ContextVar - except ImportError: - pass + return True, ContextVar + except ImportError: + pass from threading import local @@ -798,6 +800,25 @@ def set(self, value): HAS_REAL_CONTEXTVARS, ContextVar = _get_contextvars() +CONTEXTVARS_ERROR_MESSAGE = """ + +With asyncio/ASGI applications, the Sentry SDK requires a functional +installation of `contextvars` to avoid leaking scope/context data across +requests. + +- For Python 3.7 `contextvars` is fully functional and part of stdlib. +- For Python 3.6 and below, `aiocontextvars` can be installed from PyPI. + +On top of that, gevent's and eventlet's magic may break `contextvars`: + +- When gunicorn is used with gevent (or gevent is otherwise enabled with + monkeypatches), please upgrade to at least gevent 20.5 to get asyncio support + from Sentry. + +- Eventlet is only monkeypatching the threading module and at the moment + completely incompatible with asyncio. +""" + def transaction_from_function(func): # type: (Callable[..., Any]) -> Optional[str] From ce5acee6b8db579527827538239547ff21c28bc6 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 27 May 2020 17:46:14 +0200 Subject: [PATCH 2/4] address review feedback --- sentry_sdk/integrations/asgi.py | 21 ++++++++++++++------- sentry_sdk/utils.py | 27 ++++++++++++--------------- tests/integrations/asgi/test_asgi.py | 27 +++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 22 deletions(-) diff --git a/sentry_sdk/integrations/asgi.py b/sentry_sdk/integrations/asgi.py index acf6fcd56e..202c49025a 100644 --- a/sentry_sdk/integrations/asgi.py +++ b/sentry_sdk/integrations/asgi.py @@ -34,7 +34,7 @@ _asgi_middleware_applied = ContextVar("sentry_asgi_middleware_applied") -TRANSACTION_SENTINEL = "generic ASGI request" +_DEFAULT_TRANSACTION_NAME = "generic ASGI request" def _capture_exception(hub, exc): @@ -71,16 +71,20 @@ class SentryAsgiMiddleware: def __init__(self, app, unsafe_context_data=False): # type: (Any, bool) -> None + """ + Instrument an ASGI application with Sentry. Provides HTTP/websocket + data to sent events and basic handling for exceptions bubbling up + through the middleware. + + :param unsafe_context_data: Disable errors when a proper contextvars installation could not be found. We do not recommend changing this from the default. + """ if not unsafe_context_data and not HAS_REAL_CONTEXTVARS: # We better have contextvars or we're going to leak state between # requests. raise RuntimeError( "The ASGI middleware for Sentry requires Python 3.7+ " - "or the aiocontextvars package." - + CONTEXTVARS_ERROR_MESSAGE - + "\nIf you know what you are doing you can disable this warning " - "with `SentryAsgiMiddleware(..., unsafe_context_data=True)`" + "or the aiocontextvars package." + CONTEXTVARS_ERROR_MESSAGE ) self.app = app @@ -126,7 +130,7 @@ async def _run_app(self, scope, callback): span.op = "asgi.server" span.set_tag("asgi.type", ty) - span.transaction = TRANSACTION_SENTINEL + span.transaction = _DEFAULT_TRANSACTION_NAME with hub.start_span(span) as span: # XXX: Would be cool to have correct span status, but we @@ -160,7 +164,10 @@ def event_processor(self, event, hint, asgi_scope): if client and _should_send_default_pii(): request_info["env"] = {"REMOTE_ADDR": client[0]} - if event.get("transaction", TRANSACTION_SENTINEL) == TRANSACTION_SENTINEL: + if ( + event.get("transaction", _DEFAULT_TRANSACTION_NAME) + == _DEFAULT_TRANSACTION_NAME + ): endpoint = asgi_scope.get("endpoint") # Webframeworks like Starlette mutate the ASGI env once routing is # done, which is sometime after the request has started. If we have diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index c1dbc2238e..0f0a4953b0 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -724,10 +724,15 @@ def strip_string(value, max_length=None): def _is_contextvars_broken(): # type: () -> bool + """ + Returns whether gevent/eventlet have patched the stdlib in a way where thread locals are now more "correct" than contextvars. + """ try: from gevent.monkey import is_object_patched # type: ignore if is_object_patched("threading", "local"): + # Gevent 20.5 is able to patch both thread locals and contextvars, + # in that case all is good. if is_object_patched("contextvars", "ContextVar"): return False @@ -749,11 +754,10 @@ def _is_contextvars_broken(): def _get_contextvars(): # type: () -> Tuple[bool, type] """ - Try to import contextvars and use it if it's deemed safe. We should not use - contextvars if gevent or eventlet have patched thread locals, as - contextvars are unaffected by that patch. + Figure out the "right" contextvars installation to use. Returns a + `contextvars.ContextVar`-like class with a limited API. - https://github.com/gevent/gevent/issues/1407 + See https://docs.sentry.io/platforms/python/contextvars/ for more information. """ if not _is_contextvars_broken(): # aiocontextvars is a PyPI package that ensures that the contextvars @@ -770,6 +774,7 @@ def _get_contextvars(): except ImportError: pass else: + # On Python 3.7 contextvars are functional. try: from contextvars import ContextVar @@ -777,6 +782,8 @@ def _get_contextvars(): except ImportError: pass + # Fall back to basic thread-local usage. + from threading import local class ContextVar(object): @@ -806,17 +813,7 @@ def set(self, value): installation of `contextvars` to avoid leaking scope/context data across requests. -- For Python 3.7 `contextvars` is fully functional and part of stdlib. -- For Python 3.6 and below, `aiocontextvars` can be installed from PyPI. - -On top of that, gevent's and eventlet's magic may break `contextvars`: - -- When gunicorn is used with gevent (or gevent is otherwise enabled with - monkeypatches), please upgrade to at least gevent 20.5 to get asyncio support - from Sentry. - -- Eventlet is only monkeypatching the threading module and at the moment - completely incompatible with asyncio. +Please refer to https://docs.sentry.io/platforms/python/contextvars/ for more information. """ diff --git a/tests/integrations/asgi/test_asgi.py b/tests/integrations/asgi/test_asgi.py index 9da20199ca..d66407c196 100644 --- a/tests/integrations/asgi/test_asgi.py +++ b/tests/integrations/asgi/test_asgi.py @@ -6,6 +6,7 @@ from starlette.applications import Starlette from starlette.responses import PlainTextResponse from starlette.testclient import TestClient +from starlette.websockets import WebSocket @pytest.fixture @@ -119,3 +120,29 @@ def myerror(request): frame["filename"].endswith("tests/integrations/asgi/test_asgi.py") for frame in exception["stacktrace"]["frames"] ) + + +def test_websocket(sentry_init, capture_events): + sentry_init(debug=True, send_default_pii=True) + events = capture_events() + + from starlette.testclient import TestClient + + def message(): + return "Hello, World!" + + async def app(scope, receive, send): + assert scope["type"] == "websocket" + websocket = WebSocket(scope, receive=receive, send=send) + await websocket.accept() + await websocket.send_text(message()) + await websocket.close() + + app = SentryAsgiMiddleware(app) + + client = TestClient(app) + with client.websocket_connect("/") as websocket: + data = websocket.receive_text() + assert data == "Hello, World!" + + assert not events From 13689e14630308b14e2f7c6cb9015ebfd1cb0cc2 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Fri, 29 May 2020 20:25:11 +0200 Subject: [PATCH 3/4] better test --- test-requirements.txt | 1 + tests/integrations/asgi/test_asgi.py | 45 ++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/test-requirements.txt b/test-requirements.txt index be051169ad..c771bdc50b 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -7,3 +7,4 @@ pytest-cov==2.8.1 gevent eventlet newrelic +pytest-randomly==1.2.3 diff --git a/tests/integrations/asgi/test_asgi.py b/tests/integrations/asgi/test_asgi.py index d66407c196..2561537708 100644 --- a/tests/integrations/asgi/test_asgi.py +++ b/tests/integrations/asgi/test_asgi.py @@ -1,7 +1,7 @@ import sys import pytest -from sentry_sdk import capture_message +from sentry_sdk import Hub, capture_message from sentry_sdk.integrations.asgi import SentryAsgiMiddleware from starlette.applications import Starlette from starlette.responses import PlainTextResponse @@ -122,14 +122,21 @@ def myerror(request): ) -def test_websocket(sentry_init, capture_events): +def test_websocket(sentry_init, capture_events, request): sentry_init(debug=True, send_default_pii=True) + + # Bind client to main thread because context propagation for the websocket + # client does not work. + Hub.main.bind_client(Hub.current.client) + request.addfinalizer(lambda: Hub.main.bind_client(None)) + events = capture_events() from starlette.testclient import TestClient def message(): - return "Hello, World!" + capture_message("hi") + raise ValueError("oh no") async def app(scope, receive, send): assert scope["type"] == "websocket" @@ -142,7 +149,33 @@ async def app(scope, receive, send): client = TestClient(app) with client.websocket_connect("/") as websocket: - data = websocket.receive_text() - assert data == "Hello, World!" + with pytest.raises(ValueError): + websocket.receive_text() + + msg_event, error_event = events - assert not events + assert msg_event["message"] == "hi" + + (exc,) = error_event["exception"]["values"] + assert exc["type"] == "ValueError" + assert exc["value"] == "oh no" + + assert ( + msg_event["request"] + == error_event["request"] + == { + "env": {"REMOTE_ADDR": "testclient"}, + "headers": { + "accept": "*/*", + "accept-encoding": "gzip, deflate", + "connection": "upgrade", + "host": "testserver", + "sec-websocket-key": "testserver==", + "sec-websocket-version": "13", + "user-agent": "testclient", + }, + "method": None, + "query_string": None, + "url": "ws://testserver/", + } + ) From c98968f101a6e97abf01bfa4d58dbe8ed1f6a33f Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Sat, 30 May 2020 00:13:13 +0200 Subject: [PATCH 4/4] do not add randomization plugin for now --- test-requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index c771bdc50b..be051169ad 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -7,4 +7,3 @@ pytest-cov==2.8.1 gevent eventlet newrelic -pytest-randomly==1.2.3