From 33edce4728acccbae6dee21519dc78d07f43ae1c Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 15 Dec 2020 08:25:26 -0800 Subject: [PATCH 1/7] chore(tracing): Split tracing utils into separate module (#952) No behavior changes - just moving stuff around. # Conflicts: # sentry_sdk/tracing.py --- sentry_sdk/integrations/django/__init__.py | 2 +- sentry_sdk/integrations/sqlalchemy.py | 2 +- sentry_sdk/integrations/stdlib.py | 2 +- sentry_sdk/tracing.py | 189 ++------------------ sentry_sdk/tracing_utils.py | 194 +++++++++++++++++++++ tests/tracing/test_sampling.py | 7 +- 6 files changed, 211 insertions(+), 185 deletions(-) create mode 100644 sentry_sdk/tracing_utils.py diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index e26948e2dd..87f9c7bc61 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -9,7 +9,7 @@ from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.scope import add_global_event_processor from sentry_sdk.serializer import add_global_repr_processor -from sentry_sdk.tracing import record_sql_queries +from sentry_sdk.tracing_utils import record_sql_queries from sentry_sdk.utils import ( HAS_REAL_CONTEXTVARS, CONTEXTVARS_ERROR_MESSAGE, diff --git a/sentry_sdk/integrations/sqlalchemy.py b/sentry_sdk/integrations/sqlalchemy.py index 6c8e5eb88e..4b0207f5ec 100644 --- a/sentry_sdk/integrations/sqlalchemy.py +++ b/sentry_sdk/integrations/sqlalchemy.py @@ -3,7 +3,7 @@ from sentry_sdk._types import MYPY from sentry_sdk.hub import Hub from sentry_sdk.integrations import Integration, DidNotEnable -from sentry_sdk.tracing import record_sql_queries +from sentry_sdk.tracing_utils import record_sql_queries try: from sqlalchemy.engine import Engine # type: ignore diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index ac2ec103c7..405b95c372 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -6,7 +6,7 @@ from sentry_sdk.hub import Hub from sentry_sdk.integrations import Integration from sentry_sdk.scope import add_global_event_processor -from sentry_sdk.tracing import EnvironHeaders +from sentry_sdk.tracing_utils import EnvironHeaders from sentry_sdk.utils import capture_internal_exceptions, safe_repr from sentry_sdk._types import MYPY diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 4ce25f27c2..ae7b75bef2 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1,27 +1,21 @@ -import re import uuid -import contextlib -import math import random import time from datetime import datetime, timedelta -from numbers import Real import sentry_sdk -from sentry_sdk.utils import ( - capture_internal_exceptions, - logger, - to_string, +from sentry_sdk.utils import logger +from sentry_sdk.tracing_utils import ( + SENTRY_TRACE_REGEX, + EnvironHeaders, + has_tracing_enabled, + is_valid_sample_rate, + maybe_create_breadcrumbs_from_span, ) -from sentry_sdk._compat import PY2 from sentry_sdk._types import MYPY -if PY2: - from collections import Mapping -else: - from collections.abc import Mapping if MYPY: import typing @@ -35,45 +29,6 @@ from sentry_sdk._types import SamplingContext -_traceparent_header_format_re = re.compile( - "^[ \t]*" # whitespace - "([0-9a-f]{32})?" # trace_id - "-?([0-9a-f]{16})?" # span_id - "-?([01])?" # sampled - "[ \t]*$" # whitespace -) - - -class EnvironHeaders(Mapping): # type: ignore - def __init__( - self, - environ, # type: typing.Mapping[str, str] - prefix="HTTP_", # type: str - ): - # type: (...) -> None - self.environ = environ - self.prefix = prefix - - def __getitem__(self, key): - # type: (str) -> Optional[Any] - return self.environ[self.prefix + key.replace("-", "_").upper()] - - def __len__(self): - # type: () -> int - return sum(1 for _ in iter(self)) - - def __iter__(self): - # type: () -> Generator[str, None, None] - for k in self.environ: - if not isinstance(k, str): - continue - - k = k.replace("-", "_").upper() - if not k.startswith(self.prefix): - continue - - yield k[len(self.prefix) :] - class _SpanRecorder(object): """Limits the number of spans recorded in a transaction.""" @@ -325,7 +280,7 @@ def from_traceparent( if traceparent.startswith("00-") and traceparent.endswith("-00"): traceparent = traceparent[3:-3] - match = _traceparent_header_format_re.match(str(traceparent)) + match = SENTRY_TRACE_REGEX.match(str(traceparent)) if match is None: return None @@ -422,7 +377,7 @@ def finish(self, hub=None): except AttributeError: self.timestamp = datetime.utcnow() - _maybe_create_breadcrumbs_from_span(hub, self) + maybe_create_breadcrumbs_from_span(hub, self) return None def to_json(self): @@ -617,7 +572,7 @@ def _set_initial_sampling_decision(self, sampling_context): # Since this is coming from the user (or from a function provided by the # user), who knows what we might get. (The only valid values are # booleans or numbers between 0 and 1.) - if not _is_valid_sample_rate(sample_rate): + if not is_valid_sample_rate(sample_rate): logger.warning( "[Tracing] Discarding {transaction_description} because of invalid sample rate.".format( transaction_description=transaction_description, @@ -660,127 +615,3 @@ def _set_initial_sampling_decision(self, sampling_context): sample_rate=float(sample_rate), ) ) - - -def has_tracing_enabled(options): - # type: (Dict[str, Any]) -> bool - """ - Returns True if either traces_sample_rate or traces_sampler is - defined, False otherwise. - """ - - return bool( - options.get("traces_sample_rate") is not None - or options.get("traces_sampler") is not None - ) - - -def _is_valid_sample_rate(rate): - # type: (Any) -> bool - """ - Checks the given sample rate to make sure it is valid type and value (a - boolean or a number between 0 and 1, inclusive). - """ - - # both booleans and NaN are instances of Real, so a) checking for Real - # checks for the possibility of a boolean also, and b) we have to check - # separately for NaN - if not isinstance(rate, Real) or math.isnan(rate): - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( - rate=rate, type=type(rate) - ) - ) - return False - - # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False - rate = float(rate) - if rate < 0 or rate > 1: - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( - rate=rate - ) - ) - return False - - return True - - -def _format_sql(cursor, sql): - # type: (Any, str) -> Optional[str] - - real_sql = None - - # If we're using psycopg2, it could be that we're - # looking at a query that uses Composed objects. Use psycopg2's mogrify - # function to format the query. We lose per-parameter trimming but gain - # accuracy in formatting. - try: - if hasattr(cursor, "mogrify"): - real_sql = cursor.mogrify(sql) - if isinstance(real_sql, bytes): - real_sql = real_sql.decode(cursor.connection.encoding) - except Exception: - real_sql = None - - return real_sql or to_string(sql) - - -@contextlib.contextmanager -def record_sql_queries( - hub, # type: sentry_sdk.Hub - cursor, # type: Any - query, # type: Any - params_list, # type: Any - paramstyle, # type: Optional[str] - executemany, # type: bool -): - # type: (...) -> Generator[Span, None, None] - - # TODO: Bring back capturing of params by default - if hub.client and hub.client.options["_experiments"].get( - "record_sql_params", False - ): - if not params_list or params_list == [None]: - params_list = None - - if paramstyle == "pyformat": - paramstyle = "format" - else: - params_list = None - paramstyle = None - - query = _format_sql(cursor, query) - - data = {} - if params_list is not None: - data["db.params"] = params_list - if paramstyle is not None: - data["db.paramstyle"] = paramstyle - if executemany: - data["db.executemany"] = True - - with capture_internal_exceptions(): - hub.add_breadcrumb(message=query, category="query", data=data) - - with hub.start_span(op="db", description=query) as span: - for k, v in data.items(): - span.set_data(k, v) - yield span - - -def _maybe_create_breadcrumbs_from_span(hub, span): - # type: (sentry_sdk.Hub, Span) -> None - if span.op == "redis": - hub.add_breadcrumb( - message=span.description, type="redis", category="redis", data=span._tags - ) - elif span.op == "http": - hub.add_breadcrumb(type="http", category="httplib", data=span._data) - elif span.op == "subprocess": - hub.add_breadcrumb( - type="subprocess", - category="subprocess", - message=span.description, - data=span._data, - ) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py new file mode 100644 index 0000000000..62489105c1 --- /dev/null +++ b/sentry_sdk/tracing_utils.py @@ -0,0 +1,194 @@ +import re +import contextlib +import math + +from numbers import Real + +import sentry_sdk + +from sentry_sdk.utils import ( + capture_internal_exceptions, + logger, + to_string, +) +from sentry_sdk._compat import PY2 +from sentry_sdk._types import MYPY + +if PY2: + from collections import Mapping +else: + from collections.abc import Mapping + +if MYPY: + import typing + + from typing import Generator + from typing import Optional + from typing import Any + from typing import Dict + + from sentry_sdk.tracing import Span + + +SENTRY_TRACE_REGEX = re.compile( + "^[ \t]*" # whitespace + "([0-9a-f]{32})?" # trace_id + "-?([0-9a-f]{16})?" # span_id + "-?([01])?" # sampled + "[ \t]*$" # whitespace +) + + +class EnvironHeaders(Mapping): # type: ignore + def __init__( + self, + environ, # type: typing.Mapping[str, str] + prefix="HTTP_", # type: str + ): + # type: (...) -> None + self.environ = environ + self.prefix = prefix + + def __getitem__(self, key): + # type: (str) -> Optional[Any] + return self.environ[self.prefix + key.replace("-", "_").upper()] + + def __len__(self): + # type: () -> int + return sum(1 for _ in iter(self)) + + def __iter__(self): + # type: () -> Generator[str, None, None] + for k in self.environ: + if not isinstance(k, str): + continue + + k = k.replace("-", "_").upper() + if not k.startswith(self.prefix): + continue + + yield k[len(self.prefix) :] + + +def has_tracing_enabled(options): + # type: (Dict[str, Any]) -> bool + """ + Returns True if either traces_sample_rate or traces_sampler is + non-zero/defined, False otherwise. + """ + + return bool( + options.get("traces_sample_rate") is not None + or options.get("traces_sampler") is not None + ) + + +def is_valid_sample_rate(rate): + # type: (Any) -> bool + """ + Checks the given sample rate to make sure it is valid type and value (a + boolean or a number between 0 and 1, inclusive). + """ + + # both booleans and NaN are instances of Real, so a) checking for Real + # checks for the possibility of a boolean also, and b) we have to check + # separately for NaN + if not isinstance(rate, Real) or math.isnan(rate): + logger.warning( + "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( + rate=rate, type=type(rate) + ) + ) + return False + + # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False + rate = float(rate) + if rate < 0 or rate > 1: + logger.warning( + "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( + rate=rate + ) + ) + return False + + return True + + +@contextlib.contextmanager +def record_sql_queries( + hub, # type: sentry_sdk.Hub + cursor, # type: Any + query, # type: Any + params_list, # type: Any + paramstyle, # type: Optional[str] + executemany, # type: bool +): + # type: (...) -> Generator[Span, None, None] + + # TODO: Bring back capturing of params by default + if hub.client and hub.client.options["_experiments"].get( + "record_sql_params", False + ): + if not params_list or params_list == [None]: + params_list = None + + if paramstyle == "pyformat": + paramstyle = "format" + else: + params_list = None + paramstyle = None + + query = _format_sql(cursor, query) + + data = {} + if params_list is not None: + data["db.params"] = params_list + if paramstyle is not None: + data["db.paramstyle"] = paramstyle + if executemany: + data["db.executemany"] = True + + with capture_internal_exceptions(): + hub.add_breadcrumb(message=query, category="query", data=data) + + with hub.start_span(op="db", description=query) as span: + for k, v in data.items(): + span.set_data(k, v) + yield span + + +def maybe_create_breadcrumbs_from_span(hub, span): + # type: (sentry_sdk.Hub, Span) -> None + if span.op == "redis": + hub.add_breadcrumb( + message=span.description, type="redis", category="redis", data=span._tags + ) + elif span.op == "http": + hub.add_breadcrumb(type="http", category="httplib", data=span._data) + elif span.op == "subprocess": + hub.add_breadcrumb( + type="subprocess", + category="subprocess", + message=span.description, + data=span._data, + ) + + +def _format_sql(cursor, sql): + # type: (Any, str) -> Optional[str] + + real_sql = None + + # If we're using psycopg2, it could be that we're + # looking at a query that uses Composed objects. Use psycopg2's mogrify + # function to format the query. We lose per-parameter trimming but gain + # accuracy in formatting. + try: + if hasattr(cursor, "mogrify"): + real_sql = cursor.mogrify(sql) + if isinstance(real_sql, bytes): + real_sql = real_sql.decode(cursor.connection.encoding) + except Exception: + real_sql = None + + return real_sql or to_string(sql) diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 672110ada2..758b4be2da 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -3,7 +3,8 @@ import pytest from sentry_sdk import Hub, start_span, start_transaction -from sentry_sdk.tracing import Transaction, _is_valid_sample_rate +from sentry_sdk.tracing import Transaction +from sentry_sdk.tracing_utils import is_valid_sample_rate from sentry_sdk.utils import logger try: @@ -56,7 +57,7 @@ def test_no_double_sampling(sentry_init, capture_events): ) def test_accepts_valid_sample_rate(rate): with mock.patch.object(logger, "warning", mock.Mock()): - result = _is_valid_sample_rate(rate) + result = is_valid_sample_rate(rate) assert logger.warning.called is False assert result is True @@ -77,7 +78,7 @@ def test_accepts_valid_sample_rate(rate): ) def test_warns_on_invalid_sample_rate(rate, StringContaining): # noqa: N803 with mock.patch.object(logger, "warning", mock.Mock()): - result = _is_valid_sample_rate(rate) + result = is_valid_sample_rate(rate) logger.warning.assert_any_call(StringContaining("Given sample rate is invalid")) assert result is False From c994b09c3c3156e701a72192845e3cc39ad0eb8a Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Thu, 25 Feb 2021 20:27:59 -0800 Subject: [PATCH 2/7] feat(tracing): Handle `tracestate` HTTP headers/correlation context envelope headers (#971) --- sentry_sdk/client.py | 29 ++- sentry_sdk/hub.py | 3 +- sentry_sdk/tracing.py | 126 ++++++++----- sentry_sdk/tracing_utils.py | 180 +++++++++++++++++++ sentry_sdk/utils.py | 42 +++++ tests/test_envelope.py | 73 ++++++-- tests/tracing/test_http_headers.py | 230 ++++++++++++++++++++++++ tests/tracing/test_integration_tests.py | 50 +++--- tests/tracing/test_misc.py | 33 +++- tests/tracing/test_sampling.py | 4 +- tests/utils/test_general.py | 57 +++++- 11 files changed, 722 insertions(+), 105 deletions(-) create mode 100644 tests/tracing/test_http_headers.py diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7687baa76f..6da9870a5f 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -22,6 +22,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope +from sentry_sdk.tracing_utils import reinflate_tracestate from sentry_sdk._types import MYPY @@ -329,15 +330,29 @@ def capture_event( attachments = hint.get("attachments") is_transaction = event_opt.get("type") == "transaction" + # this is outside of the `if` immediately below because even if we don't + # use the value, we want to make sure we remove it before the event is + # sent (which the `.pop()` does) + raw_tracestate = ( + event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", "") + ) + + # Transactions or events with attachments should go to the /envelope/ + # endpoint. if is_transaction or attachments: - # Transactions or events with attachments should go to the - # /envelope/ endpoint. - envelope = Envelope( - headers={ - "event_id": event_opt["event_id"], - "sent_at": format_timestamp(datetime.utcnow()), - } + + headers = { + "event_id": event_opt["event_id"], + "sent_at": format_timestamp(datetime.utcnow()), + } + + tracestate_data = reinflate_tracestate( + raw_tracestate.replace("sentry=", "") ) + if tracestate_data: + headers["trace"] = tracestate_data + + envelope = Envelope(headers=headers) if is_transaction: envelope.add_transaction(event_opt) diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index 1976aaba34..addca57417 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -700,7 +700,8 @@ def iter_trace_propagation_headers(self, span=None): if not propagate_traces: return - yield "sentry-trace", span.to_traceparent() + for header in span.iter_headers(): + yield header GLOBAL_HUB = Hub() diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index ae7b75bef2..2fa06a85b4 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -8,8 +8,10 @@ from sentry_sdk.utils import logger from sentry_sdk.tracing_utils import ( - SENTRY_TRACE_REGEX, EnvironHeaders, + compute_tracestate_entry, + extract_sentrytrace_data, + extract_tracestate_data, has_tracing_enabled, is_valid_sample_rate, maybe_create_breadcrumbs_from_span, @@ -210,11 +212,12 @@ def continue_from_environ( # type: (...) -> Transaction """ Create a Transaction with the given params, then add in data pulled from - the 'sentry-trace' header in the environ (if any) before returning the - Transaction. + the 'sentry-trace' and 'tracestate' headers from the environ (if any) + before returning the Transaction. - If the 'sentry-trace' header is malformed or missing, just create and - return a Transaction instance with the given params. + This is different from `continue_from_headers` in that it assumes header + names in the form "HTTP_HEADER_NAME" - such as you would get from a wsgi + environ - rather than the form "header-name". """ if cls is Span: logger.warning( @@ -231,28 +234,30 @@ def continue_from_headers( ): # type: (...) -> Transaction """ - Create a Transaction with the given params, then add in data pulled from - the 'sentry-trace' header (if any) before returning the Transaction. - - If the 'sentry-trace' header is malformed or missing, just create and - return a Transaction instance with the given params. + Create a transaction with the given params (including any data pulled from + the 'sentry-trace' and 'tracestate' headers). """ + # TODO move this to the Transaction class if cls is Span: logger.warning( "Deprecated: use Transaction.continue_from_headers " "instead of Span.continue_from_headers." ) - transaction = Transaction.from_traceparent( - headers.get("sentry-trace"), **kwargs - ) - if transaction is None: - transaction = Transaction(**kwargs) + + kwargs.update(extract_sentrytrace_data(headers.get("sentry-trace"))) + kwargs.update(extract_tracestate_data(headers.get("tracestate"))) + + transaction = Transaction(**kwargs) transaction.same_process_as_parent = False + return transaction def iter_headers(self): # type: () -> Generator[Tuple[str, str], None, None] yield "sentry-trace", self.to_traceparent() + tracestate = self.to_tracestate() + if tracestate: + yield "tracestate", tracestate @classmethod def from_traceparent( @@ -262,46 +267,21 @@ def from_traceparent( ): # type: (...) -> Optional[Transaction] """ + DEPRECATED: Use Transaction.continue_from_headers(headers, **kwargs) + Create a Transaction with the given params, then add in data pulled from the given 'sentry-trace' header value before returning the Transaction. - If the header value is malformed or missing, just create and return a - Transaction instance with the given params. """ - if cls is Span: - logger.warning( - "Deprecated: use Transaction.from_traceparent " - "instead of Span.from_traceparent." - ) + logger.warning( + "Deprecated: Use Transaction.continue_from_headers(headers, **kwargs) " + "instead of from_traceparent(traceparent, **kwargs)" + ) if not traceparent: return None - if traceparent.startswith("00-") and traceparent.endswith("-00"): - traceparent = traceparent[3:-3] - - match = SENTRY_TRACE_REGEX.match(str(traceparent)) - if match is None: - return None - - trace_id, parent_span_id, sampled_str = match.groups() - - if trace_id is not None: - trace_id = "{:032x}".format(int(trace_id, 16)) - if parent_span_id is not None: - parent_span_id = "{:016x}".format(int(parent_span_id, 16)) - - if sampled_str: - parent_sampled = sampled_str != "0" # type: Optional[bool] - else: - parent_sampled = None - - return Transaction( - trace_id=trace_id, - parent_span_id=parent_span_id, - parent_sampled=parent_sampled, - **kwargs - ) + return cls.continue_from_headers({"sentry-trace": traceparent}, **kwargs) def to_traceparent(self): # type: () -> str @@ -312,6 +292,34 @@ def to_traceparent(self): sampled = "0" return "%s-%s-%s" % (self.trace_id, self.span_id, sampled) + def to_tracestate(self): + # type: () -> Optional[str] + """ + Generates the `tracestate` header value to attach to outgoing requests. + """ + header_value = None + + if isinstance(self, Transaction): + transaction = self # type: Optional[Transaction] + else: + transaction = self._containing_transaction + + # we should have the relevant values stored on the transaction, but if + # this is an orphan span, make a new value + if transaction: + sentry_tracestate = transaction._sentry_tracestate + third_party_tracestate = transaction._third_party_tracestate + else: + sentry_tracestate = compute_tracestate_entry(self) + third_party_tracestate = None + + header_value = sentry_tracestate + + if third_party_tracestate: + header_value = header_value + "," + third_party_tracestate + + return header_value + def set_tag(self, key, value): # type: (str, Any) -> None self._tags[key] = value @@ -418,16 +426,36 @@ def get_trace_context(self): if self.status: rv["status"] = self.status + if isinstance(self, Transaction): + transaction = self # type: Optional[Transaction] + else: + transaction = self._containing_transaction + + if transaction: + rv["tracestate"] = transaction._sentry_tracestate + return rv class Transaction(Span): - __slots__ = ("name", "parent_sampled") + __slots__ = ( + "name", + "parent_sampled", + # the sentry portion of the `tracestate` header used to transmit + # correlation context for server-side dynamic sampling, of the form + # `sentry=xxxxx`, where `xxxxx` is the base64-encoded json of the + # correlation context data, missing trailing any = + "_sentry_tracestate", + # tracestate data from other vendors, of the form `dogs=yes,cats=maybe` + "_third_party_tracestate", + ) def __init__( self, name="", # type: str parent_sampled=None, # type: Optional[bool] + sentry_tracestate=None, # type: Optional[str] + third_party_tracestate=None, # type: Optional[str] **kwargs # type: Any ): # type: (...) -> None @@ -443,6 +471,8 @@ def __init__( Span.__init__(self, **kwargs) self.name = name self.parent_sampled = parent_sampled + self._sentry_tracestate = sentry_tracestate or compute_tracestate_entry(self) + self._third_party_tracestate = third_party_tracestate def __repr__(self): # type: () -> str diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 62489105c1..f13a81a026 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -1,5 +1,6 @@ import re import contextlib +import json import math from numbers import Real @@ -8,8 +9,11 @@ from sentry_sdk.utils import ( capture_internal_exceptions, + Dsn, logger, + to_base64, to_string, + from_base64, ) from sentry_sdk._compat import PY2 from sentry_sdk._types import MYPY @@ -26,6 +30,7 @@ from typing import Optional from typing import Any from typing import Dict + from typing import Union from sentry_sdk.tracing import Span @@ -38,6 +43,38 @@ "[ \t]*$" # whitespace ) +# This is a normal base64 regex, modified to reflect that fact that we strip the +# trailing = or == off +base64_stripped = ( + # any of the characters in the base64 "alphabet", in multiples of 4 + "([a-zA-Z0-9+/]{4})*" + # either nothing or 2 or 3 base64-alphabet characters (see + # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding for + # why there's never only 1 extra character) + "([a-zA-Z0-9+/]{2,3})?" +) + +# comma-delimited list of entries of the form `xxx=yyy` +tracestate_entry = "[^=]+=[^=]+" +TRACESTATE_ENTRIES_REGEX = re.compile( + # one or more xxxxx=yyyy entries + "^({te})+" + # each entry except the last must be followed by a comma + "(,|$)".format(te=tracestate_entry) +) + +# this doesn't check that the value is valid, just that there's something there +# of the form `sentry=xxxx` +SENTRY_TRACESTATE_ENTRY_REGEX = re.compile( + # either sentry is the first entry or there's stuff immediately before it, + # ending in a commma (this prevents matching something like `coolsentry=xxx`) + "(?:^|.+,)" + # sentry's part, not including the potential comma + "(sentry=[^,]*)" + # either there's a comma and another vendor's entry or we end + "(?:,.+|$)" +) + class EnvironHeaders(Mapping): # type: ignore def __init__( @@ -174,6 +211,149 @@ def maybe_create_breadcrumbs_from_span(hub, span): ) +def extract_sentrytrace_data(header): + # type: (Optional[str]) -> typing.Mapping[str, Union[str, bool, None]] + """ + Given a `sentry-trace` header string, return a dictionary of data. + """ + trace_id = parent_span_id = parent_sampled = None + + if header: + if header.startswith("00-") and header.endswith("-00"): + header = header[3:-3] + + match = SENTRY_TRACE_REGEX.match(header) + + if match: + trace_id, parent_span_id, sampled_str = match.groups() + + if trace_id: + trace_id = "{:032x}".format(int(trace_id, 16)) + if parent_span_id: + parent_span_id = "{:016x}".format(int(parent_span_id, 16)) + if sampled_str: + parent_sampled = sampled_str != "0" + + return { + "trace_id": trace_id, + "parent_span_id": parent_span_id, + "parent_sampled": parent_sampled, + } + + +def extract_tracestate_data(header): + # type: (Optional[str]) -> typing.Mapping[str, Optional[str]] + """ + Extracts the sentry tracestate value and any third-party data from the given + tracestate header, returning a dictionary of data. + """ + sentry_entry = third_party_entry = None + before = after = "" + + if header: + # find sentry's entry, if any + sentry_match = SENTRY_TRACESTATE_ENTRY_REGEX.search(header) + + if sentry_match: + sentry_entry = sentry_match.group(1) + + # remove the commas after the split so we don't end up with + # `xxx=yyy,,zzz=qqq` (double commas) when we put them back together + before, after = map(lambda s: s.strip(","), header.split(sentry_entry)) + + # extract sentry's value from its entry and test to make sure it's + # valid; if it isn't, discard the entire entry so that a new one + # will be created + sentry_value = sentry_entry.replace("sentry=", "") + if not re.search("^{b64}$".format(b64=base64_stripped), sentry_value): + sentry_entry = None + else: + after = header + + # if either part is invalid or empty, remove it before gluing them together + third_party_entry = ( + ",".join(filter(TRACESTATE_ENTRIES_REGEX.search, [before, after])) or None + ) + + return { + "sentry_tracestate": sentry_entry, + "third_party_tracestate": third_party_entry, + } + + +def compute_tracestate_value(data): + # type: (typing.Mapping[str, str]) -> str + """ + Computes a new tracestate value using the given data. + """ + + tracestate_json = json.dumps(data) + + # Base64-encoded strings always come out with a length which is a multiple + # of 4. In order to achieve this, the end is padded with one or more `=` + # signs. Because the tracestate standard calls for using `=` signs between + # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion + # we strip the `=` + return (to_base64(tracestate_json) or "").rstrip("=") + + +def compute_tracestate_entry(span): + # type: (Span) -> str + """ + Computes a new tracestate value for the span. + """ + data = {} + + client = (span.hub or sentry_sdk.Hub.current).client + + # if there's no client and/or no DSN, we're not sending anything anywhere, + # so it's fine to not have any tracestate data + if client and client.options.get("dsn"): + options = client.options + data = { + "trace_id": span.trace_id, + "environment": options["environment"], + "release": options.get("release"), + "public_key": Dsn(options["dsn"]).public_key, + } + + return "sentry=" + compute_tracestate_value(data) + + +def reinflate_tracestate(encoded_tracestate): + # type: (str) -> typing.Optional[Mapping[str, str]] + """ + Given a sentry tracestate value in its encoded form, translate it back into + a dictionary of data. + """ + inflated_tracestate = None + + if encoded_tracestate: + # Base64-encoded strings always come out with a length which is a + # multiple of 4. In order to achieve this, the end is padded with one or + # more `=` signs. Because the tracestate standard calls for using `=` + # signs between vendor name and value (`sentry=xxx,dogsaregreat=yyy`), + # to avoid confusion we strip the `=` when the data is initially + # encoded. Python's decoding function requires they be put back. + # Fortunately, it doesn't complain if there are too many, so we just + # attach two `=` on spec (there will never be more than 2, see + # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding). + tracestate_json = from_base64(encoded_tracestate + "==") + + try: + assert tracestate_json is not None + inflated_tracestate = json.loads(tracestate_json) + except Exception as err: + logger.warning( + ( + "Unable to attach tracestate data to envelope header: {err}" + + "\nTracestate value is {encoded_tracestate}" + ).format(err=err, encoded_tracestate=encoded_tracestate), + ) + + return inflated_tracestate + + def _format_sql(cursor, sql): # type: (Any, str) -> Optional[str] diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 43b63b41ac..8fb03e014d 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1,3 +1,4 @@ +import base64 import json import linecache import logging @@ -5,6 +6,7 @@ import sys import threading import subprocess +import re from datetime import datetime @@ -39,6 +41,7 @@ MAX_STRING_LENGTH = 512 MAX_FORMAT_PARAM_LENGTH = 128 +BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$") def json_dumps(data): @@ -968,3 +971,42 @@ def run(self): integer_configured_timeout ) ) + + +def to_base64(original): + # type: (str) -> Optional[str] + """ + Convert a string to base64, via UTF-8. Returns None on invalid input. + """ + base64_string = None + + try: + utf8_bytes = original.encode("UTF-8") + base64_bytes = base64.b64encode(utf8_bytes) + base64_string = base64_bytes.decode("UTF-8") + except Exception as err: + logger.warning("Unable to encode {orig} to base64:".format(orig=original), err) + + return base64_string + + +def from_base64(base64_string): + # type: (str) -> Optional[str] + """ + Convert a string from base64, via UTF-8. Returns None on invalid input. + """ + utf8_string = None + + try: + only_valid_chars = BASE64_ALPHABET.match(base64_string) + assert only_valid_chars + + base64_bytes = base64_string.encode("UTF-8") + utf8_bytes = base64.b64decode(base64_bytes) + utf8_string = utf8_bytes.decode("UTF-8") + except Exception as err: + logger.warning( + "Unable to decode {b64} from base64:".format(b64=base64_string), err + ) + + return utf8_string diff --git a/tests/test_envelope.py b/tests/test_envelope.py index e795e9d93c..877c67bace 100644 --- a/tests/test_envelope.py +++ b/tests/test_envelope.py @@ -1,36 +1,49 @@ from sentry_sdk.envelope import Envelope from sentry_sdk.session import Session +from sentry_sdk import capture_event +from sentry_sdk.tracing_utils import compute_tracestate_value +import sentry_sdk.client def generate_transaction_item(): return { - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "event_id": "15210411201320122115110420122013", "type": "transaction", - "transaction": "/organizations/:orgId/performance/:eventSlug/", - "start_timestamp": 1597976392.6542819, - "timestamp": 1597976400.6189718, + "transaction": "/interactions/other-dogs/new-dog", + "start_timestamp": 1353568872.11122131, + "timestamp": 1356942672.09040815, "contexts": { "trace": { - "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", - "span_id": "FA90FDEAD5F74052", - "type": "trace", + "trace_id": "12312012123120121231201212312012", + "span_id": "0415201309082013", + "parent_span_id": None, + "description": "", + "op": "greeting.sniff", + "tracestate": compute_tracestate_value( + { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + ), } }, "spans": [ { "description": "", - "op": "react.mount", - "parent_span_id": "8f5a2b8768cafb4e", - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1597976393.4619668, - "timestamp": 1597976393.4718769, - "trace_id": "ff62a8b040f340bda5d830223def1d81", + "op": "greeting.sniff", + "parent_span_id": None, + "span_id": "0415201309082013", + "start_timestamp": 1353568872.11122131, + "timestamp": 1356942672.09040815, + "trace_id": "12312012123120121231201212312012", } ], } -def test_basic_event(): +def test_add_and_get_basic_event(): envelope = Envelope() expected = {"message": "Hello, World!"} @@ -39,7 +52,7 @@ def test_basic_event(): assert envelope.get_event() == {"message": "Hello, World!"} -def test_transaction_event(): +def test_add_and_get_transaction_event(): envelope = Envelope() transaction_item = generate_transaction_item() @@ -55,7 +68,7 @@ def test_transaction_event(): assert envelope.get_transaction_event() == transaction_item -def test_session(): +def test_add_and_get_session(): envelope = Envelope() expected = Session() @@ -64,3 +77,31 @@ def test_session(): for item in envelope: if item.type == "session": assert item.payload.json == expected.to_json() + + +def test_envelope_headers(sentry_init, capture_envelopes, monkeypatch): + monkeypatch.setattr( + sentry_sdk.client, + "format_timestamp", + lambda x: "2012-11-21T12:31:12.415908Z", + ) + + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + ) + envelopes = capture_envelopes() + + capture_event(generate_transaction_item()) + + assert len(envelopes) == 1 + + assert envelopes[0].headers == { + "event_id": "15210411201320122115110420122013", + "sent_at": "2012-11-21T12:31:12.415908Z", + "trace": { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + }, + } diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py new file mode 100644 index 0000000000..5c0cad320b --- /dev/null +++ b/tests/tracing/test_http_headers.py @@ -0,0 +1,230 @@ +import json + +import pytest + +from sentry_sdk.tracing import Transaction, Span +from sentry_sdk.tracing_utils import ( + compute_tracestate_value, + extract_sentrytrace_data, + extract_tracestate_data, + reinflate_tracestate, +) +from sentry_sdk.utils import from_base64, to_base64 + + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + + +def test_tracestate_computation(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + ) + + computed_value = transaction._sentry_tracestate.replace("sentry=", "") + # we have to decode and reinflate the data because we can guarantee that the + # order of the entries in the jsonified dict will be the same here as when + # the tracestate is computed + reinflated_trace_data = json.loads(from_base64(computed_value)) + + assert reinflated_trace_data == { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + + +def test_adds_new_tracestate_to_transaction_when_none_given(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + # sentry_tracestate=< value would be passed here > + ) + + assert transaction._sentry_tracestate is not None + + +@pytest.mark.parametrize("sampled", [True, False, None]) +def test_to_traceparent(sentry_init, sampled): + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + sampled=sampled, + ) + + traceparent = transaction.to_traceparent() + + trace_id, parent_span_id, parent_sampled = traceparent.split("-") + assert trace_id == "12312012123120121231201212312012" + assert parent_span_id == transaction.span_id + assert parent_sampled == ( + "1" if sampled is True else "0" if sampled is False else "" + ) + + +def test_to_tracestate(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + # it correctly uses the value from the transaction itself or the span's + # containing transaction + transaction_no_third_party = Transaction( + trace_id="12312012123120121231201212312012", + sentry_tracestate="sentry=doGsaREgReaT", + ) + non_orphan_span = Span() + non_orphan_span._containing_transaction = transaction_no_third_party + assert transaction_no_third_party.to_tracestate() == "sentry=doGsaREgReaT" + assert non_orphan_span.to_tracestate() == "sentry=doGsaREgReaT" + + # it combines sentry and third-party values correctly + transaction_with_third_party = Transaction( + trace_id="12312012123120121231201212312012", + sentry_tracestate="sentry=doGsaREgReaT", + third_party_tracestate="maisey=silly", + ) + assert ( + transaction_with_third_party.to_tracestate() + == "sentry=doGsaREgReaT,maisey=silly" + ) + + # it computes a tracestate from scratch for orphan transactions + orphan_span = Span( + trace_id="12312012123120121231201212312012", + ) + assert orphan_span._containing_transaction is None + assert orphan_span.to_tracestate() == "sentry=" + compute_tracestate_value( + { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + ) + + +@pytest.mark.parametrize("sampling_decision", [True, False]) +def test_sentrytrace_extraction(sampling_decision): + sentrytrace_header = "12312012123120121231201212312012-0415201309082013-{}".format( + 1 if sampling_decision is True else 0 + ) + assert extract_sentrytrace_data(sentrytrace_header) == { + "trace_id": "12312012123120121231201212312012", + "parent_span_id": "0415201309082013", + "parent_sampled": sampling_decision, + } + + +@pytest.mark.parametrize( + ("incoming_header", "expected_sentry_value", "expected_third_party"), + [ + # sentry only + ("sentry=doGsaREgReaT", "sentry=doGsaREgReaT", None), + # sentry only, invalid (`!` isn't a valid base64 character) + ("sentry=doGsaREgReaT!", None, None), + # stuff before + ("maisey=silly,sentry=doGsaREgReaT", "sentry=doGsaREgReaT", "maisey=silly"), + # stuff after + ("sentry=doGsaREgReaT,maisey=silly", "sentry=doGsaREgReaT", "maisey=silly"), + # stuff before and after + ( + "charlie=goofy,sentry=doGsaREgReaT,maisey=silly", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple after + ( + "sentry=doGsaREgReaT,charlie=goofy,maisey=silly", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before and after + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT,bodhi=floppy,cory=loyal", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly,bodhi=floppy,cory=loyal", + ), + # only third-party data + ("maisey=silly", None, "maisey=silly"), + # invalid third-party data, valid sentry data + ("maisey_is_silly,sentry=doGsaREgReaT", "sentry=doGsaREgReaT", None), + # valid third-party data, invalid sentry data + ("maisey=silly,sentry=doGsaREgReaT!", None, "maisey=silly"), + # nothing valid at all + ("maisey_is_silly,sentry=doGsaREgReaT!", None, None), + ], +) +def test_tracestate_extraction( + incoming_header, expected_sentry_value, expected_third_party +): + assert extract_tracestate_data(incoming_header) == { + "sentry_tracestate": expected_sentry_value, + "third_party_tracestate": expected_third_party, + } + + +def test_iter_headers(sentry_init, monkeypatch): + monkeypatch.setattr( + Transaction, + "to_traceparent", + mock.Mock(return_value="12312012123120121231201212312012-0415201309082013-0"), + ) + monkeypatch.setattr( + Transaction, + "to_tracestate", + mock.Mock(return_value="sentry=doGsaREgReaT,charlie=goofy"), + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + headers = dict(transaction.iter_headers()) + assert ( + headers["sentry-trace"] == "12312012123120121231201212312012-0415201309082013-0" + ) + assert headers["tracestate"] == "sentry=doGsaREgReaT,charlie=goofy" + + +@pytest.mark.parametrize( + "data", + [ # comes out with no trailing `=` + {"name": "Maisey", "birthday": "12/31/12"}, + # comes out with one trailing `=` + {"dogs": "yes", "cats": "maybe"}, + # comes out with two trailing `=` + {"name": "Charlie", "birthday": "11/21/12"}, + ], +) +def test_tracestate_reinflation(data): + encoded_tracestate = to_base64(json.dumps(data)).strip("=") + assert reinflate_tracestate(encoded_tracestate) == data diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py index b2ce2e3a18..f9530d31b3 100644 --- a/tests/tracing/test_integration_tests.py +++ b/tests/tracing/test_integration_tests.py @@ -47,46 +47,46 @@ def test_basic(sentry_init, capture_events, sample_rate): @pytest.mark.parametrize("sampled", [True, False, None]) -@pytest.mark.parametrize( - "sample_rate", [0.0, 1.0] -) # ensure sampling decision is actually passed along via headers +@pytest.mark.parametrize("sample_rate", [0.0, 1.0]) def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate): + """ + Ensure data is actually passed along via headers, and that they are read + correctly. + """ sentry_init(traces_sample_rate=sample_rate) events = capture_events() # make a parent transaction (normally this would be in a different service) - with start_transaction(name="hi", sampled=True if sample_rate == 0 else None): + with start_transaction( + name="hi", sampled=True if sample_rate == 0 else None + ) as parent_transaction: with start_span() as old_span: old_span.sampled = sampled headers = dict(Hub.current.iter_trace_propagation_headers(old_span)) - - # test that the sampling decision is getting encoded in the header correctly - header = headers["sentry-trace"] - if sampled is True: - assert header.endswith("-1") - if sampled is False: - assert header.endswith("-0") - if sampled is None: - assert header.endswith("-") - - # child transaction, to prove that we can read 'sentry-trace' header data - # correctly - transaction = Transaction.continue_from_headers(headers, name="WRONG") - assert transaction is not None - assert transaction.parent_sampled == sampled - assert transaction.trace_id == old_span.trace_id - assert transaction.same_process_as_parent is False - assert transaction.parent_span_id == old_span.span_id - assert transaction.span_id != old_span.span_id + tracestate = parent_transaction._sentry_tracestate + + # child transaction, to prove that we can read 'sentry-trace' and + # `tracestate` header data correctly + child_transaction = Transaction.continue_from_headers(headers, name="WRONG") + assert child_transaction is not None + assert child_transaction.parent_sampled == sampled + assert child_transaction.trace_id == old_span.trace_id + assert child_transaction.same_process_as_parent is False + assert child_transaction.parent_span_id == old_span.span_id + assert child_transaction.span_id != old_span.span_id + assert child_transaction._sentry_tracestate == tracestate # add child transaction to the scope, to show that the captured message will # be tagged with the trace id (since it happens while the transaction is # open) - with start_transaction(transaction): + with start_transaction(child_transaction): with configure_scope() as scope: + # change the transaction name from "WRONG" to make sure the change + # is reflected in the final data scope.transaction = "ho" capture_message("hello") + # in this case the child transaction won't be captured if sampled is False or (sample_rate == 0 and sampled is None): trace1, message = events @@ -100,7 +100,7 @@ def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate assert ( trace1["contexts"]["trace"]["trace_id"] == trace2["contexts"]["trace"]["trace_id"] - == transaction.trace_id + == child_transaction.trace_id == message["contexts"]["trace"]["trace_id"] ) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index f5b8aa5e85..1ab4dfcb63 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -25,30 +25,51 @@ def test_span_trimming(sentry_init, capture_events): assert span2["op"] == "foo1" -def test_transaction_method_signature(sentry_init, capture_events): +def test_transaction_naming(sentry_init, capture_events): sentry_init(traces_sample_rate=1.0) events = capture_events() + # only transactions have names - spans don't with pytest.raises(TypeError): start_span(name="foo") assert len(events) == 0 + # default name in event if no name is passed with start_transaction() as transaction: pass - assert transaction.name == "" assert len(events) == 1 + assert events[0]["transaction"] == "" + # the name can be set once the transaction's already started with start_transaction() as transaction: transaction.name = "name-known-after-transaction-started" assert len(events) == 2 + assert events[1]["transaction"] == "name-known-after-transaction-started" + # passing in a name works, too with start_transaction(name="a"): pass assert len(events) == 3 + assert events[2]["transaction"] == "a" - with start_transaction(Transaction(name="c")): - pass - assert len(events) == 4 + +def test_start_transaction(sentry_init): + sentry_init(traces_sample_rate=1.0) + + # you can have it start a transaction for you + result1 = start_transaction( + name="/interactions/other-dogs/new-dog", op="greeting.sniff" + ) + assert isinstance(result1, Transaction) + assert result1.name == "/interactions/other-dogs/new-dog" + assert result1.op == "greeting.sniff" + + # or you can pass it an already-created transaction + preexisting_transaction = Transaction( + name="/interactions/other-dogs/new-dog", op="greeting.sniff" + ) + result2 = start_transaction(preexisting_transaction) + assert result2 is preexisting_transaction def test_finds_transaction_on_scope(sentry_init): @@ -77,7 +98,7 @@ def test_finds_transaction_on_scope(sentry_init): assert scope._span.name == "dogpark" -def test_finds_transaction_when_decedent_span_is_on_scope( +def test_finds_transaction_when_descendent_span_is_on_scope( sentry_init, ): sentry_init(traces_sample_rate=1.0) diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 758b4be2da..6f09b451e1 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -232,7 +232,9 @@ def test_passes_parent_sampling_decision_in_sampling_context( ) ) - transaction = Transaction.from_traceparent(sentry_trace_header, name="dogpark") + transaction = Transaction.continue_from_headers( + headers={"sentry-trace": sentry_trace_header}, name="dogpark" + ) spy = mock.Mock(wraps=transaction) start_transaction(transaction=spy) diff --git a/tests/utils/test_general.py b/tests/utils/test_general.py index 370a6327ff..03be52ca17 100644 --- a/tests/utils/test_general.py +++ b/tests/utils/test_general.py @@ -13,8 +13,10 @@ filename_for_module, handle_in_app_impl, iter_event_stacktraces, + to_base64, + from_base64, ) -from sentry_sdk._compat import text_type +from sentry_sdk._compat import text_type, string_types try: @@ -168,3 +170,56 @@ def test_iter_stacktraces(): ) == {1, 2, 3} ) + + +@pytest.mark.parametrize( + ("original", "base64_encoded"), + [ + # ascii only + ("Dogs are great!", "RG9ncyBhcmUgZ3JlYXQh"), + # emoji + (u"🐶", "8J+Qtg=="), + # non-ascii + ( + u"Καλό κορίτσι, Μάιζεϊ!", + "zprOsc67z4wgzrrOv8+Bzq/PhM+DzrksIM6czqzOuc62zrXPiiE=", + ), + # mix of ascii and non-ascii + ( + u"Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.", + "T2YgbWFyZ2lyIGh1bmRhciEgw4lnIGdlcmkgcsOhw7AgZnlyaXIgYcOwIMOpZyDDvnVyZmkgc3TDpnJyYSByw7ptLg==", + ), + ], +) +def test_successful_base64_conversion(original, base64_encoded): + # all unicode characters should be handled correctly + assert to_base64(original) == base64_encoded + assert from_base64(base64_encoded) == original + + # "to" and "from" should be inverses + assert from_base64(to_base64(original)) == original + assert to_base64(from_base64(base64_encoded)) == base64_encoded + + +@pytest.mark.parametrize( + "input", + [ + 1231, # incorrect type + True, # incorrect type + [], # incorrect type + {}, # incorrect type + None, # incorrect type + "yayfordogs", # wrong length + "#dog", # invalid ascii character + "🐶", # non-ascii character + ], +) +def test_failed_base64_conversion(input): + # conversion from base64 should fail if given input of the wrong type or + # input which isn't a valid base64 string + assert from_base64(input) is None + + # any string can be converted to base64, so only type errors will cause + # failures + if type(input) not in string_types: + assert to_base64(input) is None From d634c6013c93b2ef1c0c860407445f9fab22bead Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Wed, 25 Aug 2021 06:32:44 -0700 Subject: [PATCH 3/7] chore(tracing): Pre-lazy-tracestate cleanup (#1168) Mostly docstrings and comments. Also a stray, no-longer-necessary variable initialization and a few log statements. Pulled from an upcoming PR in order to make it less noisy. --- sentry_sdk/client.py | 2 +- sentry_sdk/integrations/httpx.py | 11 +++++++++++ sentry_sdk/integrations/stdlib.py | 7 ++++++- sentry_sdk/scope.py | 2 +- sentry_sdk/tracing.py | 28 ++++++++++++++++++---------- sentry_sdk/tracing_utils.py | 7 ++++++- 6 files changed, 43 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 6da9870a5f..f4789d5306 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -332,7 +332,7 @@ def capture_event( # this is outside of the `if` immediately below because even if we don't # use the value, we want to make sure we remove it before the event is - # sent (which the `.pop()` does) + # sent raw_tracestate = ( event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", "") ) diff --git a/sentry_sdk/integrations/httpx.py b/sentry_sdk/integrations/httpx.py index af67315338..3d4bbf8300 100644 --- a/sentry_sdk/integrations/httpx.py +++ b/sentry_sdk/integrations/httpx.py @@ -1,5 +1,6 @@ from sentry_sdk import Hub from sentry_sdk.integrations import Integration, DidNotEnable +from sentry_sdk.utils import logger from sentry_sdk._types import MYPY @@ -45,6 +46,11 @@ def send(self, request, **kwargs): span.set_data("method", request.method) span.set_data("url", str(request.url)) for key, value in hub.iter_trace_propagation_headers(): + logger.debug( + "[Tracing] Adding `{key}` header {value} to outgoing request to {url}.".format( + key=key, value=value, url=request.url + ) + ) request.headers[key] = value rv = real_send(self, request, **kwargs) @@ -72,6 +78,11 @@ async def send(self, request, **kwargs): span.set_data("method", request.method) span.set_data("url", str(request.url)) for key, value in hub.iter_trace_propagation_headers(): + logger.debug( + "[Tracing] Adding `{key}` header {value} to outgoing request to {url}.".format( + key=key, value=value, url=request.url + ) + ) request.headers[key] = value rv = await real_send(self, request, **kwargs) diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index 405b95c372..adea742b2d 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -7,7 +7,7 @@ from sentry_sdk.integrations import Integration from sentry_sdk.scope import add_global_event_processor from sentry_sdk.tracing_utils import EnvironHeaders -from sentry_sdk.utils import capture_internal_exceptions, safe_repr +from sentry_sdk.utils import capture_internal_exceptions, logger, safe_repr from sentry_sdk._types import MYPY @@ -86,6 +86,11 @@ def putrequest(self, method, url, *args, **kwargs): rv = real_putrequest(self, method, url, *args, **kwargs) for key, value in hub.iter_trace_propagation_headers(span): + logger.debug( + "[Tracing] Adding `{key}` header {value} to outgoing request to {real_url}.".format( + key=key, value=value, real_url=real_url + ) + ) self.putheader(key, value) self._sentrysdk_span = span diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index ccf6f4e086..f7070e8a02 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -174,7 +174,7 @@ def transaction(self, value): # anything set in the scope. # XXX: note that with the introduction of the Scope.transaction getter, # there is a semantic and type mismatch between getter and setter. The - # getter returns a transaction, the setter sets a transaction name. + # getter returns a Transaction, the setter sets a transaction name. # Without breaking version compatibility, we could make the setter set a # transaction name or transaction (self._span) depending on the type of # the value argument. diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 2fa06a85b4..7235adbb95 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -22,12 +22,12 @@ if MYPY: import typing - from typing import Generator from typing import Optional from typing import Any from typing import Dict from typing import List from typing import Tuple + from typing import Iterator from sentry_sdk._types import SamplingContext @@ -183,19 +183,19 @@ def start_child(self, **kwargs): """ kwargs.setdefault("sampled", self.sampled) - rv = Span( + child = Span( trace_id=self.trace_id, span_id=None, parent_span_id=self.span_id, **kwargs ) if isinstance(self, Transaction): - rv._containing_transaction = self + child._containing_transaction = self else: - rv._containing_transaction = self._containing_transaction + child._containing_transaction = self._containing_transaction - rv._span_recorder = recorder = self._span_recorder + child._span_recorder = recorder = self._span_recorder if recorder: - recorder.add(rv) - return rv + recorder.add(child) + return child def new_span(self, **kwargs): # type: (**Any) -> Span @@ -253,9 +253,15 @@ def continue_from_headers( return transaction def iter_headers(self): - # type: () -> Generator[Tuple[str, str], None, None] + # type: () -> Iterator[Tuple[str, str]] + """ + Creates a generator which returns the span's `sentry-trace` and + `tracestate` headers. + """ yield "sentry-trace", self.to_traceparent() + tracestate = self.to_tracestate() + # `tracestate` will only be `None` if there's no client or no DSN if tracestate: yield "tracestate", tracestate @@ -295,9 +301,11 @@ def to_traceparent(self): def to_tracestate(self): # type: () -> Optional[str] """ - Generates the `tracestate` header value to attach to outgoing requests. + Computes the `tracestate` header value using data from the containing + transaction. + + Returns None if there's no client and/or no DSN. """ - header_value = None if isinstance(self, Transaction): transaction = self # type: Optional[Transaction] diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index f13a81a026..d3c53a971e 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -285,6 +285,9 @@ def compute_tracestate_value(data): # type: (typing.Mapping[str, str]) -> str """ Computes a new tracestate value using the given data. + + Note: Returns just the base64-encoded data, NOT the full `sentry=...` + tracestate entry. """ tracestate_json = json.dumps(data) @@ -300,7 +303,9 @@ def compute_tracestate_value(data): def compute_tracestate_entry(span): # type: (Span) -> str """ - Computes a new tracestate value for the span. + Computes a new sentry tracestate for the span. Includes the `sentry=`. + + Will return `None` if there's no client and/or no DSN. """ data = {} From a6a2b6f4e038a29c079768b9b24ed7685e326a7e Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 30 Aug 2021 06:12:53 -0700 Subject: [PATCH 4/7] ref(tracing): Various tweaks to tracestate header handling (#1173) Three small changes here, mostly unrelated. If you're specifically interested in one of them, it's probably easiest to look at them commit by commit in the PR. - Correct return type of `compute_tracestate_entry` to correctly return `None` when we're missing a client or DSN. - Let transactions (effectively) point to themselves as the "containing transaction." In order to avoid a circular reference, this is handled through a new property on both spans and transactions. As a result of this change, we can eliminate a good deal of type-checking that we've been doing up until now. - Switch `tracestate` creation to be lazy. Rather than always creating a value in the transaction constructor, we now wait until we need it - either for an outgoing HTTP header or for the envelope header when sending the transaction event. --- sentry_sdk/client.py | 2 +- sentry_sdk/scope.py | 20 ++---- sentry_sdk/tracing.py | 100 +++++++++++++++++++++-------- sentry_sdk/tracing_utils.py | 8 +-- tests/tracing/test_http_headers.py | 89 ++++++++++++++++++++++++- 5 files changed, 172 insertions(+), 47 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index f4789d5306..6c9eb44939 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -346,7 +346,7 @@ def capture_event( "sent_at": format_timestamp(datetime.utcnow()), } - tracestate_data = reinflate_tracestate( + tracestate_data = raw_tracestate and reinflate_tracestate( raw_tracestate.replace("sentry=", "") ) if tracestate_data: diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index f7070e8a02..fb3bee42f1 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -150,19 +150,13 @@ def transaction(self): if self._span is None: return None - # the span on the scope is itself a transaction - if isinstance(self._span, Transaction): - return self._span - - # the span on the scope isn't a transaction but belongs to one - if self._span._containing_transaction: - return self._span._containing_transaction - - # there's a span (not a transaction) on the scope, but it was started on - # its own, not as the descendant of a transaction (this is deprecated - # behavior, but as long as the start_span function exists, it can still - # happen) - return None + # there is an orphan span on the scope + if self._span.containing_transaction is None: + return None + + # there is either a transaction (which is its own containing + # transaction) or a non-orphan span on the scope + return self._span.containing_transaction @transaction.setter def transaction(self, value): diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 7235adbb95..4a4f8b2e3f 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -73,8 +73,6 @@ class Span(object): "_span_recorder", "hub", "_context_manager_state", - # TODO: rename this "transaction" once we fully and truly deprecate the - # old "transaction" attribute (which was actually the transaction name)? "_containing_transaction", ) @@ -104,6 +102,7 @@ def __init__( hub=None, # type: Optional[sentry_sdk.Hub] status=None, # type: Optional[str] transaction=None, # type: Optional[str] # deprecated + containing_transaction=None, # type: Optional[Transaction] ): # type: (...) -> None self.trace_id = trace_id or uuid.uuid4().hex @@ -117,6 +116,7 @@ def __init__( self.hub = hub self._tags = {} # type: Dict[str, str] self._data = {} # type: Dict[str, Any] + self._containing_transaction = containing_transaction self.start_timestamp = datetime.utcnow() try: # TODO: For Python 3.7+, we could use a clock with ns resolution: @@ -131,7 +131,6 @@ def __init__( self.timestamp = None # type: Optional[datetime] self._span_recorder = None # type: Optional[_SpanRecorder] - self._containing_transaction = None # type: Optional[Transaction] def init_span_recorder(self, maxlen): # type: (int) -> None @@ -172,6 +171,15 @@ def __exit__(self, ty, value, tb): self.finish(hub) scope.span = old_span + @property + def containing_transaction(self): + # type: () -> Optional[Transaction] + + # this is a getter rather than a regular attribute so that transactions + # can return `self` here instead (as a way to prevent them circularly + # referencing themselves) + return self._containing_transaction + def start_child(self, **kwargs): # type: (**Any) -> Span """ @@ -184,14 +192,12 @@ def start_child(self, **kwargs): kwargs.setdefault("sampled", self.sampled) child = Span( - trace_id=self.trace_id, span_id=None, parent_span_id=self.span_id, **kwargs + trace_id=self.trace_id, + parent_span_id=self.span_id, + containing_transaction=self.containing_transaction, + **kwargs ) - if isinstance(self, Transaction): - child._containing_transaction = self - else: - child._containing_transaction = self._containing_transaction - child._span_recorder = recorder = self._span_recorder if recorder: recorder.add(child) @@ -257,6 +263,10 @@ def iter_headers(self): """ Creates a generator which returns the span's `sentry-trace` and `tracestate` headers. + + If the span's containing transaction doesn't yet have a + `sentry_tracestate` value, this will cause one to be generated and + stored. """ yield "sentry-trace", self.to_traceparent() @@ -304,22 +314,24 @@ def to_tracestate(self): Computes the `tracestate` header value using data from the containing transaction. + If the containing transaction doesn't yet have a `sentry_tracestate` + value, this will cause one to be generated and stored. + + If there is no containing transaction, a value will be generated but not + stored. + Returns None if there's no client and/or no DSN. """ - if isinstance(self, Transaction): - transaction = self # type: Optional[Transaction] - else: - transaction = self._containing_transaction + sentry_tracestate = self.get_or_set_sentry_tracestate() + third_party_tracestate = ( + self.containing_transaction._third_party_tracestate + if self.containing_transaction + else None + ) - # we should have the relevant values stored on the transaction, but if - # this is an orphan span, make a new value - if transaction: - sentry_tracestate = transaction._sentry_tracestate - third_party_tracestate = transaction._third_party_tracestate - else: - sentry_tracestate = compute_tracestate_entry(self) - third_party_tracestate = None + if not sentry_tracestate: + return None header_value = sentry_tracestate @@ -328,6 +340,25 @@ def to_tracestate(self): return header_value + def get_or_set_sentry_tracestate(self): + # type: (Span) -> Optional[str] + """ + Read sentry tracestate off of the span's containing transaction. + + If the transaction doesn't yet have a `_sentry_tracestate` value, + compute one and store it. + """ + transaction = self.containing_transaction + + if transaction: + if not transaction._sentry_tracestate: + transaction._sentry_tracestate = compute_tracestate_entry(self) + + return transaction._sentry_tracestate + + # orphan span - nowhere to store the value, so just return it + return compute_tracestate_entry(self) + def set_tag(self, key, value): # type: (str, Any) -> None self._tags[key] = value @@ -434,13 +465,14 @@ def get_trace_context(self): if self.status: rv["status"] = self.status - if isinstance(self, Transaction): - transaction = self # type: Optional[Transaction] - else: - transaction = self._containing_transaction + # if the transaction didn't inherit a tracestate value, and no outgoing + # requests - whose need for headers would have caused a tracestate value + # to be created - were made as part of the transaction, the transaction + # still won't have a tracestate value, so compute one now + sentry_tracestate = self.get_or_set_sentry_tracestate() - if transaction: - rv["tracestate"] = transaction._sentry_tracestate + if sentry_tracestate: + rv["tracestate"] = sentry_tracestate return rv @@ -479,7 +511,10 @@ def __init__( Span.__init__(self, **kwargs) self.name = name self.parent_sampled = parent_sampled - self._sentry_tracestate = sentry_tracestate or compute_tracestate_entry(self) + # if tracestate isn't inherited and set here, it will get set lazily, + # either the first time an outgoing request needs it for a header or the + # first time an event needs it for inclusion in the captured data + self._sentry_tracestate = sentry_tracestate self._third_party_tracestate = third_party_tracestate def __repr__(self): @@ -494,6 +529,15 @@ def __repr__(self): self.sampled, ) + @property + def containing_transaction(self): + # type: () -> Transaction + + # Transactions (as spans) belong to themselves (as transactions). This + # is a getter rather than a regular attribute to avoid having a circular + # reference. + return self + def finish(self, hub=None): # type: (Optional[sentry_sdk.Hub]) -> Optional[str] if self.timestamp is not None: diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index d3c53a971e..ec7ad7743f 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -301,7 +301,7 @@ def compute_tracestate_value(data): def compute_tracestate_entry(span): - # type: (Span) -> str + # type: (Span) -> Optional[str] """ Computes a new sentry tracestate for the span. Includes the `sentry=`. @@ -311,8 +311,6 @@ def compute_tracestate_entry(span): client = (span.hub or sentry_sdk.Hub.current).client - # if there's no client and/or no DSN, we're not sending anything anywhere, - # so it's fine to not have any tracestate data if client and client.options.get("dsn"): options = client.options data = { @@ -322,7 +320,9 @@ def compute_tracestate_entry(span): "public_key": Dsn(options["dsn"]).public_key, } - return "sentry=" + compute_tracestate_value(data) + return "sentry=" + compute_tracestate_value(data) + + return None def reinflate_tracestate(encoded_tracestate): diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py index 5c0cad320b..d86c7bf4d4 100644 --- a/tests/tracing/test_http_headers.py +++ b/tests/tracing/test_http_headers.py @@ -2,6 +2,7 @@ import pytest +import sentry_sdk from sentry_sdk.tracing import Transaction, Span from sentry_sdk.tracing_utils import ( compute_tracestate_value, @@ -31,6 +32,9 @@ def test_tracestate_computation(sentry_init): trace_id="12312012123120121231201212312012", ) + # force lazy computation to create a value + transaction.to_tracestate() + computed_value = transaction._sentry_tracestate.replace("sentry=", "") # we have to decode and reinflate the data because we can guarantee that the # order of the entries in the jsonified dict will be the same here as when @@ -45,7 +49,7 @@ def test_tracestate_computation(sentry_init): } -def test_adds_new_tracestate_to_transaction_when_none_given(sentry_init): +def test_doesnt_add_new_tracestate_to_transaction_when_none_given(sentry_init): sentry_init( dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", environment="dogpark", @@ -58,9 +62,92 @@ def test_adds_new_tracestate_to_transaction_when_none_given(sentry_init): # sentry_tracestate=< value would be passed here > ) + assert transaction._sentry_tracestate is None + + +def test_adds_tracestate_to_transaction_when_to_traceparent_called(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + # no inherited tracestate, and none created in Transaction constructor + assert transaction._sentry_tracestate is None + + transaction.to_tracestate() + assert transaction._sentry_tracestate is not None +def test_adds_tracestate_to_transaction_when_getting_trace_context(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + # no inherited tracestate, and none created in Transaction constructor + assert transaction._sentry_tracestate is None + + transaction.get_trace_context() + + assert transaction._sentry_tracestate is not None + + +@pytest.mark.parametrize( + "set_by", ["inheritance", "to_tracestate", "get_trace_context"] +) +def test_tracestate_is_immutable_once_set(sentry_init, monkeypatch, set_by): + monkeypatch.setattr( + sentry_sdk.tracing, + "compute_tracestate_entry", + mock.Mock(return_value="sentry=doGsaREgReaT"), + ) + + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + # for each scenario, get to the point where tracestate has been set + if set_by == "inheritance": + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + sentry_tracestate=("sentry=doGsaREgReaT"), + ) + else: + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + if set_by == "to_tracestate": + transaction.to_tracestate() + if set_by == "get_trace_context": + transaction.get_trace_context() + + assert transaction._sentry_tracestate == "sentry=doGsaREgReaT" + + # user data would be included in tracestate if it were recomputed at this point + sentry_sdk.set_user({"id": 12312013, "segment": "bigs"}) + + # value hasn't changed + assert transaction._sentry_tracestate == "sentry=doGsaREgReaT" + + @pytest.mark.parametrize("sampled", [True, False, None]) def test_to_traceparent(sentry_init, sampled): From e18355df94877b5775807ea7aabe3a9100494714 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 30 Aug 2021 08:55:19 -0700 Subject: [PATCH 5/7] feat(tracing): Add user data/transaction name to tracestate value (#1177) This adds user data (specifically `id` and `segment`) and transaction name to the `tracetate` value. Doing this has two known limitations, which, though we've discussed them, I'm adding here for posterity: 1) Adding this data puts us over the character limit for tracestate values [listed in the W3C spec](https://www.w3.org/TR/trace-context/#value) (256 characters). For reference: Tracestate data: ``` { "trace_id": "12312012123120121231201212312012", "environment": "dogpark", "release": "off.leash.park", "public_key": "dogsarebadatkeepingsecrets", "user": {"id": 12312013, "segment": "bigs"}, "transaction": "/interactions/other-dogs/new-dog/" } ``` `tracestate` with without either: 196 characters `tracestate` with user data: 256 characters `tracestate` with transaction name: 264 characters `tracestate` with both: 324 characters 2) This data may change and/or get added to the scope after the tracestate value has been calculated, making it impossible to sample based on those attributes. This is especially a problem for transaction name, which in some frameworks isn't set to its final value until the transaction ends. This poses the added problem that the transaction name in its raw, un-finalized form may contain PII, because it is often the raw URL as opposed to the parameterized one (so, `/users/maisey/tricks/` rather than `/users/:username/tricks/`). More work needs to be done to investigate whether the final transaction name can be set earlier in any/all of the frameworks where this poses a problem. (For instance, it is a known problem in our Express integration, but not yet clear if it is a problem in any Python frameworks.) --- sentry_sdk/tracing_utils.py | 21 ++++++++++++++++++++- tests/tracing/test_http_headers.py | 4 ++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index ec7ad7743f..b2714f3e92 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -309,10 +309,15 @@ def compute_tracestate_entry(span): """ data = {} - client = (span.hub or sentry_sdk.Hub.current).client + hub = span.hub or sentry_sdk.Hub.current + + client = hub.client + scope = hub.scope if client and client.options.get("dsn"): options = client.options + user = scope._user + data = { "trace_id": span.trace_id, "environment": options["environment"], @@ -320,6 +325,20 @@ def compute_tracestate_entry(span): "public_key": Dsn(options["dsn"]).public_key, } + if user and (user.get("id") or user.get("segment")): + user_data = {} + + if user.get("id"): + user_data["id"] = user["id"] + + if user.get("segment"): + user_data["segment"] = user["segment"] + + data["user"] = user_data + + if span.containing_transaction: + data["transaction"] = span.containing_transaction.name + return "sentry=" + compute_tracestate_value(data) return None diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py index d86c7bf4d4..2e1f506032 100644 --- a/tests/tracing/test_http_headers.py +++ b/tests/tracing/test_http_headers.py @@ -26,6 +26,8 @@ def test_tracestate_computation(sentry_init): release="off.leash.park", ) + sentry_sdk.set_user({"id": 12312013, "segment": "bigs"}) + transaction = Transaction( name="/interactions/other-dogs/new-dog", op="greeting.sniff", @@ -46,6 +48,8 @@ def test_tracestate_computation(sentry_init): "environment": "dogpark", "release": "off.leash.park", "public_key": "dogsarebadatkeepingsecrets", + "user": {"id": 12312013, "segment": "bigs"}, + "transaction": "/interactions/other-dogs/new-dog", } From bf1eb5306cf1a6975e7bafba3a5f0f1ffb821f81 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 7 Sep 2021 15:07:35 -0700 Subject: [PATCH 6/7] feat(tracing): Add tracestate feature flag (#1182) This adds a feature flag, `"propagate_tracestate"`, to `_experiments`, which enables the tracestate header handling behavior. In particular, if the flag is `False` or missing: - `tracestate` headers won't be attached to outgoing HTTP requests, and - `trace` data will not be added to envelope headers. --- sentry_sdk/client.py | 4 +-- sentry_sdk/consts.py | 1 + sentry_sdk/tracing.py | 5 ++- sentry_sdk/tracing_utils.py | 9 ++++++ tests/test_envelope.py | 49 +++++++++++++++++++++++------- tests/tracing/test_http_headers.py | 15 +++++++-- tests/tracing/test_misc.py | 17 +++++++++++ 7 files changed, 84 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 6c9eb44939..c8f7967705 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -22,7 +22,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope -from sentry_sdk.tracing_utils import reinflate_tracestate +from sentry_sdk.tracing_utils import has_tracestate_enabled, reinflate_tracestate from sentry_sdk._types import MYPY @@ -349,7 +349,7 @@ def capture_event( tracestate_data = raw_tracestate and reinflate_tracestate( raw_tracestate.replace("sentry=", "") ) - if tracestate_data: + if tracestate_data and has_tracestate_enabled(): headers["trace"] = tracestate_data envelope = Envelope(headers=headers) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index a9822e8223..9a827adcc8 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -32,6 +32,7 @@ "max_spans": Optional[int], "record_sql_params": Optional[bool], "smart_transaction_trimming": Optional[bool], + "propagate_tracestate": Optional[bool], }, total=False, ) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 4a4f8b2e3f..105be67e02 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -12,6 +12,7 @@ compute_tracestate_entry, extract_sentrytrace_data, extract_tracestate_data, + has_tracestate_enabled, has_tracing_enabled, is_valid_sample_rate, maybe_create_breadcrumbs_from_span, @@ -270,8 +271,10 @@ def iter_headers(self): """ yield "sentry-trace", self.to_traceparent() - tracestate = self.to_tracestate() + tracestate = self.to_tracestate() if has_tracestate_enabled(self) else None # `tracestate` will only be `None` if there's no client or no DSN + # TODO (kmclb) the above will be true once the feature is no longer + # behind a flag if tracestate: yield "tracestate", tracestate diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index b2714f3e92..4214c208b9 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -396,3 +396,12 @@ def _format_sql(cursor, sql): real_sql = None return real_sql or to_string(sql) + + +def has_tracestate_enabled(span=None): + # type: (Optional[Span]) -> bool + + client = ((span and span.hub) or sentry_sdk.Hub.current).client + options = client and client.options + + return bool(options and options["_experiments"].get("propagate_tracestate")) diff --git a/tests/test_envelope.py b/tests/test_envelope.py index 877c67bace..6e990aa96c 100644 --- a/tests/test_envelope.py +++ b/tests/test_envelope.py @@ -4,6 +4,13 @@ from sentry_sdk.tracing_utils import compute_tracestate_value import sentry_sdk.client +import pytest + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + def generate_transaction_item(): return { @@ -25,6 +32,8 @@ def generate_transaction_item(): "environment": "dogpark", "release": "off.leash.park", "public_key": "dogsarebadatkeepingsecrets", + "user": {"id": 12312013, "segment": "bigs"}, + "transaction": "/interactions/other-dogs/new-dog", } ), } @@ -79,13 +88,23 @@ def test_add_and_get_session(): assert item.payload.json == expected.to_json() -def test_envelope_headers(sentry_init, capture_envelopes, monkeypatch): +# TODO (kmclb) remove this parameterization once tracestate is a real feature +@pytest.mark.parametrize("tracestate_enabled", [True, False]) +def test_envelope_headers( + sentry_init, capture_envelopes, monkeypatch, tracestate_enabled +): monkeypatch.setattr( sentry_sdk.client, "format_timestamp", lambda x: "2012-11-21T12:31:12.415908Z", ) + monkeypatch.setattr( + sentry_sdk.client, + "has_tracestate_enabled", + mock.Mock(return_value=tracestate_enabled), + ) + sentry_init( dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", ) @@ -95,13 +114,21 @@ def test_envelope_headers(sentry_init, capture_envelopes, monkeypatch): assert len(envelopes) == 1 - assert envelopes[0].headers == { - "event_id": "15210411201320122115110420122013", - "sent_at": "2012-11-21T12:31:12.415908Z", - "trace": { - "trace_id": "12312012123120121231201212312012", - "environment": "dogpark", - "release": "off.leash.park", - "public_key": "dogsarebadatkeepingsecrets", - }, - } + if tracestate_enabled: + assert envelopes[0].headers == { + "event_id": "15210411201320122115110420122013", + "sent_at": "2012-11-21T12:31:12.415908Z", + "trace": { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + "user": {"id": 12312013, "segment": "bigs"}, + "transaction": "/interactions/other-dogs/new-dog", + }, + } + else: + assert envelopes[0].headers == { + "event_id": "15210411201320122115110420122013", + "sent_at": "2012-11-21T12:31:12.415908Z", + } diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py index 2e1f506032..3db967b24b 100644 --- a/tests/tracing/test_http_headers.py +++ b/tests/tracing/test_http_headers.py @@ -282,7 +282,9 @@ def test_tracestate_extraction( } -def test_iter_headers(sentry_init, monkeypatch): +# TODO (kmclb) remove this parameterization once tracestate is a real feature +@pytest.mark.parametrize("tracestate_enabled", [True, False]) +def test_iter_headers(sentry_init, monkeypatch, tracestate_enabled): monkeypatch.setattr( Transaction, "to_traceparent", @@ -293,6 +295,11 @@ def test_iter_headers(sentry_init, monkeypatch): "to_tracestate", mock.Mock(return_value="sentry=doGsaREgReaT,charlie=goofy"), ) + monkeypatch.setattr( + sentry_sdk.tracing, + "has_tracestate_enabled", + mock.Mock(return_value=tracestate_enabled), + ) transaction = Transaction( name="/interactions/other-dogs/new-dog", @@ -303,7 +310,11 @@ def test_iter_headers(sentry_init, monkeypatch): assert ( headers["sentry-trace"] == "12312012123120121231201212312012-0415201309082013-0" ) - assert headers["tracestate"] == "sentry=doGsaREgReaT,charlie=goofy" + if tracestate_enabled: + assert "tracestate" in headers + assert headers["tracestate"] == "sentry=doGsaREgReaT,charlie=goofy" + else: + assert "tracestate" not in headers @pytest.mark.parametrize( diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 1ab4dfcb63..bd0ddaf10d 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -2,6 +2,7 @@ from sentry_sdk import Hub, start_span, start_transaction from sentry_sdk.tracing import Span, Transaction +from sentry_sdk.tracing_utils import has_tracestate_enabled def test_span_trimming(sentry_init, capture_events): @@ -149,3 +150,19 @@ def test_finds_non_orphan_span_on_scope(sentry_init): assert scope._span is not None assert isinstance(scope._span, Span) assert scope._span.op == "sniffing" + + +# TODO (kmclb) remove this test once tracestate is a real feature +@pytest.mark.parametrize("tracestate_enabled", [True, False, None]) +def test_has_tracestate_enabled(sentry_init, tracestate_enabled): + experiments = ( + {"propagate_tracestate": tracestate_enabled} + if tracestate_enabled is not None + else {} + ) + sentry_init(_experiments=experiments) + + if tracestate_enabled is True: + assert has_tracestate_enabled() is True + else: + assert has_tracestate_enabled() is False From 27240eeccc882b71f46aea1946bdd1567c46ccf8 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 13 Sep 2021 13:43:14 -0700 Subject: [PATCH 7/7] fix(tracing): Break `transaction` / `span` circular references before garbage collection (#1184) There are a few places in the SDK where we have circular references: 1) Transaction -> span recorder -> spans including transaction itself 2) Child span -> span recorder -> spans including child span itself 3) Transaction -> span recorder -> spans -> containing transaction 4) In `serializer.py`, `_serialize_node()` -> `_serialize_node_impl()` -> `_serialize_node()`, making each a closure for the other. This PR addresses points 1-3*, by making the following changes: 1) Transactions are no longer added to their own span recorders. (The SDK doesn't ever use the fact that they're there, and they're stripped out before the event is sent to Sentry.) 2) Child spans no longer have their own span recorder pointer, and instead access the recorder through their containing transaction. 3) When a transaction ends, after it harvests any completed spans, it now jettisons its link to its span recorder before it (the transaction) goes out of scope. It also adds to/modifies tests covering all three scenarios *Point 4, which we only discovered in the process of fixing 1-3, concerns a different system than the rest, and therefore will need a separate fix. --- sentry_sdk/tracing.py | 19 ++-- .../sqlalchemy/test_sqlalchemy.py | 4 +- tests/tracing/test_misc.py | 90 +++++++++++++++++-- 3 files changed, 101 insertions(+), 12 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 105be67e02..9e1ed1d55a 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -133,11 +133,12 @@ def __init__( self._span_recorder = None # type: Optional[_SpanRecorder] + # TODO this should really live on the Transaction class rather than the Span + # class def init_span_recorder(self, maxlen): # type: (int) -> None if self._span_recorder is None: self._span_recorder = _SpanRecorder(maxlen) - self._span_recorder.add(self) def __repr__(self): # type: () -> str @@ -199,9 +200,11 @@ def start_child(self, **kwargs): **kwargs ) - child._span_recorder = recorder = self._span_recorder - if recorder: - recorder.add(child) + span_recorder = ( + self.containing_transaction and self.containing_transaction._span_recorder + ) + if span_recorder: + span_recorder.add(child) return child def new_span(self, **kwargs): @@ -577,9 +580,15 @@ def finish(self, hub=None): finished_spans = [ span.to_json() for span in self._span_recorder.spans - if span is not self and span.timestamp is not None + if span.timestamp is not None ] + # we do this to break the circular reference of transaction -> span + # recorder -> span -> containing transaction (which is where we started) + # before either the spans or the transaction goes out of scope and has + # to be garbage collected + del self._span_recorder + return hub.capture_event( { "type": "transaction", diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index 2821126387..421a72ebae 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -189,7 +189,7 @@ def processor(event, hint): assert len(json_dumps(event)) < max_bytes # Some spans are discarded. - assert len(event["spans"]) == 999 + assert len(event["spans"]) == 1000 # Some spans have their descriptions truncated. Because the test always # generates the same amount of descriptions and truncation is deterministic, @@ -197,7 +197,7 @@ def processor(event, hint): # # Which exact span descriptions are truncated depends on the span durations # of each SQL query and is non-deterministic. - assert len(event["_meta"]["spans"]) == 536 + assert len(event["_meta"]["spans"]) == 537 for i, span in enumerate(event["spans"]): description = span["description"] diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index bd0ddaf10d..5d6613cd28 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -1,9 +1,18 @@ import pytest +import gc +import uuid +import os +import sentry_sdk from sentry_sdk import Hub, start_span, start_transaction from sentry_sdk.tracing import Span, Transaction from sentry_sdk.tracing_utils import has_tracestate_enabled +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + def test_span_trimming(sentry_init, capture_events): sentry_init(traces_sample_rate=1.0, _experiments={"max_spans": 3}) @@ -16,14 +25,12 @@ def test_span_trimming(sentry_init, capture_events): (event,) = events - # the transaction is its own first span (which counts for max_spans) but it - # doesn't show up in the span list in the event, so this is 1 less than our - # max_spans value - assert len(event["spans"]) == 2 + assert len(event["spans"]) == 3 - span1, span2 = event["spans"] + span1, span2, span3 = event["spans"] assert span1["op"] == "foo0" assert span2["op"] == "foo1" + assert span3["op"] == "foo2" def test_transaction_naming(sentry_init, capture_events): @@ -152,6 +159,79 @@ def test_finds_non_orphan_span_on_scope(sentry_init): assert scope._span.op == "sniffing" +def test_circular_references(monkeypatch, sentry_init, request): + # TODO: We discovered while writing this test about transaction/span + # reference cycles that there's actually also a circular reference in + # `serializer.py`, between the functions `_serialize_node` and + # `_serialize_node_impl`, both of which are defined inside of the main + # `serialize` function, and each of which calls the other one. For now, in + # order to avoid having those ref cycles give us a false positive here, we + # can mock out `serialize`. In the long run, though, we should probably fix + # that. (Whenever we do work on fixing it, it may be useful to add + # + # gc.set_debug(gc.DEBUG_LEAK) + # request.addfinalizer(lambda: gc.set_debug(~gc.DEBUG_LEAK)) + # + # immediately after the initial collection below, so we can see what new + # objects the garbage collecter has to clean up once `transaction.finish` is + # called and the serializer runs.) + monkeypatch.setattr( + sentry_sdk.client, + "serialize", + mock.Mock( + return_value=None, + ), + ) + + # In certain versions of python, in some environments (specifically, python + # 3.4 when run in GH Actions), we run into a `ctypes` bug which creates + # circular references when `uuid4()` is called, as happens when we're + # generating event ids. Mocking it with an implementation which doesn't use + # the `ctypes` function lets us avoid having false positives when garbage + # collecting. See https://bugs.python.org/issue20519. + monkeypatch.setattr( + uuid, + "uuid4", + mock.Mock( + return_value=uuid.UUID(bytes=os.urandom(16)), + ), + ) + + gc.disable() + request.addfinalizer(gc.enable) + + sentry_init(traces_sample_rate=1.0) + + # Make sure that we're starting with a clean slate before we start creating + # transaction/span reference cycles + gc.collect() + + dogpark_transaction = start_transaction(name="dogpark") + sniffing_span = dogpark_transaction.start_child(op="sniffing") + wagging_span = dogpark_transaction.start_child(op="wagging") + + # At some point, you have to stop sniffing - there are balls to chase! - so finish + # this span while the dogpark transaction is still open + sniffing_span.finish() + + # The wagging, however, continues long past the dogpark, so that span will + # NOT finish before the transaction ends. (Doing it in this order proves + # that both finished and unfinished spans get their cycles broken.) + dogpark_transaction.finish() + + # Eventually you gotta sleep... + wagging_span.finish() + + # assuming there are no cycles by this point, these should all be able to go + # out of scope and get their memory deallocated without the garbage + # collector having anything to do + del sniffing_span + del wagging_span + del dogpark_transaction + + assert gc.collect() == 0 + + # TODO (kmclb) remove this test once tracestate is a real feature @pytest.mark.parametrize("tracestate_enabled", [True, False, None]) def test_has_tracestate_enabled(sentry_init, tracestate_enabled):