diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7687baa76f..c8f7967705 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -22,6 +22,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope +from sentry_sdk.tracing_utils import has_tracestate_enabled, reinflate_tracestate from sentry_sdk._types import MYPY @@ -329,15 +330,29 @@ def capture_event( attachments = hint.get("attachments") is_transaction = event_opt.get("type") == "transaction" + # this is outside of the `if` immediately below because even if we don't + # use the value, we want to make sure we remove it before the event is + # sent + raw_tracestate = ( + event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", "") + ) + + # Transactions or events with attachments should go to the /envelope/ + # endpoint. if is_transaction or attachments: - # Transactions or events with attachments should go to the - # /envelope/ endpoint. - envelope = Envelope( - headers={ - "event_id": event_opt["event_id"], - "sent_at": format_timestamp(datetime.utcnow()), - } + + headers = { + "event_id": event_opt["event_id"], + "sent_at": format_timestamp(datetime.utcnow()), + } + + tracestate_data = raw_tracestate and reinflate_tracestate( + raw_tracestate.replace("sentry=", "") ) + if tracestate_data and has_tracestate_enabled(): + headers["trace"] = tracestate_data + + envelope = Envelope(headers=headers) if is_transaction: envelope.add_transaction(event_opt) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index a9822e8223..9a827adcc8 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -32,6 +32,7 @@ "max_spans": Optional[int], "record_sql_params": Optional[bool], "smart_transaction_trimming": Optional[bool], + "propagate_tracestate": Optional[bool], }, total=False, ) diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index 1976aaba34..addca57417 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -700,7 +700,8 @@ def iter_trace_propagation_headers(self, span=None): if not propagate_traces: return - yield "sentry-trace", span.to_traceparent() + for header in span.iter_headers(): + yield header GLOBAL_HUB = Hub() diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index e26948e2dd..87f9c7bc61 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -9,7 +9,7 @@ from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.scope import add_global_event_processor from sentry_sdk.serializer import add_global_repr_processor -from sentry_sdk.tracing import record_sql_queries +from sentry_sdk.tracing_utils import record_sql_queries from sentry_sdk.utils import ( HAS_REAL_CONTEXTVARS, CONTEXTVARS_ERROR_MESSAGE, diff --git a/sentry_sdk/integrations/httpx.py b/sentry_sdk/integrations/httpx.py index af67315338..3d4bbf8300 100644 --- a/sentry_sdk/integrations/httpx.py +++ b/sentry_sdk/integrations/httpx.py @@ -1,5 +1,6 @@ from sentry_sdk import Hub from sentry_sdk.integrations import Integration, DidNotEnable +from sentry_sdk.utils import logger from sentry_sdk._types import MYPY @@ -45,6 +46,11 @@ def send(self, request, **kwargs): span.set_data("method", request.method) span.set_data("url", str(request.url)) for key, value in hub.iter_trace_propagation_headers(): + logger.debug( + "[Tracing] Adding `{key}` header {value} to outgoing request to {url}.".format( + key=key, value=value, url=request.url + ) + ) request.headers[key] = value rv = real_send(self, request, **kwargs) @@ -72,6 +78,11 @@ async def send(self, request, **kwargs): span.set_data("method", request.method) span.set_data("url", str(request.url)) for key, value in hub.iter_trace_propagation_headers(): + logger.debug( + "[Tracing] Adding `{key}` header {value} to outgoing request to {url}.".format( + key=key, value=value, url=request.url + ) + ) request.headers[key] = value rv = await real_send(self, request, **kwargs) diff --git a/sentry_sdk/integrations/sqlalchemy.py b/sentry_sdk/integrations/sqlalchemy.py index 6c8e5eb88e..4b0207f5ec 100644 --- a/sentry_sdk/integrations/sqlalchemy.py +++ b/sentry_sdk/integrations/sqlalchemy.py @@ -3,7 +3,7 @@ from sentry_sdk._types import MYPY from sentry_sdk.hub import Hub from sentry_sdk.integrations import Integration, DidNotEnable -from sentry_sdk.tracing import record_sql_queries +from sentry_sdk.tracing_utils import record_sql_queries try: from sqlalchemy.engine import Engine # type: ignore diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index ac2ec103c7..adea742b2d 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -6,8 +6,8 @@ from sentry_sdk.hub import Hub from sentry_sdk.integrations import Integration from sentry_sdk.scope import add_global_event_processor -from sentry_sdk.tracing import EnvironHeaders -from sentry_sdk.utils import capture_internal_exceptions, safe_repr +from sentry_sdk.tracing_utils import EnvironHeaders +from sentry_sdk.utils import capture_internal_exceptions, logger, safe_repr from sentry_sdk._types import MYPY @@ -86,6 +86,11 @@ def putrequest(self, method, url, *args, **kwargs): rv = real_putrequest(self, method, url, *args, **kwargs) for key, value in hub.iter_trace_propagation_headers(span): + logger.debug( + "[Tracing] Adding `{key}` header {value} to outgoing request to {real_url}.".format( + key=key, value=value, real_url=real_url + ) + ) self.putheader(key, value) self._sentrysdk_span = span diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index ccf6f4e086..fb3bee42f1 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -150,19 +150,13 @@ def transaction(self): if self._span is None: return None - # the span on the scope is itself a transaction - if isinstance(self._span, Transaction): - return self._span - - # the span on the scope isn't a transaction but belongs to one - if self._span._containing_transaction: - return self._span._containing_transaction + # there is an orphan span on the scope + if self._span.containing_transaction is None: + return None - # there's a span (not a transaction) on the scope, but it was started on - # its own, not as the descendant of a transaction (this is deprecated - # behavior, but as long as the start_span function exists, it can still - # happen) - return None + # there is either a transaction (which is its own containing + # transaction) or a non-orphan span on the scope + return self._span.containing_transaction @transaction.setter def transaction(self, value): @@ -174,7 +168,7 @@ def transaction(self, value): # anything set in the scope. # XXX: note that with the introduction of the Scope.transaction getter, # there is a semantic and type mismatch between getter and setter. The - # getter returns a transaction, the setter sets a transaction name. + # getter returns a Transaction, the setter sets a transaction name. # Without breaking version compatibility, we could make the setter set a # transaction name or transaction (self._span) depending on the type of # the value argument. diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 4ce25f27c2..9e1ed1d55a 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1,79 +1,37 @@ -import re import uuid -import contextlib -import math import random import time from datetime import datetime, timedelta -from numbers import Real import sentry_sdk -from sentry_sdk.utils import ( - capture_internal_exceptions, - logger, - to_string, +from sentry_sdk.utils import logger +from sentry_sdk.tracing_utils import ( + EnvironHeaders, + compute_tracestate_entry, + extract_sentrytrace_data, + extract_tracestate_data, + has_tracestate_enabled, + has_tracing_enabled, + is_valid_sample_rate, + maybe_create_breadcrumbs_from_span, ) -from sentry_sdk._compat import PY2 from sentry_sdk._types import MYPY -if PY2: - from collections import Mapping -else: - from collections.abc import Mapping if MYPY: import typing - from typing import Generator from typing import Optional from typing import Any from typing import Dict from typing import List from typing import Tuple + from typing import Iterator from sentry_sdk._types import SamplingContext -_traceparent_header_format_re = re.compile( - "^[ \t]*" # whitespace - "([0-9a-f]{32})?" # trace_id - "-?([0-9a-f]{16})?" # span_id - "-?([01])?" # sampled - "[ \t]*$" # whitespace -) - - -class EnvironHeaders(Mapping): # type: ignore - def __init__( - self, - environ, # type: typing.Mapping[str, str] - prefix="HTTP_", # type: str - ): - # type: (...) -> None - self.environ = environ - self.prefix = prefix - - def __getitem__(self, key): - # type: (str) -> Optional[Any] - return self.environ[self.prefix + key.replace("-", "_").upper()] - - def __len__(self): - # type: () -> int - return sum(1 for _ in iter(self)) - - def __iter__(self): - # type: () -> Generator[str, None, None] - for k in self.environ: - if not isinstance(k, str): - continue - - k = k.replace("-", "_").upper() - if not k.startswith(self.prefix): - continue - - yield k[len(self.prefix) :] - class _SpanRecorder(object): """Limits the number of spans recorded in a transaction.""" @@ -116,8 +74,6 @@ class Span(object): "_span_recorder", "hub", "_context_manager_state", - # TODO: rename this "transaction" once we fully and truly deprecate the - # old "transaction" attribute (which was actually the transaction name)? "_containing_transaction", ) @@ -147,6 +103,7 @@ def __init__( hub=None, # type: Optional[sentry_sdk.Hub] status=None, # type: Optional[str] transaction=None, # type: Optional[str] # deprecated + containing_transaction=None, # type: Optional[Transaction] ): # type: (...) -> None self.trace_id = trace_id or uuid.uuid4().hex @@ -160,6 +117,7 @@ def __init__( self.hub = hub self._tags = {} # type: Dict[str, str] self._data = {} # type: Dict[str, Any] + self._containing_transaction = containing_transaction self.start_timestamp = datetime.utcnow() try: # TODO: For Python 3.7+, we could use a clock with ns resolution: @@ -174,13 +132,13 @@ def __init__( self.timestamp = None # type: Optional[datetime] self._span_recorder = None # type: Optional[_SpanRecorder] - self._containing_transaction = None # type: Optional[Transaction] + # TODO this should really live on the Transaction class rather than the Span + # class def init_span_recorder(self, maxlen): # type: (int) -> None if self._span_recorder is None: self._span_recorder = _SpanRecorder(maxlen) - self._span_recorder.add(self) def __repr__(self): # type: () -> str @@ -215,6 +173,15 @@ def __exit__(self, ty, value, tb): self.finish(hub) scope.span = old_span + @property + def containing_transaction(self): + # type: () -> Optional[Transaction] + + # this is a getter rather than a regular attribute so that transactions + # can return `self` here instead (as a way to prevent them circularly + # referencing themselves) + return self._containing_transaction + def start_child(self, **kwargs): # type: (**Any) -> Span """ @@ -226,19 +193,19 @@ def start_child(self, **kwargs): """ kwargs.setdefault("sampled", self.sampled) - rv = Span( - trace_id=self.trace_id, span_id=None, parent_span_id=self.span_id, **kwargs + child = Span( + trace_id=self.trace_id, + parent_span_id=self.span_id, + containing_transaction=self.containing_transaction, + **kwargs ) - if isinstance(self, Transaction): - rv._containing_transaction = self - else: - rv._containing_transaction = self._containing_transaction - - rv._span_recorder = recorder = self._span_recorder - if recorder: - recorder.add(rv) - return rv + span_recorder = ( + self.containing_transaction and self.containing_transaction._span_recorder + ) + if span_recorder: + span_recorder.add(child) + return child def new_span(self, **kwargs): # type: (**Any) -> Span @@ -255,11 +222,12 @@ def continue_from_environ( # type: (...) -> Transaction """ Create a Transaction with the given params, then add in data pulled from - the 'sentry-trace' header in the environ (if any) before returning the - Transaction. + the 'sentry-trace' and 'tracestate' headers from the environ (if any) + before returning the Transaction. - If the 'sentry-trace' header is malformed or missing, just create and - return a Transaction instance with the given params. + This is different from `continue_from_headers` in that it assumes header + names in the form "HTTP_HEADER_NAME" - such as you would get from a wsgi + environ - rather than the form "header-name". """ if cls is Span: logger.warning( @@ -276,29 +244,43 @@ def continue_from_headers( ): # type: (...) -> Transaction """ - Create a Transaction with the given params, then add in data pulled from - the 'sentry-trace' header (if any) before returning the Transaction. - - If the 'sentry-trace' header is malformed or missing, just create and - return a Transaction instance with the given params. + Create a transaction with the given params (including any data pulled from + the 'sentry-trace' and 'tracestate' headers). """ + # TODO move this to the Transaction class if cls is Span: logger.warning( "Deprecated: use Transaction.continue_from_headers " "instead of Span.continue_from_headers." ) - transaction = Transaction.from_traceparent( - headers.get("sentry-trace"), **kwargs - ) - if transaction is None: - transaction = Transaction(**kwargs) + + kwargs.update(extract_sentrytrace_data(headers.get("sentry-trace"))) + kwargs.update(extract_tracestate_data(headers.get("tracestate"))) + + transaction = Transaction(**kwargs) transaction.same_process_as_parent = False + return transaction def iter_headers(self): - # type: () -> Generator[Tuple[str, str], None, None] + # type: () -> Iterator[Tuple[str, str]] + """ + Creates a generator which returns the span's `sentry-trace` and + `tracestate` headers. + + If the span's containing transaction doesn't yet have a + `sentry_tracestate` value, this will cause one to be generated and + stored. + """ yield "sentry-trace", self.to_traceparent() + tracestate = self.to_tracestate() if has_tracestate_enabled(self) else None + # `tracestate` will only be `None` if there's no client or no DSN + # TODO (kmclb) the above will be true once the feature is no longer + # behind a flag + if tracestate: + yield "tracestate", tracestate + @classmethod def from_traceparent( cls, @@ -307,46 +289,21 @@ def from_traceparent( ): # type: (...) -> Optional[Transaction] """ + DEPRECATED: Use Transaction.continue_from_headers(headers, **kwargs) + Create a Transaction with the given params, then add in data pulled from the given 'sentry-trace' header value before returning the Transaction. - If the header value is malformed or missing, just create and return a - Transaction instance with the given params. """ - if cls is Span: - logger.warning( - "Deprecated: use Transaction.from_traceparent " - "instead of Span.from_traceparent." - ) + logger.warning( + "Deprecated: Use Transaction.continue_from_headers(headers, **kwargs) " + "instead of from_traceparent(traceparent, **kwargs)" + ) if not traceparent: return None - if traceparent.startswith("00-") and traceparent.endswith("-00"): - traceparent = traceparent[3:-3] - - match = _traceparent_header_format_re.match(str(traceparent)) - if match is None: - return None - - trace_id, parent_span_id, sampled_str = match.groups() - - if trace_id is not None: - trace_id = "{:032x}".format(int(trace_id, 16)) - if parent_span_id is not None: - parent_span_id = "{:016x}".format(int(parent_span_id, 16)) - - if sampled_str: - parent_sampled = sampled_str != "0" # type: Optional[bool] - else: - parent_sampled = None - - return Transaction( - trace_id=trace_id, - parent_span_id=parent_span_id, - parent_sampled=parent_sampled, - **kwargs - ) + return cls.continue_from_headers({"sentry-trace": traceparent}, **kwargs) def to_traceparent(self): # type: () -> str @@ -357,6 +314,57 @@ def to_traceparent(self): sampled = "0" return "%s-%s-%s" % (self.trace_id, self.span_id, sampled) + def to_tracestate(self): + # type: () -> Optional[str] + """ + Computes the `tracestate` header value using data from the containing + transaction. + + If the containing transaction doesn't yet have a `sentry_tracestate` + value, this will cause one to be generated and stored. + + If there is no containing transaction, a value will be generated but not + stored. + + Returns None if there's no client and/or no DSN. + """ + + sentry_tracestate = self.get_or_set_sentry_tracestate() + third_party_tracestate = ( + self.containing_transaction._third_party_tracestate + if self.containing_transaction + else None + ) + + if not sentry_tracestate: + return None + + header_value = sentry_tracestate + + if third_party_tracestate: + header_value = header_value + "," + third_party_tracestate + + return header_value + + def get_or_set_sentry_tracestate(self): + # type: (Span) -> Optional[str] + """ + Read sentry tracestate off of the span's containing transaction. + + If the transaction doesn't yet have a `_sentry_tracestate` value, + compute one and store it. + """ + transaction = self.containing_transaction + + if transaction: + if not transaction._sentry_tracestate: + transaction._sentry_tracestate = compute_tracestate_entry(self) + + return transaction._sentry_tracestate + + # orphan span - nowhere to store the value, so just return it + return compute_tracestate_entry(self) + def set_tag(self, key, value): # type: (str, Any) -> None self._tags[key] = value @@ -422,7 +430,7 @@ def finish(self, hub=None): except AttributeError: self.timestamp = datetime.utcnow() - _maybe_create_breadcrumbs_from_span(hub, self) + maybe_create_breadcrumbs_from_span(hub, self) return None def to_json(self): @@ -463,16 +471,37 @@ def get_trace_context(self): if self.status: rv["status"] = self.status + # if the transaction didn't inherit a tracestate value, and no outgoing + # requests - whose need for headers would have caused a tracestate value + # to be created - were made as part of the transaction, the transaction + # still won't have a tracestate value, so compute one now + sentry_tracestate = self.get_or_set_sentry_tracestate() + + if sentry_tracestate: + rv["tracestate"] = sentry_tracestate + return rv class Transaction(Span): - __slots__ = ("name", "parent_sampled") + __slots__ = ( + "name", + "parent_sampled", + # the sentry portion of the `tracestate` header used to transmit + # correlation context for server-side dynamic sampling, of the form + # `sentry=xxxxx`, where `xxxxx` is the base64-encoded json of the + # correlation context data, missing trailing any = + "_sentry_tracestate", + # tracestate data from other vendors, of the form `dogs=yes,cats=maybe` + "_third_party_tracestate", + ) def __init__( self, name="", # type: str parent_sampled=None, # type: Optional[bool] + sentry_tracestate=None, # type: Optional[str] + third_party_tracestate=None, # type: Optional[str] **kwargs # type: Any ): # type: (...) -> None @@ -488,6 +517,11 @@ def __init__( Span.__init__(self, **kwargs) self.name = name self.parent_sampled = parent_sampled + # if tracestate isn't inherited and set here, it will get set lazily, + # either the first time an outgoing request needs it for a header or the + # first time an event needs it for inclusion in the captured data + self._sentry_tracestate = sentry_tracestate + self._third_party_tracestate = third_party_tracestate def __repr__(self): # type: () -> str @@ -501,6 +535,15 @@ def __repr__(self): self.sampled, ) + @property + def containing_transaction(self): + # type: () -> Transaction + + # Transactions (as spans) belong to themselves (as transactions). This + # is a getter rather than a regular attribute to avoid having a circular + # reference. + return self + def finish(self, hub=None): # type: (Optional[sentry_sdk.Hub]) -> Optional[str] if self.timestamp is not None: @@ -537,9 +580,15 @@ def finish(self, hub=None): finished_spans = [ span.to_json() for span in self._span_recorder.spans - if span is not self and span.timestamp is not None + if span.timestamp is not None ] + # we do this to break the circular reference of transaction -> span + # recorder -> span -> containing transaction (which is where we started) + # before either the spans or the transaction goes out of scope and has + # to be garbage collected + del self._span_recorder + return hub.capture_event( { "type": "transaction", @@ -617,7 +666,7 @@ def _set_initial_sampling_decision(self, sampling_context): # Since this is coming from the user (or from a function provided by the # user), who knows what we might get. (The only valid values are # booleans or numbers between 0 and 1.) - if not _is_valid_sample_rate(sample_rate): + if not is_valid_sample_rate(sample_rate): logger.warning( "[Tracing] Discarding {transaction_description} because of invalid sample rate.".format( transaction_description=transaction_description, @@ -660,127 +709,3 @@ def _set_initial_sampling_decision(self, sampling_context): sample_rate=float(sample_rate), ) ) - - -def has_tracing_enabled(options): - # type: (Dict[str, Any]) -> bool - """ - Returns True if either traces_sample_rate or traces_sampler is - defined, False otherwise. - """ - - return bool( - options.get("traces_sample_rate") is not None - or options.get("traces_sampler") is not None - ) - - -def _is_valid_sample_rate(rate): - # type: (Any) -> bool - """ - Checks the given sample rate to make sure it is valid type and value (a - boolean or a number between 0 and 1, inclusive). - """ - - # both booleans and NaN are instances of Real, so a) checking for Real - # checks for the possibility of a boolean also, and b) we have to check - # separately for NaN - if not isinstance(rate, Real) or math.isnan(rate): - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( - rate=rate, type=type(rate) - ) - ) - return False - - # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False - rate = float(rate) - if rate < 0 or rate > 1: - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( - rate=rate - ) - ) - return False - - return True - - -def _format_sql(cursor, sql): - # type: (Any, str) -> Optional[str] - - real_sql = None - - # If we're using psycopg2, it could be that we're - # looking at a query that uses Composed objects. Use psycopg2's mogrify - # function to format the query. We lose per-parameter trimming but gain - # accuracy in formatting. - try: - if hasattr(cursor, "mogrify"): - real_sql = cursor.mogrify(sql) - if isinstance(real_sql, bytes): - real_sql = real_sql.decode(cursor.connection.encoding) - except Exception: - real_sql = None - - return real_sql or to_string(sql) - - -@contextlib.contextmanager -def record_sql_queries( - hub, # type: sentry_sdk.Hub - cursor, # type: Any - query, # type: Any - params_list, # type: Any - paramstyle, # type: Optional[str] - executemany, # type: bool -): - # type: (...) -> Generator[Span, None, None] - - # TODO: Bring back capturing of params by default - if hub.client and hub.client.options["_experiments"].get( - "record_sql_params", False - ): - if not params_list or params_list == [None]: - params_list = None - - if paramstyle == "pyformat": - paramstyle = "format" - else: - params_list = None - paramstyle = None - - query = _format_sql(cursor, query) - - data = {} - if params_list is not None: - data["db.params"] = params_list - if paramstyle is not None: - data["db.paramstyle"] = paramstyle - if executemany: - data["db.executemany"] = True - - with capture_internal_exceptions(): - hub.add_breadcrumb(message=query, category="query", data=data) - - with hub.start_span(op="db", description=query) as span: - for k, v in data.items(): - span.set_data(k, v) - yield span - - -def _maybe_create_breadcrumbs_from_span(hub, span): - # type: (sentry_sdk.Hub, Span) -> None - if span.op == "redis": - hub.add_breadcrumb( - message=span.description, type="redis", category="redis", data=span._tags - ) - elif span.op == "http": - hub.add_breadcrumb(type="http", category="httplib", data=span._data) - elif span.op == "subprocess": - hub.add_breadcrumb( - type="subprocess", - category="subprocess", - message=span.description, - data=span._data, - ) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py new file mode 100644 index 0000000000..4214c208b9 --- /dev/null +++ b/sentry_sdk/tracing_utils.py @@ -0,0 +1,407 @@ +import re +import contextlib +import json +import math + +from numbers import Real + +import sentry_sdk + +from sentry_sdk.utils import ( + capture_internal_exceptions, + Dsn, + logger, + to_base64, + to_string, + from_base64, +) +from sentry_sdk._compat import PY2 +from sentry_sdk._types import MYPY + +if PY2: + from collections import Mapping +else: + from collections.abc import Mapping + +if MYPY: + import typing + + from typing import Generator + from typing import Optional + from typing import Any + from typing import Dict + from typing import Union + + from sentry_sdk.tracing import Span + + +SENTRY_TRACE_REGEX = re.compile( + "^[ \t]*" # whitespace + "([0-9a-f]{32})?" # trace_id + "-?([0-9a-f]{16})?" # span_id + "-?([01])?" # sampled + "[ \t]*$" # whitespace +) + +# This is a normal base64 regex, modified to reflect that fact that we strip the +# trailing = or == off +base64_stripped = ( + # any of the characters in the base64 "alphabet", in multiples of 4 + "([a-zA-Z0-9+/]{4})*" + # either nothing or 2 or 3 base64-alphabet characters (see + # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding for + # why there's never only 1 extra character) + "([a-zA-Z0-9+/]{2,3})?" +) + +# comma-delimited list of entries of the form `xxx=yyy` +tracestate_entry = "[^=]+=[^=]+" +TRACESTATE_ENTRIES_REGEX = re.compile( + # one or more xxxxx=yyyy entries + "^({te})+" + # each entry except the last must be followed by a comma + "(,|$)".format(te=tracestate_entry) +) + +# this doesn't check that the value is valid, just that there's something there +# of the form `sentry=xxxx` +SENTRY_TRACESTATE_ENTRY_REGEX = re.compile( + # either sentry is the first entry or there's stuff immediately before it, + # ending in a commma (this prevents matching something like `coolsentry=xxx`) + "(?:^|.+,)" + # sentry's part, not including the potential comma + "(sentry=[^,]*)" + # either there's a comma and another vendor's entry or we end + "(?:,.+|$)" +) + + +class EnvironHeaders(Mapping): # type: ignore + def __init__( + self, + environ, # type: typing.Mapping[str, str] + prefix="HTTP_", # type: str + ): + # type: (...) -> None + self.environ = environ + self.prefix = prefix + + def __getitem__(self, key): + # type: (str) -> Optional[Any] + return self.environ[self.prefix + key.replace("-", "_").upper()] + + def __len__(self): + # type: () -> int + return sum(1 for _ in iter(self)) + + def __iter__(self): + # type: () -> Generator[str, None, None] + for k in self.environ: + if not isinstance(k, str): + continue + + k = k.replace("-", "_").upper() + if not k.startswith(self.prefix): + continue + + yield k[len(self.prefix) :] + + +def has_tracing_enabled(options): + # type: (Dict[str, Any]) -> bool + """ + Returns True if either traces_sample_rate or traces_sampler is + non-zero/defined, False otherwise. + """ + + return bool( + options.get("traces_sample_rate") is not None + or options.get("traces_sampler") is not None + ) + + +def is_valid_sample_rate(rate): + # type: (Any) -> bool + """ + Checks the given sample rate to make sure it is valid type and value (a + boolean or a number between 0 and 1, inclusive). + """ + + # both booleans and NaN are instances of Real, so a) checking for Real + # checks for the possibility of a boolean also, and b) we have to check + # separately for NaN + if not isinstance(rate, Real) or math.isnan(rate): + logger.warning( + "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( + rate=rate, type=type(rate) + ) + ) + return False + + # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False + rate = float(rate) + if rate < 0 or rate > 1: + logger.warning( + "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( + rate=rate + ) + ) + return False + + return True + + +@contextlib.contextmanager +def record_sql_queries( + hub, # type: sentry_sdk.Hub + cursor, # type: Any + query, # type: Any + params_list, # type: Any + paramstyle, # type: Optional[str] + executemany, # type: bool +): + # type: (...) -> Generator[Span, None, None] + + # TODO: Bring back capturing of params by default + if hub.client and hub.client.options["_experiments"].get( + "record_sql_params", False + ): + if not params_list or params_list == [None]: + params_list = None + + if paramstyle == "pyformat": + paramstyle = "format" + else: + params_list = None + paramstyle = None + + query = _format_sql(cursor, query) + + data = {} + if params_list is not None: + data["db.params"] = params_list + if paramstyle is not None: + data["db.paramstyle"] = paramstyle + if executemany: + data["db.executemany"] = True + + with capture_internal_exceptions(): + hub.add_breadcrumb(message=query, category="query", data=data) + + with hub.start_span(op="db", description=query) as span: + for k, v in data.items(): + span.set_data(k, v) + yield span + + +def maybe_create_breadcrumbs_from_span(hub, span): + # type: (sentry_sdk.Hub, Span) -> None + if span.op == "redis": + hub.add_breadcrumb( + message=span.description, type="redis", category="redis", data=span._tags + ) + elif span.op == "http": + hub.add_breadcrumb(type="http", category="httplib", data=span._data) + elif span.op == "subprocess": + hub.add_breadcrumb( + type="subprocess", + category="subprocess", + message=span.description, + data=span._data, + ) + + +def extract_sentrytrace_data(header): + # type: (Optional[str]) -> typing.Mapping[str, Union[str, bool, None]] + """ + Given a `sentry-trace` header string, return a dictionary of data. + """ + trace_id = parent_span_id = parent_sampled = None + + if header: + if header.startswith("00-") and header.endswith("-00"): + header = header[3:-3] + + match = SENTRY_TRACE_REGEX.match(header) + + if match: + trace_id, parent_span_id, sampled_str = match.groups() + + if trace_id: + trace_id = "{:032x}".format(int(trace_id, 16)) + if parent_span_id: + parent_span_id = "{:016x}".format(int(parent_span_id, 16)) + if sampled_str: + parent_sampled = sampled_str != "0" + + return { + "trace_id": trace_id, + "parent_span_id": parent_span_id, + "parent_sampled": parent_sampled, + } + + +def extract_tracestate_data(header): + # type: (Optional[str]) -> typing.Mapping[str, Optional[str]] + """ + Extracts the sentry tracestate value and any third-party data from the given + tracestate header, returning a dictionary of data. + """ + sentry_entry = third_party_entry = None + before = after = "" + + if header: + # find sentry's entry, if any + sentry_match = SENTRY_TRACESTATE_ENTRY_REGEX.search(header) + + if sentry_match: + sentry_entry = sentry_match.group(1) + + # remove the commas after the split so we don't end up with + # `xxx=yyy,,zzz=qqq` (double commas) when we put them back together + before, after = map(lambda s: s.strip(","), header.split(sentry_entry)) + + # extract sentry's value from its entry and test to make sure it's + # valid; if it isn't, discard the entire entry so that a new one + # will be created + sentry_value = sentry_entry.replace("sentry=", "") + if not re.search("^{b64}$".format(b64=base64_stripped), sentry_value): + sentry_entry = None + else: + after = header + + # if either part is invalid or empty, remove it before gluing them together + third_party_entry = ( + ",".join(filter(TRACESTATE_ENTRIES_REGEX.search, [before, after])) or None + ) + + return { + "sentry_tracestate": sentry_entry, + "third_party_tracestate": third_party_entry, + } + + +def compute_tracestate_value(data): + # type: (typing.Mapping[str, str]) -> str + """ + Computes a new tracestate value using the given data. + + Note: Returns just the base64-encoded data, NOT the full `sentry=...` + tracestate entry. + """ + + tracestate_json = json.dumps(data) + + # Base64-encoded strings always come out with a length which is a multiple + # of 4. In order to achieve this, the end is padded with one or more `=` + # signs. Because the tracestate standard calls for using `=` signs between + # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion + # we strip the `=` + return (to_base64(tracestate_json) or "").rstrip("=") + + +def compute_tracestate_entry(span): + # type: (Span) -> Optional[str] + """ + Computes a new sentry tracestate for the span. Includes the `sentry=`. + + Will return `None` if there's no client and/or no DSN. + """ + data = {} + + hub = span.hub or sentry_sdk.Hub.current + + client = hub.client + scope = hub.scope + + if client and client.options.get("dsn"): + options = client.options + user = scope._user + + data = { + "trace_id": span.trace_id, + "environment": options["environment"], + "release": options.get("release"), + "public_key": Dsn(options["dsn"]).public_key, + } + + if user and (user.get("id") or user.get("segment")): + user_data = {} + + if user.get("id"): + user_data["id"] = user["id"] + + if user.get("segment"): + user_data["segment"] = user["segment"] + + data["user"] = user_data + + if span.containing_transaction: + data["transaction"] = span.containing_transaction.name + + return "sentry=" + compute_tracestate_value(data) + + return None + + +def reinflate_tracestate(encoded_tracestate): + # type: (str) -> typing.Optional[Mapping[str, str]] + """ + Given a sentry tracestate value in its encoded form, translate it back into + a dictionary of data. + """ + inflated_tracestate = None + + if encoded_tracestate: + # Base64-encoded strings always come out with a length which is a + # multiple of 4. In order to achieve this, the end is padded with one or + # more `=` signs. Because the tracestate standard calls for using `=` + # signs between vendor name and value (`sentry=xxx,dogsaregreat=yyy`), + # to avoid confusion we strip the `=` when the data is initially + # encoded. Python's decoding function requires they be put back. + # Fortunately, it doesn't complain if there are too many, so we just + # attach two `=` on spec (there will never be more than 2, see + # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding). + tracestate_json = from_base64(encoded_tracestate + "==") + + try: + assert tracestate_json is not None + inflated_tracestate = json.loads(tracestate_json) + except Exception as err: + logger.warning( + ( + "Unable to attach tracestate data to envelope header: {err}" + + "\nTracestate value is {encoded_tracestate}" + ).format(err=err, encoded_tracestate=encoded_tracestate), + ) + + return inflated_tracestate + + +def _format_sql(cursor, sql): + # type: (Any, str) -> Optional[str] + + real_sql = None + + # If we're using psycopg2, it could be that we're + # looking at a query that uses Composed objects. Use psycopg2's mogrify + # function to format the query. We lose per-parameter trimming but gain + # accuracy in formatting. + try: + if hasattr(cursor, "mogrify"): + real_sql = cursor.mogrify(sql) + if isinstance(real_sql, bytes): + real_sql = real_sql.decode(cursor.connection.encoding) + except Exception: + real_sql = None + + return real_sql or to_string(sql) + + +def has_tracestate_enabled(span=None): + # type: (Optional[Span]) -> bool + + client = ((span and span.hub) or sentry_sdk.Hub.current).client + options = client and client.options + + return bool(options and options["_experiments"].get("propagate_tracestate")) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 43b63b41ac..8fb03e014d 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1,3 +1,4 @@ +import base64 import json import linecache import logging @@ -5,6 +6,7 @@ import sys import threading import subprocess +import re from datetime import datetime @@ -39,6 +41,7 @@ MAX_STRING_LENGTH = 512 MAX_FORMAT_PARAM_LENGTH = 128 +BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$") def json_dumps(data): @@ -968,3 +971,42 @@ def run(self): integer_configured_timeout ) ) + + +def to_base64(original): + # type: (str) -> Optional[str] + """ + Convert a string to base64, via UTF-8. Returns None on invalid input. + """ + base64_string = None + + try: + utf8_bytes = original.encode("UTF-8") + base64_bytes = base64.b64encode(utf8_bytes) + base64_string = base64_bytes.decode("UTF-8") + except Exception as err: + logger.warning("Unable to encode {orig} to base64:".format(orig=original), err) + + return base64_string + + +def from_base64(base64_string): + # type: (str) -> Optional[str] + """ + Convert a string from base64, via UTF-8. Returns None on invalid input. + """ + utf8_string = None + + try: + only_valid_chars = BASE64_ALPHABET.match(base64_string) + assert only_valid_chars + + base64_bytes = base64_string.encode("UTF-8") + utf8_bytes = base64.b64decode(base64_bytes) + utf8_string = utf8_bytes.decode("UTF-8") + except Exception as err: + logger.warning( + "Unable to decode {b64} from base64:".format(b64=base64_string), err + ) + + return utf8_string diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index 2821126387..421a72ebae 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -189,7 +189,7 @@ def processor(event, hint): assert len(json_dumps(event)) < max_bytes # Some spans are discarded. - assert len(event["spans"]) == 999 + assert len(event["spans"]) == 1000 # Some spans have their descriptions truncated. Because the test always # generates the same amount of descriptions and truncation is deterministic, @@ -197,7 +197,7 @@ def processor(event, hint): # # Which exact span descriptions are truncated depends on the span durations # of each SQL query and is non-deterministic. - assert len(event["_meta"]["spans"]) == 536 + assert len(event["_meta"]["spans"]) == 537 for i, span in enumerate(event["spans"]): description = span["description"] diff --git a/tests/test_envelope.py b/tests/test_envelope.py index e795e9d93c..6e990aa96c 100644 --- a/tests/test_envelope.py +++ b/tests/test_envelope.py @@ -1,36 +1,58 @@ from sentry_sdk.envelope import Envelope from sentry_sdk.session import Session +from sentry_sdk import capture_event +from sentry_sdk.tracing_utils import compute_tracestate_value +import sentry_sdk.client + +import pytest + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 def generate_transaction_item(): return { - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "event_id": "15210411201320122115110420122013", "type": "transaction", - "transaction": "/organizations/:orgId/performance/:eventSlug/", - "start_timestamp": 1597976392.6542819, - "timestamp": 1597976400.6189718, + "transaction": "/interactions/other-dogs/new-dog", + "start_timestamp": 1353568872.11122131, + "timestamp": 1356942672.09040815, "contexts": { "trace": { - "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", - "span_id": "FA90FDEAD5F74052", - "type": "trace", + "trace_id": "12312012123120121231201212312012", + "span_id": "0415201309082013", + "parent_span_id": None, + "description": "", + "op": "greeting.sniff", + "tracestate": compute_tracestate_value( + { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + "user": {"id": 12312013, "segment": "bigs"}, + "transaction": "/interactions/other-dogs/new-dog", + } + ), } }, "spans": [ { "description": "", - "op": "react.mount", - "parent_span_id": "8f5a2b8768cafb4e", - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1597976393.4619668, - "timestamp": 1597976393.4718769, - "trace_id": "ff62a8b040f340bda5d830223def1d81", + "op": "greeting.sniff", + "parent_span_id": None, + "span_id": "0415201309082013", + "start_timestamp": 1353568872.11122131, + "timestamp": 1356942672.09040815, + "trace_id": "12312012123120121231201212312012", } ], } -def test_basic_event(): +def test_add_and_get_basic_event(): envelope = Envelope() expected = {"message": "Hello, World!"} @@ -39,7 +61,7 @@ def test_basic_event(): assert envelope.get_event() == {"message": "Hello, World!"} -def test_transaction_event(): +def test_add_and_get_transaction_event(): envelope = Envelope() transaction_item = generate_transaction_item() @@ -55,7 +77,7 @@ def test_transaction_event(): assert envelope.get_transaction_event() == transaction_item -def test_session(): +def test_add_and_get_session(): envelope = Envelope() expected = Session() @@ -64,3 +86,49 @@ def test_session(): for item in envelope: if item.type == "session": assert item.payload.json == expected.to_json() + + +# TODO (kmclb) remove this parameterization once tracestate is a real feature +@pytest.mark.parametrize("tracestate_enabled", [True, False]) +def test_envelope_headers( + sentry_init, capture_envelopes, monkeypatch, tracestate_enabled +): + monkeypatch.setattr( + sentry_sdk.client, + "format_timestamp", + lambda x: "2012-11-21T12:31:12.415908Z", + ) + + monkeypatch.setattr( + sentry_sdk.client, + "has_tracestate_enabled", + mock.Mock(return_value=tracestate_enabled), + ) + + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + ) + envelopes = capture_envelopes() + + capture_event(generate_transaction_item()) + + assert len(envelopes) == 1 + + if tracestate_enabled: + assert envelopes[0].headers == { + "event_id": "15210411201320122115110420122013", + "sent_at": "2012-11-21T12:31:12.415908Z", + "trace": { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + "user": {"id": 12312013, "segment": "bigs"}, + "transaction": "/interactions/other-dogs/new-dog", + }, + } + else: + assert envelopes[0].headers == { + "event_id": "15210411201320122115110420122013", + "sent_at": "2012-11-21T12:31:12.415908Z", + } diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py new file mode 100644 index 0000000000..3db967b24b --- /dev/null +++ b/tests/tracing/test_http_headers.py @@ -0,0 +1,332 @@ +import json + +import pytest + +import sentry_sdk +from sentry_sdk.tracing import Transaction, Span +from sentry_sdk.tracing_utils import ( + compute_tracestate_value, + extract_sentrytrace_data, + extract_tracestate_data, + reinflate_tracestate, +) +from sentry_sdk.utils import from_base64, to_base64 + + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + + +def test_tracestate_computation(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + sentry_sdk.set_user({"id": 12312013, "segment": "bigs"}) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + ) + + # force lazy computation to create a value + transaction.to_tracestate() + + computed_value = transaction._sentry_tracestate.replace("sentry=", "") + # we have to decode and reinflate the data because we can guarantee that the + # order of the entries in the jsonified dict will be the same here as when + # the tracestate is computed + reinflated_trace_data = json.loads(from_base64(computed_value)) + + assert reinflated_trace_data == { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + "user": {"id": 12312013, "segment": "bigs"}, + "transaction": "/interactions/other-dogs/new-dog", + } + + +def test_doesnt_add_new_tracestate_to_transaction_when_none_given(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + # sentry_tracestate=< value would be passed here > + ) + + assert transaction._sentry_tracestate is None + + +def test_adds_tracestate_to_transaction_when_to_traceparent_called(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + # no inherited tracestate, and none created in Transaction constructor + assert transaction._sentry_tracestate is None + + transaction.to_tracestate() + + assert transaction._sentry_tracestate is not None + + +def test_adds_tracestate_to_transaction_when_getting_trace_context(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + # no inherited tracestate, and none created in Transaction constructor + assert transaction._sentry_tracestate is None + + transaction.get_trace_context() + + assert transaction._sentry_tracestate is not None + + +@pytest.mark.parametrize( + "set_by", ["inheritance", "to_tracestate", "get_trace_context"] +) +def test_tracestate_is_immutable_once_set(sentry_init, monkeypatch, set_by): + monkeypatch.setattr( + sentry_sdk.tracing, + "compute_tracestate_entry", + mock.Mock(return_value="sentry=doGsaREgReaT"), + ) + + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + # for each scenario, get to the point where tracestate has been set + if set_by == "inheritance": + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + sentry_tracestate=("sentry=doGsaREgReaT"), + ) + else: + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + if set_by == "to_tracestate": + transaction.to_tracestate() + if set_by == "get_trace_context": + transaction.get_trace_context() + + assert transaction._sentry_tracestate == "sentry=doGsaREgReaT" + + # user data would be included in tracestate if it were recomputed at this point + sentry_sdk.set_user({"id": 12312013, "segment": "bigs"}) + + # value hasn't changed + assert transaction._sentry_tracestate == "sentry=doGsaREgReaT" + + +@pytest.mark.parametrize("sampled", [True, False, None]) +def test_to_traceparent(sentry_init, sampled): + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + sampled=sampled, + ) + + traceparent = transaction.to_traceparent() + + trace_id, parent_span_id, parent_sampled = traceparent.split("-") + assert trace_id == "12312012123120121231201212312012" + assert parent_span_id == transaction.span_id + assert parent_sampled == ( + "1" if sampled is True else "0" if sampled is False else "" + ) + + +def test_to_tracestate(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + # it correctly uses the value from the transaction itself or the span's + # containing transaction + transaction_no_third_party = Transaction( + trace_id="12312012123120121231201212312012", + sentry_tracestate="sentry=doGsaREgReaT", + ) + non_orphan_span = Span() + non_orphan_span._containing_transaction = transaction_no_third_party + assert transaction_no_third_party.to_tracestate() == "sentry=doGsaREgReaT" + assert non_orphan_span.to_tracestate() == "sentry=doGsaREgReaT" + + # it combines sentry and third-party values correctly + transaction_with_third_party = Transaction( + trace_id="12312012123120121231201212312012", + sentry_tracestate="sentry=doGsaREgReaT", + third_party_tracestate="maisey=silly", + ) + assert ( + transaction_with_third_party.to_tracestate() + == "sentry=doGsaREgReaT,maisey=silly" + ) + + # it computes a tracestate from scratch for orphan transactions + orphan_span = Span( + trace_id="12312012123120121231201212312012", + ) + assert orphan_span._containing_transaction is None + assert orphan_span.to_tracestate() == "sentry=" + compute_tracestate_value( + { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + ) + + +@pytest.mark.parametrize("sampling_decision", [True, False]) +def test_sentrytrace_extraction(sampling_decision): + sentrytrace_header = "12312012123120121231201212312012-0415201309082013-{}".format( + 1 if sampling_decision is True else 0 + ) + assert extract_sentrytrace_data(sentrytrace_header) == { + "trace_id": "12312012123120121231201212312012", + "parent_span_id": "0415201309082013", + "parent_sampled": sampling_decision, + } + + +@pytest.mark.parametrize( + ("incoming_header", "expected_sentry_value", "expected_third_party"), + [ + # sentry only + ("sentry=doGsaREgReaT", "sentry=doGsaREgReaT", None), + # sentry only, invalid (`!` isn't a valid base64 character) + ("sentry=doGsaREgReaT!", None, None), + # stuff before + ("maisey=silly,sentry=doGsaREgReaT", "sentry=doGsaREgReaT", "maisey=silly"), + # stuff after + ("sentry=doGsaREgReaT,maisey=silly", "sentry=doGsaREgReaT", "maisey=silly"), + # stuff before and after + ( + "charlie=goofy,sentry=doGsaREgReaT,maisey=silly", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple after + ( + "sentry=doGsaREgReaT,charlie=goofy,maisey=silly", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before and after + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT,bodhi=floppy,cory=loyal", + "sentry=doGsaREgReaT", + "charlie=goofy,maisey=silly,bodhi=floppy,cory=loyal", + ), + # only third-party data + ("maisey=silly", None, "maisey=silly"), + # invalid third-party data, valid sentry data + ("maisey_is_silly,sentry=doGsaREgReaT", "sentry=doGsaREgReaT", None), + # valid third-party data, invalid sentry data + ("maisey=silly,sentry=doGsaREgReaT!", None, "maisey=silly"), + # nothing valid at all + ("maisey_is_silly,sentry=doGsaREgReaT!", None, None), + ], +) +def test_tracestate_extraction( + incoming_header, expected_sentry_value, expected_third_party +): + assert extract_tracestate_data(incoming_header) == { + "sentry_tracestate": expected_sentry_value, + "third_party_tracestate": expected_third_party, + } + + +# TODO (kmclb) remove this parameterization once tracestate is a real feature +@pytest.mark.parametrize("tracestate_enabled", [True, False]) +def test_iter_headers(sentry_init, monkeypatch, tracestate_enabled): + monkeypatch.setattr( + Transaction, + "to_traceparent", + mock.Mock(return_value="12312012123120121231201212312012-0415201309082013-0"), + ) + monkeypatch.setattr( + Transaction, + "to_tracestate", + mock.Mock(return_value="sentry=doGsaREgReaT,charlie=goofy"), + ) + monkeypatch.setattr( + sentry_sdk.tracing, + "has_tracestate_enabled", + mock.Mock(return_value=tracestate_enabled), + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + headers = dict(transaction.iter_headers()) + assert ( + headers["sentry-trace"] == "12312012123120121231201212312012-0415201309082013-0" + ) + if tracestate_enabled: + assert "tracestate" in headers + assert headers["tracestate"] == "sentry=doGsaREgReaT,charlie=goofy" + else: + assert "tracestate" not in headers + + +@pytest.mark.parametrize( + "data", + [ # comes out with no trailing `=` + {"name": "Maisey", "birthday": "12/31/12"}, + # comes out with one trailing `=` + {"dogs": "yes", "cats": "maybe"}, + # comes out with two trailing `=` + {"name": "Charlie", "birthday": "11/21/12"}, + ], +) +def test_tracestate_reinflation(data): + encoded_tracestate = to_base64(json.dumps(data)).strip("=") + assert reinflate_tracestate(encoded_tracestate) == data diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py index b2ce2e3a18..f9530d31b3 100644 --- a/tests/tracing/test_integration_tests.py +++ b/tests/tracing/test_integration_tests.py @@ -47,46 +47,46 @@ def test_basic(sentry_init, capture_events, sample_rate): @pytest.mark.parametrize("sampled", [True, False, None]) -@pytest.mark.parametrize( - "sample_rate", [0.0, 1.0] -) # ensure sampling decision is actually passed along via headers +@pytest.mark.parametrize("sample_rate", [0.0, 1.0]) def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate): + """ + Ensure data is actually passed along via headers, and that they are read + correctly. + """ sentry_init(traces_sample_rate=sample_rate) events = capture_events() # make a parent transaction (normally this would be in a different service) - with start_transaction(name="hi", sampled=True if sample_rate == 0 else None): + with start_transaction( + name="hi", sampled=True if sample_rate == 0 else None + ) as parent_transaction: with start_span() as old_span: old_span.sampled = sampled headers = dict(Hub.current.iter_trace_propagation_headers(old_span)) - - # test that the sampling decision is getting encoded in the header correctly - header = headers["sentry-trace"] - if sampled is True: - assert header.endswith("-1") - if sampled is False: - assert header.endswith("-0") - if sampled is None: - assert header.endswith("-") - - # child transaction, to prove that we can read 'sentry-trace' header data - # correctly - transaction = Transaction.continue_from_headers(headers, name="WRONG") - assert transaction is not None - assert transaction.parent_sampled == sampled - assert transaction.trace_id == old_span.trace_id - assert transaction.same_process_as_parent is False - assert transaction.parent_span_id == old_span.span_id - assert transaction.span_id != old_span.span_id + tracestate = parent_transaction._sentry_tracestate + + # child transaction, to prove that we can read 'sentry-trace' and + # `tracestate` header data correctly + child_transaction = Transaction.continue_from_headers(headers, name="WRONG") + assert child_transaction is not None + assert child_transaction.parent_sampled == sampled + assert child_transaction.trace_id == old_span.trace_id + assert child_transaction.same_process_as_parent is False + assert child_transaction.parent_span_id == old_span.span_id + assert child_transaction.span_id != old_span.span_id + assert child_transaction._sentry_tracestate == tracestate # add child transaction to the scope, to show that the captured message will # be tagged with the trace id (since it happens while the transaction is # open) - with start_transaction(transaction): + with start_transaction(child_transaction): with configure_scope() as scope: + # change the transaction name from "WRONG" to make sure the change + # is reflected in the final data scope.transaction = "ho" capture_message("hello") + # in this case the child transaction won't be captured if sampled is False or (sample_rate == 0 and sampled is None): trace1, message = events @@ -100,7 +100,7 @@ def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate assert ( trace1["contexts"]["trace"]["trace_id"] == trace2["contexts"]["trace"]["trace_id"] - == transaction.trace_id + == child_transaction.trace_id == message["contexts"]["trace"]["trace_id"] ) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index f5b8aa5e85..5d6613cd28 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -1,7 +1,17 @@ import pytest +import gc +import uuid +import os +import sentry_sdk from sentry_sdk import Hub, start_span, start_transaction from sentry_sdk.tracing import Span, Transaction +from sentry_sdk.tracing_utils import has_tracestate_enabled + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 def test_span_trimming(sentry_init, capture_events): @@ -15,40 +25,59 @@ def test_span_trimming(sentry_init, capture_events): (event,) = events - # the transaction is its own first span (which counts for max_spans) but it - # doesn't show up in the span list in the event, so this is 1 less than our - # max_spans value - assert len(event["spans"]) == 2 + assert len(event["spans"]) == 3 - span1, span2 = event["spans"] + span1, span2, span3 = event["spans"] assert span1["op"] == "foo0" assert span2["op"] == "foo1" + assert span3["op"] == "foo2" -def test_transaction_method_signature(sentry_init, capture_events): +def test_transaction_naming(sentry_init, capture_events): sentry_init(traces_sample_rate=1.0) events = capture_events() + # only transactions have names - spans don't with pytest.raises(TypeError): start_span(name="foo") assert len(events) == 0 + # default name in event if no name is passed with start_transaction() as transaction: pass - assert transaction.name == "" assert len(events) == 1 + assert events[0]["transaction"] == "" + # the name can be set once the transaction's already started with start_transaction() as transaction: transaction.name = "name-known-after-transaction-started" assert len(events) == 2 + assert events[1]["transaction"] == "name-known-after-transaction-started" + # passing in a name works, too with start_transaction(name="a"): pass assert len(events) == 3 + assert events[2]["transaction"] == "a" - with start_transaction(Transaction(name="c")): - pass - assert len(events) == 4 + +def test_start_transaction(sentry_init): + sentry_init(traces_sample_rate=1.0) + + # you can have it start a transaction for you + result1 = start_transaction( + name="/interactions/other-dogs/new-dog", op="greeting.sniff" + ) + assert isinstance(result1, Transaction) + assert result1.name == "/interactions/other-dogs/new-dog" + assert result1.op == "greeting.sniff" + + # or you can pass it an already-created transaction + preexisting_transaction = Transaction( + name="/interactions/other-dogs/new-dog", op="greeting.sniff" + ) + result2 = start_transaction(preexisting_transaction) + assert result2 is preexisting_transaction def test_finds_transaction_on_scope(sentry_init): @@ -77,7 +106,7 @@ def test_finds_transaction_on_scope(sentry_init): assert scope._span.name == "dogpark" -def test_finds_transaction_when_decedent_span_is_on_scope( +def test_finds_transaction_when_descendent_span_is_on_scope( sentry_init, ): sentry_init(traces_sample_rate=1.0) @@ -128,3 +157,92 @@ def test_finds_non_orphan_span_on_scope(sentry_init): assert scope._span is not None assert isinstance(scope._span, Span) assert scope._span.op == "sniffing" + + +def test_circular_references(monkeypatch, sentry_init, request): + # TODO: We discovered while writing this test about transaction/span + # reference cycles that there's actually also a circular reference in + # `serializer.py`, between the functions `_serialize_node` and + # `_serialize_node_impl`, both of which are defined inside of the main + # `serialize` function, and each of which calls the other one. For now, in + # order to avoid having those ref cycles give us a false positive here, we + # can mock out `serialize`. In the long run, though, we should probably fix + # that. (Whenever we do work on fixing it, it may be useful to add + # + # gc.set_debug(gc.DEBUG_LEAK) + # request.addfinalizer(lambda: gc.set_debug(~gc.DEBUG_LEAK)) + # + # immediately after the initial collection below, so we can see what new + # objects the garbage collecter has to clean up once `transaction.finish` is + # called and the serializer runs.) + monkeypatch.setattr( + sentry_sdk.client, + "serialize", + mock.Mock( + return_value=None, + ), + ) + + # In certain versions of python, in some environments (specifically, python + # 3.4 when run in GH Actions), we run into a `ctypes` bug which creates + # circular references when `uuid4()` is called, as happens when we're + # generating event ids. Mocking it with an implementation which doesn't use + # the `ctypes` function lets us avoid having false positives when garbage + # collecting. See https://bugs.python.org/issue20519. + monkeypatch.setattr( + uuid, + "uuid4", + mock.Mock( + return_value=uuid.UUID(bytes=os.urandom(16)), + ), + ) + + gc.disable() + request.addfinalizer(gc.enable) + + sentry_init(traces_sample_rate=1.0) + + # Make sure that we're starting with a clean slate before we start creating + # transaction/span reference cycles + gc.collect() + + dogpark_transaction = start_transaction(name="dogpark") + sniffing_span = dogpark_transaction.start_child(op="sniffing") + wagging_span = dogpark_transaction.start_child(op="wagging") + + # At some point, you have to stop sniffing - there are balls to chase! - so finish + # this span while the dogpark transaction is still open + sniffing_span.finish() + + # The wagging, however, continues long past the dogpark, so that span will + # NOT finish before the transaction ends. (Doing it in this order proves + # that both finished and unfinished spans get their cycles broken.) + dogpark_transaction.finish() + + # Eventually you gotta sleep... + wagging_span.finish() + + # assuming there are no cycles by this point, these should all be able to go + # out of scope and get their memory deallocated without the garbage + # collector having anything to do + del sniffing_span + del wagging_span + del dogpark_transaction + + assert gc.collect() == 0 + + +# TODO (kmclb) remove this test once tracestate is a real feature +@pytest.mark.parametrize("tracestate_enabled", [True, False, None]) +def test_has_tracestate_enabled(sentry_init, tracestate_enabled): + experiments = ( + {"propagate_tracestate": tracestate_enabled} + if tracestate_enabled is not None + else {} + ) + sentry_init(_experiments=experiments) + + if tracestate_enabled is True: + assert has_tracestate_enabled() is True + else: + assert has_tracestate_enabled() is False diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 672110ada2..6f09b451e1 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -3,7 +3,8 @@ import pytest from sentry_sdk import Hub, start_span, start_transaction -from sentry_sdk.tracing import Transaction, _is_valid_sample_rate +from sentry_sdk.tracing import Transaction +from sentry_sdk.tracing_utils import is_valid_sample_rate from sentry_sdk.utils import logger try: @@ -56,7 +57,7 @@ def test_no_double_sampling(sentry_init, capture_events): ) def test_accepts_valid_sample_rate(rate): with mock.patch.object(logger, "warning", mock.Mock()): - result = _is_valid_sample_rate(rate) + result = is_valid_sample_rate(rate) assert logger.warning.called is False assert result is True @@ -77,7 +78,7 @@ def test_accepts_valid_sample_rate(rate): ) def test_warns_on_invalid_sample_rate(rate, StringContaining): # noqa: N803 with mock.patch.object(logger, "warning", mock.Mock()): - result = _is_valid_sample_rate(rate) + result = is_valid_sample_rate(rate) logger.warning.assert_any_call(StringContaining("Given sample rate is invalid")) assert result is False @@ -231,7 +232,9 @@ def test_passes_parent_sampling_decision_in_sampling_context( ) ) - transaction = Transaction.from_traceparent(sentry_trace_header, name="dogpark") + transaction = Transaction.continue_from_headers( + headers={"sentry-trace": sentry_trace_header}, name="dogpark" + ) spy = mock.Mock(wraps=transaction) start_transaction(transaction=spy) diff --git a/tests/utils/test_general.py b/tests/utils/test_general.py index 370a6327ff..03be52ca17 100644 --- a/tests/utils/test_general.py +++ b/tests/utils/test_general.py @@ -13,8 +13,10 @@ filename_for_module, handle_in_app_impl, iter_event_stacktraces, + to_base64, + from_base64, ) -from sentry_sdk._compat import text_type +from sentry_sdk._compat import text_type, string_types try: @@ -168,3 +170,56 @@ def test_iter_stacktraces(): ) == {1, 2, 3} ) + + +@pytest.mark.parametrize( + ("original", "base64_encoded"), + [ + # ascii only + ("Dogs are great!", "RG9ncyBhcmUgZ3JlYXQh"), + # emoji + (u"🐶", "8J+Qtg=="), + # non-ascii + ( + u"Καλό κορίτσι, Μάιζεϊ!", + "zprOsc67z4wgzrrOv8+Bzq/PhM+DzrksIM6czqzOuc62zrXPiiE=", + ), + # mix of ascii and non-ascii + ( + u"Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.", + "T2YgbWFyZ2lyIGh1bmRhciEgw4lnIGdlcmkgcsOhw7AgZnlyaXIgYcOwIMOpZyDDvnVyZmkgc3TDpnJyYSByw7ptLg==", + ), + ], +) +def test_successful_base64_conversion(original, base64_encoded): + # all unicode characters should be handled correctly + assert to_base64(original) == base64_encoded + assert from_base64(base64_encoded) == original + + # "to" and "from" should be inverses + assert from_base64(to_base64(original)) == original + assert to_base64(from_base64(base64_encoded)) == base64_encoded + + +@pytest.mark.parametrize( + "input", + [ + 1231, # incorrect type + True, # incorrect type + [], # incorrect type + {}, # incorrect type + None, # incorrect type + "yayfordogs", # wrong length + "#dog", # invalid ascii character + "🐶", # non-ascii character + ], +) +def test_failed_base64_conversion(input): + # conversion from base64 should fail if given input of the wrong type or + # input which isn't a valid base64 string + assert from_base64(input) is None + + # any string can be converted to base64, so only type errors will cause + # failures + if type(input) not in string_types: + assert to_base64(input) is None