From 17829740712d0576286a06ee3661c38a2e4a310a Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:26:59 -0800 Subject: [PATCH 01/24] add base64 conversion methods --- sentry_sdk/utils.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 323e4ceffa..468165bebe 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1,3 +1,4 @@ +import base64 import json import linecache import logging @@ -956,3 +957,26 @@ def run(self): integer_configured_timeout ) ) + + +def to_base64(original): + # type: (str) -> str + """ + Convert a string to base64, via UTF-8. + """ + utf8_bytes = original.encode("UTF-8") + base64_bytes = base64.b64encode(utf8_bytes) + base64_string = base64_bytes.decode("UTF-8") + + return base64_string + + +def from_base64(base64_string): + # type: (str) -> str + """ + Convert a string from base64, via UTF-8. + """ + base64_bytes = base64_string.encode("UTF-8") + utf8_bytes = base64.b64decode(base64_bytes) + utf8_string = utf8_bytes.decode("UTF-8") + return utf8_string From b187c2fc70ac8657c58fc00e4b34ff2fbcb82e91 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:28:38 -0800 Subject: [PATCH 02/24] add method to compute a transaction's tracestate value --- sentry_sdk/tracing_utils.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 62489105c1..ee16747300 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -1,5 +1,6 @@ import re import contextlib +import json import math from numbers import Real @@ -174,6 +175,36 @@ def maybe_create_breadcrumbs_from_span(hub, span): ) +def compute_new_tracestate(transaction): + # type: (Transaction) -> str + """ + Computes a new tracestate value for the transaction. + """ + data = {} + + client = (transaction.hub or sentry_sdk.Hub.current).client + + # if there's no client and/or no DSN, we're not sending anything anywhere, + # so it's fine to not have any tracestate data + if client and client.options.get("dsn"): + options = client.options + data = { + "trace_id": transaction.trace_id, + "environment": options["environment"], + "release": options.get("release"), + "public_key": Dsn(options["dsn"]).public_key, + } + + tracestate_json = json.dumps(data) + + # Base64-encoded strings always come out with a length which is a multiple + # of 4. In order to achieve this, the end is padded with one or more `=` + # signs. Because the tracestate standard calls for using `=` signs between + # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion + # we strip the `=` + return to_base64(tracestate_json).rstrip("=") + + def _format_sql(cursor, sql): # type: (Any, str) -> Optional[str] From bba28805987bb17564de15c08660d519f3c635e5 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:29:36 -0800 Subject: [PATCH 03/24] add methods for extracting data from sentry-trace and tracestate headers --- sentry_sdk/tracing_utils.py | 93 ++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index ee16747300..70c0956714 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -9,7 +9,9 @@ from sentry_sdk.utils import ( capture_internal_exceptions, + Dsn, logger, + to_base64, to_string, ) from sentry_sdk._compat import PY2 @@ -27,8 +29,9 @@ from typing import Optional from typing import Any from typing import Dict + from typing import Union - from sentry_sdk.tracing import Span + from sentry_sdk.tracing import Span, Transaction SENTRY_TRACE_REGEX = re.compile( @@ -39,6 +42,33 @@ "[ \t]*$" # whitespace ) +# This is a normal base64 regex, modified to reflect that fact that we strip the +# trailing = or == off +b64 = base64_stripped = ( + # any of the characters in the base64 "alphabet", in multiples of 4 + "([a-zA-Z0-9+/]{4})*" + # either nothing or 2 or 3 base64-alphabet characters (see + # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding for + # why there's never only 1 extra character) + "([a-zA-Z0-9+/]{2,3})?" +) +ov = outside_vendor_entry = r"\w+=\w+" + +TRACESTATE_HEADER_REGEX = re.compile( + # zero or more other vendors' entries, each followed by a comma, + # captured together as one group + "^((?:{ov},)*)" + # sentry's entry, with only the value captured + "(?:sentry=({b64}))?" + # zero or more other vendors' entries, each preceded by a comma, + # captured together as one group + "((?:,{ov})*)$".format(ov=ov, b64=b64) +) + +TRACESTATE_SENTRY_VALUE_REGEX = re.compile( + "sentry=({b64})^[a-zA-Z0-9+/]?".format(b64=b64) +) + class EnvironHeaders(Mapping): # type: ignore def __init__( @@ -175,6 +205,67 @@ def maybe_create_breadcrumbs_from_span(hub, span): ) +def extract_sentrytrace_data(header): + # type: (Optional[str]) -> typing.Mapping[str, Union[Optional[str], Optional[bool]]] + + """ + Given a `sentry-trace` header string, return a dictionary of data. + """ + trace_id = parent_span_id = parent_sampled = None + + if header: + if header.startswith("00-") and header.endswith("-00"): + header = header[3:-3] + + match = SENTRY_TRACE_REGEX.match(header) + + if match: + trace_id, parent_span_id, sampled_str = match.groups() + + if trace_id: + trace_id = "{:032x}".format(int(trace_id, 16)) + if parent_span_id: + parent_span_id = "{:016x}".format(int(parent_span_id, 16)) + if sampled_str: + parent_sampled = sampled_str != "0" + + return { + "trace_id": trace_id, + "parent_span_id": parent_span_id, + "parent_sampled": parent_sampled, + } + + +def extract_tracestate_data(header): + # type: (Optional[str]) -> typing.Mapping[str, Optional[str]] + """ + Extracts the sentry tracestate value and any third-party data from the given + tracestate header, returning a dictionary of data. + """ + sentry_value = third_party = None + + if header: + + match = TRACESTATE_HEADER_REGEX.match(header) + + if match: + before, sentry_value, after = match.groups() + if before or after: + # filter out empty strings, and make sure there aren't too many + # commas between them + third_party = ",".join( + [value.strip(",") for value in [before, after] if value != ""] + ) + + else: + # if the header is malformed, at least try to grab sentry's part, if any + sentry_value_match = TRACESTATE_SENTRY_VALUE_REGEX.search(header) + if sentry_value_match: + sentry_value = sentry_value_match.group(1) + + return {"sentry_tracestate": sentry_value, "third_party_tracestate": third_party} + + def compute_new_tracestate(transaction): # type: (Transaction) -> str """ From 94099d2cbe43dcac468ff9219c7e1bedd76a0e2b Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:35:11 -0800 Subject: [PATCH 04/24] add sentry and third-party tracestate attributes to Transaction class --- sentry_sdk/tracing.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index ae7b75bef2..7b00760466 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -10,6 +10,7 @@ from sentry_sdk.tracing_utils import ( SENTRY_TRACE_REGEX, EnvironHeaders, + compute_new_tracestate, has_tracing_enabled, is_valid_sample_rate, maybe_create_breadcrumbs_from_span, @@ -422,12 +423,22 @@ def get_trace_context(self): class Transaction(Span): - __slots__ = ("name", "parent_sampled") + __slots__ = ( + "name", + "parent_sampled", + # base64-encoded json of trace correlation context, missing trailing = + # (note: does NOT include the `sentry=`) + "_sentry_tracestate_value", + # tracestate data from other vendors, of the form `dogs=yes,cats=maybe` + "_third_party_tracestate", + ) def __init__( self, name="", # type: str parent_sampled=None, # type: Optional[bool] + sentry_tracestate=None, # type: Optional[str] + third_party_tracestate=None, # type: Optional[str] **kwargs # type: Any ): # type: (...) -> None @@ -443,6 +454,10 @@ def __init__( Span.__init__(self, **kwargs) self.name = name self.parent_sampled = parent_sampled + self._sentry_tracestate_value = sentry_tracestate or compute_new_tracestate( + self + ) + self._third_party_tracestate = third_party_tracestate def __repr__(self): # type: () -> str From f6de1043765196f2e5a2efcaa02ac339f3e4ee8c Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:37:50 -0800 Subject: [PATCH 05/24] pass sentry tracestate in event.contexts.trace for extraction when envelope is created --- sentry_sdk/tracing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 7b00760466..bfe493ed1d 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -419,6 +419,9 @@ def get_trace_context(self): if self.status: rv["status"] = self.status + if self._containing_transaction: + rv["tracestate"] = self._containing_transaction._sentry_tracestate_value + return rv From 5727ffcf5a54932d409df027728acf6a2afeaa9e Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:39:15 -0800 Subject: [PATCH 06/24] update docstrings --- sentry_sdk/tracing.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index bfe493ed1d..6476b7e275 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -211,11 +211,12 @@ def continue_from_environ( # type: (...) -> Transaction """ Create a Transaction with the given params, then add in data pulled from - the 'sentry-trace' header in the environ (if any) before returning the - Transaction. + the 'sentry-trace' and 'tracestate' headers from the environ (if any) + before returning the Transaction. - If the 'sentry-trace' header is malformed or missing, just create and - return a Transaction instance with the given params. + This is different from `continue_from_headers` in that it assumes header + names in the form "HTTP_HEADER_NAME" - such as you would get from a wsgi + environ - rather than the form "header-name". """ if cls is Span: logger.warning( @@ -232,11 +233,8 @@ def continue_from_headers( ): # type: (...) -> Transaction """ - Create a Transaction with the given params, then add in data pulled from - the 'sentry-trace' header (if any) before returning the Transaction. - - If the 'sentry-trace' header is malformed or missing, just create and - return a Transaction instance with the given params. + Create a transaction with the given params (including any data pulled from + the 'sentry-trace' and 'tracestate' headers). """ if cls is Span: logger.warning( From dfbcbb7ce22d5e98a7ffea0947c9a8c864998c3e Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:40:00 -0800 Subject: [PATCH 07/24] use header data extraction methods rather than from_traceparent --- sentry_sdk/tracing.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 6476b7e275..e069c9e8f5 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -11,6 +11,8 @@ SENTRY_TRACE_REGEX, EnvironHeaders, compute_new_tracestate, + extract_sentrytrace_data, + extract_tracestate_data, has_tracing_enabled, is_valid_sample_rate, maybe_create_breadcrumbs_from_span, @@ -241,12 +243,13 @@ def continue_from_headers( "Deprecated: use Transaction.continue_from_headers " "instead of Span.continue_from_headers." ) - transaction = Transaction.from_traceparent( - headers.get("sentry-trace"), **kwargs - ) - if transaction is None: - transaction = Transaction(**kwargs) + + kwargs.update(extract_sentrytrace_data(headers.get("sentry-trace"))) + kwargs.update(extract_tracestate_data(headers.get("tracestate"))) + + transaction = Transaction(**kwargs) transaction.same_process_as_parent = False + return transaction def iter_headers(self): From e2fbc7a487c58e1438bad7f13d7be7fe7e66530f Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 12 Jan 2021 22:40:35 -0800 Subject: [PATCH 08/24] add trace correlation context to envelope header --- sentry_sdk/client.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7687baa76f..bfdf99b7de 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -3,6 +3,7 @@ import random from datetime import datetime import socket +import json from sentry_sdk._compat import string_types, text_type, iteritems from sentry_sdk.utils import ( @@ -10,6 +11,7 @@ current_stacktrace, disable_capture_event, format_timestamp, + from_base64, get_type_name, get_default_release, handle_in_app, @@ -328,16 +330,36 @@ def capture_event( attachments = hint.get("attachments") is_transaction = event_opt.get("type") == "transaction" + raw_tracestate = ( + event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", None) + ) if is_transaction or attachments: # Transactions or events with attachments should go to the # /envelope/ endpoint. - envelope = Envelope( - headers={ - "event_id": event_opt["event_id"], - "sent_at": format_timestamp(datetime.utcnow()), - } - ) + headers = { + "event_id": event_opt["event_id"], + "sent_at": format_timestamp(datetime.utcnow()), + } + + if raw_tracestate: + # Base64-encoded strings always come out with a length which is a multiple + # of 4. In order to achieve this, the end is padded with one or more `=` + # signs. Because the tracestate standard calls for using `=` signs between + # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion + # we strip the `=` when the data is initially encoded. Python's decoding + # function requires they be put back. + + # The final mod 4 is necessary because 4 is represented as 4 + # rather than 0. + missing_equals = (4 - (len(raw_tracestate) % 4)) % 4 + base64_tracestate = raw_tracestate + "=" * missing_equals + + tracestate_json = from_base64(base64_tracestate) + + headers["trace"] = json.loads(tracestate_json) + + envelope = Envelope(headers=headers) if is_transaction: envelope.add_transaction(event_opt) From a4799363fdb33cf1f6aeea24172697b87568c0bc Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Thu, 14 Jan 2021 16:18:06 -0800 Subject: [PATCH 09/24] add to_tracestate and use it in iter_headers --- sentry_sdk/hub.py | 4 ++++ sentry_sdk/tracing.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index 2e378cb56d..310cb4b134 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -697,6 +697,10 @@ def iter_trace_propagation_headers(self): yield "sentry-trace", span.to_traceparent() + tracestate = span.to_tracestate() + if tracestate: + yield "tracestate", tracestate + GLOBAL_HUB = Hub() _local.set(GLOBAL_HUB) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index e069c9e8f5..04e3860e80 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -255,6 +255,9 @@ def continue_from_headers( def iter_headers(self): # type: () -> Generator[Tuple[str, str], None, None] yield "sentry-trace", self.to_traceparent() + tracestate = self.to_tracestate() + if tracestate: + yield "tracestate", tracestate @classmethod def from_traceparent( @@ -314,6 +317,22 @@ def to_traceparent(self): sampled = "0" return "%s-%s-%s" % (self.trace_id, self.span_id, sampled) + def to_tracestate(self): + # type: () -> Optional[str] + transaction = self._containing_transaction + + if not transaction: + return None + + # it should never happen that thre's no tracestate value, so this "or" + # is just insurance + header_value = transaction._sentry_tracestate_value or "{}" + + if transaction._third_party_tracestate: + header_value = ",".join([header_value, transaction._third_party_tracestate]) + + return header_value + def set_tag(self, key, value): # type: (str, Any) -> None self._tags[key] = value @@ -420,8 +439,13 @@ def get_trace_context(self): if self.status: rv["status"] = self.status - if self._containing_transaction: - rv["tracestate"] = self._containing_transaction._sentry_tracestate_value + if isinstance(self, Transaction): + transaction = self # type: Optional[Transaction] + else: + transaction = self._containing_transaction + + if transaction: + rv["tracestate"] = transaction._sentry_tracestate_value return rv From 60014d2dd753b257de65b96587fba3a42c0d674a Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Thu, 14 Jan 2021 16:27:35 -0800 Subject: [PATCH 10/24] deprecate from_traceparent --- sentry_sdk/tracing.py | 40 ++++++---------------------------- tests/tracing/test_sampling.py | 4 +++- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 04e3860e80..23dc3c83db 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -8,7 +8,6 @@ from sentry_sdk.utils import logger from sentry_sdk.tracing_utils import ( - SENTRY_TRACE_REGEX, EnvironHeaders, compute_new_tracestate, extract_sentrytrace_data, @@ -267,46 +266,21 @@ def from_traceparent( ): # type: (...) -> Optional[Transaction] """ + DEPRECATED: Use Transaction.continue_from_headers(headers, **kwargs) + Create a Transaction with the given params, then add in data pulled from the given 'sentry-trace' header value before returning the Transaction. - If the header value is malformed or missing, just create and return a - Transaction instance with the given params. """ - if cls is Span: - logger.warning( - "Deprecated: use Transaction.from_traceparent " - "instead of Span.from_traceparent." - ) + logger.warning( + "Deprecated: Use Transaction.continue_from_headers(headers, **kwargs) " + "instead of from_traceparent(traceparent, **kwargs)" + ) if not traceparent: return None - if traceparent.startswith("00-") and traceparent.endswith("-00"): - traceparent = traceparent[3:-3] - - match = SENTRY_TRACE_REGEX.match(str(traceparent)) - if match is None: - return None - - trace_id, parent_span_id, sampled_str = match.groups() - - if trace_id is not None: - trace_id = "{:032x}".format(int(trace_id, 16)) - if parent_span_id is not None: - parent_span_id = "{:016x}".format(int(parent_span_id, 16)) - - if sampled_str: - parent_sampled = sampled_str != "0" # type: Optional[bool] - else: - parent_sampled = None - - return Transaction( - trace_id=trace_id, - parent_span_id=parent_span_id, - parent_sampled=parent_sampled, - **kwargs - ) + return cls.continue_from_headers({"sentry-trace": traceparent}, **kwargs) def to_traceparent(self): # type: () -> str diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 758b4be2da..6f09b451e1 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -232,7 +232,9 @@ def test_passes_parent_sampling_decision_in_sampling_context( ) ) - transaction = Transaction.from_traceparent(sentry_trace_header, name="dogpark") + transaction = Transaction.continue_from_headers( + headers={"sentry-trace": sentry_trace_header}, name="dogpark" + ) spy = mock.Mock(wraps=transaction) start_transaction(transaction=spy) From c6e19e4bf680e3d5c8e3509e5ddae78a3f335509 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 19 Jan 2021 15:28:17 -0800 Subject: [PATCH 11/24] make base64 conversion methods handle errors, add tests --- sentry_sdk/tracing_utils.py | 2 +- sentry_sdk/utils.py | 33 ++++++++++++++------- tests/utils/test_general.py | 57 ++++++++++++++++++++++++++++++++++++- 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 70c0956714..c1f78642b8 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -293,7 +293,7 @@ def compute_new_tracestate(transaction): # signs. Because the tracestate standard calls for using `=` signs between # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion # we strip the `=` - return to_base64(tracestate_json).rstrip("=") + return (to_base64(tracestate_json) or "").rstrip("=") def _format_sql(cursor, sql): diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 468165bebe..cc2b9d0227 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -960,23 +960,36 @@ def run(self): def to_base64(original): - # type: (str) -> str + # type: (str) -> Optional[str] """ - Convert a string to base64, via UTF-8. + Convert a string to base64, via UTF-8. Returns None on invalid input. """ - utf8_bytes = original.encode("UTF-8") - base64_bytes = base64.b64encode(utf8_bytes) - base64_string = base64_bytes.decode("UTF-8") + base64_string = None + + try: + utf8_bytes = original.encode("UTF-8") + base64_bytes = base64.b64encode(utf8_bytes) + base64_string = base64_bytes.decode("UTF-8") + except Exception as err: + logger.warning("Unable to encode {orig} to base64:".format(orig=original), err) return base64_string def from_base64(base64_string): - # type: (str) -> str + # type: (str) -> Optional[str] """ - Convert a string from base64, via UTF-8. + Convert a string from base64, via UTF-8. Returns None on invalid input. """ - base64_bytes = base64_string.encode("UTF-8") - utf8_bytes = base64.b64decode(base64_bytes) - utf8_string = utf8_bytes.decode("UTF-8") + utf8_string = None + + try: + base64_bytes = base64_string.encode("UTF-8") + utf8_bytes = base64.b64decode(base64_bytes, validate=True) + utf8_string = utf8_bytes.decode("UTF-8") + except Exception as err: + logger.warning( + "Unable to decode {b64} from base64:".format(b64=base64_string), err + ) + return utf8_string diff --git a/tests/utils/test_general.py b/tests/utils/test_general.py index 370a6327ff..0fd2c47f4f 100644 --- a/tests/utils/test_general.py +++ b/tests/utils/test_general.py @@ -13,8 +13,10 @@ filename_for_module, handle_in_app_impl, iter_event_stacktraces, + to_base64, + from_base64, ) -from sentry_sdk._compat import text_type +from sentry_sdk._compat import text_type, string_types try: @@ -168,3 +170,56 @@ def test_iter_stacktraces(): ) == {1, 2, 3} ) + + +@pytest.mark.parametrize( + ("original", "base64_encoded"), + [ + # ascii only + ("Dogs are great!", "RG9ncyBhcmUgZ3JlYXQh"), + # emoji + ("🐶", "8J+Qtg=="), + # non-ascii + ( + "Καλό κορίτσι, Μάιζεϊ!", + "zprOsc67z4wgzrrOv8+Bzq/PhM+DzrksIM6czqzOuc62zrXPiiE=", + ), + # mix of ascii and non-ascii + ( + "Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.", + "T2YgbWFyZ2lyIGh1bmRhciEgw4lnIGdlcmkgcsOhw7AgZnlyaXIgYcOwIMOpZyDDvnVyZmkgc3TDpnJyYSByw7ptLg==", + ), + ], +) +def test_successful_base64_conversion(original, base64_encoded): + # all unicode characters should be handled correctly + assert to_base64(original) == base64_encoded + assert from_base64(base64_encoded) == original + + # "to" and "from" should be inverses + assert from_base64(to_base64(original)) == original + assert to_base64(from_base64(base64_encoded)) == base64_encoded + + +@pytest.mark.parametrize( + "input", + [ + 1231, # incorrect type + True, # incorrect type + [], # incorrect type + {}, # incorrect type + None, # incorrect type + "yayfordogs", # wrong length + "#dog", # invalid ascii character + "🐶", # non-ascii character + ], +) +def test_failed_base64_conversion(input): + # conversion from base64 should fail if given input of the wrong type or + # input which isn't a valid base64 string + assert from_base64(input) is None + + # any string can be converted to base64, so it should only fail if given the + # wrong type + if type(input) not in string_types: + assert to_base64(input) is None From 2e0c7c2249a63b5da282a6a18212ffacf3d1871c Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 19 Jan 2021 22:09:15 -0800 Subject: [PATCH 12/24] fix tracestate extraction, add tests --- sentry_sdk/tracing_utils.py | 65 +++++++++++++++------------ tests/utils/test_tracing.py | 87 +++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 27 deletions(-) create mode 100644 tests/utils/test_tracing.py diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index c1f78642b8..2e4dbd9ac0 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -44,7 +44,7 @@ # This is a normal base64 regex, modified to reflect that fact that we strip the # trailing = or == off -b64 = base64_stripped = ( +base64_stripped = ( # any of the characters in the base64 "alphabet", in multiples of 4 "([a-zA-Z0-9+/]{4})*" # either nothing or 2 or 3 base64-alphabet characters (see @@ -52,21 +52,26 @@ # why there's never only 1 extra character) "([a-zA-Z0-9+/]{2,3})?" ) -ov = outside_vendor_entry = r"\w+=\w+" - -TRACESTATE_HEADER_REGEX = re.compile( - # zero or more other vendors' entries, each followed by a comma, - # captured together as one group - "^((?:{ov},)*)" - # sentry's entry, with only the value captured - "(?:sentry=({b64}))?" - # zero or more other vendors' entries, each preceded by a comma, - # captured together as one group - "((?:,{ov})*)$".format(ov=ov, b64=b64) + +# comma-delimited list of entries of the form `xxx=yyy` +tracestate_entry = "[^=]+=[^=]+" +TRACESTATE_ENTRIES_REGEX = re.compile( + # one or more xxxxx=yyyy entries + "^({te})+" + # each entry except the last must be followed by a comma + "(,|$)".format(te=tracestate_entry) ) -TRACESTATE_SENTRY_VALUE_REGEX = re.compile( - "sentry=({b64})^[a-zA-Z0-9+/]?".format(b64=b64) +# this doesn't check that the value is valid, just that there's something there +# of the form `sentry=xxxx` +SENTRY_TRACESTATE_ENTRY_REGEX = re.compile( + # either sentry is the first entry or there's stuff immediately before it, + # ending in a commma (this prevents matching something like `coolsentry=xxx`) + "(?:^|.+,)" + # sentry's part + "(sentry=[^,]*)" + # either there's another vendor's entry or we end + "(?:,|$)" ) @@ -243,25 +248,31 @@ def extract_tracestate_data(header): tracestate header, returning a dictionary of data. """ sentry_value = third_party = None + before = after = "" if header: + # find sentry's entry, if any + sentry_match = SENTRY_TRACESTATE_ENTRY_REGEX.search(header) - match = TRACESTATE_HEADER_REGEX.match(header) + if sentry_match: + sentry_entry = sentry_match.group(1) - if match: - before, sentry_value, after = match.groups() - if before or after: - # filter out empty strings, and make sure there aren't too many - # commas between them - third_party = ",".join( - [value.strip(",") for value in [before, after] if value != ""] - ) + # we have to strip them after the split so we don't end up with + # `xxx=yyy,,zzz=qqq` (double commas) when we put them back together + before, after = map(lambda s: s.strip(","), header.split(sentry_entry)) + # extract sentry's value from its entry and test to make sure it's + # valid; if it isn't, discard it so that a new one will be created + sentry_value = sentry_entry.replace("sentry=", "") + if not re.search("^{b64}$".format(b64=base64_stripped), sentry_value): + sentry_value = None else: - # if the header is malformed, at least try to grab sentry's part, if any - sentry_value_match = TRACESTATE_SENTRY_VALUE_REGEX.search(header) - if sentry_value_match: - sentry_value = sentry_value_match.group(1) + after = header + + # if either part is invalid or empty, remove it before gluing them together + third_party = ( + ",".join(filter(TRACESTATE_ENTRIES_REGEX.search, [before, after])) or None + ) return {"sentry_tracestate": sentry_value, "third_party_tracestate": third_party} diff --git a/tests/utils/test_tracing.py b/tests/utils/test_tracing.py new file mode 100644 index 0000000000..f8f83c946c --- /dev/null +++ b/tests/utils/test_tracing.py @@ -0,0 +1,87 @@ +import pytest + +from sentry_sdk.tracing import Transaction +from sentry_sdk.tracing_utils import extract_sentrytrace_data, extract_tracestate_data + + +def test_sentrytrace_extraction(): + assert extract_sentrytrace_data( + "12312012123120121231201212312012-0415201309082013-0" + ) == { + "trace_id": "12312012123120121231201212312012", + "parent_span_id": "0415201309082013", + "parent_sampled": False, + } + + +@pytest.mark.parametrize( + ("incoming_header", "expected_sentry_value", "expected_third_party"), + [ + # sentry only + ("sentry=doGsaREgReaT", "doGsaREgReaT", None), + # sentry only, invalid (`!` isn't a valid base64 character) + ("sentry=doGsaREgReaT!", None, None), + # stuff before + ("maisey=silly,sentry=doGsaREgReaT", "doGsaREgReaT", "maisey=silly"), + # stuff after + ("sentry=doGsaREgReaT,maisey=silly", "doGsaREgReaT", "maisey=silly"), + # stuff before and after + ( + "charlie=goofy,sentry=doGsaREgReaT,maisey=silly", + "doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT", + "doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple after + ( + "sentry=doGsaREgReaT,charlie=goofy,maisey=silly", + "doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before and after + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT,bodhi=floppy,cory=loyal", + "doGsaREgReaT", + "charlie=goofy,maisey=silly,bodhi=floppy,cory=loyal", + ), + # only third party data + ("maisey=silly", None, "maisey=silly"), + # invalid third party data, valid sentry data + ("maisey_is_silly,sentry=doGsaREgReaT", "doGsaREgReaT", None), + # valid third party data, invalid sentry data + ("maisey=silly,sentry=doGsaREgReaT!", None, "maisey=silly"), + # nothing valid at all + ("maisey_is_silly,sentry=doGsaREgReaT!", None, None), + ], +) +def test_tracestate_extraction( + incoming_header, expected_sentry_value, expected_third_party +): + assert extract_tracestate_data(incoming_header) == { + "sentry_tracestate": expected_sentry_value, + "third_party_tracestate": expected_third_party, + } + + +def test_tracestate_computation(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/meet/other-dog/new", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + ) + assert transaction._sentry_tracestate_value == ( + "eyJ0cmFjZV9pZCI6ICIxMjMxMjAxMjEyMzEyMDEyMTIzMTIwMTIxMjMxMjAxMiIsICJl" + + "bnZpcm9ubWVudCI6ICJkb2dwYXJrIiwgInJlbGVhc2UiOiAib2ZmLmxlYXNoLnBhcm" + + "siLCAicHVibGljX2tleSI6ICJkb2dzYXJlYmFkYXRrZWVwaW5nc2VjcmV0cyJ9" + ) From bc276ae1a57abce3dbadb68f276afd286f0ede2a Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 19 Jan 2021 22:11:34 -0800 Subject: [PATCH 13/24] fix bugs in to_tracestate, add tests --- sentry_sdk/tracing.py | 10 ++++++--- tests/tracing/test_headers.py | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 tests/tracing/test_headers.py diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 23dc3c83db..5b17952b7e 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -293,14 +293,18 @@ def to_traceparent(self): def to_tracestate(self): # type: () -> Optional[str] - transaction = self._containing_transaction + if isinstance(self, Transaction): + transaction = self + else: + transaction = self._containing_transaction if not transaction: return None # it should never happen that thre's no tracestate value, so this "or" - # is just insurance - header_value = transaction._sentry_tracestate_value or "{}" + # is just insurance ("e30" is the base64-encoded version of "{}", a + # jsonified empty dictionary) + header_value = "sentry=" + (transaction._sentry_tracestate_value or "e30") if transaction._third_party_tracestate: header_value = ",".join([header_value, transaction._third_party_tracestate]) diff --git a/tests/tracing/test_headers.py b/tests/tracing/test_headers.py new file mode 100644 index 0000000000..54f3fbc19e --- /dev/null +++ b/tests/tracing/test_headers.py @@ -0,0 +1,39 @@ +import pytest + +from sentry_sdk.tracing import Transaction + + +@pytest.mark.parametrize( + ("sentry_value, third_party_value, expected_tracestate"), + [ + ("doGsaREgReaT", None, "sentry=doGsaREgReaT"), + ( + "doGsaREgReaT", + "maisey=silly", + "sentry=doGsaREgReaT,maisey=silly", + ), + # this should never happen, but this proves things won't blow up if it does + (None, "charlie=goofy", "sentry=e30,charlie=goofy"), + ], +) +def test_to_tracestate( + sentry_init, sentry_value, third_party_value, expected_tracestate +): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/meet/other-dog/new", + op="greeting.sniff", + third_party_tracestate=third_party_value, + ) + + # we have to do this separately (rather than passing it to the Transaction + # constructor) because otherwise the constructor will come up with its own + # sentry_tracestate_value in the case where the one passed in the test is None + transaction._sentry_tracestate_value = sentry_value + + assert transaction.to_tracestate() == expected_tracestate From 197217f1afe1fb07ed46ddbc8e91ea08830a9cb3 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Tue, 19 Jan 2021 22:12:25 -0800 Subject: [PATCH 14/24] handle errors when attaching trace data to envelope headers --- sentry_sdk/client.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index bfdf99b7de..85f05d3074 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -357,7 +357,15 @@ def capture_event( tracestate_json = from_base64(base64_tracestate) - headers["trace"] = json.loads(tracestate_json) + try: + assert tracestate_json is not None + headers["trace"] = json.loads(tracestate_json) + except Exception as err: + logger.warning( + "Unable to attach tracestate data to envelope header: {err}\nTracestate value is {base64_tracestate}".format( + err=err, base64_tracestate=base64_tracestate + ), + ) envelope = Envelope(headers=headers) From fc19e88f887e77ba199b6b29224091e6072b8dc1 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 07:12:49 -0800 Subject: [PATCH 15/24] move all tracing tests to tests/tracing --- tests/utils/test_tracing.py | 87 ------------------------------------- 1 file changed, 87 deletions(-) diff --git a/tests/utils/test_tracing.py b/tests/utils/test_tracing.py index f8f83c946c..e69de29bb2 100644 --- a/tests/utils/test_tracing.py +++ b/tests/utils/test_tracing.py @@ -1,87 +0,0 @@ -import pytest - -from sentry_sdk.tracing import Transaction -from sentry_sdk.tracing_utils import extract_sentrytrace_data, extract_tracestate_data - - -def test_sentrytrace_extraction(): - assert extract_sentrytrace_data( - "12312012123120121231201212312012-0415201309082013-0" - ) == { - "trace_id": "12312012123120121231201212312012", - "parent_span_id": "0415201309082013", - "parent_sampled": False, - } - - -@pytest.mark.parametrize( - ("incoming_header", "expected_sentry_value", "expected_third_party"), - [ - # sentry only - ("sentry=doGsaREgReaT", "doGsaREgReaT", None), - # sentry only, invalid (`!` isn't a valid base64 character) - ("sentry=doGsaREgReaT!", None, None), - # stuff before - ("maisey=silly,sentry=doGsaREgReaT", "doGsaREgReaT", "maisey=silly"), - # stuff after - ("sentry=doGsaREgReaT,maisey=silly", "doGsaREgReaT", "maisey=silly"), - # stuff before and after - ( - "charlie=goofy,sentry=doGsaREgReaT,maisey=silly", - "doGsaREgReaT", - "charlie=goofy,maisey=silly", - ), - # multiple before - ( - "charlie=goofy,maisey=silly,sentry=doGsaREgReaT", - "doGsaREgReaT", - "charlie=goofy,maisey=silly", - ), - # multiple after - ( - "sentry=doGsaREgReaT,charlie=goofy,maisey=silly", - "doGsaREgReaT", - "charlie=goofy,maisey=silly", - ), - # multiple before and after - ( - "charlie=goofy,maisey=silly,sentry=doGsaREgReaT,bodhi=floppy,cory=loyal", - "doGsaREgReaT", - "charlie=goofy,maisey=silly,bodhi=floppy,cory=loyal", - ), - # only third party data - ("maisey=silly", None, "maisey=silly"), - # invalid third party data, valid sentry data - ("maisey_is_silly,sentry=doGsaREgReaT", "doGsaREgReaT", None), - # valid third party data, invalid sentry data - ("maisey=silly,sentry=doGsaREgReaT!", None, "maisey=silly"), - # nothing valid at all - ("maisey_is_silly,sentry=doGsaREgReaT!", None, None), - ], -) -def test_tracestate_extraction( - incoming_header, expected_sentry_value, expected_third_party -): - assert extract_tracestate_data(incoming_header) == { - "sentry_tracestate": expected_sentry_value, - "third_party_tracestate": expected_third_party, - } - - -def test_tracestate_computation(sentry_init): - sentry_init( - dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", - environment="dogpark", - release="off.leash.park", - ) - - transaction = Transaction( - name="/meet/other-dog/new", - op="greeting.sniff", - trace_id="12312012123120121231201212312012", - ) - assert transaction._sentry_tracestate_value == ( - "eyJ0cmFjZV9pZCI6ICIxMjMxMjAxMjEyMzEyMDEyMTIzMTIwMTIxMjMxMjAxMiIsICJl" - + "bnZpcm9ubWVudCI6ICJkb2dwYXJrIiwgInJlbGVhc2UiOiAib2ZmLmxlYXNoLnBhcm" - + "siLCAicHVibGljX2tleSI6ICJkb2dzYXJlYmFkYXRrZWVwaW5nc2VjcmV0cyJ9" - ) From a5900c152d1ad70e190eb55dc43dca339c5116ca Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 07:26:55 -0800 Subject: [PATCH 16/24] split up tracestate computation, make it work for spans, too --- sentry_sdk/tracing_utils.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 2e4dbd9ac0..8cd32578c4 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -31,7 +31,7 @@ from typing import Dict from typing import Union - from sentry_sdk.tracing import Span, Transaction + from sentry_sdk.tracing import Span SENTRY_TRACE_REGEX = re.compile( @@ -277,34 +277,44 @@ def extract_tracestate_data(header): return {"sentry_tracestate": sentry_value, "third_party_tracestate": third_party} -def compute_new_tracestate(transaction): - # type: (Transaction) -> str +def compute_tracestate_from_data(data): + # type: (typing.Mapping[str, str]) -> str """ - Computes a new tracestate value for the transaction. + Computes a new tracestate value using the given data. + """ + + tracestate_json = json.dumps(data) + + # Base64-encoded strings always come out with a length which is a multiple + # of 4. In order to achieve this, the end is padded with one or more `=` + # signs. Because the tracestate standard calls for using `=` signs between + # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion + # we strip the `=` + return (to_base64(tracestate_json) or "").rstrip("=") + + +def compute_tracestate(span): + # type: (Span) -> str + """ + Computes a new tracestate value for the span. """ data = {} - client = (transaction.hub or sentry_sdk.Hub.current).client + client = (span.hub or sentry_sdk.Hub.current).client # if there's no client and/or no DSN, we're not sending anything anywhere, # so it's fine to not have any tracestate data if client and client.options.get("dsn"): options = client.options data = { - "trace_id": transaction.trace_id, + "trace_id": span.trace_id, "environment": options["environment"], "release": options.get("release"), "public_key": Dsn(options["dsn"]).public_key, } - tracestate_json = json.dumps(data) + return compute_tracestate_from_data(data) - # Base64-encoded strings always come out with a length which is a multiple - # of 4. In order to achieve this, the end is padded with one or more `=` - # signs. Because the tracestate standard calls for using `=` signs between - # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion - # we strip the `=` - return (to_base64(tracestate_json) or "").rstrip("=") def _format_sql(cursor, sql): From 850c9031792a5fcbac8f0a04704bff0d7627a6be Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 07:27:41 -0800 Subject: [PATCH 17/24] add method for reinflating tracestate --- sentry_sdk/tracing_utils.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 8cd32578c4..bc9f4c4da3 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -13,6 +13,7 @@ logger, to_base64, to_string, + from_base64, ) from sentry_sdk._compat import PY2 from sentry_sdk._types import MYPY @@ -316,6 +317,39 @@ def compute_tracestate(span): return compute_tracestate_from_data(data) +def reinflate_tracestate(encoded_tracestate): + # type: (str) -> typing.Optional[Mapping[str, str]] + """ + Given a sentry tracestate value in its encoded form, translate it back into + a dictionary of data. + """ + inflated_tracestate = None + + if encoded_tracestate: + # Base64-encoded strings always come out with a length which is a + # multiple of 4. In order to achieve this, the end is padded with one or + # more `=` signs. Because the tracestate standard calls for using `=` + # signs between vendor name and value (`sentry=xxx,dogsaregreat=yyy`), + # to avoid confusion we strip the `=` when the data is initially + # encoded. Python's decoding function requires they be put back. + # Fortunately, it doesn't complain if there are too many, so we just + # attach two `=` on spec (there will never be more than 2, see + # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding). + tracestate_json = from_base64(encoded_tracestate + "==") + + try: + assert tracestate_json is not None + inflated_tracestate = json.loads(tracestate_json) + except Exception as err: + logger.warning( + ( + "Unable to attach tracestate data to envelope header: {err}" + + "\nTracestate value is {encoded_tracestate}" + ).format(err=err, encoded_tracestate=encoded_tracestate), + ) + + return inflated_tracestate + def _format_sql(cursor, sql): # type: (Any, str) -> Optional[str] From 4eb0cfb84987ceb866e98e00c893e55f1d99c1c6 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 07:35:10 -0800 Subject: [PATCH 18/24] do base64 validation manually because py2 --- sentry_sdk/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index cc2b9d0227..f9d769b8f8 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -6,6 +6,7 @@ import sys import threading import subprocess +import re from datetime import datetime @@ -40,6 +41,7 @@ MAX_STRING_LENGTH = 512 MAX_FORMAT_PARAM_LENGTH = 128 +BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$") def json_dumps(data): @@ -984,8 +986,11 @@ def from_base64(base64_string): utf8_string = None try: + only_valid_chars = BASE64_ALPHABET.match(base64_string) + assert only_valid_chars + base64_bytes = base64_string.encode("UTF-8") - utf8_bytes = base64.b64decode(base64_bytes, validate=True) + utf8_bytes = base64.b64decode(base64_bytes) utf8_string = utf8_bytes.decode("UTF-8") except Exception as err: logger.warning( From 46ff29b8817278c194089ce21fc35ca407941988 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 07:37:47 -0800 Subject: [PATCH 19/24] fixup splitting tracestate calculation --- sentry_sdk/tracing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 5b17952b7e..fc5f105ee9 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -9,7 +9,7 @@ from sentry_sdk.utils import logger from sentry_sdk.tracing_utils import ( EnvironHeaders, - compute_new_tracestate, + compute_tracestate, extract_sentrytrace_data, extract_tracestate_data, has_tracing_enabled, @@ -460,9 +460,7 @@ def __init__( Span.__init__(self, **kwargs) self.name = name self.parent_sampled = parent_sampled - self._sentry_tracestate_value = sentry_tracestate or compute_new_tracestate( - self - ) + self._sentry_tracestate_value = sentry_tracestate or compute_tracestate(self) self._third_party_tracestate = third_party_tracestate def __repr__(self): From 6edac1340d9c16972fceca1945c9f5857adb7423 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 08:27:25 -0800 Subject: [PATCH 20/24] compute new tracestate value for orphan spans --- sentry_sdk/tracing.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index fc5f105ee9..9624d8bb57 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -293,21 +293,29 @@ def to_traceparent(self): def to_tracestate(self): # type: () -> Optional[str] + """ + Generates the `tracestate` header value to attach to outgoing requests. + """ + header_value = None + if isinstance(self, Transaction): - transaction = self + transaction = self # type: Optional[Transaction] else: transaction = self._containing_transaction - if not transaction: - return None + # we should have the relevant values stored on the transaction, but if + # this is an orphan span, make a new value + if transaction and transaction._sentry_tracestate_value: + sentry_tracestate = transaction._sentry_tracestate_value + third_party_tracestate = transaction._third_party_tracestate + else: + sentry_tracestate = compute_tracestate(self) + third_party_tracestate = None - # it should never happen that thre's no tracestate value, so this "or" - # is just insurance ("e30" is the base64-encoded version of "{}", a - # jsonified empty dictionary) - header_value = "sentry=" + (transaction._sentry_tracestate_value or "e30") + header_value = "sentry=" + sentry_tracestate - if transaction._third_party_tracestate: - header_value = ",".join([header_value, transaction._third_party_tracestate]) + if third_party_tracestate: + header_value = header_value + "," + third_party_tracestate return header_value From a4f83957516ec60aef4645aaac16e471c58948aa Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 08:29:10 -0800 Subject: [PATCH 21/24] use reinflation method in client --- sentry_sdk/client.py | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 85f05d3074..584f2ba7ef 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -3,7 +3,6 @@ import random from datetime import datetime import socket -import json from sentry_sdk._compat import string_types, text_type, iteritems from sentry_sdk.utils import ( @@ -11,7 +10,6 @@ current_stacktrace, disable_capture_event, format_timestamp, - from_base64, get_type_name, get_default_release, handle_in_app, @@ -24,6 +22,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope +from sentry_sdk.tracing_utils import reinflate_tracestate from sentry_sdk._types import MYPY @@ -342,30 +341,9 @@ def capture_event( "sent_at": format_timestamp(datetime.utcnow()), } - if raw_tracestate: - # Base64-encoded strings always come out with a length which is a multiple - # of 4. In order to achieve this, the end is padded with one or more `=` - # signs. Because the tracestate standard calls for using `=` signs between - # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion - # we strip the `=` when the data is initially encoded. Python's decoding - # function requires they be put back. - - # The final mod 4 is necessary because 4 is represented as 4 - # rather than 0. - missing_equals = (4 - (len(raw_tracestate) % 4)) % 4 - base64_tracestate = raw_tracestate + "=" * missing_equals - - tracestate_json = from_base64(base64_tracestate) - - try: - assert tracestate_json is not None - headers["trace"] = json.loads(tracestate_json) - except Exception as err: - logger.warning( - "Unable to attach tracestate data to envelope header: {err}\nTracestate value is {base64_tracestate}".format( - err=err, base64_tracestate=base64_tracestate - ), - ) + tracestate_data = reinflate_tracestate(raw_tracestate) + if tracestate_data: + headers["trace"] = tracestate_data envelope = Envelope(headers=headers) From 25edca9d4115472941f4243cbf1f8833d9fd90ca Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 08:33:57 -0800 Subject: [PATCH 22/24] add lots of tests --- tests/test_envelope.py | 73 ++++++-- tests/tracing/test_headers.py | 39 ---- tests/tracing/test_http_headers.py | 237 ++++++++++++++++++++++++ tests/tracing/test_integration_tests.py | 51 ++--- tests/tracing/test_misc.py | 33 +++- tests/utils/test_general.py | 10 +- 6 files changed, 352 insertions(+), 91 deletions(-) delete mode 100644 tests/tracing/test_headers.py create mode 100644 tests/tracing/test_http_headers.py diff --git a/tests/test_envelope.py b/tests/test_envelope.py index e795e9d93c..df173d3c64 100644 --- a/tests/test_envelope.py +++ b/tests/test_envelope.py @@ -1,36 +1,49 @@ from sentry_sdk.envelope import Envelope from sentry_sdk.session import Session +from sentry_sdk import capture_event +from sentry_sdk.tracing_utils import compute_tracestate_from_data +import sentry_sdk.client def generate_transaction_item(): return { - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "event_id": "15210411201320122115110420122013", "type": "transaction", - "transaction": "/organizations/:orgId/performance/:eventSlug/", - "start_timestamp": 1597976392.6542819, - "timestamp": 1597976400.6189718, + "transaction": "/interactions/other-dogs/new-dog", + "start_timestamp": 1353568872.11122131, + "timestamp": 1356942672.09040815, "contexts": { "trace": { - "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", - "span_id": "FA90FDEAD5F74052", - "type": "trace", + "trace_id": "12312012123120121231201212312012", + "span_id": "0415201309082013", + "parent_span_id": None, + "description": "", + "op": "greeting.sniff", + "tracestate": compute_tracestate_from_data( + { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + ), } }, "spans": [ { "description": "", - "op": "react.mount", - "parent_span_id": "8f5a2b8768cafb4e", - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1597976393.4619668, - "timestamp": 1597976393.4718769, - "trace_id": "ff62a8b040f340bda5d830223def1d81", + "op": "greeting.sniff", + "parent_span_id": None, + "span_id": "0415201309082013", + "start_timestamp": 1353568872.11122131, + "timestamp": 1356942672.09040815, + "trace_id": "12312012123120121231201212312012", } ], } -def test_basic_event(): +def test_add_and_get_basic_event(): envelope = Envelope() expected = {"message": "Hello, World!"} @@ -39,7 +52,7 @@ def test_basic_event(): assert envelope.get_event() == {"message": "Hello, World!"} -def test_transaction_event(): +def test_add_and_get_transaction_event(): envelope = Envelope() transaction_item = generate_transaction_item() @@ -55,7 +68,7 @@ def test_transaction_event(): assert envelope.get_transaction_event() == transaction_item -def test_session(): +def test_add_and_get_session(): envelope = Envelope() expected = Session() @@ -64,3 +77,31 @@ def test_session(): for item in envelope: if item.type == "session": assert item.payload.json == expected.to_json() + + +def test_envelope_headers(sentry_init, capture_envelopes, monkeypatch): + monkeypatch.setattr( + sentry_sdk.client, + "format_timestamp", + lambda x: "2012-11-21T12:31:12.415908Z", + ) + + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + ) + envelopes = capture_envelopes() + + capture_event(generate_transaction_item()) + + assert len(envelopes) == 1 + + assert envelopes[0].headers == { + "event_id": "15210411201320122115110420122013", + "sent_at": "2012-11-21T12:31:12.415908Z", + "trace": { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + }, + } diff --git a/tests/tracing/test_headers.py b/tests/tracing/test_headers.py deleted file mode 100644 index 54f3fbc19e..0000000000 --- a/tests/tracing/test_headers.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest - -from sentry_sdk.tracing import Transaction - - -@pytest.mark.parametrize( - ("sentry_value, third_party_value, expected_tracestate"), - [ - ("doGsaREgReaT", None, "sentry=doGsaREgReaT"), - ( - "doGsaREgReaT", - "maisey=silly", - "sentry=doGsaREgReaT,maisey=silly", - ), - # this should never happen, but this proves things won't blow up if it does - (None, "charlie=goofy", "sentry=e30,charlie=goofy"), - ], -) -def test_to_tracestate( - sentry_init, sentry_value, third_party_value, expected_tracestate -): - sentry_init( - dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", - environment="dogpark", - release="off.leash.park", - ) - - transaction = Transaction( - name="/meet/other-dog/new", - op="greeting.sniff", - third_party_tracestate=third_party_value, - ) - - # we have to do this separately (rather than passing it to the Transaction - # constructor) because otherwise the constructor will come up with its own - # sentry_tracestate_value in the case where the one passed in the test is None - transaction._sentry_tracestate_value = sentry_value - - assert transaction.to_tracestate() == expected_tracestate diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py new file mode 100644 index 0000000000..60145f8cce --- /dev/null +++ b/tests/tracing/test_http_headers.py @@ -0,0 +1,237 @@ +import json + +import pytest + +from sentry_sdk.tracing import Transaction, Span +from sentry_sdk.tracing_utils import ( + compute_tracestate_from_data, + extract_sentrytrace_data, + extract_tracestate_data, + reinflate_tracestate, +) +from sentry_sdk.utils import from_base64, to_base64 + +# from sentry_sdk import Hub, Client + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + + +def test_tracestate_computation(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + ) + + computed_value = transaction._sentry_tracestate_value + # we have to decode and reinflate the data because we can guarantee that the + # order of the entries in the jsonified dict will be the same here as when + # the tracestate is computed + reinflated_trace_data = json.loads(from_base64(computed_value)) + + assert reinflated_trace_data == { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + + # assert reinflated_trace_data["trace_id"] == "12312012123120121231201212312012" + # assert reinflated_trace_data["environment"] == "dogpark" + # assert reinflated_trace_data["release"] == "off.leash.park" + # assert reinflated_trace_data["public_key"] == "dogsarebadatkeepingsecrets" + + +def test_adds_new_tracestate_to_transaction_when_none_given(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + # sentry_tracestate=< value would be passed here > + ) + + assert transaction._sentry_tracestate_value is not None + + +@pytest.mark.parametrize("sampled", [True, False, None]) +def test_to_traceparent(sentry_init, sampled): + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="12312012123120121231201212312012", + sampled=sampled, + ) + + traceparent = transaction.to_traceparent() + + trace_id, parent_span_id, parent_sampled = traceparent.split("-") + assert trace_id == "12312012123120121231201212312012" + assert parent_span_id == transaction.span_id + assert parent_sampled == ( + "1" if sampled is True else "0" if sampled is False else "" + ) + + +def test_to_tracestate(sentry_init): + sentry_init( + dsn="https://dogsarebadatkeepingsecrets@squirrelchasers.ingest.sentry.io/12312012", + environment="dogpark", + release="off.leash.park", + ) + + # it correctly uses the value from the transaction itself or the span's + # containing transaction + transaction_no_third_party = Transaction( + trace_id="12312012123120121231201212312012", + sentry_tracestate="doGsaREgReaT", + ) + non_orphan_span = Span() + non_orphan_span._containing_transaction = transaction_no_third_party + assert transaction_no_third_party.to_tracestate() == "sentry=doGsaREgReaT" + assert non_orphan_span.to_tracestate() == "sentry=doGsaREgReaT" + + # it combines sentry and third-party values correctly + transaction_with_third_party = Transaction( + trace_id="12312012123120121231201212312012", + sentry_tracestate="doGsaREgReaT", + third_party_tracestate="maisey=silly", + ) + assert ( + transaction_with_third_party.to_tracestate() + == "sentry=doGsaREgReaT,maisey=silly" + ) + + # it computes a tracestate from scratch for orphan transactions + orphan_span = Span( + trace_id="12312012123120121231201212312012", + ) + assert orphan_span._containing_transaction is None + assert orphan_span.to_tracestate() == "sentry=" + compute_tracestate_from_data( + { + "trace_id": "12312012123120121231201212312012", + "environment": "dogpark", + "release": "off.leash.park", + "public_key": "dogsarebadatkeepingsecrets", + } + ) + + +@pytest.mark.parametrize("sampling_decision", [True, False]) +def test_sentrytrace_extraction(sampling_decision): + sentrytrace_header = "12312012123120121231201212312012-0415201309082013-{}".format( + 1 if sampling_decision is True else 0 + ) + assert extract_sentrytrace_data(sentrytrace_header) == { + "trace_id": "12312012123120121231201212312012", + "parent_span_id": "0415201309082013", + "parent_sampled": sampling_decision, + } + + +@pytest.mark.parametrize( + ("incoming_header", "expected_sentry_value", "expected_third_party"), + [ + # sentry only + ("sentry=doGsaREgReaT", "doGsaREgReaT", None), + # sentry only, invalid (`!` isn't a valid base64 character) + ("sentry=doGsaREgReaT!", None, None), + # stuff before + ("maisey=silly,sentry=doGsaREgReaT", "doGsaREgReaT", "maisey=silly"), + # stuff after + ("sentry=doGsaREgReaT,maisey=silly", "doGsaREgReaT", "maisey=silly"), + # stuff before and after + ( + "charlie=goofy,sentry=doGsaREgReaT,maisey=silly", + "doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT", + "doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple after + ( + "sentry=doGsaREgReaT,charlie=goofy,maisey=silly", + "doGsaREgReaT", + "charlie=goofy,maisey=silly", + ), + # multiple before and after + ( + "charlie=goofy,maisey=silly,sentry=doGsaREgReaT,bodhi=floppy,cory=loyal", + "doGsaREgReaT", + "charlie=goofy,maisey=silly,bodhi=floppy,cory=loyal", + ), + # only third party data + ("maisey=silly", None, "maisey=silly"), + # invalid third party data, valid sentry data + ("maisey_is_silly,sentry=doGsaREgReaT", "doGsaREgReaT", None), + # valid third party data, invalid sentry data + ("maisey=silly,sentry=doGsaREgReaT!", None, "maisey=silly"), + # nothing valid at all + ("maisey_is_silly,sentry=doGsaREgReaT!", None, None), + ], +) +def test_tracestate_extraction( + incoming_header, expected_sentry_value, expected_third_party +): + assert extract_tracestate_data(incoming_header) == { + "sentry_tracestate": expected_sentry_value, + "third_party_tracestate": expected_third_party, + } + + +def test_iter_headers(sentry_init, monkeypatch): + + monkeypatch.setattr( + Transaction, + "to_traceparent", + mock.Mock(return_value="12312012123120121231201212312012-0415201309082013-0"), + ) + monkeypatch.setattr( + Transaction, + "to_tracestate", + mock.Mock(return_value="sentry=doGsaREgReaT,charlie=goofy"), + ) + + transaction = Transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + ) + + headers = dict(transaction.iter_headers()) + assert ( + headers["sentry-trace"] == "12312012123120121231201212312012-0415201309082013-0" + ) + assert headers["tracestate"] == "sentry=doGsaREgReaT,charlie=goofy" + + +@pytest.mark.parametrize( + "data", + [ # comes out with no trailing `=` + {"name": "Maisey", "birthday": "12/31/12"}, + # comes out with one trailing `=` + {"dogs": "yes", "cats": "maybe"}, + # comes out with two trailing `=` + {"name": "Charlie", "birthday": "11/21/12"}, + ], +) +def test_tracestate_reinflation(data): + encoded_tracestate = to_base64(json.dumps(data)).strip("=") + assert reinflate_tracestate(encoded_tracestate) == data diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py index c4c316be96..804001933f 100644 --- a/tests/tracing/test_integration_tests.py +++ b/tests/tracing/test_integration_tests.py @@ -46,47 +46,48 @@ def test_basic(sentry_init, capture_events, sample_rate): assert not events +@pytest.mark.only @pytest.mark.parametrize("sampled", [True, False, None]) -@pytest.mark.parametrize( - "sample_rate", [0.0, 1.0] -) # ensure sampling decision is actually passed along via headers +@pytest.mark.parametrize("sample_rate", [0.0, 1.0]) def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate): + """ + Ensure sampling decision is actually passed along via headers, and that they + are read correctly. + """ sentry_init(traces_sample_rate=sample_rate) events = capture_events() # make a parent transaction (normally this would be in a different service) - with start_transaction(name="hi", sampled=True if sample_rate == 0 else None): + with start_transaction( + name="hi", sampled=True if sample_rate == 0 else None + ) as parent_transaction: with start_span() as old_span: old_span.sampled = sampled headers = dict(Hub.current.iter_trace_propagation_headers()) - - # test that the sampling decision is getting encoded in the header correctly - header = headers["sentry-trace"] - if sampled is True: - assert header.endswith("-1") - if sampled is False: - assert header.endswith("-0") - if sampled is None: - assert header.endswith("-") - - # child transaction, to prove that we can read 'sentry-trace' header data - # correctly - transaction = Transaction.continue_from_headers(headers, name="WRONG") - assert transaction is not None - assert transaction.parent_sampled == sampled - assert transaction.trace_id == old_span.trace_id - assert transaction.same_process_as_parent is False - assert transaction.parent_span_id == old_span.span_id - assert transaction.span_id != old_span.span_id + tracestate = parent_transaction._sentry_tracestate_value + + # child transaction, to prove that we can read 'sentry-trace' and + # `tracestate` header data correctly + child_transaction = Transaction.continue_from_headers(headers, name="WRONG") + assert child_transaction is not None + assert child_transaction.parent_sampled == sampled + assert child_transaction.trace_id == old_span.trace_id + assert child_transaction.same_process_as_parent is False + assert child_transaction.parent_span_id == old_span.span_id + assert child_transaction.span_id != old_span.span_id + assert child_transaction._sentry_tracestate_value == tracestate # add child transaction to the scope, to show that the captured message will # be tagged with the trace id (since it happens while the transaction is # open) - with start_transaction(transaction): + with start_transaction(child_transaction): with configure_scope() as scope: + # change the transaction name from "WRONG" to make sure the change + # is reflected in the final data scope.transaction = "ho" capture_message("hello") + # in this case the child transaction won't be captured if sampled is False or (sample_rate == 0 and sampled is None): trace1, message = events @@ -100,7 +101,7 @@ def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate assert ( trace1["contexts"]["trace"]["trace_id"] == trace2["contexts"]["trace"]["trace_id"] - == transaction.trace_id + == child_transaction.trace_id == message["contexts"]["trace"]["trace_id"] ) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index f5b8aa5e85..1ab4dfcb63 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -25,30 +25,51 @@ def test_span_trimming(sentry_init, capture_events): assert span2["op"] == "foo1" -def test_transaction_method_signature(sentry_init, capture_events): +def test_transaction_naming(sentry_init, capture_events): sentry_init(traces_sample_rate=1.0) events = capture_events() + # only transactions have names - spans don't with pytest.raises(TypeError): start_span(name="foo") assert len(events) == 0 + # default name in event if no name is passed with start_transaction() as transaction: pass - assert transaction.name == "" assert len(events) == 1 + assert events[0]["transaction"] == "" + # the name can be set once the transaction's already started with start_transaction() as transaction: transaction.name = "name-known-after-transaction-started" assert len(events) == 2 + assert events[1]["transaction"] == "name-known-after-transaction-started" + # passing in a name works, too with start_transaction(name="a"): pass assert len(events) == 3 + assert events[2]["transaction"] == "a" - with start_transaction(Transaction(name="c")): - pass - assert len(events) == 4 + +def test_start_transaction(sentry_init): + sentry_init(traces_sample_rate=1.0) + + # you can have it start a transaction for you + result1 = start_transaction( + name="/interactions/other-dogs/new-dog", op="greeting.sniff" + ) + assert isinstance(result1, Transaction) + assert result1.name == "/interactions/other-dogs/new-dog" + assert result1.op == "greeting.sniff" + + # or you can pass it an already-created transaction + preexisting_transaction = Transaction( + name="/interactions/other-dogs/new-dog", op="greeting.sniff" + ) + result2 = start_transaction(preexisting_transaction) + assert result2 is preexisting_transaction def test_finds_transaction_on_scope(sentry_init): @@ -77,7 +98,7 @@ def test_finds_transaction_on_scope(sentry_init): assert scope._span.name == "dogpark" -def test_finds_transaction_when_decedent_span_is_on_scope( +def test_finds_transaction_when_descendent_span_is_on_scope( sentry_init, ): sentry_init(traces_sample_rate=1.0) diff --git a/tests/utils/test_general.py b/tests/utils/test_general.py index 0fd2c47f4f..03be52ca17 100644 --- a/tests/utils/test_general.py +++ b/tests/utils/test_general.py @@ -178,15 +178,15 @@ def test_iter_stacktraces(): # ascii only ("Dogs are great!", "RG9ncyBhcmUgZ3JlYXQh"), # emoji - ("🐶", "8J+Qtg=="), + (u"🐶", "8J+Qtg=="), # non-ascii ( - "Καλό κορίτσι, Μάιζεϊ!", + u"Καλό κορίτσι, Μάιζεϊ!", "zprOsc67z4wgzrrOv8+Bzq/PhM+DzrksIM6czqzOuc62zrXPiiE=", ), # mix of ascii and non-ascii ( - "Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.", + u"Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.", "T2YgbWFyZ2lyIGh1bmRhciEgw4lnIGdlcmkgcsOhw7AgZnlyaXIgYcOwIMOpZyDDvnVyZmkgc3TDpnJyYSByw7ptLg==", ), ], @@ -219,7 +219,7 @@ def test_failed_base64_conversion(input): # input which isn't a valid base64 string assert from_base64(input) is None - # any string can be converted to base64, so it should only fail if given the - # wrong type + # any string can be converted to base64, so only type errors will cause + # failures if type(input) not in string_types: assert to_base64(input) is None From d3a6303554bc094e7ffbf8a7d866bba3643c7415 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 25 Jan 2021 08:35:04 -0800 Subject: [PATCH 23/24] general cleanup --- sentry_sdk/client.py | 9 +++++++-- sentry_sdk/hub.py | 12 ++++++------ sentry_sdk/tracing_utils.py | 7 +++---- tests/tracing/test_http_headers.py | 7 ------- tests/tracing/test_integration_tests.py | 5 ++--- tests/utils/test_tracing.py | 0 6 files changed, 18 insertions(+), 22 deletions(-) delete mode 100644 tests/utils/test_tracing.py diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 584f2ba7ef..942d0970fd 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -329,13 +329,18 @@ def capture_event( attachments = hint.get("attachments") is_transaction = event_opt.get("type") == "transaction" + + # this is outside of the `if` immediately below because even if we don't + # use the value, we want to make sure we remove it before the event is + # sent (which the `.pop()` does) raw_tracestate = ( event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", None) ) + # Transactions or events with attachments should go to the /envelope/ + # endpoint. if is_transaction or attachments: - # Transactions or events with attachments should go to the - # /envelope/ endpoint. + headers = { "event_id": event_opt["event_id"], "sent_at": format_timestamp(datetime.utcnow()), diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index 310cb4b134..61c25a2620 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -684,7 +684,10 @@ def flush( def iter_trace_propagation_headers(self): # type: () -> Generator[Tuple[str, str], None, None] - # TODO: Document + """ + Returns a generator for the headers to attach to outgoing requests when + tracing. + """ client, scope = self._stack[-1] span = scope.span @@ -695,11 +698,8 @@ def iter_trace_propagation_headers(self): if not propagate_traces: return - yield "sentry-trace", span.to_traceparent() - - tracestate = span.to_tracestate() - if tracestate: - yield "tracestate", tracestate + for header in span.iter_headers(): + yield header GLOBAL_HUB = Hub() diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index bc9f4c4da3..7ad3f276bc 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -71,8 +71,8 @@ "(?:^|.+,)" # sentry's part "(sentry=[^,]*)" - # either there's another vendor's entry or we end - "(?:,|$)" + # either there's a comma and another vendor's entry or we end + "(?:,.+|$)" ) @@ -212,8 +212,7 @@ def maybe_create_breadcrumbs_from_span(hub, span): def extract_sentrytrace_data(header): - # type: (Optional[str]) -> typing.Mapping[str, Union[Optional[str], Optional[bool]]] - + # type: (Optional[str]) -> typing.Mapping[str, Union[str, bool, None]] """ Given a `sentry-trace` header string, return a dictionary of data. """ diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py index 60145f8cce..e44e8f530c 100644 --- a/tests/tracing/test_http_headers.py +++ b/tests/tracing/test_http_headers.py @@ -11,7 +11,6 @@ ) from sentry_sdk.utils import from_base64, to_base64 -# from sentry_sdk import Hub, Client try: from unittest import mock # python 3.3 and above @@ -45,11 +44,6 @@ def test_tracestate_computation(sentry_init): "public_key": "dogsarebadatkeepingsecrets", } - # assert reinflated_trace_data["trace_id"] == "12312012123120121231201212312012" - # assert reinflated_trace_data["environment"] == "dogpark" - # assert reinflated_trace_data["release"] == "off.leash.park" - # assert reinflated_trace_data["public_key"] == "dogsarebadatkeepingsecrets" - def test_adds_new_tracestate_to_transaction_when_none_given(sentry_init): sentry_init( @@ -198,7 +192,6 @@ def test_tracestate_extraction( def test_iter_headers(sentry_init, monkeypatch): - monkeypatch.setattr( Transaction, "to_traceparent", diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py index 804001933f..97d3c62304 100644 --- a/tests/tracing/test_integration_tests.py +++ b/tests/tracing/test_integration_tests.py @@ -46,13 +46,12 @@ def test_basic(sentry_init, capture_events, sample_rate): assert not events -@pytest.mark.only @pytest.mark.parametrize("sampled", [True, False, None]) @pytest.mark.parametrize("sample_rate", [0.0, 1.0]) def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate): """ - Ensure sampling decision is actually passed along via headers, and that they - are read correctly. + Ensure data is actually passed along via headers, and that they are read + correctly. """ sentry_init(traces_sample_rate=sample_rate) events = capture_events() diff --git a/tests/utils/test_tracing.py b/tests/utils/test_tracing.py deleted file mode 100644 index e69de29bb2..0000000000 From 97a4aafc67d3475c68da001d577ecb785720dbd0 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Thu, 25 Feb 2021 16:55:34 -0800 Subject: [PATCH 24/24] store full sentry entry (rather than value) for tracestate --- sentry_sdk/client.py | 6 +++-- sentry_sdk/tracing.py | 23 +++++++++-------- sentry_sdk/tracing_utils.py | 24 +++++++++-------- tests/test_envelope.py | 4 +-- tests/tracing/test_http_headers.py | 34 ++++++++++++------------- tests/tracing/test_integration_tests.py | 4 +-- 6 files changed, 52 insertions(+), 43 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 942d0970fd..6da9870a5f 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -334,7 +334,7 @@ def capture_event( # use the value, we want to make sure we remove it before the event is # sent (which the `.pop()` does) raw_tracestate = ( - event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", None) + event_opt.get("contexts", {}).get("trace", {}).pop("tracestate", "") ) # Transactions or events with attachments should go to the /envelope/ @@ -346,7 +346,9 @@ def capture_event( "sent_at": format_timestamp(datetime.utcnow()), } - tracestate_data = reinflate_tracestate(raw_tracestate) + tracestate_data = reinflate_tracestate( + raw_tracestate.replace("sentry=", "") + ) if tracestate_data: headers["trace"] = tracestate_data diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 9624d8bb57..2fa06a85b4 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -9,7 +9,7 @@ from sentry_sdk.utils import logger from sentry_sdk.tracing_utils import ( EnvironHeaders, - compute_tracestate, + compute_tracestate_entry, extract_sentrytrace_data, extract_tracestate_data, has_tracing_enabled, @@ -237,6 +237,7 @@ def continue_from_headers( Create a transaction with the given params (including any data pulled from the 'sentry-trace' and 'tracestate' headers). """ + # TODO move this to the Transaction class if cls is Span: logger.warning( "Deprecated: use Transaction.continue_from_headers " @@ -305,14 +306,14 @@ def to_tracestate(self): # we should have the relevant values stored on the transaction, but if # this is an orphan span, make a new value - if transaction and transaction._sentry_tracestate_value: - sentry_tracestate = transaction._sentry_tracestate_value + if transaction: + sentry_tracestate = transaction._sentry_tracestate third_party_tracestate = transaction._third_party_tracestate else: - sentry_tracestate = compute_tracestate(self) + sentry_tracestate = compute_tracestate_entry(self) third_party_tracestate = None - header_value = "sentry=" + sentry_tracestate + header_value = sentry_tracestate if third_party_tracestate: header_value = header_value + "," + third_party_tracestate @@ -431,7 +432,7 @@ def get_trace_context(self): transaction = self._containing_transaction if transaction: - rv["tracestate"] = transaction._sentry_tracestate_value + rv["tracestate"] = transaction._sentry_tracestate return rv @@ -440,9 +441,11 @@ class Transaction(Span): __slots__ = ( "name", "parent_sampled", - # base64-encoded json of trace correlation context, missing trailing = - # (note: does NOT include the `sentry=`) - "_sentry_tracestate_value", + # the sentry portion of the `tracestate` header used to transmit + # correlation context for server-side dynamic sampling, of the form + # `sentry=xxxxx`, where `xxxxx` is the base64-encoded json of the + # correlation context data, missing trailing any = + "_sentry_tracestate", # tracestate data from other vendors, of the form `dogs=yes,cats=maybe` "_third_party_tracestate", ) @@ -468,7 +471,7 @@ def __init__( Span.__init__(self, **kwargs) self.name = name self.parent_sampled = parent_sampled - self._sentry_tracestate_value = sentry_tracestate or compute_tracestate(self) + self._sentry_tracestate = sentry_tracestate or compute_tracestate_entry(self) self._third_party_tracestate = third_party_tracestate def __repr__(self): diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 7ad3f276bc..f13a81a026 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -69,7 +69,7 @@ # either sentry is the first entry or there's stuff immediately before it, # ending in a commma (this prevents matching something like `coolsentry=xxx`) "(?:^|.+,)" - # sentry's part + # sentry's part, not including the potential comma "(sentry=[^,]*)" # either there's a comma and another vendor's entry or we end "(?:,.+|$)" @@ -247,7 +247,7 @@ def extract_tracestate_data(header): Extracts the sentry tracestate value and any third-party data from the given tracestate header, returning a dictionary of data. """ - sentry_value = third_party = None + sentry_entry = third_party_entry = None before = after = "" if header: @@ -257,27 +257,31 @@ def extract_tracestate_data(header): if sentry_match: sentry_entry = sentry_match.group(1) - # we have to strip them after the split so we don't end up with + # remove the commas after the split so we don't end up with # `xxx=yyy,,zzz=qqq` (double commas) when we put them back together before, after = map(lambda s: s.strip(","), header.split(sentry_entry)) # extract sentry's value from its entry and test to make sure it's - # valid; if it isn't, discard it so that a new one will be created + # valid; if it isn't, discard the entire entry so that a new one + # will be created sentry_value = sentry_entry.replace("sentry=", "") if not re.search("^{b64}$".format(b64=base64_stripped), sentry_value): - sentry_value = None + sentry_entry = None else: after = header # if either part is invalid or empty, remove it before gluing them together - third_party = ( + third_party_entry = ( ",".join(filter(TRACESTATE_ENTRIES_REGEX.search, [before, after])) or None ) - return {"sentry_tracestate": sentry_value, "third_party_tracestate": third_party} + return { + "sentry_tracestate": sentry_entry, + "third_party_tracestate": third_party_entry, + } -def compute_tracestate_from_data(data): +def compute_tracestate_value(data): # type: (typing.Mapping[str, str]) -> str """ Computes a new tracestate value using the given data. @@ -293,7 +297,7 @@ def compute_tracestate_from_data(data): return (to_base64(tracestate_json) or "").rstrip("=") -def compute_tracestate(span): +def compute_tracestate_entry(span): # type: (Span) -> str """ Computes a new tracestate value for the span. @@ -313,7 +317,7 @@ def compute_tracestate(span): "public_key": Dsn(options["dsn"]).public_key, } - return compute_tracestate_from_data(data) + return "sentry=" + compute_tracestate_value(data) def reinflate_tracestate(encoded_tracestate): diff --git a/tests/test_envelope.py b/tests/test_envelope.py index df173d3c64..877c67bace 100644 --- a/tests/test_envelope.py +++ b/tests/test_envelope.py @@ -1,7 +1,7 @@ from sentry_sdk.envelope import Envelope from sentry_sdk.session import Session from sentry_sdk import capture_event -from sentry_sdk.tracing_utils import compute_tracestate_from_data +from sentry_sdk.tracing_utils import compute_tracestate_value import sentry_sdk.client @@ -19,7 +19,7 @@ def generate_transaction_item(): "parent_span_id": None, "description": "", "op": "greeting.sniff", - "tracestate": compute_tracestate_from_data( + "tracestate": compute_tracestate_value( { "trace_id": "12312012123120121231201212312012", "environment": "dogpark", diff --git a/tests/tracing/test_http_headers.py b/tests/tracing/test_http_headers.py index e44e8f530c..5c0cad320b 100644 --- a/tests/tracing/test_http_headers.py +++ b/tests/tracing/test_http_headers.py @@ -4,7 +4,7 @@ from sentry_sdk.tracing import Transaction, Span from sentry_sdk.tracing_utils import ( - compute_tracestate_from_data, + compute_tracestate_value, extract_sentrytrace_data, extract_tracestate_data, reinflate_tracestate, @@ -31,7 +31,7 @@ def test_tracestate_computation(sentry_init): trace_id="12312012123120121231201212312012", ) - computed_value = transaction._sentry_tracestate_value + computed_value = transaction._sentry_tracestate.replace("sentry=", "") # we have to decode and reinflate the data because we can guarantee that the # order of the entries in the jsonified dict will be the same here as when # the tracestate is computed @@ -58,7 +58,7 @@ def test_adds_new_tracestate_to_transaction_when_none_given(sentry_init): # sentry_tracestate=< value would be passed here > ) - assert transaction._sentry_tracestate_value is not None + assert transaction._sentry_tracestate is not None @pytest.mark.parametrize("sampled", [True, False, None]) @@ -92,7 +92,7 @@ def test_to_tracestate(sentry_init): # containing transaction transaction_no_third_party = Transaction( trace_id="12312012123120121231201212312012", - sentry_tracestate="doGsaREgReaT", + sentry_tracestate="sentry=doGsaREgReaT", ) non_orphan_span = Span() non_orphan_span._containing_transaction = transaction_no_third_party @@ -102,7 +102,7 @@ def test_to_tracestate(sentry_init): # it combines sentry and third-party values correctly transaction_with_third_party = Transaction( trace_id="12312012123120121231201212312012", - sentry_tracestate="doGsaREgReaT", + sentry_tracestate="sentry=doGsaREgReaT", third_party_tracestate="maisey=silly", ) assert ( @@ -115,7 +115,7 @@ def test_to_tracestate(sentry_init): trace_id="12312012123120121231201212312012", ) assert orphan_span._containing_transaction is None - assert orphan_span.to_tracestate() == "sentry=" + compute_tracestate_from_data( + assert orphan_span.to_tracestate() == "sentry=" + compute_tracestate_value( { "trace_id": "12312012123120121231201212312012", "environment": "dogpark", @@ -141,42 +141,42 @@ def test_sentrytrace_extraction(sampling_decision): ("incoming_header", "expected_sentry_value", "expected_third_party"), [ # sentry only - ("sentry=doGsaREgReaT", "doGsaREgReaT", None), + ("sentry=doGsaREgReaT", "sentry=doGsaREgReaT", None), # sentry only, invalid (`!` isn't a valid base64 character) ("sentry=doGsaREgReaT!", None, None), # stuff before - ("maisey=silly,sentry=doGsaREgReaT", "doGsaREgReaT", "maisey=silly"), + ("maisey=silly,sentry=doGsaREgReaT", "sentry=doGsaREgReaT", "maisey=silly"), # stuff after - ("sentry=doGsaREgReaT,maisey=silly", "doGsaREgReaT", "maisey=silly"), + ("sentry=doGsaREgReaT,maisey=silly", "sentry=doGsaREgReaT", "maisey=silly"), # stuff before and after ( "charlie=goofy,sentry=doGsaREgReaT,maisey=silly", - "doGsaREgReaT", + "sentry=doGsaREgReaT", "charlie=goofy,maisey=silly", ), # multiple before ( "charlie=goofy,maisey=silly,sentry=doGsaREgReaT", - "doGsaREgReaT", + "sentry=doGsaREgReaT", "charlie=goofy,maisey=silly", ), # multiple after ( "sentry=doGsaREgReaT,charlie=goofy,maisey=silly", - "doGsaREgReaT", + "sentry=doGsaREgReaT", "charlie=goofy,maisey=silly", ), # multiple before and after ( "charlie=goofy,maisey=silly,sentry=doGsaREgReaT,bodhi=floppy,cory=loyal", - "doGsaREgReaT", + "sentry=doGsaREgReaT", "charlie=goofy,maisey=silly,bodhi=floppy,cory=loyal", ), - # only third party data + # only third-party data ("maisey=silly", None, "maisey=silly"), - # invalid third party data, valid sentry data - ("maisey_is_silly,sentry=doGsaREgReaT", "doGsaREgReaT", None), - # valid third party data, invalid sentry data + # invalid third-party data, valid sentry data + ("maisey_is_silly,sentry=doGsaREgReaT", "sentry=doGsaREgReaT", None), + # valid third-party data, invalid sentry data ("maisey=silly,sentry=doGsaREgReaT!", None, "maisey=silly"), # nothing valid at all ("maisey_is_silly,sentry=doGsaREgReaT!", None, None), diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py index 97d3c62304..9f7b8d3bbb 100644 --- a/tests/tracing/test_integration_tests.py +++ b/tests/tracing/test_integration_tests.py @@ -63,7 +63,7 @@ def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate with start_span() as old_span: old_span.sampled = sampled headers = dict(Hub.current.iter_trace_propagation_headers()) - tracestate = parent_transaction._sentry_tracestate_value + tracestate = parent_transaction._sentry_tracestate # child transaction, to prove that we can read 'sentry-trace' and # `tracestate` header data correctly @@ -74,7 +74,7 @@ def test_continue_from_headers(sentry_init, capture_events, sampled, sample_rate assert child_transaction.same_process_as_parent is False assert child_transaction.parent_span_id == old_span.span_id assert child_transaction.span_id != old_span.span_id - assert child_transaction._sentry_tracestate_value == tracestate + assert child_transaction._sentry_tracestate == tracestate # add child transaction to the scope, to show that the captured message will # be tagged with the trace id (since it happens while the transaction is