Skip to content

Commit 3f6fbb4

Browse files
authored Sep 24, 2024
[client] Improve STIX2 bundle splitter (OpenCTI-Platform#736)
1 parent fe0a730 commit 3f6fbb4

12 files changed

+70700
-67
lines changed
 

‎pycti/connector/opencti_connector_helper.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -1556,6 +1556,8 @@ def send_stix2_bundle(self, bundle: str, **kwargs) -> list:
15561556
:type entities_types: list, optional
15571557
:param update: whether to update data in the database, defaults to False
15581558
:type update: bool, optional
1559+
:param bypass_split: formerly used to prevent splitting of the bundle. This option was removed in 6.3 and is no longer used.
1560+
:type bypass_split: bool, optional
15591561
:raises ValueError: if the bundle is empty
15601562
:return: list of bundles
15611563
:rtype: list
@@ -1564,11 +1566,11 @@ def send_stix2_bundle(self, bundle: str, **kwargs) -> list:
15641566
entities_types = kwargs.get("entities_types", None)
15651567
update = kwargs.get("update", False)
15661568
event_version = kwargs.get("event_version", None)
1567-
bypass_split = kwargs.get("bypass_split", False)
15681569
bypass_validation = kwargs.get("bypass_validation", False)
15691570
entity_id = kwargs.get("entity_id", None)
15701571
file_name = kwargs.get("file_name", None)
15711572
bundle_send_to_queue = kwargs.get("send_to_queue", self.bundle_send_to_queue)
1573+
cleanup_inconsistent_bundle = kwargs.get("cleanup_inconsistent_bundle", False)
15721574
bundle_send_to_directory = kwargs.get(
15731575
"send_to_directory", self.bundle_send_to_directory
15741576
)
@@ -1690,17 +1692,16 @@ def send_stix2_bundle(self, bundle: str, **kwargs) -> list:
16901692
final_write_file = os.path.join(bundle_send_to_directory_path, bundle_file)
16911693
os.rename(write_file, final_write_file)
16921694

1693-
if bypass_split:
1694-
bundles = [bundle]
1695-
expectations_number = len(json.loads(bundle)["objects"])
1696-
else:
1697-
stix2_splitter = OpenCTIStix2Splitter()
1698-
(
1699-
expectations_number,
1700-
bundles,
1701-
) = stix2_splitter.split_bundle_with_expectations(
1702-
bundle, True, event_version
1703-
)
1695+
stix2_splitter = OpenCTIStix2Splitter()
1696+
(
1697+
expectations_number,
1698+
bundles,
1699+
) = stix2_splitter.split_bundle_with_expectations(
1700+
bundle=bundle,
1701+
use_json=True,
1702+
event_version=event_version,
1703+
cleanup_inconsistent_bundle=cleanup_inconsistent_bundle,
1704+
)
17041705

17051706
if len(bundles) == 0:
17061707
self.metric.inc("error_count")

‎pycti/entities/opencti_kill_chain_phase.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
# coding: utf-8
22

33
import json
4-
import uuid
54

6-
from stix2.canonicalization.Canonicalize import canonicalize
5+
from pycti.utils.opencti_stix2_identifier import kill_chain_phase_generate_id
76

87

98
class KillChainPhase:
@@ -25,10 +24,9 @@ def __init__(self, opencti):
2524

2625
@staticmethod
2726
def generate_id(phase_name, kill_chain_name):
28-
data = {"phase_name": phase_name, "kill_chain_name": kill_chain_name}
29-
data = canonicalize(data, utf8=False)
30-
id = str(uuid.uuid5(uuid.UUID("00abedb4-aa42-466c-9c01-fed23315a9b7"), data))
31-
return "kill-chain-phase--" + id
27+
return kill_chain_phase_generate_id(
28+
phase_name=phase_name, kill_chain_name=kill_chain_name
29+
)
3230

3331
"""
3432
List Kill-Chain-Phase objects

‎pycti/utils/opencti_stix2.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2619,10 +2619,9 @@ def import_bundle(
26192619
else None
26202620
)
26212621
stix2_splitter = OpenCTIStix2Splitter()
2622-
try:
2623-
bundles = stix2_splitter.split_bundle(stix_bundle, False, event_version)
2624-
except RecursionError:
2625-
bundles = [stix_bundle]
2622+
_, bundles = stix2_splitter.split_bundle_with_expectations(
2623+
stix_bundle, False, event_version
2624+
)
26262625
# Import every element in a specific order
26272626
imported_elements = []
26282627
for bundle in bundles:
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import uuid
2+
3+
from stix2.canonicalization.Canonicalize import canonicalize
4+
5+
6+
def external_reference_generate_id(url=None, source_name=None, external_id=None):
    """Build the deterministic identifier of an external reference.

    The identifier is a UUIDv5 computed over the canonicalized identifying
    fields: the ``url`` alone when it is provided, otherwise the
    ``source_name`` / ``external_id`` pair.

    :param url: URL of the external reference
    :type url: str, optional
    :param source_name: name of the source, only used together with external_id
    :type source_name: str, optional
    :param external_id: identifier in the source, only used together with source_name
    :type external_id: str, optional
    :return: an ``external-reference--<uuid>`` identifier, or None when neither
        a url nor a complete source_name/external_id pair is provided
    :rtype: str or None
    """
    if url is not None:
        # The url takes precedence over the source_name/external_id pair
        key_fields = {"url": url}
    elif source_name is not None and external_id is not None:
        key_fields = {"source_name": source_name, "external_id": external_id}
    else:
        # Not enough information to derive an identifier
        return None
    canonical_data = canonicalize(key_fields, utf8=False)
    namespace = uuid.UUID("00abedb4-aa42-466c-9c01-fed23315a9b7")
    return "external-reference--" + str(uuid.uuid5(namespace, canonical_data))
16+
17+
18+
def kill_chain_phase_generate_id(phase_name, kill_chain_name):
    """Build the deterministic identifier of a kill chain phase.

    The identifier is a UUIDv5 computed over the canonicalized
    ``phase_name`` / ``kill_chain_name`` pair.

    :param phase_name: name of the phase in the kill chain
    :param kill_chain_name: name of the kill chain
    :return: a ``kill-chain-phase--<uuid>`` identifier
    :rtype: str
    """
    key_fields = {"phase_name": phase_name, "kill_chain_name": kill_chain_name}
    canonical_data = canonicalize(key_fields, utf8=False)
    namespace = uuid.UUID("00abedb4-aa42-466c-9c01-fed23315a9b7")
    return "kill-chain-phase--" + str(uuid.uuid5(namespace, canonical_data))

‎pycti/utils/opencti_stix2_splitter.py

+153-34
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,187 @@
11
import json
2-
import re
32
import uuid
43
from typing import Tuple
54

65
from typing_extensions import deprecated
76

8-
MITRE_X_CAPEC = (
9-
"x_capec_*" # https://github.com/mitre-attack/attack-stix-data/issues/34
7+
from pycti.utils.opencti_stix2_identifier import (
8+
external_reference_generate_id,
9+
kill_chain_phase_generate_id,
1010
)
11-
unsupported_ref_patterns = [MITRE_X_CAPEC]
11+
from pycti.utils.opencti_stix2_utils import (
12+
STIX_CYBER_OBSERVABLE_MAPPING,
13+
SUPPORTED_STIX_ENTITY_OBJECTS,
14+
)
15+
16+
supported_types = (
17+
SUPPORTED_STIX_ENTITY_OBJECTS # entities
18+
+ list(STIX_CYBER_OBSERVABLE_MAPPING.keys()) # observables
19+
+ ["relationship", "sighting"] # relationships
20+
)
21+
22+
23+
def is_id_supported(key):
    """Tell whether an identifier belongs to a supported type.

    The type is the part of the identifier located before the first ``--``
    separator; the whole identifier is used when it has no separator.

    :param key: identifier to check
    :type key: str
    :return: True when the type prefix is listed in ``supported_types``
    :rtype: bool
    """
    type_prefix, _, _ = key.partition("--")
    return type_prefix in supported_types
1226

1327

1428
class OpenCTIStix2Splitter:
1529
def __init__(self):
1630
self.cache_index = {}
31+
self.cache_refs = {}
1732
self.elements = []
18-
self.unsupported_patterns = list(
19-
map(lambda pattern: re.compile(pattern), unsupported_ref_patterns)
20-
)
2133

22-
def is_ref_key_supported(self, key):
23-
for pattern in self.unsupported_patterns:
24-
if pattern.match(key):
25-
return False
26-
return True
27-
28-
def enlist_element(self, item_id, raw_data):
34+
def enlist_element(
35+
self, item_id, raw_data, cleanup_inconsistent_bundle, parent_acc
36+
):
2937
nb_deps = 1
3038
if item_id not in raw_data:
3139
return 0
40+
3241
existing_item = self.cache_index.get(item_id)
3342
if existing_item is not None:
3443
return existing_item["nb_deps"]
35-
# Recursive enlist for every refs
44+
3645
item = raw_data[item_id]
46+
if self.cache_refs.get(item_id) is None:
47+
self.cache_refs[item_id] = []
3748
for key in list(item.keys()):
3849
value = item[key]
39-
if key.endswith("_refs") and self.is_ref_key_supported(key):
50+
# Recursive enlist for every refs
51+
if key.endswith("_refs"):
4052
to_keep = []
4153
for element_ref in item[key]:
42-
if element_ref != item_id:
43-
nb_deps += self.enlist_element(element_ref, raw_data)
44-
to_keep.append(element_ref)
54+
# We need to check if this ref is not already a reference
55+
is_missing_ref = raw_data.get(element_ref) is None
56+
must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle
57+
not_dependency_ref = (
58+
self.cache_refs.get(element_ref) is None
59+
or item_id not in self.cache_refs[element_ref]
60+
)
61+
# Prevent any self reference
62+
if (
63+
is_id_supported(element_ref)
64+
and not must_be_cleaned
65+
and element_ref not in parent_acc
66+
and element_ref != item_id
67+
and not_dependency_ref
68+
):
69+
self.cache_refs[item_id].append(element_ref)
70+
nb_deps += self.enlist_element(
71+
element_ref,
72+
raw_data,
73+
cleanup_inconsistent_bundle,
74+
parent_acc + [element_ref],
75+
)
76+
if element_ref not in to_keep:
77+
to_keep.append(element_ref)
4578
item[key] = to_keep
46-
elif key.endswith("_ref") and self.is_ref_key_supported(key):
47-
if item[key] == item_id:
48-
item[key] = None
79+
elif key.endswith("_ref"):
80+
is_missing_ref = raw_data.get(value) is None
81+
must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle
82+
not_dependency_ref = (
83+
self.cache_refs.get(value) is None
84+
or item_id not in self.cache_refs[value]
85+
)
86+
# Prevent any self reference
87+
if (
88+
value is not None
89+
and not must_be_cleaned
90+
and value not in parent_acc
91+
and is_id_supported(value)
92+
and value != item_id
93+
and not_dependency_ref
94+
):
95+
self.cache_refs[item_id].append(value)
96+
nb_deps += self.enlist_element(
97+
value,
98+
raw_data,
99+
cleanup_inconsistent_bundle,
100+
parent_acc + [value],
101+
)
49102
else:
50-
# Need to handle the special case of recursive ref for created by ref
51-
is_created_by_ref = key == "created_by_ref"
52-
if is_created_by_ref:
53-
is_marking = item["id"].startswith("marking-definition--")
54-
if is_marking is False:
55-
nb_deps += self.enlist_element(value, raw_data)
56-
else:
57-
nb_deps += self.enlist_element(value, raw_data)
103+
item[key] = None
104+
# Case for embedded elements (deduplicating and cleanup)
105+
elif key == "external_references":
106+
# specific case of splitting external references
107+
# reference_ids = []
108+
deduplicated_references = []
109+
deduplicated_references_cache = {}
110+
references = item[key]
111+
for reference in references:
112+
reference_id = external_reference_generate_id(
113+
url=reference.get("url"),
114+
source_name=reference.get("source_name"),
115+
external_id=reference.get("external_id"),
116+
)
117+
if (
118+
reference_id is not None
119+
and deduplicated_references_cache.get(reference_id) is None
120+
):
121+
deduplicated_references_cache[reference_id] = reference_id
122+
deduplicated_references.append(reference)
123+
# - Needed for a future move of splitting the elements
124+
# reference["id"] = reference_id
125+
# reference["type"] = "External-Reference"
126+
# raw_data[reference_id] = reference
127+
# if reference_id not in reference_ids:
128+
# reference_ids.append(reference_id)
129+
# nb_deps += self.enlist_element(reference_id, raw_data)
130+
item[key] = deduplicated_references
131+
elif key == "kill_chain_phases":
132+
# specific case of splitting kill_chain phases
133+
# kill_chain_ids = []
134+
deduplicated_kill_chain = []
135+
deduplicated_kill_chain_cache = {}
136+
kill_chains = item[key]
137+
for kill_chain in kill_chains:
138+
kill_chain_id = kill_chain_phase_generate_id(
139+
kill_chain_name=kill_chain.get("kill_chain_name"),
140+
phase_name=kill_chain.get("phase_name"),
141+
)
142+
if (
143+
kill_chain_id is not None
144+
and deduplicated_kill_chain_cache.get(kill_chain_id) is None
145+
):
146+
deduplicated_kill_chain_cache[kill_chain_id] = kill_chain_id
147+
deduplicated_kill_chain.append(kill_chain)
148+
# - Needed for a future move of splitting the elements
149+
# kill_chain["id"] = kill_chain_id
150+
# kill_chain["type"] = "Kill-Chain-Phase"
151+
# raw_data[kill_chain_id] = kill_chain
152+
# if kill_chain_id not in kill_chain_ids:
153+
# kill_chain_ids.append(kill_chain_id)
154+
# nb_deps += self.enlist_element(kill_chain_id, raw_data)
155+
item[key] = deduplicated_kill_chain
156+
58157
# Get the final dep counting and add in cache
59158
item["nb_deps"] = nb_deps
60-
self.elements.append(item)
61-
self.cache_index[item_id] = item # Put in cache
159+
# Put in cache
160+
if self.cache_index.get(item_id) is None:
161+
# enlist only if compatible
162+
if item["type"] == "relationship":
163+
is_compatible = (
164+
item["source_ref"] is not None and item["target_ref"] is not None
165+
)
166+
elif item["type"] == "sighting":
167+
is_compatible = (
168+
item["sighting_of_ref"] is not None
169+
and len(item["where_sighted_refs"]) > 0
170+
)
171+
else:
172+
is_compatible = is_id_supported(item_id)
173+
if is_compatible:
174+
self.elements.append(item)
175+
self.cache_index[item_id] = item
176+
62177
return nb_deps
63178

64179
def split_bundle_with_expectations(
65-
self, bundle, use_json=True, event_version=None
180+
self,
181+
bundle,
182+
use_json=True,
183+
event_version=None,
184+
cleanup_inconsistent_bundle=False,
66185
) -> Tuple[int, list]:
67186
"""splits a valid stix2 bundle into a list of bundles"""
68187
if use_json:
@@ -84,7 +203,7 @@ def split_bundle_with_expectations(
84203
for item in bundle_data["objects"]:
85204
raw_data[item["id"]] = item
86205
for item in bundle_data["objects"]:
87-
self.enlist_element(item["id"], raw_data)
206+
self.enlist_element(item["id"], raw_data, cleanup_inconsistent_bundle, [])
88207

89208
# Build the bundles
90209
bundles = []

0 commit comments

Comments
 (0)
Please sign in to comment.