import json
import uuid
from typing import Tuple

from typing_extensions import deprecated

from pycti.utils.opencti_stix2_identifier import (
    external_reference_generate_id,
    kill_chain_phase_generate_id,
)
from pycti.utils.opencti_stix2_utils import (
    STIX_CYBER_OBSERVABLE_MAPPING,
    SUPPORTED_STIX_ENTITY_OBJECTS,
)

# Every STIX type the splitter accepts: core entities, cyber observables,
# and the two relationship-like object types.
supported_types = (
    SUPPORTED_STIX_ENTITY_OBJECTS  # entities
    + list(STIX_CYBER_OBSERVABLE_MAPPING.keys())  # observables
    + ["relationship", "sighting"]  # relationships
)
def is_id_supported(key):
    """Return True when the id's type prefix is a supported STIX type.

    A STIX id has the shape ``<type>--<uuid>``; the part before ``--`` is
    compared against the module-level ``supported_types`` list.

    :param key: a STIX identifier string
    :return: bool, True when the type prefix is supported
    """
    id_type = key.split("--")[0]
    return id_type in supported_types
class OpenCTIStix2Splitter :
15
29
def __init__ (self ):
16
30
self .cache_index = {}
31
+ self .cache_refs = {}
17
32
self .elements = []
18
- self .unsupported_patterns = list (
19
- map (lambda pattern : re .compile (pattern ), unsupported_ref_patterns )
20
- )
21
33
22
- def is_ref_key_supported (self , key ):
23
- for pattern in self .unsupported_patterns :
24
- if pattern .match (key ):
25
- return False
26
- return True
27
-
28
- def enlist_element (self , item_id , raw_data ):
34
+ def enlist_element (
35
+ self , item_id , raw_data , cleanup_inconsistent_bundle , parent_acc
36
+ ):
29
37
nb_deps = 1
30
38
if item_id not in raw_data :
31
39
return 0
40
+
32
41
existing_item = self .cache_index .get (item_id )
33
42
if existing_item is not None :
34
43
return existing_item ["nb_deps" ]
35
- # Recursive enlist for every refs
44
+
36
45
item = raw_data [item_id ]
46
+ if self .cache_refs .get (item_id ) is None :
47
+ self .cache_refs [item_id ] = []
37
48
for key in list (item .keys ()):
38
49
value = item [key ]
39
- if key .endswith ("_refs" ) and self .is_ref_key_supported (key ):
50
+ # Recursive enlist for every refs
51
+ if key .endswith ("_refs" ):
40
52
to_keep = []
41
53
for element_ref in item [key ]:
42
- if element_ref != item_id :
43
- nb_deps += self .enlist_element (element_ref , raw_data )
44
- to_keep .append (element_ref )
54
+ # We need to check if this ref is not already a reference
55
+ is_missing_ref = raw_data .get (element_ref ) is None
56
+ must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle
57
+ not_dependency_ref = (
58
+ self .cache_refs .get (element_ref ) is None
59
+ or item_id not in self .cache_refs [element_ref ]
60
+ )
61
+ # Prevent any self reference
62
+ if (
63
+ is_id_supported (element_ref )
64
+ and not must_be_cleaned
65
+ and element_ref not in parent_acc
66
+ and element_ref != item_id
67
+ and not_dependency_ref
68
+ ):
69
+ self .cache_refs [item_id ].append (element_ref )
70
+ nb_deps += self .enlist_element (
71
+ element_ref ,
72
+ raw_data ,
73
+ cleanup_inconsistent_bundle ,
74
+ parent_acc + [element_ref ],
75
+ )
76
+ if element_ref not in to_keep :
77
+ to_keep .append (element_ref )
45
78
item [key ] = to_keep
46
- elif key .endswith ("_ref" ) and self .is_ref_key_supported (key ):
47
- if item [key ] == item_id :
48
- item [key ] = None
79
+ elif key .endswith ("_ref" ):
80
+ is_missing_ref = raw_data .get (value ) is None
81
+ must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle
82
+ not_dependency_ref = (
83
+ self .cache_refs .get (value ) is None
84
+ or item_id not in self .cache_refs [value ]
85
+ )
86
+ # Prevent any self reference
87
+ if (
88
+ value is not None
89
+ and not must_be_cleaned
90
+ and value not in parent_acc
91
+ and is_id_supported (value )
92
+ and value != item_id
93
+ and not_dependency_ref
94
+ ):
95
+ self .cache_refs [item_id ].append (value )
96
+ nb_deps += self .enlist_element (
97
+ value ,
98
+ raw_data ,
99
+ cleanup_inconsistent_bundle ,
100
+ parent_acc + [value ],
101
+ )
49
102
else :
50
- # Need to handle the special case of recursive ref for created by ref
51
- is_created_by_ref = key == "created_by_ref"
52
- if is_created_by_ref :
53
- is_marking = item ["id" ].startswith ("marking-definition--" )
54
- if is_marking is False :
55
- nb_deps += self .enlist_element (value , raw_data )
56
- else :
57
- nb_deps += self .enlist_element (value , raw_data )
103
+ item [key ] = None
104
+ # Case for embedded elements (deduplicating and cleanup)
105
+ elif key == "external_references" :
106
+ # specific case of splitting external references
107
+ # reference_ids = []
108
+ deduplicated_references = []
109
+ deduplicated_references_cache = {}
110
+ references = item [key ]
111
+ for reference in references :
112
+ reference_id = external_reference_generate_id (
113
+ url = reference .get ("url" ),
114
+ source_name = reference .get ("source_name" ),
115
+ external_id = reference .get ("external_id" ),
116
+ )
117
+ if (
118
+ reference_id is not None
119
+ and deduplicated_references_cache .get (reference_id ) is None
120
+ ):
121
+ deduplicated_references_cache [reference_id ] = reference_id
122
+ deduplicated_references .append (reference )
123
+ # - Needed for a future move of splitting the elements
124
+ # reference["id"] = reference_id
125
+ # reference["type"] = "External-Reference"
126
+ # raw_data[reference_id] = reference
127
+ # if reference_id not in reference_ids:
128
+ # reference_ids.append(reference_id)
129
+ # nb_deps += self.enlist_element(reference_id, raw_data)
130
+ item [key ] = deduplicated_references
131
+ elif key == "kill_chain_phases" :
132
+ # specific case of splitting kill_chain phases
133
+ # kill_chain_ids = []
134
+ deduplicated_kill_chain = []
135
+ deduplicated_kill_chain_cache = {}
136
+ kill_chains = item [key ]
137
+ for kill_chain in kill_chains :
138
+ kill_chain_id = kill_chain_phase_generate_id (
139
+ kill_chain_name = kill_chain .get ("kill_chain_name" ),
140
+ phase_name = kill_chain .get ("phase_name" ),
141
+ )
142
+ if (
143
+ kill_chain_id is not None
144
+ and deduplicated_kill_chain_cache .get (kill_chain_id ) is None
145
+ ):
146
+ deduplicated_kill_chain_cache [kill_chain_id ] = kill_chain_id
147
+ deduplicated_kill_chain .append (kill_chain )
148
+ # - Needed for a future move of splitting the elements
149
+ # kill_chain["id"] = kill_chain_id
150
+ # kill_chain["type"] = "Kill-Chain-Phase"
151
+ # raw_data[kill_chain_id] = kill_chain
152
+ # if kill_chain_id not in kill_chain_ids:
153
+ # kill_chain_ids.append(kill_chain_id)
154
+ # nb_deps += self.enlist_element(kill_chain_id, raw_data)
155
+ item [key ] = deduplicated_kill_chain
156
+
58
157
# Get the final dep counting and add in cache
59
158
item ["nb_deps" ] = nb_deps
60
- self .elements .append (item )
61
- self .cache_index [item_id ] = item # Put in cache
159
+ # Put in cache
160
+ if self .cache_index .get (item_id ) is None :
161
+ # enlist only if compatible
162
+ if item ["type" ] == "relationship" :
163
+ is_compatible = (
164
+ item ["source_ref" ] is not None and item ["target_ref" ] is not None
165
+ )
166
+ elif item ["type" ] == "sighting" :
167
+ is_compatible = (
168
+ item ["sighting_of_ref" ] is not None
169
+ and len (item ["where_sighted_refs" ]) > 0
170
+ )
171
+ else :
172
+ is_compatible = is_id_supported (item_id )
173
+ if is_compatible :
174
+ self .elements .append (item )
175
+ self .cache_index [item_id ] = item
176
+
62
177
return nb_deps
63
178
64
179
def split_bundle_with_expectations (
65
- self , bundle , use_json = True , event_version = None
180
+ self ,
181
+ bundle ,
182
+ use_json = True ,
183
+ event_version = None ,
184
+ cleanup_inconsistent_bundle = False ,
66
185
) -> Tuple [int , list ]:
67
186
"""splits a valid stix2 bundle into a list of bundles"""
68
187
if use_json :
@@ -84,7 +203,7 @@ def split_bundle_with_expectations(
84
203
for item in bundle_data ["objects" ]:
85
204
raw_data [item ["id" ]] = item
86
205
for item in bundle_data ["objects" ]:
87
- self .enlist_element (item ["id" ], raw_data )
206
+ self .enlist_element (item ["id" ], raw_data , cleanup_inconsistent_bundle , [] )
88
207
89
208
# Build the bundles
90
209
bundles = []
0 commit comments