forked from vdqbstp/TinyCheck
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparsezeeklogs.py
178 lines (150 loc) · 6.61 KB
/
parsezeeklogs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from json import loads, dumps
from collections import OrderedDict
from datetime import datetime
from traceback import print_exc
# Taken from https://github.com/dgunter/ParseZeekLogs <3
class ParseZeekLogs(object):
"""
Class that parses Zeek logs and allows log data to be output in CSV or json format.
Attributes: filepath: Path of Zeek log file to read
"""
def __init__(self, filepath, batchsize=500, fields=None, output_format=None, ignore_keys=[], meta={}, safe_headers=False):
self.fd = open(filepath, "r")
self.options = OrderedDict()
self.firstRun = True
self.filtered_fields = fields
self.batchsize = batchsize
self.output_format = output_format
self.ignore_keys = ignore_keys
self.meta = meta
self.safe_headers = safe_headers
# Convert ' to " in meta string
meta = loads(dumps(meta).replace("'", '"'))
# Read the header option lines
l = self.fd.readline().strip()
while l.strip().startswith("#"):
# Parse the options out
if l.startswith("#separator"):
key = str(l[1:].split(" ")[0])
value = str.encode(l[1:].split(
" ")[1].strip()).decode('unicode_escape')
self.options[key] = value
elif l.startswith("#"):
key = str(l[1:].split(self.options.get('separator'))[0])
value = l[1:].split(self.options.get('separator'))[1:]
self.options[key] = value
# Read the next line
l = self.fd.readline().strip()
self.firstLine = l
# Save mapping of fields to values:
self.fields = self.options.get('fields')
self.types = self.options.get('types')
self.data_types = {}
for i, val in enumerate(self.fields):
# Convert field names if safe_headers is enabled
if self.safe_headers is True:
self.fields[i] = self.fields[i].replace(".", "_")
# Match types with each other
self.data_types[self.fields[i]] = self.types[i]
def __del__(self):
self.fd.close()
def __iter__(self):
return self
def __next__(self):
retVal = ""
if self.firstRun is True:
retVal = self.firstLine
self.firstRun = False
else:
retVal = self.fd.readline().strip()
# If an empty string is returned, readline is done reading
if retVal == "" or retVal is None:
raise StopIteration
# Split out the data we are going to return
retVal = retVal.split(self.options.get('separator'))
record = None
# Make sure we aren't dealing with a comment line
if len(retVal) > 0 and not str(retVal[0]).strip().startswith("#") \
and len(retVal) is len(self.options.get("fields")):
record = OrderedDict()
# Prepare fields for conversion
for x in range(0, len(retVal)):
if self.safe_headers is True:
converted_field_name = self.options.get(
"fields")[x].replace(".", "_")
else:
converted_field_name = self.options.get("fields")[x]
if self.filtered_fields is None or converted_field_name in self.filtered_fields:
# Translate - to "" to fix a conversation error
if retVal[x] == "-":
retVal[x] = ""
# Save the record field if the field isn't filtered out
record[converted_field_name] = retVal[x]
# Convert values to the appropriate record type
record = self.convert_values(
record, self.ignore_keys, self.data_types)
if record is not None and self.output_format == "json":
# Output will be json
# Add metadata to json
for k, v in self.meta.items():
record[k] = v
retVal = record
elif record is not None and self.output_format == "csv":
retVal = ""
# Add escaping to csv format
for k, v in record.items():
# Add escaping to string values
if isinstance(v, str):
retVal += str("\"" + str(v).strip() + "\"" + ",")
else:
retVal += str(str(v).strip() + ",")
# Remove the trailing comma
retVal = retVal[:-1]
else:
retVal = None
return retVal
def convert_values(self, data, ignore_keys=[], data_types={}):
keys_to_delete = []
for k, v in data.items():
# print("evaluating k: " + str(k) + " v: " + str(v))
if isinstance(v, dict):
data[k] = self.convert_values(v)
else:
if data_types.get(k) is not None:
if (data_types.get(k) == "port" or data_types.get(k) == "count"):
if v != "":
data[k] = int(v)
else:
keys_to_delete.append(k)
elif (data_types.get(k) == "double" or data_types.get(k) == "interval"):
if v != "":
data[k] = float(v)
else:
keys_to_delete.append(k)
elif data_types.get(k) == "bool":
data[k] = bool(v)
else:
data[k] = v
for k in keys_to_delete:
del data[k]
return data
def get_fields(self):
"""Returns all fields present in the log file
Returns:
A python list containing all field names in the log file
"""
field_names = ""
if self.output_format == "csv":
for i, v in enumerate(self.fields):
if self.filtered_fields is None or v in self.filtered_fields:
field_names += str(v) + ","
# Remove the trailing comma
field_names = field_names[:-1].strip()
else:
field_names = []
for i, v in enumerate(self.fields):
if self.filtered_fields is None or v in self.filtered_fields:
field_names.append(v)
return field_names