Skip to content

Commit 0e0b949

Browse files
committed
ENG: Add support for creating separated MISP Events
With the `event_separator` parameter, the user can decide to create more than one MISP event in the output bot and group incoming messages based on a given field. In addition, the message library was fixed so that it does not modify the parameter directly.
1 parent f2b8b53 commit 0e0b949

File tree

5 files changed

+191
-74
lines changed

5 files changed

+191
-74
lines changed

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@
3636
#### Outputs
3737
- `intelmq.bots.outputs.misp.output_feed`:
3838
- Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski).
39-
- Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski).
40-
- Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR by Kamil Mankowski).
39+
- Allow saving messages in bulks instead of refreshing the feed immediately (PR#2509 by Kamil Mankowski).
40+
- Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR#2509 by Kamil Mankowski).
41+
- Add `event_separator` parameter to allow keeping IntelMQ events in separated MISP Events based on a given field (PR#2509 by Kamil Mankowski).
4142
- `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar).
4243

4344
### Documentation

docs/user/bots.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4605,9 +4605,9 @@ a new MISP event based on `interval_event` triggers saving regardless of the cac
46054605
**`attribute_mapping`**
46064606

46074607
(optional, dict) If set, allows selecting which IntelMQ event fields are mapped to MISP attributes
4608-
as well as attribute parameters (like e.g. a comment). The expected format is a *dictonary of dictionaries*:
4608+
as well as attribute parameters (like e.g. a comment). The expected format is a *dictionary of dictionaries*:
46094609
first-level key represents an IntelMQ field that will be directly translated to a MISP attribute; nested
4610-
dictionary represents addditional parameters PyMISP can take when creating an attribute. They can use
4610+
dictionary represents additional parameters PyMISP can take when creating an attribute. They can use
46114611
names of other IntelMQ fields (then the value of such field will be used), or static values. If not needed,
46124612
leave empty dict.
46134613

@@ -4627,6 +4627,12 @@ and set their values as in the IntelMQ event. In addition, the `feed.name` would
46274627
as given in the `event_description.text` from IntelMQ event, and `destination.ip` would be set
46284628
as not usable for IDS.
46294629

4630+
**`event_separator`**
4631+
4632+
(optional, string): If set to a field name from an IntelMQ event, the bot will group incoming messages
4633+
into separate MISP events, based on the value of this field. The `interval_event` parameter applies
4634+
to all of the grouped events together.
4635+
46304636
**Usage in MISP**
46314637

46324638
Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server.

intelmq/bots/outputs/misp/output_feed.py

Lines changed: 94 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,22 @@
99
from pathlib import Path
1010
from uuid import uuid4
1111

12-
import pymisp
13-
1412
from intelmq.lib.bot import OutputBot
1513
from intelmq.lib.exceptions import MissingDependencyError
16-
from ....lib.message import Message, MessageFactory
14+
from ....lib.message import MessageFactory
1715
from intelmq.lib.mixins import CacheMixin
1816
from intelmq.lib.utils import parse_relative
1917

2018
try:
21-
from pymisp import MISPEvent, MISPOrganisation, NewAttributeError
19+
from pymisp import MISPEvent, MISPObject, MISPOrganisation, NewAttributeError
2220
from pymisp.tools import feed_meta_generator
2321
except ImportError:
2422
# catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501
2523
MISPEvent = None
2624
import_fail_reason = "import"
2725

26+
DEFAULT_KEY = "default"
27+
2828

2929
class MISPFeedOutputBot(OutputBot, CacheMixin):
3030
"""Generate an output in the MISP Feed format"""
@@ -38,6 +38,7 @@ class MISPFeedOutputBot(OutputBot, CacheMixin):
3838
)
3939
_is_multithreadable: bool = False
4040
attribute_mapping: dict = None
41+
event_separator: str = None
4142

4243
@staticmethod
4344
def check_output_dir(dirname):
@@ -50,7 +51,8 @@ def init(self):
5051
if MISPEvent is None:
5152
raise MissingDependencyError("pymisp", version=">=2.4.117.3")
5253

53-
self.current_event = None
54+
self.current_events = {}
55+
self.current_files = {}
5456

5557
self.misp_org = MISPOrganisation()
5658
self.misp_org.name = self.misp_org_name
@@ -66,58 +68,57 @@ def init(self):
6668
minutes=parse_relative(self.interval_event)
6769
)
6870

71+
self.min_time_current = datetime.datetime.max
72+
self.max_time_current = datetime.datetime.min
73+
6974
if (self.output_dir / ".current").exists():
7075
try:
7176
with (self.output_dir / ".current").open() as f:
72-
self.current_file = Path(f.read())
73-
74-
if self.current_file.exists():
75-
self.current_event = MISPEvent()
76-
self.current_event.load_file(self.current_file)
77-
78-
last_min_time, last_max_time = re.findall(
79-
"IntelMQ event (.*) - (.*)", self.current_event.info
80-
)[0]
81-
last_min_time = datetime.datetime.strptime(
82-
last_min_time, "%Y-%m-%dT%H:%M:%S.%f"
83-
)
84-
last_max_time = datetime.datetime.strptime(
85-
last_max_time, "%Y-%m-%dT%H:%M:%S.%f"
86-
)
87-
if last_max_time < datetime.datetime.now():
88-
self.min_time_current = datetime.datetime.now()
89-
self.max_time_current = self.min_time_current + self.timedelta
90-
self.current_event = None
91-
else:
92-
self.min_time_current = last_min_time
93-
self.max_time_current = last_max_time
94-
except:
77+
current = f.read()
78+
79+
if not self.event_separator:
80+
self.current_files[DEFAULT_KEY] = Path(current)
81+
else:
82+
self.current_files = {
83+
k: Path(v) for k, v in json.loads(current).items()
84+
}
85+
86+
for key, path in self.current_files.items():
87+
self._load_event(path, key)
88+
except Exception:
9589
self.logger.exception(
96-
"Loading current event %s failed. Skipping it.", self.current_event
90+
"Loading current events %s failed. Skipping it.", self.current_files
9791
)
98-
self.current_event = None
99-
else:
92+
self.current_events = {}
93+
94+
if not self.current_files or self.max_time_current < datetime.datetime.now():
10095
self.min_time_current = datetime.datetime.now()
10196
self.max_time_current = self.min_time_current + self.timedelta
97+
self.current_events = {}
98+
99+
def _load_event(self, file_path: Path, key: str):
100+
if file_path.exists():
101+
self.current_events[key] = MISPEvent()
102+
self.current_events[key].load_file(file_path)
103+
104+
last_min_time, last_max_time = re.findall(
105+
"IntelMQ event (.*) - (.*)", self.current_events[key].info
106+
)[0]
107+
last_min_time = datetime.datetime.strptime(
108+
last_min_time, "%Y-%m-%dT%H:%M:%S.%f"
109+
)
110+
last_max_time = datetime.datetime.strptime(
111+
last_max_time, "%Y-%m-%dT%H:%M:%S.%f"
112+
)
113+
114+
self.min_time_current = min(last_min_time, self.min_time_current)
115+
self.max_time_current = max(last_max_time, self.max_time_current)
102116

103117
def process(self):
104-
if not self.current_event or datetime.datetime.now() > self.max_time_current:
118+
if datetime.datetime.now() > self.max_time_current:
105119
self.min_time_current = datetime.datetime.now()
106120
self.max_time_current = self.min_time_current + self.timedelta
107-
self.current_event = MISPEvent()
108-
self.current_event.info = "IntelMQ event {begin} - {end}" "".format(
109-
begin=self.min_time_current.isoformat(),
110-
end=self.max_time_current.isoformat(),
111-
)
112-
self.current_event.set_date(datetime.date.today())
113-
self.current_event.Orgc = self.misp_org
114-
self.current_event.uuid = str(uuid4())
115-
self.current_file = self.output_dir / f"{self.current_event.uuid}.json"
116-
with (self.output_dir / ".current").open("w") as f:
117-
f.write(str(self.current_file))
118-
119-
# On startup or when timeout occurs, clean the queue to ensure we do not
120-
# keep events forever because there was not enough generated
121+
121122
self._generate_feed()
122123

123124
event = self.receive_message().to_dict(jsondict_as_string=True)
@@ -128,19 +129,57 @@ def process(self):
128129

129130
if cache_size is None:
130131
self._generate_feed(event)
132+
elif not self.current_events:
133+
# Always create the first event so we can keep track of the interval.
134+
# It also ensures cleaning the queue after startup in case of awaiting
135+
# messages from the previous run
136+
self._generate_feed()
131137
elif cache_size >= self.bulk_save_count:
132138
self._generate_feed()
133139

134140
self.acknowledge_message()
135141

142+
def _generate_new_event(self, key):
143+
self.current_events[key] = MISPEvent()
144+
self.current_events[key].info = "IntelMQ event {begin} - {end}" "".format(
145+
begin=self.min_time_current.isoformat(),
146+
end=self.max_time_current.isoformat(),
147+
)
148+
self.current_events[key].set_date(datetime.date.today())
149+
self.current_events[key].Orgc = self.misp_org
150+
self.current_events[key].uuid = str(uuid4())
151+
self.current_files[key] = (
152+
self.output_dir / f"{self.current_events[key].uuid}.json"
153+
)
154+
with (self.output_dir / ".current").open("w") as f:
155+
if not self.event_separator:
156+
f.write(str(self.current_files[key]))
157+
else:
158+
json.dump({k: str(v) for k, v in self.current_files.items()}, f)
159+
return self.current_events[key]
160+
136161
def _add_message_to_feed(self, message: dict):
137-
obj = self.current_event.add_object(name="intelmq_event")
162+
if not self.event_separator:
163+
key = DEFAULT_KEY
164+
else:
165+
# For proper handling of nested fields
166+
message_obj = MessageFactory.from_dict(
167+
message, harmonization=self.harmonization, default_type="Event"
168+
)
169+
key = message_obj.get(self.event_separator) or DEFAULT_KEY
170+
171+
if key in self.current_events:
172+
event = self.current_events[key]
173+
else:
174+
event = self._generate_new_event(key)
175+
176+
obj = event.add_object(name="intelmq_event")
138177
if not self.attribute_mapping:
139178
self._default_mapping(obj, message)
140179
else:
141180
self._custom_mapping(obj, message)
142181

143-
def _default_mapping(self, obj: pymisp.MISPObject, message: dict):
182+
def _default_mapping(self, obj: "MISPObject", message: dict):
144183
for object_relation, value in message.items():
145184
try:
146185
obj.add_attribute(object_relation, value=value)
@@ -162,15 +201,15 @@ def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dic
162201
for parameter, value in definition.items():
163202
# Check if the value is a harmonization key or a static value
164203
if isinstance(value, str) and (
165-
value in self.harmonization["event"]
166-
or value.split(".", 1)[0] in self.harmonization["event"]
204+
value in self.harmonization["event"] or
205+
value.split(".", 1)[0] in self.harmonization["event"]
167206
):
168207
result[parameter] = message.get(value)
169208
else:
170209
result[parameter] = value
171210
return result
172211

173-
def _custom_mapping(self, obj: pymisp.MISPObject, message: dict):
212+
def _custom_mapping(self, obj: "MISPObject", message: dict):
174213
for object_relation, definition in self.attribute_mapping.items():
175214
obj.add_attribute(
176215
object_relation,
@@ -188,9 +227,10 @@ def _generate_feed(self, message: dict = None):
188227
self._add_message_to_feed(message)
189228
message = self.cache_pop()
190229

191-
feed_output = self.current_event.to_feed(with_meta=False)
192-
with self.current_file.open("w") as f:
193-
json.dump(feed_output, f)
230+
for key, event in self.current_events.items():
231+
feed_output = event.to_feed(with_meta=False)
232+
with self.current_files[key].open("w") as f:
233+
json.dump(feed_output, f)
194234

195235
feed_meta_generator(self.output_dir)
196236

intelmq/lib/message.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,18 @@ def from_dict(message: dict, harmonization=None,
4848
MessageFactory.unserialize
4949
MessageFactory.serialize
5050
"""
51-
if default_type and "__type" not in message:
52-
message["__type"] = default_type
51+
# don't modify the parameter
52+
message_copy = message.copy()
53+
54+
if default_type and "__type" not in message_copy:
55+
message_copy["__type"] = default_type
5356
try:
54-
class_reference = getattr(intelmq.lib.message, message["__type"])
57+
class_reference = getattr(intelmq.lib.message, message_copy["__type"])
5558
except AttributeError:
5659
raise exceptions.InvalidArgument('__type',
57-
got=message["__type"],
60+
got=message_copy["__type"],
5861
expected=VALID_MESSSAGE_TYPES,
5962
docs=HARMONIZATION_CONF_FILE)
60-
# don't modify the parameter
61-
message_copy = message.copy()
6263
del message_copy["__type"]
6364
return class_reference(message_copy, auto=True, harmonization=harmonization)
6465

0 commit comments

Comments
 (0)