Configuration of the GeoipEnricher
The goal of this presentation is to show how to configure the output subfields of the GeoipEnricher.
Prerequisites: a local GeoIP database is available.
The challenge
The given document
[19]:
# Minimal input event: it only carries the client IP that is to be enriched.
document = {
    "client": {
        "ip": "8.8.8.8",
    },
}
will result in the default output
[20]:
# Result of enriching `document` without any subfield customization: the
# input field is kept as-is and the lookup result is added under "geoip"
# as a GeoJSON-style "Feature".
default_output = {
    # Must mirror the input document; the value is a dict, not a set holding
    # the single string "ip: 8.8.8.8".
    "client": {"ip": "8.8.8.8"},
    "geoip": {
        "geometry": {"coordinates": [-97.822, 37.751], "type": "Point"},
        "properties": {
            "accuracy_radius": 1000,
            "continent": "North America",
            "continent_code": "NA",
            "country": "United States",
            "country_iso_code": "US",
            "time_zone": "America/Chicago",
        },
        "type": "Feature",
    },
}
which instead should be configured to look like
[21]:
# Target layout: every geo attribute is moved below "client.geo" and renamed,
# so nothing is written to a top-level "geoip" field.
expected_output = {
    "client": {
        "geo": {
            "accuracy": 1000,
            "continent_code": "NA",
            "continent_name": "North America",
            "country_iso_code": "US",
            "country_name": "United States",
            "geometry_type": "Point",
            "location": [-97.822, 37.751],
            "timezone": "America/Chicago",
            "type": "Feature",
        },
        "ip": "8.8.8.8",
    },
}
Create rule and processor
create the rule:
[22]:
import sys

# Make the repository root importable so that `logprep` can be imported from
# this notebook's directory.
sys.path.append("../../../../../")
from pathlib import Path
import tempfile

# The rule is plain YAML, so nesting is significant: `source_fields` and
# `customize_target_subfields` belong to `geoip_enricher`, and each
# "<default subfield>: <target field>" mapping belongs to
# `customize_target_subfields`. Without the indentation `geoip_enricher`
# would parse as null and all other keys would end up at the top level.
rule_yaml = """---
filter: "client.ip"
geoip_enricher:
  source_fields: ["client.ip"]
  customize_target_subfields:
    type: client.geo.type
    geometry.type: client.geo.geometry_type
    geometry.coordinates: client.geo.location
    properties.accuracy_radius: client.geo.accuracy
    properties.continent: client.geo.continent_name
    properties.continent_code: client.geo.continent_code
    properties.country: client.geo.country_name
    properties.city: client.geo.city_name
    properties.postal_code: client.geo.postal_code
    properties.subdivision: client.geo.subdivision
    properties.time_zone: client.geo.timezone
    properties.country_iso_code: client.geo.country_iso_code
"""

# Write the rule into a "geoip" directory below the system temp directory.
rule_path = Path(tempfile.gettempdir()) / "geoip"
rule_path.mkdir(exist_ok=True)
rule_file = rule_path / "data-stream.yml"
# Last expression of the cell: the notebook displays the number of characters written.
rule_file.write_text(rule_yaml)
[22]:
678
Create the processor config and replace the `db_path` placeholder with the path to your local GeoIP database:
[23]:
# Configuration for a single processor; the outer key is the name that is
# handed to the factory together with the inner settings.
# NOTE(review): the second rules entry "/dev" looks like a placeholder
# directory without rule files — confirm it is intentional.
processor_config = {
    "geoip_enricher": {
        "type": "geoip_enricher",
        "rules": [str(rule_path), "/dev"],
        # Placeholder: must be replaced by the path to an existing GeoIP database file.
        "db_path": "<INSERT_PATH_TO_GEOIP_DATABASE>",
    },
}
create the processor with the factory:
[24]:
from unittest import mock
from logprep.factory import Factory
# NOTE(review): mock_logger is created but not passed to Factory.create below,
# while the recorded traceback of this cell shows the call
# `Factory.create(processor_config, mock_logger)` — confirm which signature
# the installed logprep version expects.
mock_logger = mock.MagicMock()
# Build the processor from the config dict. According to the recorded
# traceback, an InvalidConfigurationError is raised here when db_path does
# not point to an existing file.
geoip_enricher = Factory.create(processor_config)
# Bare expression so that the notebook displays the created processor.
geoip_enricher
---------------------------------------------------------------------------
InvalidConfigurationError Traceback (most recent call last)
Cell In[24], line 5
2 from logprep.factory import Factory
4 mock_logger = mock.MagicMock()
----> 5 geoip_enricher = Factory.create(processor_config, mock_logger)
6 geoip_enricher
File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/factory.py:36, in Factory.create(cls, configuration, logger)
34 metric_labels = configuration[connector_name].pop("metric_labels")
35 connector = Configuration.get_class(connector_name, connector_configuration_dict)
---> 36 connector_configuration = Configuration.create(
37 connector_name, connector_configuration_dict
38 )
39 connector_configuration.metric_labels = copy.deepcopy(metric_labels)
40 return connector(connector_name, connector_configuration, logger)
File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/configuration.py:34, in Configuration.create(cls, name, config_)
19 """factory method to create component configuration
20
21 Parameters
(...)
31 the pipeline component configuration
32 """
33 class_ = cls.get_class(name, config_)
---> 34 return class_.Config(**config_)
File <attrs generated init logprep.processor.geoip_enricher.processor.GeoipEnricher.Config>:13, in __init__(self, type, rules, tree_config, db_path)
11 __attr_validator_generic_rules(self, __attr_generic_rules, self.generic_rules)
12 __attr_validator_tree_config(self, __attr_tree_config, self.tree_config)
---> 13 __attr_validator_db_path(self, __attr_db_path, self.db_path)
File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:53, in url_validator(_, attribute, value)
51 raise InvalidConfigurationError(f"{attribute.name} has no schema, net location and path")
52 if not parsed_url.scheme and not parsed_url.netloc and parsed_url.path:
---> 53 file_validator(_, attribute, value)
54 if parsed_url.scheme == "file":
55 if parsed_url.params or parsed_url.query or parsed_url.fragment:
File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:23, in file_validator(_, attribute, value)
21 raise InvalidConfigurationError(f"{attribute.name} is not a str")
22 if not os.path.exists(value):
---> 23 raise InvalidConfigurationError(f"{attribute.name} file '{value}' does not exist")
24 if not os.path.isfile(value):
25 raise InvalidConfigurationError(f"{attribute.name} '{value}' is not a file")
InvalidConfigurationError: db_path file 'tests/testdata/mock_external/MockGeoLite2-City.mmdb' does not exist
Process event
[ ]:
from copy import deepcopy
# Work on a deep copy so that the original `document` stays unchanged and the
# cell can be re-run.
mydocument = deepcopy(document)
# The return value of process() is not used; the comparison below relies on
# mydocument having been modified in place.
geoip_enricher.process(mydocument)
# Fails loudly if the customized subfield layout is not produced.
assert mydocument == expected_output
# Last expression of the cell: shown as the cell's output in the notebook.
f"The output has the expected form: {mydocument == expected_output}"
'The output has the expected form: True'