Skip to content

Commit d7fc99f

Browse files
chidoziemanagwuWutche
authored and committed
Fix: Add comprehensive unit tests for non-ecs-schema.json and clean up data (#2322)
1 parent e788ab7 commit d7fc99f

2 files changed

Lines changed: 149 additions & 8 deletions

File tree

detection_rules/etc/non-ecs-schema.json

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,6 @@
9090
"file.Ext.header_bytes": "keyword",
9191
"file.Ext.entropy": "long",
9292
"file.Ext.windows.zone_identifier": "long",
93-
"file.size": "long",
9493
"file.Ext.original.name": "keyword",
9594
"dll.Ext.device.product_id": "keyword",
9695
"dll.Ext.relative_file_creation_time": "double",
@@ -268,15 +267,8 @@
268267
"okta.debug_context.debug_data.flattened.privilegeGranted": "keyword"
269268
},
270269
"logs-network_traffic.http*": {
271-
"data_stream.dataset": "keyword",
272-
"url.path": "keyword",
273-
"http.request.referrer": "keyword",
274270
"http.request.headers.content-type": "keyword",
275-
"network.direction": "keyword",
276-
"http.request.method": "keyword",
277271
"request": "keyword",
278-
"http.request.body.bytes": "long",
279-
"http.request.body.content": "keyword",
280272
"http.response.headers.server": "keyword"
281273
},
282274
"metrics-*": {

tests/test_non_ecs_schema.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
# or more contributor license agreements. Licensed under the Elastic License
3+
# 2.0; you may not use this file except in compliance with the Elastic License
4+
# 2.0.
5+
6+
"""Test non-ecs-schema.json for data integrity and consistency."""
7+
8+
import unittest
9+
from collections import defaultdict
10+
11+
from detection_rules.ecs import flatten, get_non_ecs_schema, get_schema
12+
13+
14+
class TestNonEcsSchema(unittest.TestCase):
15+
"""Test the non-ecs-schema.json file for data quality and integrity."""
16+
17+
VALID_ES_TYPES = frozenset(
18+
{
19+
"keyword",
20+
"text",
21+
"long",
22+
"integer",
23+
"short",
24+
"byte",
25+
"double",
26+
"float",
27+
"half_float",
28+
"scaled_float",
29+
"boolean",
30+
"date",
31+
"ip",
32+
"geo_point",
33+
"geo_shape",
34+
"binary",
35+
"object",
36+
"nested",
37+
"flattened",
38+
"wildcard",
39+
"match_only_text",
40+
"constant_keyword",
41+
}
42+
)
43+
44+
@classmethod
45+
def setUpClass(cls):
46+
cls.non_ecs_schema = get_non_ecs_schema()
47+
48+
def test_valid_json_structure(self):
49+
"""Ensure the non-ecs-schema.json loads and has the expected top-level structure."""
50+
self.assertIsInstance(self.non_ecs_schema, dict, "non-ecs-schema.json should be a JSON object")
51+
self.assertGreater(len(self.non_ecs_schema), 0, "non-ecs-schema.json should not be empty")
52+
53+
for index_pattern, fields in self.non_ecs_schema.items():
54+
self.assertIsInstance(
55+
index_pattern,
56+
str,
57+
f"Index pattern key should be a string, got {type(index_pattern)}",
58+
)
59+
self.assertIsInstance(
60+
fields,
61+
dict,
62+
f"Fields for index pattern '{index_pattern}' should be a dict, got {type(fields)}",
63+
)
64+
65+
def test_no_duplicate_fields_within_index(self):
66+
"""Ensure no index pattern contains duplicate flattened field names."""
67+
duplicates = {}
68+
69+
for index_pattern, fields in self.non_ecs_schema.items():
70+
flattened = flatten(fields)
71+
field_names = list(flattened.keys())
72+
seen = set()
73+
dupes = set()
74+
75+
for field_name in field_names:
76+
if field_name in seen:
77+
dupes.add(field_name)
78+
seen.add(field_name)
79+
80+
if dupes:
81+
duplicates[index_pattern] = sorted(dupes)
82+
83+
if duplicates:
84+
err_lines = [f" {idx}: {', '.join(dupe_fields)}" for idx, dupe_fields in duplicates.items()]
85+
self.fail("Duplicate fields found within index patterns:\n" + "\n".join(err_lines))
86+
87+
def test_no_conflicting_field_types_across_indices(self):
88+
"""Ensure the same field name does not have conflicting types across different index patterns."""
89+
field_type_map = defaultdict(dict)
90+
91+
for index_pattern, fields in self.non_ecs_schema.items():
92+
flattened = flatten(fields)
93+
for field_name, field_type in flattened.items():
94+
field_type_map[field_name][index_pattern] = field_type
95+
96+
conflicts = {}
97+
for field_name, index_types in field_type_map.items():
98+
unique_types = set(index_types.values())
99+
if len(unique_types) > 1:
100+
conflicts[field_name] = dict(index_types)
101+
102+
if conflicts:
103+
err_lines = []
104+
for field_name, index_types in sorted(conflicts.items()):
105+
type_details = ", ".join(f"{idx}={t}" for idx, t in index_types.items())
106+
err_lines.append(f" {field_name}: {type_details}")
107+
self.fail("Fields with conflicting types across index patterns:\n" + "\n".join(err_lines))
108+
109+
def test_valid_field_types(self):
110+
"""Validate that all field type values are valid Elasticsearch field types."""
111+
invalid = []
112+
113+
for index_pattern, fields in self.non_ecs_schema.items():
114+
flattened = flatten(fields)
115+
for field_name, field_type in flattened.items():
116+
if field_type not in self.VALID_ES_TYPES:
117+
invalid.append(f" {index_pattern} -> {field_name}: '{field_type}'")
118+
119+
if invalid:
120+
self.fail(
121+
"Invalid Elasticsearch field types found:\n"
122+
+ "\n".join(invalid)
123+
+ "\n\nValid types: "
124+
+ ", ".join(sorted(self.VALID_ES_TYPES))
125+
)
126+
127+
def test_fields_not_in_ecs(self):
128+
"""Verify that fields in non-ecs-schema.json are not already present in the ECS flat schema."""
129+
ecs_schema = get_schema()
130+
overlapping = []
131+
132+
for index_pattern, fields in self.non_ecs_schema.items():
133+
flattened = flatten(fields)
134+
overlapping.extend(
135+
f" {index_pattern} -> {field_name}" for field_name in flattened if field_name in ecs_schema
136+
)
137+
138+
if overlapping:
139+
self.fail(
140+
"The following fields in non-ecs-schema.json are already present in the ECS schema "
141+
"and should be removed to prevent redundancy:\n" + "\n".join(overlapping)
142+
)
143+
144+
def test_no_empty_index_patterns(self):
145+
"""Ensure no index pattern has an empty field mapping."""
146+
empty = [idx for idx, fields in self.non_ecs_schema.items() if not fields]
147+
148+
if empty:
149+
self.fail("Empty index patterns found (no fields defined): " + ", ".join(empty))

0 commit comments

Comments
 (0)