|
| 1 | +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 2 | +# or more contributor license agreements. Licensed under the Elastic License |
| 3 | +# 2.0; you may not use this file except in compliance with the Elastic License |
| 4 | +# 2.0. |
| 5 | + |
| 6 | +"""Test non-ecs-schema.json for data integrity and consistency.""" |
| 7 | + |
| 8 | +import unittest |
| 9 | +from collections import defaultdict |
| 10 | + |
| 11 | +from detection_rules.ecs import flatten, get_non_ecs_schema, get_schema |
| 12 | + |
| 13 | + |
class TestNonEcsSchema(unittest.TestCase):
    """Test the non-ecs-schema.json file for data quality and integrity.

    The schema is loaded once per class through ``get_non_ecs_schema()``. The tests
    treat it as a mapping of index-pattern strings to field mappings, where a field
    mapping may be nested and its leaves are field type names.
    """

    # Field type names accepted by ``test_valid_field_types``.
    VALID_ES_TYPES = frozenset(
        {
            "keyword",
            "text",
            "long",
            "integer",
            "short",
            "byte",
            "double",
            "float",
            "half_float",
            "scaled_float",
            "boolean",
            "date",
            "ip",
            "geo_point",
            "geo_shape",
            "binary",
            "object",
            "nested",
            "flattened",
            "wildcard",
            "match_only_text",
            "constant_keyword",
        }
    )

    @classmethod
    def setUpClass(cls):
        """Load the schema once and share it between all tests of this class."""
        cls.non_ecs_schema = get_non_ecs_schema()

    @classmethod
    def _iter_field_paths(cls, fields, prefix=""):
        """Yield the dotted path of every leaf under ``fields``, keeping repeats.

        Nested dicts are descended into and their keys joined with ``.``; any
        non-dict value is treated as a leaf. Unlike a flattened dict, the yielded
        sequence can contain the same path more than once, e.g. when a field is
        spelled both as ``{"a": {"b": ...}}`` and as ``{"a.b": ...}``.

        NOTE(review): this assumes ``flatten`` also joins nested keys with ``.``;
        confirm against ``detection_rules.ecs.flatten``.
        """
        for key, value in fields.items():
            path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                yield from cls._iter_field_paths(value, path)
            else:
                yield path

    def test_valid_json_structure(self):
        """Ensure the non-ecs-schema.json loads and has the expected top-level structure."""
        self.assertIsInstance(self.non_ecs_schema, dict, "non-ecs-schema.json should be a JSON object")
        self.assertGreater(len(self.non_ecs_schema), 0, "non-ecs-schema.json should not be empty")

        # Every top-level entry must be "index pattern string -> dict of fields".
        for index_pattern, fields in self.non_ecs_schema.items():
            self.assertIsInstance(
                index_pattern,
                str,
                f"Index pattern key should be a string, got {type(index_pattern)}",
            )
            self.assertIsInstance(
                fields,
                dict,
                f"Fields for index pattern '{index_pattern}' should be a dict, got {type(fields)}",
            )

    def test_no_duplicate_fields_within_index(self):
        """Ensure no index pattern contains duplicate flattened field names.

        The field paths are collected by walking the nested mapping directly rather
        than by reading the keys of a flattened dict: dict keys are unique, so a
        check on the flattened keys could never observe a duplicate.
        """
        duplicates = {}

        for index_pattern, fields in self.non_ecs_schema.items():
            seen = set()
            dupes = set()

            for field_name in self._iter_field_paths(fields):
                if field_name in seen:
                    dupes.add(field_name)
                seen.add(field_name)

            if dupes:
                duplicates[index_pattern] = sorted(dupes)

        # NOTE(review): keys repeated verbatim inside one JSON object are presumably
        # collapsed by the JSON loader before this test sees them - confirm how
        # get_non_ecs_schema() reads the file if that case must be caught too.
        if duplicates:
            err_lines = [f"  {idx}: {', '.join(dupe_fields)}" for idx, dupe_fields in duplicates.items()]
            self.fail("Duplicate fields found within index patterns:\n" + "\n".join(err_lines))

    def test_no_conflicting_field_types_across_indices(self):
        """Ensure the same field name does not have conflicting types across different index patterns."""
        # field name -> {index pattern -> type declared for that field in that pattern}
        field_type_map = defaultdict(dict)

        for index_pattern, fields in self.non_ecs_schema.items():
            flattened = flatten(fields)
            for field_name, field_type in flattened.items():
                field_type_map[field_name][index_pattern] = field_type

        # A field conflicts when it is declared with more than one distinct type.
        conflicts = {}
        for field_name, index_types in field_type_map.items():
            unique_types = set(index_types.values())
            if len(unique_types) > 1:
                conflicts[field_name] = dict(index_types)

        if conflicts:
            err_lines = []
            for field_name, index_types in sorted(conflicts.items()):
                type_details = ", ".join(f"{idx}={t}" for idx, t in index_types.items())
                err_lines.append(f"  {field_name}: {type_details}")
            self.fail("Fields with conflicting types across index patterns:\n" + "\n".join(err_lines))

    def test_valid_field_types(self):
        """Validate that all field type values are valid Elasticsearch field types."""
        invalid = []

        for index_pattern, fields in self.non_ecs_schema.items():
            flattened = flatten(fields)
            for field_name, field_type in flattened.items():
                # Check the Python type first: an unhashable leaf (e.g. a JSON array)
                # would make the frozenset membership test raise TypeError instead
                # of being listed as an invalid type.
                if not isinstance(field_type, str) or field_type not in self.VALID_ES_TYPES:
                    invalid.append(f"  {index_pattern} -> {field_name}: '{field_type}'")

        if invalid:
            self.fail(
                "Invalid Elasticsearch field types found:\n"
                + "\n".join(invalid)
                + "\n\nValid types: "
                + ", ".join(sorted(self.VALID_ES_TYPES))
            )

    def test_fields_not_in_ecs(self):
        """Verify that fields in non-ecs-schema.json are not already present in the ECS flat schema."""
        ecs_schema = get_schema()
        overlapping = []

        for index_pattern, fields in self.non_ecs_schema.items():
            flattened = flatten(fields)
            overlapping.extend(
                f"  {index_pattern} -> {field_name}" for field_name in flattened if field_name in ecs_schema
            )

        if overlapping:
            self.fail(
                "The following fields in non-ecs-schema.json are already present in the ECS schema "
                "and should be removed to prevent redundancy:\n" + "\n".join(overlapping)
            )

    def test_no_empty_index_patterns(self):
        """Ensure no index pattern has an empty field mapping."""
        # Any falsy field mapping (e.g. an empty dict) counts as empty.
        empty = [idx for idx, fields in self.non_ecs_schema.items() if not fields]

        if empty:
            self.fail("Empty index patterns found (no fields defined): " + ", ".join(empty))
0 commit comments