Skip to content

Commit 3c85f3f

Browse files
committed
dmidecode.py: collapse near-duplicate data in an admin-friendly way
1 parent 3ecab99 commit 3c85f3f

2 files changed

Lines changed: 135 additions & 44 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
2020
* redfish.py: make lib more robust and check the sensor state against the user's thresholds first
2121

2222

23+
### Changed ("refactor", "chore" etc.)
24+
25+
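* dmidecode.py: collapse near-duplicate records (e.g. identical CPUs or memory modules) into a single entry that reports how many records, and which sockets/slots, were merged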
26+
27+
28+
2329
## [v2.3.0] - 2025-06-20
2430

2531
### Added ("feat")

dmidecode.py

Lines changed: 129 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"""
1515

1616
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
17-
__version__ = '2025042001'
17+
__version__ = '2025090901'
1818

1919
import re
2020
import subprocess
@@ -219,49 +219,86 @@ def cpu_type(dmi):
219219

220220
def dmidecode_parse(output):
221221
"""
222-
Parse the raw output of `dmidecode` into a structured dictionary.
223-
224-
This function processes the raw textual output of the `dmidecode` tool and extracts
225-
structured information about system hardware, organized by DMI handle.
226-
227-
### Parameters
228-
- **output** (`str`):
229-
The raw string output from the `dmidecode` command.
230-
231-
### Returns
232-
- **dict**:
233-
A dictionary keyed by DMI handle. Each value contains fields such as dminame, dmisize,
234-
dmitype, and parsed key-value pairs from the output.
235-
236-
### Notes
237-
- Records are separated by double newlines in the output.
238-
- Only records with at least three lines are considered valid.
239-
- Multi-line blocks are handled if needed.
222+
Parse `dmidecode` output into a dict, collapsing near-duplicates in an admin-friendly way.
223+
224+
Type-aware dedupe rules:
225+
- Type 4 (Processor Information): ignore per-thread/core/socket noise fields; merge;
226+
add dedup_count/dedup_sockets
227+
- Type 17 (Memory Device): drop unpopulated; ignore slot labels; merge;
228+
add dedup_count/dedup_slots
229+
- Other types: generic dedupe (exact content after normalization)
230+
231+
Returns:
232+
{ dmi_handle_tuple: parsed_record_dict, ... }
233+
where parsed_record_dict includes keys: dminame, dmisize, dmitype, parsed fields,
234+
and possibly:
235+
- dedup_count (int >= 1)
236+
- dedup_sockets / dedup_slots (sorted list of labels encountered)
237+
- dedup_handles (list of original DMI handle strings that were merged)
238+
"""
239+
data = {}
240+
seen = {} # fp -> (first_handle, aggregated_record)
241+
242+
# --- helpers -------------------------------------------------------------
243+
def _normalize(s):
244+
if s is None:
245+
return ''
246+
s = str(s).strip()
247+
# treat common "unknown" variants as empty so they don't block dedupe
248+
if s.lower() in {'unknown', 'not specified', 'not provided', 'n/a'}:
249+
return ''
250+
# collapse whitespace
251+
return ' '.join(s.split())
252+
253+
def _lower(s):
254+
return _normalize(s).lower()
255+
256+
def _drop_unpopulated_type17(rec):
257+
size = _lower(rec.get('Size'))
258+
if not size:
259+
return True
260+
if 'no module installed' in size:
261+
return True
262+
# Sometimes vendors encode 0-sized entries
263+
if size.startswith('0 ') or size == '0':
264+
return True
265+
return False
240266

241-
### Example
242-
>>> dmidecode_parse(dmidecode_output)
243-
{
244-
('0xDA00', '218', '251'): {
245-
'dminame': 'OEM-specific Type',
246-
'dmisize': 251,
247-
'dmitype': 218,
248-
'H': 'D\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0\t\t0'
267+
# Fields to ignore by DMI type when constructing fingerprints (order-independent)
268+
IGNORE_BY_TYPE = {
269+
4: { # Processor Information
270+
'Socket Designation', 'ID',
271+
'L1 Cache Handle', 'L2 Cache Handle', 'L3 Cache Handle',
272+
'Serial Number', 'Asset Tag', 'Part Number',
273+
'Core Count', 'Core Enabled', # often bogus or per-core
249274
},
250-
('0x0001', '0', '26'): {
251-
'dminame': 'BIOS Information',
252-
'dmisize': 26,
253-
'dmitype': 0,
254-
'Vendor': 'Dell Inc.',
255-
'Version': '1.7.1',
256-
'Release Date': '12/06/2024',
257-
'ROM Size': '64 MB',
258-
...,
275+
17: { # Memory Device
276+
'Locator', 'Bank Locator', 'Device Locator',
277+
'Memory Array Mapped Address Handle', 'Mem Array Error Info Handle',
278+
'Total Width', 'Data Width', # width can vary by board reporting; not essential
279+
'Serial Number', # sometimes blank; can differ even for identical sticks
259280
},
260-
...
261281
}
262-
"""
263-
data = {}
264282

283+
def _fingerprint(rec):
284+
"""Build a stable, type-aware fingerprint for dedupe."""
285+
dtype = int(rec.get('dmitype', -1))
286+
ignore = IGNORE_BY_TYPE.get(dtype, set())
287+
base = (_normalize(rec.get('dminame', '')), dtype)
288+
289+
# normalize all fields except ignored + meta
290+
items = []
291+
for k in sorted(rec.keys()):
292+
if k in ('dminame', 'dmitype', 'dmisize'):
293+
continue
294+
if k in ignore:
295+
continue
296+
v = rec[k]
297+
# Multi-line blocks were joined with tabs; normalize them
298+
items.append((k, _normalize(v)))
299+
return base + tuple(items)
300+
301+
# --- parse loop ----------------------------------------------------------
265302
for record in output.split('\n\n'):
266303
record_element = record.splitlines()
267304
if len(record_element) < 3:
@@ -271,8 +308,8 @@ def dmidecode_parse(output):
271308
if not handle_data:
272309
continue
273310

274-
dmi_handle = handle_data[0]
275-
data[dmi_handle] = {
311+
dmi_handle = handle_data[0] # ('0x0004','4','42')
312+
current = {
276313
'dminame': record_element[1],
277314
'dmisize': int(dmi_handle[2]),
278315
'dmitype': int(dmi_handle[1]),
@@ -282,11 +319,11 @@ def dmidecode_parse(output):
282319
in_block_list = []
283320

284321
for line in record_element[2:]:
285-
if in_block_element:
322+
if in_block_element is not None:
286323
in_block_data = IN_BLOCK_RE.findall(line)
287324
if in_block_data:
288325
in_block_list.append(in_block_data[0][0])
289-
data[dmi_handle][in_block_element] = '\t\t'.join(in_block_list)
326+
current[in_block_element] = '\t\t'.join(in_block_list)
290327
continue
291328
else:
292329
in_block_element = None
@@ -295,14 +332,62 @@ def dmidecode_parse(output):
295332
record_data = RECORD_RE.findall(line)
296333
if record_data:
297334
key, value = record_data[0]
298-
data[dmi_handle][key] = value
335+
current[key] = value
299336
continue
300337

301338
record_data2 = RECORD2_RE.findall(line)
302339
if record_data2:
303340
in_block_element = record_data2[0][0]
304341
in_block_list = []
305342

343+
# Type-specific filters (drop obviously irrelevant entries)
344+
dtype = int(current.get('dmitype', -1))
345+
if dtype == 4:
346+
# keep only populated/enabled when reported
347+
status = _lower(current.get('Status'))
348+
if status and not ('populated' in status and 'enabled' in status):
349+
continue
350+
if dtype == 17:
351+
if _drop_unpopulated_type17(current):
352+
continue
353+
354+
# Build type-aware fingerprint and aggregate
355+
fp = _fingerprint(current)
356+
if fp not in seen:
357+
# first occurrence becomes the representative
358+
# attach dedupe metadata containers up-front (lazy-friendly)
359+
rep = dict(current)
360+
rep['dedup_count'] = 1
361+
rep['dedup_handles'] = [dmi_handle[0]]
362+
# capture socket/slot labels if present for admin visibility
363+
if dtype == 4 and 'Socket Designation' in current:
364+
rep['dedup_sockets'] = [current['Socket Designation']]
365+
if dtype == 17:
366+
labels = []
367+
for k in ('Locator', 'Device Locator', 'Bank Locator'):
368+
if current.get(k):
369+
labels.append(current[k])
370+
if labels:
371+
rep['dedup_slots'] = sorted({*labels})
372+
seen[fp] = (dmi_handle, rep)
373+
data[dmi_handle] = rep
374+
else:
375+
first_handle, rep = seen[fp]
376+
rep['dedup_count'] = int(rep.get('dedup_count', 1)) + 1
377+
rep['dedup_handles'].append(dmi_handle[0])
378+
# enrich socket/slot lists
379+
if dtype == 4 and current.get('Socket Designation'):
380+
sockets = set(rep.get('dedup_sockets', []))
381+
sockets.add(current['Socket Designation'])
382+
rep['dedup_sockets'] = sorted(sockets)
383+
if dtype == 17:
384+
slots = set(rep.get('dedup_slots', []))
385+
for k in ('Locator', 'Device Locator', 'Bank Locator'):
386+
if current.get(k):
387+
slots.add(current[k])
388+
if slots:
389+
rep['dedup_slots'] = sorted(slots)
390+
306391
return data
307392

308393

0 commit comments

Comments
 (0)