1414"""
1515
1616__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
17- __version__ = '2025042001 '
17+ __version__ = '2025090901 '
1818
1919import re
2020import subprocess
@@ -219,49 +219,86 @@ def cpu_type(dmi):
219219
220220def dmidecode_parse (output ):
221221 """
222- Parse the raw output of `dmidecode` into a structured dictionary.
223-
224- This function processes the raw textual output of the `dmidecode` tool and extracts
225- structured information about system hardware, organized by DMI handle.
226-
227- ### Parameters
228- - **output** (`str`):
229- The raw string output from the `dmidecode` command.
230-
231- ### Returns
232- - **dict**:
233- A dictionary keyed by DMI handle. Each value contains fields such as dminame, dmisize,
234- dmitype, and parsed key-value pairs from the output.
235-
236- ### Notes
237- - Records are separated by double newlines in the output.
238- - Only records with at least three lines are considered valid.
239- - Multi-line blocks are handled if needed.
222+ Parse `dmidecode` output into a dict, collapsing near-duplicates in an admin-friendly way.
223+
224+ Type-aware dedupe rules:
225+ - Type 4 (Processor Information): ignore per-thread/core/socket noise fields; merge;
226+ add dedup_count/dedup_sockets
227+ - Type 17 (Memory Device): drop unpopulated; ignore slot labels; merge;
228+ add dedup_count/dedup_slots
229+ - Other types: generic dedupe (exact content after normalization)
230+
231+ Returns:
232+ { dmi_handle_tuple: parsed_record_dict, ... }
233+ where parsed_record_dict includes keys: dminame, dmisize, dmitype, parsed fields,
234+ and possibly:
235+ - dedup_count (int >= 1)
236+ - dedup_sockets / dedup_slots (sorted list of labels encountered)
237+ - dedup_handles (list of original DMI handle strings that were merged)
238+ """
239+ data = {}
240+ seen = {} # fp -> (first_handle, aggregated_record)
241+
242+ # --- helpers -------------------------------------------------------------
243+ def _normalize (s ):
244+ if s is None :
245+ return ''
246+ s = str (s ).strip ()
247+ # treat common "unknown" variants as empty so they don't block dedupe
248+ if s .lower () in {'unknown' , 'not specified' , 'not provided' , 'n/a' }:
249+ return ''
250+ # collapse whitespace
251+ return ' ' .join (s .split ())
252+
def _lower(s):
    """Return the `_normalize`d form of `s` converted to lower case."""
    normalized = _normalize(s)
    return normalized.lower()
255+
def _drop_unpopulated_type17(rec):
    """Tell whether a Memory Device (type 17) record should be discarded as empty.

    The decision is based solely on the record's 'Size' field after it has
    been passed through `_lower`.

    Parameters:
        rec (dict): Parsed record; only the 'Size' key is read.

    Returns:
        bool: True if the size is missing/empty, contains
        "no module installed", or denotes zero ("0" or "0 <unit>").
    """
    reported_size = _lower(rec.get('Size'))
    return (
        not reported_size
        or 'no module installed' in reported_size
        # Sometimes vendors encode 0-sized entries
        or reported_size == '0'
        or reported_size.startswith('0 ')
    )
240266
241- ### Example
242- >>> dmidecode_parse(dmidecode_output)
243- {
244- ('0xDA00', '218', '251'): {
245- 'dminame': 'OEM-specific Type',
246- 'dmisize': 251,
247- 'dmitype': 218,
248- 'H': 'D\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0\t \t 0'
267+ # Fields to ignore by DMI type when constructing fingerprints (order-independent)
268+ IGNORE_BY_TYPE = {
269+ 4 : { # Processor Information
270+ 'Socket Designation' , 'ID' ,
271+ 'L1 Cache Handle' , 'L2 Cache Handle' , 'L3 Cache Handle' ,
272+ 'Serial Number' , 'Asset Tag' , 'Part Number' ,
273+ 'Core Count' , 'Core Enabled' , # often bogus or per-core
249274 },
250- ('0x0001', '0', '26'): {
251- 'dminame': 'BIOS Information',
252- 'dmisize': 26,
253- 'dmitype': 0,
254- 'Vendor': 'Dell Inc.',
255- 'Version': '1.7.1',
256- 'Release Date': '12/06/2024',
257- 'ROM Size': '64 MB',
258- ...,
275+ 17 : { # Memory Device
276+ 'Locator' , 'Bank Locator' , 'Device Locator' ,
277+ 'Memory Array Mapped Address Handle' , 'Mem Array Error Info Handle' ,
278+ 'Total Width' , 'Data Width' , # width can vary by board reporting; not essential
279+ 'Serial Number' , # sometimes blank; can differ even for identical sticks
259280 },
260- ...
261281 }
262- """
263- data = {}
264282
def _fingerprint(rec):
    """Build a hashable, type-aware fingerprint of a record for deduplication.

    The fingerprint is a tuple that starts with the normalized 'dminame' and
    the integer 'dmitype', followed by `(key, normalized_value)` pairs in
    sorted key order. The meta keys 'dminame', 'dmitype' and 'dmisize' as
    well as every key listed in `IGNORE_BY_TYPE` for the record's type are
    left out of the pairs.

    Parameters:
        rec (dict): Parsed record.

    Returns:
        tuple: The fingerprint; equal for records considered duplicates.
    """
    dmi_type = int(rec.get('dmitype', -1))
    skipped = IGNORE_BY_TYPE.get(dmi_type, set())
    meta_keys = ('dminame', 'dmitype', 'dmisize')

    # Values of multi-line blocks are tab-joined strings; `_normalize`
    # collapses that whitespace like any other.
    fields = tuple(
        (key, _normalize(rec[key]))
        for key in sorted(rec)
        if key not in meta_keys and key not in skipped
    )
    return (_normalize(rec.get('dminame', '')), dmi_type) + fields
300+
301+ # --- parse loop ----------------------------------------------------------
265302 for record in output .split ('\n \n ' ):
266303 record_element = record .splitlines ()
267304 if len (record_element ) < 3 :
@@ -271,8 +308,8 @@ def dmidecode_parse(output):
271308 if not handle_data :
272309 continue
273310
274- dmi_handle = handle_data [0 ]
275- data [ dmi_handle ] = {
311+ dmi_handle = handle_data [0 ] # ('0x0004','4','42')
312+ current = {
276313 'dminame' : record_element [1 ],
277314 'dmisize' : int (dmi_handle [2 ]),
278315 'dmitype' : int (dmi_handle [1 ]),
@@ -282,11 +319,11 @@ def dmidecode_parse(output):
282319 in_block_list = []
283320
284321 for line in record_element [2 :]:
285- if in_block_element :
322+ if in_block_element is not None :
286323 in_block_data = IN_BLOCK_RE .findall (line )
287324 if in_block_data :
288325 in_block_list .append (in_block_data [0 ][0 ])
289- data [ dmi_handle ] [in_block_element ] = '\t \t ' .join (in_block_list )
326+ current [in_block_element ] = '\t \t ' .join (in_block_list )
290327 continue
291328 else :
292329 in_block_element = None
@@ -295,14 +332,62 @@ def dmidecode_parse(output):
295332 record_data = RECORD_RE .findall (line )
296333 if record_data :
297334 key , value = record_data [0 ]
298- data [ dmi_handle ] [key ] = value
335+ current [key ] = value
299336 continue
300337
301338 record_data2 = RECORD2_RE .findall (line )
302339 if record_data2 :
303340 in_block_element = record_data2 [0 ][0 ]
304341 in_block_list = []
305342
343+ # Type-specific filters (drop obviously irrelevant entries)
344+ dtype = int (current .get ('dmitype' , - 1 ))
345+ if dtype == 4 :
346+ # keep only populated/enabled when reported
347+ status = _lower (current .get ('Status' ))
348+ if status and not ('populated' in status and 'enabled' in status ):
349+ continue
350+ if dtype == 17 :
351+ if _drop_unpopulated_type17 (current ):
352+ continue
353+
354+ # Build type-aware fingerprint and aggregate
355+ fp = _fingerprint (current )
356+ if fp not in seen :
357+ # first occurrence becomes the representative
358+ # attach dedupe metadata containers up-front (lazy-friendly)
359+ rep = dict (current )
360+ rep ['dedup_count' ] = 1
361+ rep ['dedup_handles' ] = [dmi_handle [0 ]]
362+ # capture socket/slot labels if present for admin visibility
363+ if dtype == 4 and 'Socket Designation' in current :
364+ rep ['dedup_sockets' ] = [current ['Socket Designation' ]]
365+ if dtype == 17 :
366+ labels = []
367+ for k in ('Locator' , 'Device Locator' , 'Bank Locator' ):
368+ if current .get (k ):
369+ labels .append (current [k ])
370+ if labels :
371+ rep ['dedup_slots' ] = sorted ({* labels })
372+ seen [fp ] = (dmi_handle , rep )
373+ data [dmi_handle ] = rep
374+ else :
375+ first_handle , rep = seen [fp ]
376+ rep ['dedup_count' ] = int (rep .get ('dedup_count' , 1 )) + 1
377+ rep ['dedup_handles' ].append (dmi_handle [0 ])
378+ # enrich socket/slot lists
379+ if dtype == 4 and current .get ('Socket Designation' ):
380+ sockets = set (rep .get ('dedup_sockets' , []))
381+ sockets .add (current ['Socket Designation' ])
382+ rep ['dedup_sockets' ] = sorted (sockets )
383+ if dtype == 17 :
384+ slots = set (rep .get ('dedup_slots' , []))
385+ for k in ('Locator' , 'Device Locator' , 'Bank Locator' ):
386+ if current .get (k ):
387+ slots .add (current [k ])
388+ if slots :
389+ rep ['dedup_slots' ] = sorted (slots )
390+
306391 return data
307392
308393
0 commit comments