Skip to content

Commit 0f68877

Browse files
authored
Added support for paths with spaces (#26)
Instead of splitting each line of ldd output on spaces, this commit implements a regex-based parser. The parsing is split into two separate cases (lines with a `=>` arrow and lines without one), each with its own regex, to avoid overcomplicating the parser. Fixes issue #25.
1 parent 1eb2070 commit 0f68877

2 files changed

Lines changed: 66 additions & 71 deletions

File tree

lddwrap/__init__.py

Lines changed: 48 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import collections
55
import copy
66
import json
7+
import os
78
import pathlib
89
import re
910
import subprocess
@@ -87,34 +88,13 @@ def as_mapping(self) -> Mapping[str, Any]:
8788
("unused", self.unused)])
8889

8990

90-
_MEM_ADDRESS_RE = re.compile(r'^\s*\(([^)]*)\)\s*$')
91-
92-
93-
def _strip_mem_address(text: str) -> str:
94-
r"""
95-
Strip the space and brackets from the mem address in the output.
96-
97-
:param text: to be stripped
98-
:return: bare mem address
99-
100-
>>> _strip_mem_address('(0x00007f9a1a329000)')
101-
'0x00007f9a1a329000'
102-
103-
>>> _strip_mem_address(' (0x00007f9a1a329000) ')
104-
'0x00007f9a1a329000'
105-
106-
>>> _strip_mem_address('\t(0x00007f9a1a329000)\t')
107-
'0x00007f9a1a329000'
108-
"""
109-
mtch = _MEM_ADDRESS_RE.match(text)
110-
if not mtch:
111-
raise RuntimeError(("Unexpected mem address. Expected to match {}, "
112-
"but got: {!r}").format(_MEM_ADDRESS_RE.pattern,
113-
text))
114-
115-
return mtch.group(1)
91+
_LDD_ARROW_OUTPUT_RE = re.compile(
92+
r"(?P<soname>.+)\s=>\s(?P<dep_path>.*)\s\(?(?P<mem_address>\w*)\)?")
93+
_LDD_NON_ARROW_OUTPUT_RE = re.compile(
94+
r"(?P<dep_path>.+)\s\(?(?P<mem_address>\w*)\)?")
11695

11796

97+
# pylint: disable=too-many-branches
11898
def _parse_line(line: str) -> Optional[Dependency]:
11999
"""
120100
Parse single line of ldd output.
@@ -123,8 +103,6 @@ def _parse_line(line: str) -> Optional[Dependency]:
123103
:return: dependency or None if line was empty
124104
125105
"""
126-
found = not 'not found' in line
127-
parts = [part.strip() for part in line.split(' ')]
128106
# pylint: disable=line-too-long
129107
# There are two types of outputs for a dependency, with or without soname.
130108
# The VDSO is a special case (see https://man7.org/linux/man-pages/man7/vdso.7.html)
@@ -137,51 +115,55 @@ def _parse_line(line: str) -> Optional[Dependency]:
137115
# with soname but not found: 'libboost_program_options.so.1.62.0 => not found'
138116
# with soname but without rpath: 'linux-vdso.so.1 => (0x00007ffd7c7fd000)'
139117
# pylint: enable=line-too-long
118+
found = not 'not found' in line
119+
soname = None
120+
dep_path = None
121+
mem_address = None
140122
if '=>' in line:
141-
if len(parts) != 4:
142-
raise RuntimeError(
143-
"Expected 4 parts in the line but found {}: {}".format(
144-
len(parts), line))
145-
146-
soname = None
147-
dep_path = None
148-
mem_address = None
123+
mtch = _LDD_ARROW_OUTPUT_RE.match(line)
124+
if not mtch:
125+
raise RuntimeError(("Unexpected ldd output. Expected to match {}, "
126+
"but got: {!r}").format(
127+
_LDD_ARROW_OUTPUT_RE.pattern, line))
149128
if found:
150-
soname = parts[0]
151-
if parts[2] != '':
152-
dep_path = pathlib.Path(parts[2])
153-
154-
mem_address = _strip_mem_address(text=parts[3])
129+
soname = mtch["soname"]
130+
if mtch["dep_path"]:
131+
dep_path = pathlib.Path(mtch["dep_path"])
132+
if mtch["mem_address"]:
133+
mem_address = mtch["mem_address"]
155134
else:
156-
if "/" in parts[0]:
157-
dep_path = pathlib.Path(parts[0])
135+
if os.sep in mtch["soname"]:
136+
# This is a special case where the dep_path comes before the
137+
# arrow and we have no soname
138+
dep_path = pathlib.Path(mtch["soname"])
158139
else:
159-
soname = parts[0]
160-
161-
return Dependency(
162-
soname=soname, path=dep_path, found=found, mem_address=mem_address)
140+
soname = mtch["soname"]
163141
else:
164-
if len(parts) != 2:
165-
# Please see https://github.com/Parquery/pylddwrap/pull/14
166-
if 'no version information available' in line:
167-
return None
142+
# Please see https://github.com/Parquery/pylddwrap/pull/14
143+
if 'no version information available' in line:
144+
return None
145+
146+
mtch = _LDD_NON_ARROW_OUTPUT_RE.match(line)
147+
if not mtch:
148+
raise RuntimeError(("Unexpected ldd output. Expected to match {}, "
149+
"but got: {!r}").format(
150+
_LDD_NON_ARROW_OUTPUT_RE.pattern, line))
151+
# Special case for linux-vdso
152+
if mtch["dep_path"].startswith("linux-vdso"):
153+
soname = mtch["dep_path"]
154+
else:
155+
dep_path = pathlib.Path(mtch["dep_path"])
168156

169-
raise RuntimeError(
170-
"Expected 2 parts in the line but found {}: {}".format(
171-
len(parts), line))
157+
found = True
158+
mem_address = mtch["mem_address"]
172159

173-
if parts[0].startswith('linux-vdso'):
174-
soname = parts[0]
175-
path = None
176-
else:
177-
soname = None
178-
path = pathlib.Path(parts[0])
179-
180-
return Dependency(
181-
soname=soname,
182-
path=path,
183-
found=True,
184-
mem_address=_strip_mem_address(text=parts[1]))
160+
# Sanity check to see if it didn't parse garbage:
161+
# dep_path should have at least a `/` somewhere in the filepath
162+
if dep_path and os.sep not in str(dep_path):
163+
raise RuntimeError("Unexpected library path: {}".format(dep_path))
164+
165+
return Dependency(
166+
soname=soname, path=dep_path, found=found, mem_address=mem_address)
185167

186168

187169
@icontract.require(lambda path: path.is_file())

tests/test_ldd.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ def test_parse_line(self):
6060
"(0x00007f4b78462000)",
6161
"libz.so.1 => not found",
6262
"../build/debug/libextstr.so => not found",
63-
"/home/user/lib/liblmdb.so => not found"
63+
"/home/user/lib/liblmdb.so => not found",
64+
"/home/u s e r/lib/liblmdb.so => not found",
65+
"UnityPlayer.so => /home/user/games/q u d/./UnityPlayer.so "
66+
"(0x00007f90f290f000)"
6467
]
6568
# yapf: enable
6669

@@ -114,7 +117,19 @@ def test_parse_line(self):
114117
path=pathlib.Path("/home/user/lib/liblmdb.so"),
115118
found=False,
116119
mem_address=None,
117-
unused=None)
120+
unused=None),
121+
lddwrap.Dependency(
122+
soname=None,
123+
path=pathlib.Path("/home/u s e r/lib/liblmdb.so"),
124+
found=False,
125+
mem_address=None,
126+
unused=None),
127+
lddwrap.Dependency(
128+
soname="UnityPlayer.so",
129+
path=pathlib.Path("/home/user/games/q u d/./UnityPlayer.so"),
130+
found=True,
131+
mem_address="0x00007f90f290f000",
132+
unused=None),
118133
]
119134

120135
for i, line in enumerate(lines):
@@ -137,9 +152,7 @@ def test_parse_wrong_line(self):
137152
run_err = err
138153

139154
self.assertIsNotNone(run_err)
140-
self.assertEqual(
141-
'Expected 2 parts in the line but found {}: {}'.format(
142-
line.count(' ') + 1, line), str(run_err))
155+
self.assertTrue(str(run_err).startswith("Unexpected library path:"))
143156

144157
def test_parse_non_indented_line(self):
145158
"""Lines without leading indentation, at this point in processing, are

0 commit comments

Comments
 (0)