Skip to content

Commit 319e66b

Browse files
authored
Merge pull request #216 from kotfu/fix/transcript_regexes
Transcript regexes now have predictable, tested, and documented behavior
2 parents 3a842ab + d5f91bd commit 319e66b

21 files changed

Lines changed: 466 additions & 143 deletions

cmd2.py

Lines changed: 95 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2021,10 +2021,12 @@ def parsed(self, raw):
20212021
class HistoryItem(str):
20222022
"""Class used to represent an item in the History list.
20232023
2024-
Thing wrapper around str class which adds a custom format for printing. It also keeps track of its index in the
2025-
list as well as a lowercase representation of itself for convenience/efficiency.
2024+
Thin wrapper around str class which adds a custom format for printing. It
2025+
also keeps track of its index in the list as well as a lowercase
2026+
representation of itself for convenience/efficiency.
2027+
20262028
"""
2027-
listformat = '-------------------------[%d]\n%s\n'
2029+
listformat = '-------------------------[{}]\n{}\n'
20282030

20292031
# noinspection PyUnusedLocal
20302032
def __init__(self, instr):
@@ -2037,7 +2039,7 @@ def pr(self):
20372039
20382040
:return: str - pretty print string version of a HistoryItem
20392041
"""
2040-
return self.listformat % (self.idx, str(self))
2042+
return self.listformat.format(self.idx, str(self).rstrip())
20412043

20422044

20432045
class History(list):
@@ -2230,12 +2232,6 @@ class Cmd2TestCase(unittest.TestCase):
22302232
that will execute the commands in a transcript file and expect the results shown.
22312233
See example.py"""
22322234
cmdapp = None
2233-
regexPattern = pyparsing.QuotedString(quoteChar=r'/', escChar='\\', multiline=True, unquoteResults=True)
2234-
regexPattern.ignore(pyparsing.cStyleComment)
2235-
notRegexPattern = pyparsing.Word(pyparsing.printables)
2236-
notRegexPattern.setParseAction(lambda t: re.escape(t[0]))
2237-
expectationParser = regexPattern | notRegexPattern
2238-
anyWhitespace = re.compile(r'\s', re.DOTALL | re.MULTILINE)
22392235

22402236
def fetchTranscripts(self):
22412237
self.transcripts = {}
@@ -2295,8 +2291,8 @@ def _test_transcript(self, fname, transcript):
22952291
result = self.cmdapp.stdout.read()
22962292
# Read the expected result from transcript
22972293
if strip_ansi(line).startswith(self.cmdapp.visible_prompt):
2298-
message = '\nFile %s, line %d\nCommand was:\n%r\nExpected: (nothing)\nGot:\n%r\n' % \
2299-
(fname, line_num, command, result)
2294+
message = '\nFile {}, line {}\nCommand was:\n{}\nExpected: (nothing)\nGot:\n{}\n'.format(
2295+
fname, line_num, command, result)
23002296
self.assert_(not (result.strip()), message)
23012297
continue
23022298
expected = []
@@ -2309,15 +2305,95 @@ def _test_transcript(self, fname, transcript):
23092305
break
23102306
line_num += 1
23112307
expected = ''.join(expected)
2312-
# Compare actual result to expected
2313-
message = '\nFile %s, line %d\nCommand was:\n%s\nExpected:\n%s\nGot:\n%s\n' % \
2314-
(fname, line_num, command, expected, result)
2315-
expected = self.expectationParser.transformString(expected)
2316-
# checking whitespace is a pain - let's skip it
2317-
expected = self.anyWhitespace.sub('', expected)
2318-
result = self.anyWhitespace.sub('', result)
2308+
2309+
# transform the expected text into a valid regular expression
2310+
expected = self._transform_transcript_expected(expected)
2311+
message = '\nFile {}, line {}\nCommand was:\n{}\nExpected:\n{}\nGot:\n{}\n'.format(
2312+
fname, line_num, command, expected, result)
23192313
self.assertTrue(re.match(expected, result, re.MULTILINE | re.DOTALL), message)
23202314

2315+
def _transform_transcript_expected(self, s):
    """Parse the string with slashed regexes into a valid regex.

    Text outside of slashes is passed through ``re.escape`` so it is matched
    literally. Text between a pair of unescaped slashes is copied into the
    result as a regular expression, without the slashes. A slash preceded by
    a backslash is a literal slash and never opens or closes a regex. An
    opening slash which has no closing slash is treated as plain text, along
    with everything after it.

    :param s: str - expected output read from a transcript
    :return: str - regular expression pattern built from {s}
    """
    regex = ''
    start = 0

    while True:
        (regex, first_slash_pos, start) = self._escaped_find(regex, s, start, False)
        if first_slash_pos == -1:
            # no more slashes, add the rest of the string and bail
            regex += re.escape(s[start:])
            break

        # there is a slash, add everything before it as plain text
        regex += re.escape(s[start:first_slash_pos])
        start = first_slash_pos + 1
        # remember what we have built so far: the search for the closing
        # slash appends unescaped regex text and moves start whenever it
        # passes an escaped slash, and we must undo that if the search fails
        plain_prefix = regex
        # and go find the next one
        (regex, second_slash_pos, start) = self._escaped_find(regex, s, start, True)
        if second_slash_pos > 0:
            # add everything between the slashes (but not the slashes)
            # as a regular expression
            regex += s[start:second_slash_pos]
            # and change where we start looking for slashes on the
            # next turn through the loop
            start = second_slash_pos + 1
        else:
            # No closing slash, so the opening slash was not the start of a
            # regex. Throw away whatever the regex-mode search appended and
            # redo the tail as plain text: the opening slash itself, then
            # the rest of the string with escaped slashes handled as plain
            # text.
            regex = plain_prefix + re.escape(s[first_slash_pos])
            (regex, _, start) = self._escaped_find(regex, s, first_slash_pos + 1, False)
            regex += re.escape(s[start:])
            break
    return regex
2348+
2349+
def _escaped_find(self, regex, s, start, in_regex):
2350+
"""
2351+
Find the next slash in {s} after {start} that is not preceded by a backslash.
2352+
2353+
If we find an escaped slash, add everything up to and including it to regex,
2354+
updating {start}. {start} therefore serves two purposes, tells us where to start
2355+
looking for the next thing, and also tells us where in {s} we have already
2356+
added things to {regex}
2357+
2358+
{in_regex} specifies whether we are currently searching in a regex, we behave
2359+
differently if we are or if we aren't.
2360+
"""
2361+
2362+
while True:
2363+
pos = s.find('/', start)
2364+
if pos == -1:
2365+
# no match, return to caller
2366+
break
2367+
elif pos == 0:
2368+
# slash at the beginning of the string, so it can't be
2369+
# escaped. We found it.
2370+
break
2371+
else:
2372+
# check if the slash is preceeded by a backslash
2373+
if s[pos-1:pos] == '\\':
2374+
# it is.
2375+
if in_regex:
2376+
# add everything up to the backslash as a
2377+
# regular expression
2378+
regex += s[start:pos-1]
2379+
# skip the backslash, and add the slash
2380+
regex += s[pos]
2381+
else:
2382+
# add everything up to the backslash as escaped
2383+
# plain text
2384+
regex += re.escape(s[start:pos-1])
2385+
# and then add the slash as escaped
2386+
# plain text
2387+
regex += re.escape(s[pos])
2388+
# update start to show we have handled everything
2389+
# before it
2390+
start = pos+1
2391+
# and continue to look
2392+
else:
2393+
# slash is not escaped, this is what we are looking for
2394+
break
2395+
return (regex, pos, start)
2396+
23212397
def tearDown(self):
23222398
if self.cmdapp:
23232399
# Restore stdout

docs/freefeatures.rst

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ quotation marks if it is more than a one-word command.
8282

8383
.. _Argparse: https://docs.python.org/3/library/argparse.html
8484

85+
.. _output_redirection:
86+
8587
Output redirection
8688
==================
8789

@@ -301,34 +303,20 @@ is equivalent to ``shell ls``.)
301303
Transcript-based testing
302304
========================
303305

304-
If the entire transcript (input and output) of a successful session of
305-
a ``cmd2``-based app is copied from the screen and pasted into a text
306-
file, ``transcript.txt``, then a transcript test can be run against it::
307-
308-
python app.py --test transcript.txt
306+
A transcript is both the input and output of a successful session of a
307+
``cmd2``-based app which is saved to a text file. The transcript can be played
308+
back into the app as a unit test.
309309

310-
Any non-whitespace deviations between the output prescribed in ``transcript.txt`` and
311-
the actual output from a fresh run of the application will be reported
312-
as a unit test failure. (Whitespace is ignored during the comparison.)
310+
.. code-block:: none
313311
314-
Regular expressions can be embedded in the transcript inside paired ``/``
315-
slashes. These regular expressions should not include any whitespace
316-
expressions.
317-
318-
.. note::
312+
$ python example.py --test transcript_regex.txt
313+
.
314+
----------------------------------------------------------------------
315+
Ran 1 test in 0.013s
319316
320-
If you have set ``allow_cli_args`` to False in order to disable parsing of command line arguments at invocation,
321-
then the use of ``-t`` or ``--test`` to run transcript testing is automatically disabled. In this case, you can
322-
alternatively provide a value for the optional ``transcript_files`` when constructing the instance of your
323-
``cmd2.Cmd`` derived class in order to cause a transcript test to run::
324-
325-
from cmd2 import Cmd
326-
class App(Cmd):
327-
# customized attributes and methods here
317+
OK
328318
329-
if __name__ == '__main__':
330-
app = App(transcript_files=['exampleSession.txt'])
331-
app.cmdloop()
319+
See :doc:`transcript` for more details.
332320

333321

334322
Tab-Completion

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ Contents:
6666
freefeatures
6767
settingchanges
6868
unfreefeatures
69+
transcript
6970
integrating
7071
hooks
7172
alternatives

docs/transcript.rst

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
========================
2+
Transcript-based testing
3+
========================
4+
5+
A transcript is both the input and output of a successful session of a
6+
``cmd2``-based app which is saved to a text file. With no extra work on your
7+
part, your app can play back these transcripts as a unit test. Transcripts can
8+
contain regular expressions, which provide the flexibility to match responses
9+
from commands that produce dynamic or variable output.
10+
11+
.. highlight:: none
12+
13+
Creating a transcript
14+
=====================
15+
16+
Here's a transcript created from ``python examples/example.py``::
17+
18+
(Cmd) say -r 3 Goodnight, Gracie
19+
Goodnight, Gracie
20+
Goodnight, Gracie
21+
Goodnight, Gracie
22+
(Cmd) mumble maybe we could go to lunch
23+
like maybe we ... could go to hmmm lunch
24+
(Cmd) mumble maybe we could go to lunch
25+
well maybe we could like go to er lunch right?
26+
27+
This transcript has three commands: they are on the lines that begin with the
28+
prompt. The first command looks like this::
29+
30+
(Cmd) say -r 3 Goodnight, Gracie
31+
32+
Following each command is the output generated by that command.
33+
34+
The transcript ignores all lines in the file until it reaches the first line
35+
that begins with the prompt. You can take advantage of this by using the first
36+
lines of the transcript as comments::
37+
38+
# Lines at the beginning of the transcript that do not
39+
; start with the prompt i.e. '(Cmd) ' are ignored.
40+
/* You can use them for comments. */
41+
42+
All six of these lines before the first prompt are treated as comments.
43+
44+
(Cmd) say -r 3 Goodnight, Gracie
45+
Goodnight, Gracie
46+
Goodnight, Gracie
47+
Goodnight, Gracie
48+
(Cmd) mumble maybe we could go to lunch
49+
like maybe we ... could go to hmmm lunch
50+
(Cmd) mumble maybe we could go to lunch
51+
maybe we could like go to er lunch right?
52+
53+
In this example I've used several different commenting styles, and even bare
54+
text. It doesn't matter what you put on those beginning lines. Everything before::
55+
56+
(Cmd) say -r 3 Goodnight, Gracie
57+
58+
will be ignored.
59+
60+
61+
Regular Expressions
62+
===================
63+
64+
If we used the above transcript as-is, it would likely fail. As you can see,
65+
the ``mumble`` command doesn't always return the same thing: it inserts random
66+
words into the input.
67+
68+
Regular expressions can be included in the response portion of a transcript,
69+
and are surrounded by slashes::
70+
71+
(Cmd) mumble maybe we could go to lunch
72+
/.*\bmaybe\b.*\bcould\b.*\blunch\b.*/
73+
(Cmd) mumble maybe we could go to lunch
74+
/.*\bmaybe\b.*\bcould\b.*\blunch\b.*/
75+
76+
Without creating a tutorial on regular expressions, this one matches anything
77+
that has the words ``maybe``, ``could``, and ``lunch`` in that order. It doesn't
78+
ensure that ``we`` or ``go`` or ``to`` appear in the output, but it does work if
79+
mumble happens to add words to the beginning or the end of the output.
80+
81+
Since the output could be multiple lines long, ``cmd2`` uses multiline regular
82+
expression matching, and also uses the ``DOTALL`` flag. These two flags subtly
83+
change the behavior of commonly used special characters like ``.``, ``^`` and
84+
``$``, so you may want to double check the `Python regular expression
85+
documentation <https://docs.python.org/3/library/re.html>`_.
86+
87+
If your output has slashes in it, you will need to escape those slashes so the
88+
stuff between them is not interpreted as a regular expression. In this transcript::
89+
90+
(Cmd) say cd /usr/local/lib/python3.6/site-packages
91+
/usr/local/lib/python3.6/site-packages
92+
93+
the output contains slashes. The text between the first slash and the second
94+
slash will be interpreted as a regular expression, and those two slashes will
95+
not be included in the comparison. When replayed, this transcript would
96+
therefore fail. To fix it, we could either write a regular expression to match
97+
the path instead of specifying it verbatim, or we can escape the slashes::
98+
99+
(Cmd) say cd /usr/local/lib/python3.6/site-packages
100+
\/usr\/local\/lib\/python3.6\/site-packages
101+
102+
.. warning::
103+
104+
Be aware of trailing spaces and newlines. Your commands might output
105+
trailing spaces which are impossible to see. Instead of leaving them
106+
invisible, you can add a regular expression to match them, so that you can
107+
see where they are when you look at the transcript::
108+
109+
(Cmd) set prompt
110+
prompt: (Cmd)/ /
111+
112+
Some terminal emulators strip trailing space when you copy text from them.
113+
This could make the actual data generated by your app different than the
114+
text you pasted into the transcript, and it might not be readily obvious why
115+
the transcript is not passing. Consider using :ref:`output_redirection` to
116+
the clipboard or to a file to ensure you accurately capture the output of
117+
your command.
118+
119+
If you aren't using regular expressions, make sure the newlines at the end
120+
of your transcript exactly match the output of your commands. A common cause
121+
of a failing transcript is an extra or missing newline.
122+
123+
If you are using regular expressions, be aware that depending on how you
124+
write your regex, the newlines after the regex may or may not matter.
125+
``\Z`` matches *after* the newline at the end of the string, whereas
126+
``$`` matches the end of the string *or* just before a newline.
127+
128+
129+
Running a transcript
130+
====================
131+
132+
Once you have created a transcript, it's easy to have your application play it
133+
back and check the output. From within the ``examples/`` directory::
134+
135+
$ python example.py --test transcript_regex.txt
136+
.
137+
----------------------------------------------------------------------
138+
Ran 1 test in 0.013s
139+
140+
OK
141+
142+
The output will look familiar if you use ``unittest``, because that's exactly
143+
what happens. Each command in the transcript is run, and we ``assert`` the
144+
output matches the expected result from the transcript.
145+
146+
.. note::
147+
148+
If you have set ``allow_cli_args`` to False in order to disable parsing of
149+
command line arguments at invocation, then the use of ``-t`` or ``--test``
150+
to run transcript testing is automatically disabled. In this case, you can
151+
alternatively provide a value for the optional ``transcript_files`` when
152+
constructing the instance of your ``cmd2.Cmd`` derived class in order to
153+
cause a transcript test to run::
154+
155+
from cmd2 import Cmd
156+
class App(Cmd):
157+
# customized attributes and methods here
158+
159+
if __name__ == '__main__':
160+
app = App(transcript_files=['exampleSession.txt'])
161+
app.cmdloop()

0 commit comments

Comments
 (0)