Merge pull request #11 from EducationalTestingService/feature/add_unit_tests

desilinguist · desilinguist · commit 2388603666b6 · 2014-12-10T20:15:56.000-05:00
Unit Tests
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,64 @@
+"""
+Package-level fixtures for the ZPar unit tests: ``setUp`` creates the shared
+tagger and parsers, ``tearDown`` closes ZPar and removes the test output.
+
+:author: Nitin Madnani (nmadnani@ets.org)
+"""
+
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import glob
+import itertools
+import os
+
+from io import open
+from os.path import abspath, dirname, exists, join
+
+from zpar import ZPar
+
+_my_dir = abspath(dirname(__file__))  # absolute path of the directory holding this file
+
+z = None  # ZPar instance, created in setUp()
+tagger = None  # result of z.get_tagger(), set in setUp()
+parser = None  # result of z.get_parser(), set in setUp()
+depparser = None  # result of z.get_depparser(), set in setUp()
+
+
+def setUp():
+    """
+    Create the ZPar instance and the tagger/parser/depparser the tests share
+    """
+    global z, tagger, parser, depparser
+
+    # a bare assert gives no hint about what is missing, so say it explicitly
+    assert 'ZPAR_MODEL_DIR' in os.environ, \
+        'the ZPAR_MODEL_DIR environment variable must be set to run the tests'
+
+    z = ZPar(os.environ['ZPAR_MODEL_DIR'])
+    tagger = z.get_tagger()
+    parser = z.get_parser()
+    depparser = z.get_depparser()
+
+
+def tearDown():
+    """
+    Clean up after the tests: close ZPar, drop the shared objects and
+    remove any output files the tests wrote to the examples directory.
+    """
+    global z, tagger, parser, depparser
+
+    if z is not None:
+        z.close()
+
+    # Rebind to None instead of ``del``: this releases the references
+    # just the same, but keeps the module attributes defined so that a
+    # second call to tearDown() does not raise a NameError on ``z``.
+    tagger = parser = depparser = z = None
+
+    # delete all the files we may have created
+    data_dir = abspath(join(_my_dir, '..', 'examples'))
+    for extension in ('tag', 'parse', 'dep'):
+        pattern = join(data_dir, 'test*.{}'.format(extension))
+        for f in glob.glob(pattern):
+            os.unlink(f)
diff --git a/tests/test_depparser.py b/tests/test_depparser.py
@@ -0,0 +1,73 @@
+"""
+Simple unit tests for the ZPar dependency parser. Should be expanded more in
+the future.
+
+:author: Nitin Madnani (nmadnani@ets.org)
+"""
+
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import itertools
+import os
+
+from io import open
+from os.path import abspath, dirname, exists, join
+
+import numpy as np
+from nose.tools import eq_, raises, assert_equal, assert_not_equal
+
+_my_dir = abspath(dirname(__file__))  # absolute path of the directory holding this file
+
+
+def check_dep_parse_sentence(tokenize=False):
+    """
+    Dependency-parse one sentence, raw or pre-tokenized, and verify the rows
+    """
+    from tests import depparser
+
+    raw, tokenized = "I'm going to the market.", "I 'm going to the market ."
+    expected = "I\tPRP\t1\tSUB\n'm\tVBP\t-1\tROOT\ngoing\tVBG\t1\tVC\nto\tTO\t2\tVMOD\nthe\tDT\t5\tNMOD\nmarket\tNN\t3\tPMOD\n.\t.\t1\tP\n"
+    actual = depparser.dep_parse_sentence(raw if tokenize else tokenized, tokenize=tokenize)
+    assert_equal(actual, expected)
+
+
+def test_dep_parse_sentence():
+    for tokenize in (False, True):  # pre-tokenized input first, then raw
+        yield check_dep_parse_sentence, tokenize
+
+
+def check_dep_parse_file(tokenize=False):
+    """
+    Check dep_parse_file method with and without tokenization
+    """
+    from tests import depparser
+
+    # test.txt is parsed with tokenization on, test_tokenized.txt with it off
+    prefix = 'test' if tokenize else 'test_tokenized'
+    examples_dir = abspath(join(_my_dir, '..', 'examples'))
+    input_file = join(examples_dir, '{}.txt'.format(prefix))
+    output_file = join(examples_dir, '{}.dep'.format(prefix))
+
+    # four tab-separated fields per token and an empty line after each sentence
+    correct_output = ['I\tPRP\t1\tSUB', 'am\tVBP\t-1\tROOT',
+                      'going\tVBG\t1\tVC', 'to\tTO\t2\tVMOD',
+                      'the\tDT\t5\tNMOD', 'market\tNN\t3\tPMOD',
+                      '.\t.\t1\tP', '', 'Are\tVBP\t-1\tROOT',
+                      'you\tPRP\t0\tSUB', 'going\tVBG\t0\tVMOD',
+                      'to\tTO\t4\tVMOD', 'come\tVB\t2\tVMOD',
+                      'with\tIN\t4\tVMOD', 'me\tPRP\t5\tPMOD',
+                      '?\t.\t0\tP', '']
+
+    depparser.dep_parse_file(input_file, output_file, tokenize=tokenize)
+
+    # read the output file back, iterating it lazily instead of readlines()
+    with open(output_file, 'r') as outf:
+        output = [line.strip() for line in outf]
+
+    assert_equal(output, correct_output)
+
+
+def test_dep_parse_file():
+    yield check_dep_parse_file, False
+    yield check_dep_parse_file, True
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -0,0 +1,68 @@
+"""
+Simple unit tests for the ZPar constituency parser. Should be expanded more
+in the future.
+
+:author: Nitin Madnani (nmadnani@ets.org)
+"""
+
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import itertools
+import os
+
+from io import open
+from os.path import abspath, dirname, exists, join
+
+import numpy as np
+from nose.tools import eq_, raises, assert_equal, assert_not_equal
+
+_my_dir = abspath(dirname(__file__))  # absolute path of the directory holding this file
+
+
+def check_parse_sentence(tokenize=False):
+    """
+    Parse one sentence, raw or pre-tokenized, and verify the bracketed tree
+    """
+    from tests import parser
+
+    raw, tokenized = "I'm going to the market.", "I 'm going to the market ."
+    expected = "(S (NP (PRP I)) (VP (VBP 'm) (VP (VBG going) (PP (TO to) (NP (DT the) (NN market))))) (. .))"
+    actual = parser.parse_sentence(raw if tokenize else tokenized, tokenize=tokenize)
+
+    assert_equal(actual, expected)
+
+
+def test_parse_sentence():
+    for tokenize in (False, True):  # pre-tokenized input first, then raw
+        yield check_parse_sentence, tokenize
+
+
+def check_parse_file(tokenize=False):
+    """
+    Parse an example file, raw or pre-tokenized, and verify the written trees
+    """
+    from tests import parser
+
+    expected = ["(S (NP (PRP I)) (VP (VBP am) (VP (VBG going) (PP (TO to) (NP (DT the) (NN market))))) (. .))",
+                "(SQ (VBP Are) (NP (PRP you)) (VP (VBG going) (S (VP (TO to) (VP (VB come) (PP (IN with) (NP (PRP me))))))) (. ?))"]
+
+    # test.txt is parsed with tokenization on, test_tokenized.txt with it off
+    stem = 'test' if tokenize else 'test_tokenized'
+    examples_dir = abspath(join(_my_dir, '..', 'examples'))
+    source_path = join(examples_dir, '{}.txt'.format(stem))
+    target_path = join(examples_dir, '{}.parse'.format(stem))
+
+    # have the parser write its output file ...
+    parser.parse_file(source_path, target_path, tokenize=tokenize)
+
+    # ... then read it back, one stripped line per entry
+    with open(target_path, 'r') as parsed:
+        actual = [line.strip() for line in parsed]
+
+    assert_equal(actual, expected)
+
+
+def test_parse_file():
+    for tokenize in (False, True):  # pre-tokenized file first, then raw
+        yield check_parse_file, tokenize
diff --git a/tests/test_tagger.py b/tests/test_tagger.py
@@ -0,0 +1,68 @@
+"""
+Simple unit tests for the ZPar part-of-speech tagger. Should be expanded more
+in the future.
+
+:author: Nitin Madnani (nmadnani@ets.org)
+"""
+
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import itertools
+import os
+
+from io import open
+from os.path import abspath, dirname, exists, join
+
+import numpy as np
+from nose.tools import eq_, raises, assert_equal, assert_not_equal
+
+_my_dir = abspath(dirname(__file__))  # absolute path of the directory holding this file
+
+
+def check_tag_sentence(tokenize=False):
+    """
+    Tag one sentence, raw or pre-tokenized, and verify the word/tag pairs
+    """
+    from tests import tagger
+
+    raw, tokenized = "I'm going to the market.", "I 'm going to the market ."
+    expected = "I/PRP 'm/VBP going/VBG to/TO the/DT market/NN ./."
+    actual = tagger.tag_sentence(raw if tokenize else tokenized, tokenize=tokenize)
+
+    assert_equal(actual, expected)
+
+
+def test_tag_sentence():
+    for tokenize in (False, True):  # pre-tokenized input first, then raw
+        yield check_tag_sentence, tokenize
+
+
+def check_tag_file(tokenize=False):
+    """
+    Tag an example file, raw or pre-tokenized, and verify the written output
+    """
+    from tests import tagger
+
+    expected = ['I/PRP am/VBP going/VBG to/TO the/DT market/NN ./.',
+                'Are/VBP you/PRP going/VBG to/TO come/VB with/IN me/PRP ?/.']
+
+    # test.txt is tagged with tokenization on, test_tokenized.txt with it off
+    stem = 'test' if tokenize else 'test_tokenized'
+    examples_dir = abspath(join(_my_dir, '..', 'examples'))
+    source_path = join(examples_dir, '{}.txt'.format(stem))
+    target_path = join(examples_dir, '{}.tag'.format(stem))
+
+    # have the tagger write its output file ...
+    tagger.tag_file(source_path, target_path, tokenize=tokenize)
+
+    # ... then read it back, one stripped line per entry
+    with open(target_path, 'r') as tagged:
+        actual = [line.strip() for line in tagged]
+
+    assert_equal(actual, expected)
+
+
+def test_tag_file():
+    for tokenize in (False, True):  # pre-tokenized file first, then raw
+        yield check_tag_file, tokenize