|
7 | 7 | import ctypes as c |
8 | 8 | import logging |
9 | 9 | import os |
10 | | -import re |
11 | 10 |
|
12 | 11 | # set up the logging |
13 | 12 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING) |
@@ -55,25 +54,12 @@ def dep_parse_sentence(self, sentence, tokenize=True): |
55 | 54 | # return empty string if the input is empty |
56 | 55 | ans = "" |
57 | 56 | else: |
58 | | - zpar_compatible_sentence = sentence |
59 | | - all_caps_word = '' |
60 | | - # detect if we are processing a sentence with a single word in all caps |
61 | | - # because that is a known bug. This is a hack for now and will be removed |
62 | | - # once the underlying bug is fixed in ZPar. |
63 | | - m = re.match(r'^([A-Z]+)$', zpar_compatible_sentence.strip()) |
64 | | - if m: |
65 | | - all_caps_word = m.group(1) |
66 | | - fixed_word = all_caps_word.title() |
67 | | - self.logger.warning('Encountered sentence with all caps single word ' |
68 | | - 'which triggers a known bug in ZPar. Title-casing ' |
69 | | - 'to avoid buggy behavior.') |
70 | | - zpar_compatible_sentence = sentence.title() |
| 57 | + zpar_compatible_sentence = sentence.strip() + "\n " |
71 | 58 | zpar_compatible_sentence = zpar_compatible_sentence.strip() + "\n " |
72 | 59 | zpar_compatible_sentence = zpar_compatible_sentence.encode('utf-8') |
73 | 60 | parsed_sent = self._dep_parse_sentence(self._zpar_session_obj, zpar_compatible_sentence, tokenize) |
74 | | - # replace the title-cased word with the original all-caps word if we need to |
75 | | - parsed_sent = parsed_sent.decode('utf-8') |
76 | | - ans = parsed_sent if not all_caps_word else parsed_sent.replace(fixed_word, all_caps_word) |
| 61 | + ans = parsed_sent.decode('utf-8') |
| 62 | + |
77 | 63 | return ans |
78 | 64 |
|
79 | 65 | def dep_parse_file(self, inputfile, outputfile, tokenize=True): |
|
0 commit comments