|
111 | 111 | import sys |
112 | 112 | import time |
113 | 113 | from pathlib import Path |
114 | | - |
115 | | -try: |
116 | | - unicode_string = unicode |
117 | | - binary_string = str |
118 | | -except NameError: |
119 | | - unicode_string = str |
120 | | - binary_string = bytes |
121 | | -try: |
122 | | - from urlparse import urlparse |
123 | | -except ImportError: |
124 | | - from urllib.parse import urlparse as urlparse |
| 114 | +from urllib.parse import urlparse as urlparse |
125 | 115 |
|
126 | 116 | try: |
127 | 117 | from rfc6266 import build_header |
@@ -197,7 +187,7 @@ def make_content_disposition_header(fn): |
class TikaException(Exception):
    '''Exception type raised by this module; adds nothing to ``Exception``.'''
199 | 189 |
|
200 | | -def echo2(*s): sys.stderr.write(unicode_string('tika.py: %s\n') % unicode_string(' ').join(map(unicode_string, s))) |
def echo2(*s):
    '''
    Write one diagnostic line, prefixed with ``tika.py: ``, to standard error.

    :param s: values to report; each is converted with ``str`` and the
        results are joined with single spaces
    '''
    # The format string and separator are already ``str`` literals, so only
    # the caller-supplied values need converting.
    sys.stderr.write('tika.py: %s\n' % ' '.join(map(str, s)))
def warn(*s):
    '''
    Emit a warning by forwarding to ``echo2`` with a leading ``'Warn:'`` label.

    :param s: values to include in the message after the label
    '''
    echo2('Warn:', *s)
def die(*s):
    '''
    Report an error, echo the module-level ``USAGE`` value and exit.

    The message is sent through ``warn`` with a leading ``'Error:'`` label,
    then ``USAGE`` is written with ``echo2``.

    :param s: values describing the error
    :raises SystemExit: always, with exit status 1
    '''
    warn('Error:', *s)
    echo2(USAGE)
    # A bare sys.exit() reports status 0 (success); use a non-zero status so
    # shells and parent processes can tell that the run failed.
    sys.exit(1)
203 | 193 |
|
@@ -246,7 +236,7 @@ def getPaths(urlOrPaths): |
246 | 236 | :param urlOrPaths: the url or path to be scanned |
247 | 237 | :return: ``list`` of paths |
248 | 238 | ''' |
249 | | - if isinstance(urlOrPaths, unicode_string): |
| 239 | + if isinstance(urlOrPaths, str): |
250 | 240 | urlOrPaths = [urlOrPaths] # do not recursively walk over letters of a single path which can include "/" |
251 | 241 | paths = [] |
252 | 242 | for eachUrlOrPaths in urlOrPaths: |
@@ -326,13 +316,13 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti |
326 | 316 | headers = headers or {} |
327 | 317 |
|
328 | 318 | path, file_type = getRemoteFile(urlOrPath, TikaFilesPath) |
329 | | - headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path)}) |
| 319 | + headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is str else path)}) |
330 | 320 |
|
331 | 321 | if option not in services: |
332 | 322 | log.warning('config option must be one of meta, text, or all; using all.') |
333 | 323 | service = services.get(option, services['all']) |
334 | 324 | if service == '/tika': responseMimeType = 'text/plain' |
335 | | - headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path)}) |
| 325 | + headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is str else path)}) |
336 | 326 | with urlOrPath if _is_file_object(urlOrPath) else open(path, 'rb') as f: |
337 | 327 | status, response = callServer('put', serverEndpoint, service, f, |
338 | 328 | headers, verbose, tikaServerJar, config_path=config_path, |
@@ -375,8 +365,8 @@ def detectLang1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos |
375 | 365 | ''' |
376 | 366 | path, mode = getRemoteFile(urlOrPath, TikaFilesPath) |
377 | 367 | if option not in services: |
378 | | - log.exception('Language option must be one of %s ' % binary_string(services.keys())) |
379 | | - raise TikaException('Language option must be one of %s ' % binary_string(services.keys())) |
| 368 | + log.exception('Language option must be one of %s ' % bytes(services.keys())) |
| 369 | + raise TikaException('Language option must be one of %s ' % bytes(services.keys())) |
380 | 370 | service = services[option] |
381 | 371 | status, response = callServer('put', serverEndpoint, service, open(path, 'rb'), |
382 | 372 | {'Accept': responseMimeType}, verbose, tikaServerJar, requestOptions=requestOptions) |
@@ -471,13 +461,13 @@ def detectType1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos |
471 | 461 | ''' |
472 | 462 | path, mode = getRemoteFile(urlOrPath, TikaFilesPath) |
473 | 463 | if option not in services: |
474 | | - log.exception('Detect option must be one of %s' % binary_string(services.keys())) |
475 | | - raise TikaException('Detect option must be one of %s' % binary_string(services.keys())) |
| 464 | + log.exception('Detect option must be one of %s' % bytes(services.keys())) |
| 465 | + raise TikaException('Detect option must be one of %s' % bytes(services.keys())) |
476 | 466 | service = services[option] |
477 | 467 | status, response = callServer('put', serverEndpoint, service, open(path, 'rb'), |
478 | 468 | { |
479 | 469 | 'Accept': responseMimeType, |
480 | | - 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path) |
| 470 | + 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is str else path) |
481 | 471 | }, |
482 | 472 | verbose, tikaServerJar, config_path=config_path, requestOptions=requestOptions) |
483 | 473 | if csvOutput == 1: |
@@ -533,15 +523,15 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti |
533 | 523 |
|
534 | 524 | serviceUrl = serverEndpoint + service |
535 | 525 | if verb not in httpVerbs: |
536 | | - log.exception('Tika Server call must be one of %s' % binary_string(httpVerbs.keys())) |
537 | | - raise TikaException('Tika Server call must be one of %s' % binary_string(httpVerbs.keys())) |
| 526 | + log.exception('Tika Server call must be one of %s' % bytes(httpVerbs.keys())) |
| 527 | + raise TikaException('Tika Server call must be one of %s' % bytes(httpVerbs.keys())) |
538 | 528 | verbFn = httpVerbs[verb] |
539 | 529 |
|
540 | 530 | if Windows and hasattr(data, "read"): |
541 | 531 | data = data.read() |
542 | 532 |
|
543 | 533 | encodedData = data |
544 | | - if type(data) is unicode_string: |
| 534 | + if type(data) is str: |
545 | 535 | encodedData = data.encode('utf-8') |
546 | 536 |
|
547 | 537 | requestOptionsDefault = { |
|
0 commit comments