Skip to content

Commit d3bc51d

Browse files
committed
unicode_string
1 parent 0397f44 commit d3bc51d

1 file changed

Lines changed: 13 additions & 23 deletions

File tree

tika/tika.py

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,7 @@
111111
import sys
112112
import time
113113
from pathlib import Path
114-
115-
try:
116-
unicode_string = unicode
117-
binary_string = str
118-
except NameError:
119-
unicode_string = str
120-
binary_string = bytes
121-
try:
122-
from urlparse import urlparse
123-
except ImportError:
124-
from urllib.parse import urlparse as urlparse
114+
from urllib.parse import urlparse as urlparse
125115

126116
try:
127117
from rfc6266 import build_header
@@ -197,7 +187,7 @@ def make_content_disposition_header(fn):
197187
class TikaException(Exception):
198188
pass
199189

200-
def echo2(*s): sys.stderr.write(unicode_string('tika.py: %s\n') % unicode_string(' ').join(map(unicode_string, s)))
190+
def echo2(*s):
    '''
    Write a single ``tika.py: ``-prefixed line to standard error.

    Every positional argument is converted with ``str``; the results are
    joined with single spaces and terminated with a newline.

    :param s: values that make up the message
    '''
    # The format and separator are already ``str`` literals, so they need no
    # ``str(...)`` wrapper; only the caller-supplied values are converted.
    sys.stderr.write('tika.py: %s\n' % ' '.join(map(str, s)))
201191
def warn(*s):
    '''
    Emit a warning line by forwarding to ``echo2`` with a ``Warn:`` prefix.

    :param s: values that make up the warning message
    '''
    echo2('Warn:', *s)
202192
def die(*s):
    '''
    Report an error, print the usage text, and exit.

    The message is passed to ``warn`` with an ``Error:`` prefix, ``USAGE`` is
    then written through ``echo2``, and finally ``sys.exit()`` is called with
    no argument (which raises ``SystemExit``).

    :param s: values that make up the error message
    '''
    warn('Error:', *s)
    echo2(USAGE)
    sys.exit()
203193

@@ -246,7 +236,7 @@ def getPaths(urlOrPaths):
246236
:param urlOrPaths: the url or path to be scanned
247237
:return: ``list`` of paths
248238
'''
249-
if isinstance(urlOrPaths, unicode_string):
239+
if isinstance(urlOrPaths, str):
250240
urlOrPaths = [urlOrPaths] # do not recursively walk over letters of a single path which can include "/"
251241
paths = []
252242
for eachUrlOrPaths in urlOrPaths:
@@ -326,13 +316,13 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
326316
headers = headers or {}
327317

328318
path, file_type = getRemoteFile(urlOrPath, TikaFilesPath)
329-
headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path)})
319+
headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is str else path)})
330320

331321
if option not in services:
332322
log.warning('config option must be one of meta, text, or all; using all.')
333323
service = services.get(option, services['all'])
334324
if service == '/tika': responseMimeType = 'text/plain'
335-
headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path)})
325+
headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is str else path)})
336326
with urlOrPath if _is_file_object(urlOrPath) else open(path, 'rb') as f:
337327
status, response = callServer('put', serverEndpoint, service, f,
338328
headers, verbose, tikaServerJar, config_path=config_path,
@@ -375,8 +365,8 @@ def detectLang1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos
375365
'''
376366
path, mode = getRemoteFile(urlOrPath, TikaFilesPath)
377367
if option not in services:
378-
log.exception('Language option must be one of %s ' % binary_string(services.keys()))
379-
raise TikaException('Language option must be one of %s ' % binary_string(services.keys()))
368+
log.exception('Language option must be one of %s ' % bytes(services.keys()))
369+
raise TikaException('Language option must be one of %s ' % bytes(services.keys()))
380370
service = services[option]
381371
status, response = callServer('put', serverEndpoint, service, open(path, 'rb'),
382372
{'Accept': responseMimeType}, verbose, tikaServerJar, requestOptions=requestOptions)
@@ -471,13 +461,13 @@ def detectType1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos
471461
'''
472462
path, mode = getRemoteFile(urlOrPath, TikaFilesPath)
473463
if option not in services:
474-
log.exception('Detect option must be one of %s' % binary_string(services.keys()))
475-
raise TikaException('Detect option must be one of %s' % binary_string(services.keys()))
464+
log.exception('Detect option must be one of %s' % bytes(services.keys()))
465+
raise TikaException('Detect option must be one of %s' % bytes(services.keys()))
476466
service = services[option]
477467
status, response = callServer('put', serverEndpoint, service, open(path, 'rb'),
478468
{
479469
'Accept': responseMimeType,
480-
'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path)
470+
'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is str else path)
481471
},
482472
verbose, tikaServerJar, config_path=config_path, requestOptions=requestOptions)
483473
if csvOutput == 1:
@@ -533,15 +523,15 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti
533523

534524
serviceUrl = serverEndpoint + service
535525
if verb not in httpVerbs:
536-
log.exception('Tika Server call must be one of %s' % binary_string(httpVerbs.keys()))
537-
raise TikaException('Tika Server call must be one of %s' % binary_string(httpVerbs.keys()))
526+
log.exception('Tika Server call must be one of %s' % bytes(httpVerbs.keys()))
527+
raise TikaException('Tika Server call must be one of %s' % bytes(httpVerbs.keys()))
538528
verbFn = httpVerbs[verb]
539529

540530
if Windows and hasattr(data, "read"):
541531
data = data.read()
542532

543533
encodedData = data
544-
if type(data) is unicode_string:
534+
if type(data) is str:
545535
encodedData = data.encode('utf-8')
546536

547537
requestOptionsDefault = {

0 commit comments

Comments
 (0)