Skip to content
This repository was archived by the owner on Jan 7, 2021. It is now read-only.

Commit 1e68505

Browse files
committed
Tried to fix MultiPartEncoder to work with Python3 and BytesIO.
1 parent 829a318 commit 1e68505

6 files changed

Lines changed: 118 additions & 63 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
.tox/
12
scratch.py
23
_sources/*
34
sphinx/*

documentcloud/MultipartPostHandler.py

Lines changed: 75 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,14 @@
4747
from os import SEEK_END
4848
if six.PY3:
4949
import io
50-
import urllib
50+
import urllib.parse
51+
import urllib.request
5152
from email.generator import _make_boundary as choose_boundary
5253
else:
5354
import cStringIO as io
5455
from six.moves import urllib
5556
from mimetools import choose_boundary
5657

57-
class Callable:
58-
def __init__(self, anycallable):
59-
self.__call__ = anycallable
60-
6158
# Controls how sequences are encoded. If true, elements
6259
# may be given multiple values by assigning a sequence.
6360
doseq = 1
@@ -101,36 +98,79 @@ def http_request(self, request):
10198

10299
return request
103100

104-
def multipart_encode(vars, files, boundary=None, buf=None):
105-
if boundary is None:
106-
boundary = choose_boundary()
107-
if buf is None:
108-
buf = io.StringIO()
109-
for(key, value) in vars:
110-
buf.write('--%s\r\n' % boundary)
111-
buf.write('Content-Disposition: form-data; name="%s"' % key)
112-
buf.write('\r\n\r\n' + value + '\r\n')
113-
for(key, fd) in files:
114-
try:
115-
filename = fd.name.split('/')[-1]
116-
except AttributeError:
117-
# Spoof a file name if the object doesn't have one.
118-
# This is designed to catch when the user submits
119-
# a StringIO object
120-
filename = 'temp.pdf'
121-
contenttype = mimetypes.guess_type(filename)[0] or \
122-
'application/octet-stream'
123-
buf.write('--%s\r\n' % boundary)
124-
buf.write('Content-Disposition: form-data; \
125-
name="%s"; filename="%s"\r\n' % (key, filename))
126-
buf.write('Content-Type: %s\r\n' % contenttype)
127-
# buffer += 'Content-Length: %s\r\n' % file_size
128-
fd.seek(0)
129-
buf.write('\r\n' + fd.read() + '\r\n')
130-
buf.write('--' + boundary + '--\r\n\r\n')
131-
buf = buf.getvalue()
132-
return boundary, buf
133-
multipart_encode = Callable(multipart_encode)
101+
def multipart_encode(self, v_vars, files, boundary=None, buf=None):
102+
if six.PY3:
103+
if boundary is None:
104+
boundary = choose_boundary()
105+
if buf is None:
106+
buf = io.BytesIO()
107+
for(key, value) in v_vars:
108+
buf.write(b'--' + boundary.encode("utf-8") + b'\r\n')
109+
buf.write(
110+
b'Content-Disposition: form-data; name="' +
111+
key.encode("utf-8") +
112+
b'"'
113+
)
114+
buf.write(b'\r\n\r\n' + value.encode("utf-8") + b'\r\n')
115+
for(key, fd) in files:
116+
try:
117+
filename = fd.name.split('/')[-1]
118+
except AttributeError:
119+
# Spoof a file name if the object doesn't have one.
120+
# This is designed to catch when the user submits
121+
# a StringIO object
122+
filename = 'temp.pdf'
123+
contenttype = mimetypes.guess_type(filename)[0] or \
124+
b'application/octet-stream'
125+
buf.write(b'--' + boundary.encode("utf-8") + b'\r\n')
126+
buf.write(
127+
b'Content-Disposition: form-data; ' +
128+
b'name="' + key.encode("utf-8") + b'"; ' +
129+
b'filename="' + filename.encode("utf-8") + b'"\r\n'
130+
)
131+
buf.write(
132+
b'Content-Type: ' +
133+
contenttype.encode("utf-8") +
134+
b'\r\n'
135+
)
136+
fd.seek(0)
137+
buf.write(
138+
b'\r\n' + fd.read() + b'\r\n'
139+
)
140+
buf.write(b'--')
141+
buf.write(boundary.encode("utf-8"))
142+
buf.write(b'--\r\n\r\n')
143+
buf = buf.getvalue()
144+
return boundary, buf
145+
else:
146+
if boundary is None:
147+
boundary = choose_boundary()
148+
if buf is None:
149+
buf = io.StringIO()
150+
for(key, value) in v_vars:
151+
buf.write('--%s\r\n' % boundary)
152+
buf.write('Content-Disposition: form-data; name="%s"' % key)
153+
buf.write('\r\n\r\n' + value + '\r\n')
154+
for(key, fd) in files:
155+
try:
156+
filename = fd.name.split('/')[-1]
157+
except AttributeError:
158+
# Spoof a file name if the object doesn't have one.
159+
# This is designed to catch when the user submits
160+
# a StringIO object
161+
filename = 'temp.pdf'
162+
contenttype = mimetypes.guess_type(filename)[0] or \
163+
'application/octet-stream'
164+
buf.write('--%s\r\n' % boundary)
165+
buf.write('Content-Disposition: form-data; \
166+
name="%s"; filename="%s"\r\n' % (key, filename))
167+
buf.write('Content-Type: %s\r\n' % contenttype)
168+
# buffer += 'Content-Length: %s\r\n' % file_size
169+
fd.seek(0)
170+
buf.write('\r\n' + fd.read() + '\r\n')
171+
buf.write('--' + boundary + '--\r\n\r\n')
172+
buf = buf.getvalue()
173+
return boundary, buf
134174
https_request = http_request
135175

136176

documentcloud/__init__.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
from dateutil.parser import parse as dateparser
2626
from .MultipartPostHandler import MultipartPostHandler
2727
if six.PY3:
28-
import urllib
28+
import urllib.parse
29+
import urllib.error
30+
import urllib.request
2931
else:
3032
from six.moves import urllib
3133
try:
@@ -48,7 +50,7 @@ def __init__(self, username, password):
4850
self.username = username
4951
self.password = password
5052

51-
@retry(Exception, tries=4)
53+
@retry(Exception, tries=3)
5254
def _make_request(self, url, params=None, opener=None):
5355
"""
5456
Configure a HTTP request, fire it off and return the response.
@@ -60,8 +62,8 @@ def _make_request(self, url, params=None, opener=None):
6062
if self.username and self.password:
6163
credentials = '%s:%s' % (self.username, self.password)
6264
encoded_credentials = base64.encodestring(
63-
credentials
64-
).replace("\n", "")
65+
credentials.encode("utf-8")
66+
).decode("utf-8").replace("\n", "")
6567
header = 'Basic %s' % encoded_credentials
6668
request.add_header('Authorization', header)
6769
# If the request provides a custom opener, like the upload request,
@@ -124,10 +126,11 @@ def put(self, method, params):
124126
else:
125127
# Otherwise, we can just use the vanilla urllib prep method
126128
params = urllib.parse.urlencode(params, doseq=True)
129+
127130
# Make the request
128131
self._make_request(
129132
self.BASE_URI + method,
130-
params,
133+
params.encode("utf-8"),
131134
)
132135

133136
def fetch(self, method, params=None):
@@ -136,13 +139,13 @@ def fetch(self, method, params=None):
136139
"""
137140
# Encode params if they exist
138141
if params:
139-
params = urllib.parse.urlencode(params, doseq=True)
142+
params = urllib.parse.urlencode(params, doseq=True).encode("utf-8")
140143
content = self._make_request(
141144
self.BASE_URI + method,
142145
params,
143146
)
144147
# Convert its JSON to a Python dictionary and return
145-
return json.loads(content)
148+
return json.loads(content.decode("utf-8"))
146149

147150

148151
class DocumentCloud(BaseDocumentCloudClient):
@@ -286,7 +289,7 @@ def upload(
286289
params,
287290
MultipartPostHandler
288291
)
289-
return self.get(json.loads(response)['id'])
292+
return self.get(json.loads(response.decode("utf-8"))['id'])
290293

291294
@credentials_required
292295
def upload_directory(
@@ -441,8 +444,11 @@ def create(self, title, description=None, document_ids=None):
441444
params += "".join([
442445
'&document_ids[]=%s' % id for id in document_ids
443446
])
444-
response = self._make_request(self.BASE_URI + "projects.json", params)
445-
new_id = json.loads(response)['project']['id']
447+
response = self._make_request(
448+
self.BASE_URI + "projects.json",
449+
params.encode("utf-8")
450+
)
451+
new_id = json.loads(response.decode("utf-8"))['project']['id']
446452
# If it doesn't exist, that suggests the project already exists
447453
if not new_id:
448454
raise DuplicateObjectError("The Project title you tried to create \

documentcloud/toolbox.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _checkcredentials(self, *args, **kwargs):
5959
return wraps(method_func)(_checkcredentials)
6060

6161

62-
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2):
62+
def retry(ExceptionToCheck, tries=3, delay=2, backoff=2):
6363
"""
6464
Retry decorator published by Salty Crane.
6565

test.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
"""
99
import os
1010
import sys
11+
import six
1112
import random
1213
import string
1314
import textwrap
@@ -32,7 +33,11 @@ def get_random_string(length=6):
3233
"""
3334
Generate a random string of letters and numbers
3435
"""
35-
return unicode(''.join(random.choice(string.letters + string.digits) for i in xrange(length)))
36+
return six.u(''.join(
37+
random.choice(string.ascii_letters + string.digits)
38+
for i in range(length)
39+
))
40+
3641

3742
PANGRAMS = {
3843
'en': 'The quick brown fox jumps over the lazy dog.',
@@ -80,7 +85,7 @@ def get_editable_document(self, version):
8085
"2.5": "351008-lbex-docid-3383445",
8186
"2.6": "15144-mitchrpt",
8287
"2.7": "351151-lbex-docid-130036",
83-
"3.3": "50986-lbhi_sec07940_755445",
88+
"3.3": "351029-lbex-docid-149714",
8489
}
8590
return version2slug[str(version)]
8691

@@ -283,14 +288,14 @@ def test_get_put(self):
283288
source = get_random_string()
284289
description = get_random_string()
285290
data = {get_random_string(): get_random_string()}
286-
if obj.resources.related_article == u'http://documents.latimes.com':
287-
related_article = u'http://documentcloud.org'
291+
if obj.resources.related_article == 'http://documents.latimes.com':
292+
related_article = 'http://documentcloud.org'
288293
else:
289-
related_article = u'http://documents.latimes.com'
290-
if obj.resources.published_url == u'http://documents.latimes.com':
291-
published_url = u'http://documentcloud.org'
294+
related_article = 'http://documents.latimes.com'
295+
if obj.resources.published_url == 'http://documents.latimes.com':
296+
published_url = 'http://documentcloud.org'
292297
else:
293-
published_url = u'http://documents.latimes.com'
298+
published_url = 'http://documents.latimes.com'
294299
# Set the random strings as our local object's attributes
295300
obj.title = title
296301
obj.source = source
@@ -438,7 +443,10 @@ def test_virtual_file_upload_and_delete(self):
438443
"""
439444
path = os.path.join(os.path.dirname(__file__), "español.pdf")
440445
real_file = open(path, 'rb')
441-
virtual_file = io.StringIO(real_file.read())
446+
if six.PY3:
447+
virtual_file = io.BytesIO(real_file.read())
448+
else:
449+
virtual_file = io.StringIO(real_file.read())
442450
obj = self.private_client.documents.upload(virtual_file, title='Espanola!')
443451
self.assertEqual(type(obj), Document)
444452
# Delete it
@@ -535,7 +543,7 @@ def test_get_document(self):
535543
Verify that a project can pull a particular document by id
536544
"""
537545
obj = self.private_client.projects.get('934')
538-
doc = obj.get_document(u'25798-pr-01092011-loughner')
546+
doc = obj.get_document('25798-pr-01092011-loughner')
539547
self.assertEqual(type(doc), Document)
540548

541549
def test_put(self):
@@ -545,8 +553,8 @@ def test_put(self):
545553
# Pull the object we'll deface
546554
obj = self.private_client.projects.get(self.editable_project)
547555
# Create random strings we will save to the editable attributes
548-
title = u'The Klee Report (%s)' % get_random_string()
549-
description = textwrap.dedent(u"""
556+
title = 'The Klee Report (%s)' % get_random_string()
557+
description = textwrap.dedent("""
550558
An independent probe into Sam Zell\'s purchase of Tribune Company by
551559
investigator Kenneth Klee. Released at the end of July 2010. (%s)
552560
""")
@@ -565,8 +573,8 @@ def test_put(self):
565573
self.assertEqual(len(obj.document_list), 0)
566574
# Now add all the documents back in
567575
proj_ids = [
568-
u'12667-the-klee-report-volume-2',
569-
u'12666-the-klee-report-volume-1'
576+
'12667-the-klee-report-volume-2',
577+
'12666-the-klee-report-volume-1'
570578
]
571579
for id in proj_ids:
572580
doc = self.private_client.documents.get(id)
@@ -660,7 +668,7 @@ def test_duplicate_object(self):
660668
Make sure DuplicateObjectError works.
661669
"""
662670
obj = self.private_client.projects.get("703")
663-
doc = self.private_client.documents.get(u'12666-the-klee-report-volume-1')
671+
doc = self.private_client.documents.get('12666-the-klee-report-volume-1')
664672
self.assertRaises(DuplicateObjectError, obj.document_list.append, doc)
665673

666674

tox.ini

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[tox]
2-
envlist=py25,py26,py27,py32,py33
2+
envlist=py33
33

44
[testenv]
55
deps=
66
python-dateutil
77
simplejson
88
six
9-
commands=python quicktest.py documentcloud
9+
commands=python test.py

0 commit comments

Comments
 (0)