Skip to content
This repository was archived by the owner on Jun 3, 2021. It is now read-only.

Commit ff41594

Browse files
Merge pull request #8 from SelfHacked/http
Http
2 parents 44b0345 + f8f569d commit ff41594

5 files changed

Lines changed: 149 additions & 4 deletions

File tree

.coveragerc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ omit =
55
stream/io/s3.py
66
# ftp
77
stream/io/ftp.py
8+
# http
9+
stream/io/http.py

setup.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
extra_cached_property = [
44
'cached-property',
55
]
6+
extra_http = [
7+
'requests',
8+
]
69
extra_s3 = [
710
'boto3>=1.9',
811
'botocore',
@@ -12,10 +15,12 @@
1215
]
1316
extra_bin = [
1417
*extra_cached_property,
18+
*extra_http,
1519
*extra_s3,
1620
]
1721
extra_all = [
1822
*extra_cached_property,
23+
*extra_http,
1924
*extra_s3,
2025
*extra_sql,
2126
]
@@ -77,6 +82,8 @@
7782
's3-copy=stream.io.s3:copy_cmd',
7883
'ftp-download=stream.io.ftp:download_cmd',
7984
'ftp-get=stream.io.ftp:get_cmd',
85+
'http-download=stream.io.http:download_cmd',
86+
'http-get=stream.io.http:get_cmd',
8087
],
8188
},
8289
)

stream/functions/bytes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
decode: _ApplyEach[bytes, str] = _ApplyEach(bytes.decode, encoding='utf-8')
1313

1414

15-
def un_gzip(iterable: _typing.Iterable[bytes]) -> _typing.Iterator[str]:
15+
def un_gzip(iterable: _typing.Iterable[bytes]) -> _typing.Iterator[bytes]:
1616
"""
17-
Unzip a gzip byte stream into str, and split by lines.
17+
Unzip a gzip byte stream, and split by lines.
1818
"""
1919
readable = _BytesIO(iterable)
20-
with _gzip.open(readable) as f:
20+
with _gzip.open(readable, mode='rb') as f:
2121
yield from f

stream/io/ftp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def __init__(
135135
*,
136136
threaded: bool = False,
137137
tmpfile: bool = False,
138-
blocksize=8192,
138+
blocksize: int = 8192,
139139
rest=None,
140140
**kwargs,
141141
):

stream/io/http.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import io as _io
2+
import typing as _typing
3+
4+
import requests as _requests
5+
from gimme_cached_property import cached_property
6+
7+
from stream.io import (
8+
BinaryFile as _BinaryFile,
9+
)
10+
11+
12+
class BaseHttpFile(_BinaryFile):
    """Read-only, non-seekable binary file interface backed by an HTTP URL.

    Equality is defined by URL: two instances compare equal iff their
    ``url`` values are equal, and hashing is consistent with that.
    Seeking, ``tell``, ``truncate`` and ``fileno`` are not supported and
    raise ``self.NotSupported`` (declared on the project base class —
    _BinaryFile — TODO confirm).
    """

    def __init__(self, url: str):
        # Name-mangled so subclasses cannot accidentally shadow the URL.
        self.__url = url

    @property
    def url(self) -> str:
        """The HTTP URL this file object reads from."""
        return self.__url

    def __eq__(self, other):
        # Return NotImplemented (not False) for foreign types so Python
        # can fall back to the reflected comparison on `other`.
        if not isinstance(other, BaseHttpFile):
            return NotImplemented
        return self.url == other.url

    def __hash__(self) -> int:
        # Defining __eq__ implicitly sets __hash__ to None; restore
        # hashing consistent with equality (equal urls -> equal hashes).
        return hash(self.__url)

    # --- os ---

    @property
    def name(self) -> str:
        # Mirror the io convention of exposing a `name`; the URL is the
        # closest equivalent of a file name here.
        return self.url

    def fileno(self) -> int:
        # No underlying OS-level file descriptor exists for an HTTP body.
        raise self.NotSupported

    @property
    def isatty(self) -> bool:
        # NOTE(review): io.IOBase defines isatty() as a *method*, not a
        # property — confirm this matches _BinaryFile's contract.
        return False

    # --- seek ---

    def seekable(self) -> bool:
        # An HTTP response body is a forward-only stream.
        return False

    def tell(self) -> int:
        raise self.NotSupported

    def seek(self, offset: int, whence: int = _io.SEEK_SET) -> int:
        raise self.NotSupported

    def truncate(self, size: _typing.Optional[int] = None) -> int:
        raise self.NotSupported
53+
54+
55+
class HttpDownloadFile(BaseHttpFile):
    """Readable HTTP file that streams the response body of a GET request.

    The request is opened immediately in ``__init__`` with ``stream=True``
    so the body is consumed lazily, chunk by chunk.

    Note: ``chuck_size`` is a historical typo for ``chunk_size``; both are
    accepted. ``chunk_size`` wins when given, ``chuck_size`` is kept for
    backward compatibility.
    """

    def __init__(
        self,
        url: str,
        *,
        chunk_size: _typing.Optional[int] = None,
        chuck_size: int = 8192,
    ):
        super().__init__(url)
        # Prefer the correctly-spelled keyword; fall back to the legacy one.
        self.__chuck_size = chuck_size if chunk_size is None else chunk_size

        # Opened eagerly; stream=True defers downloading the body until
        # iter_content() is consumed.
        self.__request = _requests.get(self.url, stream=True)

    @property
    def _chunk_size(self) -> int:
        """Size (in bytes) of each chunk fetched from the response body."""
        return self.__chuck_size

    # Legacy alias — kept so existing subclasses/callers keep working.
    @property
    def _chuck_size(self) -> int:
        return self.__chuck_size

    @property
    def _request(self):
        """The underlying streaming ``requests.Response``."""
        return self.__request

    # --- os ---

    def __enter__(self):
        # Delegate context management to the response (which releases the
        # connection on exit), but hand back *this* file object.
        self.__request.__enter__()
        return self

    def close(self) -> None:
        # Releases the connection back to the pool.
        self.__request.close()

    # --- read ---

    def readable(self) -> bool:
        return True

    @cached_property
    def _iter(self) -> _typing.Iterator[int]:
        """Lazy byte-by-byte iterator over the response body.

        Iterating a ``bytes`` chunk yields ints (0-255), hence the
        ``Iterator[int]`` element type.
        """
        for chunk in self._request.iter_content(chunk_size=self._chunk_size):
            if not chunk:
                continue  # filter out keep-alive new chunks
            yield from chunk

    def _read_character(self) -> int:
        """Return the next byte value, raising ``self.EOF`` at end of body."""
        try:
            return next(self._iter)
        except StopIteration:
            # `from None` hides the StopIteration chain from tracebacks.
            raise self.EOF from None

    # --- not writable ---

    def writable(self) -> bool:
        return False

    def write(self, s: bytes) -> None:
        raise self.NotSupported

    def flush(self) -> None:
        raise self.NotSupported
112+
113+
114+
def download_cmd():
    """CLI entry point: download a URL into a local file.

    Usage: ``http-download <url> <file>``.
    """
    import argparse
    from stream.io.local import LocalFile

    parser = argparse.ArgumentParser()
    parser.add_argument('url', type=str)
    parser.add_argument('file', type=str)
    args = parser.parse_args()

    with HttpDownloadFile(args.url) as source, \
            LocalFile(args.file, 'wb') as target:
        source.copy_to(target)
125+
126+
127+
def get_cmd():
    """CLI entry point: stream a URL's body to standard output.

    Usage: ``http-get <url>``.
    """
    import argparse
    from stream.io.std import StdOut

    parser = argparse.ArgumentParser()
    parser.add_argument('url', type=str)
    url = parser.parse_args().url

    with HttpDownloadFile(url) as source:
        with StdOut() as out:
            source.copy_to(out.buffer)

0 commit comments

Comments
 (0)