Skip to content
This repository was archived by the owner on Jun 3, 2021. It is now read-only.

Commit ff41594

Browse files
Merge pull request #8 from SelfHacked/http
Http
2 parents 44b0345 + f8f569d commit ff41594

5 files changed

Lines changed: 149 additions & 4 deletions

File tree

.coveragerc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ omit =
55
stream/io/s3.py
66
# ftp
77
stream/io/ftp.py
8+
# http
9+
stream/io/http.py

setup.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
extra_cached_property = [
44
'cached-property',
55
]
6+
extra_http = [
7+
'requests',
8+
]
69
extra_s3 = [
710
'boto3>=1.9',
811
'botocore',
@@ -12,10 +15,12 @@
1215
]
1316
extra_bin = [
1417
*extra_cached_property,
18+
*extra_http,
1519
*extra_s3,
1620
]
1721
extra_all = [
1822
*extra_cached_property,
23+
*extra_http,
1924
*extra_s3,
2025
*extra_sql,
2126
]
@@ -77,6 +82,8 @@
7782
's3-copy=stream.io.s3:copy_cmd',
7883
'ftp-download=stream.io.ftp:download_cmd',
7984
'ftp-get=stream.io.ftp:get_cmd',
85+
'http-download=stream.io.http:download_cmd',
86+
'http-get=stream.io.http:get_cmd',
8087
],
8188
},
8289
)

stream/functions/bytes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
decode: _ApplyEach[bytes, str] = _ApplyEach(bytes.decode, encoding='utf-8')
1313

1414

15-
def un_gzip(iterable: _typing.Iterable[bytes]) -> _typing.Iterator[str]:
15+
def un_gzip(iterable: _typing.Iterable[bytes]) -> _typing.Iterator[bytes]:
1616
"""
17-
Unzip a gzip byte stream into str, and split by lines.
17+
Unzip a gzip byte stream, and split by lines.
1818
"""
1919
readable = _BytesIO(iterable)
20-
with _gzip.open(readable) as f:
20+
with _gzip.open(readable, mode='rb') as f:
2121
yield from f

stream/io/ftp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def __init__(
135135
*,
136136
threaded: bool = False,
137137
tmpfile: bool = False,
138-
blocksize=8192,
138+
blocksize: int = 8192,
139139
rest=None,
140140
**kwargs,
141141
):

stream/io/http.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import io as _io
2+
import typing as _typing
3+
4+
import requests as _requests
5+
from gimme_cached_property import cached_property
6+
7+
from stream.io import (
8+
BinaryFile as _BinaryFile,
9+
)
10+
11+
12+
class BaseHttpFile(_BinaryFile):
    """Read-only, non-seekable binary file interface backed by an HTTP URL.

    Equality is defined by URL: two instances compare equal iff their
    ``url`` values are equal, and hashing is consistent with that.
    Seeking, ``tell``, ``truncate`` and ``fileno`` are not supported and
    raise ``self.NotSupported`` (declared on the project base class —
    _BinaryFile — TODO confirm).
    """

    def __init__(self, url: str):
        # Name-mangled so subclasses cannot accidentally shadow the URL.
        self.__url = url

    @property
    def url(self) -> str:
        """The HTTP URL this file object reads from."""
        return self.__url

    def __eq__(self, other):
        # Return NotImplemented (not False) for foreign types so Python
        # can fall back to the reflected comparison on `other`.
        if not isinstance(other, BaseHttpFile):
            return NotImplemented
        return self.url == other.url

    def __hash__(self) -> int:
        # Defining __eq__ implicitly sets __hash__ to None; restore
        # hashing consistent with equality (equal urls -> equal hashes).
        return hash(self.__url)

    # --- os ---

    @property
    def name(self) -> str:
        # Mirror the io convention of exposing a `name`; the URL is the
        # closest equivalent of a file name here.
        return self.url

    def fileno(self) -> int:
        # No underlying OS-level file descriptor exists for an HTTP body.
        raise self.NotSupported

    @property
    def isatty(self) -> bool:
        # NOTE(review): io.IOBase defines isatty() as a *method*, not a
        # property — confirm this matches _BinaryFile's contract.
        return False

    # --- seek ---

    def seekable(self) -> bool:
        # An HTTP response body is a forward-only stream.
        return False

    def tell(self) -> int:
        raise self.NotSupported

    def seek(self, offset: int, whence: int = _io.SEEK_SET) -> int:
        raise self.NotSupported

    def truncate(self, size: _typing.Optional[int] = None) -> int:
        raise self.NotSupported
53+
54+
55+
class HttpDownloadFile(BaseHttpFile):
    """Readable HTTP file that streams the response body of a GET request.

    The request is opened immediately in ``__init__`` with ``stream=True``
    so the body is consumed lazily, chunk by chunk.

    Note: ``chuck_size`` is a historical typo for ``chunk_size``; both are
    accepted. ``chunk_size`` wins when given, ``chuck_size`` is kept for
    backward compatibility.
    """

    def __init__(
        self,
        url: str,
        *,
        chunk_size: _typing.Optional[int] = None,
        chuck_size: int = 8192,
    ):
        super().__init__(url)
        # Prefer the correctly-spelled keyword; fall back to the legacy one.
        self.__chuck_size = chuck_size if chunk_size is None else chunk_size

        # Opened eagerly; stream=True defers downloading the body until
        # iter_content() is consumed.
        self.__request = _requests.get(self.url, stream=True)

    @property
    def _chunk_size(self) -> int:
        """Size (in bytes) of each chunk fetched from the response body."""
        return self.__chuck_size

    # Legacy alias — kept so existing subclasses/callers keep working.
    @property
    def _chuck_size(self) -> int:
        return self.__chuck_size

    @property
    def _request(self):
        """The underlying streaming ``requests.Response``."""
        return self.__request

    # --- os ---

    def __enter__(self):
        # Delegate context management to the response (which releases the
        # connection on exit), but hand back *this* file object.
        self.__request.__enter__()
        return self

    def close(self) -> None:
        # Releases the connection back to the pool.
        self.__request.close()

    # --- read ---

    def readable(self) -> bool:
        return True

    @cached_property
    def _iter(self) -> _typing.Iterator[int]:
        """Lazy byte-by-byte iterator over the response body.

        Iterating a ``bytes`` chunk yields ints (0-255), hence the
        ``Iterator[int]`` element type.
        """
        for chunk in self._request.iter_content(chunk_size=self._chunk_size):
            if not chunk:
                continue  # filter out keep-alive new chunks
            yield from chunk

    def _read_character(self) -> int:
        """Return the next byte value, raising ``self.EOF`` at end of body."""
        try:
            return next(self._iter)
        except StopIteration:
            # `from None` hides the StopIteration chain from tracebacks.
            raise self.EOF from None

    # --- not writable ---

    def writable(self) -> bool:
        return False

    def write(self, s: bytes) -> None:
        raise self.NotSupported

    def flush(self) -> None:
        raise self.NotSupported
112+
113+
114+
def download_cmd():
    """CLI entry point: download a URL into a local file.

    Usage: ``http-download <url> <file>``.
    """
    import argparse
    from stream.io.local import LocalFile

    parser = argparse.ArgumentParser()
    parser.add_argument('url', type=str)
    parser.add_argument('file', type=str)
    args = parser.parse_args()

    with HttpDownloadFile(args.url) as source, \
            LocalFile(args.file, 'wb') as target:
        source.copy_to(target)
125+
126+
127+
def get_cmd():
    """CLI entry point: stream a URL's body to standard output.

    Usage: ``http-get <url>``.
    """
    import argparse
    from stream.io.std import StdOut

    parser = argparse.ArgumentParser()
    parser.add_argument('url', type=str)
    url = parser.parse_args().url

    with HttpDownloadFile(url) as source:
        with StdOut() as out:
            source.copy_to(out.buffer)

0 commit comments

Comments
 (0)