Skip to content
This repository was archived by the owner on Jun 3, 2021. It is now read-only.

Commit e65c722

Browse files
committed
update readme file.
1 parent b987f77 commit e65c722

1 file changed

Lines changed: 98 additions & 0 deletions

File tree

README.md

Lines changed: 98 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -8,3 +8,101 @@
88
```bash
99
pip install git+https://github.com/SelfHacked/python-stream.git#egg=python-stream
1010
```
11+
12+
13+
## Usage
14+
15+
### Read file from S3
16+
17+
```python
18+
from stream.io.s3 import S3ReadFile
19+
from stream.functions.bytes import un_gzip
20+
21+
# Read line by line
22+
with S3ReadFile(bucket='bucket', key='path/to/file.txt.gz', lines=True) as s3_file:
23+
for line in un_gzip(s3_file):
24+
print(line)
25+
26+
# Read in chunks
27+
with S3ReadFile(bucket='bucket', key='path/to/file.txt.gz', lines=False) as s3_file:
28+
while chunk := s3_file.read(10):
29+
print(chunk)
30+
```
31+
32+
### Upload data to S3
33+
34+
S3WriteFile accepts only binary data. Text data should be encoded to bytes (e.g. via an encoder) before being passed to S3WriteFile's write methods.
35+
36+
```python
37+
from stream.io.s3 import S3ReadFile
38+
from stream.io.s3 import S3WriteFile
39+
40+
# write data to s3 file.
41+
with S3WriteFile(bucket='bucket', key='path/to/file.txt.gz') as s3_file:
42+
s3_file.write(b'test\n')
43+
s3_file.writelines([b'line 1\n', b'line 2\n'])
44+
45+
# read from the s3 file.
46+
with S3ReadFile(bucket='bucket', key='path/to/file.txt.gz', lines=True) as s3_file:
47+
for line in s3_file:
48+
print(line)
49+
```
50+
51+
### Encoding and Decoding
52+
53+
```python
54+
from stream.functions.bytes import encode, decode
55+
56+
text = [f'line {i}' for i in range(0, 10)]
57+
58+
print('Encoded')
59+
encoded = encode(text)
60+
for line in encoded:
61+
print(f' {line}')
62+
63+
print('\nDecoded')
64+
decoded = decode(encode(text))
65+
for line in decoded:
66+
print(f' {line}')
67+
```
68+
69+
70+
### Example pipeline
71+
72+
```python
73+
import gzip
74+
import csv
75+
from stream.io.s3 import S3ReadFile, S3WriteFile
76+
from stream.functions.bytes import encode, decode
77+
78+
# Read from s3 file
79+
with S3ReadFile('dev-varuna', 'prs/trait1/ss1/data_head.tsv.gz', lines=False) as s3_file:
80+
# uncompress
81+
gzip_in = gzip.open(s3_file, 'rb')
82+
83+
# decode
84+
decode_in = decode(gzip_in)
85+
86+
# parse the csv row
87+
csv_in = csv.reader(decode_in, delimiter='\t')
88+
89+
# process step (process step should be in iterable format)
90+
process = ((line[14], line[12], line[13]) for line in csv_in)
91+
92+
# create tsv lines using a generator
93+
csv_out = ('\t'.join(line) for line in process)
94+
95+
# add new line
96+
text_out = (f'{line}\n' for line in csv_out)
97+
98+
# encode text lines into bytes
99+
encode_out = encode(text_out)
100+
101+
# Write to S3
102+
with S3WriteFile(
103+
'dev-varuna', 'prs/trait1/ss1/snps_custom.tsv.gz',
104+
) as s3_writer, gzip.open(
105+
s3_writer, 'wb',
106+
) as gzip_out:
107+
gzip_out.writelines(encode_out)
108+
```

0 commit comments

Comments
 (0)