-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstream.go
More file actions
67 lines (57 loc) · 2.13 KB
/
stream.go
File metadata and controls
67 lines (57 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
package fileprep
import (
"bytes"
"io"
"github.com/nao1215/fileparser"
)
// Stream represents a preprocessed data stream with format information.
// It embeds io.Reader for consuming the data and exposes metadata about
// both the output format and the original input format.
type Stream interface {
	io.Reader
	// Format returns the actual output format of the stream data.
	// For CSV/TSV/LTSV input, this matches the input format.
	// For JSON/JSONL input, this returns JSONL since the output is JSONL-formatted.
	// For XLSX/Parquet input, this returns CSV since the output is CSV-formatted.
	Format() fileparser.FileType
	// OriginalFormat returns the original input file type, including any
	// compression information.
	OriginalFormat() fileparser.FileType
}
// stream is the package-private, in-memory implementation of Stream.
type stream struct {
	// reader serves the stream's bytes and keeps track of the read position.
	reader *bytes.Reader
	// format is the format of the data held by reader; originalFormat is
	// the format of the input file the data came from.
	format, originalFormat fileparser.FileType
}
// newStream wraps data in a stream and records its format metadata.
//
// outputFormat describes the data actually held by the stream, while
// originalFormat describes the input file. The data slice is handed to
// bytes.NewReader as-is; it is not copied here.
func newStream(data []byte, outputFormat fileparser.FileType, originalFormat fileparser.FileType) *stream {
	s := new(stream)
	s.reader = bytes.NewReader(data)
	s.format = outputFormat
	s.originalFormat = originalFormat
	return s
}
// Read implements io.Reader by forwarding the call to the underlying
// bytes.Reader and returning its byte count and error unchanged.
func (s *stream) Read(p []byte) (n int, err error) {
	n, err = s.reader.Read(p)
	return n, err
}
// Format reports the format of the data this stream actually yields.
//
//   - CSV/TSV/LTSV input: same as the input format.
//   - JSON/JSONL input: JSONL, since the output is JSONL-formatted.
//   - XLSX/Parquet input: CSV, since the output is CSV-formatted.
func (s *stream) Format() fileparser.FileType {
	outputFormat := s.format
	return outputFormat
}
// OriginalFormat reports the file type of the original input, including
// compression info, exactly as it was recorded when the stream was created.
func (s *stream) OriginalFormat() fileparser.FileType {
	inputFormat := s.originalFormat
	return inputFormat
}
// Seek implements io.Seeker so the stream can be rewound or repositioned.
// The offset and whence arguments are passed straight to the underlying
// bytes.Reader, whose resulting position and error are returned unchanged.
func (s *stream) Seek(offset int64, whence int) (int64, error) {
	pos, err := s.reader.Seek(offset, whence)
	return pos, err
}
// Len reports how many bytes of the stream have not been read yet, as
// tracked by the underlying bytes.Reader.
func (s *stream) Len() int {
	remaining := s.reader.Len()
	return remaining
}