Skip to content

Commit 2b0c0bf

Browse files
authored
Support arrow struct (#739)
1 parent 5e9f8c2 commit 2b0c0bf

36 files changed

Lines changed: 3587 additions & 19 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,5 @@ cpp/third_party/zlib-1.3.1
4343
.vscode/
4444

4545
build/*
46+
cpp/third_party/zlib-1.3.1/treebuild.xml
47+
cpp/third_party/zlib-1.3.1/zlib-1.3.1/treebuild.xml

cpp/src/common/container/bit_map.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ class BitMap {
5555
*start_addr = (*start_addr) & (~bit_mask);
5656
}
5757

58+
// Reset every bit to 0 by zero-filling all size_ bytes of bitmap_.
FORCE_INLINE void clear_all() { memset(bitmap_, 0x00, size_); }
59+
5860
FORCE_INLINE bool test(uint32_t index) {
5961
uint32_t offset = index >> 3;
6062
ASSERT(offset < size_);
@@ -64,9 +66,46 @@ class BitMap {
6466
return (*start_addr & bit_mask);
6567
}
6668

69+
// Count the number of bits set to 1 (i.e., number of null entries).
70+
// __builtin_popcount is supported by GCC, Clang, and MinGW on Windows.
71+
// TODO: add MSVC support if needed (e.g. __popcnt or manual bit count).
72+
FORCE_INLINE uint32_t count_set_bits() const {
73+
uint32_t count = 0;
74+
const uint8_t* p = reinterpret_cast<const uint8_t*>(bitmap_);
75+
for (uint32_t i = 0; i < size_; i++) {
76+
count += __builtin_popcount(p[i]);
77+
}
78+
return count;
79+
}
80+
81+
// Find the next set bit (null position) at or after @from,
82+
// within [0, total_bits). Returns total_bits if none found.
83+
// Skips zero bytes in bulk so cost is proportional to the number
84+
// of null bytes, not total rows.
85+
FORCE_INLINE uint32_t next_set_bit(uint32_t from,
86+
uint32_t total_bits) const {
87+
if (from >= total_bits) return total_bits;
88+
const uint8_t* p = reinterpret_cast<const uint8_t*>(bitmap_);
89+
uint32_t byte_idx = from >> 3;
90+
// Check remaining bits in the first (partial) byte
91+
uint8_t byte_val = p[byte_idx] >> (from & 7);
92+
if (byte_val) {
93+
return from + __builtin_ctz(byte_val);
94+
}
95+
// Scan subsequent full bytes, skipping zeros
96+
const uint32_t byte_end = (total_bits + 7) >> 3;
97+
for (++byte_idx; byte_idx < byte_end; ++byte_idx) {
98+
if (p[byte_idx]) {
99+
uint32_t pos = (byte_idx << 3) + __builtin_ctz(p[byte_idx]);
100+
return pos < total_bits ? pos : total_bits;
101+
}
102+
}
103+
return total_bits;
104+
}
105+
67106
// Returns size_, the length of the bitmap storage in bytes (not bits).
FORCE_INLINE uint32_t get_size() { return size_; }
68107

69-
FORCE_INLINE char* get_bitmap() { return bitmap_; } // for debug
108+
// Returns the raw, mutable pointer to the underlying byte storage (bitmap_);
// callers may read or overwrite the bytes directly.
FORCE_INLINE char* get_bitmap() { return bitmap_; }
70109

71110
private:
72111
FORCE_INLINE uint8_t get_bit_mask(uint32_t index) {

cpp/src/common/container/byte_buffer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ class ByteBuffer {
118118

119119
// Returns the raw, mutable data_ pointer.
FORCE_INLINE char* get_data() { return data_; }
120120

121+
// Returns real_data_size_.
// NOTE(review): presumably the number of bytes currently stored in data_ —
// confirm against where real_data_size_ is updated.
FORCE_INLINE uint32_t get_data_size() const { return real_data_size_; }
122+
121123
private:
122124
char* data_;
123125
uint8_t variable_type_len_;

cpp/src/common/tablet.cc

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,80 @@ int Tablet::add_timestamp(uint32_t row_index, int64_t timestamp) {
163163
return E_OK;
164164
}
165165

166+
int Tablet::set_timestamps(const int64_t* timestamps, uint32_t count) {
167+
if (err_code_ != E_OK) {
168+
return err_code_;
169+
}
170+
ASSERT(timestamps_ != NULL);
171+
if (UNLIKELY(count > static_cast<uint32_t>(max_row_num_))) {
172+
return E_OUT_OF_RANGE;
173+
}
174+
std::memcpy(timestamps_, timestamps, count * sizeof(int64_t));
175+
cur_row_size_ = std::max(count, cur_row_size_);
176+
return E_OK;
177+
}
178+
179+
int Tablet::set_column_values(uint32_t schema_index, const void* data,
180+
const uint8_t* bitmap, uint32_t count) {
181+
if (err_code_ != E_OK) {
182+
return err_code_;
183+
}
184+
if (UNLIKELY(schema_index >= schema_vec_->size())) {
185+
return E_OUT_OF_RANGE;
186+
}
187+
if (UNLIKELY(count > static_cast<uint32_t>(max_row_num_))) {
188+
return E_OUT_OF_RANGE;
189+
}
190+
191+
const MeasurementSchema& schema = schema_vec_->at(schema_index);
192+
size_t elem_size = 0;
193+
void* dst = nullptr;
194+
switch (schema.data_type_) {
195+
case BOOLEAN:
196+
elem_size = sizeof(bool);
197+
dst = value_matrix_[schema_index].bool_data;
198+
break;
199+
case DATE:
200+
case INT32:
201+
elem_size = sizeof(int32_t);
202+
dst = value_matrix_[schema_index].int32_data;
203+
break;
204+
case TIMESTAMP:
205+
case INT64:
206+
elem_size = sizeof(int64_t);
207+
dst = value_matrix_[schema_index].int64_data;
208+
break;
209+
case FLOAT:
210+
elem_size = sizeof(float);
211+
dst = value_matrix_[schema_index].float_data;
212+
break;
213+
case DOUBLE:
214+
elem_size = sizeof(double);
215+
dst = value_matrix_[schema_index].double_data;
216+
break;
217+
default:
218+
return E_TYPE_NOT_SUPPORTED;
219+
}
220+
221+
if (bitmap == nullptr) {
222+
// All valid: bulk copy + mark all as non-null
223+
std::memcpy(dst, data, count * elem_size);
224+
bitmaps_[schema_index].clear_all();
225+
} else {
226+
// Bulk copy all data (null positions will have garbage but won't be
227+
// read).
228+
std::memcpy(dst, data, count * elem_size);
229+
230+
// bitmap uses TsFile convention (1=null, 0=valid), same as
231+
// internal BitMap, so copy directly.
232+
char* tsfile_bm = bitmaps_[schema_index].get_bitmap();
233+
uint32_t bm_bytes = (count + 7) / 8;
234+
std::memcpy(tsfile_bm, bitmap, bm_bytes);
235+
}
236+
cur_row_size_ = std::max(count, cur_row_size_);
237+
return E_OK;
238+
}
239+
166240
void* Tablet::get_value(int row_index, uint32_t schema_index,
167241
common::TSDataType& data_type) const {
168242
if (UNLIKELY(schema_index >= schema_vec_->size())) {

cpp/src/common/tablet.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,25 @@ class Tablet {
181181
*/
182182
int add_timestamp(uint32_t row_index, int64_t timestamp);
183183

184+
/**
185+
* @brief Bulk copy timestamps into the tablet.
186+
*
187+
* @param timestamps Pointer to an array of timestamp values.
188+
* @param count Number of timestamps to copy. Must be <= max_row_num.
189+
* If count > cur_row_size_, cur_row_size_ is updated to count,
190+
* so that subsequent operations know how many rows are populated.
191+
* @return Returns 0 on success, or a non-zero error code on failure
192+
* (E_OUT_OF_RANGE if count > max_row_num).
193+
*/
194+
int set_timestamps(const int64_t* timestamps, uint32_t count);
195+
196+
// Bulk copy fixed-length column data. If bitmap is nullptr, all rows are
197+
// non-null. Otherwise bit=1 means null, bit=0 means valid (same as TsFile
198+
// BitMap convention). Callers using other conventions (e.g. Arrow, where
199+
// 1=valid) must invert before calling.
200+
int set_column_values(uint32_t schema_index, const void* data,
201+
const uint8_t* bitmap, uint32_t count);
202+
184203
void* get_value(int row_index, uint32_t schema_index,
185204
common::TSDataType& data_type) const;
186205
/**

cpp/src/common/tsblock/vector/vector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ class Vector {
7878

7979
// Returns the has_null_ flag.
FORCE_INLINE bool has_null() { return has_null_; }
8080

81+
// Returns a mutable reference to nulls_, the BitMap held by this vector.
FORCE_INLINE common::BitMap& get_bitmap() { return nulls_; }
82+
83+
// Returns a mutable reference to values_, the ByteBuffer held by this vector.
FORCE_INLINE common::ByteBuffer& get_value_data() { return values_; }
84+
8185
// We want derived class to have access to base class members, so it is
8286
// defined as protected
8387
protected:

cpp/src/cwrapper/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ under the License.
1818
]]
1919
message("Running in cwrapper directory")
2020
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
21-
set(CWRAPPER_SRC_LIST tsfile_cwrapper.cc)
21+
set(CWRAPPER_SRC_LIST tsfile_cwrapper.cc arrow_c.cc)
2222
add_library(cwrapper_obj OBJECT ${CWRAPPER_SRC_LIST})
2323

2424
# install header files

0 commit comments

Comments
 (0)