Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,5 @@ cpp/third_party/zlib-1.2.13/zconf.h
.vscode/

build/*
cpp/third_party/zlib-1.3.1/treebuild.xml
cpp/third_party/zlib-1.3.1/zlib-1.3.1/treebuild.xml
41 changes: 40 additions & 1 deletion cpp/src/common/container/bit_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class BitMap {
*start_addr = (*start_addr) & (~bit_mask);
}

// Zero all size_ bytes of the bitmap storage, i.e. clear every bit.
FORCE_INLINE void clear_all() { memset(bitmap_, 0x00, size_); }

FORCE_INLINE bool test(uint32_t index) {
uint32_t offset = index >> 3;
ASSERT(offset < size_);
Expand All @@ -64,9 +66,46 @@ class BitMap {
return (*start_addr & bit_mask);
}

// Return how many bits in the bitmap are 1 (i.e., the number of null
// entries). All size_ bytes are scanned, one popcount per byte.
// __builtin_popcount is available on GCC, Clang, and MinGW on Windows.
// TODO: add MSVC support if needed (e.g. __popcnt or manual bit count).
FORCE_INLINE uint32_t count_set_bits() const {
    const uint8_t* cursor = reinterpret_cast<const uint8_t*>(bitmap_);
    const uint8_t* const end = cursor + size_;
    uint32_t total = 0;
    while (cursor != end) {
        total += __builtin_popcount(*cursor);
        ++cursor;
    }
    return total;
}

// Find the first set bit (null position) at or after @from, within
// [0, total_bits). Returns total_bits if there is none, and never returns
// a value greater than total_bits.
// Zero bytes are skipped a whole byte at a time rather than bit by bit.
// The bitmap is read up to byte (total_bits + 7) / 8 - 1; that range is
// not checked against size_ here, so the caller must keep total_bits
// within the bitmap's capacity.
FORCE_INLINE uint32_t next_set_bit(uint32_t from,
                                   uint32_t total_bits) const {
    if (from >= total_bits) return total_bits;
    const uint8_t* p = reinterpret_cast<const uint8_t*>(bitmap_);
    uint32_t byte_idx = from >> 3;
    // Drop the bits below @from in the first (possibly partial) byte.
    const uint8_t byte_val =
        static_cast<uint8_t>(p[byte_idx] >> (from & 7));
    if (byte_val) {
        // The hit may be a padding bit at or past total_bits when this is
        // also the last byte, so clamp exactly like the loop below does.
        // Comparing against (total_bits - from) avoids overflowing
        // from + skip.
        const uint32_t skip = static_cast<uint32_t>(__builtin_ctz(byte_val));
        return skip < total_bits - from ? from + skip : total_bits;
    }
    // Scan subsequent full bytes, skipping zeros.
    const uint32_t byte_end = (total_bits + 7) >> 3;
    for (++byte_idx; byte_idx < byte_end; ++byte_idx) {
        if (p[byte_idx]) {
            uint32_t pos = (byte_idx << 3) + __builtin_ctz(p[byte_idx]);
            // Padding bits in the final byte do not count as a hit.
            return pos < total_bits ? pos : total_bits;
        }
    }
    return total_bits;
}

// Return size_, the byte length of the bitmap storage (the same length
// that clear_all() zeroes and count_set_bits() scans).
FORCE_INLINE uint32_t get_size() { return size_; }

FORCE_INLINE char* get_bitmap() { return bitmap_; } // for debug
// Return a mutable pointer to the raw bitmap bytes. Writes through this
// pointer go straight to the storage with no bounds checking.
FORCE_INLINE char* get_bitmap() { return bitmap_; }

private:
FORCE_INLINE uint8_t get_bit_mask(uint32_t index) {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/common/container/byte_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ class ByteBuffer {

// Return the raw data_ pointer held by this buffer.
FORCE_INLINE char* get_data() { return data_; }

// Return real_data_size_.
// NOTE(review): presumably the number of bytes actually stored in data_ --
// confirm against where real_data_size_ is assigned.
FORCE_INLINE uint32_t get_data_size() const { return real_data_size_; }

private:
char* data_;
uint8_t variable_type_len_;
Expand Down
74 changes: 74 additions & 0 deletions cpp/src/common/tablet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,80 @@ int Tablet::add_timestamp(uint32_t row_index, int64_t timestamp) {
return E_OK;
}

// Bulk-copy the first `count` timestamps into the tablet's timestamp
// column. Returns the tablet's stored error if it is already in an error
// state, E_OUT_OF_RANGE if `count` exceeds max_row_num_, otherwise E_OK.
// NOTE(review): `timestamps` itself is not null-checked here.
int Tablet::set_timestamps(const int64_t* timestamps, uint32_t count) {
    if (err_code_ != E_OK) {
        return err_code_;
    }
    ASSERT(timestamps_ != NULL);

    const uint32_t capacity = static_cast<uint32_t>(max_row_num_);
    if (UNLIKELY(count > capacity)) {
        return E_OUT_OF_RANGE;
    }

    const size_t bytes_to_copy = count * sizeof(int64_t);
    std::memcpy(timestamps_, timestamps, bytes_to_copy);

    // The populated-row count only ever grows; it is never reduced.
    if (count > cur_row_size_) {
        cur_row_size_ = count;
    }
    return E_OK;
}

int Tablet::set_column_values(uint32_t schema_index, const void* data,
const uint8_t* bitmap, uint32_t count) {
if (err_code_ != E_OK) {
return err_code_;
}
if (UNLIKELY(schema_index >= schema_vec_->size())) {
return E_OUT_OF_RANGE;
}
if (UNLIKELY(count > static_cast<uint32_t>(max_row_num_))) {
return E_OUT_OF_RANGE;
}

const MeasurementSchema& schema = schema_vec_->at(schema_index);
size_t elem_size = 0;
void* dst = nullptr;
switch (schema.data_type_) {
case BOOLEAN:
elem_size = sizeof(bool);
dst = value_matrix_[schema_index].bool_data;
break;
case DATE:
case INT32:
elem_size = sizeof(int32_t);
dst = value_matrix_[schema_index].int32_data;
break;
case TIMESTAMP:
case INT64:
elem_size = sizeof(int64_t);
dst = value_matrix_[schema_index].int64_data;
break;
case FLOAT:
elem_size = sizeof(float);
dst = value_matrix_[schema_index].float_data;
break;
case DOUBLE:
elem_size = sizeof(double);
dst = value_matrix_[schema_index].double_data;
break;
default:
return E_TYPE_NOT_SUPPORTED;
}

if (bitmap == nullptr) {
// All valid: bulk copy + mark all as non-null
std::memcpy(dst, data, count * elem_size);
bitmaps_[schema_index].clear_all();
} else {
// Bulk copy all data (null positions will have garbage but won't be
// read).
std::memcpy(dst, data, count * elem_size);

// bitmap uses TsFile convention (1=null, 0=valid), same as
// internal BitMap, so copy directly.
char* tsfile_bm = bitmaps_[schema_index].get_bitmap();
uint32_t bm_bytes = (count + 7) / 8;
std::memcpy(tsfile_bm, bitmap, bm_bytes);
}
cur_row_size_ = std::max(count, cur_row_size_);
return E_OK;
}

void* Tablet::get_value(int row_index, uint32_t schema_index,
common::TSDataType& data_type) const {
if (UNLIKELY(schema_index >= schema_vec_->size())) {
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/common/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,25 @@ class Tablet {
*/
int add_timestamp(uint32_t row_index, int64_t timestamp);

/**
* @brief Bulk copy timestamps into the tablet.
*
* @param timestamps Pointer to an array of at least @p count timestamp
* values. The pointer is not checked for null.
* @param count Number of timestamps to copy. Must be <= max_row_num.
* If count > cur_row_size_, cur_row_size_ is updated to count,
* so that subsequent operations know how many rows are populated.
* @return Returns 0 on success, or a non-zero error code on failure
* (E_OUT_OF_RANGE if count > max_row_num). If the tablet is already in an
* error state, that stored error code is returned and nothing is copied.
*/
int set_timestamps(const int64_t* timestamps, uint32_t count);
Comment thread
jt2594838 marked this conversation as resolved.

/**
* @brief Bulk copy fixed-length column data into the tablet.
*
* @param schema_index Index of the target column in the schema.
* @param data Pointer to @p count contiguous values of the column's
* native type.
* @param bitmap Null mask, or nullptr if all rows are non-null. Otherwise
* bit=1 means null, bit=0 means valid (same as TsFile BitMap convention).
* Callers using other conventions (e.g. Arrow, where 1=valid) must invert
* before calling.
* @param count Number of rows to copy. Must be <= max_row_num.
* @return Returns 0 on success, or a non-zero error code on failure
* (E_OUT_OF_RANGE for a bad schema_index or count, E_TYPE_NOT_SUPPORTED
* for column types that are not fixed-length).
*/
int set_column_values(uint32_t schema_index, const void* data,
const uint8_t* bitmap, uint32_t count);

void* get_value(int row_index, uint32_t schema_index,
common::TSDataType& data_type) const;
/**
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/common/tsblock/vector/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ class Vector {

// Return the has_null_ flag.
FORCE_INLINE bool has_null() { return has_null_; }

// Return a mutable reference to nulls_, this vector's BitMap member.
FORCE_INLINE common::BitMap& get_bitmap() { return nulls_; }

// Return a mutable reference to values_, this vector's ByteBuffer member.
FORCE_INLINE common::ByteBuffer& get_value_data() { return values_; }

// We want derived class to have access to base class members, so it is
// defined as protected
protected:
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/cwrapper/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ under the License.
]]
message("Running in cwrapper directory")
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CWRAPPER_SRC_LIST tsfile_cwrapper.cc)
set(CWRAPPER_SRC_LIST tsfile_cwrapper.cc arrow_c.cc)
add_library(cwrapper_obj OBJECT ${CWRAPPER_SRC_LIST})

# install header files
Expand Down
Loading
Loading