forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTableBuilder.cxx
More file actions
133 lines (116 loc) · 4.08 KB
/
TableBuilder.cxx
File metadata and controls
133 lines (116 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
#include "Framework/TableBuilder.h"
#include <memory>
#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
#include <arrow/builder.h>
#include <arrow/memory_pool.h>
#include <arrow/record_batch.h>
#include <arrow/status.h>
#include <arrow/table.h>
#include <arrow/type_traits.h>
#include <arrow/util/key_value_metadata.h>
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
using namespace arrow;
namespace
{
// FIXME: Dummy schema, to compile.
template <typename TYPE, typename C_TYPE>
void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<arrow::Array>* out)
{
typename arrow::TypeTraits<TYPE>::BuilderType builder;
for (size_t i = 0; i < values.size(); ++i) {
auto status = builder.Append(values[i]);
assert(status.ok());
}
auto status = builder.Finish(out);
assert(status.ok());
}
} // namespace
namespace o2::framework
{
/// Re-seat @a schema to the schema returned by WithMetadata for a metadata
/// object that holds the single key/value pair {"label", @a label}.
///
/// @param schema schema pointer, updated in place
/// @param label  C string copied into the metadata value
void addLabelToSchema(std::shared_ptr<arrow::Schema>& schema, const char* label)
{
  using Strings = std::vector<std::string>;
  auto metadata = std::make_shared<arrow::KeyValueMetadata>(
    Strings{std::string{"label"}},
    Strings{std::string{label}});
  schema = schema->WithMetadata(metadata);
}
/// Run the stored finalizer over mArrays / mHolders and assemble the table.
///
/// Reports "Unable to finalize" through throwError when the finalizer
/// signals failure. In builds with assertions enabled the schema is also
/// required to have at least one field.
///
/// @return the table made from mSchema and mArrays
std::shared_ptr<arrow::Table>
  TableBuilder::finalize()
{
  if (!mFinalizer(mArrays, mHolders)) {
    throwError(runtime_error("Unable to finalize"));
  }
  assert(mSchema->num_fields() > 0 && "Schema needs to be non-empty");
  auto table = arrow::Table::Make(mSchema, mArrays);
  return table;
}
/// Throw @a ref as a C++ exception.
///
/// The object is thrown by value, so the exception seen by handlers is a
/// copy of @a ref. This function never returns normally.
// NOTE(review): presumably kept out of line so callers in header/template
// code do not need to contain the throw expression themselves - confirm.
void TableBuilder::throwError(RuntimeErrorRef const& ref)
{
throw ref;
}
/// Check that this builder can still be used for persisting.
///
/// A non-null mHolders is treated as the sign that persist was already
/// invoked on this instance; in that case an error is raised through
/// throwError. Otherwise the call has no effect.
void TableBuilder::validate() const
{
  if (mHolders == nullptr) {
    return;
  }
  throwError(runtime_error("TableBuilder::persist can only be invoked once per instance"));
}
/// Re-seat mSchema to the schema returned by WithMetadata for a metadata
/// object that holds the single key/value pair {"label", @a label}.
// NOTE(review): this repeats what addLabelToSchema does for an arbitrary
// schema pointer; consider delegating to it.
void TableBuilder::setLabel(const char* label)
{
  std::string key{"label"};
  std::string value{label};
  auto metadata = std::make_shared<arrow::KeyValueMetadata>(std::vector{key}, std::vector{value});
  mSchema = mSchema->WithMetadata(metadata);
}
/// Build a new table by applying a gandiva projector to every record batch
/// of @a fullTable.
///
/// @param fullTable  source table; it is read batch by batch
/// @param newSchema  schema of the produced table. Its fields are handed to
///                   createProjectorHelper and also provide the data type of
///                   each output column, so it must describe at least
///                   @a nColumns fields
/// @param nColumns   number of output columns
/// @param projectors expressions forwarded to createProjectorHelper
/// @param name       used in error messages and stored as the "label"
///                   metadata of the resulting schema
/// @param projector  projector to use; when null it is created here and
///                   stored back through the reference
/// @return table made from the labelled @a newSchema, each column holding one
///         chunk per batch read from @a fullTable
///
/// Errors are reported by throwing the value returned by runtime_error_f:
/// when a batch cannot be read, when the projector returns a non-ok status,
/// or when it throws a std::exception. v.at() additionally throws
/// std::out_of_range if the projector produced fewer than @a nColumns arrays.
std::shared_ptr<arrow::Table> spawnerHelper(std::shared_ptr<arrow::Table> const& fullTable, std::shared_ptr<arrow::Schema> newSchema, size_t nColumns,
                                            expressions::Projector* projectors, const char* name,
                                            std::shared_ptr<gandiva::Projector>& projector)
{
  if (projector == nullptr) {
    projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), newSchema->fields());
  }

  arrow::TableBatchReader reader(*fullTable);
  std::shared_ptr<arrow::RecordBatch> batch;
  arrow::ArrayVector v;
  // One list of chunks per output column; every batch contributes one chunk.
  std::vector<arrow::ArrayVector> chunks(nColumns);

  while (true) {
    auto s = reader.ReadNext(&batch);
    if (!s.ok()) {
      throw runtime_error_f("Cannot read batches from source table to spawn %s: %s", name, s.ToString().c_str());
    }
    if (batch == nullptr) {
      // The reader signals the end of the table with a null batch.
      break;
    }
    try {
      s = projector->Evaluate(*batch, arrow::default_memory_pool(), &v);
      if (!s.ok()) {
        throw runtime_error_f("Cannot apply projector to source table of %s: %s", name, s.ToString().c_str());
      }
    } catch (const std::exception& e) {
      throw runtime_error_f("Cannot apply projector to source table of %s: exception caught: %s", name, e.what());
    }
    for (size_t i = 0; i < nColumns; ++i) {
      chunks[i].emplace_back(v.at(i));
    }
  }

  std::vector<std::shared_ptr<arrow::ChunkedArray>> arrays;
  arrays.reserve(nColumns);
  for (size_t i = 0; i < nColumns; ++i) {
    // Pass the column type explicitly: when the source table yields no
    // batches the chunk list is empty, and the single-argument ChunkedArray
    // constructor requires a non-empty vector to deduce the type from.
    arrays.push_back(std::make_shared<arrow::ChunkedArray>(chunks[i], newSchema->field(static_cast<int>(i))->type()));
  }
  addLabelToSchema(newSchema, name);
  return arrow::Table::Make(newSchema, arrays);
}
} // namespace o2::framework