Skip to content

Commit a479711

Browse files
CopilotpoyrazK
andauthored
fix: correct TupleView correctness bugs and add next_view unit tests
Agent-Logs-Url: https://github.com/poyrazK/cloudSQL/sessions/14113f04-c8af-42c4-b421-be219fb6c2e9 Co-authored-by: poyrazK <83272398+poyrazK@users.noreply.github.com>
1 parent 5d93bc7 commit a479711

3 files changed

Lines changed: 281 additions & 28 deletions

File tree

src/executor/operator.cpp

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ bool SeqScanOperator::next_view(storage::HeapTable::TupleView& out_view) {
110110
}
111111

112112
/* MVCC Visibility Check */
113-
bool visible = true;
114113
const Transaction* const txn = get_txn();
115114
if (txn != nullptr) {
116115
const auto& snapshot = txn->get_snapshot();
@@ -121,12 +120,8 @@ bool SeqScanOperator::next_view(storage::HeapTable::TupleView& out_view) {
121120
const bool xmax_visible = (out_view.xmax == 0) || (out_view.xmax != my_id &&
122121
!snapshot.is_visible(out_view.xmax));
123122

124-
visible = xmin_visible && xmax_visible;
125-
} else {
126-
visible = (out_view.xmax == 0);
123+
if (xmin_visible && xmax_visible) return true;
127124
}
128-
129-
if (visible) return true;
130125
}
131126

132127
set_state(ExecState::Done);
@@ -965,19 +960,15 @@ void LimitOperator::set_params(const std::vector<common::Value>* params) {
965960
}
966961

967962
bool ProjectOperator::next_view(storage::HeapTable::TupleView& out_view) {
968-
if (!child_) return false;
963+
// The zero-allocation path is only valid for simple column projections.
964+
// Return false immediately (without consuming any child rows) when the
965+
// projection includes computed expressions — callers must use next() instead.
966+
if (!child_ || !is_simple_projection_) return false;
967+
969968
if (child_->next_view(out_view)) {
970-
if (is_simple_projection_) {
971-
out_view.column_mapping = &column_mapping_;
972-
out_view.schema = &schema_;
973-
return true;
974-
} else {
975-
// Fallback: This is not optimal but satisfies the semantics.
976-
// Future work: Batch materialization or local buffer.
977-
// For now, we dont return true for computed stuff in next_view
978-
// to avoid exposing raw data incorrectly.
979-
return false;
980-
}
969+
out_view.column_mapping = &column_mapping_;
970+
out_view.schema = &schema_;
971+
return true;
981972
}
982973
return false;
983974
}

src/storage/heap_table.cpp

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,34 @@ HeapTable::~HeapTable() {
5656
/* --- Iterator Implementation --- */
5757

5858
common::Value HeapTable::TupleView::get_value(size_t col_index) const {
59-
if (!schema || col_index >= schema->column_count()) {
60-
return common::Value::make_null();
61-
}
62-
63-
// We must walk the serialized payload from the beginning to reach col_index
59+
if (!schema) return common::Value::make_null();
60+
61+
// When a column_mapping is present its size determines the number of
62+
// accessible logical columns (it may differ from schema->column_count()
63+
// for SELECT * queries where the projected schema is built before the star
64+
// is expanded into concrete column entries).
65+
const size_t logical_count =
66+
(column_mapping && !column_mapping->empty()) ? column_mapping->size()
67+
: schema->column_count();
68+
if (col_index >= logical_count) return common::Value::make_null();
69+
70+
// Resolve the physical column index through the mapping when present.
71+
// col_index is a logical index into the (possibly projected) schema; the
72+
// serialized payload is always laid out in physical table column order.
73+
const size_t physical_idx =
74+
(column_mapping && col_index < column_mapping->size())
75+
? (*column_mapping)[col_index]
76+
: col_index;
77+
78+
// Walk the serialized payload from the beginning to reach physical_idx.
6479
size_t cursor = 0;
65-
for (size_t i = 0; i <= col_index; ++i) {
80+
for (size_t i = 0; i <= physical_idx; ++i) {
6681
if (cursor >= payload_len) return common::Value::make_null();
6782

6883
auto type = static_cast<common::ValueType>(payload_data[cursor++]);
6984

7085
if (type == common::ValueType::TYPE_NULL) {
71-
if (i == col_index) return common::Value::make_null();
86+
if (i == physical_idx) return common::Value::make_null();
7287
continue;
7388
}
7489

@@ -78,7 +93,7 @@ common::Value HeapTable::TupleView::get_value(size_t col_index) const {
7893
type == common::ValueType::TYPE_FLOAT64) {
7994
if (cursor + 8 > payload_len) return common::Value::make_null();
8095

81-
if (i == col_index) {
96+
if (i == physical_idx) {
8297
if (type == common::ValueType::TYPE_FLOAT32 ||
8398
type == common::ValueType::TYPE_FLOAT64) {
8499
double v;
@@ -103,7 +118,7 @@ common::Value HeapTable::TupleView::get_value(size_t col_index) const {
103118

104119
if (cursor + len > payload_len) return common::Value::make_null();
105120

106-
if (i == col_index) {
121+
if (i == physical_idx) {
107122
std::string s(reinterpret_cast<const char*>(payload_data + cursor), len);
108123
return common::Value::make_text(s);
109124
}
@@ -811,7 +826,9 @@ bool HeapTable::Iterator::next_view(TupleView& out_view) {
811826
std::memcpy(&out_view.xmin, data + 2, 8);
812827
std::memcpy(&out_view.xmax, data + 10, 8);
813828

829+
out_view.table_schema = &table_.schema_;
814830
out_view.schema = &table_.schema_;
831+
out_view.column_mapping = nullptr;
815832
out_view.payload_data = data + 18;
816833
out_view.payload_len = record_len - 18;
817834

@@ -835,7 +852,12 @@ bool HeapTable::Iterator::next_view(TupleView& out_view) {
835852

836853
executor::Tuple HeapTable::TupleView::materialize(std::pmr::memory_resource* mr) const {
837854
if (!mr) mr = std::pmr::get_default_resource();
838-
size_t num_cols = schema->columns().size();
855+
// Use the same logical_count logic as get_value so that SELECT * views
856+
// (which have column_mapping with more entries than schema->column_count())
857+
// are materialized correctly.
858+
const size_t num_cols =
859+
(column_mapping && !column_mapping->empty()) ? column_mapping->size()
860+
: schema->columns().size();
839861

840862
std::pmr::vector<common::Value> values(mr);
841863
values.reserve(num_cols);

tests/cloudSQL_tests.cpp

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,4 +1022,244 @@ TEST(ParserTests, ExhaustiveParserErrors) {
10221022
}
10231023
}
10241024

1025+
// ============= TupleView / next_view Tests =============
1026+
1027+
// Helper: build a two-column (INT, TEXT) HeapTable with N rows.
1028+
namespace {
1029+
struct TupleViewTestCtx {
1030+
StorageManager disk;
1031+
BufferPoolManager sm;
1032+
Schema schema;
1033+
std::unique_ptr<HeapTable> table;
1034+
1035+
explicit TupleViewTestCtx(const std::string& name)
1036+
: disk("./test_data"),
1037+
sm(config::Config::DEFAULT_BUFFER_POOL_SIZE, disk) {
1038+
schema.add_column("id", ValueType::TYPE_INT64);
1039+
schema.add_column("tag", ValueType::TYPE_TEXT);
1040+
table = std::make_unique<HeapTable>(name, sm, schema);
1041+
table->create();
1042+
}
1043+
1044+
void insert(int64_t id, const std::string& tag) {
1045+
table->insert(Tuple({Value::make_int64(id), Value::make_text(tag)}));
1046+
}
1047+
};
1048+
} // namespace
1049+
1050+
// 1. Basic scan via next_view: correct row count and values for SELECT *
1051+
TEST(TupleViewTests, BasicScanSelectStar) {
1052+
const std::string name = "tv_basic";
1053+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1054+
1055+
TupleViewTestCtx ctx(name);
1056+
ctx.insert(1, "a");
1057+
ctx.insert(2, "b");
1058+
ctx.insert(3, "c");
1059+
1060+
// Build a SeqScan wrapped by a SELECT * ProjectOperator (no txn = fast path)
1061+
std::vector<std::unique_ptr<parser::Expression>> cols;
1062+
cols.push_back(std::make_unique<parser::ColumnExpr>("*"));
1063+
1064+
auto scan = std::make_unique<SeqScanOperator>(
1065+
std::make_shared<HeapTable>(name, ctx.sm, ctx.schema), nullptr, nullptr);
1066+
auto proj =
1067+
std::make_unique<ProjectOperator>(std::move(scan), std::move(cols));
1068+
1069+
ASSERT_TRUE(proj->init());
1070+
ASSERT_TRUE(proj->open());
1071+
1072+
HeapTable::TupleView view;
1073+
int count = 0;
1074+
while (proj->next_view(view)) {
1075+
count++;
1076+
// Values should be accessible through the view
1077+
EXPECT_FALSE(view.get_value(0).is_null());
1078+
EXPECT_FALSE(view.get_value(1).is_null());
1079+
}
1080+
proj->close();
1081+
1082+
EXPECT_EQ(count, 3);
1083+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1084+
}
1085+
1086+
// 2. Deleted tuples (xmax != 0) are skipped by next_view
1087+
TEST(TupleViewTests, DeletedTuplesSkipped) {
1088+
const std::string name = "tv_deleted";
1089+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1090+
1091+
TupleViewTestCtx ctx(name);
1092+
auto id1 = ctx.table->insert(Tuple({Value::make_int64(10), Value::make_text("alive")}));
1093+
auto id2 = ctx.table->insert(Tuple({Value::make_int64(20), Value::make_text("dead")}));
1094+
// Mark id2 as deleted by setting xmax != 0
1095+
ctx.table->remove(id2, /*xmax=*/1);
1096+
1097+
std::vector<std::unique_ptr<parser::Expression>> cols;
1098+
cols.push_back(std::make_unique<parser::ColumnExpr>("*"));
1099+
1100+
auto scan = std::make_unique<SeqScanOperator>(
1101+
std::make_shared<HeapTable>(name, ctx.sm, ctx.schema), nullptr, nullptr);
1102+
auto proj = std::make_unique<ProjectOperator>(std::move(scan), std::move(cols));
1103+
1104+
ASSERT_TRUE(proj->init());
1105+
ASSERT_TRUE(proj->open());
1106+
1107+
HeapTable::TupleView view;
1108+
int count = 0;
1109+
while (proj->next_view(view)) {
1110+
count++;
1111+
// Only the alive row should come through
1112+
EXPECT_EQ(view.get_value(0).to_int64(), 10);
1113+
}
1114+
proj->close();
1115+
1116+
EXPECT_EQ(count, 1);
1117+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1118+
}
1119+
1120+
// 3. Non-identity column projection: SELECT tag, id (columns reversed)
1121+
// get_value must resolve physical indices through column_mapping.
1122+
TEST(TupleViewTests, NonIdentityProjectionValues) {
1123+
const std::string name = "tv_proj";
1124+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1125+
1126+
TupleViewTestCtx ctx(name);
1127+
ctx.insert(42, "hello");
1128+
1129+
// SELECT tag, id (logical 0 -> physical 1, logical 1 -> physical 0)
1130+
std::vector<std::unique_ptr<parser::Expression>> cols;
1131+
cols.push_back(std::make_unique<parser::ColumnExpr>("tag"));
1132+
cols.push_back(std::make_unique<parser::ColumnExpr>("id"));
1133+
1134+
auto scan = std::make_unique<SeqScanOperator>(
1135+
std::make_shared<HeapTable>(name, ctx.sm, ctx.schema), nullptr, nullptr);
1136+
auto proj = std::make_unique<ProjectOperator>(std::move(scan), std::move(cols));
1137+
1138+
ASSERT_TRUE(proj->init());
1139+
ASSERT_TRUE(proj->open());
1140+
1141+
HeapTable::TupleView view;
1142+
ASSERT_TRUE(proj->next_view(view));
1143+
1144+
// Logical column 0 is "tag" -> physical index 1 -> "hello"
1145+
EXPECT_EQ(view.get_value(0).as_text(), "hello");
1146+
// Logical column 1 is "id" -> physical index 0 -> 42
1147+
EXPECT_EQ(view.get_value(1).to_int64(), 42);
1148+
1149+
// No more rows
1150+
EXPECT_FALSE(proj->next_view(view));
1151+
proj->close();
1152+
1153+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1154+
}
1155+
1156+
// 4. Computed-expression projection returns false immediately without consuming rows.
1157+
// Callers must fall back to next() for computed projections.
1158+
TEST(TupleViewTests, ComputedProjectionDoesNotConsumeRows) {
1159+
const std::string name = "tv_computed";
1160+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1161+
1162+
TupleViewTestCtx ctx(name);
1163+
ctx.insert(5, "x");
1164+
ctx.insert(6, "y");
1165+
1166+
// SELECT id + 1 (computed expression — not a simple column reference)
1167+
std::vector<std::unique_ptr<parser::Expression>> cols;
1168+
cols.push_back(std::make_unique<parser::BinaryExpr>(
1169+
std::make_unique<parser::ColumnExpr>("id"), parser::TokenType::Plus,
1170+
std::make_unique<parser::ConstantExpr>(Value::make_int64(1))));
1171+
1172+
auto scan = std::make_unique<SeqScanOperator>(
1173+
std::make_shared<HeapTable>(name, ctx.sm, ctx.schema), nullptr, nullptr);
1174+
auto proj = std::make_unique<ProjectOperator>(std::move(scan), std::move(cols));
1175+
1176+
ASSERT_TRUE(proj->init());
1177+
ASSERT_TRUE(proj->open());
1178+
1179+
// next_view should return false immediately (unsupported path).
1180+
HeapTable::TupleView view;
1181+
EXPECT_FALSE(proj->next_view(view));
1182+
1183+
// Rows must still be readable via the regular next() path.
1184+
// Reopen to reset state — use a fresh operator.
1185+
proj->close();
1186+
1187+
std::vector<std::unique_ptr<parser::Expression>> cols2;
1188+
cols2.push_back(std::make_unique<parser::BinaryExpr>(
1189+
std::make_unique<parser::ColumnExpr>("id"), parser::TokenType::Plus,
1190+
std::make_unique<parser::ConstantExpr>(Value::make_int64(1))));
1191+
1192+
auto scan2 = std::make_unique<SeqScanOperator>(
1193+
std::make_shared<HeapTable>(name, ctx.sm, ctx.schema), nullptr, nullptr);
1194+
auto proj2 = std::make_unique<ProjectOperator>(std::move(scan2), std::move(cols2));
1195+
1196+
ASSERT_TRUE(proj2->init());
1197+
ASSERT_TRUE(proj2->open());
1198+
1199+
Tuple t;
1200+
int count = 0;
1201+
while (proj2->next(t)) {
1202+
count++;
1203+
// id + 1: first row is 5+1=6, second is 6+1=7
1204+
EXPECT_GT(t.get(0).to_int64(), 5);
1205+
}
1206+
proj2->close();
1207+
EXPECT_EQ(count, 2);
1208+
1209+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1210+
}
1211+
1212+
// 5. table_schema is set correctly in next_view (non-null)
1213+
TEST(TupleViewTests, TableSchemaSetByNextView) {
1214+
const std::string name = "tv_schema";
1215+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1216+
1217+
TupleViewTestCtx ctx(name);
1218+
ctx.insert(99, "z");
1219+
1220+
auto iter = ctx.table->scan();
1221+
HeapTable::TupleView view;
1222+
ASSERT_TRUE(iter.next_view(view));
1223+
1224+
EXPECT_NE(view.table_schema, nullptr);
1225+
EXPECT_NE(view.schema, nullptr);
1226+
EXPECT_EQ(view.table_schema->column_count(), 2u);
1227+
1228+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1229+
}
1230+
1231+
// 6. FilterOperator::next_view filters correctly (materializes per-row for condition eval)
1232+
TEST(TupleViewTests, FilterOperatorNextView) {
1233+
const std::string name = "tv_filter";
1234+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1235+
1236+
TupleViewTestCtx ctx(name);
1237+
ctx.insert(1, "a");
1238+
ctx.insert(2, "b");
1239+
ctx.insert(3, "c");
1240+
1241+
// WHERE id >= 2
1242+
auto condition = std::make_unique<parser::BinaryExpr>(
1243+
std::make_unique<parser::ColumnExpr>("id"), parser::TokenType::Ge,
1244+
std::make_unique<parser::ConstantExpr>(Value::make_int64(2)));
1245+
1246+
auto scan = std::make_unique<SeqScanOperator>(
1247+
std::make_shared<HeapTable>(name, ctx.sm, ctx.schema), nullptr, nullptr);
1248+
auto filter = std::make_unique<FilterOperator>(std::move(scan), std::move(condition));
1249+
1250+
ASSERT_TRUE(filter->init());
1251+
ASSERT_TRUE(filter->open());
1252+
1253+
HeapTable::TupleView view;
1254+
int count = 0;
1255+
while (filter->next_view(view)) {
1256+
count++;
1257+
EXPECT_GE(view.get_value(0).to_int64(), 2);
1258+
}
1259+
filter->close();
1260+
1261+
EXPECT_EQ(count, 2);
1262+
static_cast<void>(std::remove(("./test_data/" + name + ".heap").c_str()));
1263+
}
1264+
10251265
} // namespace

0 commit comments

Comments
 (0)