Skip to content

Commit a11c638

Browse files
committed
fix: reject OUTER joins (LEFT/RIGHT/FULL) in distributed shuffle join
The distributed shuffle join algorithm only supports INNER joins. LEFT, RIGHT, and FULL outer joins require different handling (e.g., broadcasting the outer table, or double-shuffle with side tables) that is not yet implemented. Instead of producing incorrect results, we now return a clear error message. Also add unit tests RightJoinRejection and FullJoinRejection to verify this behavior.
1 parent 0650ee0 commit a11c638

2 files changed

Lines changed: 56 additions & 0 deletions

File tree

src/distributed/distributed_executor.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,22 @@ QueryResult DistributedExecutor::execute(const parser::Statement& stmt,
188188
const std::string left_table = select_stmt->from()->to_string();
189189
const std::string right_table = join.table->to_string();
190190

191+
// Check join type - shuffle join only supports INNER joins
192+
if (join.type != parser::SelectStatement::JoinType::Inner) {
193+
QueryResult res;
194+
std::string join_type_name;
195+
switch (join.type) {
196+
case parser::SelectStatement::JoinType::Left: join_type_name = "LEFT"; break;
197+
case parser::SelectStatement::JoinType::Right: join_type_name = "RIGHT"; break;
198+
case parser::SelectStatement::JoinType::Full: join_type_name = "FULL"; break;
199+
default: join_type_name = "OUTER"; break;
200+
}
201+
res.set_error("Distributed Shuffle Join only supports INNER joins. " +
202+
join_type_name +
203+
" joins are not yet supported in distributed mode.");
204+
return res;
205+
}
206+
191207
// Assume join key is in the condition
192208
std::string left_key;
193209
std::string right_key;

tests/distributed_tests.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,4 +447,44 @@ TEST(DistributedExecutorTests, NonEqualityJoinRejection) {
447447
EXPECT_THAT(res.error(), testing::HasSubstr("equality join condition"));
448448
}
449449

450+
TEST(DistributedExecutorTests, RightJoinRejection) {
451+
auto catalog = Catalog::create();
452+
const config::Config config;
453+
ClusterManager cm(&config);
454+
cm.register_node("n1", "127.0.0.1", 7800, config::RunMode::Data);
455+
DistributedExecutor exec(*catalog, cm);
456+
457+
auto lexer =
458+
std::make_unique<Lexer>("SELECT * FROM table1 RIGHT JOIN table2 ON table1.id = table2.id");
459+
Parser parser(std::move(lexer));
460+
auto stmt = parser.parse_statement();
461+
462+
auto res = exec.execute(*stmt, "SELECT * FROM table1 RIGHT JOIN table2 ON table1.id = table2.id");
463+
464+
// Should fail because distributed shuffle join only supports INNER joins
465+
EXPECT_FALSE(res.success());
466+
EXPECT_THAT(res.error(), testing::HasSubstr("only supports INNER joins"));
467+
EXPECT_THAT(res.error(), testing::HasSubstr("RIGHT"));
468+
}
469+
470+
TEST(DistributedExecutorTests, FullJoinRejection) {
471+
auto catalog = Catalog::create();
472+
const config::Config config;
473+
ClusterManager cm(&config);
474+
cm.register_node("n1", "127.0.0.1", 7800, config::RunMode::Data);
475+
DistributedExecutor exec(*catalog, cm);
476+
477+
auto lexer =
478+
std::make_unique<Lexer>("SELECT * FROM table1 FULL JOIN table2 ON table1.id = table2.id");
479+
Parser parser(std::move(lexer));
480+
auto stmt = parser.parse_statement();
481+
482+
auto res = exec.execute(*stmt, "SELECT * FROM table1 FULL JOIN table2 ON table1.id = table2.id");
483+
484+
// Should fail because distributed shuffle join only supports INNER joins
485+
EXPECT_FALSE(res.success());
486+
EXPECT_THAT(res.error(), testing::HasSubstr("only supports INNER joins"));
487+
EXPECT_THAT(res.error(), testing::HasSubstr("FULL"));
488+
}
489+
450490
} // namespace

0 commit comments

Comments
 (0)