@@ -472,8 +472,48 @@ int main(int argc, char* argv[]) {
472472 (void )h;
473473 auto args = cloudsql::network::PushDataArgs::deserialize (p);
474474 if (cluster_manager != nullptr ) {
475- cluster_manager->buffer_shuffle_data (args.context_id , args.table_name ,
476- std::move (args.rows ));
475+ // Apply bloom filter if available for this context
476+ if (cluster_manager->has_bloom_filter (args.context_id )) {
477+ auto bloom = cluster_manager->get_bloom_filter (args.context_id );
478+ std::string probe_key_col = cluster_manager->get_probe_key_col (args.context_id );
479+
480+ // Get probe table schema to find key column index
481+ auto table_meta_opt = catalog->get_table_by_name (args.table_name );
482+ if (table_meta_opt.has_value () && !probe_key_col.empty ()) {
483+ const auto * table_meta = table_meta_opt.value ();
484+ size_t key_idx = static_cast <size_t >(-1 );
485+ for (size_t i = 0 ; i < table_meta->columns .size (); ++i) {
486+ if (table_meta->columns [i].name == probe_key_col) {
487+ key_idx = i;
488+ break ;
489+ }
490+ }
491+
492+ if (key_idx != static_cast <size_t >(-1 )) {
493+ // Filter rows using bloom filter
494+ std::vector<cloudsql::executor::Tuple> filtered_rows;
495+ filtered_rows.reserve (args.rows .size ());
496+ for (auto & row : args.rows ) {
497+ if (bloom.might_contain (row.get (key_idx))) {
498+ filtered_rows.push_back (std::move (row));
499+ }
500+ }
501+ cluster_manager->buffer_shuffle_data (args.context_id , args.table_name ,
502+ std::move (filtered_rows));
503+ } else {
504+ // Key column not found, buffer as-is
505+ cluster_manager->buffer_shuffle_data (args.context_id , args.table_name ,
506+ std::move (args.rows ));
507+ }
508+ } else {
509+ // No metadata, buffer as-is
510+ cluster_manager->buffer_shuffle_data (args.context_id , args.table_name ,
511+ std::move (args.rows ));
512+ }
513+ } else {
514+ cluster_manager->buffer_shuffle_data (args.context_id , args.table_name ,
515+ std::move (args.rows ));
516+ }
477517 }
478518
479519 cloudsql::network::QueryResultsReply reply;
@@ -489,6 +529,31 @@ int main(int argc, char* argv[]) {
489529 static_cast <void >(send (fd, resp_p.data (), resp_p.size (), 0 ));
490530 });
491531
// Registers the RPC handler for RpcType::BloomFilterPush.
//
// The handler deserializes the payload `p` into BloomFilterArgs and, when a
// cluster manager exists, forwards the filter description to
// cluster_manager->set_bloom_filter(). It then writes an acknowledgement to
// `fd`: an encoded RpcHeader of type QueryResults followed by a serialized
// QueryResultsReply.
//
// The reply always reports success = true, including when cluster_manager is
// null and nothing was stored.
// NOTE(review): no failure path is visible here for deserialize() or
// set_bloom_filter(); confirm whether either can fail and should be reported.
//
// The lambda captures by reference ([&]), so everything it touches
// (cluster_manager, ...) must outlive the registered handler.
532+ rpc_server->set_handler (
533+ cloudsql::network::RpcType::BloomFilterPush,
534+ [&](const cloudsql::network::RpcHeader& h, const std::vector<uint8_t >& p,
535+ int fd) {
536+ (void )h; // request header is not needed by this handler
537+ auto args = cloudsql::network::BloomFilterArgs::deserialize (p);
538+ if (cluster_manager != nullptr ) {
539+ cluster_manager->set_bloom_filter (args.context_id , args.build_table ,
540+ args.probe_table , args.probe_key_col ,
541+ args.filter_data , args.expected_elements ,
542+ args.num_hashes );
543+ }
// Build the acknowledgement; success is set unconditionally.
544+ cloudsql::network::QueryResultsReply reply;
545+ reply.success = true ;
546+ auto resp_p = reply.serialize ();
547+ cloudsql::network::RpcHeader resp_h;
548+ resp_h.type = cloudsql::network::RpcType::QueryResults;
// The serialized size is narrowed to 16 bits by this cast.
// NOTE(review): presumably replies stay well under 64 KiB - confirm.
549+ resp_h.payload_len = static_cast <uint16_t >(resp_p.size ());
550+ char h_buf[cloudsql::network::RpcHeader::HEADER_SIZE];
551+ resp_h.encode (h_buf);
// Header first, then payload. Both send() results are explicitly discarded,
// so short writes and socket errors go unnoticed here.
552+ static_cast <void >(
553+ send (fd, h_buf, cloudsql::network::RpcHeader::HEADER_SIZE, 0 ));
554+ static_cast <void >(send (fd, resp_p.data (), resp_p.size (), 0 ));
555+ });
556+
492557 rpc_server->set_handler (
493558 cloudsql::network::RpcType::ShuffleFragment,
494559 [&](const cloudsql::network::RpcHeader& h, const std::vector<uint8_t >& p,
0 commit comments