@@ -516,6 +516,31 @@ int main(int argc, char* argv[]) {
516516 static_cast <void >(send (fd, resp_p.data (), resp_p.size (), 0 ));
517517 });
518518
519+ // Handler for collecting local bloom filter bits from data nodes
520+ // Coordinator calls this after Phase 1 to aggregate bloom filters
521+ rpc_server->set_handler (
522+ cloudsql::network::RpcType::BloomFilterBits,
523+ [&](const cloudsql::network::RpcHeader& h, const std::vector<uint8_t >& p,
524+ int fd) {
525+ (void )h;
526+ auto args = cloudsql::network::BloomFilterBitsArgs::deserialize (p);
527+ cloudsql::network::BloomFilterBitsArgs reply_args;
528+ reply_args.context_id = args.context_id ;
529+ reply_args.filter_data = cluster_manager->get_local_bloom_bits (args.context_id );
530+ reply_args.expected_elements = cluster_manager->get_local_expected_elements ();
531+ reply_args.num_hashes = cluster_manager->get_local_num_hashes ();
532+
533+ auto resp_p = reply_args.serialize ();
534+ cloudsql::network::RpcHeader resp_h;
535+ resp_h.type = cloudsql::network::RpcType::QueryResults;
536+ resp_h.payload_len = static_cast <uint16_t >(resp_p.size ());
537+ char h_buf[cloudsql::network::RpcHeader::HEADER_SIZE];
538+ resp_h.encode (h_buf);
539+ static_cast <void >(
540+ send (fd, h_buf, cloudsql::network::RpcHeader::HEADER_SIZE, 0 ));
541+ static_cast <void >(send (fd, resp_p.data (), resp_p.size (), 0 ));
542+ });
543+
519544 rpc_server->set_handler (
520545 cloudsql::network::RpcType::ShuffleFragment,
521546 [&](const cloudsql::network::RpcHeader& h, const std::vector<uint8_t >& p,
@@ -556,11 +581,18 @@ int main(int argc, char* argv[]) {
556581 partitions[node.id ] = {};
557582 }
558583
584+ // Estimate expected elements for bloom filter
585+ // For now, use a fixed placeholder of 1000 elements (will be refined with actual count)
586+ size_t estimated_count = 1000 ;
587+ cloudsql::common::BloomFilter local_bloom (estimated_count);
588+
559589 auto iter = table.scan ();
560590 cloudsql::storage::HeapTable::TupleMeta t_meta;
561591 while (iter.next_meta (t_meta)) {
562592 if (t_meta.xmax == 0 ) { // Visible
563593 const auto & key_val = t_meta.tuple .get (key_idx);
594+ // Build bloom filter from join key values
595+ local_bloom.insert (key_val);
564596 uint32_t node_idx =
565597 cloudsql::cluster::ShardManager::compute_shard (
566598 key_val, static_cast <uint32_t >(data_nodes.size ()));
@@ -569,6 +601,14 @@ int main(int argc, char* argv[]) {
569601 }
570602 }
571603
604+ // Store local bloom filter bits for coordinator to collect
605+ // The coordinator will aggregate these during Phase 1
606+ auto bloom_bits = local_bloom.serialize ();
607+ cluster_manager->set_local_bloom_bits (
608+ args.context_id , bloom_bits,
609+ local_bloom.expected_elements (),
610+ local_bloom.num_hashes ());
611+
572612 bool overall_success = true ;
573613 std::string delivery_errors;
574614
0 commit comments