Skip to content

Commit d452013

Browse files
authored
Merge pull request #16 from NESTLab/feat/gsp-b-broadcast
feat(agent): add GSP-B (full-broadcast) variant
2 parents ceaa2f6 + 2b1f2f2 commit d452013

3 files changed

Lines changed: 203 additions & 11 deletions

File tree

rl_code/Main.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
'recurrent': config['RECURRENT'],
112112
'attention': config['ATTENTION'],
113113
'neighbors': config['NEIGHBORS'],
114+
'broadcast': config.get('BROADCAST', False),
114115
'gsp_input_size':config['GSP_INPUT_SIZE'],
115116
'gsp_output_size':config['GSP_OUTPUT_SIZE'],
116117
'gsp_look_back':config['GSP_LOOK_BACK'],
@@ -362,6 +363,11 @@
362363
if model.gsp_neighbors:
363364
agent_gsp_states = model.make_gsp_states(agent_prox_flags, old_heading_gsp)
364365
ctde_gsp = model.choose_agent_gsp(agent_gsp_states, test_mode)
366+
elif model.gsp_broadcast:
367+
# GSP-B: per-agent self-centric view with full-broadcast
368+
# [self_prox, self_prev_gsp, other_i_prox, other_i_prev_gsp, ...]
369+
agent_gsp_states = model.make_gsp_states_broadcast(agent_prox_flags, old_heading_gsp)
370+
ctde_gsp = model.choose_agent_gsp(agent_gsp_states, test_mode)
365371
else:
366372
ctde_gsp = model.choose_agent_gsp(agent_prox_flags, test_mode)
367373
for i in range(Utility.params['num_robots']):
@@ -377,20 +383,27 @@
377383
states, state_prox_flags = model.make_gsp_states(old_agent_prox_flags, neighbors_old_heading_gsp, True)
378384
new_states = model.make_gsp_states(agent_prox_flags, old_heading_gsp)
379385
for i in range(Utility.params['num_robots']):
380-
# print(f'[AGENT] {i} PROX FLAGS:', state_prox_flags[i])
381-
# only store if state has value
382386
if np.sum(state_prox_flags[i]) > 0:
383-
# print(f'[AGENT] {i} Has Value, Storing GSP State: {states[i]}')
384387
if model.gsp_networks['learning_scheme'] == 'attention':
385388
model.store_gsp_transition(states[i], label, 0, 0, 0)
386389
else:
387-
# Under the direct-MSE GSP training path, the 2nd arg
388-
# (action field) carries the supervised target label.
389-
# See GSP-RL fix/gsp-direct-mse-training PR #24 and
390-
# Stelaris docs/research/2026-04-13-gsp-information-collapse-analysis.md.
390+
# 2nd arg = label (supervised target for direct-MSE GSP training)
391391
state = states[i]
392392
new_state = new_states[i]
393393
model.store_gsp_transition(state, label, 0, new_state, 0)
394+
elif model.gsp_broadcast:
395+
# GSP-B per-agent storage with broadcast inputs.
396+
# state_t : broadcast view at previous step (uses neighbors_old_heading_gsp so
397+
# the prev_gsp slot reflects the prediction from the previous tick)
398+
# state_{t+1}: broadcast view at current step
399+
states = model.make_gsp_states_broadcast(old_agent_prox_flags, neighbors_old_heading_gsp)
400+
new_states = model.make_gsp_states_broadcast(agent_prox_flags, old_heading_gsp)
401+
for i in range(Utility.params['num_robots']):
402+
# Gate on self-prox being non-zero so we only store informative transitions,
403+
# matching the GSP and GSP-N branches. Self-prox lives at index 0 under the
404+
# self-first layout.
405+
if states[i][0] != 0:
406+
model.store_gsp_transition(states[i], label, 0, new_states[i], 0)
394407
else:
395408
for i in range(Utility.params['num_robots']):
396409
if model.gsp_networks['learning_scheme'] == 'attention':

rl_code/src/agent.py

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,25 @@ def __init__(
3232
gsp_min_max_action: float,
3333
gsp_look_back: int,
3434
gsp_sequence_length: int,
35+
broadcast: bool = False,
3536
prox_filter_angle_deg: float = 45.0,
3637
n_hop_neighbors: int = 1,
3738
):
39+
if neighbors and broadcast:
40+
raise ValueError(
41+
"GSP variants neighbors=True and broadcast=True are mutually exclusive — "
42+
"they overload gsp_input_size differently. Pick one."
43+
)
3844
if neighbors:
3945
# 2 inputs from ownship (prev_gsp, avg_prox)
4046
# 2 inputs from each neighbor (prev_gsp, avg_prox)
4147
# 2*n_hop_neighbors for symmetry in both CW and CCW
42-
gsp_input_size = 2+2*(n_hop_neighbors*2)
48+
gsp_input_size = 2+2*(n_hop_neighbors*2)
49+
if broadcast:
50+
# GSP-B: each agent's view is (self_prox, self_prev_gsp) + (other_prox, other_prev_gsp)
51+
# for all (n_agents - 1) other agents. Total 2*n_agents. Known limitation:
52+
# coupled to team size, not transferable across num_robots.
53+
gsp_input_size = 2 * n_agents
4354

4455
output_size = n_actions
4556
if network in ['DQN', 'DDQN']:
@@ -68,13 +79,16 @@ def __init__(
6879
self._network = network
6980
self._n_actions = n_actions
7081
self._neighbors = neighbors
82+
self._broadcast = broadcast
7183
self._n_hop_neighbors = n_hop_neighbors
7284
self.neighbors_dict = {}
7385
self._options_per_action = options_per_action
7486
self._prox_filter_angle_deg = prox_filter_angle_deg
7587

7688

77-
if self._neighbors:
89+
if self._neighbors or self._broadcast:
90+
# Per-agent observation ring buffers: GSP-N and GSP-B both produce
91+
# per-agent self-centric views, so each agent has its own history.
7892
self.gsp_observation = []
7993
for _ in range(self._n_agents):
8094
self.gsp_observation.append([[0 for _ in range(self.gsp_network_input)] for _ in range(self.gsp_sequence_length)])
@@ -98,6 +112,10 @@ def __init__(
98112
def gsp_neighbors(self):
99113
return self._neighbors
100114

115+
@property
116+
def gsp_broadcast(self):
117+
return self._broadcast
118+
101119
@property
102120
def n_agents(self):
103121
return self._n_agents
@@ -155,6 +173,40 @@ def make_agent_state(self, env_obs, heading_gsp=None, global_knowledge=None):
155173
env_obs = np.concatenate((env_obs, global_knowledge))
156174
return env_obs
157175

176+
def make_gsp_states_broadcast(self, agent_prox_values, agent_prev_gsp):
177+
"""Build per-agent GSP inputs for GSP-B (full-broadcast variant).
178+
179+
Each agent's view is self-first: [self_prox, self_prev_gsp, other_0_prox,
180+
    other_0_prev_gsp, other_1_prox, other_1_prev_gsp, ..., other_{n-2}_prox,
181+
    other_{n-2}_prev_gsp] (with n = n_agents, there are n-1 "other" pairs). "other" iterates all agents in ascending id order,
182+
skipping self. Total length = 2 * n_agents.
183+
184+
Known limitation: the network input size is coupled to n_agents, so a
185+
trained GSP-B policy does not transfer to teams of different size. This
186+
is the tradeoff vs GSP-N, which uses fixed (self + n_hop_neighbors * 2)
187+
inputs and transfers across team sizes.
188+
"""
189+
states = []
190+
for agent in range(self._n_agents):
191+
agent_state = np.zeros(self.gsp_network_input)
192+
# Self first
193+
agent_state[0] = agent_prox_values[agent]
194+
agent_state[1] = agent_prev_gsp[agent]
195+
i = 2
196+
# Then every other agent in ascending id order, skipping self
197+
for other in range(self._n_agents):
198+
if other == agent:
199+
continue
200+
agent_state[i] = agent_prox_values[other]
201+
agent_state[i + 1] = agent_prev_gsp[other]
202+
i += 2
203+
# Maintain gsp_observation ring buffer the same way make_gsp_states does,
204+
# so recurrent/attention variants can still see sequences if added later.
205+
self.gsp_observation[agent].pop(0)
206+
self.gsp_observation[agent].append(agent_state)
207+
states.append(agent_state)
208+
return states
209+
158210
def make_gsp_states(self, agent_prox_values, agent_prev_gsp, return_prox_flags = False):
159211
states = []
160212
prox_flags = []
@@ -242,7 +294,11 @@ def choose_agent_action(self, observation, failures, test=False):
242294
return actions, action_num
243295

244296
def choose_agent_gsp(self, agent_gsp_states, test = False):
245-
if self._neighbors:
297+
if self._neighbors or self._broadcast:
298+
# Per-agent predictions with self-centric inputs. GSP-N (neighbors)
299+
# and GSP-B (broadcast) share the same per-agent forward-pass shape;
300+
# only the input vector differs. Non-recurrent broadcast uses the
301+
# same stateless path as non-recurrent neighbors.
246302
actions = []
247303
for i in range(self._n_agents):
248304
if self.recurrent_gsp:
@@ -257,7 +313,7 @@ def choose_agent_gsp(self, agent_gsp_states, test = False):
257313
)
258314
# Take the last timestep's action
259315
actions.append(action_tensor[-1].cpu().detach().numpy())
260-
else:
316+
else:
261317
actions.append(self.choose_action(agent_gsp_states[i], self.gsp_networks, test))
262318
return actions
263319
else:
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
"""Tests for GSP-B (full-broadcast variant) state construction.
2+
3+
GSP-B: each agent's input is [self_prox, self_prev_gsp, other_0_prox,
4+
other_0_prev_gsp, other_1_prox, other_1_prev_gsp, ..., other_{n-2}_prox,
5+
other_{n-2}_prev_gsp] where n = n_agents (n-1 "other" pairs), length 2*n_agents. Self-first ordering.
6+
7+
Known limitation (inherited from plain GSP): the network input size is
8+
coupled to n_agents, so a trained GSP-B policy only transfers to the same
9+
team size. This is the tradeoff vs GSP-N's fixed (self + n_hop_neighbors)
10+
input which transfers across team sizes.
11+
"""
12+
13+
import numpy as np
14+
import pytest
15+
16+
from src.agent import Agent
17+
18+
19+
BASE_CONFIG = {
20+
"GAMMA": 0.99, "TAU": 0.005, "ALPHA": 0.001, "BETA": 0.002, "LR": 0.0001,
21+
"EPSILON": 0.0, "EPS_MIN": 0.0, "EPS_DEC": 0.0,
22+
"BATCH_SIZE": 16, "MEM_SIZE": 1000, "REPLACE_TARGET_COUNTER": 10,
23+
"NOISE": 0.0, "UPDATE_ACTOR_ITER": 1, "WARMUP": 0,
24+
"GSP_LEARNING_FREQUENCY": 1, "GSP_BATCH_SIZE": 16,
25+
}
26+
27+
28+
def make_agent(n_agents=4, network="DDQN", broadcast=True):
29+
return Agent(
30+
config=BASE_CONFIG,
31+
network=network,
32+
n_agents=n_agents,
33+
n_obs=8,
34+
n_actions=4,
35+
options_per_action=3,
36+
id=0,
37+
min_max_action=1.0,
38+
meta_param_size=1,
39+
gsp=True,
40+
recurrent=False,
41+
attention=False,
42+
neighbors=False,
43+
broadcast=broadcast,
44+
gsp_input_size=4, # overridden when broadcast=True
45+
gsp_output_size=1,
46+
gsp_min_max_action=1.0,
47+
gsp_look_back=2,
48+
gsp_sequence_length=5,
49+
)
50+
51+
52+
def test_broadcast_agent_has_gsp_broadcast_property_true():
53+
agent = make_agent()
54+
assert agent.gsp_broadcast is True
55+
56+
57+
def test_broadcast_agent_gsp_input_size_is_two_times_n_agents():
58+
"""For 4 agents, the broadcast input is the self (prox, prev_gsp) pair plus 3 other pairs: 2 + 3*2 = 8."""
59+
agent = make_agent(n_agents=4)
60+
assert agent.gsp_network_input == 8
61+
62+
63+
def test_broadcast_agent_gsp_input_size_scales_with_n_agents():
64+
"""For 8 agents, input is 16. Known limitation: coupled to team size."""
65+
agent = make_agent(n_agents=8)
66+
assert agent.gsp_network_input == 16
67+
68+
69+
def test_make_gsp_states_broadcast_returns_one_state_per_agent():
70+
agent = make_agent(n_agents=4)
71+
prox = [0.1, 0.2, 0.3, 0.4]
72+
prev_gsp = [-0.5, 0.0, 0.25, 0.75]
73+
states = agent.make_gsp_states_broadcast(prox, prev_gsp)
74+
assert len(states) == 4
75+
for s in states:
76+
assert len(s) == 8
77+
78+
79+
def test_make_gsp_states_broadcast_self_first_ordering():
80+
"""For each agent i, the first two entries must be (prox[i], prev_gsp[i])."""
81+
agent = make_agent(n_agents=4)
82+
prox = [0.11, 0.22, 0.33, 0.44]
83+
prev_gsp = [-0.1, -0.2, -0.3, -0.4]
84+
states = agent.make_gsp_states_broadcast(prox, prev_gsp)
85+
for i in range(4):
86+
assert states[i][0] == pytest.approx(prox[i]), f"agent {i} self_prox"
87+
assert states[i][1] == pytest.approx(prev_gsp[i]), f"agent {i} self_prev_gsp"
88+
89+
90+
def test_make_gsp_states_broadcast_others_in_order():
91+
"""After the self-pair, the remaining entries are other agents in ascending id order (skipping self)."""
92+
agent = make_agent(n_agents=4)
93+
prox = [0.10, 0.20, 0.30, 0.40]
94+
prev_gsp = [0.01, 0.02, 0.03, 0.04]
95+
states = agent.make_gsp_states_broadcast(prox, prev_gsp)
96+
# Agent 0: self=0, others=[1, 2, 3]
97+
assert list(states[0]) == pytest.approx([0.10, 0.01, 0.20, 0.02, 0.30, 0.03, 0.40, 0.04])
98+
# Agent 2: self=2, others=[0, 1, 3]
99+
assert list(states[2]) == pytest.approx([0.30, 0.03, 0.10, 0.01, 0.20, 0.02, 0.40, 0.04])
100+
# Agent 3: self=3, others=[0, 1, 2]
101+
assert list(states[3]) == pytest.approx([0.40, 0.04, 0.10, 0.01, 0.20, 0.02, 0.30, 0.03])
102+
103+
104+
def test_broadcast_is_mutually_exclusive_with_neighbors():
105+
"""Can't have both neighbors=True and broadcast=True; they overload gsp_input_size."""
106+
with pytest.raises((ValueError, AssertionError)):
107+
Agent(
108+
config=BASE_CONFIG,
109+
network="DDQN", n_agents=4, n_obs=8, n_actions=4,
110+
options_per_action=3, id=0, min_max_action=1.0, meta_param_size=1,
111+
gsp=True, recurrent=False, attention=False,
112+
neighbors=True, broadcast=True,
113+
gsp_input_size=4, gsp_output_size=1,
114+
gsp_min_max_action=1.0, gsp_look_back=2, gsp_sequence_length=5,
115+
)
116+
117+
118+
def test_plain_gsp_without_broadcast_unchanged():
119+
"""Plain GSP (neighbors=False, broadcast=False) keeps the legacy input size."""
120+
agent = make_agent(broadcast=False)
121+
# Should fall through to the config-provided gsp_input_size=4
122+
assert agent.gsp_network_input == 4
123+
assert agent.gsp_broadcast is False

0 commit comments

Comments
 (0)