|
| 1 | +"""Tests for GSP-B (full-broadcast variant) state construction. |
| 2 | +
|
| 3 | +GSP-B: each agent's input is [self_prox, self_prev_gsp, other_0_prox, |
| 4 | +other_0_prev_gsp, other_1_prox, other_1_prev_gsp, ..., other_{n-2}_prox, |
| 5 | +other_{n-2}_prev_gsp] (n = n_agents), length 2*n_agents. Self-first ordering. |
| 6 | +
|
| 7 | +Known limitation (inherited from plain GSP): the network input size is |
| 8 | +coupled to n_agents, so a trained GSP-B policy only transfers to the same |
| 9 | +team size. This is the tradeoff vs GSP-N's fixed (self + n_hop_neighbors) |
| 10 | +input which transfers across team sizes. |
| 11 | +""" |
| 12 | + |
| 13 | +import numpy as np |
| 14 | +import pytest |
| 15 | + |
| 16 | +from src.agent import Agent |
| 17 | + |
| 18 | + |
# Hyperparameter dictionary handed to every Agent built in this module.
# Exploration (EPSILON*) and NOISE are zeroed and WARMUP is 0.
BASE_CONFIG = dict(
    GAMMA=0.99,
    TAU=0.005,
    ALPHA=0.001,
    BETA=0.002,
    LR=0.0001,
    EPSILON=0.0,
    EPS_MIN=0.0,
    EPS_DEC=0.0,
    BATCH_SIZE=16,
    MEM_SIZE=1000,
    REPLACE_TARGET_COUNTER=10,
    NOISE=0.0,
    UPDATE_ACTOR_ITER=1,
    WARMUP=0,
    GSP_LEARNING_FREQUENCY=1,
    GSP_BATCH_SIZE=16,
)
| 26 | + |
| 27 | + |
def make_agent(n_agents=4, network="DDQN", broadcast=True, neighbors=False):
    """Build a GSP-enabled ``Agent`` with small, fixed dimensions for these tests.

    Args:
        n_agents: Team size forwarded to ``Agent``.
        network: Network name forwarded to ``Agent``.
        broadcast: Forwarded as ``Agent(broadcast=...)`` to select the
            full-broadcast GSP variant.
        neighbors: Forwarded as ``Agent(neighbors=...)``. Defaults to
            ``False``, the value this helper always used before the
            parameter existed, so existing callers are unaffected.

    Returns:
        The constructed ``Agent`` (id 0, 8 observations, 4 actions).
    """
    return Agent(
        # Give each agent its own copy of the shared module-level dict so a
        # constructor that edits its config cannot leak state between tests.
        # All values are immutable scalars, so a shallow copy is sufficient.
        config=dict(BASE_CONFIG),
        network=network,
        n_agents=n_agents,
        n_obs=8,
        n_actions=4,
        options_per_action=3,
        id=0,
        min_max_action=1.0,
        meta_param_size=1,
        gsp=True,
        recurrent=False,
        attention=False,
        neighbors=neighbors,
        broadcast=broadcast,
        # The size tests in this module expect this value to be replaced by
        # 2 * n_agents when broadcast=True and kept as-is otherwise.
        gsp_input_size=4,
        gsp_output_size=1,
        gsp_min_max_action=1.0,
        gsp_look_back=2,
        gsp_sequence_length=5,
    )
| 50 | + |
| 51 | + |
def test_broadcast_agent_has_gsp_broadcast_property_true():
    """An agent built with the helper's default broadcast=True reports it."""
    broadcast_agent = make_agent()
    # Identity check: the attribute must be the bool True, not merely truthy.
    assert broadcast_agent.gsp_broadcast is True
| 55 | + |
| 56 | + |
def test_broadcast_agent_gsp_input_size_is_two_times_n_agents():
    """Four agents give a broadcast input of 8: one (prox, prev_gsp) pair for self plus three for the others."""
    expected_input_size = 8
    four_agent_team = make_agent(n_agents=4)
    assert four_agent_team.gsp_network_input == expected_input_size
| 61 | + |
| 62 | + |
def test_broadcast_agent_gsp_input_size_scales_with_n_agents():
    """Eight agents give an input of 16; the size is tied to team size (known limitation)."""
    expected_input_size = 16
    eight_agent_team = make_agent(n_agents=8)
    assert eight_agent_team.gsp_network_input == expected_input_size
| 67 | + |
| 68 | + |
def test_make_gsp_states_broadcast_returns_one_state_per_agent():
    """Four agents yield four broadcast states, each with eight entries."""
    agent = make_agent(n_agents=4)
    prox_readings = [0.1, 0.2, 0.3, 0.4]
    previous_predictions = [-0.5, 0.0, 0.25, 0.75]

    states = agent.make_gsp_states_broadcast(prox_readings, previous_predictions)

    assert len(states) == 4
    # Every per-agent state must carry 2 * n_agents values.
    assert all(len(state) == 8 for state in states)
| 77 | + |
| 78 | + |
def test_make_gsp_states_broadcast_self_first_ordering():
    """Each agent's state must open with its own (prox, prev_gsp) pair."""
    agent = make_agent(n_agents=4)
    prox = [0.11, 0.22, 0.33, 0.44]
    prev_gsp = [-0.1, -0.2, -0.3, -0.4]

    states = agent.make_gsp_states_broadcast(prox, prev_gsp)

    # Walk the two input lists in lockstep; index i selects agent i's state.
    for i, (own_prox, own_prev) in enumerate(zip(prox, prev_gsp)):
        state = states[i]
        assert state[0] == pytest.approx(own_prox), f"agent {i} self_prox"
        assert state[1] == pytest.approx(own_prev), f"agent {i} self_prev_gsp"
| 88 | + |
| 89 | + |
def test_make_gsp_states_broadcast_others_in_order():
    """Following the self pair, the other agents appear in ascending id order with self skipped."""
    agent = make_agent(n_agents=4)
    prox = [0.10, 0.20, 0.30, 0.40]
    prev_gsp = [0.01, 0.02, 0.03, 0.04]

    states = agent.make_gsp_states_broadcast(prox, prev_gsp)

    # Expected flattened state keyed by agent id; comments list the order of
    # the "other" agents that follow the self pair.
    expected_by_agent = {
        0: [0.10, 0.01, 0.20, 0.02, 0.30, 0.03, 0.40, 0.04],  # others: 1, 2, 3
        2: [0.30, 0.03, 0.10, 0.01, 0.20, 0.02, 0.40, 0.04],  # others: 0, 1, 3
        3: [0.40, 0.04, 0.10, 0.01, 0.20, 0.02, 0.30, 0.03],  # others: 0, 1, 2
    }
    for agent_id, expected_state in expected_by_agent.items():
        assert list(states[agent_id]) == pytest.approx(expected_state)
| 102 | + |
| 103 | + |
def test_broadcast_is_mutually_exclusive_with_neighbors():
    """Requesting neighbors=True together with broadcast=True must be rejected, since both overload gsp_input_size."""
    conflicting_kwargs = {
        "config": BASE_CONFIG,
        "network": "DDQN",
        "n_agents": 4,
        "n_obs": 8,
        "n_actions": 4,
        "options_per_action": 3,
        "id": 0,
        "min_max_action": 1.0,
        "meta_param_size": 1,
        "gsp": True,
        "recurrent": False,
        "attention": False,
        # The conflicting pair under test.
        "neighbors": True,
        "broadcast": True,
        "gsp_input_size": 4,
        "gsp_output_size": 1,
        "gsp_min_max_action": 1.0,
        "gsp_look_back": 2,
        "gsp_sequence_length": 5,
    }
    # Either exception type is accepted as a rejection of the combination.
    with pytest.raises((ValueError, AssertionError)):
        Agent(**conflicting_kwargs)
| 116 | + |
| 117 | + |
def test_plain_gsp_without_broadcast_unchanged():
    """With neither neighbors nor broadcast, the agent keeps the legacy input size."""
    plain_agent = make_agent(broadcast=False)
    # The helper passes gsp_input_size=4; plain GSP is expected to use it as-is.
    legacy_input_size = 4
    assert plain_agent.gsp_network_input == legacy_input_size
    assert plain_agent.gsp_broadcast is False
0 commit comments