|
# In this plot we collect the final average reward from all the systems and
# compare them. All data is normalized so that 1 is the value obtained by
# D3C-QL with constraint = 0.
# NOTE(review): normalization divides each run's final reward by that run's
# own first value (curve[0]) — confirm this is the constraint-0 reference.


import numpy as np
import matplotlib.pyplot as plt
import os
import pickle as pk
from scipy.stats import sem
import copy
import matplotlib.patches as mpatches

plt.rcParams.update({'font.size': 12})  # sets default font size everywhere


list_N_agents = [10, 20, 50]   # system sizes to compare
convexity_exponents = [1]      # convexity exponents to consider

initial_folder = os.getcwd()

# One entry per N_agents; each entry is a list of normalized final rewards,
# one value per experiment.
final_data_RL = []
final_data_uniform_constraint = []


for N_agents in list_N_agents:
    print(N_agents)

    folder = os.path.join(initial_folder, "results", "{}_agents".format(N_agents))

    # SECURITY NOTE: pickle.load executes arbitrary code on load — only open
    # result files produced by this project's own runs.
    with open(os.path.join(folder, "results_RL_0_initial_constraint.pkl"), "rb") as f:
        data_D3CQL = pk.load(f)

    with open(os.path.join(folder, "results_RL_uniform_initial.pkl"), "rb") as f:
        data_uniform_constraint = pk.load(f)

    # Each experiment entry is assumed indexable as [1][0] -> reward curve
    # (a sequence over training time) — TODO confirm against the writer side.
    data_RL = [run[1][0] for run in data_D3CQL]
    # Per-experiment normalization constant: the run's initial reward.
    normalization_value = [curve[0] for curve in data_RL]
    final_data_RL.append(
        [curve[-1] / norm for curve, norm in zip(data_RL, normalization_value)])

    data_uniform_constraint = [run[1][0] for run in data_uniform_constraint]
    # Uses the D3C-QL normalization constants, so both result files must hold
    # the same number of experiments in the same order.
    final_data_uniform_constraint.append(
        [data_uniform_constraint[i][-1] / normalization_value[i]
         for i in range(len(data_uniform_constraint))])
| 50 | + |
# Box plot: for each value of N_agents, the distribution over experiments of
# the normalized final average reward, one box per method.
# NOTE(review): labels changed from "DCC-QL" to "D3C-QL" to match the header
# comment and the data variable (data_D3CQL) — confirm the intended name.
methods = ['D3C-QL (naive initial constraint)', 'D3C-QL (improved initial constraint)']
data_to_plot = [final_data_RL, final_data_uniform_constraint]

colors = ['lightblue', 'blue', 'lightcoral', '#713969']  #, 'lightyellow', 'lightpink', 'lightcyan', 'lightgoldenrodyellow']

plt.figure(figsize=(10, 6))
# data_to_plot[m_idx] is already ordered like list_N_agents, so it can be fed
# to boxplot directly (no per-N index lookup needed).
for m_idx, method_data in enumerate(data_to_plot):
    # Offset each method's boxes around the shared x position of its N_agents.
    positions = [i + (m_idx - .5) * 0.4 for i in range(len(list_N_agents))]
    bp = plt.boxplot(method_data, positions=positions, widths=0.4,
                     patch_artist=True, showfliers=False)
    for patch in bp['boxes']:
        patch.set_facecolor(colors[m_idx])

legend_handles = [mpatches.Patch(color=colors[i], linewidth=1.5, label=methods[i])
                  for i in range(len(methods))]
plt.legend(handles=legend_handles, title="Algorithm", loc="lower center",
           fontsize='large', frameon=True, edgecolor='black', facecolor='white',
           ncol=4)

plt.xlabel('N Agents in the system')
plt.ylabel('Final Average Reward (normalized)')
# Single xticks call: positions, labels, and rotation together (the original
# called plt.xticks twice, once for labels and once for rotation).
plt.xticks(range(len(list_N_agents)), list_N_agents, rotation=45)
plt.xlim(-.5, len(list_N_agents) - 0.5)
plt.ylim(.5, 1.1)
plt.grid(True)
plt.tight_layout()
plt.savefig(os.path.join(initial_folder, 'results',
                         'improved_initial_constraint_boxplot.png'),
            dpi=300, bbox_inches='tight')

plt.show()
| 81 | + |
| 82 | + |
0 commit comments