|
# In this plot we collect the final average reward from all the systems and
# compare them. All data is normalized so that 1 is the value obtained by
# D3C-QL with constraint = 0.
# NOTE(review): normalization divides each run's final reward by that run's
# own first value (curve[0]) — confirm this is the constraint-0 reference.


import numpy as np
import matplotlib.pyplot as plt
import os
import pickle as pk
from scipy.stats import sem
import copy
import matplotlib.patches as mpatches

plt.rcParams.update({'font.size': 12})  # sets default font size everywhere


list_N_agents = [10, 20, 50]   # system sizes to compare
convexity_exponents = [1]      # convexity exponents to consider

initial_folder = os.getcwd()

# One entry per N_agents; each entry is a list of normalized final rewards,
# one value per experiment.
final_data_RL = []
final_data_uniform_constraint = []


for N_agents in list_N_agents:
    print(N_agents)

    folder = os.path.join(initial_folder, "results", "{}_agents".format(N_agents))

    # SECURITY NOTE: pickle.load executes arbitrary code on load — only open
    # result files produced by this project's own runs.
    with open(os.path.join(folder, "results_RL_0_initial_constraint.pkl"), "rb") as f:
        data_D3CQL = pk.load(f)

    with open(os.path.join(folder, "results_RL_uniform_initial.pkl"), "rb") as f:
        data_uniform_constraint = pk.load(f)

    # Each experiment entry is assumed indexable as [1][0] -> reward curve
    # (a sequence over training time) — TODO confirm against the writer side.
    data_RL = [run[1][0] for run in data_D3CQL]
    # Per-experiment normalization constant: the run's initial reward.
    normalization_value = [curve[0] for curve in data_RL]
    final_data_RL.append(
        [curve[-1] / norm for curve, norm in zip(data_RL, normalization_value)])

    data_uniform_constraint = [run[1][0] for run in data_uniform_constraint]
    # Uses the D3C-QL normalization constants, so both result files must hold
    # the same number of experiments in the same order.
    final_data_uniform_constraint.append(
        [data_uniform_constraint[i][-1] / normalization_value[i]
         for i in range(len(data_uniform_constraint))])
| 50 | + |
# Box plot: for each value of N_agents, the distribution over experiments of
# the normalized final average reward, one box per method.
# NOTE(review): labels changed from "DCC-QL" to "D3C-QL" to match the header
# comment and the data variable (data_D3CQL) — confirm the intended name.
methods = ['D3C-QL (naive initial constraint)', 'D3C-QL (improved initial constraint)']
data_to_plot = [final_data_RL, final_data_uniform_constraint]

colors = ['lightblue', 'blue', 'lightcoral', '#713969']  #, 'lightyellow', 'lightpink', 'lightcyan', 'lightgoldenrodyellow']

plt.figure(figsize=(10, 6))
# data_to_plot[m_idx] is already ordered like list_N_agents, so it can be fed
# to boxplot directly (no per-N index lookup needed).
for m_idx, method_data in enumerate(data_to_plot):
    # Offset each method's boxes around the shared x position of its N_agents.
    positions = [i + (m_idx - .5) * 0.4 for i in range(len(list_N_agents))]
    bp = plt.boxplot(method_data, positions=positions, widths=0.4,
                     patch_artist=True, showfliers=False)
    for patch in bp['boxes']:
        patch.set_facecolor(colors[m_idx])

legend_handles = [mpatches.Patch(color=colors[i], linewidth=1.5, label=methods[i])
                  for i in range(len(methods))]
plt.legend(handles=legend_handles, title="Algorithm", loc="lower center",
           fontsize='large', frameon=True, edgecolor='black', facecolor='white',
           ncol=4)

plt.xlabel('N Agents in the system')
plt.ylabel('Final Average Reward (normalized)')
# Single xticks call: positions, labels, and rotation together (the original
# called plt.xticks twice, once for labels and once for rotation).
plt.xticks(range(len(list_N_agents)), list_N_agents, rotation=45)
plt.xlim(-.5, len(list_N_agents) - 0.5)
plt.ylim(.5, 1.1)
plt.grid(True)
plt.tight_layout()
plt.savefig(os.path.join(initial_folder, 'results',
                         'improved_initial_constraint_boxplot.png'),
            dpi=300, bbox_inches='tight')

plt.show()
| 81 | + |
| 82 | + |
0 commit comments