|
| 1 | +# here we compare the usage of the offloading action in D3CQL, IQL and MAPPO |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import os |
| 6 | +import pickle as pk |
| 7 | +from scipy.stats import sem |
| 8 | +import copy |
| 9 | +import matplotlib.patches as mpatches |
| 10 | +import pandas as pd |
| 11 | + |
# --- Experiment configuration ---------------------------------------------
# Swarm sizes to compare (one subplot per entry).
N_agents_list = [10]  # , 20, 50]  # , 50]
# Independent training runs per algorithm.
n_experiments = 15
# Convexity exponents to consider.
convexity_exponents = [1]

# Accumulators: one mean curve and one std curve per entry of N_agents_list,
# for each algorithm (D3CQL/"RL", IQL individual, IQL common, MAPPO).
list_mean_data_RL, list_std_data_RL = [], []
list_mean_data_IQL_individual, list_std_data_IQL_individual = [], []
list_mean_data_IQL_common, list_std_data_IQL_common = [], []
list_mean_data_MAPPO, list_std_data_MAPPO = [], []

# All result folders are resolved relative to the current working directory.
initial_folder = os.getcwd()
| 31 | + |
# ---------------------------------------------------------------------------
# Data loading: for each swarm size, build one mean/std "use of offloading
# action" curve per algorithm and append it to the module-level accumulators.
# ---------------------------------------------------------------------------
for N_agents in N_agents_list:

    # All result files for this swarm size live under results/<N>_agents/.
    folder = initial_folder + "/results/{}_agents/".format(N_agents)
    # NOTE(review): these four locals are never used below.
    final_data_RL = []
    final_data_IQL_individual = []
    final_data_IQL_common = []
    final_data_MAPPO = []


    # --- MAPPO: curve exported as a CSV (dashboard-style: Step + value columns)
    data_MAPPO = pd.read_csv(folder + 'frequency_offloading_MAPPO.csv')
    data_MAPPO = data_MAPPO.drop(columns=['Step'])
    # Subsample: first 200 rows and every 3rd column, then every 12th row, and
    # drop the first remaining row.  (The original comment claimed "columns
    # whose index is divisible by 12" — the code takes every 3rd column and
    # every 12th row; confirm which was intended.)
    print(data_MAPPO.shape)
    data_MAPPO = data_MAPPO.iloc[:200, ::3]  # take only the first 200 rows and every third column
    data_MAPPO = data_MAPPO.iloc[::12, :]
    data_MAPPO = data_MAPPO.iloc[1:, :]
    # print(data_MAPPO.shape)
    # Column 0 is used as the mean curve and column 2 as an upper band, so
    # their difference acts as the std — TODO confirm against the CSV layout.
    average_use_constraint_MAPPO = data_MAPPO[data_MAPPO.columns[0]].values
    std_use_constraint_MAPPO = data_MAPPO[data_MAPPO.columns[2]].values - data_MAPPO[data_MAPPO.columns[0]].values
    list_mean_data_MAPPO.append(average_use_constraint_MAPPO)
    list_std_data_MAPPO.append(std_use_constraint_MAPPO)


    # print(average_use_constraint_MAPPO, std_use_constraint_MAPPO, len(average_use_constraint_MAPPO))
    # --- D3CQL ("RL"): pickled per-experiment results; entry [-2] of each
    # experiment is taken as the constraint-use history (presumably shaped
    # evaluations x agents — confirm against the writer of this pickle).
    data_RL = []
    with open(folder + "results_RL_0_initial_constraint.pkl", "rb") as f:
        data_RL = pk.load(f)

    constraint_use_RL = []
    for experiment in range(n_experiments):
        constraint_use_RL.append(data_RL[experiment][-2])
    # print(constraint_use_RL)
    # print('--------------')
    average_use_constraint_RL = []
    for experiment in range(n_experiments):
        experiment_data = constraint_use_RL[experiment]
        # Average over axis 1 (presumably the agents axis) to get one curve
        # per experiment.
        average_experiment_data = np.mean(experiment_data, axis=1)
        # print(experiment_data)
        print(average_experiment_data, len(average_experiment_data))
        average_use_constraint_RL.append(average_experiment_data)
        # raise Exception("Stop here to check the data")
    # print(average_use_constraint_RL)
    # Mean/std over experiments (axis 0) -> one mean curve per swarm size.
    average_use_constraint_RL = np.array(average_use_constraint_RL)
    mean_use_constraint_RL = np.mean(average_use_constraint_RL, axis=0)
    std_use_constraint_RL = np.std(average_use_constraint_RL, axis=0)
    # print(mean_use_constraint_RL, len(mean_use_constraint_RL))
    list_mean_data_RL.append(mean_use_constraint_RL)
    list_std_data_RL.append(std_use_constraint_RL)

    # --- IQL (individual): pickled results; entry [-1] of each experiment is
    # the per-episode constraint-use trace.
    with open(folder + "results_IQL_low_LR.pkl", "rb") as f:
        data_IQL = pk.load(f)
    constraint_use_IQL = []
    for experiment in range(n_experiments):
        constraint_use_IQL.append(data_IQL[experiment][-1])

    # Each experiment trace is a dataframe of size (1000, 10), with 10 being
    # the number of agents; build one evaluation point per 1000 episodes by
    # averaging the last 100 episodes of each 1000-episode block.
    interval_evaluations = 1000
    episodes_to_consider_evaluation = 100

    IQL_data = []
    for experiment in range(n_experiments):
        experiment_data = constraint_use_IQL[experiment]
        # print("Experiment data shape: {}".format(experiment_data.shape))
        final_experiment_data = []
        for evaluation in range(0, len(experiment_data) // interval_evaluations):
            # print("Experiment: {}, Evaluation: {}".format(experiment, evaluation))
            # Window = last `episodes_to_consider_evaluation` episodes of the
            # current `interval_evaluations`-episode block.
            evaluation_data = experiment_data[((evaluation + 1) * interval_evaluations) - episodes_to_consider_evaluation: (evaluation + 1)*(interval_evaluations)]
            evaluation_average_data = np.mean(evaluation_data)
            final_experiment_data.append(evaluation_average_data)
            # print("Evaluation data shape: {}".format(evaluation_average_data.shape))
        # print("Final experiment data shape: {}".format(final_experiment_data))
        IQL_data.append(final_experiment_data)

    # Mean/std over experiments for each evaluation point.
    IQL_data = np.array(IQL_data)
    mean_use_constraint_IQL = np.mean(IQL_data, axis=0)
    std_use_constraint_IQL = np.std(IQL_data, axis=0)
    list_mean_data_IQL_individual.append(mean_use_constraint_IQL)
    list_std_data_IQL_individual.append(std_use_constraint_IQL)

    # print(mean_use_constraint_IQL, len(mean_use_constraint_IQL))
    # print(std_use_constraint_IQL, len(std_use_constraint_IQL))
| 119 | + |
| 120 | + |
| 121 | + |
# ---------------------------------------------------------------------------
# Plotting: one subplot per swarm size, three mean curves (DCC-QL, IQL-I,
# MAPPO), each with a shaded +/- std band, and a single shared legend.
# ---------------------------------------------------------------------------
fig, ax = plt.subplots(1, len(N_agents_list), figsize=(10, 4), sharey=True)
# BUG FIX: with a single subplot, plt.subplots returns a bare Axes (not an
# array), so ax[i] below would raise TypeError.  np.atleast_1d makes ax
# uniformly indexable for any number of subplots.
ax = np.atleast_1d(ax)

color_list = {'DCC-QL': 'lightblue', 'IQL-I': 'lightgreen', 'MAPPO': 'orange'}
algorithms_list = ['DCC-QL', 'IQL-I', 'MAPPO']
linestyles_list = {'DCC-QL': '-', 'IQL-I': '--', 'MAPPO': '-.'}

# If True, plot the full DCC-QL curve; otherwise only the noise-free
# evaluation points listed below.
full_data_D3CQL = False
# Indices of the noise-free evaluations used for the DCC-QL scatter/line.
points_no_noise_evaluation = [0, 3, 6, 9, 12, 15]  # , 18, 21, 24, 27, 30]

for i in range(len(N_agents_list)):

    mean_use_constraint_RL = list_mean_data_RL[i]
    std_use_constraint_RL = list_std_data_RL[i]
    mean_use_constraint_IQL = list_mean_data_IQL_individual[i]
    std_use_constraint_IQL = list_std_data_IQL_individual[i]
    average_use_constraint_MAPPO = list_mean_data_MAPPO[i]
    std_use_constraint_MAPPO = list_std_data_MAPPO[i]

    # --- DCC-QL (D3CQL) ---
    if not full_data_D3CQL:
        ax[i].plot(points_no_noise_evaluation,
                   mean_use_constraint_RL[points_no_noise_evaluation],
                   label='DCC-QL', color=color_list['DCC-QL'],
                   linestyle=linestyles_list['DCC-QL'])
        ax[i].fill_between(points_no_noise_evaluation,
                           mean_use_constraint_RL[points_no_noise_evaluation] - std_use_constraint_RL[points_no_noise_evaluation],
                           mean_use_constraint_RL[points_no_noise_evaluation] + std_use_constraint_RL[points_no_noise_evaluation],
                           color=color_list['DCC-QL'], alpha=0.2)
    else:
        ax[i].plot(mean_use_constraint_RL, label='DCC-QL', color=color_list['DCC-QL'])
        # Highlight the noise-free evaluation points on the full curve.
        ax[i].scatter(points_no_noise_evaluation,
                      mean_use_constraint_RL[points_no_noise_evaluation],
                      color=color_list['DCC-QL'], s=10)
        ax[i].fill_between(range(len(mean_use_constraint_RL)),
                           mean_use_constraint_RL - std_use_constraint_RL,
                           mean_use_constraint_RL + std_use_constraint_RL,
                           color=color_list['DCC-QL'], alpha=0.2)

    # --- IQL (individual) ---
    ax[i].plot(mean_use_constraint_IQL, label='IQL-I',
               color=color_list['IQL-I'], linestyle=linestyles_list['IQL-I'])
    ax[i].fill_between(range(len(mean_use_constraint_IQL)),
                       mean_use_constraint_IQL - std_use_constraint_IQL,
                       mean_use_constraint_IQL + std_use_constraint_IQL,
                       color=color_list['IQL-I'], alpha=0.2)

    # --- MAPPO ---
    ax[i].plot(average_use_constraint_MAPPO, label='MAPPO',
               color=color_list['MAPPO'], linestyle=linestyles_list['MAPPO'])
    ax[i].fill_between(range(len(average_use_constraint_MAPPO)),
                       average_use_constraint_MAPPO - std_use_constraint_MAPPO,
                       average_use_constraint_MAPPO + std_use_constraint_MAPPO,
                       color=color_list['MAPPO'], alpha=0.2)

    # X axis: relabel learning steps in units of 1e6.  (The original had a
    # first set_xticks call that was immediately overwritten by this one;
    # the dead call is removed.)
    labels = [r'$0.1 $', r'$0.4 $', r'$0.7 $', r'$1 $', r'$1.3 $', r'$1.6$']
    ax[i].set_xticks(np.arange(0, len(mean_use_constraint_RL), 3), labels)
    ax[i].set_xlabel(r"Learning steps $(\times 10^6)$")
    ax[i].set_ylabel('Average use of offloading action')
    ax[i].set_title('{} devices'.format(N_agents_list[i]), fontsize=14)
    ax[i].grid(which='both', linewidth=0.5, alpha=0.5, color='black')

# One shared legend above all subplots.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', ncol=3, frameon=True,
           fontsize='large', edgecolor='black', facecolor='white',
           bbox_to_anchor=(0.5, 1.05), bbox_transform=plt.gcf().transFigure)
plt.tight_layout(rect=[0, 0, 1, .94])  # leave space for the legend

# BUG FIX: the original called .format(N_agents) on a path string with no
# placeholder — a no-op; the constant path is used directly.
plt.savefig(initial_folder + '/results/comparison_offloading_action.png',
            dpi=300, bbox_inches='tight')
plt.show()
| 186 | + |
0 commit comments