
Commit 4199efe

Updated files for data visualization
1 parent ac79400 commit 4199efe

12 files changed

Lines changed: 1696 additions & 1 deletion

visualization/plot_boxplot_initial_constraint.py

Lines changed: 1 addition & 1 deletion
@@ -79,4 +79,4 @@
 
 plt.show()
 
-
+
Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
# in this plot we need to collect all the data from the systems and compare the final average reward
# all the data is normalized, so that 1 is the value obtained by D3C-QL with constraint = 0


import numpy as np
import matplotlib.pyplot as plt
import os
import pickle as pk
from scipy.stats import sem
import copy
import matplotlib.patches as mpatches
import pandas as pd

plt.rcParams.update({'font.size': 17})  # sets the default font size everywhere


N_agents = 10  # other value considered: 50
convexity_exponents_list = [0.5, 1, 1.5, 2]  # convexity exponents to consider

initial_folder = os.getcwd()

final_data_RL = []
final_data_IQL_individual = []
final_data_IQL_common = []
final_data_A2C = []
final_data_MAPPO = []

MAX_agents_A2C = 10  # Maximum number of agents for which A2C data is available

for convexity_exponent in convexity_exponents_list:
    print(convexity_exponent)

    # data_D3CQL = []
    # data_IQL = []
    # data_A2C = []

    if convexity_exponent == 1:
        folder = initial_folder + "/results/{}_agents/".format(N_agents)
    else:
        folder = initial_folder + "/results/{}_agents_{}_penalty_exponent/".format(N_agents, convexity_exponent)

    with open(folder + "results_RL_0_initial_constraint.pkl", "rb") as f:
        data_D3CQL = pk.load(f)

    if convexity_exponent == 1:
        with open(folder + "results_IQL.pkl", "rb") as f:
            data_IQL = pk.load(f)
    else:
        with open(folder + "results_IQL_individual.pkl", "rb") as f:
            data_IQL = pk.load(f)

    df_MAPPO = pd.read_csv(folder + 'results_MAPPO.csv')
    # turn the MAPPO data into a suitable format
    df_MAPPO = df_MAPPO.drop(columns=['Step'])
    df_MAPPO = df_MAPPO.iloc[:200, ::3]
    print(df_MAPPO)

    for i in range(len(df_MAPPO.columns)):
        print(i)
        df_MAPPO.rename(columns={df_MAPPO.columns[i]: 'Experiment {}'.format(len(df_MAPPO.columns) - i)}, inplace=True)
    df_MAPPO = df_MAPPO * (1 - 0.95**100) / (1 - 0.95) * N_agents
    print(df_MAPPO)
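    # the factor (1 - 0.95**100) / (1 - 0.95) is the geometric sum of 100 discount steps with gamma = 0.95,
    # so the rescaling above presumably converts the logged per-step value into a discounted return and
    # then multiplies by N_agents to get a system-wide total (assumed gamma and horizon, not stated in the CSV)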



    data_RL = [data_D3CQL[experiment][1][0] for experiment in range(len(data_D3CQL))]
    normalization_value = [data[0] for data in data_RL]
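    # divide each run's final evaluation by its first recorded value, so that 1 corresponds to the
    # D3C-QL result with constraint = 0 mentioned in the header comment (assumed meaning of data[0])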
    final_data_RL.append([data_RL[i][-1]/normalization_value[i] for i in range(len(data_RL))])

    data_IQL_individual = [data_IQL[experiment][1][1] for experiment in range(len(data_IQL))]
    final_data_IQL_individual.append([data_IQL_individual[i][-2]/normalization_value[i] for i in range(len(data_IQL_individual))])



    data_MAPPO = []
    for experiment in range(15):
        # collect the MAPPO data for this experiment
        data_MAPPO_experiment = list(df_MAPPO['Experiment {}'.format(experiment + 1)])
        data_MAPPO.append(data_MAPPO_experiment)

    final_data_MAPPO.append([data_MAPPO[i][-1]/normalization_value[i] for i in range(len(data_MAPPO))])
    # data_MAPPO_experiment = list(df_MAPPO['Experiment {}'.format(experiment + 1)])

# print(data_A2C)

print(final_data_RL)
# print(final_data_IQL_individual)
# print(final_data_MAPPO)


# now we need to make the boxplot: for each convexity exponent we need a list of the final average reward for each method
full_data = False  # If True, include A2C and IQL-C, otherwise only D3C-QL, IQL-I and MAPPO
if full_data:
    methods = ['D3C-QL', 'IQL-I', 'IQL-C', 'A2C', 'MAPPO']
    data_to_plot = [final_data_RL, final_data_IQL_individual, final_data_IQL_common, final_data_A2C, final_data_MAPPO]
    colors = ['lightblue', 'lightgreen', 'lightcoral', '#713969', 'orange']
else:
    methods = ['D3C-QL', 'IQL-I', 'MAPPO']
    data_to_plot = [final_data_RL, final_data_IQL_individual, final_data_MAPPO]
    colors = ['lightblue', 'lightgreen', 'orange']

if full_data:
    plt.figure(figsize=(15, 6))
else:
    plt.figure(figsize=(10, 5))
for m_idx, method in enumerate(methods):
    # for each method, collect the data for all convexity exponents
    method_data = [data_to_plot[m_idx][convexity_exponents_list.index(convexity_exponent)] for convexity_exponent in convexity_exponents_list]

    width = (0.15 if full_data else 0.2)  # width of each box
    shift = (1 if full_data else 0)  # shift for the boxplot positions
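    # offset each method's boxes by one box-width around the integer x position of its group,
    # so the methods end up side by side within every convexity-exponent group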
    positions = [i + (m_idx - 1 - shift) * width for i in range(len(convexity_exponents_list))]
    bp = plt.boxplot(method_data, positions=positions, widths=width, patch_artist=True, showfliers=False)
    for patch in bp['boxes']:
        patch.set_facecolor(colors[m_idx])

legend_handles = [mpatches.Patch(color=colors[i], linewidth=1.5, label=methods[i]) for i in range(len(methods))]
plt.legend(handles=legend_handles, title="Algorithm", loc="lower center", fontsize='large', frameon=True, edgecolor='black', facecolor='white', ncol=5)

plt.xlabel('Convexity exponent of the penalty')
plt.xticks(range(len(convexity_exponents_list)), convexity_exponents_list)
plt.ylabel('Final Reward (normalized)')
plt.xlim(-.5, len(convexity_exponents_list) - 0.5)
plt.ylim(-.1, 2.1)
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(initial_folder + '/results/scalability_comparison_non_linear_penalty.png', dpi=300, bbox_inches='tight')

plt.show()


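The grouped layout above places one box per algorithm at each convexity exponent by offsetting the boxplot positions. A minimal, self-contained sketch of the same idea with hypothetical data (method names and sample values invented for illustration):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
methods = ['Method A', 'Method B', 'Method C']          # hypothetical method names
x_groups = [0.5, 1, 1.5, 2]                             # hypothetical group values (e.g. exponents)
width = 0.2

plt.figure()
for m_idx, method in enumerate(methods):
    data = [rng.normal(1, 0.1, 15) for _ in x_groups]   # hypothetical samples per group
    positions = [i + (m_idx - 1) * width for i in range(len(x_groups))]
    plt.boxplot(data, positions=positions, widths=width, patch_artist=True, showfliers=False)
plt.xticks(range(len(x_groups)), x_groups)
plt.show()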
Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
# here we compare the usage of the offloading action in D3CQL, IQL and MAPPO

import numpy as np
import matplotlib.pyplot as plt
import os
import pickle as pk
from scipy.stats import sem
import copy
import matplotlib.patches as mpatches
import pandas as pd

N_agents_list = [10]  # other values considered: 20, 50
n_experiments = 15
convexity_exponents = [1]  # convexity exponents to consider


list_mean_data_RL = []
list_mean_data_IQL_individual = []
list_mean_data_IQL_common = []
list_mean_data_MAPPO = []

list_std_data_RL = []
list_std_data_IQL_individual = []
list_std_data_IQL_common = []
list_std_data_MAPPO = []


initial_folder = os.getcwd()


for N_agents in N_agents_list:

    folder = initial_folder + "/results/{}_agents/".format(N_agents)
    final_data_RL = []
    final_data_IQL_individual = []
    final_data_IQL_common = []
    final_data_MAPPO = []


    # import the MAPPO data
    data_MAPPO = pd.read_csv(folder + 'frequency_offloading_MAPPO.csv')
    data_MAPPO = data_MAPPO.drop(columns=['Step'])
    # keep the first 200 rows and every third column, then subsample every 12th row
    print(data_MAPPO.shape)
    data_MAPPO = data_MAPPO.iloc[:200, ::3]  # take only the first 200 rows and every third column
    data_MAPPO = data_MAPPO.iloc[::12, :]
    data_MAPPO = data_MAPPO.iloc[1:, :]
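    # the subsampling above presumably aligns the logged MAPPO curve with the evaluation checkpoints
    # used for the other methods (every 12th logged row, dropping the initial point); this is an
    # assumption about the CSV layout, not something stated in the file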
    # print(data_MAPPO.shape)
    average_use_constraint_MAPPO = data_MAPPO[data_MAPPO.columns[0]].values
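    # assumed column layout of the export: column 0 holds the mean and column 2 an upper band
    # (e.g. mean + std), so their difference below is used as the width of the shaded band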
    std_use_constraint_MAPPO = data_MAPPO[data_MAPPO.columns[2]].values - data_MAPPO[data_MAPPO.columns[0]].values
    list_mean_data_MAPPO.append(average_use_constraint_MAPPO)
    list_std_data_MAPPO.append(std_use_constraint_MAPPO)


    # print(average_use_constraint_MAPPO, std_use_constraint_MAPPO, len(average_use_constraint_MAPPO))
    # data D3C
    data_RL = []
    with open(folder + "results_RL_0_initial_constraint.pkl", "rb") as f:
        data_RL = pk.load(f)

    constraint_use_RL = []
    for experiment in range(n_experiments):
        constraint_use_RL.append(data_RL[experiment][-2])
    # print(constraint_use_RL)
    # print('--------------')
    average_use_constraint_RL = []
    for experiment in range(n_experiments):
        experiment_data = constraint_use_RL[experiment]
        average_experiment_data = np.mean(experiment_data, axis=1)
        # print(experiment_data)
        print(average_experiment_data, len(average_experiment_data))
        average_use_constraint_RL.append(average_experiment_data)
    # raise Exception("Stop here to check the data")
    # print(average_use_constraint_RL)
    average_use_constraint_RL = np.array(average_use_constraint_RL)
    mean_use_constraint_RL = np.mean(average_use_constraint_RL, axis=0)
    std_use_constraint_RL = np.std(average_use_constraint_RL, axis=0)
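    # mean and ±1 std across the n_experiments runs at each checkpoint; these feed the shaded bands below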
    # print(mean_use_constraint_RL, len(mean_use_constraint_RL))
    list_mean_data_RL.append(mean_use_constraint_RL)
    list_std_data_RL.append(std_use_constraint_RL)

    # data IQL
    with open(folder + "results_IQL_low_LR.pkl", "rb") as f:
        data_IQL = pk.load(f)
    constraint_use_IQL = []
    for experiment in range(n_experiments):
        constraint_use_IQL.append(data_IQL[experiment][-1])

    # now, for each experiment we have a dataframe of size (1000, 10), with 10 being the number of agents
    # we need to create the evaluations for each experiment, by taking the last 100 episodes of every 1000
    interval_evaluations = 1000
    episodes_to_consider_evaluation = 100

    IQL_data = []
    for experiment in range(n_experiments):
        experiment_data = constraint_use_IQL[experiment]
        # print("Experiment data shape: {}".format(experiment_data.shape))
        final_experiment_data = []
        for evaluation in range(0, len(experiment_data) // interval_evaluations):
            # print("Experiment: {}, Evaluation: {}".format(experiment, evaluation))
            # print("Interval to consider at evaluation {}: {}:{}".format(evaluation, (evaluation + 1) * interval_evaluations - episodes_to_consider_evaluation, (evaluation + 1) * interval_evaluations))
            evaluation_data = experiment_data[((evaluation + 1) * interval_evaluations) - episodes_to_consider_evaluation: (evaluation + 1) * interval_evaluations]
            evaluation_average_data = np.mean(evaluation_data)
            final_experiment_data.append(evaluation_average_data)
            # print("Evaluation data shape: {}".format(evaluation_average_data.shape))
        # print("Final experiment data shape: {}".format(final_experiment_data))
        IQL_data.append(final_experiment_data)

    # now for each evaluation we take the average and the std
    IQL_data = np.array(IQL_data)
    mean_use_constraint_IQL = np.mean(IQL_data, axis=0)
    std_use_constraint_IQL = np.std(IQL_data, axis=0)
    list_mean_data_IQL_individual.append(mean_use_constraint_IQL)
    list_std_data_IQL_individual.append(std_use_constraint_IQL)

    # print(mean_use_constraint_IQL, len(mean_use_constraint_IQL))
    # print(std_use_constraint_IQL, len(std_use_constraint_IQL))


# now we plot everything together
fig, ax = plt.subplots(1, len(N_agents_list), figsize=(10, 4), sharey=True)
ax = np.atleast_1d(ax)  # subplots returns a bare Axes when there is a single column, so make it indexable
color_list = {'DCC-QL': 'lightblue', 'IQL-I': 'lightgreen', 'MAPPO': 'orange'}
algorithms_list = ['DCC-QL', 'IQL-I', 'MAPPO']
linestyles_list = {'DCC-QL': '-', 'IQL-I': '--', 'MAPPO': '-.'}

full_data_D3CQL = False  # if True, we plot the full data of D3CQL, otherwise we plot only the points of the evaluations
# now we plot the data
# draw the mean with a shaded std band, but for D3C-QL keep only the noise-free evaluation points
points_no_noise_evaluation = [0, 3, 6, 9, 12, 15]  # indices of the evaluation points to consider

for i in range(len(N_agents_list)):

    mean_use_constraint_RL = list_mean_data_RL[i]
    std_use_constraint_RL = list_std_data_RL[i]
    mean_use_constraint_IQL = list_mean_data_IQL_individual[i]
    std_use_constraint_IQL = list_std_data_IQL_individual[i]
    average_use_constraint_MAPPO = list_mean_data_MAPPO[i]
    std_use_constraint_MAPPO = list_std_data_MAPPO[i]

    if not full_data_D3CQL:
        ax[i].plot(points_no_noise_evaluation, mean_use_constraint_RL[points_no_noise_evaluation], label='DCC-QL', color=color_list['DCC-QL'], linestyle=linestyles_list['DCC-QL'])
        # plt.scatter(points_to_consider, mean_use_constraint_RL[points_to_consider], color='blue', s=10)  # scatter plot for points
        # plt.scatter(np.arange(len(mean_use_constraint_RL)), mean_use_constraint_RL, color='blue', s=10)  # scatter plot for points
        ax[i].fill_between(points_no_noise_evaluation,
                           mean_use_constraint_RL[points_no_noise_evaluation] - std_use_constraint_RL[points_no_noise_evaluation],
                           mean_use_constraint_RL[points_no_noise_evaluation] + std_use_constraint_RL[points_no_noise_evaluation],
                           color=color_list['DCC-QL'], alpha=0.2)
    else:
        ax[i].plot(mean_use_constraint_RL, label='DCC-QL', color=color_list['DCC-QL'])
        ax[i].scatter(points_no_noise_evaluation, mean_use_constraint_RL[points_no_noise_evaluation], color=color_list['DCC-QL'], s=10)  # mark the evaluation points
        ax[i].fill_between(range(len(mean_use_constraint_RL)),
                           mean_use_constraint_RL - std_use_constraint_RL,
                           mean_use_constraint_RL + std_use_constraint_RL,
                           color=color_list['DCC-QL'], alpha=0.2)

    ax[i].plot(mean_use_constraint_IQL, label='IQL-I', color=color_list['IQL-I'], linestyle=linestyles_list['IQL-I'])
    ax[i].fill_between(range(len(mean_use_constraint_IQL)),
                       mean_use_constraint_IQL - std_use_constraint_IQL,
                       mean_use_constraint_IQL + std_use_constraint_IQL,
                       color=color_list['IQL-I'], alpha=0.2)

    ax[i].plot(average_use_constraint_MAPPO, label='MAPPO', color=color_list['MAPPO'], linestyle=linestyles_list['MAPPO'])
    ax[i].fill_between(range(len(average_use_constraint_MAPPO)),
                       average_use_constraint_MAPPO - std_use_constraint_MAPPO,
                       average_use_constraint_MAPPO + std_use_constraint_MAPPO,
                       color=color_list['MAPPO'], alpha=0.2)

    # plt.xlabel('Learning steps')
    ax[i].set_xticks(np.arange(0, len(mean_use_constraint_RL), 1))
    labels = [r'$0.1 $', r'$0.4 $', r'$0.7 $', r'$1 $', r'$1.3 $', r'$1.6$']
    ax[i].set_xticks(np.arange(0, len(mean_use_constraint_RL), 3), labels)
    # plt.xtick_labels(labels)
    ax[i].set_xlabel(r"Learning steps $(\times 10^6)$")
    ax[i].set_ylabel('Average use of offloading action')
    ax[i].set_title('{} devices'.format(N_agents_list[i]), fontsize=14)
    ax[i].grid(which='both', linewidth=0.5, alpha=0.5, color='black')
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', ncol=3, frameon=True, fontsize='large', edgecolor='black', facecolor='white', bbox_to_anchor=(0.5, 1.05), bbox_transform=plt.gcf().transFigure)
plt.tight_layout(rect=[0, 0, 1, .94])  # leave space for the legend
# plt.title('IQL MISSING')

plt.savefig(initial_folder + '/results/comparison_offloading_action.png', dpi=300, bbox_inches='tight')
plt.show()
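The per-algorithm curves above all follow the same mean-plus-shaded-std pattern. A minimal, self-contained sketch of that pattern with hypothetical data (run count and values invented for illustration):

import numpy as np
import matplotlib.pyplot as plt

runs = np.random.default_rng(1).normal(0.5, 0.1, size=(15, 16))  # hypothetical (runs, checkpoints) data
mean = runs.mean(axis=0)
std = runs.std(axis=0)

fig, ax = plt.subplots()
ax.plot(mean, label='mean over runs')
ax.fill_between(range(len(mean)), mean - std, mean + std, alpha=0.2)  # ±1 std band
ax.legend()
plt.show()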
