
Commit 4199efe

Updated files for data visualization
1 parent ac79400 commit 4199efe

12 files changed

Lines changed: 1696 additions & 1 deletion

visualization/plot_boxplot_initial_constraint.py

Lines changed: 1 addition & 1 deletion
@@ -79,4 +79,4 @@
 
 plt.show()
 
-
+
Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
# in this plot we need to collect all the data from the systems and compare the final average reward
# all the data is normalized, so that 1 is the value obtained by D3C-QL with constraint = 0


import numpy as np
import matplotlib.pyplot as plt
import os
import pickle as pk
from scipy.stats import sem
import copy
import matplotlib.patches as mpatches
import pandas as pd

plt.rcParams.update({'font.size': 17})  # sets the default font size everywhere


N_agents = 10  # other value considered: 50
convexity_exponents_list = [0.5, 1, 1.5, 2]  # convexity exponents to consider

initial_folder = os.getcwd()

final_data_RL = []
final_data_IQL_individual = []
final_data_IQL_common = []
final_data_A2C = []
final_data_MAPPO = []

MAX_agents_A2C = 10  # Maximum number of agents for which A2C data is available

for convexity_exponent in convexity_exponents_list:
    print(convexity_exponent)

    # data_D3CQL = []
    # data_IQL = []
    # data_A2C = []

    if convexity_exponent == 1:
        folder = initial_folder + "/results/{}_agents/".format(N_agents)
    else:
        folder = initial_folder + "/results/{}_agents_{}_penalty_exponent/".format(N_agents, convexity_exponent)

    with open(folder + "results_RL_0_initial_constraint.pkl", "rb") as f:
        data_D3CQL = pk.load(f)

    if convexity_exponent == 1:
        with open(folder + "results_IQL.pkl", "rb") as f:
            data_IQL = pk.load(f)
    else:
        with open(folder + "results_IQL_individual.pkl", "rb") as f:
            data_IQL = pk.load(f)

    df_MAPPO = pd.read_csv(folder + 'results_MAPPO.csv')
    # turn the MAPPO data into a suitable format
    df_MAPPO = df_MAPPO.drop(columns=['Step'])
    df_MAPPO = df_MAPPO.iloc[:200, ::3]
    print(df_MAPPO)

    for i in range(len(df_MAPPO.columns)):
        print(i)
        df_MAPPO.rename(columns={df_MAPPO.columns[i]: 'Experiment {}'.format(len(df_MAPPO.columns) - i)}, inplace=True)
    df_MAPPO = df_MAPPO * (1 - 0.95**100) / (1 - 0.95) * N_agents
    print(df_MAPPO)
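    # the factor (1 - 0.95**100) / (1 - 0.95) is the geometric sum of 100 discount steps with gamma = 0.95,
    # so the rescaling above presumably converts the logged per-step value into a discounted return and
    # then multiplies by N_agents to get a system-wide total (assumed gamma and horizon, not stated in the CSV)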



    data_RL = [data_D3CQL[experiment][1][0] for experiment in range(len(data_D3CQL))]
    normalization_value = [data[0] for data in data_RL]
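    # divide each run's final evaluation by its first recorded value, so that 1 corresponds to the
    # D3C-QL result with constraint = 0 mentioned in the header comment (assumed meaning of data[0])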
    final_data_RL.append([data_RL[i][-1]/normalization_value[i] for i in range(len(data_RL))])

    data_IQL_individual = [data_IQL[experiment][1][1] for experiment in range(len(data_IQL))]
    final_data_IQL_individual.append([data_IQL_individual[i][-2]/normalization_value[i] for i in range(len(data_IQL_individual))])



    data_MAPPO = []
    for experiment in range(15):
        # collect the MAPPO data for this experiment
        data_MAPPO_experiment = list(df_MAPPO['Experiment {}'.format(experiment + 1)])
        data_MAPPO.append(data_MAPPO_experiment)

    final_data_MAPPO.append([data_MAPPO[i][-1]/normalization_value[i] for i in range(len(data_MAPPO))])
    # data_MAPPO_experiment = list(df_MAPPO['Experiment {}'.format(experiment + 1)])

# print(data_A2C)

print(final_data_RL)
# print(final_data_IQL_individual)
# print(final_data_MAPPO)


# now we need to make the boxplot: for each convexity exponent we need a list of the final average reward for each method
full_data = False  # If True, include A2C and IQL-C, otherwise only D3C-QL, IQL-I and MAPPO
if full_data:
    methods = ['D3C-QL', 'IQL-I', 'IQL-C', 'A2C', 'MAPPO']
    data_to_plot = [final_data_RL, final_data_IQL_individual, final_data_IQL_common, final_data_A2C, final_data_MAPPO]
    colors = ['lightblue', 'lightgreen', 'lightcoral', '#713969', 'orange']
else:
    methods = ['D3C-QL', 'IQL-I', 'MAPPO']
    data_to_plot = [final_data_RL, final_data_IQL_individual, final_data_MAPPO]
    colors = ['lightblue', 'lightgreen', 'orange']

if full_data:
    plt.figure(figsize=(15, 6))
else:
    plt.figure(figsize=(10, 5))
for m_idx, method in enumerate(methods):
    # for each method, collect the data for all convexity exponents
    method_data = [data_to_plot[m_idx][convexity_exponents_list.index(convexity_exponent)] for convexity_exponent in convexity_exponents_list]

    width = (0.15 if full_data else 0.2)  # width of each box
    shift = (1 if full_data else 0)  # shift for the boxplot positions
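    # offset each method's boxes by one box-width around the integer x position of its group,
    # so the methods end up side by side within every convexity-exponent group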
    positions = [i + (m_idx - 1 - shift) * width for i in range(len(convexity_exponents_list))]
    bp = plt.boxplot(method_data, positions=positions, widths=width, patch_artist=True, showfliers=False)
    for patch in bp['boxes']:
        patch.set_facecolor(colors[m_idx])

legend_handles = [mpatches.Patch(color=colors[i], linewidth=1.5, label=methods[i]) for i in range(len(methods))]
plt.legend(handles=legend_handles, title="Algorithm", loc="lower center", fontsize='large', frameon=True, edgecolor='black', facecolor='white', ncol=5)

plt.xlabel('Convexity exponent of the penalty')
plt.xticks(range(len(convexity_exponents_list)), convexity_exponents_list)
plt.ylabel('Final Reward (normalized)')
plt.xlim(-.5, len(convexity_exponents_list) - 0.5)
plt.ylim(-.1, 2.1)
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(initial_folder + '/results/scalability_comparison_non_linear_penalty.png', dpi=300, bbox_inches='tight')

plt.show()


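The grouped layout above places one box per algorithm at each convexity exponent by offsetting the boxplot positions. A minimal, self-contained sketch of the same idea with hypothetical data (method names and sample values invented for illustration):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
methods = ['Method A', 'Method B', 'Method C']          # hypothetical method names
x_groups = [0.5, 1, 1.5, 2]                             # hypothetical group values (e.g. exponents)
width = 0.2

plt.figure()
for m_idx, method in enumerate(methods):
    data = [rng.normal(1, 0.1, 15) for _ in x_groups]   # hypothetical samples per group
    positions = [i + (m_idx - 1) * width for i in range(len(x_groups))]
    plt.boxplot(data, positions=positions, widths=width, patch_artist=True, showfliers=False)
plt.xticks(range(len(x_groups)), x_groups)
plt.show()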
Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
# here we compare the usage of the offloading action in D3CQL, IQL and MAPPO

import numpy as np
import matplotlib.pyplot as plt
import os
import pickle as pk
from scipy.stats import sem
import copy
import matplotlib.patches as mpatches
import pandas as pd

N_agents_list = [10]  # other values considered: 20, 50
n_experiments = 15
convexity_exponents = [1]  # convexity exponents to consider


list_mean_data_RL = []
list_mean_data_IQL_individual = []
list_mean_data_IQL_common = []
list_mean_data_MAPPO = []

list_std_data_RL = []
list_std_data_IQL_individual = []
list_std_data_IQL_common = []
list_std_data_MAPPO = []


initial_folder = os.getcwd()


for N_agents in N_agents_list:

    folder = initial_folder + "/results/{}_agents/".format(N_agents)
    final_data_RL = []
    final_data_IQL_individual = []
    final_data_IQL_common = []
    final_data_MAPPO = []


    # import the MAPPO data
    data_MAPPO = pd.read_csv(folder + 'frequency_offloading_MAPPO.csv')
    data_MAPPO = data_MAPPO.drop(columns=['Step'])
    # keep the first 200 rows and every third column, then subsample every 12th row
    print(data_MAPPO.shape)
    data_MAPPO = data_MAPPO.iloc[:200, ::3]  # take only the first 200 rows and every third column
    data_MAPPO = data_MAPPO.iloc[::12, :]
    data_MAPPO = data_MAPPO.iloc[1:, :]
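    # the subsampling above presumably aligns the logged MAPPO curve with the evaluation checkpoints
    # used for the other methods (every 12th logged row, dropping the initial point); this is an
    # assumption about the CSV layout, not something stated in the file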
    # print(data_MAPPO.shape)
    average_use_constraint_MAPPO = data_MAPPO[data_MAPPO.columns[0]].values
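    # assumed column layout of the export: column 0 holds the mean and column 2 an upper band
    # (e.g. mean + std), so their difference below is used as the width of the shaded band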
    std_use_constraint_MAPPO = data_MAPPO[data_MAPPO.columns[2]].values - data_MAPPO[data_MAPPO.columns[0]].values
    list_mean_data_MAPPO.append(average_use_constraint_MAPPO)
    list_std_data_MAPPO.append(std_use_constraint_MAPPO)


    # print(average_use_constraint_MAPPO, std_use_constraint_MAPPO, len(average_use_constraint_MAPPO))
    # data D3C
    data_RL = []
    with open(folder + "results_RL_0_initial_constraint.pkl", "rb") as f:
        data_RL = pk.load(f)

    constraint_use_RL = []
    for experiment in range(n_experiments):
        constraint_use_RL.append(data_RL[experiment][-2])
    # print(constraint_use_RL)
    # print('--------------')
    average_use_constraint_RL = []
    for experiment in range(n_experiments):
        experiment_data = constraint_use_RL[experiment]
        average_experiment_data = np.mean(experiment_data, axis=1)
        # print(experiment_data)
        print(average_experiment_data, len(average_experiment_data))
        average_use_constraint_RL.append(average_experiment_data)
    # raise Exception("Stop here to check the data")
    # print(average_use_constraint_RL)
    average_use_constraint_RL = np.array(average_use_constraint_RL)
    mean_use_constraint_RL = np.mean(average_use_constraint_RL, axis=0)
    std_use_constraint_RL = np.std(average_use_constraint_RL, axis=0)
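    # mean and ±1 std across the n_experiments runs at each checkpoint; these feed the shaded bands below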
    # print(mean_use_constraint_RL, len(mean_use_constraint_RL))
    list_mean_data_RL.append(mean_use_constraint_RL)
    list_std_data_RL.append(std_use_constraint_RL)

    # data IQL
    with open(folder + "results_IQL_low_LR.pkl", "rb") as f:
        data_IQL = pk.load(f)
    constraint_use_IQL = []
    for experiment in range(n_experiments):
        constraint_use_IQL.append(data_IQL[experiment][-1])

    # now, for each experiment we have a dataframe of size (1000, 10), with 10 being the number of agents
    # we need to create the evaluations for each experiment, by taking the last 100 episodes of every 1000
    interval_evaluations = 1000
    episodes_to_consider_evaluation = 100

    IQL_data = []
    for experiment in range(n_experiments):
        experiment_data = constraint_use_IQL[experiment]
        # print("Experiment data shape: {}".format(experiment_data.shape))
        final_experiment_data = []
        for evaluation in range(0, len(experiment_data) // interval_evaluations):
            # print("Experiment: {}, Evaluation: {}".format(experiment, evaluation))
            # print("Interval to consider at evaluation {}: {}:{}".format(evaluation, (evaluation + 1) * interval_evaluations - episodes_to_consider_evaluation, (evaluation + 1) * interval_evaluations))
            evaluation_data = experiment_data[((evaluation + 1) * interval_evaluations) - episodes_to_consider_evaluation: (evaluation + 1) * interval_evaluations]
            evaluation_average_data = np.mean(evaluation_data)
            final_experiment_data.append(evaluation_average_data)
            # print("Evaluation data shape: {}".format(evaluation_average_data.shape))
        # print("Final experiment data shape: {}".format(final_experiment_data))
        IQL_data.append(final_experiment_data)

    # now for each evaluation we take the average and the std
    IQL_data = np.array(IQL_data)
    mean_use_constraint_IQL = np.mean(IQL_data, axis=0)
    std_use_constraint_IQL = np.std(IQL_data, axis=0)
    list_mean_data_IQL_individual.append(mean_use_constraint_IQL)
    list_std_data_IQL_individual.append(std_use_constraint_IQL)

    # print(mean_use_constraint_IQL, len(mean_use_constraint_IQL))
    # print(std_use_constraint_IQL, len(std_use_constraint_IQL))


# now we plot everything together
fig, ax = plt.subplots(1, len(N_agents_list), figsize=(10, 4), sharey=True)
ax = np.atleast_1d(ax)  # subplots returns a bare Axes when there is a single column, so make it indexable
color_list = {'DCC-QL': 'lightblue', 'IQL-I': 'lightgreen', 'MAPPO': 'orange'}
algorithms_list = ['DCC-QL', 'IQL-I', 'MAPPO']
linestyles_list = {'DCC-QL': '-', 'IQL-I': '--', 'MAPPO': '-.'}

full_data_D3CQL = False  # if True, we plot the full data of D3CQL, otherwise we plot only the points of the evaluations
# now we plot the data
# draw the mean with a shaded std band, but for D3C-QL keep only the noise-free evaluation points
points_no_noise_evaluation = [0, 3, 6, 9, 12, 15]  # indices of the evaluation points to consider

for i in range(len(N_agents_list)):

    mean_use_constraint_RL = list_mean_data_RL[i]
    std_use_constraint_RL = list_std_data_RL[i]
    mean_use_constraint_IQL = list_mean_data_IQL_individual[i]
    std_use_constraint_IQL = list_std_data_IQL_individual[i]
    average_use_constraint_MAPPO = list_mean_data_MAPPO[i]
    std_use_constraint_MAPPO = list_std_data_MAPPO[i]

    if not full_data_D3CQL:
        ax[i].plot(points_no_noise_evaluation, mean_use_constraint_RL[points_no_noise_evaluation], label='DCC-QL', color=color_list['DCC-QL'], linestyle=linestyles_list['DCC-QL'])
        # plt.scatter(points_to_consider, mean_use_constraint_RL[points_to_consider], color='blue', s=10)  # scatter plot for points
        # plt.scatter(np.arange(len(mean_use_constraint_RL)), mean_use_constraint_RL, color='blue', s=10)  # scatter plot for points
        ax[i].fill_between(points_no_noise_evaluation,
                           mean_use_constraint_RL[points_no_noise_evaluation] - std_use_constraint_RL[points_no_noise_evaluation],
                           mean_use_constraint_RL[points_no_noise_evaluation] + std_use_constraint_RL[points_no_noise_evaluation],
                           color=color_list['DCC-QL'], alpha=0.2)
    else:
        ax[i].plot(mean_use_constraint_RL, label='DCC-QL', color=color_list['DCC-QL'])
        ax[i].scatter(points_no_noise_evaluation, mean_use_constraint_RL[points_no_noise_evaluation], color=color_list['DCC-QL'], s=10)  # mark the evaluation points
        ax[i].fill_between(range(len(mean_use_constraint_RL)),
                           mean_use_constraint_RL - std_use_constraint_RL,
                           mean_use_constraint_RL + std_use_constraint_RL,
                           color=color_list['DCC-QL'], alpha=0.2)

    ax[i].plot(mean_use_constraint_IQL, label='IQL-I', color=color_list['IQL-I'], linestyle=linestyles_list['IQL-I'])
    ax[i].fill_between(range(len(mean_use_constraint_IQL)),
                       mean_use_constraint_IQL - std_use_constraint_IQL,
                       mean_use_constraint_IQL + std_use_constraint_IQL,
                       color=color_list['IQL-I'], alpha=0.2)

    ax[i].plot(average_use_constraint_MAPPO, label='MAPPO', color=color_list['MAPPO'], linestyle=linestyles_list['MAPPO'])
    ax[i].fill_between(range(len(average_use_constraint_MAPPO)),
                       average_use_constraint_MAPPO - std_use_constraint_MAPPO,
                       average_use_constraint_MAPPO + std_use_constraint_MAPPO,
                       color=color_list['MAPPO'], alpha=0.2)

    # plt.xlabel('Learning steps')
    ax[i].set_xticks(np.arange(0, len(mean_use_constraint_RL), 1))
    labels = [r'$0.1 $', r'$0.4 $', r'$0.7 $', r'$1 $', r'$1.3 $', r'$1.6$']
    ax[i].set_xticks(np.arange(0, len(mean_use_constraint_RL), 3), labels)
    # plt.xtick_labels(labels)
    ax[i].set_xlabel(r"Learning steps $(\times 10^6)$")
    ax[i].set_ylabel('Average use of offloading action')
    ax[i].set_title('{} devices'.format(N_agents_list[i]), fontsize=14)
    ax[i].grid(which='both', linewidth=0.5, alpha=0.5, color='black')
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', ncol=3, frameon=True, fontsize='large', edgecolor='black', facecolor='white', bbox_to_anchor=(0.5, 1.05), bbox_transform=plt.gcf().transFigure)
plt.tight_layout(rect=[0, 0, 1, .94])  # leave space for the legend
# plt.title('IQL MISSING')

plt.savefig(initial_folder + '/results/comparison_offloading_action.png', dpi=300, bbox_inches='tight')
plt.show()
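The per-algorithm curves above all follow the same mean-plus-shaded-std pattern. A minimal, self-contained sketch of that pattern with hypothetical data (run count and values invented for illustration):

import numpy as np
import matplotlib.pyplot as plt

runs = np.random.default_rng(1).normal(0.5, 0.1, size=(15, 16))  # hypothetical (runs, checkpoints) data
mean = runs.mean(axis=0)
std = runs.std(axis=0)

fig, ax = plt.subplots()
ax.plot(mean, label='mean over runs')
ax.fill_between(range(len(mean)), mean - std, mean + std, alpha=0.2)  # ±1 std band
ax.legend()
plt.show()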
