Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
b4210c1
Merge py file changes from benchmark-algs
taufeeque9 Jan 4, 2023
97bc063
Clean parallel script
taufeeque9 Jan 10, 2023
9291225
Undo the changes from #653 to the dagger benchmark config files.
ernestum Jan 26, 2023
276d863
Improve readability and interpretability of benchmarking tests.
ernestum Jan 25, 2023
37eb914
Add exponential beta scheduler for dagger
taufeeque9 Mar 1, 2023
877383b
Ignore coverage for unknown algorithms.
ernestum Feb 2, 2023
c8e55cb
Cleanup and extend tests for beta schedules in dagger.
ernestum Feb 2, 2023
6b9b306
Merge branch 'master' into benchmark-pr
taufeeque9 Feb 6, 2023
8576465
Fix test cases
taufeeque9 Feb 8, 2023
d81eb68
Add optuna to dependencies
taufeeque9 Feb 8, 2023
27467d3
Fix test case
taufeeque9 Feb 8, 2023
b59a768
Merge branch 'master' into benchmark-pr
taufeeque9 Feb 8, 2023
1a3b6b8
Clean up the scripts
taufeeque9 Feb 9, 2023
7a438da
Remove reporter(done) since mean_return is reported by the runs
taufeeque9 Feb 9, 2023
5bc5835
Merge branch 'master' into benchmark-pr
taufeeque9 Feb 20, 2023
2e56de8
Add beta_schedule parameter to dagger script
taufeeque9 Feb 23, 2023
84e854a
Merge branch 'master' into benchmark-pr
taufeeque9 Mar 16, 2023
73d8576
Update config policy kwargs
taufeeque9 Mar 16, 2023
9fdf878
Changes from review
taufeeque9 May 16, 2023
1c1dbc4
Fix errors with some configs
taufeeque9 May 16, 2023
3467af2
Merge branch 'master' into benchmark-pr
taufeeque9 May 16, 2023
44c4e97
Updates based on review
taufeeque9 Jun 14, 2023
4d493ae
Merge branch 'master' into benchmark-pr
taufeeque9 Jun 14, 2023
ab01269
Change metric everywhere
taufeeque9 Jun 14, 2023
f64580e
Merge branch 'master' into benchmark-pr
taufeeque9 Jul 11, 2023
e896d7d
Separate tuning code from parallel.py
taufeeque9 Jul 11, 2023
64c3a8d
Fix docstring
taufeeque9 Jul 11, 2023
8fba0d3
Removing resume option as it is getting tricky to correctly implement
taufeeque9 Jul 11, 2023
12ab31c
Minor fixes
taufeeque9 Jul 11, 2023
19b0f2c
Updates from review
taufeeque9 Jul 16, 2023
046b8d9
fix lint error
taufeeque9 Jul 16, 2023
8eee082
Add documentation for using the tuning script
taufeeque9 Jul 16, 2023
5ce7658
Fix lint error
taufeeque9 Jul 17, 2023
a8be331
Updates from the review
taufeeque9 Jul 18, 2023
4ff006d
Fix file name test errors
taufeeque9 Jul 18, 2023
6933afa
Add tune_run_kwargs in parallel script
taufeeque9 Jul 19, 2023
77f9d9b
Fix test errors
taufeeque9 Jul 19, 2023
54eb8a6
Fix test
taufeeque9 Jul 19, 2023
d50238f
Fix lint
taufeeque9 Jul 19, 2023
3fe22d4
Updates from review
taufeeque9 Jul 19, 2023
c50aa20
Simplify few lines of code
taufeeque9 Jul 20, 2023
000af61
Updates from review
taufeeque9 Aug 4, 2023
8b55134
Fix test
taufeeque9 Aug 4, 2023
f3ba2b5
Revert "Fix test"
taufeeque9 Aug 4, 2023
f8251c7
Fix test
taufeeque9 Aug 4, 2023
664fc37
Convert Dict to Mapping in input argument
taufeeque9 Aug 7, 2023
8690e1d
Ignore coverage in script configurations.
ernestum Aug 30, 2023
dd9eb6a
Pin huggingface_sb3 version.
ernestum Aug 30, 2023
b3930f4
Merge branch 'master' into benchmark-pr
ernestum Sep 26, 2023
40d87ef
Update to the newest seals environment versions.
ernestum Sep 26, 2023
71f6c92
Push gymnasium dependency to 0.29 to ensure mujoco envs work.
ernestum Sep 27, 2023
747ad32
Incorporate review comments
taufeeque9 Oct 4, 2023
691e759
Fix test errors
taufeeque9 Oct 4, 2023
2038e60
Move benchmarking/ to scripts/ and add named configs for tuned hyperp…
taufeeque9 Oct 4, 2023
35c7265
Bump cache version & remove unnecessary files
taufeeque9 Oct 5, 2023
fdf4f49
Include tuned hyperparam json files in package data
taufeeque9 Oct 5, 2023
5f9a4e6
Update storage hash
taufeeque9 Oct 5, 2023
91bb785
Update search space of bc
taufeeque9 Oct 5, 2023
3d93c84
Merge branch 'master' of github.com:HumanCompatibleAI/imitation into …
ZiyueWang25 Oct 5, 2023
f59fea2
update benchmark and hyper parameter tuning readme
ZiyueWang25 Oct 5, 2023
95110dc
Update README.md
taufeeque9 Oct 5, 2023
75f3477
Incorporate reviewer's comments in benchmarking readme
taufeeque9 Oct 6, 2023
77c1115
Merge branch 'master' into benchmark-pr
taufeeque9 Oct 6, 2023
1ba2b00
Update gymnasium version and render mode in eval policy
taufeeque9 Oct 7, 2023
ba4b693
Fix error
taufeeque9 Oct 7, 2023
bb76ee1
Merge branch 'update-gymnasium-dep' into benchmark-pr
taufeeque9 Oct 7, 2023
278f225
Merge branch 'master' into benchmark-pr
taufeeque9 Oct 8, 2023
01755a2
Update commands.py hex strings
taufeeque9 Oct 9, 2023
fdcef92
Merge branch 'master' into benchmark-pr
taufeeque9 Oct 9, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str:
"sacred>=0.8.4",
"tensorboard>=1.14",
"huggingface_sb3>=2.2.1",
"optuna>=3.0.1",
],
tests_require=TESTS_REQUIRE,
extras_require={
Expand Down
30 changes: 21 additions & 9 deletions src/imitation/scripts/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ def _get_algo_name(sd: sacred_util.SacredDicts) -> str:

def _return_summaries(sd: sacred_util.SacredDicts) -> dict:
imit_stats = get(sd.run, "result.imit_stats")
if imit_stats is None:
# stored in rollout key for preference comparison
imit_stats = get(sd.run, "result.rollout")
Comment thread
taufeeque9 marked this conversation as resolved.
Outdated
expert_stats = get(sd.run, "result.expert_stats")

expert_return_summary = None
Expand Down Expand Up @@ -262,26 +265,35 @@ def analyze_imitation(
csv_output_path: If provided, then save a CSV output file to this path.
tex_output_path: If provided, then save a LaTeX-format table to this path.
print_table: If True, then print the dataframe to stdout.
table_verbosity: Increasing levels of verbosity, from 0 to 2, increase the
number of columns in the table.
table_verbosity: Increasing levels of verbosity, from 0 to 3, increase the
number of columns in the table. Level 3 prints all of the columns available.

Returns:
The DataFrame generated from the Sacred logs.
"""
table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity)
if table_verbosity == 3:
Comment thread
taufeeque9 marked this conversation as resolved.
Outdated
table_entry_fns_subset = _get_table_entry_fns_subset(2)
else:
table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity)
Comment thread
taufeeque9 marked this conversation as resolved.
Outdated

rows = []
df = pd.DataFrame()
Comment thread
taufeeque9 marked this conversation as resolved.
Outdated
for sd in _gather_sacred_dicts():
row = {}
new_df = pd.DataFrame()
if table_verbosity == -1:
# gets all config columns
new_df = pd.json_normalize(sd.config)
else:
new_df = new_df.append({}, ignore_index=True)

for col_name, make_entry_fn in table_entry_fns_subset.items():
row[col_name] = make_entry_fn(sd)
rows.append(row)
new_df[col_name] = make_entry_fn(sd)

df = pd.concat([df, new_df])

df = pd.DataFrame(rows)
if len(df) > 0:
df.sort_values(by=["algo", "env_name"], inplace=True)

display_options = dict(index=False)
display_options: Mapping[str, Any] = dict(index=False)
if csv_output_path is not None:
df.to_csv(csv_output_path, **display_options)
print(f"Wrote CSV file to {csv_output_path}")
Expand Down
235 changes: 198 additions & 37 deletions src/imitation/scripts/config/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@
`@parallel_ex.named_config` to define a new parallel experiment.

Adding custom named configs is necessary because the CLI interface can't add
search spaces to the config like `"seed": tune.grid_search([0, 1, 2, 3])`.
search spaces to the config like `"seed": tune.choice([0, 1, 2, 3])`.

For tuning hyperparameters of an algorithm on a given environment, override
the `base_named_configs` argument with the named config of the environment.
Ex: python -m imitation.scripts.parallel with example_gail \
'base_named_configs=["logging.wandb_logging", "seals_half_cheetah"]'
"""

import numpy as np
import ray.tune as tune
import sacred
from torch import nn

from imitation.algorithms import dagger
from imitation.util.util import make_unique_timestamp

parallel_ex = sacred.Experiment("parallel")
Expand All @@ -33,17 +40,11 @@ def config():

local_dir = None # `local_dir` arg for `ray.tune.run`
upload_dir = None # `upload_dir` arg for `ray.tune.run`
n_seeds = 3 # Number of seeds to search over by default


@parallel_ex.config
def seeds(n_seeds):
search_space = {"config_updates": {"seed": tune.grid_search(list(range(n_seeds)))}}


@parallel_ex.named_config
def s3():
upload_dir = "s3://shwang-chai/private"
experiment_checkpoint_path = ""
eval_best_trial = False
eval_trial_seeds = 5 # Number of seeds to search over by default
num_samples = 1 # Number of samples per grid search configuration
repeat = 1


# Debug named configs
Expand All @@ -58,12 +59,12 @@ def generate_test_data():
"""
sacred_ex_name = "train_rl"
run_name = "TEST"
n_seeds = 1
repeat = 1
search_space = {
"config_updates": {
"rl": {
"rl_kwargs": {
"learning_rate": tune.grid_search(
"learning_rate": tune.choice(
[3e-4 * x for x in (1 / 3, 1 / 2)],
),
},
Expand All @@ -86,13 +87,13 @@ def generate_test_data():
def example_cartpole_rl():
sacred_ex_name = "train_rl"
run_name = "example-cartpole"
n_seeds = 2
repeat = 2
search_space = {
"config_updates": {
"rl": {
"rl_kwargs": {
"learning_rate": tune.grid_search(np.logspace(3e-6, 1e-1, num=3)),
"nminibatches": tune.grid_search([16, 32, 64]),
"learning_rate": tune.choice(np.logspace(3e-6, 1e-1, num=3)),
"nminibatches": tune.choice([16, 32, 64]),
},
},
},
Expand All @@ -105,44 +106,204 @@ def example_cartpole_rl():


@parallel_ex.named_config
def example_rl_easy():
def example_rl():
sacred_ex_name = "train_rl"
run_name = "example-rl-easy"
n_seeds = 2
run_name = "rl_tuning"
base_named_configs = ["logging.wandb_logging"]
base_config_updates = {"environment": {"num_vec": 1}}
search_space = {
"named_configs": tune.grid_search([[env] for env in EASY_ENVS]),
"config_updates": {
"rl": {
"batch_size": tune.choice([512, 1024, 2048, 4096, 8192]),
"rl_kwargs": {
"learning_rate": tune.grid_search(np.logspace(3e-6, 1e-1, num=3)),
"nminibatches": tune.grid_search([16, 32, 64]),
"learning_rate": tune.loguniform(1e-5, 1e-2),
"batch_size": tune.choice([64, 128, 256, 512]),
"n_epochs": tune.choice([5, 10, 20]),
},
},
},
}
resources_per_trial = dict(cpu=4)
num_samples = 100
eval_best_trial = True
eval_trial_seeds = 5
repeat = 1
resources_per_trial = dict(cpu=1)


@parallel_ex.named_config
def example_bc():
    """Search space for tuning behavioral cloning (BC) hyperparameters."""
    sacred_ex_name = "train_imitation"  # sacred experiment to launch per trial
    run_name = "bc_tuning"
    base_named_configs = ["logging.wandb_logging"]  # applied to every trial
    base_config_updates = {"environment": {"num_vec": 1}}
    # Ray Tune search space: sampled per trial and merged into the
    # sacred config of `train_imitation`.
    search_space = {
        "config_updates": {
            "bc_kwargs": dict(
                batch_size=tune.choice([8, 16, 32, 64]),
                l2_weight=tune.loguniform(1e-6, 1e-2),  # L2 regularization weight
                optimizer_kwargs=dict(
                    lr=tune.loguniform(1e-5, 1e-2),
                ),
            ),
            "bc_train_kwargs": dict(
                n_epochs=tune.choice([1, 5, 10, 20]),
            ),
        },
        "command_name": "bc",  # sacred command of `train_imitation` to run
    }
    num_samples = 64  # number of hyperparameter samples drawn from the space
    eval_best_trial = True  # re-evaluate the best trial after tuning
    eval_trial_seeds = 5  # seeds used for that re-evaluation
    repeat = 3  # times each sampled config is repeated (different seeds)
    resources_per_trial = dict(cpu=1)


@parallel_ex.named_config
def example_dagger():
    """Search space for tuning DAgger hyperparameters."""
    sacred_ex_name = "train_imitation"  # sacred experiment to launch per trial
    run_name = "dagger_tuning"
    base_named_configs = ["logging.wandb_logging"]  # applied to every trial
    # Fixed (non-searched) settings shared by all trials; the BC inner-loop
    # hyperparameters are pinned so the search focuses on DAgger itself.
    base_config_updates = {
        "environment": {"num_vec": 1},
        "dagger": {"total_timesteps": 1e5},
        "bc_kwargs": {
            "batch_size": 16,
            "l2_weight": 1e-4,
            "optimizer_kwargs": {"lr": 1e-3},
        },
    }
    # Ray Tune search space, merged into the sacred config per trial.
    search_space = {
        "config_updates": {
            "bc_train_kwargs": dict(
                n_epochs=tune.choice([1, 5, 10]),
            ),
            "dagger": dict(
                # Search over both linear and exponential beta decay schedules.
                beta_schedule=tune.choice(
                    [dagger.LinearBetaSchedule(i) for i in [1, 5, 15]]
                    + [dagger.ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]],
                ),
                rollout_round_min_episodes=tune.choice([3, 5, 10]),
            ),
        },
        "command_name": "dagger",  # sacred command of `train_imitation` to run
    }
    num_samples = 50  # number of hyperparameter samples drawn from the space
    repeat = 3  # times each sampled config is repeated (different seeds)
    eval_best_trial = True  # re-evaluate the best trial after tuning
    eval_trial_seeds = 5  # seeds used for that re-evaluation
    resources_per_trial = dict(cpu=1)


@parallel_ex.named_config
def example_gail():
    """Search space for tuning GAIL hyperparameters."""
    sacred_ex_name = "train_adversarial"  # sacred experiment to launch per trial
    run_name = "gail_tuning_hc"
    base_named_configs = ["logging.wandb_logging"]  # applied to every trial
    base_config_updates = {
        "environment": {"num_vec": 1},
        "total_timesteps": 1e7,
    }
    # Ray Tune search space, merged into the sacred config per trial.
    search_space = {
        "config_updates": {
            "algorithm_kwargs": dict(
                demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]),
                n_disc_updates_per_round=tune.choice([8, 16]),
            ),
            # Hyperparameters of the generator's RL algorithm.
            "rl": {
                "batch_size": tune.choice([4096, 8192, 16384]),
                "rl_kwargs": {
                    "ent_coef": tune.loguniform(1e-7, 1e-3),
                    "learning_rate": tune.loguniform(1e-5, 1e-2),
                },
            },
            "algorithm_specific": {},
        },
        "command_name": "gail",  # sacred command of `train_adversarial` to run
    }
    num_samples = 100  # number of hyperparameter samples drawn from the space
    eval_best_trial = True  # re-evaluate the best trial after tuning
    eval_trial_seeds = 5  # seeds used for that re-evaluation
    repeat = 3  # times each sampled config is repeated (different seeds)
    resources_per_trial = dict(cpu=1)


@parallel_ex.named_config
def example_gail_easy():
def example_airl():
sacred_ex_name = "train_adversarial"
run_name = "example-gail-easy"
n_seeds = 1
run_name = "airl_tuning"
base_named_configs = ["logging.wandb_logging"]
base_config_updates = {
"environment": {"num_vec": 1},
"total_timesteps": 1e7,
}
search_space = {
"named_configs": tune.grid_search([[env] for env in EASY_ENVS]),
"config_updates": {
"init_trainer_kwargs": {
"rl": {
"rl_kwargs": {
"learning_rate": tune.grid_search(
np.logspace(3e-6, 1e-1, num=3),
),
"nminibatches": tune.grid_search([16, 32, 64]),
},
"algorithm_kwargs": dict(
demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]),
n_disc_updates_per_round=tune.choice([8, 16]),
),
"rl": {
"batch_size": tune.choice([4096, 8192, 16384]),
"rl_kwargs": {
"ent_coef": tune.loguniform(1e-7, 1e-3),
"learning_rate": tune.loguniform(1e-5, 1e-2),
},
},
"algorithm_specific": {},
},
"command_name": "airl",
}
num_samples = 100
eval_best_trial = True
eval_trial_seeds = 5
repeat = 3
resources_per_trial = dict(cpu=1)


@parallel_ex.named_config
def example_pc():
    """Search space for tuning preference-comparisons hyperparameters."""
    sacred_ex_name = "train_preference_comparisons"  # experiment to launch
    run_name = "pc_tuning"
    base_named_configs = ["logging.wandb_logging"]  # applied to every trial
    base_config_updates = {
        "environment": {"num_vec": 1},
        "total_timesteps": 2e7,
        "total_comparisons": 5000,
        "query_schedule": "hyperbolic",
        "gatherer_kwargs": {"sample": True},
    }
    # Ray Tune search space, merged into the sacred config per trial.
    search_space = {
        # NOTE(review): "gail" looks wrong for a train_preference_comparisons
        # experiment — verify this command name exists on that experiment.
        "command_name": "gail",
        # Single-element choices keep these fields in the trial config
        # while effectively fixing their value.
        "named_configs": tune.choice(
            [
                ["reward.normalize_output_disable"],
            ],
        ),
        "config_updates": {
            "train": {
                "policy_kwargs": {
                    "activation_fn": tune.choice(
                        [
                            nn.ReLU,
                        ],
                    ),
                },
            },
            "num_iterations": tune.choice([25, 50]),
            "initial_comparison_frac": tune.choice([0.1, 0.25]),
            "reward_trainer_kwargs": {
                "epochs": tune.choice([1, 3, 6]),
            },
            # Hyperparameters of the policy's RL algorithm.
            "rl": {
                "batch_size": tune.choice([512, 2048, 8192]),
                "rl_kwargs": {
                    "learning_rate": tune.loguniform(1e-5, 1e-2),
                    "ent_coef": tune.loguniform(1e-7, 1e-3),
                },
            },
        },
    }
    num_samples = 100  # number of hyperparameter samples drawn from the space
    eval_best_trial = True  # re-evaluate the best trial after tuning
    eval_trial_seeds = 5  # seeds used for that re-evaluation
    repeat = 3  # times each sampled config is repeated (different seeds)
    resources_per_trial = dict(cpu=1)
Loading