import csv
import itertools

# Column order of the generated CSV; every row dict produced by
# build_run_rows() carries exactly these keys.
CSV_FIELDS = ["run_name", "shield", "shield_specification", "punish_unsafe_orig_action",
              "punish_unsafe_orig_action_modifier", "randomize_starts", "map_type",
              "particle_agents_observe_momentum",
              "particle_terminate_on_collision", "particle_world_size", "particle_collision_penalty",
              "learner_type", "learner_deep_network_model", "learner_transform_one_hot",
              "learner_anneal_eps_start",
              "learner_anneal_eps_finish", "max_total_steps",
              "seed", "learner_evaluation_epsilon"]


def build_run_rows(run_name, randomize_starts, shields_observe_momentums,
                   learner_anneal_eps, punish_unsafe_orig_actions, num_runs):
    """Return one parameter dict per run for the configuration CSV.

    A "run type" is one element of the Cartesian product of the four option
    lists (in the order they are passed); each run type is repeated
    ``num_runs`` times, with the repetition index used as the seed.

    Args:
        run_name: Prefix of every generated ``run_name`` value.
        randomize_starts: Values for the ``randomize_starts`` column.
        shields_observe_momentums: Tuples of
            ``(shield, shield_specification, agents_observe_momentum)``.
        learner_anneal_eps: Tuples of ``(eps_start, eps_finish)``.
        punish_unsafe_orig_actions: Tuples of
            ``(punish_unsafe_action, reward_modifier)``.
        num_runs: Number of repetitions per run type.

    Returns:
        A list of dicts keyed by the names in ``CSV_FIELDS``, ordered by run
        type and then by repetition index.
    """
    run_types = itertools.product(randomize_starts, shields_observe_momentums,
                                  learner_anneal_eps, punish_unsafe_orig_actions)

    rows = []
    for run_type_idx, (
            random_start,
            (shield, shield_specification, agents_observe_momentum),
            (eps_anneal_start, eps_anneal_finish),
            (punish_unsafe_action, unsafe_action_rew_modifier)) in enumerate(run_types):

        for run_num_of_same_type in range(num_runs):
            # Index that is unique across all run types and repetitions.
            global_run_idx = run_type_idx * num_runs + run_num_of_same_type

            rows.append({
                "run_name": f"{run_name}/{global_run_idx}_{run_type_idx}_{run_num_of_same_type}",
                "shield": shield,
                "shield_specification": shield_specification,
                "punish_unsafe_orig_action": punish_unsafe_action,
                "punish_unsafe_orig_action_modifier": unsafe_action_rew_modifier,
                "randomize_starts": random_start,
                "map_type": "ParticleMomentum",
                "particle_world_size": 10,
                "particle_collision_penalty": -30,
                "particle_terminate_on_collision": False,
                "particle_agents_observe_momentum": agents_observe_momentum,
                "learner_type": "Individual_Deep_Q",
                "learner_deep_network_model": "simple_mlp",
                "learner_transform_one_hot": True,
                "learner_anneal_eps_start": eps_anneal_start,
                "learner_anneal_eps_finish": eps_anneal_finish,
                "max_total_steps": int(2.5e6),
                # Repetitions of the same run type differ only by seed.
                "seed": run_num_of_same_type,
                # Evaluation uses the final annealed epsilon.
                "learner_evaluation_epsilon": eps_anneal_finish,
            })

    return rows


if __name__ == '__main__':
    run_name = "2023_04_13_ParticleReplicate"

    randomize_starts = [True, False]
    # Each entry: (shield, shield_specification, agents_observe_momentum).
    shields_observe_momentums = \
        [("pobs_label_cent", "shields/smv/particle_momentum_relative", True),
         ("pobs_label", "shields/smv/particle_momentum_relative_naive", True),
         ("pobs_label", "shields/smv/particle_momentum_relative_sat", True),
         ("none", None, True),
         ("pobs_label_cent", "shields/smv/particle_momentum_relative_pos_only", False),
         ("pobs_label", "shields/smv/particle_momentum_relative_pos_only_hist_naive", False),
         ("pobs_label", "shields/smv/particle_momentum_relative_pos_only_hist_sat", False),
         ("none", None, False)]

    # Each entry: (eps_start, eps_finish).
    learner_anneal_eps = [(1.0, 0.05)]
    # Each entry: (punish_unsafe_action, reward_modifier).
    punish_unsafe_orig_actions = [(True, -10)]
    num_runs = 10

    run_rows = build_run_rows(run_name, randomize_starts, shields_observe_momentums,
                              learner_anneal_eps, punish_unsafe_orig_actions, num_runs)

    # newline="" is required by the csv module: without it the writer's
    # "\r\n" terminator is translated again on Windows, yielding "\r\r\n"
    # (blank lines between rows). The encoding is pinned so the output does
    # not depend on the platform locale.
    with open(f"../../parallel_configs/{run_name}.csv", "w",
              newline="", encoding="utf-8") as train_file:
        train_writer = csv.DictWriter(train_file, CSV_FIELDS)
        train_writer.writeheader()
        train_writer.writerows(run_rows)
