From 292b6534dbb1dac98871f4b89fa37aaf16439092 Mon Sep 17 00:00:00 2001
From: hutianxiang
Date: Fri, 14 Feb 2020 20:13:45 +0800
Subject: [PATCH 01/17] fix space.Discrete type error

---
 utilities/Parallel_Experience_Generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/Parallel_Experience_Generator.py b/utilities/Parallel_Experience_Generator.py
index 872ef388..e3ed74bc 100644
--- a/utilities/Parallel_Experience_Generator.py
+++ b/utilities/Parallel_Experience_Generator.py
@@ -16,7 +16,7 @@ class Parallel_Experience_Generator(object):
     def __init__(self, environment, policy, seed, hyperparameters, action_size, use_GPU=False, action_choice_output_columns=None):
         self.use_GPU = use_GPU
         self.environment = environment
-        self.action_types = "DISCRETE" if self.environment.action_space.dtype == int else "CONTINUOUS"
+        self.action_types = "DISCRETE" if self.environment.action_space.dtype in [int, 'int64'] else "CONTINUOUS"
         self.action_size = action_size
         self.policy = policy
         self.action_choice_output_columns = action_choice_output_columns

From 01f97aad19b9142b8e5b71933fd6532f65c324c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=20D=C3=B6rr?=
Date: Tue, 17 Mar 2020 18:43:41 +0100
Subject: [PATCH 02/17] Fix typo

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4563ab4d..7784811d 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ conda activate myenvname
 
 pip3 install -r requirements.txt
 
-python Results/Cart_Pole.py
+python results/Cart_Pole.py
 ```
 
 For other games change the last line to one of the other files in the Results folder.
@@ -120,4 +120,4 @@ For other games change the last line to one of the other files in the Results fo
 Most Open AI gym environments should work. All you would need to do is change the config.environment field (look at `Results/Cart_Pole.py` for an example of this).
 
 You can also play with your own custom game if you create a separate class that inherits from gym.Env. See `Environments/Four_Rooms_Environment.py`
-for an example of a custom environment and then see the script `Results/Four_Rooms.py` to see how to have agents play the environment.
\ No newline at end of file
+for an example of a custom environment and then see the script `Results/Four_Rooms.py` to see how to have agents play the environment.
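[Editor's note] PATCH 01/17 above widens the dtype comparison so that numpy-backed discrete spaces (whose `dtype` is `int64`, not Python's `int`) are classified correctly. An alternative that sidesteps dtype quirks altogether is to check the space's class with `isinstance` — a minimal sketch, assuming a standard `gym` installation, and not part of the patch series:

```python
# Sketch: classify a Gym action space by its type rather than its dtype.
# isinstance is robust to whether the space reports int, int64, etc.
import gym

def classify_action_space(env):
    """Return "DISCRETE" for gym.spaces.Discrete spaces, else "CONTINUOUS"."""
    if isinstance(env.action_space, gym.spaces.Discrete):
        return "DISCRETE"
    return "CONTINUOUS"

env = gym.make("CartPole-v0")
print(classify_action_space(env))  # -> "DISCRETE"
```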
From 44ef1895f54520d6c18256811a5068aba4b99297 Mon Sep 17 00:00:00 2001
From: Johan Strombom
Date: Mon, 11 May 2020 11:58:06 +0200
Subject: [PATCH 03/17] max_probability_action bugfix for batchsize > 1

When batch size is larger than 1 max_probability_action only returned a
single action, often with a number larger than the action space.
---
 agents/actor_critic_agents/SAC_Discrete.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agents/actor_critic_agents/SAC_Discrete.py b/agents/actor_critic_agents/SAC_Discrete.py
index 404f19dc..9c7bcc62 100644
--- a/agents/actor_critic_agents/SAC_Discrete.py
+++ b/agents/actor_critic_agents/SAC_Discrete.py
@@ -52,7 +52,7 @@ def produce_action_and_action_info(self, state):
         """Given the state, produces an action, the probability of the action, the log probability of the action,
         and the argmax action"""
         action_probabilities = self.actor_local(state)
-        max_probability_action = torch.argmax(action_probabilities).unsqueeze(0)
+        max_probability_action = torch.argmax(action_probabilities, dim=-1).unsqueeze(0)
         action_distribution = create_actor_distribution(self.action_types, action_probabilities, self.action_size)
         action = action_distribution.sample().cpu()
         # Have to deal with situation of 0.0 probabilities because we can't do log 0

From fbc84b8bd3accb480a140b9cf441a72fb40515c0 Mon Sep 17 00:00:00 2001
From: Toshiki Watanabe
Date: Fri, 18 Sep 2020 08:20:31 +0900
Subject: [PATCH 04/17] fix bugs

---
 agents/actor_critic_agents/SAC_Discrete.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/agents/actor_critic_agents/SAC_Discrete.py b/agents/actor_critic_agents/SAC_Discrete.py
index 404f19dc..587ec245 100644
--- a/agents/actor_critic_agents/SAC_Discrete.py
+++ b/agents/actor_critic_agents/SAC_Discrete.py
@@ -52,7 +52,7 @@ def produce_action_and_action_info(self, state):
         """Given the state, produces an action, the probability of the action, the log probability of the action,
         and the argmax action"""
         action_probabilities = self.actor_local(state)
-        max_probability_action = torch.argmax(action_probabilities).unsqueeze(0)
+        max_probability_action = torch.argmax(action_probabilities, dim=1)
         action_distribution = create_actor_distribution(self.action_types, action_probabilities, self.action_size)
         action = action_distribution.sample().cpu()
         # Have to deal with situation of 0.0 probabilities because we can't do log 0
@@ -69,7 +69,7 @@ def calculate_critic_losses(self, state_batch, action_batch, reward_batch, next_
             qf1_next_target = self.critic_target(next_state_batch)
             qf2_next_target = self.critic_target_2(next_state_batch)
             min_qf_next_target = action_probabilities * (torch.min(qf1_next_target, qf2_next_target) - self.alpha * log_action_probabilities)
-            min_qf_next_target = min_qf_next_target.mean(dim=1).unsqueeze(-1)
+            min_qf_next_target = min_qf_next_target.sum(dim=1).unsqueeze(-1)
             next_q_value = reward_batch + (1.0 - mask_batch) * self.hyperparameters["discount_rate"] * (min_qf_next_target)
 
         qf1 = self.critic_local(state_batch).gather(1, action_batch.long())
@@ -85,7 +85,6 @@ def calculate_actor_loss(self, state_batch):
         qf2_pi = self.critic_local_2(state_batch)
         min_qf_pi = torch.min(qf1_pi, qf2_pi)
         inside_term = self.alpha * log_action_probabilities - min_qf_pi
-        policy_loss = action_probabilities * inside_term
-        policy_loss = policy_loss.mean()
+        policy_loss = (action_probabilities * inside_term).sum(dim=1).mean()
         log_action_probabilities = torch.sum(log_action_probabilities * action_probabilities, dim=1)
         return policy_loss, log_action_probabilities
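[Editor's note] PATCH 03/17 and PATCH 04/17 both fix batch-shape bugs in SAC-Discrete. Without a `dim` argument, `torch.argmax` flattens a `(batch, actions)` tensor and returns a single index in `[0, batch * actions)`, which can exceed the action-space size; and an expectation under the policy is a probability-weighted *sum* over actions, so `.mean(dim=1)` silently divides it by the number of actions. A standalone sketch of both points, with illustrative shapes only (not the repo's API):

```python
# Sketch: why dim-aware argmax and sum-based expectations matter for batches.
import torch

batch_size, n_actions = 4, 3
probs = torch.softmax(torch.randn(batch_size, n_actions), dim=1)  # (B, A) policy
q_values = torch.randn(batch_size, n_actions)                     # (B, A) Q(s, a)

# PATCH 03/04: argmax over the action dimension yields one action per sample;
# argmax without dim= flattens the tensor and returns one index in [0, B*A).
greedy = torch.argmax(probs, dim=1)  # shape (B,) -- the fix
flat = torch.argmax(probs)           # single flattened index -- the bug
assert greedy.shape == (batch_size,) and flat.numel() == 1

# PATCH 04: E_{a~pi}[Q(s, a)] = sum_a pi(a|s) * Q(s, a); .mean(dim=1) would
# wrongly divide this expectation by n_actions.
expected_q = (probs * q_values).sum(dim=1)  # shape (B,)
assert torch.allclose(expected_q, (probs * q_values).mean(dim=1) * n_actions)
```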
From bc6ee5f94d712ed15834b801f907992b23aff2aa Mon Sep 17 00:00:00 2001
From: Toshiki Watanabe
Date: Fri, 18 Sep 2020 08:54:27 +0900
Subject: [PATCH 05/17] fix device to properly calculate SAC-Discrete on cpu

---
 agents/actor_critic_agents/SAC_Discrete.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agents/actor_critic_agents/SAC_Discrete.py b/agents/actor_critic_agents/SAC_Discrete.py
index 587ec245..178a9536 100644
--- a/agents/actor_critic_agents/SAC_Discrete.py
+++ b/agents/actor_critic_agents/SAC_Discrete.py
@@ -30,7 +30,7 @@ def __init__(self, config):
         Base_Agent.copy_model_over(self.critic_local, self.critic_target)
         Base_Agent.copy_model_over(self.critic_local_2, self.critic_target_2)
         self.memory = Replay_Buffer(self.hyperparameters["Critic"]["buffer_size"], self.hyperparameters["batch_size"],
-                                    self.config.seed)
+                                    self.config.seed, device=self.device)
         self.actor_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Actor")
         self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(),

From 8cc5c59b2190a24ea83e119c79d84c029ca5c87c Mon Sep 17 00:00:00 2001
From: Toshiki Watanabe
Date: Fri, 18 Sep 2020 09:17:04 +0900
Subject: [PATCH 06/17] fix errors of SAC and SAC-Discrete caused by torch>=1.4.0

---
 agents/actor_critic_agents/SAC.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/agents/actor_critic_agents/SAC.py b/agents/actor_critic_agents/SAC.py
index bbe91860..b997fe97 100644
--- a/agents/actor_critic_agents/SAC.py
+++ b/agents/actor_critic_agents/SAC.py
@@ -144,10 +144,12 @@ def learn(self):
         """Runs a learning iteration for the actor, both critics and (if specified) the temperature parameter"""
         state_batch, action_batch, reward_batch, next_state_batch, mask_batch = self.sample_experiences()
         qf1_loss, qf2_loss = self.calculate_critic_losses(state_batch, action_batch, reward_batch, next_state_batch, mask_batch)
+        self.update_critic_parameters(qf1_loss, qf2_loss)
+
         policy_loss, log_pi = self.calculate_actor_loss(state_batch)
         if self.automatic_entropy_tuning: alpha_loss = self.calculate_entropy_tuning_loss(log_pi)
         else: alpha_loss = None
-        self.update_all_parameters(qf1_loss, qf2_loss, policy_loss, alpha_loss)
+        self.update_actor_parameters(policy_loss, alpha_loss)
 
     def sample_experiences(self):
         return self.memory.sample()
@@ -182,18 +184,21 @@ def calculate_entropy_tuning_loss(self, log_pi):
         alpha_loss = -(self.log_alpha * (log_pi + self.target_entropy).detach()).mean()
         return alpha_loss
 
-    def update_all_parameters(self, critic_loss_1, critic_loss_2, actor_loss, alpha_loss):
-        """Updates the parameters for the actor, both critics and (if specified) the temperature parameter"""
+    def update_critic_parameters(self, critic_loss_1, critic_loss_2):
+        """Updates the parameters for both critics"""
         self.take_optimisation_step(self.critic_optimizer, self.critic_local, critic_loss_1,
                                     self.hyperparameters["Critic"]["gradient_clipping_norm"])
         self.take_optimisation_step(self.critic_optimizer_2, self.critic_local_2, critic_loss_2,
                                     self.hyperparameters["Critic"]["gradient_clipping_norm"])
-        self.take_optimisation_step(self.actor_optimizer, self.actor_local, actor_loss,
-                                    self.hyperparameters["Actor"]["gradient_clipping_norm"])
         self.soft_update_of_target_network(self.critic_local, self.critic_target,
                                            self.hyperparameters["Critic"]["tau"])
         self.soft_update_of_target_network(self.critic_local_2, self.critic_target_2,
                                            self.hyperparameters["Critic"]["tau"])
+
+    def update_actor_parameters(self, actor_loss, alpha_loss):
+        """Updates the parameters for the actor and (if specified) the temperature parameter"""
+        self.take_optimisation_step(self.actor_optimizer, self.actor_local, actor_loss,
+                                    self.hyperparameters["Actor"]["gradient_clipping_norm"])
         if alpha_loss is not None:
             self.take_optimisation_step(self.alpha_optim, None, alpha_loss, None)
             self.alpha = self.log_alpha.exp()
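[Editor's note] PATCH 06/17 splits `update_all_parameters` so the critics are stepped before the actor loss is computed. Under torch>=1.4.0, an optimiser step mutates parameters in place, so backpropagating a loss that was built from the pre-step graph raises a runtime error; computing each loss immediately before its own step avoids this. A minimal sketch of the pattern with toy modules (not the repo's classes):

```python
# Sketch: compute-then-step per loss, as PATCH 06 arranges for SAC.
import torch
import torch.nn as nn

critic, actor = nn.Linear(4, 1), nn.Linear(4, 2)
critic_opt = torch.optim.Adam(critic.parameters(), lr=1e-3)
actor_opt = torch.optim.Adam(actor.parameters(), lr=1e-3)
states = torch.randn(8, 4)

# 1) Build the critic loss and step the critic optimiser first.
critic_loss = critic(states).pow(2).mean()
critic_opt.zero_grad()
critic_loss.backward()
critic_opt.step()

# 2) Only now build the actor loss, from a fresh forward pass through the
#    already-updated critic, and step the actor optimiser.
actor_loss = -(critic(states).detach() * actor(states).sum(dim=1, keepdim=True)).mean()
actor_opt.zero_grad()
actor_loss.backward()
actor_opt.step()
```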
From b9f13d3116d7878c9d75d1120b85faa5477f00e3 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Sat, 19 Sep 2020 10:31:05 +0100
Subject: [PATCH 07/17] Update README.md

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6b2e2f7f..34f3df1e 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,11 @@
 
 ![RL](utilities/RL_image.jpeg) ![PyTorch](utilities/PyTorch-logo-2.jpg)
 
-This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments. (To help you remember things you learn about RL in general write them in [Save All](https://www.saveall.ai/))
+This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-## **Algorithms Implemented**
+To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai/) and try out the public deck there about Fast AI's machine learning textbook.
+
+## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))
 1. *DQN with Fixed Q Targets* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From c40f21dfedb8e49fc0cb9088ccce6e3044d61582 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Sat, 19 Sep 2020 10:31:26 +0100
Subject: [PATCH 08/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 34f3df1e..cfaad900 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai/) and try out the public deck there about Fast AI's machine learning textbook.
+(To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai/) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))
From 94455f48c10cb88330b426769f7dda3fc5a7cc91 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Thu, 24 Sep 2020 09:18:46 +0100
Subject: [PATCH 09/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cfaad900..fbf67922 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai/) and try out the public deck there about Fast AI's machine learning textbook.)
+(To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai/public_decks/deck=140) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From 79fc69c1010f91795ca5319bc263f5c3442b0d25 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Sun, 27 Sep 2020 19:51:03 +0100
Subject: [PATCH 10/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index fbf67922..550bf311 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai/public_decks/deck=140) and try out the public deck there about Fast AI's machine learning textbook.)
+(To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From 818874360e4bb138000f56fa0b9940cfc2da8ccb Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Fri, 15 Jan 2021 13:08:51 +0000
Subject: [PATCH 11/17] Create LICENSE

---
 LICENSE | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..e807d457
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Petros Christodoulou
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
From 6297608b8524774c847ad5cad87e14b80abf69ce Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Thu, 28 Jan 2021 17:22:59 +0000
Subject: [PATCH 12/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 550bf311..40b7c2b7 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://www.saveall.ai) and try out the public deck there about Fast AI's machine learning textbook.)
+(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJvd25lciI6NCwiZGVja19pZCI6MTQwfQ.Xd5RK2RLlHjGKoYQET39IqjAZ4JLCkqf119wOjZLjSCdGxwjGxZFWTBbXLvMoHAbhnXOOi6A2bSu3KwGH_S8L5WEn5Cej8kRZ_Bp_XMD_AzFQYM9cR8TvfqSaYQT07HnLkbSCZnM-9OL9rdrzm-hKA2sjLqxOLkDQEqcwCQHiZT0KoweH8Y0nqqKuymWVNUb4A8hyYnORyNXgTaTFbwU2YpEdU43z7PMnPXmr1MtWMe4GQdhCTAfCrsQout8nkHyLE-yUWAFa4jAo-GUAVALgiFr5n0Q7ya5wgA9OWHFwrXYkLSFKzCpw90hpZ_8UYh1dpBCwIIK0CJpOaeIN0ieAg) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From b338c87bebb672e39304e47e0eed55aeb462b243 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Fri, 14 May 2021 11:51:00 +0100
Subject: [PATCH 13/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 40b7c2b7..b0d1b204 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJvd25lciI6NCwiZGVja19pZCI6MTQwfQ.Xd5RK2RLlHjGKoYQET39IqjAZ4JLCkqf119wOjZLjSCdGxwjGxZFWTBbXLvMoHAbhnXOOi6A2bSu3KwGH_S8L5WEn5Cej8kRZ_Bp_XMD_AzFQYM9cR8TvfqSaYQT07HnLkbSCZnM-9OL9rdrzm-hKA2sjLqxOLkDQEqcwCQHiZT0KoweH8Y0nqqKuymWVNUb4A8hyYnORyNXgTaTFbwU2YpEdU43z7PMnPXmr1MtWMe4GQdhCTAfCrsQout8nkHyLE-yUWAFa4jAo-GUAVALgiFr5n0Q7ya5wgA9OWHFwrXYkLSFKzCpw90hpZ_8UYh1dpBCwIIK0CJpOaeIN0ieAg) and try out the public deck there about Fast AI's machine learning textbook.)
+(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&3K3uXPazkg4) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From cf812316c59fe7af3d9f8c23305429ce310c73fe Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Sat, 20 Nov 2021 19:43:36 +0000
Subject: [PATCH 14/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b0d1b204..819b323a 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&3K3uXPazkg4) and try out the public deck there about Fast AI's machine learning textbook.)
+(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&3K3uXPazkg4&reddit_posts) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))
From 135d3e2e06bbde2868047d738e3fc2d73fd8cc93 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Sat, 20 Nov 2021 19:44:20 +0000
Subject: [PATCH 15/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 819b323a..017e3c8b 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&3K3uXPazkg4&reddit_posts) and try out the public deck there about Fast AI's machine learning textbook.)
+(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&3K3uXPazkg4&github_links) and try out the public deck there about Fast AI's machine learning textbook.)
 
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From 41934b3e5d247fd9cf0b4dca41802d8b242fb34c Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Wed, 23 Aug 2023 20:02:51 +0100
Subject: [PATCH 16/17] Update README.md

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 017e3c8b..70dd0641 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Save All](https://saveall.ai/shared/deck/140&4&3K3uXPazkg4&github_links) and try out the public deck there about Fast AI's machine learning textbook.)
-
+(To help you remember things you learn about machine learning in general write them in [Gizmo](https://gizmo.ai)
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))
 1. *DQN with Fixed Q Targets* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))

From 4835bac8557fdacff1735eca004e35ea5a4b7443 Mon Sep 17 00:00:00 2001
From: Petros Christodoulou
Date: Wed, 23 Aug 2023 20:03:14 +0100
Subject: [PATCH 17/17] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 70dd0641..8fb8eb88 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 This repository contains PyTorch implementations of deep reinforcement learning algorithms and environments.
 
-(To help you remember things you learn about machine learning in general write them in [Gizmo](https://gizmo.ai)
+(To help you remember things you learn about machine learning in general write them in [Gizmo](https://gizmo.ai))
 ## **Algorithms Implemented**
 
 1. *Deep Q Learning (DQN)* ([Mnih et al. 2013](https://arxiv.org/pdf/1312.5602.pdf))