piiswrong closed pull request #9046: Fix example example/reinforcement-learning/a3c
URL: https://github.com/apache/incubator-mxnet/pull/9046
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index a1a5a5b50e..9c68dc2333 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -153,3 +153,4 @@ List of Contributors
 * [Marco de Abreu](https://github.com/marcoabreu) - Marco is the creator of the current MXNet CI.
 * [Julian Salazar](https://github.com/JulianSlzr)
+* [Meghna Baijal](https://github.com/mbaijal)
diff --git a/example/reinforcement-learning/a3c/README.md b/example/reinforcement-learning/a3c/README.md
index 63faf92d7e..5eaba66a5b 100644
--- a/example/reinforcement-learning/a3c/README.md
+++ b/example/reinforcement-learning/a3c/README.md
@@ -7,8 +7,14 @@ The algorithm should be mostly correct. However I cannot reproduce the result in
 Note this is a generalization of the original algorithm since we use `batch_size` threads for each worker instead of the original 1 thread.
 
+## Prerequisites
+ - Install OpenAI Gym: `pip install gym`
+ - Install the Atari Env: `pip install gym[atari]`
+ - You may need to install flask: `pip install flask`
+ - You may have to install cv2: `pip install opencv-python`
+
 ## Usage
 run `python a3c.py --batch-size=32 --gpus=0` to run training on gpu 0 with batch-size=32.
 run `python launcher.py --gpus=0,1 -n 2 python a3c.py` to launch training on 2 gpus (0 and 1), each gpu has two workers.
-
+Note: You might have to update the path to dmlc-core in launcher.py.
diff --git a/example/reinforcement-learning/a3c/a3c.py b/example/reinforcement-learning/a3c/a3c.py
index 4d89a24852..f74ce77b65 100644
--- a/example/reinforcement-learning/a3c/a3c.py
+++ b/example/reinforcement-learning/a3c/a3c.py
@@ -26,6 +26,11 @@ import gym
 from datetime import datetime
 import time
+import sys
+try:
+    from importlib import reload
+except ImportError:
+    pass
 
 parser = argparse.ArgumentParser(description='Traing A3C with OpenAI Gym')
 parser.add_argument('--test', action='store_true', help='run testing', default=False)
@@ -139,7 +144,7 @@ def train():
                 module.save_params('%s-%04d.params'%(save_model_prefix, epoch))
 
-        for _ in range(epoch_size/args.t_max):
+        for _ in range(int(epoch_size/args.t_max)):
             tic = time.time()
             # clear gradients
             for exe in module._exec_group.grad_arrays:
diff --git a/example/reinforcement-learning/a3c/rl_data.py b/example/reinforcement-learning/a3c/rl_data.py
index ad78975753..70f2853978 100644
--- a/example/reinforcement-learning/a3c/rl_data.py
+++ b/example/reinforcement-learning/a3c/rl_data.py
@@ -21,13 +21,18 @@ import gym
 import cv2
 import math
-import Queue
 from threading import Thread
 import time
 import multiprocessing
 import multiprocessing.pool
 from flask import Flask, render_template, Response
 import signal
+import sys
+is_py3 = sys.version[0] == '3'
+if is_py3:
+    import queue as queue
+else:
+    import Queue as queue
 
 def make_web(queue):
     app = Flask(__name__)
@@ -62,7 +67,7 @@ def visual(X, show=True):
     buf = np.zeros((h*n, w*n, X.shape[3]), dtype=np.uint8)
     for i in range(N):
         x = i%n
-        y = i/n
+        y = i//n
         buf[h*y:h*(y+1), w*x:w*(x+1), :] = X[i]
     if show:
         cv2.imshow('a', buf)
@@ -88,7 +93,7 @@ def __init__(self, batch_size, input_length, nthreads=6, web_viz=False):
         self.web_viz = web_viz
         if web_viz:
-            self.queue = Queue.Queue()
+            self.queue = queue.Queue()
             self.thread = Thread(target=make_web, args=(self.queue,))
             self.thread.daemon = True
             self.thread.start()
@@ -117,7 +122,7 @@ def act(self, action):
         reward = np.asarray([i[1] for i in new], dtype=np.float32)
         done = np.asarray([i[2] for i in new], dtype=np.float32)
-        channels = self.state_.shape[1]/self.input_length
+        channels = self.state_.shape[1]//self.input_length
         state = np.zeros_like(self.state_)
         state[:,:-channels,:,:] = self.state_[:,channels:,:,:]
         for i, (ob, env) in enumerate(zip(new, self.env)):
@@ -151,7 +156,7 @@ def make_env(self):
         return gym.make(self.game)
 
     def visual(self):
-        data = self.state_[:4, -self.state_.shape[1]/self.input_length:, :, :]
+        data = self.state_[:4, -self.state_.shape[1]//self.input_length:, :, :]
         return visual(np.asarray(data, dtype=np.uint8), False)
 
 if __name__ == '__main__':
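For readers applying the same Python 2/3 port to similar examples, the core of this patch is the standard compatibility idiom: alias the renamed Queue module at import time and switch size/index arithmetic to floor division. The sketch below is illustrative only; the names split_channels and n_frames are not from the MXNet example, and it assumes nothing beyond a stock Python interpreter.

    import sys

    # Queue (Python 2) was renamed to queue (Python 3); alias it once so the
    # rest of the code is version-agnostic, mirroring the rl_data.py change.
    if sys.version_info[0] >= 3:
        import queue
    else:
        import Queue as queue

    def split_channels(total_channels, n_frames):
        # In Python 3, `/` returns a float, so channel counts used for array
        # slicing need `//` (floor division) or an explicit int() cast.
        return total_channels // n_frames

    if __name__ == '__main__':
        q = queue.Queue()
        q.put('frame')
        print(q.get())                # -> frame
        print(split_channels(12, 4))  # -> 3, an int on both Python 2 and 3

Run under either interpreter, the script prints the same results, which is the behaviour the //, int(), and queue-alias changes in this PR are meant to preserve.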