
Commit e335eb2

[MCTS] Really really bad threading, working on it
1 parent ba430fd commit e335eb2

File tree

7 files changed: +83 additions, -69 deletions

README.md

Lines changed: 2 additions & 1 deletion
@@ -70,7 +70,8 @@ Ongoing project.

* 0.1947162s / move - 0.003894324s / simulation with 2 threads and 2 batch_size_eval with 50 simulations
* 0.1360865s / move - 0.00272173s / simulation 4 threads 4 batch_size_eval 50 simulations
- * 0.1222489s / move - 0.002444978s / simulation 8 threads 8 batch_size eval 50 simulations
+ * 0.1222489s / move - 0.002444978s / simulation 8 threads 8 batch_size_eval 50 simulations
+ * 0.1372498 / move - 0.00214452812s / simulations 16 threads 16 batch_size_eval 64 simulations

### 19x19 board

const.py

Lines changed: 4 additions & 4 deletions
@@ -13,21 +13,21 @@
## Number of evaluation parallel games
PARALLEL_EVAL = 2
## MCTS parallel
- MCTS_PARALLEL = 2
+ MCTS_PARALLEL = 16


##### GLOBAL

## Size of the Go board
GOBAN_SIZE = 9
## Number of move to end a game
- MOVE_LIMIT = GOBAN_SIZE ** 2
+ MOVE_LIMIT = GOBAN_SIZE ** 2 * 2.5
## Number of last states to keep
HISTORY = 7
## Learning rate
LR = 0.01
## Number of MCTS simulation
- MCTS_SIM = 5
+ MCTS_SIM = 64
## Exploration constant
C_PUCT = 0.2
## L2 Regularization
@@ -41,7 +41,7 @@
## Alpha for Dirichlet noise
ALPHA = 0.03
## Batch size for evaluation during MCTS
- BATCH_SIZE_EVAL = 2
+ BATCH_SIZE_EVAL = 4
## Number of self-play before training
SELF_PLAY_MATCH = 2 * PARALLEL_SELF_PLAY
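
Note: the three changed knobs interact in the search loop shown in the models/mcts.py diff below. A minimal sketch of the arithmetic, using the values from this commit (constant names follow const.py, the loop structure follows MCTS.search; this is an illustration, not repo code):

MCTS_SIM = 64        ## simulations per move
MCTS_PARALLEL = 16   ## search threads launched per round
BATCH_SIZE_EVAL = 4  ## states the evaluator batches per forward pass

rounds = MCTS_SIM // MCTS_PARALLEL     ## 4 rounds of 16 parallel searches per move
batches = MCTS_SIM // BATCH_SIZE_EVAL  ## 16 batched network evaluations per move
print(rounds, batches)                 ## -> 4 16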

lib/play.py

Lines changed: 16 additions & 13 deletions
@@ -86,18 +86,20 @@ def self_play(current_time, loaded_version):
    queue, results = create_matches(player , \
                        cores=PARALLEL_SELF_PLAY, match_number=SELF_PLAY_MATCH)
    print("[PLAY] Starting to fetch fresh games")
-    queue.join()
-    for _ in range(SELF_PLAY_MATCH):
-        result = results.get()
-        if result:
-            collection.insert({
-                "game": result,
-                "id": game_id
-            })
-            game_id += 1
-    print("[PLAY] Done fetching")
-    queue.close()
-    results.close()
+    try:
+        queue.join()
+        for _ in range(SELF_PLAY_MATCH):
+            result = results.get()
+            if result:
+                collection.insert({
+                    "game": result,
+                    "id": game_id
+                })
+                game_id += 1
+        print("[PLAY] Done fetching")
+    finally:
+        queue.close()
+        results.close()


def play(player, opponent):
@@ -257,9 +259,9 @@ def __call__(self):
        while not done:
            ## Prevent cycling in 2 atari situations
            if moves > MOVE_LIMIT:
-                print("cc")
                return pickle.dumps((dataset, self.board.get_winner()))

+            ## Magic ratio for adaptative temperature
            if moves > MOVE_LIMIT / 24:
                comp = True

@@ -287,6 +289,7 @@ def __call__(self):
            print("[EVALUATION] Match %d done in eval" % self.id)
            self.opponent.passed = False
            return pickle.dumps([reward])
+
        self.player.passed = False
        return pickle.dumps((dataset, reward))
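
Note: the self_play change above is a plain try/finally so the result queues are always closed, even if results.get() or the database insert raises. A stripped-down sketch of the same pattern (create_matches and the Mongo collection are replaced by placeholders; only the queue handling mirrors the diff):

def drain(queue, results, expected):
    ## queue: a multiprocessing.JoinableQueue of matches, results: a multiprocessing.Queue
    ## Mirror of the self_play cleanup: fetch everything, always close the queues
    games = []
    try:
        queue.join()
        for _ in range(expected):
            result = results.get()
            if result:
                games.append(result)
    finally:
        queue.close()
        results.close()
    return games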

lib/train.py

Lines changed: 3 additions & 5 deletions
@@ -171,6 +171,7 @@ def new_agent(result):
            print("[EVALUATION] New best player saved !")
        else:
            nonlocal last_id
+            ## Force a new fetch in case the player didnt improve
            last_id = fetch_new_games(collection, dataset, last_id)

    ## Wait before the circular before is full
@@ -186,12 +187,10 @@ def new_agent(result):
    while True:
        batch_loss = []
        for batch_idx, (state, move, winner) in enumerate(dataloader):
-
            running_loss = []
-            ## Force the network to stop training the current network
-            ## since the new one is better (from the callback)
-
            lr, optimizer = update_lr(lr, optimizer, total_ite)
+
+            ## Evaluate a copy of the current network asynchronously
            if total_ite % TRAIN_STEPS == 0:
                pending_player = deepcopy(player)
                last_id = fetch_new_games(collection, dataset, last_id)
@@ -206,7 +205,6 @@ def new_agent(result):
                pool.apply_async(evaluate, args=(pending_player, best_player), \
                                 callback=new_agent)
    except Exception as e:
-        print(e)
        client.close()
        pool.terminate()
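
Note: the surrounding training loop hands evaluation to a worker pool with a callback, which is how new_agent can promote a better network without blocking training. A minimal sketch of that apply_async + callback pattern (function bodies are placeholders, only the control flow follows the diff):

from multiprocessing import Pool

def evaluate(pending_player, best_player):
    ## Placeholder: would play matches and return True if pending_player wins enough
    return True

def new_agent(result):
    ## Callback runs in the parent process once evaluate() returns
    if result:
        print("[EVALUATION] New best player saved !")

if __name__ == "__main__":
    pool = Pool(processes=1)
    pool.apply_async(evaluate, args=("pending", "best"), callback=new_agent)
    pool.close()
    pool.join()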

lib/utils.py

Lines changed: 5 additions & 9 deletions
@@ -81,8 +81,9 @@ def get_player(current_time, version):
    return player, checkpoint


- # @profile
def sample_rotation(state, num=8):
+    """ Apply a certain number of random transformation to the input state """
+
    dh_group = [(None, None), ((np.rot90, 1), None), ((np.rot90, 2), None),
                ((np.rot90, 3), None), (np.fliplr, None), (np.flipud, None),
                (np.flipud, (np.rot90, 1)), (np.fliplr, (np.rot90, 1))]
@@ -109,15 +110,10 @@ def sample_rotation(state, num=8):


def formate_state(state, probas, winner):
+    """ Repeat the probas and the winner to make every example identical after
+    the dihedral rotation have been applied """
+
    probas = np.reshape(probas, (1, probas.shape[0]))
    probas = np.repeat(probas, 8, axis=0)
    winner = np.full((8, 1), winner)
    return state, probas, winner
-
-
- if __name__ == "__main__":
-     pass
-
-
-
-
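
Note: the new sample_rotation docstring refers to the dihedral group listed in the context above, where each entry is up to two numpy transforms applied in sequence. A small illustration of applying one entry to a board plane (dh_group is copied from the diff; the apply_transform helper is illustrative, not part of the repo):

import numpy as np

dh_group = [(None, None), ((np.rot90, 1), None), ((np.rot90, 2), None),
            ((np.rot90, 3), None), (np.fliplr, None), (np.flipud, None),
            (np.flipud, (np.rot90, 1)), (np.fliplr, (np.rot90, 1))]

def apply_transform(plane, transform):
    ## Apply one (possibly composed) dihedral transformation to a 2D plane
    for op in transform:
        if op is None:
            continue
        if isinstance(op, tuple):   ## (np.rot90, k): rotate by k quarter turns
            plane = op[0](plane, op[1])
        else:                       ## plain flip
            plane = op(plane)
    return plane

board = np.arange(9).reshape(3, 3)
print(apply_transform(board, dh_group[7]))  ## fliplr, then one 90 degree rotation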

main.py

Lines changed: 1 addition & 0 deletions
@@ -32,6 +32,7 @@ def main(folder, version):
    train_proc = pool.apply_async(train, args=(current_time, version,))

    ## Comment one line or the other to get the stack trace
+    ## Must add a loooooong timer otherwise signals are not caught
    self_play_proc.get(60000000)
    train_proc.get(60000000)
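
Note: the added comment echoes a common multiprocessing workaround: calling AsyncResult.get() with a very large timeout instead of no timeout so that Ctrl-C can still interrupt the main process while it waits on the two long-running workers. A minimal sketch of that pattern (the worker is a placeholder, the timeout value mirrors the diff):

from multiprocessing import Pool
import time

def worker():
    ## Stand-in for the self_play / train processes
    time.sleep(2)
    return "done"

if __name__ == "__main__":
    pool = Pool(processes=1)
    proc = pool.apply_async(worker)
    ## A huge timeout instead of a bare get() keeps KeyboardInterrupt deliverable
    print(proc.get(60000000))
    pool.close()
    pool.join()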

models/mcts.py

Lines changed: 52 additions & 37 deletions
@@ -8,8 +8,7 @@
from lib.utils import _prepare_state, sample_rotation


- class Node():
-
+ class Node:
    def __init__(self, parent=None, proba=None, move=None):
        """
        p : probability of reaching that node, given by the policy net
@@ -29,19 +28,14 @@ def __init__(self, parent=None, proba=None, move=None):
    def update(self, v):
        """ Update the node statistics after a playout """

-        self.w = self.w + float(v)
-        if self.n > 0:
-            self.q = self.w / self.n
-        else:
-            self.q = 0
+        self.w = self.w + v
+        self.q = self.w / self.n if self.n > 0 else 0


    def is_leaf(self):
        """ Check whether node is a leaf or not """

-        if self.childrens and len(self.childrens) > 0:
-            return False
-        return True
+        return len(self.childrens) == 0


    def expand(self, probas):
@@ -86,55 +80,63 @@ def _opt_select(nodes, c_puct):
    return equals[0]


-
class EvaluatorThread(threading.Thread):
-    def __init__(self, player, eval_queue, condition_search, condition_eval):
+    def __init__(self, player, eval_queue, result_queue, condition_search, condition_eval, condition_mix):
        threading.Thread.__init__(self)
        self.eval_queue = eval_queue
+        self.result_queue = result_queue
        self.player = player
        self.condition_search = condition_search
        self.condition_eval = condition_eval
+        self.condition_mix = condition_mix

    def run(self):
        total_sim = MCTS_SIM
        while total_sim > 0:
-            self.condition_search.acquire()
-            while (len(self.eval_queue.values()) != BATCH_SIZE_EVAL or \
-                    (len(self.eval_queue.values()) < BATCH_SIZE_EVAL and \
-                    len(self.eval_queue.values()) != total_sim)) or \
-                    (len(self.eval_queue.values()) == BATCH_SIZE_EVAL and \
-                    not all(isinstance(state, np.ndarray) for state in self.eval_queue.values())):
-                self.condition_search.wait()
-
-            self.condition_search.release()
            self.condition_eval.acquire()
+            while (len(self.eval_queue.values()) < BATCH_SIZE_EVAL or \
+                    len(self.result_queue.values()) > 0):
+                print("notifying in evaluator, current len: %d" % len(self.eval_queue.values()))
+                self.condition_eval.wait()
+            self.condition_eval.release()
+
            states = []
            for idx, state in self.eval_queue.items():
                states.append(sample_rotation(state, num=1))
+            print('states len: %d' % len(states))
            states = _prepare_state(states)
            feature_maps = self.player.extractor(states[0])

            ## Policy and value prediction
            probas = self.player.policy_net(feature_maps)
            v = self.player.value_net(feature_maps)
-            for idx in range(BATCH_SIZE_EVAL):
-                self.eval_queue[idx] = (probas[idx].cpu().data.numpy(), v[idx])
-            self.condition_eval.notifyAll()
-            self.condition_eval.release()
+            keys = list(self.eval_queue.keys())
+            idx = 0
+            for key in keys:
+                self.result_queue[key] = (probas[idx].cpu().data.numpy(), float(v[idx]))
+                del self.eval_queue[key]
+                idx += 1
+            del probas, v, feature_maps
+            self.condition_mix.acquire()
+            self.condition_mix.notifyAll()
+            self.condition_mix.release()
            total_sim -= BATCH_SIZE_EVAL



class SearchThread(threading.Thread):
-    def __init__(self, mcts, game, eval_queue, thread_id, lock, condition_search, condition_eval):
+    def __init__(self, mcts, game, eval_queue, result_queue, thread_id, \
+                    lock, condition_search, condition_eval, condition_mix):
        threading.Thread.__init__(self)
+        self.result_queue = result_queue
        self.eval_queue = eval_queue
        self.mcts = mcts
        self.game = game
        self.lock = lock
        self.thread_id = thread_id
        self.condition_eval = condition_eval
        self.condition_search = condition_search
+        self.condition_mix = condition_mix


    def run(self):
@@ -153,18 +155,25 @@ def run(self):
            ## Predict the probas
            if not done:
                self.condition_search.acquire()
-                self.eval_queue[self.thread_id] = state
-                self.condition_search.notify()
-
+                while len(self.eval_queue.values()) < BATCH_SIZE_EVAL and \
+                        len(self.result_queue.values()) == 0:
+                    print("trying to release in thread id %d" % self.thread_id)
+                    self.condition_search.wait()
+                print("added move in thread %d" % self.thread_id)
                self.condition_search.release()
+
+                self.eval_queue[self.thread_id] = state
                self.condition_eval.acquire()
-                self.condition_eval.wait()
-
-                res = self.eval_queue[self.thread_id]
-                probas = np.array(res[0])
-                v = float(res[1])
+                self.condition_eval.notify()
                self.condition_eval.release()

+                self.condition_mix.acquire()
+                self.condition_mix.wait()
+                self.condition_mix.release()
+
+                probas = np.array(self.result_queue[self.thread_id][0], copy=True)
+                v = float(self.result_queue[self.thread_id][1])
+                del self.result_queue[self.thread_id]

                ## Add noise in the root node
                if not current_node.parent:
@@ -186,6 +195,7 @@ def run(self):
                current_node.update(v)
                current_node = current_node.parent
            self.lock.release()
+            print("done in thread id %d" % self.thread_id)


class MCTS:
@@ -228,18 +238,22 @@ def search(self, current_game, player, competitive=False):
        threads = []
        condition_eval = threading.Condition()
        condition_search = threading.Condition()
+        condition_mix = threading.Condition()
        lock = threading.Lock()
        eval_queue = {}
-        evaluator = EvaluatorThread(player, eval_queue, condition_search, condition_eval)
+        result_queue = {}
+        evaluator = EvaluatorThread(player, eval_queue, result_queue, condition_search, \
+                                    condition_eval, condition_mix)
        evaluator.start()
        for sim in range(MCTS_SIM // MCTS_PARALLEL):
            eval_queue.clear()
            for idx in range(MCTS_PARALLEL):
-                threads.append(SearchThread(self, current_game, eval_queue, idx,
-                               lock, condition_search, condition_eval))
+                threads.append(SearchThread(self, current_game, eval_queue, result_queue, idx,
+                               lock, condition_search, condition_eval, condition_mix))
                threads[-1].start()
            for thread in threads:
                thread.join()
+        evaluator.join()

        action_scores = np.zeros((current_game.board_size ** 2 + 1,))
        for node in self.root.childrens:
@@ -251,6 +265,7 @@
                break

        self.root = self.root.childrens[idx]
+        print(final_probas, final_move)
        return final_probas, final_move
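
Note: the new scheme is a producer/consumer rendezvous between the search threads and a single evaluator. Search threads publish leaf states into eval_queue keyed by thread id, the evaluator waits until BATCH_SIZE_EVAL states are queued, runs one batched forward pass, writes (probas, value) pairs into result_queue, and wakes the searchers. A stripped-down, deadlock-free sketch of that hand-off with a dummy network (queue and condition names follow the diff, but this collapses condition_search and condition_eval into one condition and replaces the three networks with a placeholder function):

import threading

BATCH_SIZE_EVAL = 4

eval_queue = {}      ## thread_id -> state waiting for evaluation
result_queue = {}    ## thread_id -> (probas, value) once evaluated
condition_eval = threading.Condition()
condition_mix = threading.Condition()

def fake_network(states):
    ## Placeholder for the batched extractor / policy_net / value_net pass
    return {tid: ([0.5], 0.0) for tid in states}

def evaluator(total_sim):
    done = 0
    while done < total_sim:
        with condition_eval:
            ## Wait until a full batch of states has been queued by the searchers
            while len(eval_queue) < BATCH_SIZE_EVAL:
                condition_eval.wait()
            batch = dict(eval_queue)
            eval_queue.clear()
        results = fake_network(batch)
        with condition_mix:
            result_queue.update(results)
            condition_mix.notify_all()   ## wake every search thread waiting on a result
        done += len(batch)

def searcher(thread_id):
    ## Publish one leaf state, then block until its evaluation comes back
    with condition_eval:
        eval_queue[thread_id] = "state-%d" % thread_id
        condition_eval.notify()
    with condition_mix:
        while thread_id not in result_queue:
            condition_mix.wait()
        probas, v = result_queue.pop(thread_id)
    print("thread %d got" % thread_id, probas, v)

searchers = [threading.Thread(target=searcher, args=(i,)) for i in range(BATCH_SIZE_EVAL)]
ev = threading.Thread(target=evaluator, args=(BATCH_SIZE_EVAL,))
ev.start()
for t in searchers:
    t.start()
for t in searchers:
    t.join()
ev.join()

Compared to the diff, this sketch guards each dictionary with the condition that announces changes to it and always rechecks the predicate before waiting, which is the usual way to avoid the lost wake-ups the commit message is warning about.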
