
Commit 3860bbe

[ALL] Migrated to pytorch 0.5, minor fixes and reverted to old threading on MCTS

1 parent: e335eb2

10 files changed: +60 -83 lines

README.md
Lines changed: 5 additions & 5 deletions

@@ -64,14 +64,14 @@ Ongoing project.
 
 # Statistics
 
-## For a 10 layers deep Resnet evaluated on 5 games
+## For a 10 layers deep Resnet evaluated on 50 games 64 simulations
 
 ### 9x9 board
 
-* 0.1947162s / move - 0.003894324s / simulation with 2 threads and 2 batch_size_eval with 50 simulations
-* 0.1360865s / move - 0.00272173s / simulation 4 threads 4 batch_size_eval 50 simulations
-* 0.1222489s / move - 0.002444978s / simulation 8 threads 8 batch_size_eval 50 simulations
-* 0.1372498 / move - 0.00214452812s / simulations 16 threads 16 batch_size_eval 64 simulations
+* 0.2377991s / move - 0.00371561093s / simulation 2 threads 2 batch_size_eval
+* 0.1624937s / move - 0.00253896406s / simulation 4 threads 4 batch_size_eval
+* 0.1465123s / move - 0.00228925468s / simulation 8 threads 8 batch_size_eval
+* 0.1401098s / move - 0.00218921563s / simulation 16 threads 16 batch_size_eval
 
 ### 19x19 board
 
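(For reference, the per-simulation figures are simply the per-move times divided by the 64 simulations, e.g. 0.2377991 s / 64 ≈ 0.00372 s.)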

const.py
Lines changed: 6 additions & 7 deletions

@@ -6,14 +6,13 @@
 ## CUDA variable from Torch
 CUDA = torch.cuda.is_available()
 ## Dtype of the tensors depending on CUDA
-DTYPE_FLOAT = torch.cuda.FloatTensor if CUDA else torch.FloatTensor
-DTYPE_LONG = torch.cuda.LongTensor if CUDA else torch.LongTensor
+DEVICE = torch.device("cuda") if CUDA else torch.device("cpu")
 ## Number of self-play parallel games
-PARALLEL_SELF_PLAY = 2
+PARALLEL_SELF_PLAY = 3
 ## Number of evaluation parallel games
 PARALLEL_EVAL = 2
 ## MCTS parallel
-MCTS_PARALLEL = 16
+MCTS_PARALLEL = 4
 
 
 ##### GLOBAL
@@ -61,13 +60,13 @@
 ## Number of residual blocks
 BLOCKS = 10
 ## Number of training step before evaluating
-TRAIN_STEPS = 7 * BATCH_SIZE
+TRAIN_STEPS = 6 * BATCH_SIZE
 ## Optimizer
 ADAM = False
 ## Learning rate annealing factor
 LR_DECAY = 0.1
 ## Learning rate annealing interval
-LR_DECAY_ITE = 50 * TRAIN_STEPS
+LR_DECAY_ITE = 100 * TRAIN_STEPS
 ## Print the loss
 LOSS_TICK = BATCH_SIZE // 4
 ## Refresh the dataset
@@ -78,6 +77,6 @@
 
 ## Number of matches against its old version to evaluate
 ## the newly trained network
-EVAL_MATCHS = 21
+EVAL_MATCHS = 20
 ## Threshold to keep the new neural net
 EVAL_THRESH = 0.55
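For context on the DTYPE_* → DEVICE switch: since PyTorch 0.4 the idiomatic pattern is to create tensors directly on a torch.device and move modules with .to(). A minimal sketch of that pattern (illustrative only, not code from this repo):

    import torch

    CUDA = torch.cuda.is_available()
    DEVICE = torch.device("cuda") if CUDA else torch.device("cpu")

    # Old style: Variable(torch.zeros(2, 3)).type(torch.cuda.FloatTensor if CUDA else torch.FloatTensor)
    # New style: build the tensor on the right device from the start
    x = torch.zeros(2, 3, dtype=torch.float, device=DEVICE)
    # net = SomeNetwork().to(DEVICE)   # hypothetical module, moved the same way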

lib/evaluate.py
Lines changed: 2 additions & 2 deletions

@@ -11,7 +11,7 @@ def evaluate(player, new_player):
     black_wins = 0
     white_wins = 0
     for result in results:
-        if result[0] == 0:
+        if result[0] == 1:
             white_wins += 1
         else:
             black_wins += 1
@@ -20,4 +20,4 @@ def evaluate(player, new_player):
             % (black_wins, white_wins))
     if black_wins >= EVAL_THRESH * len(results):
         return True
-    return False
+    return False
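Read together with the updated print in lib/play.py below ("black" if reward == 0 else "white"), the fixed comparison means a reward of 1 counts as a white win and anything else as a black win. Roughly (a sketch, not the repo's code):

    EVAL_THRESH = 0.55

    def keep_new_network(results):
        # results holds one unpickled [reward] per evaluation match
        white_wins = sum(1 for r in results if r[0] == 1)
        black_wins = len(results) - white_wins
        # The new network is kept only if black (apparently the candidate side)
        # wins at least EVAL_THRESH of the matches.
        return black_wins >= EVAL_THRESH * len(results)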

lib/gtp.py
Lines changed: 0 additions & 3 deletions

@@ -129,11 +129,8 @@ def __init__(self, game, komi=7.5, board_size=19, version="0.2", name="AlphaGo")
     def send(self, message):
         message_id, command, arguments = parse_message(message)
         if command in self.known_commands:
-            # try:
             return format_success(
                 message_id, getattr(self, "cmd_" + command)(arguments))
-            # except ValueError as exception:
-            #     return format_error(message_id, exception.args[0])
         else:
             return format_error(message_id, "unknown command")
 

lib/play.py
Lines changed: 2 additions & 5 deletions

@@ -153,7 +153,7 @@ def run(self):
                 self.game_queue.task_done()
                 self.result_queue.put(answer)
             except Exception as e:
-                print("xd")
+                print("Game has thrown an error")
 
 
 
@@ -195,8 +195,6 @@ def _get_move(self, board, probas):
         """ Select a move without MCTS """
 
        player_move = None
-        valid_move = False
-        can_pass = False
        legal_moves = board.get_legal_moves()
 
        while player_move not in legal_moves and len(legal_moves) > 0:
@@ -208,7 +206,6 @@ def _get_move(self, board, probas):
 
         return player_move
 
-    # @profile
     def _play(self, state, player, other_pass, competitive=False):
         """ Choose a move depending on MCTS or not """
 
@@ -286,7 +283,7 @@ def __call__(self):
 
         ## Pickle the result because multiprocessing
         if self.opponent:
-            print("[EVALUATION] Match %d done in eval" % self.id)
+            print("[EVALUATION] Match %d done in eval, winner %s" % (self.id, "black" if reward == 0 else "white"))
             self.opponent.passed = False
         return pickle.dumps([reward])
 

lib/train.py
Lines changed: 8 additions & 7 deletions

@@ -29,6 +29,7 @@ def __init__(self):
     def forward(self, winner, self_play_winner, probas, self_play_probas):
         value_error = F.mse_loss(winner, self_play_winner)
         policy_error = F.kl_div(probas, self_play_probas)
+        # policy_error = torch.sum(-probas * torch.log(self_play_probas))
         return value_error + policy_error
 
 
@@ -104,9 +105,9 @@ def collate_fn(example):
         state.extend(ex[0])
         probas.extend(ex[1])
         winner.extend(ex[2])
-    state = torch.tensor(state).type(DTYPE_FLOAT)
-    probas = torch.tensor(probas).type(DTYPE_FLOAT)
-    winner = torch.tensor(winner).type(DTYPE_FLOAT)
+    state = torch.tensor(state, dtype=torch.float, device=DEVICE)
+    probas = torch.tensor(probas, dtype=torch.float, device=DEVICE)
+    winner = torch.tensor(winner, dtype=torch.float, device=DEVICE)
     return state, probas, winner
 
 
@@ -151,7 +152,7 @@ def train(current_time, loaded_version):
         total_ite = checkpoint['total_ite']
         lr = checkpoint['lr']
         version = checkpoint['version']
-        last_id = collection.find().count() - 120
+        last_id = collection.find().count() - (MOVES // MOVE_LIMIT) * 2
     else:
         player = Player()
         optimizer = create_optimizer(player, lr)
@@ -209,9 +210,9 @@ def new_agent(result):
             pool.terminate()
 
         example = {
-            'state': Variable(state).type(DTYPE_FLOAT),
-            'winner': Variable(winner).type(DTYPE_FLOAT),
-            'move' : Variable(move).type(DTYPE_FLOAT)
+            'state': state,
+            'winner': winner,
+            'move' : move
        }
        loss = train_epoch(player, optimizer, example, criterion)
        running_loss.append(loss)
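As background on the loss hunk: F.kl_div expects log-probabilities as its first argument, and the newly added comment keeps a plain cross-entropy variant of the policy term around. The AlphaZero-style objective being computed is roughly the following (illustrative sketch, names are not from the repo):

    import torch
    import torch.nn.functional as F

    def alphazero_loss(pred_value, target_value, log_probas, target_probas):
        # Value head: squared error against the self-play outcome
        value_error = F.mse_loss(pred_value, target_value)
        # Policy head: cross entropy against the MCTS visit distribution
        policy_error = -(target_probas * log_probas).sum(dim=1).mean()
        return value_error + policy_error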

lib/utils.py
Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ def _prepare_state(state):
     """
 
     x = torch.from_numpy(np.array([state]))
-    x = Variable(x).type(DTYPE_FLOAT)
+    x = torch.tensor(x, dtype=torch.float, device=DEVICE)
     return x
 
 
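A side note on the _prepare_state change (not part of the commit): torch.from_numpy() already returns a tensor, so calling torch.tensor() on it makes an extra copy and, in later PyTorch releases, emits a copy-construct warning. An equivalent 0.4+ form would be:

    import numpy as np
    import torch

    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    def _prepare_state(state):
        # Convert dtype and device in one step instead of re-wrapping the tensor
        return torch.from_numpy(np.array([state])).to(DEVICE, dtype=torch.float)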

main.py
Lines changed: 1 addition & 1 deletion

@@ -33,7 +33,7 @@ def main(folder, version):
 
         ## Comment one line or the other to get the stack trace
         ## Must add a loooooong timer otherwise signals are not caught
-        self_play_proc.get(60000000)
+        # self_play_proc.get(60000000)
         train_proc.get(60000000)
 
     except KeyboardInterrupt:
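(Background on the huge timeout: multiprocessing's AsyncResult.get() with no timeout can block in a way that never delivers KeyboardInterrupt to the main process, so passing an absurdly large timeout is a common workaround; that is what the "loooooong timer" comment refers to.)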

models/mcts.py
Lines changed: 27 additions & 47 deletions

@@ -81,62 +81,53 @@ def _opt_select(nodes, c_puct):
 
 
 class EvaluatorThread(threading.Thread):
-    def __init__(self, player, eval_queue, result_queue, condition_search, condition_eval, condition_mix):
+    def __init__(self, player, eval_queue, condition_search, condition_eval):
         threading.Thread.__init__(self)
         self.eval_queue = eval_queue
-        self.result_queue = result_queue
         self.player = player
         self.condition_search = condition_search
         self.condition_eval = condition_eval
-        self.condition_mix = condition_mix
 
     def run(self):
         total_sim = MCTS_SIM
         while total_sim > 0:
-            self.condition_eval.acquire()
-            while (len(self.eval_queue.values()) < BATCH_SIZE_EVAL or \
-                    len(self.result_queue.values()) > 0):
-                print("notifying in evaluator, current len: %d" % len(self.eval_queue.values()))
-                self.condition_eval.wait()
-            self.condition_eval.release()
+            self.condition_search.acquire()
+            while (len(self.eval_queue.values()) != BATCH_SIZE_EVAL or \
+                    (len(self.eval_queue.values()) < BATCH_SIZE_EVAL and \
+                    len(self.eval_queue.values()) != total_sim)) or \
+                    (len(self.eval_queue.values()) == BATCH_SIZE_EVAL and \
+                    not all(isinstance(state, np.ndarray) for state in self.eval_queue.values())):
+                self.condition_search.wait()
 
+            self.condition_search.release()
+            self.condition_eval.acquire()
             states = []
             for idx, state in self.eval_queue.items():
                 states.append(sample_rotation(state, num=1))
-            print('states len: %d' % len(states))
             states = _prepare_state(states)
             feature_maps = self.player.extractor(states[0])
 
             ## Policy and value prediction
             probas = self.player.policy_net(feature_maps)
             v = self.player.value_net(feature_maps)
-            keys = list(self.eval_queue.keys())
-            idx = 0
-            for key in keys:
-                self.result_queue[key] = (probas[idx].cpu().data.numpy(), float(v[idx]))
-                del self.eval_queue[key]
-                idx += 1
-            del probas, v, feature_maps
-            self.condition_mix.acquire()
-            self.condition_mix.notifyAll()
-            self.condition_mix.release()
+            for idx in range(BATCH_SIZE_EVAL):
+                self.eval_queue[idx] = (probas[idx].cpu().data.numpy(), v[idx])
+            self.condition_eval.notifyAll()
+            self.condition_eval.release()
             total_sim -= BATCH_SIZE_EVAL
 
 
 
 class SearchThread(threading.Thread):
-    def __init__(self, mcts, game, eval_queue, result_queue, thread_id, \
-                    lock, condition_search, condition_eval, condition_mix):
+    def __init__(self, mcts, game, eval_queue, thread_id, lock, condition_search, condition_eval):
         threading.Thread.__init__(self)
-        self.result_queue = result_queue
         self.eval_queue = eval_queue
         self.mcts = mcts
         self.game = game
         self.lock = lock
         self.thread_id = thread_id
         self.condition_eval = condition_eval
         self.condition_search = condition_search
-        self.condition_mix = condition_mix
 
 
     def run(self):
@@ -155,25 +146,18 @@ def run(self):
             ## Predict the probas
             if not done:
                 self.condition_search.acquire()
-                while len(self.eval_queue.values()) < BATCH_SIZE_EVAL and \
-                        len(self.result_queue.values()) == 0:
-                    print("trying to release in thread id %d" % self.thread_id)
-                    self.condition_search.wait()
-                print("added move in thread %d" % self.thread_id)
-                self.condition_search.release()
-
                 self.eval_queue[self.thread_id] = state
+                self.condition_search.notify()
+
+                self.condition_search.release()
                 self.condition_eval.acquire()
-                self.condition_eval.notify()
-                self.condition_eval.release()
+                self.condition_eval.wait()
 
-                self.condition_mix.acquire()
-                self.condition_mix.wait()
-                self.condition_mix.release()
+                res = self.eval_queue[self.thread_id]
+                probas = np.array(res[0])
+                v = float(res[1])
+                self.condition_eval.release()
 
-                probas = np.array(self.result_queue[self.thread_id][0], copy=True)
-                v = float(self.result_queue[self.thread_id][1])
-                del self.result_queue[self.thread_id]
 
             ## Add noise in the root node
             if not current_node.parent:
@@ -195,7 +179,7 @@ def run(self):
                 current_node.update(v)
                 current_node = current_node.parent
             self.lock.release()
-            print("done in thread id %d" % self.thread_id)
+
 
 
 class MCTS:
@@ -238,18 +222,15 @@ def search(self, current_game, player, competitive=False):
         threads = []
         condition_eval = threading.Condition()
         condition_search = threading.Condition()
-        condition_mix = threading.Condition()
         lock = threading.Lock()
         eval_queue = {}
-        result_queue = {}
-        evaluator = EvaluatorThread(player, eval_queue, result_queue, condition_search, \
-                        condition_eval, condition_mix)
+        evaluator = EvaluatorThread(player, eval_queue, condition_search, condition_eval)
         evaluator.start()
         for sim in range(MCTS_SIM // MCTS_PARALLEL):
             eval_queue.clear()
             for idx in range(MCTS_PARALLEL):
-                threads.append(SearchThread(self, current_game, eval_queue, result_queue, idx,
-                                lock, condition_search, condition_eval, condition_mix))
+                threads.append(SearchThread(self, current_game, eval_queue, idx,
+                                lock, condition_search, condition_eval))
                 threads[-1].start()
             for thread in threads:
                 thread.join()
@@ -265,7 +246,6 @@ def search(self, current_game, player, competitive=False):
                 break
 
         self.root = self.root.childrens[idx]
-        print(final_probas, final_move)
         return final_probas, final_move
 
 
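To summarize the threading revert: SearchThread workers expand the tree, drop their leaf state into the shared eval_queue and block on condition_eval, while the single EvaluatorThread waits until a full BATCH_SIZE_EVAL batch is present, runs one batched forward pass, writes the (probas, value) pairs back into the same dict and wakes every worker. A minimal sketch of that producer/consumer pattern, using queue.Queue instead of the Condition objects above (all names illustrative, not from the repo):

    import queue
    import threading

    BATCH_SIZE_EVAL = 4  # illustrative value

    def fake_network(states):
        # Stand-in for the batched policy/value forward pass (assumption)
        return [("probas_for_" + s, 0.5) for s in states]

    def evaluator(requests, responses):
        # Block until a full batch of leaf states is queued, evaluate once, dispatch
        batch = [requests.get() for _ in range(BATCH_SIZE_EVAL)]
        thread_ids, states = zip(*batch)
        for tid, res in zip(thread_ids, fake_network(states)):
            responses[tid].put(res)

    def search_worker(tid, requests, responses):
        # One simulation step: submit the leaf state, wait for its evaluation
        requests.put((tid, "state-%d" % tid))
        probas, value = responses[tid].get()
        print(tid, probas, value)

    requests = queue.Queue()
    responses = {i: queue.Queue() for i in range(BATCH_SIZE_EVAL)}
    threads = [threading.Thread(target=search_worker, args=(i, requests, responses))
               for i in range(BATCH_SIZE_EVAL)]
    threads.append(threading.Thread(target=evaluator, args=(requests, responses)))
    for t in threads:
        t.start()
    for t in threads:
        t.join()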

viewer.py
Lines changed: 8 additions & 5 deletions

@@ -9,12 +9,12 @@
 from pymongo import MongoClient
 
 
-def game_to_gtp(game, game_id, collection_name):
+def game_to_gtp(game, game_id, collection_name, color):
     """ Take a game from the database and convert it to send GTP instructions """
 
     board_size = int(np.sqrt(len(game[0][0][1]) - 1))
     moves = np.array(game[0])[:,3]
-    move_count = 0
+    move_count = 0 if color == 0 else 1
     game_winner = game[1]
 
     ## Wait for input
@@ -32,20 +32,23 @@ def game_to_gtp(game, game_id, collection_name):
             else:
                 print(format_success(None, response="{}{}".format("ABCDEFGHJKLMNOPQRSTYVWYZ"\
                         [int(move % board_size)], int(board_size - move // board_size))))
-                move_count += 1
+                move_count += 2
         else:
             print('?name %s ???\n\n' % (command))
     elif "name" in command:
         print(format_success(None, response="folder {}, game id: {}, winner: {}"\
                 .format(collection_name, game_id, game_winner)))
+    elif "play" in command:
+        print(format_success(message_id, ""))
     else:
        print('?name %s ???\n\n' % (command))
 
 
 @click.command()
 @click.option("--folder", default=-1)
 @click.option("--game_id", default=-1)
-def main(folder, game_id):
+@click.option("--color", default=1)
+def main(folder, game_id, color):
     ## Init Mongo
     client = MongoClient()
     db = client.superGo
@@ -71,7 +74,7 @@ def main(folder, game_id):
     else:
         for game in last_game:
             final_game = pickle.loads(game['game'])
-            game_to_gtp(final_game, game['id'], collection)
+            game_to_gtp(final_game, game['id'], collection, color)
             break
 
 if __name__ == "__main__":
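With the new --color option a stored game can be replayed from either side: move_count starts at 0 for color 0 and at 1 for color 1, then advances by 2 per genmove. An illustrative invocation (placeholder values, not from the repo):

    python viewer.py --folder <collection_timestamp> --game_id <id> --color 0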
