commit 6284b7b526a0c1173db6c6c4aefb80a924659ef4
Author: John Kubach <johnkubach@gmail.com>
Date: Tue, 28 Sep 2021 11:24:53 -0400
Add Python code
Diffstat:
A | Actions.py | | | 23 | +++++++++++++++++++++++ |
A | Space_Invaders.py | | | 121 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | Steps.py | | | 13 | +++++++++++++ |
A | __init__.py | | | 1 | + |
A | main.py | | | 137 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 295 insertions(+), 0 deletions(-)
diff --git a/Actions.py b/Actions.py
@@ -0,0 +1,23 @@
+from enum import Enum
+from MAMEToolkit.emulator import Action
+
class Actions(Enum):
    """Named emulator inputs used by the Space Invaders environment.

    Every member wraps a MAMEToolkit ``Action`` constructed from two
    strings: a tag such as ``:CONTP1`` and a label such as ``P1 Right``
    (presumably the MAME input port and the field on that port -- confirm
    against the MAMEToolkit documentation).
    """

    # Coins, start buttons and the coinage display.
    insert_coin = Action(":COIN", "Coin 1")
    p1_start = Action(":IN1", "1 Player Start")
    p2_start = Action(":IN1", "2 Players Start")
    coinage = Action(":IN2", "Display Coinage")

    # Lives settings.
    bonus_life = Action(":IN2", "Bonus Life")
    lives = Action(":IN2", "Lives")

    # Player 1 movement and fire.
    p1_right = Action(":CONTP1", "P1 Right")
    p1_left = Action(":CONTP1", "P1 Left")
    p1_shoot = Action(":CONTP1", "P1 Button 1")

    # Player 2 movement and fire.
    p2_right = Action(":CONTP2", "P2 Right")
    p2_left = Action(":CONTP2", "P2 Left")
    p2_shoot = Action(":CONTP2", "P2 Button 1")
diff --git a/Space_Invaders.py b/Space_Invaders.py
@@ -0,0 +1,121 @@
+import math
+from MAMEToolkit.emulator import Emulator
+from MAMEToolkit.emulator import Address
+from Actions import Actions
+from Steps import *
+
def add_rewards(old_data, new_data):
    """Fold the reward values of ``old_data`` into ``new_data`` in place.

    Only top-level keys containing the substring "rewards" are considered.
    For each of those, every entry found in ``old_data`` is added to the
    entry of the same name in ``new_data``; all other keys of ``new_data``
    are left as they are.

    Returns:
        ``new_data`` itself (the same object, mutated).

    Raises:
        KeyError: if ``new_data`` lacks a key or entry being accumulated.
    """
    for key, totals in old_data.items():
        if "rewards" not in key:
            continue
        for name in totals:
            new_data[key][name] += totals[name]
    return new_data
+
def memory_addresses():
    """Build the table of emulator memory locations the environment reads.

    Returns:
        A dict mapping a field name to a MAMEToolkit ``Address``. Every
        address is declared with the 's16' type code.
    """
    # (field name, address) pairs, in the order they appear in the result.
    layout = (
        ("score", "0x20F8"),
        ("player_x", "0x201B"),
        ("player_y", "0x201A"),
        ("shot_status", "0x2025"),
        ("num_aliens", "0x2082"),
        ("alien_shot_x", "0x207C"),
        ("alien_shot_y", "0x207B"),
        ("shot_collision", "0x2061"),
        ("player_alive", "0x2068"),
        ("has_lives", "0x20E7"),
        ("ships_remain", "0x21FF"),
    )
    return {field: Address(location, "s16") for field, location in layout}
+
def index_to_action(action):
    """Translate a discrete action index into the inputs to press.

    Index 0 fires, 1 moves left, 2 moves right and 3 presses nothing.

    Returns:
        A new list of ``Actions`` members (empty for index 3).

    Raises:
        KeyError: if ``action`` is not one of the four known indices.
    """
    pressed = {
        0: (Actions.p1_shoot,),
        1: (Actions.p1_left,),
        2: (Actions.p1_right,),
        3: (),
    }
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(pressed[action])
+
class Space_Invaders(object):
    """Step-based environment wrapper around a MAME "invaders" emulator.

    Typical use: construct, call ``start()`` once, then call ``step()``
    repeatedly until it reports game over; call ``new_game()`` to play
    again and ``close()`` when finished.
    """

    # Class-level default: guarantees ``step()`` can always tell that
    # ``start()`` has not run yet and raise the intended error, rather than
    # failing with AttributeError on a missing attribute.
    started = False

    def __init__(self, env_id, roms_path, frame_ratio=3, frames_per_step=3):
        """Create the emulator for the "invaders" ROM.

        Args:
            env_id: identifier forwarded to the MAMEToolkit ``Emulator``.
            roms_path: directory containing the ROMs, forwarded as well.
            frame_ratio: forwarded to the emulator and used to scale the
                waits of the start/new-game input scripts.
            frames_per_step: number of emulator steps folded into a single
                ``step()`` call.
        """
        self.frame_ratio = frame_ratio
        self.frames_per_step = frames_per_step
        self.emu = Emulator(env_id, roms_path, "invaders", memory_addresses(), frame_ratio=frame_ratio)
        # Not read anywhere in this class; kept for external users.
        self.expected_score = {"score": 1000, "alive": 1}
        self.started = False
        self.game_over = False

    def run_steps(self, steps):
        """Play a scripted input sequence.

        Each entry of ``steps`` is a dict with a "wait" count and a list of
        "actions": the emulator is stepped "wait" times with no input and
        then once with the listed actions pressed.
        """
        for step in steps:
            for _ in range(step["wait"]):
                self.emu.step([])
            self.emu.step([action.value for action in step["actions"]])

    def start(self):
        """Run the start-up input script and mark the environment as started."""
        self.run_steps(start_game(self.frame_ratio))
        self.started = True

    def check_game_over(self, data):
        """Set ``self.game_over`` when the "has_lives" reading drops below 257.

        Returns ``data`` unchanged.
        """
        # NOTE(review): 257 is 0x0101 -- presumably both bytes of the 16-bit
        # read at this address are 1 while lives remain; confirm.
        if data["has_lives"] < 257:
            self.game_over = True

        return data

    def new_game(self):
        """Run the new-game input script and reset the per-game state."""
        self.run_steps(new_game(self.frame_ratio))
        # Same keys as in __init__ (this used to be reset under the
        # misspelled key "scoreL").
        self.expected_score = {"score": 1000, "alive": 1}
        self.game_over = False

    def gather_frames(self, actions):
        """Advance ``frames_per_step`` sub-steps while holding ``actions``.

        Returns the data of the last sub-step, where "frame" is the single
        frame when ``frames_per_step`` is 1 and otherwise the list of all
        collected frames.

        NOTE: every entry of "rewards" is accumulated across the sub-steps
        by ``add_rewards``, so with more than one frame per step each value
        is the sum of the individual readings.
        """
        data = self.sub_step(actions)
        frames = [data["frame"]]
        for _ in range(self.frames_per_step - 1):
            data = add_rewards(data, self.sub_step(actions))
            frames.append(data["frame"])
        data["frame"] = frames[0] if self.frames_per_step == 1 else frames
        return data

    def sub_step(self, actions):
        """Advance the emulator one step and attach a "rewards" dict.

        The rewards are the raw memory readings plus the offset and the
        Euclidean distance between the player and the alien shot.
        """
        data = self.emu.step([action.value for action in actions])

        x_diff = data["player_x"] - data["alien_shot_x"]
        y_diff = data["player_y"] - data["alien_shot_y"]

        data["rewards"] = {
            "score": data["score"],
            "aliens": data["num_aliens"],
            "x_diff": x_diff,
            "y_diff": y_diff,
            "alien_shot_x": data["alien_shot_x"],
            "alien_shot_y": data["alien_shot_y"],
            "distance": math.sqrt((x_diff ** 2) + (y_diff ** 2)),
            "alive": data["player_alive"],
            "shot_status": data["shot_status"],
            "collide": data["shot_collision"],
            "has_lives": data["has_lives"],
        }
        return data

    def step(self, action):
        """Apply the action with index ``action`` for one environment step.

        Returns:
            A ``(frame, rewards, game_over)`` tuple.

        Raises:
            EnvironmentError: if ``start()`` has not been called yet, or if
                the current game is over and ``new_game()`` was not called.
        """
        if not self.started:
            raise EnvironmentError("Start must be called before stepping")
        if self.game_over:
            raise EnvironmentError("Attempted to step while game not playing")

        data = self.gather_frames(list(index_to_action(action)))
        data = self.check_game_over(data)
        return data["frame"], data["rewards"], self.game_over

    def close(self):
        """Shut the emulator down."""
        self.emu.close()
diff --git a/Steps.py b/Steps.py
@@ -0,0 +1,13 @@
+from Actions import Actions
+
def start_game(frame_ratio):
    """Return the input script used to start the first game.

    The script inserts a coin and then presses the one-player start button
    twice. Each entry carries a "wait" count, scaled down by
    ``frame_ratio``, and the list of "actions" to press after that wait.
    """
    script = (
        (300, Actions.insert_coin),
        (60, Actions.p1_start),
        (60, Actions.p1_start),
    )
    return [
        {"wait": int(frames / frame_ratio), "actions": [button]}
        for frames, button in script
    ]
+
def new_game(frame_ratio):
    """Return the input script used to begin another game.

    Same inputs as the start-up script (coin, then one-player start twice)
    but with a longer initial wait of 600 frames before the coin. Each
    entry carries a "wait" count, scaled down by ``frame_ratio``, and the
    list of "actions" to press after that wait.
    """
    script = (
        (600, Actions.insert_coin),
        (60, Actions.p1_start),
        (60, Actions.p1_start),
    )
    return [
        {"wait": int(frames / frame_ratio), "actions": [button]}
        for frames, button in script
    ]
diff --git a/__init__.py b/__init__.py
@@ -0,0 +1 @@
+from Actions import Actions
diff --git a/main.py b/main.py
@@ -0,0 +1,137 @@
+import random
+import numpy as np
+import matplotlib.pyplot as plt
+import pickle
+import time
+from matplotlib import style
+from Space_Invaders import Space_Invaders
+from Actions import Actions
+from MAMEToolkit.emulator import Emulator
+from MAMEToolkit.emulator import Address
+from MAMEToolkit.emulator import Action
+from MAMEToolkit.emulator import list_actions
+
# MAME machine name; only referenced by the commented-out list_actions call.
game_id = "invaders"
# Absolute, machine-specific directory holding the ROM set.
roms_path = "/home/john/media/downloads/Transmission/MAME 0.220 ROMs (split)/"
+
+#print(list_actions(roms_path, game_id))
+
+# env = Space_Invaders("env1", roms_path)
+# env.start()
+
+
def add_action_to_observation(observation, action):
    """Return a flat array holding ``action`` followed by ``observation``.

    The observation is flattened first, so the result is always 1-D.
    """
    head = np.ravel([action])
    tail = np.ravel(observation)
    return np.concatenate((head, tail))
+
def _classify_state(alive, shot_status):
    """Map the raw "alive" / "shot_status" readings to a Q-table row index."""
    if alive == 0:
        return 1
    if shot_status in (12303, 12298):
        return 0
    if shot_status == 12294:
        return 3
    return 4


def initial_training():
    """Train a small tabular Q-learning agent on Space Invaders.

    Plays 15 games with an epsilon-greedy policy over a 5-state x 4-action
    Q-table, then prints the table, pickles it to
    ``qtable-<unix time>.pickle`` and plots the running mean of the
    per-step reward.

    Changes from the previous revision:
      * the Q-table is actually updated (``new_q`` used to be discarded);
      * the greedy action and the future value are taken over a whole
        state row instead of a single scalar cell, which always yielded
        action 0;
      * exploration can pick all four actions (the exclusive upper bound
        of ``np.random.randint`` used to leave out action 3);
      * the emulator is closed even when training fails.

    Returns:
        The trained Q-table, a ``numpy`` array of shape (5, 4).
    """
    NUM_STATES = 5
    NUM_ACTIONS = 4

    DEATH_PENALTY = 50
    KILL_REWARD = 500
    MISS_PENALTY = 10
    EPS_DECAY = 0.9998
    LEARNING_RATE = 0.1
    DISCOUNT = 0.95

    # Reward granted on entering each state. No reading currently maps to
    # state 2; its entry keeps the table complete.
    # NOTE(review): MISS_PENALTY is applied with a positive sign, exactly
    # as before -- confirm whether it was meant to be negative.
    state_rewards = {
        0: KILL_REWARD,
        1: -DEATH_PENALTY,
        2: -1,
        3: 0,
        4: MISS_PENALTY,
    }

    epsilon = 0.6
    games = 15
    q_table = np.random.rand(NUM_STATES, NUM_ACTIONS)

    step_rewards = []
    avg_reward = []
    reward_total = 0

    # State the current action was chosen in; None until the first
    # observation of each game, so no update is made across game borders.
    state = None
    action = random.randint(0, NUM_ACTIONS - 1)

    env = Space_Invaders("env1", roms_path)
    try:
        env.start()

        while games > 0:
            _frames, rewards, game_over = env.step(action)
            new_state = _classify_state(rewards["alive"], rewards["shot_status"])
            reward = state_rewards[new_state]

            if state is not None:
                if reward == KILL_REWARD:
                    new_q = KILL_REWARD
                else:
                    # A finished game has no future value to bootstrap from.
                    max_future_q = 0.0 if game_over else np.max(q_table[new_state])
                    current_q = q_table[state, action]
                    new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)
                q_table[state, action] = new_q

            # Running mean kept incrementally instead of re-summing the
            # whole history on every step.
            reward_total += reward
            step_rewards.append(reward)
            avg_reward.append(reward_total / len(step_rewards))
            epsilon *= EPS_DECAY

            if game_over:
                games -= 1
                state = None
                if games > 0:
                    env.new_game()
            else:
                state = new_state
                if np.random.random() > epsilon:
                    action = int(np.argmax(q_table[state]))
                else:
                    # Upper bound is exclusive: this covers actions 0..3.
                    action = int(np.random.randint(0, NUM_ACTIONS))
    finally:
        env.close()

    print(q_table)

    with open(f"qtable-{int(time.time())}.pickle", "wb") as f:
        pickle.dump(q_table, f)

    # One point per environment step; the values are a running mean.
    plt.plot(avg_reward)
    plt.ylabel("Running mean reward")
    plt.xlabel("step #")
    plt.show()

    return q_table
+
+
if __name__ == "__main__":
    # Script entry point: run one Q-learning training session.
    initial_training()