diff --git a/.gitignore b/.gitignore
index 30836af..380fc0c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -142,3 +142,6 @@ savedmodel/*
 *.slp
 *.npz
 *.tfrecord
+scratch*
+Logs/
+melee/
\ No newline at end of file
diff --git a/advantage_bar.py b/advantage_bar.py
index 0d9a87d..f3bc6b8 100755
--- a/advantage_bar.py
+++ b/advantage_bar.py
@@ -10,30 +10,22 @@ import pathlib
 
 from model import AdvantageBarModel
 
-parser = argparse.ArgumentParser(description='AI-powered Melee in-game advantage bar')
-parser.add_argument('--train',
-                    '-t',
-                    action='store_true',
-                    help='Training mode')
-parser.add_argument('--evaluate',
-                    '-e',
-                    action='store_true',
-                    help='Evaluation mode')
-parser.add_argument('--predict',
-                    '-p',
-                    help='Prediction mode. Specify the directory where dolphin is')
-parser.add_argument('--build',
-                    '-b',
-                    action='store_true',
-                    help='Build dataset from SLP files')
-parser.add_argument('--max_split',
-                    '-m',
-                    help='Split building dataset into this many pieces')
-parser.add_argument('--split',
-                    '-s',
-                    help='Handle split number S')
+parser = argparse.ArgumentParser(description="AI-powered Melee in-game advantage bar")
+parser.add_argument("--train", "-t", action="store_true", help="Training mode")
+parser.add_argument("--evaluate", "-e", action="store_true", help="Evaluation mode")
+parser.add_argument(
+    "--predict", "-p", help="Prediction mode. Specify the directory where dolphin is"
+)
+parser.add_argument(
+    "--build", "-b", action="store_true", help="Build dataset from SLP files"
+)
+parser.add_argument(
+    "--max_split", "-m", help="Split building dataset into this many pieces"
+)
+parser.add_argument("--split", "-s", help="Handle split number S")
 args = parser.parse_args()
 
+
 def who_died(past_p1, past_p2, current_p1, current_p2):
     """Returns who died."""
     if past_p1 > current_p1:
@@ -42,11 +34,22 @@ def who_died(past_p1, past_p2, current_p1, current_p2):
         return 1
     return -1
 
+
 def _float_feature(value):
-    return tf.train.FeatureList(feature=[tf.train.Feature(float_list=tf.train.FloatList(value=[x])) for x in value])
+    return tf.train.FeatureList(
+        feature=[
+            tf.train.Feature(float_list=tf.train.FloatList(value=[x])) for x in value
+        ]
+    )
 
+
 def _int64_feature(value):
-    return tf.train.FeatureList(feature=[tf.train.Feature(int64_list=tf.train.Int64List(value=[x])) for x in value])
+    return tf.train.FeatureList(
+        feature=[
+            tf.train.Feature(int64_list=tf.train.Int64List(value=[x])) for x in value
+        ]
+    )
 
+
 if args.build:
     """Builds the tfrecord dataset
@@ -67,8 +70,10 @@ def _int64_feature(value):
 
     if args.max_split and args.split:
         max_split, split = int(args.max_split), int(args.split)
-    directory = 'training_data/'
-    num_files = len([f for f in os.listdir(directory)if os.path.isfile(os.path.join(directory, f))])
+    directory = "training_data/"
+    num_files = len(
+        [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
+    )
     bar = progressbar.ProgressBar(maxval=num_files)
     file_index = 0
     for entry in bar(os.scandir(directory)):
@@ -92,12 +97,14 @@ def _int64_feature(value):
             "stage": [],
             "frame": [],
             "stock_winner": [],
-            "game_winner": []
+            "game_winner": [],
         }
         if entry.path.endswith(".slp") and entry.is_file():
             console = None
             try:
-                console = melee.Console(is_dolphin=False, path=entry.path, allow_old_version=True)
+                console = melee.Console(
+                    is_dolphin=False, path=entry.path, allow_old_version=True
+                )
             except Exception as ex:
                 print("Got error, skipping file", ex)
                 continue
@@ -106,7 +113,7 @@ def _int64_feature(value):
             try:
                 console.connect()
             except ubjson.decoder.DecoderException as ex:
                 print("Got error, skipping file", ex)
                 continue
-            stocks = (4,4)
+            stocks = (4, 4)
             ports = None
             # Pick a game to be part of the evaluation set 20% of the time
             is_evaluation = random.random() > 0.8
@@ -149,13 +156,19 @@ def _int64_feature(value):
                 frames["player2_stock"].append(player_two.stock)
                 frames["player2_action"].append(player_two.action.value)
                 frames["player2_character"].append(player_one.character.value)
-                frames["stage"].append(AdvantageBarModel.stage_flatten(gamestate.stage.value))
+                frames["stage"].append(
+                    AdvantageBarModel.stage_flatten(gamestate.stage.value)
+                )
                 frames["frame"].append(gamestate.frame)
 
                 # Did someone lose a stock? Add the labels on
-                died = who_died(stocks[0], stocks[1], player_one.stock, player_two.stock)
+                died = who_died(
+                    stocks[0], stocks[1], player_one.stock, player_two.stock
+                )
                 if died > -1:
-                    frames_needed = len(frames["frame"]) - len(frames["stock_winner"])
+                    frames_needed = len(frames["frame"]) - len(
+                        frames["stock_winner"]
+                    )
                     frames["stock_winner"].extend([died] * frames_needed)
 
                     stocks = (player_one.stock, player_two.stock)
@@ -167,37 +180,66 @@ def _int64_feature(value):
 
             filename = None
             if is_evaluation:
-                filename = "tfrecords/eval" / pathlib.Path(pathlib.Path(entry.path + ".tfrecord").name)
+                filename = "tfrecords/eval" / pathlib.Path(
+                    pathlib.Path(entry.path + ".tfrecord").name
+                )
             else:
-                filename = "tfrecords/train" / pathlib.Path(pathlib.Path(entry.path + ".tfrecord").name)
+                filename = "tfrecords/train" / pathlib.Path(
+                    pathlib.Path(entry.path + ".tfrecord").name
+                )
 
             if len(frames) > 0 and game_winner > -1:
                 with tf.io.TFRecordWriter(str(filename)) as file_writer:
                     # This is all that we actually have full data for
                     data_cap = len(frames["stock_winner"])
                     # "Context" features are static for the whole data record. Not in the time series
-                    context_features = tf.train.Features(feature={
-                        "game_winner": tf.train.Feature(float_list=tf.train.FloatList(value=[game_winner]))
-                    })
+                    context_features = tf.train.Features(
+                        feature={
+                            "game_winner": tf.train.Feature(
+                                float_list=tf.train.FloatList(value=[game_winner])
+                            )
+                        }
+                    )
                     features = {
-                        "player1_character": _int64_feature(frames["player1_character"][:data_cap]),
+                        "player1_character": _int64_feature(
+                            frames["player1_character"][:data_cap]
+                        ),
                         "player1_x": _float_feature(frames["player1_x"][:data_cap]),
                         "player1_y": _float_feature(frames["player1_y"][:data_cap]),
-                        "player1_percent": _float_feature(frames["player1_percent"][:data_cap]),
-                        "player1_stock": _float_feature(frames["player1_stock"][:data_cap]),
-                        "player1_action": _int64_feature(frames["player1_action"][:data_cap]),
+                        "player1_percent": _float_feature(
+                            frames["player1_percent"][:data_cap]
+                        ),
+                        "player1_stock": _float_feature(
+                            frames["player1_stock"][:data_cap]
+                        ),
+                        "player1_action": _int64_feature(
+                            frames["player1_action"][:data_cap]
+                        ),
                         "player2_x": _float_feature(frames["player2_x"][:data_cap]),
                         "player2_y": _float_feature(frames["player2_y"][:data_cap]),
-                        "player2_percent": _float_feature(frames["player2_percent"][:data_cap]),
-                        "player2_stock": _float_feature(frames["player2_stock"][:data_cap]),
-                        "player2_action": _int64_feature(frames["player2_action"][:data_cap]),
-                        "player2_character": _int64_feature(frames["player2_character"][:data_cap]),
+                        "player2_percent": _float_feature(
+                            frames["player2_percent"][:data_cap]
+                        ),
+                        "player2_stock": _float_feature(
+                            frames["player2_stock"][:data_cap]
+                        ),
+                        "player2_action": _int64_feature(
+                            frames["player2_action"][:data_cap]
+                        ),
+                        "player2_character": _int64_feature(
+                            frames["player2_character"][:data_cap]
+                        ),
                         "stage": _int64_feature(frames["stage"][:data_cap]),
                         "frame": _float_feature(frames["frame"][:data_cap]),
-                        "stock_winner": _float_feature(frames["stock_winner"][:data_cap]),
+                        "stock_winner": _float_feature(
+                            frames["stock_winner"][:data_cap]
+                        ),
                     }
-                    newexample = tf.train.SequenceExample(feature_lists=tf.train.FeatureLists(feature_list=features), context=context_features)
+                    newexample = tf.train.SequenceExample(
+                        feature_lists=tf.train.FeatureLists(feature_list=features),
+                        context=context_features,
+                    )
                     file_writer.write(newexample.SerializeToString())
 
 if args.train:
@@ -213,8 +255,7 @@ def _int64_feature(value):
     model.load()
 
     # Start a real game
-    console = melee.Console(path=args.predict,
-                            is_dolphin=False)
+    console = melee.Console(path=args.predict, is_dolphin=False)
 
     # Run the console
     console.run()
diff --git a/lstm notes.txt b/lstm notes.txt
new file mode 100644
index 0000000..d0f00ca
--- /dev/null
+++ b/lstm notes.txt
@@ -0,0 +1,11 @@
+If an entire game is used to predict the winner, it will no doubt learn that the person
+closest to losing their last stock determines the winner. So if a data point is a single
+game, it will bias its learning toward the end of the game and get 100% accuracy, but in
+real time it will not have access to that.
+
+So each game should be broken into several records. Probably 5 seconds at a
+time, 2 frames per second, but also right-padded so that almost the entire game
+history can be taken into account.
+
+With 500 time steps per record and 2 frames per second, the time length for input is
+just over 4 minutes. Matches that go longer will be truncated from the beginning of the match.
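As a quick sanity check on the windowing scheme described in the notes above, the following standalone Python sketch (not part of the diff) shows what the _window() helper added to model.py below produces. The size, shift, and stride values mirror that helper; the 600-frame toy sequence is made up. With stride=30 a window keeps every 30th frame (2 samples per second at Melee's 60 fps), so each 10-step window spans about 4.5 seconds of game time, and shift=150 starts a new window every 2.5 seconds:

import tensorflow as tf

# Stand-in for one game's per-frame feature sequence: 600 frame indices.
frames = tf.range(600)

# Same parameters as _window() in model.py: 10 steps per window,
# a new window every 150 frames, keeping every 30th frame.
dataset = tf.data.Dataset.from_tensor_slices(frames)
dataset = dataset.window(size=10, shift=150, stride=30, drop_remainder=True)
dataset = dataset.flat_map(lambda w: w.batch(10))

for window in dataset:
    print(window.numpy())
# [  0  30  60  90 120 150 180 210 240 270]
# [150 180 210 240 270 300 330 360 390 420]
# [300 330 360 390 420 450 480 510 540 570]
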
diff --git a/model.py b/model.py
index 70a9d93..4c3d08d 100644
--- a/model.py
+++ b/model.py
@@ -4,16 +4,18 @@
 import numpy as np
 import os
 
+
 def _parse_winner(record):
     context_feature_map = {
         "game_winner": tf.io.FixedLenFeature([], dtype=tf.float32),
     }
-    ctx, _ = tf.io.parse_single_sequence_example(record,
-                                                 sequence_features=None,
-                                                 context_features=context_feature_map)
+    ctx, _ = tf.io.parse_single_sequence_example(
+        record, sequence_features=None, context_features=context_feature_map
+    )
     return ctx["game_winner"]
 
+
 def _parse_features(record):
     """Parse a batch of tfrecord data, output batch of tensors ready for training
@@ -35,9 +37,9 @@ def _parse_features(record):
         "stock_winner": tf.io.FixedLenSequenceFeature([], dtype=tf.float32),
     }
 
-    _, parsed = tf.io.parse_single_sequence_example(record,
-                                                    sequence_features=feature_map,
-                                                    context_features=None)
+    _, parsed = tf.io.parse_single_sequence_example(
+        record, sequence_features=feature_map, context_features=None
+    )
 
     p1character = tf.one_hot(parsed["player1_character"], 26)
     p2character = tf.one_hot(parsed["player2_character"], 26)
@@ -53,23 +55,39 @@ def _parse_features(record):
     p2percent = tf.expand_dims(parsed["player2_percent"], 1)
     p2stock = tf.expand_dims(parsed["player2_stock"], 1)
 
-    final = tf.concat([p1character,
-                       p2character,
-                       stage,
-                       p1x,
-                       p1y,
-                       p1percent,
-                       p1stock,
-                       p2x,
-                       p2y,
-                       p2percent,
-                       p2stock,
-                       ], 1)
+    final = tf.concat(
+        [
+            p1character,
+            p2character,
+            stage,
+            p1x,
+            p1y,
+            p1percent,
+            p1stock,
+            p2x,
+            p2y,
+            p2percent,
+            p2stock,
+        ],
+        1,
+    )
 
     return final
 
+
+def _window(sequence, time_length):
+    # This comes in as a tensor, so convert it to a dataset
+    dataset = tf.data.Dataset.from_tensor_slices(sequence)
+    dataset = dataset.window(
+        size=time_length, shift=150, stride=30, drop_remainder=True
+    )
+    dataset = dataset.flat_map(lambda x: x.batch(time_length))
+    return dataset
+
+
 class AdvantageBarModel:
     """Tensorflow model for the advantage bar
     """
+
     def __init__(self):
@@ -87,7 +105,7 @@ def __init__(self):
             (float): Stock of player 2
         """
         self._BATCH_SIZE = 10
-        self._TIME_LENGTH = 20
+        self._TIME_LENGTH = 10
 
         # Build the model
         self.model = tf.keras.Sequential()
@@ -100,9 +118,11 @@ def __init__(self):
         self.model.add(tf.keras.layers.Dropout(0.2))
         self.model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
 
-        self.model.compile(optimizer=tf.keras.optimizers.Adam(0.0010),
-                           loss="binary_crossentropy",
-                           metrics=["accuracy"])
+        self.model.compile(
+            optimizer=tf.keras.optimizers.Adam(0.0010),
+            loss="binary_crossentropy",
+            metrics=["accuracy"],
+        )
         print(self.model.summary())
 
     def load(self):
@@ -150,60 +170,85 @@ def train(self, epochs=10):
         eval_data_labels = eval_data.map(_parse_winner)
 
         # This part is working
-        print("Printing features")
-        for thing in dataset_train_features:
-            print(thing)
-
-        print("Printing labels")
-        for thing in dataset_train_labels:
-            print(thing)
+        # print("Printing features")
+        # for thing in dataset_train_features:
+        #     print(thing)
+        #
+        # print("Printing labels")
+        # for thing in dataset_train_labels:
+        #     print(thing)
 
         # Window the data
-        # XXX I think this should work, but it doesn't? It's always empty :(
-        dataset_train_features = dataset_train_features.window(self._TIME_LENGTH, drop_remainder=True)
-        dataset_train_features = dataset_train_features.flat_map(lambda window: window.batch(self._BATCH_SIZE))
+        dataset_train_features = dataset_train_features.flat_map(
+            lambda x: _window(x, self._TIME_LENGTH)
+        )
 
-        dataset_validation_features = dataset_validation_features.window(self._TIME_LENGTH, drop_remainder=True)
-        dataset_validation_features = dataset_validation_features.flat_map(lambda window: window.batch(self._BATCH_SIZE))
+        dataset_train_features = dataset_train_features.map(
+            lambda x: tf.reshape(x, [1, self._TIME_LENGTH, 66])
+        )
 
-        eval_data_features = eval_data_features.window(self._TIME_LENGTH, drop_remainder=True)
-        eval_data_features = eval_data_features.flat_map(lambda window: window.batch(self._BATCH_SIZE))
+        dataset_validation_features = dataset_validation_features.flat_map(
+            lambda x: _window(x, self._TIME_LENGTH)
+        )
 
-        # dataset_train = dataset_train.batch(self._BATCH_SIZE)
-        # dataset_validation = dataset_validation.batch(self._BATCH_SIZE)
-        # eval_data = eval_data.batch(self._BATCH_SIZE)
+        dataset_validation_features = dataset_validation_features.map(
+            lambda x: tf.reshape(x, [1, self._TIME_LENGTH, 66])
+        )
+
+        eval_data_features = eval_data_features.flat_map(
+            lambda x: _window(x, self._TIME_LENGTH)
+        )
+
+        eval_data_features = eval_data_features.map(
+            lambda x: tf.reshape(x, [1, self._TIME_LENGTH, 66])
+        )
+
+        dataset_train_labels = dataset_train_labels.map(
+            lambda x: tf.reshape(x, (-1, 1))
+        )
+
+        dataset_validation_labels = dataset_validation_labels.map(
+            lambda x: tf.reshape(x, (-1, 1))
+        )
+
+        eval_data_labels = eval_data_labels.map(lambda x: tf.reshape(x, (-1, 1)))
 
         # Zip the datasets back together
         # TODO The sizes on these are probably wrong. I bet the zips won't match up
-        training_set = tf.data.Dataset.zip((dataset_train_features, dataset_train_labels))
-        validation_set = tf.data.Dataset.zip((dataset_validation_features, dataset_validation_labels))
+        training_set = tf.data.Dataset.zip(
+            (dataset_train_features, dataset_train_labels)
+        )
+        validation_set = tf.data.Dataset.zip(
+            (dataset_validation_features, dataset_validation_labels)
+        )
         eval_set = tf.data.Dataset.zip((eval_data_features, eval_data_labels))
 
-        self.model.fit(training_set,
-                       validation_data=validation_set,
-                       epochs=epochs)
+        self.model.fit(training_set, validation_data=validation_set, epochs=epochs)
         self.model.evaluate(eval_set)
 
-
     def predict(self, gamestate):
         """Given a single libmelee gamestate, make a prediction"""
         p1character = tf.one_hot(gamestate.player[1].character.value, 26).numpy()
         p2character = tf.one_hot(gamestate.player[2].character.value, 26).numpy()
-        stage = tf.one_hot(AdvantageBarModel.stage_flatten(gamestate.stage.value), 6).numpy()
-
-        input_array = np.concatenate([
-            p1character,
-            p2character,
-            stage,
-            [gamestate.player[1].x],
-            [gamestate.player[1].y],
-            [gamestate.player[1].percent],
-            [gamestate.player[1].stock],
-            [gamestate.player[2].x],
-            [gamestate.player[2].y],
-            [gamestate.player[2].percent],
-            [gamestate.player[2].stock],
-        ])
+        stage = tf.one_hot(
+            AdvantageBarModel.stage_flatten(gamestate.stage.value), 6
+        ).numpy()
+
+        input_array = np.concatenate(
+            [
+                p1character,
+                p2character,
+                stage,
+                [gamestate.player[1].x],
+                [gamestate.player[1].y],
+                [gamestate.player[1].percent],
+                [gamestate.player[1].stock],
+                [gamestate.player[2].x],
+                [gamestate.player[2].y],
+                [gamestate.player[2].percent],
+                [gamestate.player[2].stock],
+            ]
+        )
 
         input_array = np.array([input_array,])
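For reference, here is a minimal round-trip of the SequenceExample schema the two files share, as a standalone sketch with made-up values (one context feature and one short sequence feature; not part of the diff). advantage_bar.py writes the whole-game label into the context and the per-frame series into feature lists, and model.py parses the same kind of record back with tf.io.parse_single_sequence_example:

import tensorflow as tf


def _float_feature(value):
    # Same shape as the helper in advantage_bar.py: one float per time step.
    return tf.train.FeatureList(
        feature=[tf.train.Feature(float_list=tf.train.FloatList(value=[x])) for x in value]
    )


# Write side: context carries the static label, feature_lists the time series.
example = tf.train.SequenceExample(
    context=tf.train.Features(
        feature={
            "game_winner": tf.train.Feature(float_list=tf.train.FloatList(value=[1.0]))
        }
    ),
    feature_lists=tf.train.FeatureLists(
        feature_list={"player1_x": _float_feature([0.0, 12.5, -40.0])}
    ),
)

# Read side: the serialized record parses back into (context, sequence) dicts.
ctx, seq = tf.io.parse_single_sequence_example(
    example.SerializeToString(),
    context_features={"game_winner": tf.io.FixedLenFeature([], dtype=tf.float32)},
    sequence_features={"player1_x": tf.io.FixedLenSequenceFeature([], dtype=tf.float32)},
)
print(ctx["game_winner"].numpy())  # -> 1.0
print(seq["player1_x"].numpy())  # -> [0. 12.5 -40.]

In the real records every per-frame key in the features dict above appears in feature_list, and data_cap truncates them all to the same length so the series stay aligned with the stock_winner labels.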