3 changes: 3 additions & 0 deletions .gitignore
@@ -142,3 +142,6 @@ savedmodel/*
*.slp
*.npz
*.tfrecord
scratch*
Logs/
melee/
139 changes: 90 additions & 49 deletions advantage_bar.py
@@ -10,30 +10,22 @@
import pathlib
from model import AdvantageBarModel

parser = argparse.ArgumentParser(description='AI-powered Melee in-game advantage bar')
parser.add_argument('--train',
'-t',
action='store_true',
help='Training mode')
parser.add_argument('--evaluate',
'-e',
action='store_true',
help='Evaluation mode')
parser.add_argument('--predict',
'-p',
help='Prediction mode. Specify the directory where dolphin is')
parser.add_argument('--build',
'-b',
action='store_true',
help='Build dataset from SLP files')
parser.add_argument('--max_split',
'-m',
help='Split building dataset into this many pieces')
parser.add_argument('--split',
'-s',
help='Handle split number S')
parser = argparse.ArgumentParser(description="AI-powered Melee in-game advantage bar")
parser.add_argument("--train", "-t", action="store_true", help="Training mode")
parser.add_argument("--evaluate", "-e", action="store_true", help="Evaluation mode")
parser.add_argument(
"--predict", "-p", help="Prediction mode. Specify the directory where dolphin is"
)
parser.add_argument(
"--build", "-b", action="store_true", help="Build dataset from SLP files"
)
parser.add_argument(
"--max_split", "-m", help="Split building dataset into this many pieces"
)
parser.add_argument("--split", "-s", help="Handle split number S")
args = parser.parse_args()


def who_died(past_p1, past_p2, current_p1, current_p2):
"""Returns who died."""
if past_p1 > current_p1:
@@ -42,11 +34,22 @@ def who_died(past_p1, past_p2, current_p1, current_p2):
return 1
return -1


def _float_feature(value):
return tf.train.FeatureList(feature=[tf.train.Feature(float_list=tf.train.FloatList(value=[x])) for x in value])
return tf.train.FeatureList(
feature=[
tf.train.Feature(float_list=tf.train.FloatList(value=[x])) for x in value
]
)


def _int64_feature(value):
return tf.train.FeatureList(feature=[tf.train.Feature(int64_list=tf.train.Int64List(value=[x])) for x in value])
return tf.train.FeatureList(
feature=[
tf.train.Feature(int64_list=tf.train.Int64List(value=[x])) for x in value
]
)


if args.build:
"""Builds the tfrecord dataset
@@ -67,8 +70,10 @@ def _int64_feature(value):
if args.max_split and args.split:
max_split, split = int(args.max_split), int(args.split)

directory = 'training_data/'
num_files = len([f for f in os.listdir(directory)if os.path.isfile(os.path.join(directory, f))])
directory = "training_data/"
num_files = len(
[f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
)
bar = progressbar.ProgressBar(maxval=num_files)
file_index = 0
for entry in bar(os.scandir(directory)):
@@ -92,12 +97,14 @@ def _int64_feature(value):
"stage": [],
"frame": [],
"stock_winner": [],
"game_winner": []
"game_winner": [],
}
if entry.path.endswith(".slp") and entry.is_file():
console = None
try:
console = melee.Console(is_dolphin=False, path=entry.path, allow_old_version=True)
console = melee.Console(
is_dolphin=False, path=entry.path, allow_old_version=True
)
except Exception as ex:
print("Got error, skipping file", ex)
continue
@@ -106,7 +113,7 @@ def _int64_feature(value):
except ubjson.decoder.DecoderException as ex:
print("Got error, skipping file", ex)
continue
stocks = (4,4)
stocks = (4, 4)
ports = None
# Pick a game to be part of the evaluation set 20% of the time
is_evaluation = random.random() > 0.8
@@ -149,13 +156,19 @@ def _int64_feature(value):
frames["player2_stock"].append(player_two.stock)
frames["player2_action"].append(player_two.action.value)
frames["player2_character"].append(player_one.character.value)
frames["stage"].append(AdvantageBarModel.stage_flatten(gamestate.stage.value))
frames["stage"].append(
AdvantageBarModel.stage_flatten(gamestate.stage.value)
)
frames["frame"].append(gamestate.frame)

# Did someone lose a stock? Add the labels on
died = who_died(stocks[0], stocks[1], player_one.stock, player_two.stock)
died = who_died(
stocks[0], stocks[1], player_one.stock, player_two.stock
)
if died > -1:
frames_needed = len(frames["frame"]) - len(frames["stock_winner"])
frames_needed = len(frames["frame"]) - len(
frames["stock_winner"]
)
frames["stock_winner"].extend([died] * frames_needed)
stocks = (player_one.stock, player_two.stock)

@@ -167,37 +180,66 @@ def _int64_feature(value):

filename = None
if is_evaluation:
filename = "tfrecords/eval" / pathlib.Path(pathlib.Path(entry.path + ".tfrecord").name)
filename = "tfrecords/eval" / pathlib.Path(
pathlib.Path(entry.path + ".tfrecord").name
)
else:
filename = "tfrecords/train" / pathlib.Path(pathlib.Path(entry.path + ".tfrecord").name)
filename = "tfrecords/train" / pathlib.Path(
pathlib.Path(entry.path + ".tfrecord").name
)
if len(frames) > 0 and game_winner > -1:
with tf.io.TFRecordWriter(str(filename)) as file_writer:
# This is all that we actually have full data for
data_cap = len(frames["stock_winner"])

# "Context" features are static for the whole data record. Not in the time series
context_features = tf.train.Features(feature={
"game_winner": tf.train.Feature(float_list=tf.train.FloatList(value=[game_winner]))
})
context_features = tf.train.Features(
feature={
"game_winner": tf.train.Feature(
float_list=tf.train.FloatList(value=[game_winner])
)
}
)

features = {
"player1_character": _int64_feature(frames["player1_character"][:data_cap]),
"player1_character": _int64_feature(
frames["player1_character"][:data_cap]
),
"player1_x": _float_feature(frames["player1_x"][:data_cap]),
"player1_y": _float_feature(frames["player1_y"][:data_cap]),
"player1_percent": _float_feature(frames["player1_percent"][:data_cap]),
"player1_stock": _float_feature(frames["player1_stock"][:data_cap]),
"player1_action": _int64_feature(frames["player1_action"][:data_cap]),
"player1_percent": _float_feature(
frames["player1_percent"][:data_cap]
),
"player1_stock": _float_feature(
frames["player1_stock"][:data_cap]
),
"player1_action": _int64_feature(
frames["player1_action"][:data_cap]
),
"player2_x": _float_feature(frames["player2_x"][:data_cap]),
"player2_y": _float_feature(frames["player2_y"][:data_cap]),
"player2_percent": _float_feature(frames["player2_percent"][:data_cap]),
"player2_stock": _float_feature(frames["player2_stock"][:data_cap]),
"player2_action": _int64_feature(frames["player2_action"][:data_cap]),
"player2_character": _int64_feature(frames["player2_character"][:data_cap]),
"player2_percent": _float_feature(
frames["player2_percent"][:data_cap]
),
"player2_stock": _float_feature(
frames["player2_stock"][:data_cap]
),
"player2_action": _int64_feature(
frames["player2_action"][:data_cap]
),
"player2_character": _int64_feature(
frames["player2_character"][:data_cap]
),
"stage": _int64_feature(frames["stage"][:data_cap]),
"frame": _float_feature(frames["frame"][:data_cap]),
"stock_winner": _float_feature(frames["stock_winner"][:data_cap]),
"stock_winner": _float_feature(
frames["stock_winner"][:data_cap]
),
}
newexample = tf.train.SequenceExample(feature_lists=tf.train.FeatureLists(feature_list=features), context=context_features)
newexample = tf.train.SequenceExample(
feature_lists=tf.train.FeatureLists(feature_list=features),
context=context_features,
)
file_writer.write(newexample.SerializeToString())

if args.train:
@@ -213,8 +255,7 @@ def _int64_feature(value):
model.load()

# Start a real game
console = melee.Console(path=args.predict,
is_dolphin=False)
console = melee.Console(path=args.predict, is_dolphin=False)
# Run the console
console.run()

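For context, a minimal sketch of how the SequenceExamples written above could be parsed back into tensors for training. This reader is not part of the diff: the feature names mirror the build code, but the glob pattern, the choice of FixedLenSequenceFeature specs, and the helper name are illustrative assumptions.

# Hypothetical reader for the SequenceExamples written by the build step (not in this diff).
import tensorflow as tf

context_spec = {
    "game_winner": tf.io.FixedLenFeature([1], tf.float32),
}
sequence_spec = {
    # Per-frame sequences; the remaining float/int features follow the same pattern.
    "player1_x": tf.io.FixedLenSequenceFeature([], tf.float32),
    "player1_action": tf.io.FixedLenSequenceFeature([], tf.int64),
    "stock_winner": tf.io.FixedLenSequenceFeature([], tf.float32),
}


def parse(serialized):
    context, sequences = tf.io.parse_single_sequence_example(
        serialized, context_features=context_spec, sequence_features=sequence_spec
    )
    # Sequence tensors have shape [num_frames]; the context label has shape [1].
    return sequences, context["game_winner"]


dataset = tf.data.TFRecordDataset(
    tf.io.gfile.glob("tfrecords/train/*.tfrecord")
).map(parse)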
11 changes: 11 additions & 0 deletions lstm notes.txt
@@ -0,0 +1,11 @@
If an entire game is used to predict the winner, the model will no doubt learn that the person
closest to losing their last stock determines the winner. So if a data point is a single game,
it will bias its learning toward the end of the game and reach 100% accuracy, but in real time
it will not have access to that information.

So each game should be broken into several records: probably 5 seconds at a time, at 2 frames
per second, but also right-padded so that almost the entire game history can be taken into
account.

At 500 time steps per record and 2 frames per second, the input covers just over 4 minutes
(250 seconds) of history. Matches that go longer are truncated from the beginning of the match.
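A minimal sketch of that record layout, assuming 60 fps gameplay, zero padding, and a
hypothetical make_records helper (none of which come from this repository):

# Hypothetical sketch of the windowing scheme described above (not repository code).
# Assumes 60 fps gameplay and `features` as a list of per-frame feature vectors.

FPS = 60
SAMPLE_RATE = 2     # frames kept per second
STEP_SECONDS = 5    # start a new record every 5 seconds
MAX_STEPS = 500     # fixed record length (~4 minutes 10 seconds of history)


def make_records(features, pad_value=0.0):
    """Split one game's frame features into fixed-length, right-padded records."""
    sampled = features[::FPS // SAMPLE_RATE]   # keep every 30th frame -> 2 per second
    step = STEP_SECONDS * SAMPLE_RATE          # 10 sampled steps per 5 seconds
    records = []
    for end in range(step, len(sampled) + 1, step):
        # History from the start of the match up to this point,
        # truncated from the beginning once it exceeds MAX_STEPS.
        window = sampled[max(0, end - MAX_STEPS):end]
        padding = [[pad_value] * len(window[0])] * (MAX_STEPS - len(window))
        records.append(window + padding)       # right-pad to exactly MAX_STEPS steps
    return records


# Example: a fake 2-minute game with 4 features per frame.
fake_game = [[float(i), 0.0, 0.0, 0.0] for i in range(2 * 60 * FPS)]
records = make_records(fake_game)
print(len(records), len(records[0]))           # -> 24 500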