From 9c29e9fd7097e16db6f02edaf64e12b34f1503ae Mon Sep 17 00:00:00 2001 From: Egor Vorontsov Date: Sat, 24 Jan 2026 08:45:47 +0300 Subject: [PATCH] Improved `Chain.build()` performance by 1/3. --- markovify/chain.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/markovify/chain.py b/markovify/chain.py index b19e8e2..828d05f 100644 --- a/markovify/chain.py +++ b/markovify/chain.py @@ -75,22 +75,18 @@ def build(self, corpus, state_size): appears. """ - # Using a DefaultDict here would be a lot more convenient, however the memory - # usage is far higher. model = {} for run in corpus: items = ([BEGIN] * state_size) + run + [END] for i in range(len(run) + 1): - state = tuple(items[i : i + state_size]) + state = model.setdefault(tuple(items[i : i + state_size]), {}) follow = items[i + state_size] - if state not in model: - model[state] = {} + try: + state[follow] += 1 + except KeyError: + state[follow] = 0 - if follow not in model[state]: - model[state][follow] = 0 - - model[state][follow] += 1 return model def precompute_begin_state(self):