Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions worker/transcribee_worker/webvtt/export_webvtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,19 @@ def split_atoms(atoms):
return result


def emit_cue(vtt, speaker_prefix, pars, config):
def ensure_trailing_space(par: Paragraph):
    """Make sure the text of the paragraph's final child ends with a space.

    The paragraph is modified in place: when ``par.children`` is non-empty
    and the last child's ``text`` does not already end with ``" "``, a single
    space is appended to that text. A paragraph without children is left
    untouched. Nothing is returned.
    """
    children = par.children
    if children:
        tail = children[-1]
        # Only append when the separator is missing, so repeated calls on the
        # same paragraph never stack up extra spaces.
        if not tail.text.endswith(" "):
            tail.text = tail.text + " "


def emit_cue(vtt, speaker_prefix: str | None, pars: list[Paragraph], config):
for par in pars:
ensure_trailing_space(par)

atoms = sum((split_atoms(par.children) for par in pars), start=[])
if len(atoms) == 0:
return
Expand All @@ -157,15 +169,17 @@ def emit_cue(vtt, speaker_prefix, pars, config):
end = max(atom.end for atom in atoms)
if end <= start:
end = start + 0.02
atoms = [
Atom(
text=speaker_prefix,
start=start,
end=start,
conf=1.0,
conf_ts=0.0,
)
] + atoms

if speaker_prefix is not None:
atoms = [
Atom(
text=speaker_prefix,
start=start,
end=start,
conf=1.0,
conf_ts=0.0,
)
] + atoms

payload_length = len("".join(a.text for a in atoms))
if payload_length < (config.max_line_length + config.min_line_length):
Expand Down Expand Up @@ -226,7 +240,7 @@ def generate_web_vtt(
character_limit_single = character_limit_pack + config.min_line_length

last_speaker = None
pars = []
pars: list[Paragraph] = []

def par_len(pars):
return sum(len(p.text()) for p in pars)
Expand All @@ -252,7 +266,6 @@ def par_len(pars):
fits = par_len(pars) + this_par_len < character_limit_pack
if fits:
pars.append(par)
last_speaker = speaker
continue

# here we are done packing with the previous one, flush any remaining...
Expand Down