From 353d7d12c50a4c6126217cbc4099486aece7c174 Mon Sep 17 00:00:00 2001
From: AdityaKulshrestha
Date: Sun, 21 Dec 2025 10:56:30 +0000
Subject: [PATCH 1/2] Added typo fix

---
 intel_extension_for_pytorch/llm/modules/mha_fusion.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/intel_extension_for_pytorch/llm/modules/mha_fusion.py b/intel_extension_for_pytorch/llm/modules/mha_fusion.py
index ee08a4bd7..cbde1b923 100644
--- a/intel_extension_for_pytorch/llm/modules/mha_fusion.py
+++ b/intel_extension_for_pytorch/llm/modules/mha_fusion.py
@@ -553,12 +553,12 @@ class PagedAttention:
         alibi_slopes (torch.Tensor, optinal): which is the alibi slope with the shape of (num_heads).
         softcap (float): the positive softcap value to apply on the attention weights, default is -1.

-    [class method]: flash_atten_varlen
+    [class method]: flash_attn_varlen_func

     .. highlight:: python
     .. code-block:: python

-        ipex.llm.modules.PagedAttention.flash_atten_varlen(
+        ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
             out,
             query,
             key_cache,
@@ -573,8 +573,8 @@ class PagedAttention:
             alibi_slopes,
             window_size_left,
             window_size_right,
-            k_scale,
-            v_scale
+            k_scale=k_scale,
+            v_scale=v_scale
         )

     Args:

From 68bc3b806b63ce37a6c4e1dd7977600bf3ca6129 Mon Sep 17 00:00:00 2001
From: AdityaKulshrestha
Date: Sun, 21 Dec 2025 11:16:08 +0000
Subject: [PATCH 2/2] linting fixes

---
 examples/cpu/features/int8_recipe_tuning/int8_autotune.py  | 2 +-
 .../models/bert_large/training/cpu/run_pretrain_mlperf.py  | 4 ++--
 .../inference/distributed/run_generation_with_deepspeed.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/cpu/features/int8_recipe_tuning/int8_autotune.py b/examples/cpu/features/int8_recipe_tuning/int8_autotune.py
index b70155ca4..05b0a3d10 100644
--- a/examples/cpu/features/int8_recipe_tuning/int8_autotune.py
+++ b/examples/cpu/features/int8_recipe_tuning/int8_autotune.py
@@ -84,7 +84,7 @@ def train(dataloader, model, loss_fn, optimizer):

 epochs = 5
 for t in range(epochs):
-    print(f"Epoch {t+1}\n-------------------------------")
+    print(f"Epoch {t + 1}\n-------------------------------")
     train(train_dataloader, model, loss_fn, optimizer)
 print("Done!")

diff --git a/examples/cpu/inference/python/models/bert_large/training/cpu/run_pretrain_mlperf.py b/examples/cpu/inference/python/models/bert_large/training/cpu/run_pretrain_mlperf.py
index a45397f82..26df9d98b 100644
--- a/examples/cpu/inference/python/models/bert_large/training/cpu/run_pretrain_mlperf.py
+++ b/examples/cpu/inference/python/models/bert_large/training/cpu/run_pretrain_mlperf.py
@@ -1158,8 +1158,8 @@ def main():
                     print(
                         f"Step {training_steps:5d}: loss: {gloss:6.3f} lm_acc: {lm_acc:.3f} \
 seq_acc: {seq_acc:.3f} lbs: {args.train_batch_size} gbs: {total_batch_size} \
-DT: {(t1-t0)*1000.0:.1f} XT: {(t2-t1)*1000.0:.1f} FT: {(t3-t2)*1000.0:.1f} \
-BT: {(t4-t3)*1000.0:.1f} OT: {(t5-t4)*1000.0:.1f} TT: {(t5-t0)*1000.0:.1f}"
+DT: {(t1 - t0) * 1000.0:.1f} XT: {(t2 - t1) * 1000.0:.1f} FT: {(t3 - t2) * 1000.0:.1f} \
+BT: {(t4 - t3) * 1000.0:.1f} OT: {(t5 - t4) * 1000.0:.1f} TT: {(t5 - t0) * 1000.0:.1f}"
                     )

                 update_step = training_steps % args.gradient_accumulation_steps == 0
diff --git a/examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py b/examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py
index 30e04ff84..b015a44a1 100644
--- a/examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py
+++ b/examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py
@@ -1064,7 +1064,7 @@ def trace_handler(prof):
     generated, _ = generate()
     t_generate_span = time.time() - t_generate_start
     for i, o, _ in generated:
-        print_rank0(f"{'-'*60}\nin={i}\nout={o}\n")
+        print_rank0(f"{'-' * 60}\nin={i}\nout={o}\n")

 # benchmark it!
 else:
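
For context, the docstring corrected in PATCH 1/2 corresponds to a call of the following shape. This is a minimal sketch, not part of either patch: the wrapper name run_flash_attn_varlen and the 1.0 defaults for k_scale and v_scale are illustrative assumptions; only the method name flash_attn_varlen_func, its argument list, and the keyword-style k_scale/v_scale come from the docstring above, and the caller is still responsible for preparing the paged KV-cache tensors.

    # Sketch of the documented call with the corrected method name.
    import intel_extension_for_pytorch as ipex

    def run_flash_attn_varlen(
        out, query, key_cache, value_cache,
        cu_seqlens_q, cu_seqlens_kv, max_seqlen_q, max_seqlen_kv,
        scale, is_causal, block_table, alibi_slopes,
        window_size_left, window_size_right,
        k_scale=1.0, v_scale=1.0,  # assumed defaults, for illustration only
    ):
        # The method is now documented as flash_attn_varlen_func (previously
        # misspelled flash_atten_varlen), and the scale factors are passed by
        # keyword rather than positionally.
        return ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
            out, query, key_cache, value_cache,
            cu_seqlens_q, cu_seqlens_kv, max_seqlen_q, max_seqlen_kv,
            scale, is_causal, block_table, alibi_slopes,
            window_size_left, window_size_right,
            k_scale=k_scale, v_scale=v_scale,
        )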