From b3dd4542a3ec11237573209c7b18ecdeb91933f8 Mon Sep 17 00:00:00 2001
From: Mitch Lewis
Date: Tue, 13 Jan 2026 18:36:04 -0700
Subject: [PATCH] Revert "[Bugfix] Fix incorrect dispatch for
 CutlassBlockScaledGroupedGemm and DeepGEMM (#20933)"

This reverts commit bcdfb2a3308e14fbf46da6d6d41747f289af9300.
---
 vllm/model_executor/layers/quantization/fp8.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 824dfe15ae25..59db3e6c4449 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -488,16 +488,11 @@ def __init__(self, quant_config: Fp8Config):
                 logger.warning_once("Failed to import DeepGemm kernels.")
             elif not self.block_quant:
                 logger.warning_once("Model is not block quantized. Not using "
-                                    "DeepGemm kernels")
+                                    " DeepGemm kernels")
             elif (current_platform.is_cuda()
-                  and current_platform.is_device_capability(90)):
+                  and current_platform.has_device_capability(90)):
                 logger.info_once("Using DeepGemm kernels for Fp8MoEMethod.")
                 self.allow_deep_gemm = True
-            elif (current_platform.is_cuda()
-                  and is_blackwell_deep_gemm_used()):
-                logger.info_once("Using DeepGemm SM100 kernels for "
-                                 "Fp8MoEMethod.")
-                self.allow_deep_gemm = True
             else:
                 logger.warning_once(
                     "DeepGemm not supported on the current platform.")
@@ -505,10 +500,10 @@ def __init__(self, quant_config: Fp8Config):
         # Check for CutlassBlockScaledGroupedGemm support.
         self.allow_cutlass_block_scaled_grouped_gemm = False
         if not self.block_quant:
-            logger.debug_once("Model is not block quantized. Not using "
-                              "CutlassBlockScaledGroupedGemm kernels")
+            logger.warning_once("Model is not block quantized. Not using "
+                                "CutlassBlockScaledGroupedGemm kernels")
         elif (current_platform.is_cuda()
-              and current_platform.is_device_capability(100)):
+              and current_platform.has_device_capability(100)):
             logger.info_once(
                 "Using CutlassBlockScaledGroupedGemm kernels for Fp8MoEMethod."
             )