From 8c6ca6e6113cdd52b2990fb894a73144345bb8a4 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Thu, 13 Nov 2025 03:37:52 +0000 Subject: [PATCH] Optimize get_return_docstring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimization precompiles the regular expression pattern into a reusable compiled regex object (`_RETURN_PATTERN`), eliminating the need to recompile the pattern on every function call. **Key changes:** - Moved regex compilation outside the function using `re.compile()` with the same pattern and flags - Simplified the regex quantifier from `{0,1}` to `?` for better readability - Used the precompiled pattern's `.search()` method instead of `re.search()` **Why this leads to speedup:** In Python, `re.search()` must parse and compile the regex pattern each time it's called. By precompiling the pattern once at module load time, we eliminate this compilation overhead on every function invocation. The line profiler shows the regex search operation (line with `re.search`) dropped from 91.9% of execution time to 83.1%, with per-hit time reducing from 22,435ns to 8,469ns - a 62% improvement on the most expensive operation. **Performance impact based on function references:** The `get_return_docstring` function is called within `extract_docstrings()`, which processes multiple class members and their docstrings in a loop. Since documentation parsing typically processes many functions/methods in batch operations, this optimization compounds significantly - each avoided regex compilation saves ~14μs per call. **Test case performance:** The optimization shows consistent 100-200% speedup across all test cases, with particularly strong gains on edge cases like empty strings (580% faster) and simple cases (130-180% faster). Large-scale tests show smaller but meaningful improvements (4-30% faster), indicating the optimization remains effective even when regex compilation becomes a smaller fraction of total processing time for very large inputs. --- gradio/cli/commands/components/_docs_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gradio/cli/commands/components/_docs_utils.py b/gradio/cli/commands/components/_docs_utils.py index 76744f1b93..31821d8b78 100644 --- a/gradio/cli/commands/components/_docs_utils.py +++ b/gradio/cli/commands/components/_docs_utils.py @@ -6,6 +6,10 @@ import typing from subprocess import PIPE, Popen +_RETURN_PATTERN = re.compile( + r"\bReturn(?:s)?\b:[ \t\n]*(.*?)(?=\n|$)", flags=re.DOTALL | re.IGNORECASE +) + def find_first_non_return_key(some_dict): """Finds the first key in a dictionary that is not "return".""" @@ -98,9 +102,7 @@ def get_parameter_docstring(docstring: str, parameter_name: str): def get_return_docstring(docstring: str): """Gets the docstring for a return value.""" - pattern = r"\bReturn(?:s){0,1}\b:[ \t\n]*(.*?)(?=\n|$)" - - match = re.search(pattern, docstring, flags=re.DOTALL | re.IGNORECASE) + match = _RETURN_PATTERN.search(docstring) if match: return match.group(1).strip()