2 changes: 1 addition & 1 deletion README.md
@@ -104,7 +104,7 @@ python3 scripts/generate_and_eval_single_sample.py dataset_src="huggingface" lev
**What you might need to modify**
* **`gpu_arch`** - Depending on your GPU, you might need to adjust the `gpu_arch` argument to reflect your hardware.
* **`precision`** - You can specify the tensor precision with `precision=fp32`. Currently all of our reported results are `fp32`, but we have added support for `fp16` & `bf16`.
* **`backend`** - We are also supporting other GPU programming languages beyond `cuda`. Simply specify `backend=triton`. For now we support DSLs: `cuda`, `triton`, `cute`, `tilelang`.
* **`backend`** - We also support GPU programming languages beyond `cuda`; simply specify, e.g., `backend=triton` or `backend=hip`. For now we support: `cuda`, `hip`, `triton`, `cute`, `tilelang` (see the example invocation below).

Check the config fields for a comprehensive set of options.
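
For example, a single-sample run against the new HIP backend might look like the following. This is a minimal sketch: the `level` and `problem_id` values are placeholders, and `precision` is optional.

```bash
python3 scripts/generate_and_eval_single_sample.py dataset_src="huggingface" level=1 problem_id=1 backend=hip precision=fp32
```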

4 changes: 2 additions & 2 deletions scripts/generate_and_eval_single_sample.py
@@ -172,11 +172,11 @@ def main(config: EvalConfig):
# Use appropriate prompt constructor based on backend
if config.backend == "cuda":
custom_prompt = prompt_generate_custom_cuda_from_prompt_template(ref_arch_src)
elif config.backend in ["triton", "tilelang", "cute"]:
elif config.backend in ["triton", "tilelang", "cute", "hip"]:
custom_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
else:
raise ValueError(
f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', 'tilelang', or 'cute'."
f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', 'tilelang', 'cute', or 'hip'."
)

if config.log_prompt:
4 changes: 2 additions & 2 deletions scripts/generate_and_eval_single_sample_modal.py
@@ -198,10 +198,10 @@ def main(config: EvalConfig):
# Use appropriate prompt constructor based on backend
if config.backend == "cuda":
custom_prompt = prompt_generate_custom_cuda_from_prompt_template(ref_arch_src)
elif config.backend in ["triton", "tilelang", "cute"]:
elif config.backend in ["triton", "tilelang", "cute", "hip"]:
custom_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
else:
raise ValueError(f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', 'tilelang', or 'cute'.")
raise ValueError(f"Unsupported backend: {config.backend}. Must be 'cuda', 'hip', 'triton', 'tilelang', or 'cute'.")

if config.log_prompt:
with open(os.path.join(config.logdir, f"prompt_level_{config.level}_problem_{config.problem_id}.txt"), "w") as f:
4 changes: 2 additions & 2 deletions scripts/generate_samples.py
@@ -131,11 +131,11 @@ def generate_sample_single(
custom_cuda_prompt = prompt_generate_custom_cuda_from_prompt_template(
ref_arch_src
)
elif config.backend in ["triton", "cute", "tilelang"]:
elif config.backend in ["triton", "hip", "cute", "tilelang"]:
custom_cuda_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
else:
raise ValueError(
f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', 'cute', or 'tilelang'."
f"Unsupported backend: {config.backend}. Must be 'cuda', `hip`, 'triton', 'cute', or 'tilelang'."
)
if config.log_prompt:
prompt_path = os.path.join(
8 changes: 4 additions & 4 deletions src/eval.py
@@ -370,7 +370,7 @@ def _process_input_tensor(input, device, backend="cuda", precision=torch.float32
Args:
input: Input tensor or non-tensor value
device: Target CUDA device
backend: Backend type (e.g., 'cuda', 'triton', 'cute')
backend: Backend type (e.g., 'cuda', 'hip', 'triton', 'cute')
precision: torch.dtype
Returns:
Processed tensor on correct device with correct dtype, or original value if not a tensor
@@ -399,7 +399,7 @@ def eval_kernel_against_ref(
device: Union[torch.device, int] = (
torch.cuda.current_device() if torch.cuda.is_available() else None
), # have to run on GPU
backend: str = "cuda", # can be 'cuda', 'triton', 'tilelang', or 'cute'
backend: str = "cuda", # can be 'cuda', 'hip', 'triton', 'tilelang', or 'cute'
precision: torch.dtype = torch.float32,
) -> KernelExecResult:
"""
@@ -408,7 +408,7 @@ def eval_kernel_against_ref(
num_correct_trials: number of trials to initialize different random inputs; correctness pass only if all trials pass
num_perf_trials: run the evaluation many times to take the average
device: GPU (cuda) device to run the evaluation on
backend: str, one of 'cuda', 'triton', 'tilelang', or 'cute'
backend: str, one of 'cuda', 'hip', 'triton', 'tilelang', or 'cute'
precision: torch.dtype for computation (note: tilelang only supports fp16)
"""
# TODO: check device is busy
@@ -488,7 +488,7 @@ def eval_kernel_against_ref(
custom_model_src, entry_point="ModelNew"
)
else:
# Default CUDA backend
# Default CUDA/HIP backend
ModelNew = load_custom_model(custom_model_src, context, build_dir)
torch.cuda.synchronize(device=device) # not sure if this is too much
except Exception as e:
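
For context, here is a hedged sketch of how the evaluator above might be invoked for a HIP sample. Only `backend`, `precision`, and the trial counts come from the signature and docstring shown in this diff; the import path, the file paths, and the assumption that the reference source is the first positional parameter are illustrative guesses.

```python
# Hypothetical usage sketch (not part of this PR).
import torch

from src.eval import eval_kernel_against_ref  # assumed import path

ref_arch_src = open("path/to/reference_model.py").read()        # reference Model source (placeholder path)
custom_hip_src = open("path/to/generated_model_new.py").read()  # generated ModelNew with inline HIP kernels

result = eval_kernel_against_ref(
    ref_arch_src,                    # assumed to be the first positional parameter
    custom_model_src=custom_hip_src,
    backend="hip",                   # newly supported by this PR
    precision=torch.float32,         # all reported results are fp32
    num_correct_trials=5,            # several random inputs; all must pass for correctness
    num_perf_trials=100,             # timing runs averaged for performance
)
print(result)
```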
6 changes: 0 additions & 6 deletions src/prompt_constructor.py
@@ -456,12 +456,6 @@ def prompt_generate_prompt_with_hardware_info(ref_arch_src: str,
return prompt


return Nonoe





def prompt_fix_compile(ref_arch_src, custom_cuda, metadata):
prompt = PROBLEM_STATEMENT
prompt += f"""
225 changes: 224 additions & 1 deletion src/prompt_constructor_multilang.py
@@ -492,6 +492,227 @@ def prompt_fix_correctness_cute(ref_arch_src, custom_kernel, metadata):
return prompt


################################################################################
# HIP Backend
################################################################################

HIP_PROBLEM_STATEMENT = """You write custom HIP kernels to replace the pytorch operators in the given architecture to get speedups. \n
You have complete freedom to choose the set of operators you want to replace. You may make the decision to replace some operators with custom HIP kernels and leave others unchanged. You may replace multiple operators with custom implementations, consider operator fusion opportunities (combining multiple operators into a single kernel, for example, combining matmul+relu), or algorithmic changes (such as online softmax). You are only limited by your imagination.\n
"""

HIP_PROBLEM_INSTRUCTION = """
Optimize the architecture named Model with custom HIP operators! Name your optimized output architecture ModelNew. Output the new code in codeblocks. Please generate real code, NOT pseudocode, make sure the code compiles and is fully functional. Just output the new model code, no other text, and NO testing code! \n
"""

HIP_PROBLEM_STATEMENT_CLEANED = """You write custom HIP kernels to replace the pytorch operators in the given architecture to get speedups.\n\nYou have complete freedom to choose the set of operators you want to replace. You may make the decision to replace some operators with custom HIP kernels and leave others unchanged. You may replace multiple operators with custom implementations, consider operator fusion opportunities (combining multiple operators into a single kernel, for example, combining matmul+relu), or algorithmic changes (such as online softmax). You are only limited by your imagination.\n
"""

HIP_PROBLEM_INSTRUCTION_CLEANED = """
Optimize the architecture named Model with custom HIP operators! Name your optimized output architecture ModelNew. Output the new code in codeblocks. Please generate real code, NOT pseudocode, make sure the code compiles and is fully functional. Just output the new model code, no other text, and NO testing code! \n
"""

def prompt_generate_custom_hip(
arc_src: str, example_arch_src: str, example_new_arch_src: str
) -> str:
prompt = HIP_PROBLEM_STATEMENT

if example_arch_src != "" and example_new_arch_src != "":
prompt += f"""
Here's an example to show you the syntax of inline embedding custom HIP operators in torch: The example given architecture is: \n
``` \n
{example_arch_src}
``` \n
The example new arch with custom HIP kernels looks like this:
```
{example_new_arch_src}
``` \n
"""

prompt += f"""
You are given the following architecture: \n
```
{arc_src}
```
"""
prompt += HIP_PROBLEM_INSTRUCTION
return prompt


def prompt_generate_custom_hip_from_prompt_template(ref_arch_src: str) -> str:
"""
Use a prompt example (an element-wise addition) as the prompt template.
The most basic form of example, just to show the LLM the task and the expected output format.
"""
arch = ref_arch_src
# These are strictly defined for now

# path to prompt template, show an example of Model (torch specifications) and ModelNew (torch + custom HIP kernels)
example_arch_path = os.path.join(
REPO_TOP_PATH, f"src/prompts/model_ex_add.py"
)
example_new_arch_path = os.path.join(
REPO_TOP_PATH, f"src/prompts/model_new_ex_add_hip.py"
)

if not os.path.exists(example_arch_path):
raise FileNotFoundError(
f"Example architecture file not found: {example_arch_path}"
)
if not os.path.exists(example_new_arch_path):
raise FileNotFoundError(
f"Example new architecture file not found: {example_new_arch_path}"
)

example_arch = read_file(example_arch_path)
example_new_arch = read_file(example_new_arch_path)

return prompt_generate_custom_hip(arch, example_arch, example_new_arch)
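
For readers unfamiliar with the expected output format, below is a minimal sketch of the kind of `ModelNew` that an example file like `model_new_ex_add_hip.py` is described as showing (an element-wise addition). It is not the actual example shipped with this PR, and it assumes a ROCm build of PyTorch, where `torch.utils.cpp_extension.load_inline` hipifies CUDA-style kernel sources and compiles them with hipcc.

```python
# Hypothetical example (NOT the file added by this PR): a ModelNew that replaces
# torch addition with a custom element-wise kernel.
import torch
import torch.nn as nn
from torch.utils.cpp_extension import load_inline

kernel_source = r"""
#include <torch/extension.h>

__global__ void add_kernel(const float* a, const float* b, float* out, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        out[idx] = a[idx] + b[idx];
    }
}

torch::Tensor elementwise_add(torch::Tensor a, torch::Tensor b) {
    auto out = torch::empty_like(a);
    const int n = a.numel();
    const int threads = 256;
    const int blocks = (n + threads - 1) / threads;
    add_kernel<<<blocks, threads>>>(
        a.data_ptr<float>(), b.data_ptr<float>(), out.data_ptr<float>(), n);
    return out;
}
"""

cpp_decl = "torch::Tensor elementwise_add(torch::Tensor a, torch::Tensor b);"

# load_inline builds and imports the extension at runtime; on a ROCm build the
# cuda_sources argument is hipified before compilation.
add_ext = load_inline(
    name="hip_elementwise_add",
    cpp_sources=cpp_decl,
    cuda_sources=kernel_source,
    functions=["elementwise_add"],
    verbose=False,
)


class ModelNew(nn.Module):
    """Drop-in replacement for the reference Model, using the custom kernel."""

    def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        return add_ext.elementwise_add(a.contiguous(), b.contiguous())
```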


def prompt_generate_prompt_with_hardware_info_from_template_hip(ref_arch_src: str, gpu_name: str) -> str:
"""
Similar to prompt_generate_custom_hip_from_prompt_template,
but with hardware information for the given GPU
"""

arch = ref_arch_src
# These are strictly defined for now

# path to prompt template, show an example of Model (torch specifications) and ModelNew (torch + custom HIP kernels)
example_arch_path = os.path.join(
REPO_TOP_PATH, f"src/prompts/model_ex_add.py"
)
example_new_arch_path = os.path.join(
REPO_TOP_PATH, f"src/prompts/model_new_ex_add_hip.py"
)

gpu_spec_file_path = os.path.join(REPO_TOP_PATH, f"src/prompts/hardware/gpu_specs.py")

example_arch = read_file(example_arch_path)
example_new_arch = read_file(example_new_arch_path)
gpu_spec_info = read_file(gpu_spec_file_path)

return prompt_generate_prompt_with_hardware_info_hip(
ref_arch_src=arch,
gpu_name=gpu_name,
example_arch_src=example_arch,
example_new_arch_src=example_new_arch,
gpu_spec_info_src=gpu_spec_info
)



def prompt_generate_prompt_with_hardware_info_hip(ref_arch_src: str,
gpu_name: str,
example_arch_src: str,
example_new_arch_src: str,
gpu_spec_info_src: str) -> str:
"""
Generate a prompt with hardware information for the given GPU
gpu_spec_info_src: str of the gpu spec src file
"""

local_dict = {}
exec(gpu_spec_info_src, {}, local_dict)

GPU_SPEC_INFO = local_dict.get('GPU_SPEC_INFO')
GPU_DEFINITIONS = local_dict.get('GPU_DEFINITIONS')
GPU_BEST_PRACTICES = local_dict.get('GPU_BEST_PRACTICES')

if not GPU_SPEC_INFO or not GPU_DEFINITIONS or not GPU_BEST_PRACTICES:
raise ValueError("GPU_SPEC_INFO or GPU_DEFINITIONS or GPU_BEST_PRACTICES not found in gpu_spec_info_src")

assert gpu_name in GPU_SPEC_INFO, f"GPU name {gpu_name} not found in GPU_SPEC_INFO"

prompt = HIP_PROBLEM_STATEMENT

if example_arch_src != "" and example_new_arch_src != "":
prompt += f"""
Here's an example to show you the syntax of inline embedding custom HIP operators in torch: The example given architecture is: \n
``` \n
{example_arch_src}
``` \n
The example new arch with custom HIP kernels looks like this:
```
{example_new_arch_src}
``` \n
"""

curr_gpu_spec_info = GPU_SPEC_INFO[gpu_name]

gpu_architecture = curr_gpu_spec_info.get("GPU Architecture")
prompt += f"""
Here is some information about the underlying hardware that you should keep in mind. \n\n
The GPU that will run the kernel is an AMD {gpu_name}, built on the {gpu_architecture} architecture.\n\n"""

for key, value in curr_gpu_spec_info.items():
if key == "GPU Architecture":
continue
prompt += f"""- We have {value} of {key}.\n"""

prompt += f"""\n\n
Here are some concepts about the GPU architecture that could be helpful: \n\n"""
for key, value in GPU_DEFINITIONS.items():
prompt += f"""- {key}: {value}\n"""

prompt += f"""\n\n
Here are some best practices for writing HIP kernels on GPU: \n\n"""
for best_practice in GPU_BEST_PRACTICES:
prompt += f"""- {best_practice}\n"""


prompt += f"""
You are given the following architecture: \n
```
{ref_arch_src}
```
"""

prompt += HIP_PROBLEM_INSTRUCTION
return prompt
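
For clarity, the structures this function expects to `exec` out of `gpu_specs.py` look roughly like the following. The shapes (a dict of per-GPU dicts, a dict of definitions, and a list of best practices) follow from the loops above; the GPU name and all values here are placeholders, not contents of the real spec file.

```python
# Hypothetical shape of src/prompts/hardware/gpu_specs.py (placeholder values only).
GPU_SPEC_INFO = {
    "MI300X": {                       # looked up via GPU_SPEC_INFO[gpu_name]
        "GPU Architecture": "CDNA3",  # consumed separately, before the key/value loop
        "Compute Units": "<placeholder>",
        "Memory Bandwidth": "<placeholder>",
    },
}

GPU_DEFINITIONS = {
    "Compute Unit": "<short definition injected into the prompt>",
}

GPU_BEST_PRACTICES = [
    "<one best-practice bullet injected into the prompt>",
]
```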


def prompt_fix_compile_hip(ref_arch_src, custom_hip_kernel, metadata):
prompt = HIP_PROBLEM_STATEMENT
prompt += f"""
With the following architecture:
```
{ref_arch_src}
```
You generated the following solution and it failed to compile:
```
{custom_hip_kernel}
```
Here's the metadata of the compilation error:
```
{metadata}
```

Please fix the compilation error in the new model code. Please output the corrected code in codeblocks.
"""
return prompt


def prompt_fix_correctness_hip(ref_arch_src, custom_hip_kernel, metadata):
prompt = HIP_PROBLEM_STATEMENT
prompt += f"""
With the following architecture:
```
{ref_arch_src}
```
You generated the following solution and it failed correctness:
```
{custom_hip_kernel}
```
Here's the metadata of the correctness error:
```
{metadata}
```
Please consider how your custom HIP kernels are implemented, how they differ from the reference implementation, and fix the correctness error in the new model code. Please output the corrected code in codeblocks.
"""
return prompt


################################################################################
# Unified API
################################################################################
@@ -502,7 +723,7 @@ def get_prompt_for_backend(ref_arch_src: str, backend: str = "triton") -> str:

Args:
ref_arch_src: Reference architecture source code
backend: One of 'triton', 'tilelang', 'cute'
backend: One of 'triton', 'tilelang', 'cute', 'hip'

Returns:
Prompt string for the specified backend
@@ -515,6 +736,8 @@ def get_prompt_for_backend(ref_arch_src: str, backend: str = "triton") -> str:
return prompt_generate_custom_tilelang_from_prompt_template(ref_arch_src)
elif backend_lower == "cute":
return prompt_generate_custom_cute_from_prompt_template(ref_arch_src)
elif backend_lower == "hip":
return prompt_generate_custom_hip_from_prompt_template(ref_arch_src)
else:
raise ValueError(
f"Unsupported backend: {backend}. Must be one of: 'triton', 'tilelang', 'cute'"
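
Finally, a hedged sketch of how the unified API might be exercised for the new backend; the import path and the reference-architecture file path are assumptions for illustration.

```python
# Hypothetical end-to-end use of the unified prompt API with the HIP backend.
from src.prompt_constructor_multilang import get_prompt_for_backend  # assumed import path

ref_arch_src = open("path/to/reference_model.py").read()  # placeholder path to a Model definition
hip_prompt = get_prompt_for_backend(ref_arch_src, backend="hip")
print(hip_prompt[:500])  # inspect the start of the generated prompt
```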