diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index a070e85ac..8359396b5 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
+import importlib
 import logging
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional, Union
@@ -18,7 +19,13 @@
 logger = logging.getLogger(__name__)
 
 
-def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel":
+def load_model_from_task(
+    task: str,
+    model_name_or_path: str,
+    custom_task_class_name: Optional[str] = None,
+    custom_task_class_module: Optional[str] = None,
+    **kwargs,
+) -> "PreTrainedModel":
     """Load huggingface model from task and model_name_or_path."""
     from transformers.pipelines import check_task
 
@@ -55,7 +62,13 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
         AUTO_QUANTIZATION_CONFIG_MAPPING["olive"] = OliveHfQuantizationConfig
         AUTO_QUANTIZER_MAPPING["olive"] = OliveHfQuantizer
 
-    class_tuple = targeted_task["pt"] or (AutoModel,)
+    if custom_task_class_module is not None and custom_task_class_name is not None:
+        # load the user-specified model class instead of the task's auto class
+        module = importlib.import_module(custom_task_class_module)
+        class_tuple = (getattr(module, custom_task_class_name),)
+    else:
+        class_tuple = targeted_task["pt"] or (AutoModel,)
+
     model = None
     for i, model_class in enumerate(class_tuple):
         try:
@@ -119,6 +132,11 @@ def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], out
     config.save_pretrained(output_dir, **kwargs)
 
 
+def get_model_attributes_config(config: "PretrainedConfig", model_type: str) -> "PretrainedConfig":
+    """Return the config that carries the text-model attributes (nested for multimodal gemma3)."""
+    return config.text_config if model_type == "gemma3" else config
+
+
 def save_module_files(
     config: "PretrainedConfig", model_name_or_path: str, output_dir: str, **kwargs
 ) -> tuple["PretrainedConfig", list[str]]:
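Note on the `custom_task_class_*` escape hatch above: when both values are set, the model class is resolved dynamically with `importlib` instead of through the transformers task mapping. A minimal standalone sketch of that resolution pattern; the module and class names in the example are illustrative, not part of the Olive API:

```python
# Standalone sketch of the importlib-based class resolution used above.
import importlib


def resolve_class(module_name: str, class_name: str) -> type:
    """Import module_name and return its class_name attribute."""
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


# Equivalent to `from transformers import AutoModelForCausalLM`:
model_class = resolve_class("transformers", "AutoModelForCausalLM")
```
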
"model_type") + logger.error(self.config) + # model attributes - self.hidden_size = find_first_matched_value(self.config, self.HIDDEN_SIZE_NAMES) - self.num_attention_heads = find_first_matched_value(self.config, self.NUM_ATTENTION_HEADS_NAMES) + model_attributes_config = get_model_attributes_config(self.config, self.model_type) + self.hidden_size = find_first_matched_value(model_attributes_config, self.HIDDEN_SIZE_NAMES) + self.num_attention_heads = find_first_matched_value(model_attributes_config, self.NUM_ATTENTION_HEADS_NAMES) self.num_key_value_heads = ( - find_first_matched_value(self.config, self.NUM_KEY_VALUE_HEADS_NAMES) or self.num_attention_heads + find_first_matched_value(model_attributes_config, self.NUM_KEY_VALUE_HEADS_NAMES) + or self.num_attention_heads ) self.head_dim = ( - find_first_matched_value(self.config, self.HEAD_DIM_NAMES) or self.hidden_size // self.num_attention_heads + find_first_matched_value(model_attributes_config, self.HEAD_DIM_NAMES) + or self.hidden_size // self.num_attention_heads ) - self.num_hidden_layers = find_first_matched_value(self.config, self.NUM_HIDDEN_LAYER_NAMES) - self.max_length = find_first_matched_value(self.config, self.MAX_LENGTH) + self.num_hidden_layers = find_first_matched_value(model_attributes_config, self.NUM_HIDDEN_LAYER_NAMES) + self.max_length = find_first_matched_value(model_attributes_config, self.MAX_LENGTH) self._model = None self._layer_wrappers = None @@ -267,6 +280,7 @@ def get_pre_head_layernorm(self, return_name: bool = True): return get_submodules(self.model, self.PRE_HEAD_LAYERNORM, self.model_type, return_name=return_name) def get_layers(self, return_name: bool = True): + logger.error(self.model) return get_submodules(self.model, self.LAYERS, self.model_type, return_name=return_name) def get_layer_wrappers(self): diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py index a56a1aab6..4e4bb917d 100644 --- a/olive/model/handler/hf.py +++ b/olive/model/handler/hf.py @@ -28,7 +28,7 @@ @model_handler_registry("HFModel") class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin): # pylint: disable=too-many-ancestors resource_keys: tuple[str, ...] = ("model_path", "adapter_path") - json_config_keys: tuple[str, ...] = ("task", "load_kwargs") + json_config_keys: tuple[str, ...] 
= ("task", "load_kwargs", "custom_task_class_name", "custom_task_class_module") def __init__( self, @@ -38,6 +38,8 @@ def __init__( io_config: Union[dict[str, Any], IoConfig, str] = None, adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None, model_attributes: Optional[dict[str, Any]] = None, + custom_task_class_name: str = None, + custom_task_class_module: str = None, ): super().__init__( model_file_format=None, @@ -47,6 +49,8 @@ def __init__( ) self.add_resources(locals()) self.task = task + self.custom_task_class_name = custom_task_class_name + self.custom_task_class_module = custom_task_class_module self.load_kwargs = validate_config(load_kwargs, HfLoadKwargs, warn_unused_keys=False) if load_kwargs else None self.model_attributes = {**self.get_hf_model_config().to_dict(), **(self.model_attributes or {})} @@ -72,7 +76,13 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Mo if self.model: model = self.model else: - model = load_model_from_task(self.task, self.model_path, **self.get_load_kwargs()) + model = load_model_from_task( + self.task, + self.model_path, + self.custom_task_class_name, + self.custom_task_class_module, + **self.get_load_kwargs(), + ) # we only have peft adapters for now if self.adapter_path: @@ -81,7 +91,6 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Mo model = PeftModel.from_pretrained(model, self.adapter_path) self.model = model if cache_model else None - return model @property diff --git a/olive/passes/pytorch/gptqmodel.py b/olive/passes/pytorch/gptqmodel.py index cb54385f3..eeedf8f62 100644 --- a/olive/passes/pytorch/gptqmodel.py +++ b/olive/passes/pytorch/gptqmodel.py @@ -189,8 +189,18 @@ def get_dataset( raise ValueError("Data config is required for PyTorch model.") data_config = validate_config(data_config, DataConfig) dataloader = data_config.to_data_container().create_dataloader() - # each batch consists of (input_data, labels) - dataset = [data[0] for data in dataloader] + # each batch consists of (input_data, labels) or just input_data + dataset = [] + for data in dataloader: + if isinstance(data, (tuple, list)) and len(data) > 0: + # Standard format: (input_data, labels) + dataset.append(data[0]) + elif isinstance(data, dict): + # Data is already in the expected dictionary format + dataset.append(data) + else: + # Data is the input data directly + dataset.append(data) if ( not dataset or not isinstance(dataset, list) diff --git a/olive/passes/pytorch/rotate.py b/olive/passes/pytorch/rotate.py index 470eb619a..d82fe947d 100644 --- a/olive/passes/pytorch/rotate.py +++ b/olive/passes/pytorch/rotate.py @@ -44,6 +44,11 @@ class RotateMode(StrEnumBase): @classmethod def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]: return { + "device": PassConfigParam( + type_=str, + default_value="cpu", + description="Whether to run rotation on cpu or gpu. 
Accepted values are 'cpu' and 'cuda'.", + ), "seed": PassConfigParam( type_=int, default_value=0, @@ -60,6 +65,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon def rotate_model( self, model: HfModelHandler, + device: str, rotate_mode: str, seed: int, training_args: Optional[BaseHFTrainingArguments] = None, @@ -157,10 +163,13 @@ def rotate_model( count_trainable_parameters(model_wrapper.model), ) + if device == "cuda" and not torch.cuda.is_available(): + raise ValueError("Please install CUDA to rotate with it.") + return ( model_wrapper, rotation_params, - [((RotateEmbed, RotateLinear), lambda x: x.create_merged("cuda" if torch.cuda.is_available() else "cpu"))], + [((RotateEmbed, RotateLinear), lambda x: x.create_merged(device))], ) @classmethod @@ -246,7 +255,7 @@ class QuaRot(RotateBase): def _run_for_config( self, model: HfModelHandler, config: type[BasePassConfig], output_model_path: str ) -> HfModelHandler: - model_wrapper, _, save_replacements = self.rotate_model(model, config.rotate_mode, config.seed) + model_wrapper, _, save_replacements = self.rotate_model(model, config.device, config.rotate_mode, config.seed) # save the model model_wrapper.save_model(output_model_path, replacements=save_replacements)