diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py
index cb5e57184ae..eb8b5b1f031 100644
--- a/neural_compressor/torch/algorithms/weight_only/autoround.py
+++ b/neural_compressor/torch/algorithms/weight_only/autoround.py
@@ -154,7 +154,6 @@ def __init__(
         self.act_dynamic = act_dynamic
         self.low_cpu_mem_usage = low_cpu_mem_usage
         self.export_format = export_format
-
 
     def prepare(self, model: torch.nn.Module, *args, **kwargs):
         """Prepares a given model for quantization.
@@ -213,9 +212,9 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
         )
         model, weight_config = rounder.quantize()
         model.autoround_config = weight_config
         if "itrex" in self.export_format:
             model = pack_model(model, weight_config, device=self.device, inplace=True)
-        else:
+        else:  # pragma: no cover
             model = rounder.save_quantized(output_dir=None, format=self.export_format, device=self.device, inplace=True)
 
         return model
diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index 2405cb1277d..8232d88ba58 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -53,10 +53,10 @@ def save(model, output_dir="./saved_results", format=LoadFormat.DEFAULT, **kwargs):
         - max_shard_size (str, optional): The maximum size for each shard (only applicable for 'huggingface' format). Defaults to "5GB".
     """
     os.makedirs(output_dir, exist_ok=True)
-    if format == LoadFormat.HUGGINGFACE:
+    if format == LoadFormat.HUGGINGFACE:  # pragma: no cover
         config = model.config
         quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
         if "backend" in quantization_config and "auto_round" in quantization_config["backend"]:
             safe_serialization = kwargs.get("safe_serialization", True)
             tokenizer = kwargs.get("tokenizer", None)
             max_shard_size = kwargs.get("max_shard_size", "5GB")
@@ -65,7 +65,7 @@ def save(model, output_dir="./saved_results", format=LoadFormat.DEFAULT, **kwargs):
             del model.save
             model.save_pretrained(output_dir, max_shard_size=max_shard_size, safe_serialization=safe_serialization)
             return
-
+
     qmodel_weight_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), WEIGHT_NAME)
     qconfig_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), QCONFIG_NAME)
     # saving process
@@ -140,7 +140,7 @@ def load_woq_model(self):
         """
         if self.format == LoadFormat.HUGGINGFACE:
             assert self.model_name_or_path is not None, "'model_name_or_path' can't be None."
-
+
             model = self.load_hf_format_woq_model()
             logger.info("Loading HuggingFace weight-only quantization model successfully.")
         elif self.format == LoadFormat.DEFAULT:
@@ -213,7 +213,7 @@ def load_hf_format_woq_model(self):
         """
         # check required package
        from neural_compressor.torch.utils import is_package_available
-
+
         if not is_package_available("transformers"):
             raise ImportError("Loading huggingface model requires transformers: `pip install transformers`")
         if not is_package_available("accelerate"):
@@ -221,14 +221,12 @@ def load_hf_format_woq_model(self):
 
         # get model class and config
         model_class, config = self._get_model_class_and_config()
-        self.quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
-        if "backend" in self.quantization_config and "auto_round" in self.quantization_config["backend"]:
+        quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
+        if "backend" in quantization_config and "auto_round" in quantization_config["backend"]:
             # load autoround format quantized model
-            from auto_round import AutoRoundConfig
-
+            from auto_round import AutoRoundConfig  # pylint: disable=E0401
             model = model_class.from_pretrained(self.model_name_or_path)
             return model
-
         # get loaded state_dict
         self.loaded_state_dict = self._get_loaded_state_dict(config)
         self.loaded_state_dict_keys = list(set(self.loaded_state_dict.keys()))
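
Reviewer note: a minimal, hypothetical usage sketch of the new `export_format` branch in `convert()` (not part of this patch). The model name, the empty `quant_config`, and the calibration loop are illustrative assumptions; only `AutoRoundQuantizer`, `get_dataloader`, and the `export_format` argument come from the code above.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from neural_compressor.torch.algorithms.weight_only.autoround import (
    AutoRoundQuantizer,
    get_dataloader,
)

model_name = "facebook/opt-125m"  # hypothetical small model, purely for illustration
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

quantizer = AutoRoundQuantizer(
    quant_config={},  # empty config: fall back to the quantizer's defaults
    export_format="auto_round",  # any value without "itrex" takes the new save_quantized() branch
)

model = quantizer.prepare(model)
dataloader = get_dataloader(tokenizer, seqlen=2048, nsamples=128)
for batch in dataloader:  # calibration pass; assumes batches are dicts carrying input_ids
    model(batch["input_ids"])
q_model = quantizer.convert(model)  # exports via rounder.save_quantized() instead of pack_model()
```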
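
Likewise, a hedged sketch of the save/load round trip the `save_load.py` changes enable. It assumes the quantized model still carries the bound `.save` helper implied by `del model.save` in `save()`, and that the public `load` entry point in `neural_compressor.torch.quantization` accepts the string `format="huggingface"`; paths are illustrative.

```python
from neural_compressor.torch.quantization import load  # assumed public load entry point

# q_model and tokenizer continue from the sketch above.
q_model.save(
    output_dir="./saved_results",
    format="huggingface",
    tokenizer=tokenizer,    # optional: stored alongside the weights
    max_shard_size="5GB",   # forwarded to save_pretrained()
)

# Because quantization_config["backend"] mentions "auto_round", the loader now
# short-circuits to model_class.from_pretrained() instead of reassembling a state_dict.
loaded_model = load(model_name_or_path="./saved_results", format="huggingface")
```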