refactor folder of torch 3x API and refine RTN (#1560)
Signed-off-by: xin3he <xin3.he@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
xin3he authored Jan 30, 2024
1 parent 02233fb commit 7bf89eb
Showing 34 changed files with 702 additions and 770 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/scripts/ut/3x/collect_log_3x.sh
@@ -1,6 +1,6 @@
source /neural-compressor/.azure-pipelines/scripts/change_color.sh

set -xe
set -e
pip install coverage
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.${1}
coverage_log="/neural-compressor/log_dir/coverage_log"
3 changes: 2 additions & 1 deletion .azure-pipelines/scripts/ut/3x/run_3x_ort.sh
@@ -28,7 +28,8 @@ cp .coverage ${LOG_DIR}/.coverage

echo "------UT end -------"

if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] \
|| [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "ImportError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
echo "Find errors in UT test, please check the output..."
exit 1
fi
52 changes: 41 additions & 11 deletions .azure-pipelines/scripts/ut/3x/run_3x_pt.sh
@@ -11,25 +11,55 @@ pip list

export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
cd /neural-compressor/test || exit 1
find ./3x/torch/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
cd /neural-compressor/test/3x || exit 1
grep -lrv "import pytest" --include="test*.py" ./torch | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run_unittest.sh
grep -lrv "import pytest" --include="test*.py" ./common | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run_unittest.sh
grep -lr "import pytest" --include="test*.py" ./torch | sed 's,\.\/,coverage run --source='"${inc_path}"' --append -m pytest --disable-warnings -v ,g' > run_pytest.sh
grep -lr "import pytest" --include="test*.py" ./common | sed 's,\.\/,coverage run --source='"${inc_path}"' --append -m pytest --disable-warnings -v ,g'>> run_pytest.sh

LOG_DIR=/neural-compressor/log_dir
mkdir -p ${LOG_DIR}
ut_log_name=${LOG_DIR}/ut_3x_pt.log

echo "cat run.sh..."
sort run.sh -o run.sh
cat run.sh | tee ${ut_log_name}
echo "------UT start-------"
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
# unittest and pytest has some incompatible issue, so separate the test.
echo "cat run_unittest.sh..."
sort run_unittest.sh -o run_unittest.sh
cat run_unittest.sh | tee ${ut_log_name}
echo "------unittest start-------"
bash -x run_unittest.sh 2>&1 | tee -a ${ut_log_name}
echo "------unittest end -------"

if [ -s run_pytest.sh ]; then
echo "cat run_pytest.sh..."
sort run_pytest.sh -o run_pytest.sh
cat run_pytest.sh | tee -a ${ut_log_name}
echo "------pytest start-------"
bash -x run_pytest.sh 2>&1 | tee -a ${ut_log_name}
echo "------pytest end -------"
fi

cp .coverage ${LOG_DIR}/.coverage

echo "------UT end -------"
ut_status="passed"
# check unittest issue
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] \
|| [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "ImportError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
echo "Find errors in unittest case, please check the output..."
echo "Please search for 'FAILED' or 'core dumped' or 'ModuleNotFoundError:' or 'ImportError:'"
ut_status="failed"
fi

if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
echo "Find errors in UT test, please check the output..."
# check pytest issue
if [ -s run_pytest.sh ]; then
if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c 'passed,' ${ut_log_name}) == 0 ]; then
echo "Find errors in pytest case, please check the output..."
echo "Please search for '== FAILURES ==' or '== ERRORS =='"
ut_status="failed"
fi
fi

if [ "$ut_status" = "failed" ]; then
exit 1
fi

echo "UT finished successfully! "
3 changes: 2 additions & 1 deletion .azure-pipelines/scripts/ut/3x/run_3x_tf.sh
@@ -28,7 +28,8 @@ cp .coverage ${LOG_DIR}/.coverage

echo "------UT end -------"

if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] \
|| [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "ImportError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
echo "Find errors in UT test, please check the output..."
exit 1
fi
@@ -48,7 +48,7 @@
from flash_attn import flash_attn_func, flash_attn_varlen_func
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
### INC code ###
from neural_compressor.torch.quantization.layers import Matmul, BatchMatmul, Autocast
from neural_compressor.torch.quantization.modules import Matmul, BatchMatmul, Autocast

# This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
# It means that the function will not be traced through and simply appear as a node in the graph.
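The hunk above only moves an import: the Matmul, BatchMatmul and Autocast wrapper modules now live in neural_compressor.torch.quantization.modules instead of neural_compressor.torch.quantization.layers. A minimal compatibility sketch for downstream model code, assuming both package layouts may be encountered (the try/except fallback is an assumption, not part of this commit):

try:
    # new location after this refactor
    from neural_compressor.torch.quantization.modules import Matmul, BatchMatmul, Autocast
except ImportError:
    # pre-refactor location, kept only as a fallback for older neural-compressor builds
    from neural_compressor.torch.quantization.layers import Matmul, BatchMatmul, Autocast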
@@ -161,8 +161,8 @@ def itrex_bootstrap_stderr(f, xs, iters):
if args.approach in ["dynamic", "static"]:
print("device:", next(user_model.parameters()).device)
from neural_compressor.torch.quantization.config import FP8QConfig, get_default_fp8_qconfig
from neural_compressor.torch.quantization.fp8 import quantize_dynamic
from neural_compressor.torch.quantization import quantize, quantize_dynamic
from neural_compressor.torch.algorithms.habana_fp8 import quantize_dynamic
from neural_compressor.torch.quantization import quantize
if args.precision == "fp8_e4m3":
dtype = torch.float8_e4m3fn
else:
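For the Habana FP8 example above, the dynamic-quantization entry point moves as well: quantize_dynamic is now imported from the algorithm package rather than re-exported next to quantize. A short sketch of the updated imports exactly as this hunk uses them (a Gaudi/HPU build of neural-compressor is assumed for the habana_fp8 package):

from neural_compressor.torch.quantization.config import FP8QConfig, get_default_fp8_qconfig
from neural_compressor.torch.algorithms.habana_fp8 import quantize_dynamic  # moved out of neural_compressor.torch.quantization.fp8
from neural_compressor.torch.quantization import quantize                   # no longer re-exports quantize_dynamic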
18 changes: 0 additions & 18 deletions neural_compressor/torch/__init__.py
@@ -11,21 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from neural_compressor.torch.utils.utility import register_algo
from neural_compressor.torch.algorithms import rtn_quantize_entry, gptq_quantize_entry

from neural_compressor.torch.quantization import (
quantize,
RTNConfig,
get_default_rtn_config,
GPTQConfig,
get_default_gptq_config,
StaticQuantConfig,
get_default_static_config,
SmoothQuantConfig,
get_default_sq_config,
)

from neural_compressor.common.base_tuning import TuningConfig
from neural_compressor.torch.quantization.autotune import autotune, get_all_config_set
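Since neural_compressor/torch/__init__.py no longer re-exports the quantization API, callers import from the subpackages directly. A hedged sketch of the post-refactor import style for RTN, using only names that the deleted re-export block itself pulled from these subpackages (the commented-out call is illustrative; its exact keyword arguments are not shown in this diff):

from neural_compressor.torch.quantization import quantize, RTNConfig, get_default_rtn_config
from neural_compressor.torch.quantization.autotune import autotune, get_all_config_set

quant_config = get_default_rtn_config()
# q_model = quantize(model, quant_config)  # `model` is a placeholder fp32 torch.nn.Module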
4 changes: 0 additions & 4 deletions neural_compressor/torch/algorithms/__init__.py
@@ -11,7 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from neural_compressor.torch.algorithms.weight_only_algos import rtn_quantize_entry
from neural_compressor.torch.algorithms.weight_only_algos import gptq_quantize_entry
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .quantization_impl import quantize_dynamic, quantize
from .fp8_quant import quantize_dynamic, quantize, white_list
@@ -22,9 +22,9 @@
from deepspeed.module_inject.layers import LmHeadLinearAllreduce

from neural_compressor.common.utils import FP8_QUANT
from neural_compressor.torch.quantization.modules import Autocast, BatchMatmul, Matmul
from neural_compressor.torch.utils.utility import fetch_module, logger, register_algo, set_module

from ..layers import Autocast, BatchMatmul, Matmul
from .modules import (
FP8BatchMatmul,
FP8Cast,
@@ -198,7 +198,6 @@ def convert(model, qconfig_mapping):
return model


@register_algo(name=FP8_QUANT)
def quantize(model, qconfig_mapping, run_fn=None, run_args=None, inplace=True):
q_model = model if inplace else copy.deepcopy(model)
q_model = prepare(q_model, qconfig_mapping)
@@ -209,7 +208,3 @@ def quantize(model, qconfig_mapping, run_fn=None, run_args=None, inplace=True):
run_fn(q_model)
q_model = convert(q_model, qconfig_mapping)
return q_model


# def autotune(fp32_model, quant_config, tune_config, eval_func, ...):
# pass
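The FP8 quantize entry above is now a plain function (the @register_algo(name=FP8_QUANT) decorator is dropped) that prepares the model, optionally runs a calibration function on it, and converts it. A hedged usage sketch, assuming the package-level export shown in the habana_fp8 __init__.py hunk and leaving qconfig_mapping construction abstract:

from neural_compressor.torch.algorithms.habana_fp8 import quantize  # exported via fp8_quant

def calib_fn(prepared_model):
    # feed a few calibration batches through the prepared model (placeholder body)
    ...

# q_model = quantize(user_model, qconfig_mapping, run_fn=calib_fn, inplace=False)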
53 changes: 11 additions & 42 deletions neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -757,13 +757,14 @@ def find_params(self, x, weight=False):
if self.wdtype != "int":
from .utility import quant_tensor

tmp = x.clone() # make sure x is not replaced
tmp = x.clone() # tmp will be replaced after quant_tensor

_, scale, zero = quant_tensor(
tmp,
self.wbits,
self.group_size,
scheme=self.scheme,
dtype=self.wdtype,
bits=self.wbits,
group_size=self.group_size,
scheme=self.scheme,
quantile=1.0,
return_int=True,
full_range=False,
@@ -854,10 +855,10 @@ def find_params(self, x, weight=False):
self.scale = self.scale.reshape(1, -1)
quant_tensor(
self.scale,
self.double_quant_bits,
self.double_quant_group_size,
scheme=self.double_quant_scheme,
dtype=self.double_quant_dtype,
bits=self.double_quant_bits,
group_size=self.double_quant_group_size,
scheme=self.double_quant_scheme,
quantile=1.0,
return_int=False,
full_range=False,
@@ -879,8 +880,7 @@ def quantize(self, x, scale, zero, maxq):
if self.wdtype != "int":
from .utility import quantize_4bit

tmp = x.clone()

tmp = x.clone() # tmp will be replaced after quant_tensor
return quantize_4bit(tmp, dtype=self.wdtype, scale=scale)
else:
if maxq < 0:
@@ -893,12 +893,7 @@ def ready(self):


# TODO (Yi) remove it after unifying the algo config parser
from typing import Callable, Dict, Tuple

from neural_compressor.torch.quantization.config import GPTQConfig


def gptq_config_mapping(configs_mapping: Dict[Tuple[str, Callable], GPTQConfig]):
def gptq_config_mapping(configs_mapping):
# convert GPTQ_CONFIG to gptq_quantize's weight config
# convert tune_cfg to gptq_quantize's weight config
# for layer_wise quant mode
@@ -950,33 +945,7 @@ def gptq_config_mapping(configs_mapping: Dict[Tuple[str, Callable], GPTQConfig])
return weight_config, nsamples, use_max_length, pad_max_length, device, dataloader_len


def gptq_quantize(
model,
weight_config={},
dataloader=None,
nsamples=128,
use_max_length=True,
pad_max_length=2048,
device=None,
layer_wise=False,
model_path=None,
):
"""Run weight-only quantization with."""
# TODO: unify weight_config keys, add docstring, and support default config
assert isinstance(model, torch.nn.Module), "only support torch module"
if layer_wise:
assert model_path is not None, "model_path should not be None when use layer_wise mode"
from .gptq import GPTQuantizer

gptq_quantizer = GPTQuantizer(
model, weight_config, dataloader, nsamples, use_max_length, pad_max_length, device, layer_wise=layer_wise
)
fp32_modified_model, gptq_config = gptq_quantizer.execute_quantization(model_path=model_path)
logger.info("GPTQ quantizing done.")
return fp32_modified_model, gptq_config


def apply_gptq_quantize(model, configs_mapping, *args, **kwargs):
def gptq_quantize(model, configs_mapping, *args, **kwargs):
"""Apply gptq."""
# TODO: unify weight_config keys, add docstring, and support default config
weight_config, nsamples, use_max_length, pad_max_length, device, dataloader_len = gptq_config_mapping(
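Two changes in gptq.py are worth calling out: quant_tensor is now called with explicit keyword arguments (dtype, bits, group_size, scheme, ...) instead of positionals, and the public entry point is simplified to gptq_quantize(model, configs_mapping, *args, **kwargs), absorbing the old apply_gptq_quantize. A minimal sketch of the keyword-style quant_tensor call, mirroring the arguments in the hunk; the absolute import path is assumed from the relative `.utility` import above, and the concrete bits/group_size/scheme values are illustrative only:

import torch

from neural_compressor.torch.algorithms.weight_only.utility import quant_tensor

w = torch.randn(32, 64)
tmp = w.clone()  # quant_tensor overwrites its input, as the "tmp will be replaced" comments note
_, scale, zero = quant_tensor(
    tmp,
    dtype="int",
    bits=4,
    group_size=32,
    scheme="asym",
    quantile=1.0,
    return_int=True,
    full_range=False,
)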