diff --git a/.azure-pipelines/code-scan-neural-insights.yaml b/.azure-pipelines/code-scan-neural-insights.yaml
deleted file mode 100644
index eaf741c7ec1..00000000000
--- a/.azure-pipelines/code-scan-neural-insights.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_insights
- - setup.py
- - .azure-pipelines/code-scan-neural-insights.yml
-
-pool:
- vmImage: "ubuntu-latest"
-
-variables:
- CODE_SCAN_LOG_PATH: ".azure-pipelines/scripts/codeScan/scanLog"
-
-stages:
- - stage: DocStyleCodeScan
- displayName: DocStyle Code Scan
- dependsOn: []
- jobs:
- - job: DocStyle
- displayName: DocStyle
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "pydocstyle"
- uploadPath: "pydocstyle.log"
- scanModule: "neural_insights"
-
- - stage: BanditCodeScan
- displayName: Bandit Code Scan
- dependsOn: []
- jobs:
- - job: Bandit
- displayName: Bandit
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "bandit"
- uploadPath: "bandit.log"
- scanModule: "neural_insights"
-
- - stage: PylintCodeScan
- displayName: Pylint Code Scan
- dependsOn: []
- jobs:
- - job: Pylint
- displayName: Pylint
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "pylint"
- uploadPath: "pylint.json"
- scanModule: "neural_insights"
diff --git a/.azure-pipelines/code-scan-neural-solution.yaml b/.azure-pipelines/code-scan-neural-solution.yaml
deleted file mode 100644
index 301c7010465..00000000000
--- a/.azure-pipelines/code-scan-neural-solution.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_solution
- - setup.py
- - .azure-pipelines/code-scan-neural-solution.yml
-
-pool:
- vmImage: "ubuntu-latest"
-
-variables:
- CODE_SCAN_LOG_PATH: ".azure-pipelines/scripts/codeScan/scanLog"
-
-stages:
- - stage: DocStyleCodeScan
- displayName: DocStyle Code Scan
- dependsOn: []
- jobs:
- - job: DocStyle
- displayName: DocStyle
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "pydocstyle"
- uploadPath: "pydocstyle.log"
- scanModule: "neural_solution"
-
- - stage: BanditCodeScan
- displayName: Bandit Code Scan
- dependsOn: []
- jobs:
- - job: Bandit
- displayName: Bandit
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "bandit"
- uploadPath: "bandit.log"
- scanModule: "neural_solution"
-
- - stage: PylintCodeScan
- displayName: Pylint Code Scan
- dependsOn: []
- jobs:
- - job: Pylint
- displayName: Pylint
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "pylint"
- uploadPath: "pylint.json"
- scanModule: "neural_solution"
diff --git a/.azure-pipelines/code-scan.yml b/.azure-pipelines/code-scan.yml
index afc894ee034..1f3f3beb647 100644
--- a/.azure-pipelines/code-scan.yml
+++ b/.azure-pipelines/code-scan.yml
@@ -44,15 +44,3 @@ stages:
parameters:
codeScanFileName: "bandit"
uploadPath: "bandit.log"
-
- - stage: PylintCodeScan
- displayName: Pylint Code Scan
- dependsOn: []
- jobs:
- - job: Pylint
- displayName: Pylint
- steps:
- - template: template/code-scan-template.yml
- parameters:
- codeScanFileName: "pylint"
- uploadPath: "pylint.json"
diff --git a/.azure-pipelines/docker/Dockerfile.devel b/.azure-pipelines/docker/Dockerfile.devel
index 30e6bf3ec11..2f3aab3ce72 100644
--- a/.azure-pipelines/docker/Dockerfile.devel
+++ b/.azure-pipelines/docker/Dockerfile.devel
@@ -36,7 +36,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
RUN ln -sf $(which python3) /usr/bin/python
-RUN python -m pip --no-cache-dir install --upgrade pip
+RUN python -m pip install pip==24.0
RUN python -m pip install --no-cache-dir setuptools
RUN pip list
diff --git a/.azure-pipelines/docker/DockerfileCodeScan.devel b/.azure-pipelines/docker/DockerfileCodeScan.devel
index f6359248a7c..611fe02e235 100644
--- a/.azure-pipelines/docker/DockerfileCodeScan.devel
+++ b/.azure-pipelines/docker/DockerfileCodeScan.devel
@@ -30,7 +30,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
RUN ln -sf $(which python3) /usr/bin/python
-RUN python -m pip install --no-cache-dir pylint==2.12.1\
+RUN python -m pip install --no-cache-dir \
bandit\
pyspelling\
pydocstyle
diff --git a/.azure-pipelines/model-test-3x.yml b/.azure-pipelines/model-test-3x.yml
index f247deb96fb..55320d9247c 100644
--- a/.azure-pipelines/model-test-3x.yml
+++ b/.azure-pipelines/model-test-3x.yml
@@ -10,7 +10,7 @@ pr:
include:
- neural_compressor/common
- neural_compressor/torch
- - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm
+ - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only
- setup.py
- requirements_pt.txt
- .azure-pipelines/scripts/models
@@ -112,7 +112,7 @@ stages:
displayName: "Publish report"
- script: |
if [ $(is_perf_reg) == 'true' ]; then
- echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
+        echo "Some benchmark regression occurred or the reference data needs to be updated, please check artifacts and reports."
exit 1
fi
- displayName: "Specify performance regression"
+ displayName: "Specify regression"
diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index cc9e431b608..b73321a8640 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -40,33 +40,20 @@ parameters:
displayName: Run ONNX models?
type: boolean
default: true
- - name: MXNet_Model
- displayName: Run MXNet models?
- type: boolean
- default: false
- name: TensorFlowModelList
type: object
default:
- resnet50v1.5
- ssd_resnet50_v1
-# - ssd_mobilenet_v1_ckpt
-# - inception_v1
-# - darknet19
-# - resnet-101
- name: PyTorchModelList
type: object
default:
- - resnet18
- resnet18_fx
- name: ONNXModelList
type: object
default:
- resnet50-v1-12
- - name: MXNetModelList
- type: object
- default:
- - resnet50v1
stages:
- stage: TensorFlowModels
@@ -114,21 +101,6 @@ stages:
modelName: ${{ model }}
framework: "onnxrt"
- - stage: MXNetModels
- displayName: Run MXNet Model
- pool: MODEL_PERF_TEST
- dependsOn: []
- condition: and(succeeded(), eq('${{ parameters.MXNet_Model }}', 'true'))
- jobs:
- - ${{ each model in parameters.MXNetModelList }}:
- - job:
- displayName: ${{ model }}
- steps:
- - template: template/model-template.yml
- parameters:
- modelName: ${{ model }}
- framework: "mxnet"
-
- stage: GenerateLogs
displayName: Generate Report
pool:
@@ -191,7 +163,7 @@ stages:
displayName: "Publish report"
- script: |
if [ $(is_perf_reg) == 'true' ]; then
- echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
+        echo "Some benchmark regression occurred or the reference data needs to be updated, please check artifacts and reports."
exit 1
fi
- displayName: "Specify performance regression"
+ displayName: "Specify regression"
diff --git a/.azure-pipelines/scripts/codeScan/pydocstyle/pydocstyle.sh b/.azure-pipelines/scripts/codeScan/pydocstyle/pydocstyle.sh
index db0a818db33..96f95bcc68b 100644
--- a/.azure-pipelines/scripts/codeScan/pydocstyle/pydocstyle.sh
+++ b/.azure-pipelines/scripts/codeScan/pydocstyle/pydocstyle.sh
@@ -17,11 +17,6 @@ log_dir="$work_dir/../scanLog"
mkdir -p $log_dir
scan_path="scan_path.txt"
-if [ "${scan_module}" = "neural_solution" ]; then
- scan_path="scan_path_neural_solution.txt"
-elif [ "${scan_module}" = "neural_insights" ]; then
- scan_path="scan_path_neural_insights.txt"
-fi
exit_code=0
for line in $(cat ${work_dir}/${scan_path})
diff --git a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
index b524f1f61db..ed2c4ccafca 100644
--- a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
+++ b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
@@ -15,3 +15,15 @@
/neural-compressor/neural_compressor/strategy
/neural-compressor/neural_compressor/training.py
/neural-compressor/neural_compressor/utils
+/neural-compressor/neural_compressor/common
+/neural-compressor/neural_compressor/tensorflow
+/neural-compressor/neural_compressor/torch/algorithms/layer_wise
+/neural-compressor/neural_compressor/torch/algorithms/mixed_precision
+/neural-compressor/neural_compressor/torch/algorithms/mx_quant
+/neural-compressor/neural_compressor/torch/algorithms/pt2e_quant
+/neural-compressor/neural_compressor/torch/algorithms/smooth_quant
+/neural-compressor/neural_compressor/torch/algorithms/static_quant
+/neural-compressor/neural_compressor/torch/algorithms/weight_only
+/neural-compressor/neural_compressor/torch/export
+/neural-compressor/neural_compressor/torch/quantization
+/neural-compressor/neural_compressor/torch/utils
diff --git a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path_neural_insights.txt b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path_neural_insights.txt
deleted file mode 100644
index 8a008fd19fb..00000000000
--- a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path_neural_insights.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-/neural-compressor/neural_insights/components
-/neural-compressor/neural_insights/utils
-/neural-compressor/neural_insights/web
-
diff --git a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path_neural_solution.txt b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path_neural_solution.txt
deleted file mode 100644
index 07a5b1483d7..00000000000
--- a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path_neural_solution.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-/neural-compressor/neural_solution/backend
-/neural-compressor/neural_solution/frontend
-/neural-compressor/neural_solution/utils
-
diff --git a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh
deleted file mode 100644
index 9103947e965..00000000000
--- a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/bin/bash
-
-for var in "$@"
-do
- case $var in
- --scan_module=*)
- scan_module=$(echo $var |cut -f2 -d=)
- ;;
- esac
-done
-
-source /neural-compressor/.azure-pipelines/scripts/change_color.sh
-RESET="echo -en \\E[0m \\n" # close color
-
-log_dir="/neural-compressor/.azure-pipelines/scripts/codeScan/scanLog"
-mkdir -p $log_dir
-
-apt-get install -y --no-install-recommends --fix-missing \
- autoconf \
- build-essential
-
-pip install -r /neural-compressor/requirements.txt
-pip install cmake
-
-pip install torch \
- horovod \
- google \
- autograd \
- ofa \
- fvcore \
- pymoo \
- onnxruntime_extensions \
- peft \
- tf_slim \
- transformers \
- accelerate \
- flask==2.1.3 \
- xgboost \
- datasets \
- prettytable \
- psutil \
- py-cpuinfo \
- pyyaml \
- pydantic \
- protobuf
-
-if [ "${scan_module}" = "neural_solution" ]; then
- cd /neural-compressor
- python setup.py install
-
- echo "Install Neural Solution ... "
- bash /neural-compressor/.azure-pipelines/scripts/install_neural_solution.sh
-
-elif [ "${scan_module}" = "neural_insights" ]; then
- cd /neural-compressor
- python setup.py install
-
- echo "Install Neural Insights ... "
- bash /neural-compressor/.azure-pipelines/scripts/install_neural_insights.sh
-
-fi
-
-echo "[DEBUG] list pipdeptree..."
-pip install pipdeptree
-pipdeptree
-
-python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto \
---ignored-modules=tensorflow,keras,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch,intel_extension_for_tensorflow,torchinfo,horovod,transformers \
-/neural-compressor/${scan_module} > $log_dir/pylint.json
-
-exit_code=$?
-
-$BOLD_YELLOW && echo " ----------------- Current pylint cmd start --------------------------" && $RESET
-echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch,intel_extension_for_tensorflow,torchinfo,horovod,transformers
-/neural-compressor/${scan_module}>$log_dir/pylint.json"
-$BOLD_YELLOW && echo " ----------------- Current pylint cmd end --------------------------" && $RESET
-
-$BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" && $RESET
-cat $log_dir/pylint.json
-$BOLD_YELLOW && echo " ----------------- Current log file output end --------------------------" && $RESET
-
-if [ ${exit_code} -ne 0 ]; then
- $BOLD_RED && echo "Error!! Please Click on the artifact button to download and view Pylint error details." && $RESET
- exit 1
-fi
-$BOLD_PURPLE && echo "Congratulations, Pylint check passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
-exit 0
diff --git a/.azure-pipelines/scripts/fwk_version.sh b/.azure-pipelines/scripts/fwk_version.sh
index b0ea9fa0b4f..82845e8982e 100644
--- a/.azure-pipelines/scripts/fwk_version.sh
+++ b/.azure-pipelines/scripts/fwk_version.sh
@@ -1,27 +1,10 @@
#!/bin/bash
echo "export FWs version..."
-test_mode=$1
-
-if [ "$test_mode" == "coverage" ] || [ "$test_mode" == "latest" ]; then
- export tensorflow_version='2.15.0-official'
- export pytorch_version='2.3.0+cpu'
- export torchvision_version='0.18.0+cpu'
- export ipex_version='2.3.0+cpu'
- export onnx_version='1.16.0'
- export onnxruntime_version='1.18.0'
- export mxnet_version='1.9.1'
-else
- export tensorflow_version='2.15.0-official'
- export pytorch_version='2.2.1+cpu'
- export torchvision_version='0.17.1+cpu'
- export ipex_version='2.2.0+cpu'
- export onnx_version='1.15.0'
- export onnxruntime_version='1.17.1'
- export mxnet_version='1.9.1'
-fi
-
-
-
-
-
+export tensorflow_version='2.15.0-official'
+export pytorch_version='2.3.0+cpu'
+export torchvision_version='0.18.0+cpu'
+export ipex_version='2.3.0+cpu'
+export onnx_version='1.16.0'
+export onnxruntime_version='1.18.0'
+export mxnet_version='1.9.1'
diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh
index d3cee07609c..6a80419259f 100644
--- a/.azure-pipelines/scripts/install_nc.sh
+++ b/.azure-pipelines/scripts/install_nc.sh
@@ -2,21 +2,21 @@
echo -e "\n Install Neural Compressor ... "
cd /neural-compressor
-if [[ $1 = *"3x_pt" ]]; then
+if [[ $1 = *"3x_pt"* ]]; then
+ if [[ $1 != *"3x_pt_fp8"* ]]; then
+ echo -e "\n Install torch CPU ... "
+ pip install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu
+ fi
python -m pip install --no-cache-dir -r requirements_pt.txt
python setup.py pt bdist_wheel
- pip install dist/neural_compressor*.whl --force-reinstall
+ pip install --no-deps dist/neural_compressor*.whl --force-reinstall
elif [[ $1 = *"3x_tf"* ]]; then
python -m pip install --no-cache-dir -r requirements_tf.txt
python setup.py tf bdist_wheel
pip install dist/neural_compressor*.whl --force-reinstall
-elif [[ $1 = *"3x_ort" ]]; then
- python -m pip install --no-cache-dir -r requirements_ort.txt
- python setup.py ort bdist_wheel
- pip install dist/neural_compressor*.whl --force-reinstall
else
python -m pip install --no-cache-dir -r requirements.txt
- python setup.py 2x bdist_wheel
+ python setup.py bdist_wheel
pip install dist/neural_compressor*.whl --force-reinstall
fi
diff --git a/.azure-pipelines/scripts/install_neural_insights.sh b/.azure-pipelines/scripts/install_neural_insights.sh
deleted file mode 100644
index daa8887f635..00000000000
--- a/.azure-pipelines/scripts/install_neural_insights.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-echo "Install Neural Insights ... "
-cd /neural-compressor
-python -m pip install --no-cache-dir -r neural_insights/requirements.txt
-python setup.py neural_insights bdist_wheel
-pip install dist/neural_insights*.whl
-pip list
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/install_neural_solution.sh b/.azure-pipelines/scripts/install_neural_solution.sh
deleted file mode 100644
index d0139c85132..00000000000
--- a/.azure-pipelines/scripts/install_neural_solution.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-echo "Install Open MPI ..."
-wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz
-gunzip -c openmpi-4.1.5.tar.gz | tar xf -
-cd openmpi-4.1.5
-./configure --prefix=/usr/local
-make all install
-MPI_DIR=/usr/local/lib/openmpi
-export PATH=$MPI_DIR/bin:$PATH
-export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
-echo "Current Path ... "
-echo $PATH
-
-echo "Current LD_LIBRARY_PATH ... "
-echo $LD_LIBRARY_PATH
-
-echo "check mpicc ..."
-which mpicc
-which mpirun
-
-echo "Install Neural Solution ... "
-cd /neural-compressor
-python -m pip install --no-cache-dir -r neural_solution/requirements.txt
-python setup.py neural_solution sdist bdist_wheel
-pip install dist/neural_solution*.whl
-pip list
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh
index 2c356d7307b..adf4f01df67 100644
--- a/.azure-pipelines/scripts/models/env_setup.sh
+++ b/.azure-pipelines/scripts/models/env_setup.sh
@@ -100,12 +100,6 @@ if [[ "${fwk_ver}" != "latest" ]]; then
elif [[ "${framework}" == "onnxrt" ]]; then
pip install onnx==1.15.0
pip install onnxruntime==${fwk_ver}
- elif [[ "${framework}" == "mxnet" ]]; then
- pip install numpy==1.23.5
- echo "re-install pycocotools resolve the issue with numpy..."
- pip uninstall pycocotools -y
- pip install --no-cache-dir pycocotools
- pip install mxnet==${fwk_ver}
fi
fi
@@ -118,9 +112,6 @@ if [ -f "requirements.txt" ]; then
sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt
sed -i '/^intel-tensorflow/d' requirements.txt
fi
- if [ "${framework}" == "mxnet" ]; then
- sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt
- fi
if [ "${framework}" == "pytorch" ]; then
sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt
fi
diff --git a/.azure-pipelines/scripts/models/generate_report.sh b/.azure-pipelines/scripts/models/generate_report.sh
index f4194585b6d..e13118fcffe 100644
--- a/.azure-pipelines/scripts/models/generate_report.sh
+++ b/.azure-pipelines/scripts/models/generate_report.sh
@@ -245,13 +245,9 @@ function generate_html_core {
if((new_result == nan && previous_result == nan) || new_result == "unknown"){
printf("
");
} else{
- if(new_result == nan) {
- job_status = "fail"
- status_png = "background-color:#FFD2D2";
- printf(" ", status_png);
- } else{
- printf(" ");
- }
+ job_status = "fail"
+ status_png = "background-color:#FFD2D2";
+ printf(" ", status_png);
}
}
}
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 8cb5f8c9855..28201dca309 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -80,8 +80,6 @@ if [ "${mode}" == "env_setup" ]; then
elif [ "${mode}" == "tuning" ]; then
if [ "${framework}" == "onnxrt" ]; then
output_model=${log_dir}/${model}/${framework}-${model}-tune.onnx
- elif [ "${framework}" == "mxnet" ]; then
- output_model=${log_dir}/${model}/resnet50_v1
elif [ "${framework}" == "tensorflow" ]; then
output_model=${log_dir}/${model}/${framework}-${model}-tune.pb
fi
@@ -140,8 +138,6 @@ elif [ "${mode}" == "int8_benchmark" ]; then
$BOLD_YELLOW && echo "====== run benchmark int8 =======" && $RESET
if [[ "${framework}" == "onnxrt" ]]; then
model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx"
- elif [[ "${framework}" == "mxnet" ]]; then
- model_name="${log_dir}/${model}"
elif [[ "${framework}" == "tensorflow" ]]; then
model_name="${log_dir}/${model}/${framework}-${model}-tune.pb"
elif [[ "${framework}" == "pytorch" ]]; then
diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh
deleted file mode 100644
index 21be4c96031..00000000000
--- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-set -eo pipefail
-# get parameters
-PATTERN='[-a-zA-Z0-9_]*='
-
-for i in "$@"
-do
- case $i in
- --model=*)
- model=`echo $i | sed "s/${PATTERN}//"`;;
- --mode=*)
- mode=`echo $i | sed "s/${PATTERN}//"`;;
- --USE_TUNE_ACC=*)
- USE_TUNE_ACC=`echo $i | sed "s/${PATTERN}//"`;;
- --PERF_STABLE_CHECK=*)
- PERF_STABLE_CHECK=`echo $i | sed "s/${PATTERN}//"`;;
- --BUILD_BUILDID=*)
- BUILD_BUILDID=`echo $i | sed "s/${PATTERN}//"`;;
- *)
- echo "Parameter $i not recognized."; exit 1;;
- esac
-done
-
-echo "specify FWs version..."
-source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
-FRAMEWORK="mxnet"
-FRAMEWORK_VERSION=${mxnet_version}
-
-inc_new_api=false
-# ======== set up config for mxnet models ========
-if [ "${model}" == "resnet50v1" ]; then
- model_src_dir="image_recognition/cnn_models/quantization/ptq"
- dataset_location="/tf_dataset/mxnet/val_256_q90.rec"
- input_model="/tf_dataset/mxnet/resnet50_v1"
- yaml="cnn.yaml"
- strategy="mse"
- batch_size=1
- new_benchmark=false
- tuning_cmd="bash run_tuning.sh --topology=resnet50_v1 --dataset_location=${dataset_location} --input_model=${input_model}"
- benchmark_cmd="bash run_benchmark.sh --topology=resnet50_v1 --dataset_location=${dataset_location} --batch_size=1 --iters=500 --mode=benchmark"
-fi
-
-
-/bin/bash run_model_trigger_common.sh \
- --yaml=${yaml} \
- --framework=${FRAMEWORK} \
- --fwk_ver=${FRAMEWORK_VERSION} \
- --model=${model} \
- --model_src_dir=${model_src_dir} \
- --dataset_location=${dataset_location} \
- --input_model=${input_model} \
- --batch_size=${batch_size} \
- --strategy=${strategy} \
- --new_benchmark=${new_benchmark} \
- --tuning_cmd="${tuning_cmd}" \
- --benchmark_cmd="${benchmark_cmd}" \
- --inc_new_api="${inc_new_api}" \
- --mode=${mode} \
- --USE_TUNE_ACC=${USE_TUNE_ACC} \
- --PERF_STABLE_CHECK=${PERF_STABLE_CHECK} \
- --BUILD_BUILDID=${BUILD_BUILDID}
diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
index 32bd2eb0109..f0675fe505e 100644
--- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
@@ -53,15 +53,15 @@ elif [ "${model}" == "resnet18_fx" ]; then
tuning_cmd="bash run_quant.sh --topology=resnet18 --dataset_location=${dataset_location} --input_model=${input_model}"
benchmark_cmd="bash run_benchmark.sh --topology=resnet18 --dataset_location=${dataset_location} --mode=performance --batch_size=${batch_size} --iters=500"
elif [ "${model}" == "opt_125m_woq_gptq_int4" ]; then
- model_src_dir="nlp/huggingface_models/language-modeling/quantization/llm"
+ model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only"
inc_new_api=3x_pt
tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4"
elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_bnb" ]; then
- model_src_dir="nlp/huggingface_models/language-modeling/quantization/llm"
+ model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only"
inc_new_api=3x_pt
tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_bnb"
elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_ggml" ]; then
- model_src_dir="nlp/huggingface_models/language-modeling/quantization/llm"
+ model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only"
inc_new_api=3x_pt
tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_ggml"
fi
@@ -72,6 +72,7 @@ FRAMEWORK="pytorch"
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
if [[ "${inc_new_api}" == "3x"* ]]; then
FRAMEWORK_VERSION="latest"
+ export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
else
FRAMEWORK_VERSION=${pytorch_version}
TORCH_VISION_VERSION=${torchvision_version}
diff --git a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh
index 386ec397c81..03f4fd02dbf 100644
--- a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh
+++ b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh
@@ -25,7 +25,8 @@ git config --global --add safe.directory /neural-compressor
git fetch
git checkout master
rm -rf build dist *egg-info
-echo y | pip uninstall neural_compressor_${1}
+binary_index="${1%_fp8}"
+echo y | pip uninstall neural_compressor_${binary_index}
cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1}
coverage erase
diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_ort b/.azure-pipelines/scripts/ut/3x/coverage.3x_ort
deleted file mode 100644
index 1404dccbaee..00000000000
--- a/.azure-pipelines/scripts/ut/3x/coverage.3x_ort
+++ /dev/null
@@ -1,15 +0,0 @@
-[run]
-branch = True
-
-[report]
-include =
- */neural_compressor/common/*
- */neural_compressor/onnxrt/*
-exclude_lines =
- pragma: no cover
- raise NotImplementedError
- raise TypeError
- if self.device == "gpu":
- if device == "gpu":
- except ImportError:
- except Exception as e:
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt
index 34fc7f29fcf..dd4991f5fa7 100644
--- a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt
+++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt
@@ -5,6 +5,9 @@ branch = True
include =
*/neural_compressor/common/*
*/neural_compressor/torch/*
+omit =
+ */neural_compressor/torch/algorithms/fp8_quant/*
+ */neural_compressor/torch/amp/*
exclude_lines =
pragma: no cover
raise NotImplementedError
diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
new file mode 100644
index 00000000000..9b12b354d83
--- /dev/null
+++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
@@ -0,0 +1,14 @@
+[run]
+branch = True
+
+[report]
+include =
+ */neural_compressor/torch/algorithms/fp8_quant/*
+exclude_lines =
+ pragma: no cover
+ raise NotImplementedError
+ raise TypeError
+ if self.device == "gpu":
+ if device == "gpu":
+ except ImportError:
+ except Exception as e:
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_ort.sh b/.azure-pipelines/scripts/ut/3x/run_3x_ort.sh
deleted file mode 100644
index 5f8550ea742..00000000000
--- a/.azure-pipelines/scripts/ut/3x/run_3x_ort.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-python -c "import neural_compressor as nc"
-test_case="run 3x ONNXRT"
-echo "${test_case}"
-
-# install requirements
-echo "set up UT env..."
-pip install -r /neural-compressor/test/3x/onnxrt/requirements.txt
-pip install pytest-cov
-pip install pytest-html
-pip list
-
-export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_ort
-inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
-cd /neural-compressor/test/3x || exit 1
-rm -rf torch
-rm -rf tensorflow
-
-LOG_DIR=/neural-compressor/log_dir
-mkdir -p ${LOG_DIR}
-ut_log_name=${LOG_DIR}/ut_3x_ort.log
-pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
-
-cp report.html ${LOG_DIR}/
-
-if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
- echo "Find errors in pytest case, please check the output..."
- echo "Please search for '== FAILURES ==' or '== ERRORS =='"
- exit 1
-fi
-
-# if ut pass, collect the coverage file into artifacts
-cp .coverage ${LOG_DIR}/.coverage
-
-echo "UT finished successfully! "
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh
index b91bc182c7c..fba15ce6c4e 100644
--- a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh
+++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh
@@ -5,6 +5,7 @@ echo "${test_case}"
# install requirements
echo "set up UT env..."
+export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
pip install -r /neural-compressor/test/3x/torch/requirements.txt
pip install pytest-cov
pip install pytest-html
@@ -14,7 +15,8 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
cd /neural-compressor/test/3x || exit 1
rm -rf tensorflow
-rm -rf onnxrt
+rm -rf torch/algorithms/fp8_quant
+rm -rf torch/quantization/fp8_quant
LOG_DIR=/neural-compressor/log_dir
mkdir -p ${LOG_DIR}
diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
new file mode 100644
index 00000000000..753dd8ac440
--- /dev/null
+++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+python -c "import neural_compressor as nc"
+test_case="run 3x Torch Habana FP8"
+echo "${test_case}"
+
+# install requirements
+echo "set up UT env..."
+export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
+sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
+pip install -r /neural-compressor/test/3x/torch/requirements.txt
+pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0
+pip install pytest-cov
+pip install pytest-html
+pip install pytest-html-merger
+pip list
+
+export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
+inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
+cd /neural-compressor/test/3x || exit 1
+
+LOG_DIR=/neural-compressor/log_dir
+mkdir -p ${LOG_DIR}
+ut_log_name=${LOG_DIR}/ut_3x_pt_fp8.log
+pytest --cov="${inc_path}" -vs --disable-warnings --html=report_1.html --self-contained-html torch/quantization/weight_only/test_load.py 2>&1 | tee -a ${ut_log_name}
+pytest --cov="${inc_path}" -vs --disable-warnings --html=report_2.html --self-contained-html torch/quantization/weight_only/test_rtn.py 2>&1 | tee -a ${ut_log_name}
+# pytest --cov="${inc_path}" -vs --disable-warnings --html=report_3.html --self-contained-html torch/quantization/weight_only/test_autoround.py 2>&1 | tee -a ${ut_log_name}
+pytest --cov="${inc_path}" -vs --disable-warnings --html=report_4.html --self-contained-html torch/quantization/fp8_quant 2>&1 | tee -a ${ut_log_name}
+
+mkdir -p report && mv *.html report
+pytest_html_merger -i ./report -o ./report.html
+cp report.html ${LOG_DIR}/
+
+if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
+ echo "Find errors in pytest case, please check the output..."
+ echo "Please search for '== FAILURES ==' or '== ERRORS =='"
+ exit 1
+fi
+
+# if ut pass, collect the coverage file into artifacts
+cp .coverage ${LOG_DIR}/.coverage
+
+echo "UT finished successfully! "
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh b/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh
index d1aee3a98cb..1032e6dc6f1 100644
--- a/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh
+++ b/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh
@@ -16,20 +16,38 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__
cd /neural-compressor/test/3x || exit 1
rm -rf torch
rm -rf onnxrt
-rm -rf tensorflow/quantization/ptq/newapi
mv tensorflow/keras ../3x_keras
-mv tensorflow/quantization/itex ./3x_itex
+mv tensorflow/quantization/ptq/newapi ../3x_newapi
LOG_DIR=/neural-compressor/log_dir
mkdir -p ${LOG_DIR}
ut_log_name=${LOG_DIR}/ut_3x_tf.log
+
+# test for tensorflow ut
pytest --cov="${inc_path}" -vs --disable-warnings --html=report_tf_quant.html --self-contained-html ./tensorflow/quantization 2>&1 | tee -a ${ut_log_name}
rm -rf tensorflow/quantization
+pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_tf_test_quantize_model.html --self-contained-html ./tensorflow/test_quantize_model.py 2>&1 | tee -a ${ut_log_name}
+rm -rf tensorflow/test_quantize_model.py
pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_tf.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
+# test for tensorflow new api ut
+pip uninstall tensorflow -y
+pip install /tf_dataset/tf_binary/230928/tensorflow*.whl
+pip install cmake
+pip install protobuf==3.20.3
+pip install horovod==0.27.0
+pip list
+rm -rf tensorflow/*
+mkdir -p tensorflow/quantization/ptq
+mv ../3x_newapi tensorflow/quantization/ptq/newapi
+find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=${inc_path} --cov-append -vs --disable-warnings ,g" > run.sh
+cat run.sh
+bash run.sh 2>&1 | tee -a ${ut_log_name}
+
+# test for itex ut
rm -rf tensorflow/*
mv ../3x_keras tensorflow/keras
-mv ../3x_itex tensorflow/quantization/itex
+pip uninstall tensorflow -y
pip install intel-extension-for-tensorflow[cpu]
pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_keras.html --self-contained-html ./tensorflow 2>&1 | tee -a ${ut_log_name}
diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh b/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh
deleted file mode 100644
index 218e32a9b3a..00000000000
--- a/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-python -c "import neural_compressor as nc"
-test_case="run 3x New TF API"
-echo "${test_case}"
-
-# install requirements
-echo "set up UT env..."
-pip install -r /neural-compressor/test/3x/tensorflow/requirements.txt
-pip install pytest-html
-pip install pytest-html-merger
-
-pip uninstall tensorflow -y
-pip install /tf_dataset/tf_binary/230928/tensorflow*.whl
-pip install cmake
-pip install protobuf==3.20.3
-pip install horovod==0.27.0
-pip list
-
-cd /neural-compressor/test/3x || exit 1
-mv tensorflow/quantization/ptq/newapi ../3x_newapi
-rm -rf ./*
-
-LOG_DIR=/neural-compressor/log_dir
-mkdir -p ${LOG_DIR}
-ut_log_name=${LOG_DIR}/ut_3x_new_tf.log
-
-mkdir -p tensorflow/quantization/ptq
-mv ../3x_newapi tensorflow/quantization/ptq/newapi
-
-pytest -vs --disable-warnings --html=report_new_tf_quant_one_case.html --self-contained-html ./tensorflow/quantization/ptq/newapi/test_big_saved_model.py 2>&1 | tee -a ${ut_log_name}
-rm -rf tensorflow/quantization/ptq/newapi/test_big_saved_model.py
-pytest -vs --disable-warnings --html=report_new_tf_quant.html --self-contained-html ./tensorflow/quantization/ptq/newapi 2>&1 | tee -a ${ut_log_name}
-
-mkdir -p report
-mv *.html report
-pytest_html_merger -i ./report -o ./report.html
-
-cp report.html ${LOG_DIR}/
-
-if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
- echo "Find errors in pytest case, please check the output..."
- echo "Please search for '== FAILURES ==' or '== ERRORS =='"
- exit 1
-fi
-
-echo "UT finished successfully! "
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh
index d5876b07cef..3715c485631 100644
--- a/.azure-pipelines/scripts/ut/env_setup.sh
+++ b/.azure-pipelines/scripts/ut/env_setup.sh
@@ -92,7 +92,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
fi
if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then
- pip install auto-round
+ pip install git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
fi
# test deps
diff --git a/.azure-pipelines/scripts/ut/run_basic_others.sh b/.azure-pipelines/scripts/ut/run_basic_others.sh
index df7acf52cf7..301d6fd14d1 100644
--- a/.azure-pipelines/scripts/ut/run_basic_others.sh
+++ b/.azure-pipelines/scripts/ut/run_basic_others.sh
@@ -14,7 +14,6 @@ cd /neural-compressor/test || exit 1
find . -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
sed -i '/ adaptor\//d' run.sh
sed -i '/ tfnewapi\//d' run.sh
-sed -i '/ neural_coder\//d' run.sh
sed -i '/ itex\//d' run.sh
sed -i '/ pruning_with_pt/d' run.sh
sed -i '/ pruning_with_tf/d' run.sh
diff --git a/.azure-pipelines/scripts/ut/run_itrex.sh b/.azure-pipelines/scripts/ut/run_itrex.sh
index 74ff01c7062..2bbbf958398 100644
--- a/.azure-pipelines/scripts/ut/run_itrex.sh
+++ b/.azure-pipelines/scripts/ut/run_itrex.sh
@@ -4,6 +4,10 @@ source /neural-compressor/.azure-pipelines/scripts/change_color.sh
python -c "import neural_compressor as nc;print(nc.version.__version__)"
echo "run itrex ut..."
+# install inc 3x deps
+pip install -r /neural-compressor/requirements_pt.txt
+export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
+
# prepare itrex
git clone https://github.com/intel/intel-extension-for-transformers.git /intel-extension-for-transformers
cd /intel-extension-for-transformers && git rev-parse --short HEAD
@@ -15,6 +19,8 @@ sed -i '/neural-compressor.git/d' /intel-extension-for-transformers/tests/requir
pip install -r /intel-extension-for-transformers/tests/requirements.txt
# workaround
pip install onnx==1.15.0
+echo "pip list itrex ut deps..."
+pip list
LOG_DIR=/neural-compressor/log_dir
mkdir -p ${LOG_DIR}
ut_log_name=${LOG_DIR}/ut_itrex.log
diff --git a/.azure-pipelines/scripts/ut/run_ncoder.sh b/.azure-pipelines/scripts/ut/run_ncoder.sh
deleted file mode 100644
index 3c487eebd7b..00000000000
--- a/.azure-pipelines/scripts/ut/run_ncoder.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-python -c "import neural_compressor as nc;print(nc.version.__version__)"
-echo "run coder"
-
-echo "no FWKs need to be installed..."
-echo "no requirements need to be installed..."
-
-cd /neural-compressor/test || exit 1
-find ./neural_coder -name "test*.py" | sed 's,\.\/,python ,g' | sed 's/$/ --verbose/' > run.sh
-
-LOG_DIR=/neural-compressor/log_dir
-mkdir -p ${LOG_DIR}
-ut_log_name=${LOG_DIR}/ut_neural_coder.log
-
-echo "cat run.sh..."
-sort run.sh -o run.sh
-cat run.sh | tee ${ut_log_name}
-echo "------UT start-------"
-bash -x run.sh 2>&1 | tee -a ${ut_log_name}
-echo "------UT end -------"
-
-if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
- echo "Find errors in UT test, please check the output..."
- exit 1
-fi
-echo "UT finished successfully! "
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/run_neural_insights.sh b/.azure-pipelines/scripts/ut/run_neural_insights.sh
deleted file mode 100644
index 5f0f2fe5521..00000000000
--- a/.azure-pipelines/scripts/ut/run_neural_insights.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-python -c "import neural_compressor as nc;print(nc.version.__version__)"
-echo "run neural insights ut..."
-
-# Install Neural Solution
-bash /neural-compressor/.azure-pipelines/scripts/install_neural_insights.sh
-
-# Install requirements for test
-cd /neural-compressor/neural_insights/test || exit 1
-if [ -f "requirements.txt" ]; then
- n=0
- until [ "$n" -ge 3 ]
- do
- python -m pip install --no-cache-dir -r requirements.txt && break
- n=$((n+1))
- sleep 5
- done
- pip list
-else
- echo "Not found requirements.txt file."
-fi
-
-cd /neural-compressor/neural_insights || exit 1
-find ./test -name "test*.py" | sed 's,\.\/,python ,g' | sed 's/$/ --verbose/' > run.sh
-
-LOG_DIR=/neural-compressor/log_dir
-mkdir -p ${LOG_DIR}
-ut_log_name=${LOG_DIR}/ut_neural_insights.log
-
-echo "cat run.sh..."
-sort run.sh -o run.sh
-cat run.sh | tee ${ut_log_name}
-echo "------UT start-------"
-bash -x run.sh 2>&1 | tee -a ${ut_log_name}
-echo "------UT end -------"
-
-if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
- echo "Find errors in UT test, please check the output..."
- exit 1
-fi
-echo "UT finished successfully! "
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/ut/run_neural_solution.sh b/.azure-pipelines/scripts/ut/run_neural_solution.sh
deleted file mode 100644
index 42041e4a087..00000000000
--- a/.azure-pipelines/scripts/ut/run_neural_solution.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-python -c "import neural_compressor as nc;print(nc.version.__version__)"
-echo "run neural solution ut..."
-
-echo "no FWKs need to be installed..."
-
-# Install Neural Solution
-bash /neural-compressor/.azure-pipelines/scripts/install_neural_solution.sh
-
-# Install requirements for test
-cd /neural-compressor/neural_solution/test || exit 1
-if [ -f "requirements.txt" ]; then
- n=0
- until [ "$n" -ge 3 ]
- do
- python -m pip install --no-cache-dir -r requirements.txt && break
- n=$((n+1))
- sleep 5
- done
- pip list
-else
- echo "Not found requirements.txt file."
-fi
-
-cd /neural-compressor/neural_solution || exit 1
-find ./test -name "test*.py" | sed 's,\.\/,python ,g' | sed 's/$/ --verbose/' > run.sh
-
-LOG_DIR=/neural-compressor/log_dir
-mkdir -p ${LOG_DIR}
-ut_log_name=${LOG_DIR}/ut_neural_solution.log
-
-echo "cat run.sh..."
-sort run.sh -o run.sh
-cat run.sh | tee ${ut_log_name}
-echo "------UT start-------"
-bash -x run.sh 2>&1 | tee -a ${ut_log_name}
-echo "------UT end -------"
-
-if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
- echo "Find errors in UT test, please check the output..."
- exit 1
-fi
-echo "UT finished successfully! "
\ No newline at end of file
diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml
index e7b563bcea7..51103c39e21 100644
--- a/.azure-pipelines/template/docker-template.yml
+++ b/.azure-pipelines/template/docker-template.yml
@@ -16,6 +16,9 @@ parameters:
- name: repo
type: string
default: "https://github.com/intel/neural-compressor"
+ - name: imageSource
+ type: string
+ default: "build"
steps:
- task: Bash@3
@@ -24,7 +27,7 @@ steps:
script: |
docker ps -a
if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then
- docker start $(docker ps -aq)
+ docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}")
echo "remove left files through container ..."
docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true"
fi
@@ -33,7 +36,7 @@ steps:
- ${{ if eq(parameters.dockerConfigName, 'commonDockerConfig') }}:
- script: |
rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
- echo y | docker system prune --all
+ echo y | docker image prune -a
displayName: "Clean workspace"
- checkout: self
@@ -45,7 +48,7 @@ steps:
rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
mkdir ${BUILD_SOURCESDIRECTORY}
chmod 777 ${BUILD_SOURCESDIRECTORY}
- echo y | docker system prune --all
+ echo y | docker image prune -a
displayName: "Clean workspace"
- checkout: none
@@ -57,19 +60,25 @@ steps:
git checkout master
displayName: "Checkout out master"
- - script: |
- if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
- docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
- fi
- docker images | grep -i ${{ parameters.repoName }}
- if [[ $? -ne 0 ]]; then
- echo "NO Such Repo"
- exit 1
- fi
- displayName: "Build develop docker image"
+ - ${{ if eq(parameters.imageSource, 'build') }}:
+ - script: |
+ if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
+ docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
+ fi
+ docker images | grep -i ${{ parameters.repoName }}
+ if [[ $? -ne 0 ]]; then
+ echo "NO Such Repo"
+ exit 1
+ fi
+ displayName: "Build develop docker image"
+
+ - ${{ if eq(parameters.imageSource, 'pull') }}:
+ - script: |
+ docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
+ displayName: "Pull habana docker image"
- script: |
- docker stop $(docker ps -aq)
+ docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}")
docker rm -vf ${{ parameters.containerName }} || true
env | sort
displayName: "Clean docker container"
@@ -79,8 +88,15 @@ steps:
inputs:
targetType: "inline"
script: |
- docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
- -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 ${{ parameters.repoName }}:${{ parameters.repoTag }}
+ if [[ "${{ parameters.imageSource }}" == "build" ]]; then
+ docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
+ -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \
+ ${{ parameters.repoName }}:${{ parameters.repoTag }}
+ else
+ docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
+ --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
+ -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
+ fi
echo "Show the container list after docker run ... "
docker ps -a
displayName: "Docker run - ${{ parameters.containerName }} Container"
diff --git a/.azure-pipelines/template/ut-template.yml b/.azure-pipelines/template/ut-template.yml
index b7fecacd3d7..d8908d22a35 100644
--- a/.azure-pipelines/template/ut-template.yml
+++ b/.azure-pipelines/template/ut-template.yml
@@ -17,6 +17,9 @@ parameters:
- name: utContainerName
type: string
default: "utTest"
+ - name: imageSource
+ type: string
+ default: "build"
steps:
- template: docker-template.yml
@@ -27,6 +30,7 @@ steps:
dockerFileName: "Dockerfile"
containerName: ${{ parameters.utContainerName }}
repo: ${{ parameters.repo }}
+ imageSource: ${{ parameters.imageSource }}
- script: |
docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \
diff --git a/.azure-pipelines/ut-3x-ort.yml b/.azure-pipelines/ut-3x-ort.yml
deleted file mode 100644
index 42636df2314..00000000000
--- a/.azure-pipelines/ut-3x-ort.yml
+++ /dev/null
@@ -1,109 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_compressor/common
- - neural_compressor/onnxrt
- - test/3x/onnxrt
- - test/3x/common
- - setup.py
- - requirements_ort.txt
- - .azure-pipelines/scripts/ut/3x/run_3x_ort.sh
-
-pool: ICX-16C
-
-variables:
- IMAGE_NAME: "neural-compressor"
- IMAGE_TAG: "py310"
- UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
- DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
- ARTIFACT_NAME: "UT_coverage_report_3x_ort"
- REPO: $(Build.Repository.Uri)
-
-stages:
- - stage: ONNXRT
- displayName: Unit Test 3x ONNXRT
- dependsOn: []
- jobs:
- - job:
- displayName: Unit Test 3x ONNXRT
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "3x/run_3x_ort"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_3x"
-
-
- - stage: ONNXRT_baseline
- displayName: Unit Test 3x ONNXRT baseline
- dependsOn: []
- jobs:
- - job:
- displayName: Unit Test 3x ONNXRT baseline
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "gitCloneDockerConfig"
- utScriptFileName: "3x/run_3x_ort"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_3x_baseline"
- repo: $(REPO)
-
- - stage: Coverage
- displayName: "Coverage Compare"
- pool:
- vmImage: "ubuntu-latest"
- dependsOn: [ONNXRT, ONNXRT_baseline]
- jobs:
- - job: CollectDatafiles
- steps:
- - script: |
- if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
- docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
- fi
- docker images | grep -i ${IMAGE_NAME}
- if [[ $? -ne 0 ]]; then
- echo "NO Such Repo"
- exit 1
- fi
- displayName: "Build develop docker image"
-
- - task: DownloadPipelineArtifact@2
- inputs:
- artifact:
- patterns: '*_coverage/.coverage'
- path: $(DOWNLOAD_PATH)
-
- - script: |
- echo "--- create container ---"
- docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
- echo "--- docker ps ---"
- docker ps
- echo "--- collect logs ---"
- docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
- && bash install_nc.sh 3x_ort \
- && bash ut/3x/collect_log_3x.sh 3x_ort"
- displayName: "Collect UT Coverage"
-
- - task: PublishPipelineArtifact@1
- condition: succeededOrFailed()
- inputs:
- targetPath: $(UPLOAD_PATH)
- artifact: $(ARTIFACT_NAME)
- publishLocation: "pipeline"
-
- - task: Bash@3
- condition: always()
- inputs:
- targetType: "inline"
- script: |
- docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
- displayName: "Docker clean up"
diff --git a/.azure-pipelines/ut-3x-pt-fp8.yml b/.azure-pipelines/ut-3x-pt-fp8.yml
new file mode 100644
index 00000000000..6f36ddecc64
--- /dev/null
+++ b/.azure-pipelines/ut-3x-pt-fp8.yml
@@ -0,0 +1,110 @@
+trigger: none
+
+pr:
+ autoCancel: true
+ drafts: false
+ branches:
+ include:
+ - master
+ paths:
+ include:
+ - .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
+ - .azure-pipelines/ut-3x-pt-fp8.yml
+ - neural_compressor/common
+ - neural_compressor/torch
+ - test/3x/torch/algorithms/fp8_quant
+ - test/3x/torch/quantization/fp8_quant
+ - setup.py
+ - requirements_pt.txt
+
+pool: GAUDI
+
+variables:
+ IMAGE_NAME: "neural-compressor"
+ IMAGE_TAG: "py310"
+ UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
+ DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
+ ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8"
+ REPO: $(Build.Repository.Uri)
+
+stages:
+ - stage: Torch_habana
+ displayName: Torch 3x Habana FP8
+ dependsOn: []
+ jobs:
+ - job:
+ displayName: Torch 3x Habana FP8
+ steps:
+ - template: template/ut-template.yml
+ parameters:
+ imageSource: "pull"
+ dockerConfigName: "commonDockerConfig"
+ utScriptFileName: "3x/run_3x_pt_fp8"
+ uploadPath: $(UPLOAD_PATH)
+ utArtifact: "ut_3x"
+
+ - stage: Torch_habana_baseline
+ displayName: Torch 3x Habana FP8 baseline
+ dependsOn: []
+ jobs:
+ - job:
+ displayName: Torch 3x Habana FP8 baseline
+ steps:
+ - template: template/ut-template.yml
+ parameters:
+ imageSource: "pull"
+ dockerConfigName: "gitCloneDockerConfig"
+ utScriptFileName: "3x/run_3x_pt_fp8"
+ uploadPath: $(UPLOAD_PATH)
+ utArtifact: "ut_3x_baseline"
+
+ - stage: Coverage
+ displayName: "Coverage Compare"
+ pool:
+ vmImage: "ubuntu-latest"
+ dependsOn: [Torch_habana, Torch_habana_baseline]
+ jobs:
+ - job: CollectDatafiles
+ steps:
+ - script: |
+ if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
+ docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
+ fi
+ docker images | grep -i ${IMAGE_NAME}
+ if [[ $? -ne 0 ]]; then
+ echo "NO Such Repo"
+ exit 1
+ fi
+ displayName: "Build develop docker image"
+
+ - task: DownloadPipelineArtifact@2
+ inputs:
+ artifact:
+ patterns: '*_coverage/.coverage'
+ path: $(DOWNLOAD_PATH)
+
+ - script: |
+ echo "--- create container ---"
+ docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
+ echo "--- docker ps ---"
+ docker ps
+ echo "--- collect logs ---"
+ docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
+ && bash install_nc.sh 3x_pt_fp8 \
+ && bash ut/3x/collect_log_3x.sh 3x_pt_fp8"
+ displayName: "Collect UT Coverage"
+
+ - task: PublishPipelineArtifact@1
+ condition: succeededOrFailed()
+ inputs:
+ targetPath: $(UPLOAD_PATH)
+ artifact: $(ARTIFACT_NAME)
+ publishLocation: "pipeline"
+
+ - task: Bash@3
+ condition: always()
+ inputs:
+ targetType: "inline"
+ script: |
+ docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
+ displayName: "Docker clean up"
diff --git a/.azure-pipelines/ut-3x-tf.yml b/.azure-pipelines/ut-3x-tf.yml
index 0fdc0c02f26..df852e28000 100644
--- a/.azure-pipelines/ut-3x-tf.yml
+++ b/.azure-pipelines/ut-3x-tf.yml
@@ -41,20 +41,6 @@ stages:
uploadPath: $(UPLOAD_PATH)
utArtifact: "ut_3x"
- - stage: NewTF
- displayName: Unit Test 3x New TF API
- dependsOn: []
- jobs:
- - job:
- displayName: Unit Test 3x New TF API
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "3x/run_3x_tf_new_api"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_3x_tf_new_api"
-
- stage: TensorFlow_baseline
displayName: Unit Test 3x TensorFlow baseline
dependsOn: []
diff --git a/.azure-pipelines/ut-basic-no-cover.yml b/.azure-pipelines/ut-basic-no-cover.yml
deleted file mode 100644
index 9a0fbb190b5..00000000000
--- a/.azure-pipelines/ut-basic-no-cover.yml
+++ /dev/null
@@ -1,109 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_compressor
- - test
- - setup.py
- - requirements.txt
- - .azure-pipelines/scripts/ut
- exclude:
- - test/neural_coder
- - test/3x
- - neural_compressor/common
- - neural_compressor/torch
- - neural_compressor/tensorflow
- - neural_compressor/onnxrt
- - .azure-pipelines/scripts/ut/3x
-
-pool: ICX-16C
-
-variables:
- IMAGE_NAME: "neural-compressor"
- IMAGE_TAG: "py310"
- UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
- DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
- ARTIFACT_NAME: "UT_report"
- REPO: $(Build.Repository.Uri)
-
-stages:
- - stage: Adaptor
- displayName: Unit Test FWKs adaptor
- dependsOn: []
- jobs:
- - job:
- displayName: Test FWKs adaptor
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "run_basic_adaptor"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_adaptor"
- utTestMode: "no-coverage"
- utContainerName: "utTest-no-coverage"
-
- - stage: API
- displayName: Unit Test User facing API
- dependsOn: []
- jobs:
- - job:
- displayName: Test User facing API
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "run_basic_api"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_api"
- utTestMode: "no-coverage"
- utContainerName: "utTest-no-coverage"
-
- - stage: Pruning
- displayName: Unit Test Pruning
- dependsOn: []
- jobs:
- - job:
- displayName: Test PyTorch Pruning
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "run_basic_pt_pruning"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_pt-pruning"
- utTestMode: "no-coverage"
- utContainerName: "utTest-no-coverage"
- - job:
- displayName: Test TensorFlow Pruning
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "run_basic_tf_pruning"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_tf-pruning"
- utTestMode: "no-coverage"
- utContainerName: "utTest-no-coverage"
-
- - stage: Others
- displayName: Unit Test other basic case
- dependsOn: []
- jobs:
- - job:
- displayName: Test other basic case
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: "commonDockerConfig"
- utScriptFileName: "run_basic_others"
- uploadPath: $(UPLOAD_PATH)
- utArtifact: "ut_others"
- utTestMode: "no-coverage"
- utContainerName: "utTest-no-coverage"
diff --git a/.azure-pipelines/ut-basic.yml b/.azure-pipelines/ut-basic.yml
index a6e34a466ca..2249f0c2590 100644
--- a/.azure-pipelines/ut-basic.yml
+++ b/.azure-pipelines/ut-basic.yml
@@ -14,7 +14,6 @@ pr:
- requirements.txt
- .azure-pipelines/scripts/ut
exclude:
- - test/neural_coder
- test/3x
- neural_compressor/common
- neural_compressor/torch
diff --git a/.azure-pipelines/ut-itrex.yml b/.azure-pipelines/ut-itrex.yml
index 574c8a32677..2f038270234 100644
--- a/.azure-pipelines/ut-itrex.yml
+++ b/.azure-pipelines/ut-itrex.yml
@@ -13,10 +13,6 @@ pr:
- requirements.txt
- .azure-pipelines/scripts/ut/run_itrex.sh
- .azure-pipelines/ut-itrex.yml
- exclude:
- - neural_compressor/common
- - neural_compressor/torch
- - neural_compressor/tensorflow
pool: MODEL_PERF_TEST
diff --git a/.azure-pipelines/ut-ncoder.yml b/.azure-pipelines/ut-ncoder.yml
deleted file mode 100644
index a1512ae4803..00000000000
--- a/.azure-pipelines/ut-ncoder.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_coder
- - test/neural_coder
- - setup.py
-
-pool: ICX-16C
-
-variables:
- UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
-
-stages:
-- stage:
- displayName: Unit Test for Neural Coder
- jobs:
- - job:
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: 'commonDockerConfig'
- utScriptFileName: 'run_ncoder'
- uploadPath: $(UPLOAD_PATH)
- utArtifact: 'ut_ncoder'
- utTestMode: "no-coverage"
- utContainerName: "utTest-ncoder"
diff --git a/.azure-pipelines/ut-neural-insights.yaml b/.azure-pipelines/ut-neural-insights.yaml
deleted file mode 100644
index b73026c623b..00000000000
--- a/.azure-pipelines/ut-neural-insights.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_insights
- - setup.py
- - .azure-pipelines/ut-neural-insights.yaml
-
-pool: ICX-16C
-
-variables:
- UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
-
-stages:
- - stage:
- displayName: Unit Test for Neural Insights
- jobs:
- - job:
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: 'commonDockerConfig'
- utScriptFileName: 'run_neural_insights'
- uploadPath: $(UPLOAD_PATH)
- utArtifact: 'ut_neural-insights'
- utTestMode: "no-coverage"
- utContainerName: "utTest-nInsights"
diff --git a/.azure-pipelines/ut-neural-solution.yaml b/.azure-pipelines/ut-neural-solution.yaml
deleted file mode 100644
index df717899b57..00000000000
--- a/.azure-pipelines/ut-neural-solution.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-trigger: none
-
-pr:
- autoCancel: true
- drafts: false
- branches:
- include:
- - master
- paths:
- include:
- - neural_solution
- - setup.py
- - .azure-pipelines/ut-neural-solution.yaml
-
-pool: ICX-16C
-
-variables:
- UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
-
-stages:
- - stage:
- displayName: Unit Test for Neural Solution
- jobs:
- - job:
- steps:
- - template: template/ut-template.yml
- parameters:
- dockerConfigName: 'commonDockerConfig'
- utScriptFileName: 'run_neural_solution'
- uploadPath: $(UPLOAD_PATH)
- utArtifact: 'ut_neural-solution'
- utTestMode: "no-coverage"
- utContainerName: "utTest-nSolution"
diff --git a/.coverage b/.coverage
deleted file mode 100644
index 02b5b52790b..00000000000
Binary files a/.coverage and /dev/null differ
diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml
index 697e70799c4..9f566749da0 100644
--- a/.github/checkgroup.yml
+++ b/.github/checkgroup.yml
@@ -11,29 +11,6 @@ subprojects:
- "Code-Scan"
- "Code-Scan (Bandit Code Scan Bandit)"
- "Code-Scan (DocStyle Code Scan DocStyle)"
- - "Code-Scan (Pylint Code Scan Pylint)"
-
- - id: "Code Scan Neural-Insights Tests workflow"
- paths:
- - "neural_insights/**"
- - "setup.py"
- - ".azure-pipelines/code-scan-neural-insights.yml"
- checks:
- - "Code-Scan-Neural-Insights"
- - "Code-Scan-Neural-Insights (Bandit Code Scan Bandit)"
- - "Code-Scan-Neural-Insights (DocStyle Code Scan DocStyle)"
- - "Code-Scan-Neural-Insights (Pylint Code Scan Pylint)"
-
- - id: "Code Scan Neural-Solution Tests workflow"
- paths:
- - "neural_solution/**"
- - "setup.py"
- - ".azure-pipelines/code-scan-neural-solution.yml"
- checks:
- - "Code-Scan-Neural-Solution"
- - "Code-Scan-Neural-Solution (Bandit Code Scan Bandit)"
- - "Code-Scan-Neural-Solution (DocStyle Code Scan DocStyle)"
- - "Code-Scan-Neural-Solution (Pylint Code Scan Pylint)"
- id: "Model Tests workflow"
paths:
@@ -51,13 +28,8 @@ subprojects:
- "Model-Test"
- "Model-Test (Generate Report GenerateReport)"
- "Model-Test (Run ONNX Model resnet50-v1-12)"
- - "Model-Test (Run PyTorch Model resnet18)"
- "Model-Test (Run PyTorch Model resnet18_fx)"
- - "Model-Test (Run TensorFlow Model darknet19)"
- - "Model-Test (Run TensorFlow Model inception_v1)"
- - "Model-Test (Run TensorFlow Model resnet-101)"
- "Model-Test (Run TensorFlow Model resnet50v1.5)"
- - "Model-Test (Run TensorFlow Model ssd_mobilenet_v1_ckpt)"
- "Model-Test (Run TensorFlow Model ssd_resnet50_v1)"
- id: "Model Tests 3x workflow"
@@ -82,7 +54,6 @@ subprojects:
- "setup.py"
- "requirements.txt"
- ".azure-pipelines/scripts/ut/**"
- - "!test/neural_coder/**"
- "!test/3x/**"
- "!neural_compressor/common/**"
- "!neural_compressor/torch/**"
@@ -107,28 +78,6 @@ subprojects:
- "UT-Basic (Unit Test other basic case Test other basic case)"
- "UT-Basic (Unit Test other cases baseline Test other cases baseline)"
- - id: "Unit Tests basic no coverage workflow"
- paths:
- - "neural_compressor/**"
- - "test/**"
- - "setup.py"
- - "requirements.txt"
- - ".azure-pipelines/scripts/ut/**"
- - "!test/neural_coder/**"
- - "!test/3x/**"
- - "!neural_compressor/common/**"
- - "!neural_compressor/torch/**"
- - "!neural_compressor/tensorflow/**"
- - "!neural_compressor/onnxrt/**"
- - "!.azure-pipelines/scripts/ut/3x/**"
- checks:
- - "UT-Basic-No-Coverage"
- - "UT-Basic-No-Coverage (Unit Test FWKs adaptor Test FWKs adaptor)"
- - "UT-Basic-No-Coverage (Unit Test Pruning Test PyTorch Pruning)"
- - "UT-Basic-No-Coverage (Unit Test Pruning Test TensorFlow Pruning)"
- - "UT-Basic-No-Coverage (Unit Test User facing API Test User facing API)"
- - "UT-Basic-No-Coverage (Unit Test other basic case Test other basic case)"
-
- id: "Unit Tests ITREX workflow"
paths:
- "neural_compressor/**"
@@ -142,28 +91,6 @@ subprojects:
checks:
- "UT-ITREX"
- - id: "Unit Tests Neural-Insights workflow"
- paths:
- - "neural_insights/**"
- - "setup.py"
- checks:
- - "UT-Neural-Insights"
-
- - id: "Unit Tests Neural-Solution workflow"
- paths:
- - "neural_solution/**"
- - "setup.py"
- checks:
- - "UT-Neural-Solution"
-
- - id: "Unit Tests Neural-Coder workflow"
- paths:
- - "neural_coder/**"
- - "test/neural_coder/**"
- - "setup.py"
- checks:
- - "UT-Coder"
-
- id: "Unit Tests 3x-TensorFlow workflow"
paths:
- "neural_compressor/common/**"
@@ -191,16 +118,3 @@ subprojects:
- "UT-3x-Torch (Coverage Compare CollectDatafiles)"
- "UT-3x-Torch (Unit Test 3x Torch Unit Test 3x Torch)"
- "UT-3x-Torch (Unit Test 3x Torch baseline Unit Test 3x Torch baseline)"
-
- - id: "Unit Tests 3x-ONNXRT workflow"
- paths:
- - "neural_compressor/common/**"
- - "neural_compressor/onnxrt/**"
- - "test/3x/onnxrt/**"
- - "setup.py"
- - "requirements_ort.txt"
- checks:
- - "UT-3x-ONNXRT"
- - "UT-3x-ONNXRT (Coverage Compare CollectDatafiles)"
- - "UT-3x-ONNXRT (Unit Test 3x ONNXRT Unit Test 3x ONNXRT)"
- - "UT-3x-ONNXRT (Unit Test 3x ONNXRT baseline Unit Test 3x ONNXRT baseline)"
diff --git a/.github/workflows/Scanner_Bdba.yml b/.github/workflows/Scanner_Bdba.yml
index 2351406c9a7..4bef08dfc88 100644
--- a/.github/workflows/Scanner_Bdba.yml
+++ b/.github/workflows/Scanner_Bdba.yml
@@ -1,4 +1,5 @@
name: Scanner BDBA
+permissions: read-all
on:
workflow_dispatch:
diff --git a/.github/workflows/Scanner_Coverity.yml b/.github/workflows/Scanner_Coverity.yml
index 50f44da4bf5..a95489e5442 100644
--- a/.github/workflows/Scanner_Coverity.yml
+++ b/.github/workflows/Scanner_Coverity.yml
@@ -1,4 +1,5 @@
name: Scanner Coverity
+permissions: read-all
on:
workflow_dispatch:
diff --git a/.github/workflows/Scanner_McAfee.yml b/.github/workflows/Scanner_McAfee.yml
index 3d449ddaccf..30b882bcf47 100644
--- a/.github/workflows/Scanner_McAfee.yml
+++ b/.github/workflows/Scanner_McAfee.yml
@@ -1,4 +1,5 @@
name: Virus Scan
+permissions: read-all
on:
workflow_dispatch:
diff --git a/.github/workflows/probot.yml b/.github/workflows/probot.yml
index 290af2a86a7..fc74b74134d 100644
--- a/.github/workflows/probot.yml
+++ b/.github/workflows/probot.yml
@@ -1,4 +1,5 @@
name: Probot
+permissions: read-all
on:
pull_request:
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index ec06b4bada1..f71b29376bd 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,4 +1,5 @@
name: Publish
+permissions: {}
on:
push:
@@ -7,9 +8,10 @@ on:
jobs:
build:
-
runs-on: ubuntu-latest
-
+ permissions:
+ pull-requests: write
+ contents: write
steps:
- uses: actions/checkout@v3
- name: Build Online Document
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 19e48389c04..d93d64aba33 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,16 +2,6 @@ ci:
autofix_prs: true
autoupdate_schedule: quarterly
-exclude: |
- (?x)^(
- conda_meta/.+|
- neural_insights/gui.+|
- neural_insights/test.+|
- neural_solution/frontend/gRPC/proto/neural_solution_pb2.py|
- neural_coder/extensions/.+|
- neural_coder/examples/.+
- )$
-
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
@@ -60,13 +50,7 @@ repos:
- id: insert-license
files: |
(?x)^(
- neural_solution/.*(py|yaml|yml|sh)|
- neural_compressor/.*(py|yaml|yml|sh)|
- neural_insights/.*(py|yaml|yml|sh)
- )$
- exclude: |
- (?x)^(
- neural_solution/test/.*
+ neural_compressor/.*(py|yaml|yml|sh)
)$
args:
[
@@ -144,7 +128,8 @@ repos:
examples/.*(txt|patch)|
examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json|
examples/notebook/dynas/ResNet50_Quantiation_Search_Supernet_NAS.ipynb|
- examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb
+ examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb|
+ neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt
)$
- repo: https://github.com/astral-sh/ruff-pre-commit
diff --git a/README.md b/README.md
index 608080432bd..f4694e991e9 100644
--- a/README.md
+++ b/README.md
@@ -2,114 +2,119 @@
Intel® Neural Compressor
===========================
- An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, ONNX Runtime, and MXNet)
+ An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)
[![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
-[![version](https://img.shields.io/badge/release-2.5-green)](https://github.com/intel/neural-compressor/releases)
+[![version](https://img.shields.io/badge/release-3.0-green)](https://github.com/intel/neural-compressor/releases)
[![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
[![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
[![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
-[Architecture](./docs/source/design.md#architecture) | [Workflow](./docs/source/design.md#workflow) | [LLMs Recipes](./docs/source/llm_recipes.md) | [Results](./docs/source/validated_model_list.md) | [Documentations](https://intel.github.io/neural-compressor)
+[Architecture](./docs/source/3x/design.md#architecture) | [Workflow](./docs/source/3x/design.md#workflows) | [LLMs Recipes](./docs/source/llm_recipes.md) | [Results](./docs/source/validated_model_list.md) | [Documentations](https://intel.github.io/neural-compressor)
---
-Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks such as [TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), [ONNX Runtime](https://onnxruntime.ai/), and [MXNet](https://mxnet.apache.org/),
+Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks such as [TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), and [ONNX Runtime](https://onnxruntime.ai/),
as well as Intel extensions such as [Intel Extension for TensorFlow](https://github.com/intel/intel-extension-for-tensorflow) and [Intel Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch).
In particular, the tool provides the key features, typical examples, and open collaborations as below:
-* Support a wide range of Intel hardware such as [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/flex-series.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html) with extensive testing; support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testing
+* Support a wide range of Intel hardware such as [Intel Gaudi Al Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi-overview.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/flex-series.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html) with extensive testing;
+support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testing; support NVidia GPU for some WOQ algorithms like AutoRound and HQQ.
-* Validate popular LLMs such as [LLama2](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), by leveraging zero-code optimization solution [Neural Coder](/neural_coder#what-do-we-offer) and automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies
+* Validate popular LLMs such as [LLama2](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), with automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies
* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
## What's New
-* [2024/03] A new SOTA approach [AutoRound](https://github.com/intel/auto-round) Weight-Only Quantization on [Intel Gaudi2 AI accelerator](https://habana.ai/products/gaudi2/) is available for LLMs.
+* [2024/07] From 3.0 release, framework extension API is recommended to be used for quantization.
+* [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
## Installation
+### Install Framework
+#### Install torch for CPU
+```Shell
+pip install torch --index-url https://download.pytorch.org/whl/cpu
+```
+#### Use Docker Image with torch installed for HPU
+https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click
+
+> **Note**:
+> There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
+
+#### Install torch/intel_extension_for_pytorch for Intel GPU
+https://intel.github.io/intel-extension-for-pytorch/index.html#installation
+
+#### Install torch for other platform
+https://pytorch.org/get-started/locally
+
+#### Install tensorflow
+```Shell
+pip install tensorflow
+```
### Install from pypi
```Shell
-pip install neural-compressor
+# Install 2.X API + Framework extension API + PyTorch dependency
+pip install neural-compressor[pt]
+# Install 2.X API + Framework extension API + TensorFlow dependency
+pip install neural-compressor[tf]
```
-> **Note**:
-> Further installation methods can be found under [Installation Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/installation_guide.md). check out our [FAQ](https://github.com/intel/neural-compressor/blob/master/docs/source/faq.md) for more details.
+> **Note**:
+> Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
## Getting Started
-Setting up the environment:
+Setting up the environment:
```bash
pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
```
After successfully installing these packages, try your first quantization program.
-### Weight-Only Quantization (LLMs)
-Following example code demonstrates Weight-Only Quantization on LLMs, it supports Intel CPU, Intel Gauid2 AI Accelerator, Nvidia GPU, best device will be selected automatically.
+### [FP8 Quantization](./examples/3.x_api/pytorch/cv/fp8_quant/)
+Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator.
-To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
+To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
```bash
-docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
-
-# Check the container ID
-docker ps
-
-# Login into container
-docker exec -it
bash
-
-# Install the optimum-habana
-pip install --upgrade-strategy eager optimum[habana]
-
-# Install INC/auto_round
-pip install neural-compressor auto_round
+# Run a container with an interactive shell
+docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
```
Run the example:
```python
-from transformers import AutoModel, AutoTokenizer
-
-from neural_compressor.config import PostTrainingQuantConfig
-from neural_compressor.quantization import fit
-from neural_compressor.adaptor.torch_utils.auto_round import get_dataloader
-
-model_name = "EleutherAI/gpt-neo-125m"
-float_model = AutoModel.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-dataloader = get_dataloader(tokenizer, seqlen=2048)
-
-woq_conf = PostTrainingQuantConfig(
- approach="weight_only",
- op_type_dict={
- ".*": { # match all ops
- "weight": {
- "dtype": "int",
- "bits": 4,
- "algorithm": "AUTOROUND",
- },
- }
- },
+from neural_compressor.torch.quantization import (
+ FP8Config,
+ prepare,
+ convert,
)
-quantized_model = fit(model=float_model, conf=woq_conf, calib_dataloader=dataloader)
+import torchvision.models as models
+
+model = models.resnet18()
+qconfig = FP8Config(fp8_config="E4M3")
+model = prepare(model, qconfig)
+# customer defined calibration
+calib_func(model)
+model = convert(model)
```
-**Note:**
-To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization.
+### Weight-Only Large Language Model Loading (LLMs)
-### Static Quantization (Non-LLMs)
+Following example code demonstrates weight-only large language model loading on Intel Gaudi2 AI Accelerator.
```python
-from torchvision import models
+from neural_compressor.torch.quantization import load
+
+model_name = "TheBloke/Llama-2-7B-GPTQ"
+model = load(
+ model_name_or_path=model_name,
+ format="huggingface",
+ device="hpu",
+ torch_dtype=torch.bfloat16,
+)
+```
-from neural_compressor.config import PostTrainingQuantConfig
-from neural_compressor.data import DataLoader, Datasets
-from neural_compressor.quantization import fit
+**Note:**
-float_model = models.resnet18()
-dataset = Datasets("pytorch")["dummy"](shape=(1, 3, 224, 224))
-calib_dataloader = DataLoader(framework="pytorch", dataset=dataset)
-static_quant_conf = PostTrainingQuantConfig()
-quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloader=calib_dataloader)
-```
+Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
## Documentation
@@ -121,88 +126,69 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade
- Architecture
- Workflow
- APIs
- LLMs Recipes
- Examples
+ Architecture
+ Workflow
+ APIs
+ LLMs Recipes
+ Examples
- Python-based APIs
+ PyTorch Extension APIs
- Quantization
- Advanced Mixed Precision
- Pruning (Sparsity)
- Distillation
-
-
- Orchestration
- Benchmarking
- Distributed Compression
- Model Export
-
-
-
-
- Neural Coder (Zero-code Optimization)
+ Overview
+ Dynamic Quantization
+ Static Quantization
+ Smooth Quantization
-
-
- Launcher
- JupyterLab Extension
- Visual Studio Code Extension
- Supported Matrix
+ Weight-Only Quantization
+ FP8 Quantization
+ MX Quantization
+ Mixed Precision
- Advanced Topics
+ Tensorflow Extension APIs
- Adaptor
- Strategy
- Distillation for Quantization
- SmoothQuant
-
-
- Weight-Only Quantization (INT8/INT4/FP4/NF4)
- FP8 Quantization
- Layer-Wise Quantization
+ Overview
+ Static Quantization
+ Smooth Quantization
- Innovations for Productivity
+ Other Modules
- Neural Insights
- Neural Solution
+ Auto Tune
+ Benchmark
-> **Note**:
-> Further documentations can be found at [User Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/user_guide.md).
+> **Note**:
+> From 3.0 release, we recommend to use 3.X API. Compression techniques during training such as QAT, Pruning, Distillation only available in [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md) currently.
## Selected Publications/Events
-* Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
+* Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
* Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024)
* Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024)
* EMNLP'2023 (Under Review): [TEQ: Trainable Equivalent Transformation for Quantization of LLMs](https://openreview.net/forum?id=iaI8xEINAf&referrer=%5BAuthor%20Console%5D) (Sep 2023)
* arXiv: [Efficient Post-training Quantization with FP8 Formats](https://arxiv.org/abs/2309.14592) (Sep 2023)
* arXiv: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2023)
-> **Note**:
+> **Note**:
> View [Full Publication List](https://github.com/intel/neural-compressor/blob/master/docs/source/publication_list.md).
## Additional Content
@@ -212,8 +198,8 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade
* [Legal Information](./docs/source/legal_information.md)
* [Security Policy](SECURITY.md)
-## Communication
+## Communication
- [GitHub Issues](https://github.com/intel/neural-compressor/issues): mainly for bug reports, new feature requests, question asking, etc.
-- [Email](mailto:inc.maintainers@intel.com): welcome to raise any interesting research ideas on model compression techniques by email for collaborations.
+- [Email](mailto:inc.maintainers@intel.com): welcome to raise any interesting research ideas on model compression techniques by email for collaborations.
- [Discord Channel](https://discord.com/invite/Wxk3J3ZJkU): join the discord channel for more flexible technical discussion.
- [WeChat group](/docs/source/imgs/wechat_group.jpg): scan the QA code to join the technical discussion.
diff --git a/conda_meta/basic/meta.yaml b/conda_meta/basic/meta.yaml
deleted file mode 100644
index c894131132e..00000000000
--- a/conda_meta/basic/meta.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-{% set version = "2.6" %}
-{% set buildnumber = 0 %}
-package:
- name: neural-compressor
- version: {{version}}
-build:
- script_env:
- - NC_WHL
- number: {{buildnumber}}
- noarch: python
- script: pip install --no-deps {{NC_WHL}}
-requirements:
- build:
- - python
- - pip
- run:
- - python
- - pip
- - numpy
- - pyyaml
- - scikit-learn
- - schema
- - py-cpuinfo
- - pandas
- - pycocotools
- - opencv-python-headless
- - psutil
- - Pillow
- - requests
- - prettytable
- - packaging
- - deprecated
-test:
- imports:
- - neural_compressor
-about:
- home: https://github.com/intel/neural-compressor
- license: Apache 2.0
- license_family: Apache
- license_file: ../../LICENSE
- description: '
- LEGAL NOTICE: Use of this software package is subject to the software license agreement (as set forth above, in the license section of the installed Conda package and/or the README file) and all notices, disclaimers or license terms for third party or open source software included in or with the software.
-
- EULA: Apache 2.0
- Third Party Programs: https://github.com/intel/neural-compressor/blob/master/third-party-programs.txt
-
- Intel® Neural Compressor.
- '
diff --git a/conda_meta/neural_insights/meta.yaml b/conda_meta/neural_insights/meta.yaml
deleted file mode 100644
index add638448ad..00000000000
--- a/conda_meta/neural_insights/meta.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-{% set version = "2.6" %}
-{% set buildnumber = 0 %}
-package:
- name: neural-insights
- version: {{version}}
-build:
- script_env:
- - NC_WHL
- number: {{buildnumber}}
- noarch: python
- script: pip install --no-deps {{NC_WHL}}
- entry_points:
- - neural_insights = neural_insights.bin.neural_insights:main
-requirements:
- build:
- - python
- - pip
- run:
- - python
- - pip
- - neural-compressor>=2.2
- - Flask
- - Flask-Cors
- - Flask-SocketIO
- - gevent
- - gevent-websocket
- - pywin32 # [win]
-test:
- imports:
- - neural_insights
-about:
- home: https://github.com/intel/neural-compressor
- license: Apache 2.0
- license_family: Apache
- license_file: ../../LICENSE
- description: '
- LEGAL NOTICE: Use of this software package is subject to the software license agreement (as set forth above, in the license section of the installed Conda package and/or the README file) and all notices, disclaimers or license terms for third party or open source software included in or with the software.
-
- EULA: Apache 2.0
- Third Party Programs: https://github.com/intel/neural-compressor/blob/master/third-party-programs.txt
-
- Intel® Neural Compressor.
- '
diff --git a/conda_meta/neural_solution/meta.yaml b/conda_meta/neural_solution/meta.yaml
deleted file mode 100644
index cef0297fb5d..00000000000
--- a/conda_meta/neural_solution/meta.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-{% set version = "2.6" %}
-{% set buildnumber = 0 %}
-package:
- name: neural-solution
- version: {{version}}
-build:
- script_env:
- - NC_WHL
- number: {{buildnumber}}
- noarch: python
- script: pip install --no-deps {{NC_WHL}}
- entry_points:
- - neural_solution = neural_solution.bin.neural_solution:main
-requirements:
- build:
- - python
- - pip
- run:
- - python
- - pip
- - neural-compressor>=2.2
- - pydantic
- - fastapi
- - uvicorn[standard]
- - watchdog
- - protobuf
- - grpcio
- - mpi4py
-test:
- imports:
- - neural_solution
-about:
- home: https://github.com/intel/neural-compressor
- license: Apache 2.0
- license_family: Apache
- license_file: ../../LICENSE
- description: '
- LEGAL NOTICE: Use of this software package is subject to the software license agreement (as set forth above, in the license section of the installed Conda package and/or the README file) and all notices, disclaimers or license terms for third party or open source software included in or with the software.
-
- EULA: Apache 2.0
- Third Party Programs: https://github.com/intel/neural-compressor/blob/master/third-party-programs.txt
-
- Intel® Neural Compressor.
- '
diff --git a/docs/3x/PT_FP8Quant.md b/docs/3x/PT_FP8Quant.md
new file mode 100644
index 00000000000..a0ed3352e8e
--- /dev/null
+++ b/docs/3x/PT_FP8Quant.md
@@ -0,0 +1,113 @@
+FP8 Quantization
+=======
+
+1. [Introduction](#introduction)
+2. [Supported Parameters](#supported-parameters)
+3. [Get Start with FP8 Quantization](#get-start-with-fp8-quantization)
+4. [Examples](#examples)
+
+## Introduction
+
+Float point 8 (FP8) is a promising data type for low precision quantization which provides a data distribution that is completely different from INT8 and it's shown as below.
+
+
+
+
+
+Intel Gaudi2, also known as HPU, provides this data type capability for low precision quantization, which includes `E4M3` and `E5M2`. For more information about these two data type, please refer to [link](https://arxiv.org/abs/2209.05433).
+
+Intel Neural Compressor provides general quantization APIs to leverage HPU FP8 capability. with simple with lower memory usage and lower compute cost, 8 bit model
+
+## Supported Parameters
+
+
+
+
+ Attribute
+ Description
+ Values
+
+
+
+ fp8_config
+ The target data type of FP8 quantization.
+ E4M3 (default) - As Fig. 2 E5M2 - As Fig. 1.
+
+
+ hp_dtype
+ The high precision data type of non-FP8 operators.
+ bf16 (default) - torch.bfloat16 fp16 - torch.float16. fp32 - torch.float32.
+
+
+ observer
+ The observer to measure the statistics.
+ maxabs (default), saves all tensors to files.
+
+
+ allowlist
+ List of nn.Module names or types to quantize. When setting an empty list, all the supported modules will be quantized by default. See Supported Modules. Not setting the list at all is not recommended as it will set the allowlist to these modules only: torch.nn.Linear, torch.nn.Conv2d, and BMM.
+ Default = {'names': [], 'types': FP8_WHITE_LIST}
+
+
+ blocklist
+ List of nn.Module names or types not to quantize. Defaults to empty list, so you may omit it from the config file.
+ Default = {'names': [], 'types': ()}
+
+
+ mode
+ The mode, measure or quantize, to run HQT with.
+ MEASURE - Measure statistics of all modules and emit the results to dump_stats_path. QUANTIZE - Quantize and run the model according to the provided measurements. AUTO (default) - Select from [MEASURE, QUANTIZE] automatically.
+
+
+ dump_stats_path
+ The path to save and load the measurements. The path is created up until the level before last "/". The string after the last / will be used as prefix to all the measurement files that will be created.
+ Default = "./hqt_output/measure"
+
+
+ scale_method
+ The method for calculating the scale from the measurement.
+ - without_scale - Convert to/from FP8 without scaling. - unit_scale - Always use scale of 1. - maxabs_hw (default) - Scale is calculated to stretch/compress the maxabs measurement to the full-scale of FP8 and then aligned to the corresponding HW accelerated scale. - maxabs_pow2 - Scale is calculated to stretch/compress the maxabs measurement to the full-scale of FP8 and then rounded to the power of 2. - maxabs_hw_opt_weight - Scale of model params (weights) is chosen as the scale that provides minimal mean-square-error between quantized and non-quantized weights, from all possible HW accelerated scales. Scale of activations is calculated the same as maxabs_hw. - act_maxabs_pow2_weights_pcs_opt_pow2 - Scale of model params (weights) is calculated per-channel of the params tensor. The scale per-channel is calculated the same as maxabs_hw_opt_weight. Scale of activations is calculated the same as maxabs_pow2. - act_maxabs_hw_weights_pcs_maxabs_pow2 - Scale of model params (weights) is calculated per-channel of the params tensor. The scale per-channel is calculated the same as maxabs_pow2. Scale of activations is calculated the same as maxabs_hw.
+
+
+ measure_exclude
+ If this attribute is not defined, the default is OUTPUT. Since most models do not require measuring output tensors, you can exclude it to speed up the measurement process.
+ NONE - All tensors are measured. OUTPUT (default) - Excludes measurement of output tensors.
+
+
+
+## Get Start with FP8 Quantization
+
+### Demo Usage
+
+```python
+from neural_compressor.torch.quantization import (
+ FP8Config,
+ prepare,
+ convert,
+)
+import torchvision.models as models
+
+model = models.resnet18()
+qconfig = FP8Config(fp8_config="E4M3")
+model = prepare(model, qconfig)
+# customer defined calibration
+calib_func(model)
+model = convert(model)
+```
+
+## Examples
+
+| Task | Example |
+|----------------------|---------|
+| Computer Vision (CV) | [Link](../../examples/3.x_api/pytorch/cv/fp8_quant/) |
+| Large Language Model (LLM) | [Link](https://github.com/HabanaAI/optimum-habana-fork/tree/habana-main/examples/text-generation#running-with-fp8) |
+
+> Note: For LLM, Optimum-habana provides higher performance based on modified modeling files, so here the Link of LLM goes to Optimum-habana, which utilize Intel Neural Compressor for FP8 quantization internally.
diff --git a/docs/3x/PT_MixPrecision.md b/docs/3x/PT_MixPrecision.md
deleted file mode 100644
index c1cd198049b..00000000000
--- a/docs/3x/PT_MixPrecision.md
+++ /dev/null
@@ -1,103 +0,0 @@
-PyTorch Mixed Precision
-========================================
-
-1. [Introduction](#introduction)
-2. [Mixed Precision Support Matrix](#mixed-precision-support-matrix)
-3. [Get Started](#get-start)
-4. [Examples](#examples)
-
-## Introduction
-
-The recent growth of Deep Learning has driven the development of more complex models that require significantly more compute and memory capabilities. Several low precision numeric formats have been proposed to address the problem. Google's [bfloat16](https://cloud.google.com/tpu/docs/bfloat16) and the [FP16: IEEE](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) half-precision format are two of the most widely used sixteen bit formats. [Mixed precision](https://arxiv.org/abs/1710.03740) training and inference using low precision formats have been developed to reduce compute and bandwidth requirements.
-
-The 3rd Gen Intel® Xeon® Scalable processor (codenamed Cooper Lake), featuring Intel® Deep Learning Boost, is the first general-purpose x86 CPU to support the bfloat16 format. Specifically, three new bfloat16 instructions are added as a part of the AVX512_BF16 extension within Intel Deep Learning Boost: VCVTNE2PS2BF16, VCVTNEPS2BF16, and VDPBF16PS. The first two instructions allow converting to and from bfloat16 data type, while the last one performs a dot product of bfloat16 pairs. Further details can be found in the [hardware numerics document](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-deep-learning-boost-new-instruction-bfloat16.html) published by Intel.
-
-The 4th Gen Intel® Xeon® Scalable processor supports FP16 instruction set architecture (ISA) for Intel®
-Advanced Vector Extensions 512 (Intel® AVX-512). The new ISA supports a wide range of general-purpose numeric
-operations for 16-bit half-precision IEEE-754 floating-point and complements the existing 32-bit and 64-bit floating-point instructions already available in the Intel Xeon processor based products. Further details can be found in the [hardware numerics document](https://www.intel.com/content/www/us/en/content-details/669773/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide.html) published by Intel.
-
-
-
-
-
-## Mixed Precision Support Matrix
-
-
-
-
- Framework
- Backend
- Backend Library
- Backend Value
- Support Device(cpu as default)
- Support BF16
- Support FP16
-
-
-
-
- PyTorch
- FX
- FBGEMM
- "default"
- cpu
- ✔
- ✔
-
-
-
-
-
-### Hardware and Software requests for **BF16**
-- PyTorch
- 1. Hardware: CPU supports `avx512_bf16` instruction set.
- 2. Software: torch >= [1.11.0](https://download.pytorch.org/whl/torch_stable.html).
-
-
-### Hardware and Software requests for **FP16**
-- PyTorch
- 1. Hardware: CPU supports `avx512_fp16` instruction set.
- 2. Software: torch >= [1.11.0](https://download.pytorch.org/whl/torch_stable.html).
-
-
-### Accuracy-driven mixed precision
-BF16/FP16 conversion may lead to accuracy drop. Intel® Neural Compressor provides an accuracy-driven tuning function to reduce accuracy loss,
-which could fallback converted ops to FP32, if set in config, to get better accuracy. To enable this function, users only to provide
-`eval_fn` and `eval_args` for `autotune`.
-To be noticed, IPEX backend doesn't support accuracy-driven mixed precision.
-
-## Get Started with autotune API
-
-To get a bf16/fp16 model, users can use the `autotune` interface with `MixPrecisionConfig` as follows.
-
-- BF16:
-
-```python
-from neural_compressor.torch.quantization import MixPrecisionConfig, TuningConfig, autotune
-
-def eval_acc_fn(model):
- ......
- return acc
-
-# modules might be fallback to fp32 to get better accuracy
-custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["bf16", "fp32"])], max_trials=3)
-best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
-```
-
-- FP16:
-
-```python
-from neural_compressor.torch.quantization import MixPrecisionConfig, TuningConfig, autotune
-
-def eval_acc_fn(model):
- ......
- return acc
-
-# modules might be fallback to fp32 to get better accuracy
-custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["fp16", "fp32"])], max_trials=3)
-best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
-```
-
-## Examples
-
-Example will be added later.
diff --git a/docs/build_docs/build.sh b/docs/build_docs/build.sh
index 032a15890a2..43b2a0dd467 100755
--- a/docs/build_docs/build.sh
+++ b/docs/build_docs/build.sh
@@ -81,12 +81,10 @@ fi
source env_sphinx/bin/activate
cp -rf ../docs/ ./source
-cp -rf ../neural_coder ./source/docs/source
-cp -rf ../neural_insights ./source/docs/source
-cp -rf ../neural_solution ./source/docs/source
cp -f "../README.md" "./source/docs/source/Welcome.md"
cp -f "../SECURITY.md" "./source/docs/source/SECURITY.md"
+
all_md_files=`find ./source/docs -name "*.md"`
for md_file in ${all_md_files}
do
@@ -94,18 +92,14 @@ do
done
-sed -i 's/.\/docs\/source\/_static/./g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md
-sed -i 's/.md/.html/g; s/.\/docs\/source\//.\//g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md
-sed -i 's/\/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/user_guide.md
-sed -i 's/href=\"\/neural_coder/href=\".\/neural_coder/g' ./source/docs/source/user_guide.md
-sed -i 's/https\:\/\/intel.github.io\/neural-compressor\/lates.\/api-doc\/apis.html/https\:\/\/intel.github.io\/neural-compressor\/latest\/docs\/source\/api-doc\/apis.html/g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md
+sed -i 's/.\/docs\/source\/_static/./g' ./source/docs/source/Welcome.md
+sed -i 's/.md/.html/g; s/.\/docs\/source\//.\//g' ./source/docs/source/Welcome.md
+#sed -i 's/\/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/user_guide.md
+sed -i 's/https\:\/\/intel.github.io\/neural-compressor\/lates.\/api-doc\/apis.html/https\:\/\/intel.github.io\/neural-compressor\/latest\/docs\/source\/api-doc\/apis.html/g' ./source/docs/source/Welcome.md
+sed -i 's/\/examples\/pytorch/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/pytorch/g' ./source/docs/source/Welcome.md
sed -i 's/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/Welcome.md
-sed -i 's/\"\/neural_coder\/extensions\/screenshots\/extmanager.png/\".\/neural_coder\/extensions\/screenshots\/extmanager.png/g' ./source/docs/source/get_started.md
-
-sed -i 's/\/neural_coder\/extensions\/neural_compressor_ext_lab\/README.md/.\/neural_coder\/extensions\/neural_compressor_ext_lab\/README.md/g' ./source/docs/source/get_started.md
-
sed -i 's/\/examples\/README.md/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/get_started.md
sed -i 's/.\/validated_model_list.md\#/.\/validated_model_list.html\#/g' ./source/docs/source/installation_guide.md
@@ -138,8 +132,8 @@ if [[ ${UPDATE_VERSION_FOLDER} -eq 1 ]]; then
cp -r ${SRC_FOLDER}/* ${DST_FOLDER}
python update_html.py ${DST_FOLDER} ${VERSION}
cp -r ./source/docs/source/imgs ${DST_FOLDER}/docs/source
- cp -r ./source/docs/source/neural_coder/extensions/neural_compressor_ext_vscode/images ${DST_FOLDER}/docs/source/neural_coder/extensions/neural_compressor_ext_vscode
- cp -r ./source/docs/source/neural_coder/extensions/screenshots ${DST_FOLDER}/docs/source/neural_coder/extensions
+ cp -r ./source/docs/source/3x/imgs ${DST_FOLDER}/docs/source/3x
+
cp source/_static/index.html ${DST_FOLDER}
else
@@ -153,8 +147,7 @@ if [[ ${UPDATE_LATEST_FOLDER} -eq 1 ]]; then
cp -r ${SRC_FOLDER}/* ${LATEST_FOLDER}
python update_html.py ${LATEST_FOLDER} ${VERSION}
cp -r ./source/docs/source/imgs ${LATEST_FOLDER}/docs/source
- cp -r ./source/docs/source/neural_coder/extensions/neural_compressor_ext_vscode/images ${LATEST_FOLDER}/docs/source/neural_coder/extensions/neural_compressor_ext_vscode
- cp -r ./source/docs/source/neural_coder/extensions/screenshots ${LATEST_FOLDER}/docs/source/neural_coder/extensions
+ cp -r ./source/docs/source/3x/imgs ${LATEST_FOLDER}/docs/source/3x
cp source/_static/index.html ${LATEST_FOLDER}
else
echo "skip to create ${LATEST_FOLDER}"
@@ -164,7 +157,7 @@ echo "Create document is done"
if [[ ${CHECKOUT_GH_PAGES} -eq 1 ]]; then
git clone -b gh-pages --single-branch https://github.com/intel/neural-compressor.git ${RELEASE_FOLDER}
-
+
if [[ ${UPDATE_VERSION_FOLDER} -eq 1 ]]; then
python update_version.py ${ROOT_DST_FOLDER} ${VERSION}
cp -rf ${DST_FOLDER} ${RELEASE_FOLDER}
diff --git a/docs/source/2x_user_guide.md b/docs/source/2x_user_guide.md
new file mode 100644
index 00000000000..941e80d6a39
--- /dev/null
+++ b/docs/source/2x_user_guide.md
@@ -0,0 +1,76 @@
+2.X API User Guide
+===========================
+
+Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search to help the user optimize their model. The below documents could help you to get familiar with concepts and modules in Intel® Neural Compressor. Learn how to utilize the APIs in Intel® Neural Compressor to conduct quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks.
+
+## Overview
+This part helps users get a quick understanding of the design structure and workflow of 2.X Intel® Neural Compressor. We provide broad examples to help users get started.
+
+
+## Python-based APIs
+Python-based APIs contain more details about the functional APIs in Intel® Neural Compressor,
+which introduce the mechanism of each function and provide a tutorial to help users apply them in their own cases.
+Please note that we will stop supporting the Intel Neural Compressor 1.X API in the future.
+So we provide a comprehensive migration document in Code Migration to help users update their code from the previous 1.X version to the new 2.X version.
+In the 2.X API, it's very important to create the `DataLoader` and `Metrics` for your examples, so we provide detailed introductions.
+
+
+
+## Advanced Topics
+This part provides the advanced topics that help users dive deep into the Intel® Neural Compressor 2.X API.
+
diff --git a/docs/3x/PT_DynamicQuant.md b/docs/source/3x/PT_DynamicQuant.md
similarity index 100%
rename from docs/3x/PT_DynamicQuant.md
rename to docs/source/3x/PT_DynamicQuant.md
diff --git a/docs/3x/PT_MXQuant.md b/docs/source/3x/PT_MXQuant.md
similarity index 98%
rename from docs/3x/PT_MXQuant.md
rename to docs/source/3x/PT_MXQuant.md
index 1cfb17ff30b..42e12d039a6 100644
--- a/docs/3x/PT_MXQuant.md
+++ b/docs/source/3x/PT_MXQuant.md
@@ -95,7 +95,7 @@ user_model = convert(model=user_model)
## Examples
-- PyTorch [huggingface models](/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx)
+- PyTorch [huggingface models](/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant)
## Reference
diff --git a/docs/source/3x/PT_MixedPrecision.md b/docs/source/3x/PT_MixedPrecision.md
new file mode 100644
index 00000000000..3fbd1db6bbf
--- /dev/null
+++ b/docs/source/3x/PT_MixedPrecision.md
@@ -0,0 +1,111 @@
+PyTorch Mixed Precision
+========================================
+
+1. [Introduction](#introduction)
+2. [Mixed Precision Support Matrix](#mixed-precision-support-matrix)
+3. [Get Started](#get-start)
+4. [Examples](#examples)
+
+## Introduction
+
+The recent growth of Deep Learning has driven the development of more complex models that require significantly more compute and memory capabilities. Several low precision numeric formats have been proposed to address the problem.
+Google's [bfloat16](https://cloud.google.com/tpu/docs/bfloat16) and the [FP16: IEEE](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) half-precision format are two of the most widely used sixteen bit formats. [Mixed precision](https://arxiv.org/abs/1710.03740) training and inference using low precision formats have been developed to reduce compute and bandwidth requirements.
+
+The 3rd Gen Intel® Xeon® Scalable processor (codenamed Cooper Lake), featuring Intel® Deep Learning Boost, is the first general-purpose x86 CPU to support the bfloat16 format. Specifically, three new bfloat16 instructions are added as a part of the AVX512_BF16 extension within Intel Deep Learning Boost: VCVTNE2PS2BF16, VCVTNEPS2BF16, and VDPBF16PS. The first two instructions allow converting to and from bfloat16 data type, while the last one performs a dot product of bfloat16 pairs.
+Further details can be found in the [Hardware Numerics Document](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-deep-learning-boost-new-instruction-bfloat16.html) published by Intel.
+
+The 4th Gen Intel® Xeon® Scalable processor supports FP16 instruction set architecture (ISA) for Intel® Advanced Vector Extensions 512 (Intel® AVX-512). The new ISA supports a wide range of general-purpose numeric operations for 16-bit half-precision IEEE-754 floating-point and complements the existing 32-bit and 64-bit floating-point instructions already available in the Intel Xeon processor based products.
+Further details can be found in the [Intel AVX512 FP16 Guide](https://www.intel.com/content/www/us/en/content-details/669773/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide.html) published by Intel.
+
+The latest Intel Xeon processors deliver the flexibility of Intel Advanced Matrix Extensions (Intel AMX), an accelerator that improves the performance of deep learning (DL) training and inference, making it ideal for workloads like NLP, recommender systems, and image recognition. Developers can code AI functionality to take advantage of the Intel AMX instruction set, and they can code non-AI functionality to use the processor instruction set architecture (ISA). Intel has integrated the Intel® oneAPI Deep Neural Network Library (oneDNN), its oneAPI DL engine, into PyTorch.
+Further details can be found in the [Intel AMX Document](https://www.intel.com/content/www/us/en/content-details/785250/accelerate-artificial-intelligence-ai-workloads-with-intel-advanced-matrix-extensions-intel-amx.html) published by Intel.
+
+
+
+
+
+## Mixed Precision Support Matrix
+
+
+
+
+ Framework
+ Backend
+ Backend Library
+ Backend Value
+ Support Device(cpu as default)
+ Support BF16
+ Support FP16
+
+
+
+
+ PyTorch
+ FX
+ FBGEMM
+ "default"
+ cpu
+ ✔
+ ✔
+
+
+
+
+
+### Hardware and Software requests for **BF16**
+- PyTorch
+ 1. Hardware: CPU supports `avx512_bf16` instruction set.
+ 2. Software: torch >= [1.11.0](https://download.pytorch.org/whl/torch_stable.html).
+
+
+### Hardware and Software requests for **FP16**
+- PyTorch
+ 1. Hardware: CPU supports `avx512_fp16` instruction set.
+ 2. Software: torch >= [1.11.0](https://download.pytorch.org/whl/torch_stable.html).
+> Note: To run FP16 on Intel-AMX, please set the environment variable `ONEDNN_MAX_CPU_ISA`:
+> ```export ONEDNN_MAX_CPU_ISA=AVX512_CORE_AMX_FP16```
+
+
+
+### Accuracy-driven mixed precision
+BF16/FP16 conversion may lead to an accuracy drop. Intel® Neural Compressor provides an accuracy-driven tuning function to reduce accuracy loss,
+which can fall back converted ops to FP32, if set in the config, to get better accuracy. To enable this function, users only need to provide
+`eval_fn` and `eval_args` for `autotune`.
+Please note that the IPEX backend doesn't support accuracy-driven mixed precision.
+
+## Get Started with autotune API
+
+To get a bf16/fp16 model, users can use the `autotune` interface with `MixedPrecisionConfig` as follows.
+
+- BF16:
+
+```python
+from neural_compressor.torch.quantization import MixedPrecisionConfig, TuningConfig, autotune
+
+def eval_acc_fn(model):
+ ......
+ return acc
+
+# modules might be fallback to fp32 to get better accuracy
+custom_tune_config = TuningConfig(config_set=[MixedPrecisionConfig(dtype=["bf16", "fp32"])], max_trials=3)
+best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
+```
+
+- FP16:
+
+```python
+from neural_compressor.torch.quantization import MixedPrecisionConfig, TuningConfig, autotune
+
+def eval_acc_fn(model):
+ ......
+ return acc
+
+# modules might be fallback to fp32 to get better accuracy
+custom_tune_config = TuningConfig(config_set=[MixedPrecisionConfig(dtype=["fp16", "fp32"])], max_trials=3)
+best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
+```
+
+## Examples
+
+Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/mixed_precision
+) on how to quantize a model with Mixed Precision.
diff --git a/docs/3x/PT_SmoothQuant.md b/docs/source/3x/PT_SmoothQuant.md
similarity index 98%
rename from docs/3x/PT_SmoothQuant.md
rename to docs/source/3x/PT_SmoothQuant.md
index 9e4ae3eb62f..e3a7262dcde 100644
--- a/docs/3x/PT_SmoothQuant.md
+++ b/docs/source/3x/PT_SmoothQuant.md
@@ -46,7 +46,7 @@ run_fn(prepared_model)
q_model = convert(prepared_model)
```
-To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm).
+To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant).
## Validated Models
diff --git a/docs/3x/PT_StaticQuant.md b/docs/source/3x/PT_StaticQuant.md
similarity index 91%
rename from docs/3x/PT_StaticQuant.md
rename to docs/source/3x/PT_StaticQuant.md
index ec967a780d4..d687e83c1f6 100644
--- a/docs/3x/PT_StaticQuant.md
+++ b/docs/source/3x/PT_StaticQuant.md
@@ -1,6 +1,5 @@
PyTorch Static Quantization
========================================
-
1. [Introduction](#introduction)
2. [Get Started](#get-started) \
2.1 [Static Quantization with IPEX Backend](#static-quantization-with-ipex-backend) \
@@ -9,6 +8,7 @@ PyTorch Static Quantization
2.1.3 [Model Examples](#model-examples) \
2.2 [Static Quantization with PT2E Backend](#static-quantization-with-pt2e-backend) \
2.2.1 [Usage Sample with PT2E](#usage-sample-with-pt2e)
+ 2.2.2 [Model Examples with PT2E](#model-examples-with-pt2e)
## Introduction
@@ -68,7 +68,7 @@ q_model = convert(prepared_model)
#### Model Examples
-Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm) on how to quantize a new model.
+Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex) on how to quantize a new model.
### Static Quantization with PT2E Backend
@@ -102,3 +102,7 @@ opt_model = torch.compile(q_model)
```
> Note: The `set_local` of `StaticQuantConfig` will be supported after the torch 2.4 release.
+
+#### Model Examples with PT2E
+
+Users could refer to [cv examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/static_quant) and [llm examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e) on how to quantize a new model.
diff --git a/docs/3x/PT_WeightOnlyQuant.md b/docs/source/3x/PT_WeightOnlyQuant.md
similarity index 90%
rename from docs/3x/PT_WeightOnlyQuant.md
rename to docs/source/3x/PT_WeightOnlyQuant.md
index 37cc934592a..1578b57f8c9 100644
--- a/docs/3x/PT_WeightOnlyQuant.md
+++ b/docs/source/3x/PT_WeightOnlyQuant.md
@@ -28,7 +28,6 @@ Besides, as mentioned in many papers[1][2], activation quantization is the main
Theoretically, round-to-nearest (RTN) is the most straightforward way to quantize weight using scale maps. However, when the number of bits is small (e.g. 3), the MSE loss is larger than expected. A group size is introduced to reduce elements using the same scale to improve accuracy.
-
## Supported Matrix
| Algorithms/Backend | PyTorch eager mode |
@@ -58,12 +57,14 @@ Theoretically, round-to-nearest (RTN) is the most straightforward way to quantiz
WeightOnlyQuant quantization for PyTorch is using prepare and convert [APIs](./PyTorch.md#quantization-apis).
#### Common arguments
+
| Config | Capability |
|---|---|
| dtype (str)| ['int', 'nf4', 'fp4'] |
| bits (int)| [1, ..., 8] |
| group_size (int)| [-1, 1, ..., $C_{in}$] |
| use_sym (bool)| [True, False] |
+| quant_lm_head (bool)| [False, True] |
| use_double_quant (bool) | [True, False] |
| double_quant_dtype (str) | ['int'] |
| double_quant_bits (int) | [1, ..., bits] |
@@ -71,12 +72,14 @@ WeightOnlyQuant quantization for PyTorch is using prepare and convert [APIs](./P
| double_quant_group_size (int) | [-1, 1, ..., $C_{in}$] |
Notes:
+
- *group_size = -1* refers to **per output channel quantization**. Taking a linear layer (input channel = $C_{in}$, output channel = $C_{out}$) for instance, when *group size = -1*, quantization will calculate total $C_{out}$ quantization parameters. Otherwise, when *group_size = gs* quantization parameters are calculate with every $gs$ elements along with the input channel, leading to total $C_{out} \times (C_{in} / gs)$ quantization parameters.
- 4-bit NormalFloat(NF4) is proposed in QLoRA[7]. 'fp4' includes [fp4_e2m1](../../neural_compressor/adaptor/torch_utils/weight_only.py#L37) and [fp4_e2m1_bnb](https://github.com/TimDettmers/bitsandbytes/blob/18e827d666fa2b70a12d539ccedc17aa51b2c97c/bitsandbytes/functional.py#L735). By default, fp4 refers to fp4_e2m1_bnb.
-- Only RTN and GPTQ support double quant.
-
+- *quant_lm_head* defaults to False. This means that, except for transformer blocks, the last layer in transformer models will not be quantized by default. The last layer may be named "lm_head", "output_layer" or "embed_out".
+- Only RTN and GPTQ support double quant.
#### RTN
+
| rtn_args | comments | default value |
|----------|-------------|-------------------------------------------------------------------|
| group_dim (int) | Dimension for grouping | 1 |
@@ -86,6 +89,7 @@ Notes:
| model_path (str) | Model path that is used to load state_dict per layer | |
> **Notes:** `model_path` is only used when use_layer_wise=True. `layer-wise` is stay-tuned.
+
``` python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, RTNConfig
@@ -96,6 +100,7 @@ model = convert(model)
```
#### GPTQ
+
| gptq_args | comments | default value |
|----------|-------------|-------------------------------------------------------------------|
| use_mse_search (bool) | Enables mean squared error (MSE) search | False
@@ -107,6 +112,7 @@ model = convert(model)
| block_size (int) | Execute GPTQ quantization per block, block shape = [C_out, block_size] | 128 |
| static_groups (bool) | Whether to calculate group wise quantization parameters in advance. This option mitigate actorder's extra computational requirements. | False. |
> **Note:** `model_path` is only used when use_layer_wise=True. `layer-wise` is stay-tuned.
+
``` python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, GPTQConfig
@@ -118,6 +124,7 @@ model = convert(model)
```
#### AutoRound
+
| autoround_args | comments | default value |
|----------|-------------|-------------------------------------------------------------------|
| enable_full_range (bool) | Whether to enable full range quantization | False
@@ -138,6 +145,7 @@ model = convert(model)
| not_use_best_mse (bool) | Whether to use mean squared error | False |
| dynamic_max_gap (int) | The dynamic maximum gap | -1 |
| scale_dtype (str) | The data type of quantization scale to be used, different kernels have different choices | "float16" |
+
``` python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, AutoRoundConfig
@@ -149,6 +157,7 @@ model = convert(model)
```
#### AWQ
+
| awq_args | comments | default value |
|----------|-------------|-------------------------------------------------------------------|
| group_dim (int) | Dimension for grouping | 1 |
@@ -159,6 +168,7 @@ model = convert(model)
| use_auto_clip (bool) | Enables clip range search | True |
| folding(bool) | Allow insert mul before linear when the scale cannot be absorbed by last layer | False. |
> **Notes:** `layer-wise` is stay-tuned.
+
``` python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, AWQConfig
@@ -170,6 +180,7 @@ model = convert(model)
```
#### TEQ
+
| teq_args | comments | default value |
|----------|-------------|-------------------------------------------------------------------|
| group_dim (int) | Dimension for grouping | 1 |
@@ -179,6 +190,7 @@ model = convert(model)
| use_double_quant (bool) | Enables double quantization | False |
| folding(bool) | Allow insert mul before linear when the scale cannot be absorbed by last layer | False |
> **Notes:** `layer-wise` is stay-tuned.
+
``` python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, TEQConfig
@@ -190,12 +202,13 @@ model = convert(model)
```
#### HQQ
+
| hqq_args | comments | default value |
|----------|-------------|-------------------------------------------------------------------|
| quant_zero (bool) | Whether to quantize zero point | True |
| quant_scale: (bool) | Whether to quantize scale: point | False |
| scale_quant_group_size (int) | The group size for quantizing scale | 128 |
-| skip_lm_head (bool) | Whether to skip for quantizing lm_head | True |
+
``` python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, HQQConfig
@@ -205,10 +218,13 @@ model = prepare(model, quant_config)
run_fn(model) # calibration
model = convert(model)
```
+
### Specify Quantization Rules
+
Intel(R) Neural Compressor support specify quantization rules by operator name or operator type. Users can set `local` in dict or use `set_local` method of config class to achieve the above purpose.
1. Example of setting `local` from a dict
+
```python
quant_config = {
"rtn": {
@@ -226,7 +242,9 @@ quant_config = {
}
}
```
+
2. Example of using `set_local`
+
```python
quant_config = RTNConfig()
lm_head_config = RTNConfig(dtype="fp32")
@@ -234,7 +252,9 @@ quant_config.set_local("lm_head", lm_head_config)
```
### Saving and Loading
+
+The saved_results folder contains two files: quantized_model.pt and qconfig.json, and the generated model is a quantized model. The quantized model will include WeightOnlyLinear. To support low-memory inference, Intel(R) Neural Compressor implemented WeightOnlyLinear, a torch.nn.Module, to compress the fake quantized fp32 model. Since torch does not provide flexible data type storage, WeightOnlyLinear combines low-bit data into a longer data type, such as torch.int8 and torch.int32. Low-bit data includes weights and zero points. When using WeightOnlyLinear for inference, it will restore the compressed data to float32 and run the torch linear function.
+
```python
# Quantization code
from neural_compressor.torch.quantization import prepare, convert, RTNConfig
@@ -255,10 +275,38 @@ loaded_model = load(
) # Please note that the original_model parameter passes the original model.
```
+## Layer Wise Quantization
+
+As the size of LLMs continues to grow, loading the entire model into a single GPU card or the RAM of a client machine becomes impractical. To address this challenge, we introduce Layer-wise Quantization (LWQ), a method that quantizes LLMs layer by layer or block by block. This approach significantly reduces memory consumption. The diagram below illustrates the LWQ process.
+
+
+
+*Figure 1: The process of layer-wise quantization for PyTorch model. The color grey means empty parameters and the color blue represents parameters need to be quantized. Every rectangle inside model represents one layer.*
+
+
+Currently, we support LWQ for `RTN`, `AutoRound`, and `GPTQ`.
+
+Here, we take the `RTN` algorithm as example to demonstrate the usage of LWQ.
+
+```python
+from neural_compressor.torch.quantization import RTNConfig, convert, prepare
+from neural_compressor.torch import load_empty_model
+
+model_state_dict_path = "/path/to/model/state/dict"
+float_model = load_empty_model(model_state_dict_path)
+quant_config = RTNConfig(use_layer_wise=True)
+prepared_model = prepare(float_model, quant_config)
+quantized_model = convert(prepared_model)
+```
+
+## Efficient Usage on Client-Side
+
+For client machines with limited RAM and cores, we offer optimizations to reduce computational overhead and minimize memory usage. For detailed information, please refer to [Quantization on Client](https://github.com/intel/neural-compressor/blob/master/docs/source/3x/client_quant.md).
+
## Examples
-Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm) on how to quantize a model with WeightOnlyQuant.
+Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only) on how to quantize a model with WeightOnlyQuant.
## Reference
@@ -272,6 +320,6 @@ Users can also refer to [examples](https://github.com/intel/neural-compressor/bl
[5]. Cheng, Wenhua, et al. "Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs" arXiv preprint arXiv:2309.05516 (2023).
-[6]. Badri, Hicham and Shaji, Appu. "Half-Quadratic Quantization of Large Machine Learning Models." [Online] Available: https://mobiusml.github.io/hqq_blog/ (2023).
+[6]. Badri, Hicham and Shaji, Appu. "Half-Quadratic Quantization of Large Machine Learning Models." [Online] Available: https://mobiusml.github.io/hqq_blog/ (2023).
[7]. Dettmers, Tim, et al. "Qlora: Efficient finetuning of quantized llms." arXiv preprint arXiv:2305.14314 (2023).
diff --git a/docs/3x/PyTorch.md b/docs/source/3x/PyTorch.md
similarity index 85%
rename from docs/3x/PyTorch.md
rename to docs/source/3x/PyTorch.md
index b8c4ea2c7c5..a3004f6bcfb 100644
--- a/docs/3x/PyTorch.md
+++ b/docs/source/3x/PyTorch.md
@@ -194,6 +194,21 @@ def load(output_dir="./saved_results", model=None):
✔
link
+
+ MX Quantization
+ Microscaling Data Formats for
+Deep Learning
+ PyTorch eager mode
+ ✔
+ link
+
+
+ Mixed Precision
+ Mixed precision
+ PyTorch eager mode
+ ✔
+ link
+
Quantization Aware Training
Quantization Aware Training
@@ -223,3 +238,24 @@ def load(output_dir="./saved_results", model=None):
+
+2. How to set different configuration for specific op_name or op_type?
+ > INC extends a `set_local` method based on the global configuration object to set custom configuration.
+
+ ```python
+ def set_local(self, operator_name_or_list: Union[List, str, Callable], config: BaseConfig) -> BaseConfig:
+ """Set custom configuration based on the global configuration object.
+
+ Args:
+ operator_name_or_list (Union[List, str, Callable]): specific operator
+ config (BaseConfig): specific configuration
+ """
+ ```
+
+ > Demo:
+
+ ```python
+ quant_config = RTNConfig() # Initialize global configuration with default bits=4
+ quant_config.set_local(".*mlp.*", RTNConfig(bits=8)) # For layers with "mlp" in their names, set bits=8
+ quant_config.set_local("Conv1d", RTNConfig(dtype="fp32")) # For Conv1d layers, do not quantize them.
+ ```
diff --git a/docs/3x/TF_Quant.md b/docs/source/3x/TF_Quant.md
similarity index 98%
rename from docs/3x/TF_Quant.md
rename to docs/source/3x/TF_Quant.md
index d80c25ecada..9314a3c8200 100644
--- a/docs/3x/TF_Quant.md
+++ b/docs/source/3x/TF_Quant.md
@@ -13,7 +13,7 @@ TensorFlow Quantization
`neural_compressor.tensorflow` supports quantizing both TensorFlow and Keras model with or without accuracy aware tuning.
-For the detailed quantization fundamentals, please refer to the document for [Quantization](../quantization.md).
+For the detailed quantization fundamentals, please refer to the document for [Quantization](quantization.md).
## Get Started
diff --git a/docs/3x/TF_SQ.md b/docs/source/3x/TF_SQ.md
similarity index 97%
rename from docs/3x/TF_SQ.md
rename to docs/source/3x/TF_SQ.md
index 5225138e502..1d3a08836b5 100644
--- a/docs/3x/TF_SQ.md
+++ b/docs/source/3x/TF_SQ.md
@@ -50,4 +50,4 @@ best_model = autotune(
## Examples
-Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/tensorflow/nlp/large_language_models\quantization\ptq\smoothquant) on how to apply smooth quant to a TensorFlow model with `neural_compressor.tensorflow`.
+Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant) on how to apply smooth quant to a TensorFlow model with `neural_compressor.tensorflow`.
diff --git a/docs/3x/TensorFlow.md b/docs/source/3x/TensorFlow.md
similarity index 82%
rename from docs/3x/TensorFlow.md
rename to docs/source/3x/TensorFlow.md
index 5634a524f14..6e4936f2c63 100644
--- a/docs/3x/TensorFlow.md
+++ b/docs/source/3x/TensorFlow.md
@@ -2,12 +2,16 @@ TensorFlow
===============
-1. [Introduction](#introduction)
-2. [API for TensorFlow](#api-for-tensorflow)
-3. [Support Matrix](#support-matrix)
- 3.1 [Quantization Scheme](#quantization-scheme)
- 3.2 [Quantization Approaches](#quantization-approaches)
- 3.3 [Backend and Device](#backend-and-device)
+- [TensorFlow](#tensorflow)
+ - [Introduction](#introduction)
+ - [API for TensorFlow](#api-for-tensorflow)
+ - [Support Matrix](#support-matrix)
+ - [Quantization Scheme](#quantization-scheme)
+ - [Quantization Approaches](#quantization-approaches)
+ - [Post Training Static Quantization](#post-training-static-quantization)
+ - [Smooth Quantization](#smooth-quantization)
+ - [Mixed Precision](#mixed-precision)
+ - [Backend and Device](#backend-and-device)
## Introduction
@@ -23,7 +27,7 @@ Intel(R) Neural Compressor provides `quantize_model` and `autotune` as main inte
**quantize_model**
-The design philosophy of the `quantize_model` interface is easy-of-use. With minimal parameters requirement, including `model`, `quant_config`, `calib_dataloader` and `calib_iteration`, it offers a straightforward choice of quantizing TF model in one-shot.
+The design philosophy of the `quantize_model` interface is ease-of-use. With minimal parameter requirements, including `model`, `quant_config`, `calib_dataloader` and `calib_iteration`, it offers a straightforward choice of quantizing TF model in one-shot.
```python
def quantize_model(
@@ -31,6 +35,7 @@ def quantize_model(
quant_config: Union[BaseConfig, list],
calib_dataloader: Callable = None,
calib_iteration: int = 100,
+ calib_func: Callable = None,
):
```
`model` should be a string of the model's location, the object of Keras model or INC TF model wrapper class.
@@ -41,6 +46,9 @@ def quantize_model(
`calib_iteration` is used to decide how many iterations the calibration process will be run.
+`calib_func` is a substitution for `calib_dataloader` when the built-in calibration function of INC does not work for model inference.
+
+
Here is a simple example of using `quantize_model` interface with a dummy calibration dataloader and the default `StaticQuantConfig`:
```python
from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
@@ -68,6 +76,7 @@ def autotune(
eval_args: Optional[Tuple[Any]] = None,
calib_dataloader: Callable = None,
calib_iteration: int = 100,
+ calib_func: Callable = None,
) -> Optional[BaseModel]:
```
`model` should be a string of the model's location, the object of Keras model or INC TF model wrapper class.
@@ -82,6 +91,8 @@ def autotune(
`calib_iteration` is used to decide how many iterations the calibration process will be run.
+`calib_func` is a substitution for `calib_dataloader` when the built-in calibration function of INC does not work for model inference.
+
Here is a simple example of using `autotune` interface with different quantization rules defined by a list of `StaticQuantConfig`:
```python
from neural_compressor.common.base_tuning import TuningConfig
@@ -145,9 +156,16 @@ The supported Quantization methods for TensorFlow and Keras are listed below:
TensorFlow /Intel TensorFlow
- Smooth Quantization(SQ)
- weights
- calibration
+ Smooth Quantization(SQ)
+ weights
+ calibration
+ Tensorflow
+ TensorFlow /Intel TensorFlow
+
+
+ Mixed Precision(MP)
+ weights and activations
+ NA
Tensorflow
TensorFlow /Intel TensorFlow
@@ -168,6 +186,10 @@ Smooth Quantization (SQ) is an advanced quantization technique designed to optim
Refer to the [SQ Guide](./TF_SQ.md) for detailed information.
+##### Mixed Precision
+Mixed Precision (MP) is enabled with Post Training Static Quantization. Once `BF16` is supported on the machine, the matched operators will be automatically converted.
+
+
#### Backend and Device
Intel(R) Neural Compressor supports TF GPU with [ITEX-XPU](https://github.com/intel/intel-extension-for-tensorflow). We will automatically run model on GPU by checking if it has been installed.
diff --git a/docs/3x/autotune.md b/docs/source/3x/autotune.md
similarity index 100%
rename from docs/3x/autotune.md
rename to docs/source/3x/autotune.md
diff --git a/docs/source/3x/benchmark.md b/docs/source/3x/benchmark.md
new file mode 100644
index 00000000000..571e0f83f80
--- /dev/null
+++ b/docs/source/3x/benchmark.md
@@ -0,0 +1,61 @@
+Benchmark
+---
+
+1. [Introduction](#introduction)
+
+2. [Supported Matrix](#supported-matrix)
+
+3. [Usage](#usage)
+
+## Introduction
+
+Intel Neural Compressor provides a command `incbench` to launch the Intel CPU performance benchmark.
+
+To get the peak performance on Intel Xeon CPU, we should avoid crossing NUMA node in one instance.
+Therefore, by default, `incbench` will trigger 1 instance on the first NUMA node.
+
+## Supported Matrix
+
+| Platform | Status |
+|:---:|:---:|
+| Linux | ✔ |
+| Windows | ✔ |
+
+## Usage
+
+| Parameters | Default | comments |
+|:----------------------:|:------------------------:|:-------------------------------------:|
+| num_instances | 1 | Number of instances |
+| num_cores_per_instance | None | Number of cores in each instance |
+| C, cores | 0-${num_cores_on_NUMA-1} | decides the visible core range |
+| cross_memory | False | whether to allocate memory cross NUMA |
+
+> Note: cross_memory is set to True only when memory is insufficient.
+
+### General Use Cases
+
+1. `incbench main.py`: run 1 instance on NUMA:0.
+2. `incbench --num_i 2 main.py`: run 2 instances on NUMA:0.
+3. `incbench --num_c 2 main.py`: run multi-instances with 2 cores per instance on NUMA:0.
+4. `incbench -C 24-47 main.py`: run 1 instance on COREs:24-47.
+5. `incbench -C 24-47 --num_c 4 main.py`: run multi-instances with 4 COREs per instance on COREs:24-47.
+
+> Note:
+ > - `num_i` works the same as `num_instances`
+ > - `num_c` works the same as `num_cores_per_instance`
+
+### Dump Throughput and Latency Summary
+
+To merge benchmark results from multi-instances, `incbench` automatically checks log file messages for "throughput" and "latency" information matching the following patterns.
+
+```python
+throughput_pattern = r"[T,t]hroughput:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"
+latency_pattern = r"[L,l]atency:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"
+```
+
+#### Demo usage
+
+```python
+print("Throughput: {:.3f} samples/sec".format(throughput))
+print("Latency: {:.3f} ms".format(latency * 10**3))
+```
diff --git a/docs/source/3x/client_quant.md b/docs/source/3x/client_quant.md
new file mode 100644
index 00000000000..9921560c798
--- /dev/null
+++ b/docs/source/3x/client_quant.md
@@ -0,0 +1,40 @@
+Quantization on Client
+==========================================
+
+1. [Introduction](#introduction)
+2. [Get Started](#get-started)
+
+## Introduction
+
+For `RTN` and `GPTQ` algorithms, we provide default algorithm configurations for different processor types (`client` and `server`). Generally, lightweight configurations are tailored specifically for client devices to enhance performance and efficiency.
+
+
+## Get Started
+
+Here, we take the `RTN` algorithm as an example to demonstrate the usage on a client machine.
+
+```python
+from neural_compressor.torch.quantization import get_default_rtn_config, convert, prepare
+from neural_compressor.torch import load_empty_model
+
+model_state_dict_path = "/path/to/model/state/dict"
+float_model = load_empty_model(model_state_dict_path)
+quant_config = get_default_rtn_config()
+prepared_model = prepare(float_model, quant_config)
+quantized_model = convert(prepared_model)
+```
+
+> [!TIP]
+> By default, the appropriate configuration is determined based on hardware information, but users can explicitly specify `processor_type` as either `client` or `server` when calling `get_default_rtn_config`.
+
+
+For Windows machines, run the following command to utilize all available cores automatically:
+
+```bash
+python main.py
+```
+
+> [!TIP]
+> For Linux systems, users need to configure the environment variables appropriately to achieve optimal performance. For example, set the `OMP_NUM_THREADS` explicitly. For processors with hybrid architecture (including both P-cores and E-cores), it is recommended to bind tasks to all P-cores using `taskset`.
+
+RTN quantization is a quick process, finishing in tens of seconds and using several GB of RAM when working with 7B models, e.g., [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). However, for higher accuracy, the GPTQ algorithm is recommended, but be prepared for a longer quantization time.
diff --git a/docs/source/3x/design.md b/docs/source/3x/design.md
new file mode 100644
index 00000000000..73d4d2e5568
--- /dev/null
+++ b/docs/source/3x/design.md
@@ -0,0 +1,16 @@
+Design
+=====
+
+## Architecture
+
+
+
+
+
+## Workflows
+
+Intel® Neural Compressor provides two workflows: Quantization and Auto-tune.
+
+
+
+
diff --git a/docs/source/3x/gaudi_version_map.md b/docs/source/3x/gaudi_version_map.md
new file mode 100644
index 00000000000..65695479acb
--- /dev/null
+++ b/docs/source/3x/gaudi_version_map.md
@@ -0,0 +1,16 @@
+
+### Version mapping between Intel Neural Compressor to Gaudi Software Stack ###
+
+
+
+ Intel Neural Compressor
+ Gaudi Software Stack
+
+
+
+
+ v3.0
+ v1.17
+
+
+
diff --git a/docs/source/3x/imgs/architecture.png b/docs/source/3x/imgs/architecture.png
new file mode 100644
index 00000000000..8d99b780422
Binary files /dev/null and b/docs/source/3x/imgs/architecture.png differ
diff --git a/docs/3x/imgs/data_format.png b/docs/source/3x/imgs/data_format.png
similarity index 100%
rename from docs/3x/imgs/data_format.png
rename to docs/source/3x/imgs/data_format.png
diff --git a/docs/source/3x/imgs/lwq.png b/docs/source/3x/imgs/lwq.png
new file mode 100644
index 00000000000..b2e75bc5d8e
Binary files /dev/null and b/docs/source/3x/imgs/lwq.png differ
diff --git a/docs/3x/imgs/mx_workflow.png b/docs/source/3x/imgs/mx_workflow.png
similarity index 100%
rename from docs/3x/imgs/mx_workflow.png
rename to docs/source/3x/imgs/mx_workflow.png
diff --git a/docs/3x/imgs/smoothquant.png b/docs/source/3x/imgs/smoothquant.png
similarity index 100%
rename from docs/3x/imgs/smoothquant.png
rename to docs/source/3x/imgs/smoothquant.png
diff --git a/docs/3x/imgs/sq_convert.png b/docs/source/3x/imgs/sq_convert.png
similarity index 100%
rename from docs/3x/imgs/sq_convert.png
rename to docs/source/3x/imgs/sq_convert.png
diff --git a/docs/3x/imgs/sq_pc.png b/docs/source/3x/imgs/sq_pc.png
similarity index 100%
rename from docs/3x/imgs/sq_pc.png
rename to docs/source/3x/imgs/sq_pc.png
diff --git a/docs/3x/imgs/INC3_WORKFLOW.png b/docs/source/3x/imgs/workflow.png
similarity index 100%
rename from docs/3x/imgs/INC3_WORKFLOW.png
rename to docs/source/3x/imgs/workflow.png
diff --git a/neural_solution/examples/custom_models_optimized/tf_example1/dataset/.gitkeep b/docs/source/3x/llm_recipes.md
similarity index 100%
rename from neural_solution/examples/custom_models_optimized/tf_example1/dataset/.gitkeep
rename to docs/source/3x/llm_recipes.md
diff --git a/docs/3x/quantization.md b/docs/source/3x/quantization.md
similarity index 99%
rename from docs/3x/quantization.md
rename to docs/source/3x/quantization.md
index b26c49470a9..26ba158d54f 100644
--- a/docs/3x/quantization.md
+++ b/docs/source/3x/quantization.md
@@ -396,7 +396,7 @@ For supported quantization methods for `accuracy aware tuning` and the detailed
User could refer to below chart to understand the whole tuning flow.
-
+
diff --git a/docs/source/CONTRIBUTING.md b/docs/source/CONTRIBUTING.md
index 58c703a20cd..4decbfb6d0d 100644
--- a/docs/source/CONTRIBUTING.md
+++ b/docs/source/CONTRIBUTING.md
@@ -48,10 +48,10 @@ Intel® Neural Compressor use [Azure DevOps](https://learn.microsoft.com/en-us/a
And generally use [Azure Cloud Instance](https://azure.microsoft.com/en-us/pricing/purchase-options/pay-as-you-go) to deploy pipelines, e.g. Standard E16s v5.
| Test Name | Test Scope | Test Pass Criteria |
|-------------------------------|-----------------------------------------------|---------------------------|
-| Code Scan | Pylint/Bandit/CopyRight/DocStyle/SpellCheck | PASS |
+| Code Scan | Bandit/CopyRight/DocStyle/SpellCheck | PASS |
| [DCO](https://github.com/apps/dco/) | Use `git commit -s` to sign off | PASS |
| Unit Test | Pytest scripts under [test](/test) | PASS (No failure, No core dump, No segmentation fault, No coverage drop) |
-| Model Test | Pytorch + TensorFlow + ONNX Runtime + MXNet | PASS (Functionality pass, FP32/INT8 No performance regression) |
+| Model Test | Pytorch + TensorFlow + ONNX Runtime | PASS (Functionality pass, FP32/INT8 No performance regression) |
## Support
diff --git a/docs/source/NAS.md b/docs/source/NAS.md
deleted file mode 100644
index e922b971c97..00000000000
--- a/docs/source/NAS.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# Neural Architecture Search
-
-1. [Introduction](#introduction)
-
- 1.1. [Basic NAS](#basic-nas)
-
- 1.2. [Dynamic NAS](#dynamic-nas)
-
-2. [NAS Support Matrix](#nas-support-matrix)
-3. [Get Started with NAS API](#get-started-with-nas-api)
-
- 3.1. [Basic Usage](#basic-usage)
-
- 3.2. [Advanced Usage (Custom NAS)](#advanced-usage-custom-nas)
-
-4. [Examples](#examples)
-
-## Introduction
-Neural Architecture Search (NAS) is the process of automating the design of artificial neural networks (ANN) architecture. NAS has been used to design networks that are on par with or outperform hand-designed architectures. Intel® Neural Compressor has supported two different NAS methods: Basic NAS and Dynamic NAS.
-
-### Basic NAS
-Our Basic NAS method leverages a specific search algorithm from built-in search algorithms (grid search, random search, and Bayesian optimization are supported in Intel® Neural Compressor now) or user-defined search algorithms to propose the model architecture based on the given search space, then performs the train evaluation process to evaluate the potential of the proposed model architecture, after several iterations of such procedure, best-performing model architectures which lie in Pareto front will be returned.
-
-### Dynamic NAS
-Dynamic Neural Architecture Search (DyNAS) is a super-network-based NAS approach that uses the metric predictors for predicting the metrics of the model architecture, it is >4x more sample efficient than typical one-shot predictor-based NAS approaches.
-
-The flow of the DyNAS approach is shown in the following figure. In the first phase of the search, a small population of sub-networks is randomly sampled from the super-network and evaluated (validation measurement) to provide the initial training set for the inner predictor loop. After the predictors are trained, a multi-objective evolutionary search is performed in the predictor objective space. After this extensive search is performed, the best-performing sub-network configurations are selected to be the next iteration's validation population. The cycle continues until the search concludes when the user-defined evaluation count is met.
-
-![DyNAS Workflow](./imgs/dynas.png)
-
-## NAS Support Matrix
-
-|NAS Algorithm |PyTorch |TensorFlow |
-|------------------|:--------:|:---------:|
-|Basic NAS |✔ |Not supported yet|
-|Dynamic NAS |✔ |Not supported yet|
-
-## Get Started with NAS API
-
-### Basic Usage
-
-#### 1. Python code + YAML
-
-Simplest launcher code if NAS configuration is defined in user-defined yaml.
-
-```python
-from neural_compressor.experimental import NAS
-
-agent = NAS("/path/to/user/yaml")
-results = agent.search()
-```
-
-#### 2. Python code only
-
-NAS class also support `NASConfig` class as it's argument.
-
-```python
-from neural_compressor.conf.config import NASConfig
-from neural_compressor.experimental import NAS
-
-config = NASConfig(approach="dynas", search_algorithm="nsga2")
-config.dynas.supernet = "ofa_mbv3_d234_e346_k357_w1.2"
-config.dynas.metrics = ["acc", "macs"]
-config.dynas.population = 50
-config.dynas.num_evals = 250
-config.dynas.results_csv_path = "search_results.csv"
-config.dynas.batch_size = 64
-config.dynas.dataset_path = "/datasets/imagenet-ilsvrc2012" # example
-agent = NAS(config)
-results = agent.search()
-```
-
-### Advanced Usage (Custom NAS)
-
-Intel® Neural Compressor NAS API is defined under `neural_compressor.experimental.nas`, which takes a user defined yaml file or a [NASConfig](../../neural_compressor/conf/config.py#NASConfig) object as input. The user defined yaml or the [NASConfig](../../neural_compressor/conf/config.py#NASConfig) object defines necessary configuration of the NAS process. The [NAS](../../neural_compressor/experimental/nas/nas.py#NAS) class aims to create an object according to the defined NAS approach in the configuration, please note this NAS approach should be registered in the Intel® Neural Compressor.
-
-Currently, Intel® Neural Compressor supported two built-in NAS methods: [Basic NAS](../../neural_compressor/experimental/nas/basic_nas.py#BasicNAS) and [Dynamic NAS](../../neural_compressor/experimental/nas/dynas.py#DyNAS). Both methods are inherited from a base class called [NASBase](../../neural_compressor/experimental/nas/nas.py#NASBase). User can also customize their own NAS approach in Intel® Neural Compressor just by decorating their NAS approach class with function [nas_registry](../../neural_compressor/experimental/nas/nas_utils.py#nas_registry) as well as following the API in [NASBase](../../neural_compressor/experimental/nas/nas.py#NASBase), like the way used in the two built-in NAS methods.
-
-## Examples
-
-Following examples are supported in Intel® Neural Compressor:
-
-- DyNAS MobileNetV3 supernet Example:
- - [DyNAS MobileNetV3 supernet Example](../../examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb): DyNAS with MobileNetV3 supernet on ImageNet dataset.
-- DyNAS Transformer LT supernet Example:
- - [DyNAS Transformer LT supernet Example](../../examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset.
diff --git a/docs/source/adaptor.md b/docs/source/adaptor.md
index 6765c5405de..b8af7a934fb 100644
--- a/docs/source/adaptor.md
+++ b/docs/source/adaptor.md
@@ -144,9 +144,6 @@ Onnxruntime already has [quantization tools](https://github.com/microsoft/onnxru
tensorboard=False, fp32_baseline=False):
......
- def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None):
- ......
-
def save(self, model, path):
......
```
diff --git a/docs/source/api-doc/api_2.rst b/docs/source/api-doc/api_2.rst
new file mode 100644
index 00000000000..b5528a0426a
--- /dev/null
+++ b/docs/source/api-doc/api_2.rst
@@ -0,0 +1,29 @@
+2.0 API
+####
+
+**User facing APIs:**
+
+.. toctree::
+ :maxdepth: 1
+
+ quantization.rst
+ mix_precision.rst
+ training.rst
+ benchmark.rst
+ config.rst
+ objective.rst
+
+
+**Advanced APIs:**
+
+.. toctree::
+ :maxdepth: 1
+
+ compression.rst
+ strategy.rst
+ model.rst
+
+**API document example:**
+
+.. toctree::
+ api_doc_example.rst
diff --git a/docs/source/api-doc/api_3.rst b/docs/source/api-doc/api_3.rst
new file mode 100644
index 00000000000..7c01e073f0b
--- /dev/null
+++ b/docs/source/api-doc/api_3.rst
@@ -0,0 +1,27 @@
+3.0 API
+####
+
+**PyTorch Extension API:**
+
+.. toctree::
+ :maxdepth: 1
+
+ torch_quantization_common.rst
+ torch_quantization_config.rst
+ torch_quantization_autotune.rst
+
+**Tensorflow Extension API:**
+
+.. toctree::
+ :maxdepth: 1
+
+ tf_quantization_common.rst
+ tf_quantization_config.rst
+ tf_quantization_autotune.rst
+
+**Other Modules:**
+
+.. toctree::
+ :maxdepth: 1
+
+ benchmark.rst
diff --git a/docs/source/api-doc/apis.rst b/docs/source/api-doc/apis.rst
index 63d8f2f5ca8..15f92f83501 100644
--- a/docs/source/api-doc/apis.rst
+++ b/docs/source/api-doc/apis.rst
@@ -1,29 +1,12 @@
APIs
####
-**User facing APIs:**
-
.. toctree::
:maxdepth: 1
- quantization.rst
- mix_precision.rst
- training.rst
- benchmark.rst
- config.rst
- objective.rst
-
-
-**Advanced APIs:**
+ api_3.rst
.. toctree::
:maxdepth: 1
- compression.rst
- strategy.rst
- model.rst
-
-**API document example:**
-
-.. toctree::
- api_doc_example.rst
+ api_2.rst
diff --git a/docs/source/api-doc/tf_quantization_autotune.rst b/docs/source/api-doc/tf_quantization_autotune.rst
new file mode 100644
index 00000000000..241b7e42c77
--- /dev/null
+++ b/docs/source/api-doc/tf_quantization_autotune.rst
@@ -0,0 +1,6 @@
+Tensorflow Quantization AutoTune
+============
+
+.. autoapisummary::
+
+ neural_compressor.tensorflow.quantization.autotune
diff --git a/docs/source/api-doc/tf_quantization_common.rst b/docs/source/api-doc/tf_quantization_common.rst
new file mode 100644
index 00000000000..3b39d2c79cb
--- /dev/null
+++ b/docs/source/api-doc/tf_quantization_common.rst
@@ -0,0 +1,6 @@
+Tensorflow Quantization Base API
+#################################
+
+.. autoapisummary::
+
+ neural_compressor.tensorflow.quantization.quantize
diff --git a/docs/source/api-doc/tf_quantization_config.rst b/docs/source/api-doc/tf_quantization_config.rst
new file mode 100644
index 00000000000..4f5c757c31c
--- /dev/null
+++ b/docs/source/api-doc/tf_quantization_config.rst
@@ -0,0 +1,6 @@
+Tensorflow Quantization Config
+============
+
+.. autoapisummary::
+
+ neural_compressor.tensorflow.quantization.config
diff --git a/docs/source/api-doc/torch_quantization_autotune.rst b/docs/source/api-doc/torch_quantization_autotune.rst
new file mode 100644
index 00000000000..3466ead4a09
--- /dev/null
+++ b/docs/source/api-doc/torch_quantization_autotune.rst
@@ -0,0 +1,6 @@
+Pytorch Quantization AutoTune
+============
+
+.. autoapisummary::
+
+ neural_compressor.torch.quantization.autotune
diff --git a/docs/source/api-doc/torch_quantization_common.rst b/docs/source/api-doc/torch_quantization_common.rst
new file mode 100644
index 00000000000..d2ad03b933d
--- /dev/null
+++ b/docs/source/api-doc/torch_quantization_common.rst
@@ -0,0 +1,6 @@
+Pytorch Quantization Base API
+#################################
+
+.. autoapisummary::
+
+ neural_compressor.torch.quantization.quantize
diff --git a/docs/source/api-doc/torch_quantization_config.rst b/docs/source/api-doc/torch_quantization_config.rst
new file mode 100644
index 00000000000..cc60be355d6
--- /dev/null
+++ b/docs/source/api-doc/torch_quantization_config.rst
@@ -0,0 +1,6 @@
+Pytorch Quantization Config
+============
+
+.. autoapisummary::
+
+ neural_compressor.torch.quantization.config
diff --git a/docs/source/dataset.md b/docs/source/dataset.md
deleted file mode 100644
index 0695d78a3ac..00000000000
--- a/docs/source/dataset.md
+++ /dev/null
@@ -1,165 +0,0 @@
-Dataset
-=======
-
-1. [Introduction](#introduction)
-
-2. [Supported Framework Dataset Matrix](#supported-framework-dataset-matrix)
-
-3. [Get start with Dataset API](#get-start-with-dataset-api)
-
-4. [Examples](#examples)
-
-## Introduction
-
-To adapt to its internal dataloader API, Intel® Neural Compressor implements some built-in datasets.
-
-A dataset is a container which holds all data that can be used by the dataloader, and have the ability to be fetched by index or created as an iterator. One can implement a specific dataset by inheriting from the Dataset class by implementing `__iter__` method or `__getitem__` method, while implementing `__getitem__` method, `__len__` method is recommended.
-
-Users can use Neural Compressor built-in dataset objects as well as register their own datasets.
-
-## Supported Framework Dataset Matrix
-
-#### TensorFlow
-
-| Dataset | Parameters | Comments | Usage |
-| :------ | :------ | :------ | :------ |
-| MNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/MNIST/, otherwise user should put mnist.npz under root/MNIST/ manually. | **In yaml file:** dataset: MNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['MNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| FashionMNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train**(bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/FashionMNIST/, otherwise user should put train-labels-idx1-ubyte.gz, train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz and t10k-images-idx3-ubyte.gz under root/FashionMNIST/ manually.| **In yaml file:** dataset: FashionMNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['FashionMNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR10(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR10: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR10'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR100(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR100: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR100'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| ImageRecord(root, transform, filter) | **root** (str): Root directory of dataset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: root/validation-000-of-100 root/validation-001-of-100 ... root/validation-099-of-100 The file name needs to follow this pattern: '* - * -of- *' | **In yaml file:** dataset: ImageRecord: root: /path/to/root **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImageRecord'] (root=root, transform=transform, filter=None) |
-| ImageFolder(root, transform, filter) | **root** (str): Root directory of dataset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: root/class_1/xxx.png root/class_1/xxy.png root/class_1/xxz.png ... root/class_n/123.png root/class_n/nsdf3.png root/class_n/asd932_.png Please put images of different categories into different folders. | **In yaml file:** dataset: ImageFolder: root: /path/to/root **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImageFolder'] (root=root,transform=transform, filter=None) |
-| ImagenetRaw(data_path, image_list, transform, filter) | **data_path** (str): Root directory of dataset **image_list** (str): data file, record image_names and their labels **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: data_path/img1.jpg data_path/img2.jpg ... data_path/imgx.jpg dataset will read name and label of each image from image_list file, if user set image_list to None, it will read from data_path/val_map.txt automatically. | **In yaml file:** dataset: ImagenetRaw: data_path: /path/to/image image_list: /path/to/label **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImagenetRaw'] (data_path, image_list, transform=transform, filter=None) |
-| COCORecord(root, num_cores, transform, filter) | **root** (str): Root directory of dataset **num_cores** (int, default=28):The number of input Datasets to interleave from in parallel **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Root is a full path to tfrecord file, which contains the file name. **Please use Resize transform when batch_size > 1** | **In yaml file:** dataset: COCORecord: root: /path/to/tfrecord num_cores: 28 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['COCORecord'] (root, num_cores=28, transform=transform, filter=None) |
-| COCORaw(root, img_dir, anno_dir, transform, filter) | **root** (str): Root directory of dataset **img_dir** (str, default='val2017'): image file directory **anno_dir** (str, default='annotations/instances_val2017.json'): annotation file directory **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: /root/img_dir/1.jpg /root/img_dir/2.jpg ... /root/img_dir/n.jpg /root/anno_dir **Please use Resize transform when batch_size > 1** | **In yaml file:** dataset: COCORaw: root: /path/to/root img_dir: /path/to/image anno_dir: /path/to/annotation **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['COCORaw'] (root, img_dir, anno_dir, transform=transform, filter=None) If anno_dir is not set, the dataset will use default label map |
-| COCONpy(root, npy_dir, anno_dir) | **root** (str): Root directory of dataset **npy_dir** (str, default='val2017'): npy file directory **anno_dir** (str, default='annotations/instances_val2017.json'): annotation file directory | Please arrange data in this way: /root/npy_dir/1.jpg.npy /root/npy_dir/2.jpg.npy ... /root/npy_dir/n.jpg.npy /root/anno_dir **Please use Resize transform when batch_size > 1** | **In yaml file:** dataset: COCORaw: root: /path/to/root npy_dir: /path/to/npy anno_dir: /path/to/annotation **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['COCONpy'] (root, npy_dir, anno_dir) If anno_dir is not set, the dataset will use default label map |
-| dummy(shape, low, high, dtype, label, transform, filter) | **shape** (list or tuple):shape of total samples, the first dimension should be the sample count of the dataset. support create multi shape tensors, use list of tuples for each tuple in the list, will create a such size tensor. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **label** (bool, default=True):whether to return 0 as label **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy: shape: [3, 224, 224, 3] low: 0.0 high: 127.0 dtype: float32 label: True **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy'] (shape, low, high, dtype, label, transform=None, filter=None) |
-| dummy_v2(input_shape, label_shape, low, high, dtype, transform, filter) | **input_shape** (list or tuple):create single or multi input tensors list represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy_v2: input_shape: [224, 224, 3] label_shape: [1] low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy_v2'] (input_shape, low, high, dtype, transform=None, filter=None) |
-| style_transfer(content_folder, style_folder, crop_ratio, resize_shape, image_format, transform, filter) | **content_folder** (str):Root directory of content images **style_folder** (str):Root directory of style images **crop_ratio** (float, default=0.1):cropped ratio to each side **resize_shape** (tuple, default=(256, 256)):target size of image **image_format** (str, default='jpg'): target image format **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Dataset used for style transfer task. This Dataset is to construct a dataset from two specific image holders representing content image folder and style image folder. | **In yaml file:** dataset: style_transfer: content_folder: /path/to/content_folder style_folder: /path/to/style_folder crop_ratio: 0.1 resize_shape: [256, 256] image_format: 'jpg' **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['style_transfer'] (content_folder, style_folder, crop_ratio, resize_shape, image_format, transform=transform, filter=None) |
-| TFRecordDataset(root, transform, filter) | **root** (str): filename of dataset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions |Root is a full path to tfrecord file, which contains the file name. | **In yaml file:** dataset: TFRecordDataset: root: /path/to/tfrecord **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['TFRecordDataset'] (root, transform=transform) |
-| bert(root, label_file, task, transform, filter) | **root** (str): path of dataset **label_file** (str): path of label file **task** (str, default='squad'): task type of model **model_type** (str, default='bert'): model type, support 'bert'. **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset supports tfrecord data, please refer to [Guide](../examples/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md) to create tfrecord file first. | **In yaml file:** dataset: bert: root: /path/to/root label_file: /path/to/label_file task: squad model_type: bert **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['bert'] (root, label_file, transform=transform) |
-| sparse_dummy_v2(dense_shape, label_shape, sparse_ratio, low, high, dtype, transform, filter) | **dense_shape** (list or tuple):create single or multi sparse tensors, tuple represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **sparse_ratio** (float, default=0.5): the ratio of sparsity, support [0, 1]. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. 
-| **In yaml file:** dataset: sparse_dummy_v2: dense_shape: [224, 224, 3] label_shape: [1] sparse_ratio: 0.5 low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['sparse_dummy_v2'] (dense_shape, label_shape, sparse_ratio, low, high, dtype, transform=None, filter=None) |
-
-#### PyTorch
-
-| Dataset | Parameters | Comments | Usage |
-| :------ | :------ | :------ | :------ |
-| MNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/MNIST/, otherwise user should put mnist.npz under root/MNIST/ manually. | **In yaml file:** dataset: MNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['MNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| FashionMNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train**(bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/FashionMNIST/, otherwise user should put train-labels-idx1-ubyte.gz, train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz and t10k-images-idx3-ubyte.gz under root/FashionMNIST/ manually.| **In yaml file:** dataset: FashionMNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['FashionMNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR10(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR10: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR10'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR100(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR100: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR100'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| ImageFolder(root, transform, filter) | **root** (str): Root directory of dataset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: root/class_1/xxx.png root/class_1/xxy.png root/class_1/xxz.png ... root/class_n/123.png root/class_n/nsdf3.png root/class_n/asd932_.png Please put images of different categories into different folders. | **In yaml file:** dataset: ImageFolder: root: /path/to/root **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImageFolder'] (root=root,transform=transform, filter=None) |
-| ImagenetRaw(data_path, image_list, transform, filter) | **data_path** (str): Root directory of dataset **image_list** (str): data file, record image_names and their labels **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: data_path/img1.jpg data_path/img2.jpg ... data_path/imgx.jpg dataset will read name and label of each image from image_list file, if user set image_list to None, it will read from data_path/val_map.txt automatically. | **In yaml file:** dataset: ImagenetRaw: data_path: /path/to/image image_list: /path/to/label **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImagenetRaw'] (data_path, image_list, transform=transform, filter=None) |
-| COCORaw(root, img_dir, anno_dir, transform, filter) | **root** (str): Root directory of dataset **img_dir** (str, default='val2017'): image file directory **anno_dir** (str, default='annotations/instances_val2017.json'): annotation file directory **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: /root/img_dir/1.jpg /root/img_dir/2.jpg ... /root/img_dir/n.jpg /root/anno_dir **Please use Resize transform when batch_size>1**| **In yaml file:** dataset: COCORaw: root: /path/to/root img_dir: /path/to/image anno_dir: /path/to/annotation **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['COCORaw'] (root, img_dir, anno_dir, transform=transform, filter=None) If anno_dir is not set, the dataset will use default label map |
-| dummy(shape, low, high, dtype, label, transform, filter) | **shape** (list or tuple):shape of total samples, the first dimension should be the sample count of the dataset. support create multi shape tensors, use list of tuples for each tuple in the list, will create a such size tensor. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **label** (bool, default=True):whether to return 0 as label **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy: shape: [3, 224, 224, 3] low: 0.0 high: 127.0 dtype: float32 label: True **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy'] (shape, low, high, dtype, label, transform=None, filter=None) |
-| dummy_v2(input_shape, label_shape, low, high, dtype, transform, filter) | **input_shape** (list or tuple):create single or multi input tensors list represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy_v2: input_shape: [224, 224, 3] label_shape: [1] low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy_v2'] (input_shape, low, high, dtype, transform=None, filter=None) |
-| bert(dataset, task, model_type, transform, filter) | **dataset** (list): list of data **task** (str): the task of the model, support "classifier", "squad" **model_type** (str, default='bert'): model type, support 'distilbert', 'bert', 'xlnet', 'xlm' **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | This Dataset is to construct from the Bert TensorDataset and not a full implementation from yaml config. The original repo link is: https://github.com/huggingface/transformers. When you want use this Dataset, you should add it before you initialize your DataLoader. | **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['bert'] (dataset, task, model_type, transform=transform, filter=None) Now not support yaml implementation |
-| sparse_dummy_v2(dense_shape, label_shape, sparse_ratio, low, high, dtype, transform, filter) | **dense_shape** (list or tuple):create single or multi sparse tensors, tuple represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **sparse_ratio** (float, default=0.5): the ratio of sparsity, support [0, 1]. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. 
-| **In yaml file:** dataset: sparse_dummy_v2: dense_shape: [224, 224, 3] label_shape: [1] sparse_ratio: 0.5 low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['sparse_dummy_v2'] (dense_shape, label_shape, sparse_ratio, low, high, dtype, transform=None, filter=None) |
-
-#### MXNet
-
-| Dataset | Parameters | Comments | Usage |
-| :------ | :------ | :------ | :------ |
-| MNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/MNIST/, otherwise user should put mnist.npz under root/MNIST/ manually. | **In yaml file:** dataset: MNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['MNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| FashionMNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train**(bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/FashionMNIST/, otherwise user should put train-labels-idx1-ubyte.gz, train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz and t10k-images-idx3-ubyte.gz under root/FashionMNIST/ manually.| **In yaml file:** dataset: FashionMNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['FashionMNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR10(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR10: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR10'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR100(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR100: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR100'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| ImageFolder(root, transform, filter) | **root** (str): Root directory of dataset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: root/class_1/xxx.png root/class_1/xxy.png root/class_1/xxz.png ... root/class_n/123.png root/class_n/nsdf3.png root/class_n/asd932_.png Please put images of different categories into different folders. | **In yaml file:** dataset: ImageFolder: root: /path/to/root **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImageFolder'] (root=root,transform=transform, filter=None) |
-| ImagenetRaw(data_path, image_list, transform, filter) | **data_path** (str): Root directory of dataset **image_list** (str): data file, record image_names and their labels **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: data_path/img1.jpg data_path/img2.jpg ... data_path/imgx.jpg dataset will read name and label of each image from image_list file, if user set image_list to None, it will read from data_path/val_map.txt automatically. | **In yaml file:** dataset: ImagenetRaw: data_path: /path/to/image image_list: /path/to/label **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImagenetRaw'] (data_path, image_list, transform=transform, filter=None) |
-| COCORaw(root, img_dir, anno_dir, transform, filter) | **root** (str): Root directory of dataset **img_dir** (str, default='val2017'): image file directory **anno_dir** (str, default='annotations/instances_val2017.json'): annotation file directory **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: /root/img_dir/1.jpg /root/img_dir/2.jpg ... /root/img_dir/n.jpg /root/anno_dir **Please use Resize transform when batch_size > 1**| **In yaml file:** dataset: COCORaw: root: /path/to/root img_dir: /path/to/image anno_dir: /path/to/annotation **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['COCORaw'] (root, img_dir, anno_dir, transform=transform, filter=None) If anno_dir is not set, the dataset will use default label map |
-| dummy(shape, low, high, dtype, label, transform, filter) | **shape** (list or tuple):shape of total samples, the first dimension should be the sample count of the dataset. support create multi shape tensors, use list of tuples for each tuple in the list, will create a such size tensor. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **label** (bool, default=True):whether to return 0 as label **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy: shape: [3, 224, 224, 3] low: 0.0 high: 127.0 dtype: float32 label: True **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy'] (shape, low, high, dtype, label, transform=None, filter=None) |
-| dummy_v2(input_shape, label_shape, low, high, dtype, transform, filter) | **input_shape** (list or tuple):create single or multi input tensors list represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy_v2: input_shape: [224, 224, 3] label_shape: [1] low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy_v2'] (input_shape, low, high, dtype, transform=None, filter=None) |
-| sparse_dummy_v2(dense_shape, label_shape, sparse_ratio, low, high, dtype, transform, filter) | **dense_shape** (list or tuple):create single or multi sparse tensors, tuple represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **sparse_ratio** (float, default=0.5): the ratio of sparsity, support [0, 1]. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. 
| **In yaml file:** dataset: sparse_dummy_v2: dense_shape: [224, 224, 3] label_shape: [1] sparse_ratio: 0.5 low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['sparse_dummy_v2'] (dense_shape, label_shape, sparse_ratio, low, high, dtype, transform=None, filter=None) |
-
-#### ONNXRT
-
-| Dataset | Parameters | Comments | Usage |
-| :------ | :------ | :------ | :------ |
-| MNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/MNIST/, otherwise user should put mnist.npz under root/MNIST/ manually. | **In yaml file:** dataset: MNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['MNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| FashionMNIST(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train**(bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/FashionMNIST/, otherwise user should put train-labels-idx1-ubyte.gz, train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz and t10k-images-idx3-ubyte.gz under root/FashionMNIST/ manually.| **In yaml file:** dataset: FashionMNIST: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['FashionMNIST'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR10(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR10: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR10'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| CIFAR100(root, train, transform, filter, download) | **root** (str): Root directory of dataset **train** (bool, default=False): If True, creates dataset from train subset, otherwise from validation subset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions **download** (bool, default=True): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. | If download is True, it will download dataset to root/ and extract it automatically, otherwise user can download file from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz manually to root/ and extract it. | **In yaml file:** dataset: CIFAR100: root: /path/to/root train: False download: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['CIFAR100'] (root=root, train=False, transform=transform, filter=None, download=True) |
-| ImageFolder(root, transform, filter) | **root** (str): Root directory of dataset **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: root/class_1/xxx.png root/class_1/xxy.png root/class_1/xxz.png ... root/class_n/123.png root/class_n/nsdf3.png root/class_n/asd932_.png Please put images of different categories into different folders. | **In yaml file:** dataset: ImageFolder: root: /path/to/root **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImageFolder'] (root=root,transform=transform, filter=None) |
-| ImagenetRaw(data_path, image_list, transform, filter) | **data_path** (str): Root directory of dataset **image_list** (str): data file, record image_names and their labels **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: data_path/img1.jpg data_path/img2.jpg ... data_path/imgx.jpg dataset will read name and label of each image from image_list file, if user set image_list to None, it will read from data_path/val_map.txt automatically. | **In yaml file:** dataset: ImagenetRaw: data_path: /path/to/image image_list: /path/to/label **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['ImagenetRaw'] (data_path, image_list, transform=transform, filter=None) |
-| COCORaw(root, img_dir, anno_dir, transform, filter) | **root** (str): Root directory of dataset **img_dir** (str, default='val2017'): image file directory **anno_dir** (str, default='annotations/instances_val2017.json'): annotation file directory **transform** (transform object, default=None): transform to process input data **filter** (Filter objects, default=None): filter out examples according to specific conditions | Please arrange data in this way: /root/img_dir/1.jpg /root/img_dir/2.jpg ... /root/img_dir/n.jpg /root/anno_dir **Please use Resize transform when batch_size > 1**| **In yaml file:** dataset: COCORaw: root: /path/to/root img_dir: /path/to/image anno_dir: /path/to/annotation **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['COCORaw'] (root, img_dir, anno_dir, transform=transform, filter=None) If anno_dir is not set, the dataset will use default label map |
-| dummy(shape, low, high, dtype, label, transform, filter) | **shape** (list or tuple):shape of total samples, the first dimension should be the sample count of the dataset. support create multi shape tensors, use list of tuples for each tuple in the list, will create a such size tensor. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **label** (bool, default=True):whether to return 0 as label **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy: shape: [3, 224, 224, 3] low: 0.0 high: 127.0 dtype: float32 label: True **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy'] (shape, low, high, dtype, label, transform=None, filter=None) |
-| dummy_v2(input_shape, label_shape, low, high, dtype, transform, filter) | **input_shape** (list or tuple):create single or multi input tensors list represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. | **In yaml file:** dataset: dummy_v2: input_shape: [224, 224, 3] label_shape: [1] low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['dummy_v2'] (input_shape, low, high, dtype, transform=None, filter=None) |
-| GLUE(data_dir, model_name_or_path, max_seq_length, do_lower_case, task, model_type, dynamic_length, evaluate, transform, filter) | **data_dir** (str): The input data dir **model_name_or_path** (str): Path to pre-trained student model or shortcut name, **max_seq_length** (int, default=128): The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. **do_lower_case** (bool, default=True): Whether or not to lowercase the input. **task** (bool, default=True): The name of the task to fine-tune. Choices include mrpc, qqp, qnli, rte, sts-b, cola, mnli, wnli. **model_type** (str, default='bert'): model type, support 'distilbert', 'bert', 'mobilebert', 'roberta'. **dynamic_length** (bool, default=False): Whether to use fixed sequence length. **evaluate** (bool, default=True): Whether do evaluation or training. **transform** (bool, default=True): If true, **filter** (bool, default=True): If true, | Refer to [this example](/examples/onnxrt/language_translation/bert) on how to prepare dataset | **In yaml file:** dataset: bert: data_dir: False model_name_or_path: True (transform and filter are not set in the range of dataset) **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['bert'] (data_dir='/path/to/data/', model_name_or_path='bert-base-uncased', max_seq_length=128, task='mrpc', model_type='bert', dynamic_length=True, transform=None, filter=None) |
-| sparse_dummy_v2(dense_shape, label_shape, sparse_ratio, low, high, dtype, transform, filter) | **dense_shape** (list or tuple):create single or multi sparse tensors, tuple represent the sample shape of the dataset, eg and image size should be represented as (224, 224, 3), tuple contains multiple list and represent multi input tensors. **label_shape** (list or tuple):create single or multi label tensors list represent the sample shape of the label, eg and label size should be represented as (1,), tuple contains multiple list and represent multi label tensors. In yaml usage, it offers (1,) as the default value. **sparse_ratio** (float, default=0.5): the ratio of sparsity, support [0, 1]. **low** (list or float, default=-128.):low out the tensor value range from[0, 1] to [0, low] or [low, 0] if low < 0, if float, will implement all tensors with same low value. **high** (list or float, default=127.):high the tensor value by add all tensor element value high. If list, length of list should be same with shape list **dtype** (list or str, default='float32'):support multi tensor dtype setting. If list, length of list should be same with shape list, if str, all tensors will use same dtype. dtype support 'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool' **transform** (transform object, default=None): dummy dataset does not need transform. If transform is not None, it will ignore it. **filter** (Filter objects, default=None): filter out examples according to specific conditions | This dataset is to construct a dataset from a specific shape, the value range is calculated from: low * stand_normal(0, 1) + high. 
| **In yaml file:** dataset: sparse_dummy_v2: dense_shape: [224, 224, 3] label_shape: [1] sparse_ratio: 0.5 low: 0.0 high: 127.0 dtype: float32 **In user code:** from neural_compressor.data import Datasets datasets = Datasets(framework) dataset = datasets['sparse_dummy_v2'] (dense_shape, label_shape, sparse_ratio, low, high, dtype, transform=None, filter=None) |
-
-## Get start with Dataset API
-
-### Config dataloader in a yaml file
-
-```yaml
-quantization:
- approach: post_training_static_quant
- calibration:
- dataloader:
- dataset:
- COCORaw:
- root: /path/to/calibration/dataset
- filter:
- LabelBalance:
- size: 1
- transform:
- Resize:
- size: 300
-
-evaluation:
- accuracy:
- metric:
- ...
- dataloader:
- batch_size: 16
- dataset:
- COCORaw:
- root: /path/to/evaluation/dataset
- transform:
- Resize:
- size: 300
- performance:
- dataloader:
- batch_size: 16
- dataset:
- dummy_v2:
- input_shape: [224, 224, 3]
-```
-
-## User-specific dataset
-
-Users can register their own datasets as follows:
-
-```python
-class Dataset(object):
- def __init__(self, args):
- # init code here
-
- def __getitem__(self, idx):
- # use idx to get data and label
- return data, label
-
- def __len__(self):
- return len
-
-```
-
-After defining the dataset class, pass it to the quantizer:
-
-```python
-from neural_compressor.experimental import Quantization, common
-
-quantizer = Quantization(yaml_file)
-quantizer.calib_dataloader = common.DataLoader(
- dataset
-) # user can pass more optional args to dataloader such as batch_size and collate_fn
-quantizer.model = graph
-quantizer.eval_func = eval_func
-q_model = quantizer.fit()
-```
-
-## Examples
-
-- Refer to this [example](https://github.com/intel/neural-compressor/tree/v1.14.2/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq) to learn how to define a customised dataset.
-
-- Refer to this [HelloWorld example](/examples/helloworld/tf_example6) to learn how to configure a built-in dataset.
diff --git a/docs/source/diagnosis.md b/docs/source/diagnosis.md
deleted file mode 100644
index 9e70d695489..00000000000
--- a/docs/source/diagnosis.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# Diagnosis
-1. [Diagnosis Introduction](#diagnosis-introduction)
-2. [Supported Feature Matrix](#supported-feature-matrix)
-3. [Get Started](#get-started)
-4. [Example](#example)
-5. [Step by Step Diagnosis Example with TensorFlow](https://github.com/intel/neural-compressor/tree/master/neural_insights/docs/source/tf_accuracy_debug.md)
-6. [Step by Step Diagnosis Example with ONNXRT](https://github.com/intel/neural-compressor/tree/master/neural_insights/docs/source/onnx_accuracy_debug.md)
-
-# Diagnosis Introduction
-The diagnosis feature provides methods to debug the accuracy loss during quantization and profile the performance gap during benchmark.
-There are 2 ways to diagnose a model with Intel® Neural Compressor. First is non-GUI mode that is described below and second is GUI mode with [Neural Insights](https://github.com/intel/neural-compressor/tree/master/neural_insights) component.
-
-The workflow is described in the diagram below. First we have to configure scripts with diagnosis, then run them and check diagnosis info in the terminal. Test if the result is satisfying and repeat the steps if needed.
-![workflow](./imgs/workflow.jpg)
-
-# Supported Feature Matrix
-
-
-# Get Started
-## Install Intel® Neural Compressor
-First you need to install Intel® Neural Compressor.
-```shell
-git clone https://github.com/intel/neural-compressor.git
-cd neural-compressor
-pip install -r requirements.txt
-python setup.py install
-```
-
-## Modify script
-Modify quantization/benchmark script to run diagnosis by adding argument `diagnosis` set to `True` to `PostTrainingQuantConfig`/`BenchmarkConfig` as shown below.
-
-### Quantization diagnosis
-```python
-config = PostTrainingQuantConfig(diagnosis=True, ...)
-```
-
-### Benchmark diagnosis
-```python
-config = BenchmarkConfig(diagnosis=True, ...)
-```
-
-# Example
-Below it is explained how to run diagnosis for ONNX ResNet50 model.
-
-## Prepare dataset
-
-Download dataset [ILSVR2012 validation Imagenet dataset](http://www.image-net.org/challenges/LSVRC/2012/downloads).
-
-Download label:
-```shell
-wget http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
-tar -xvzf caffe_ilsvrc12.tar.gz val.txt
-```
-
-## Run quantization script
-Then execute script with quantization API in another terminal with --diagnose flag.
-```shell
-python examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py \
- --model_path=/path/to/resnet50_v1.onnx/ \
- --dataset_location=/path/to/ImageNet/ \
- --label_path=/path/to/val.txt/
- --tune
- --diagnose
-```
-
-## Run benchmark script
-To run profiling execute script with parameters shown in the command below.
-```shell
-python examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py \
- --model_path=/path/to/resnet50_v1.onnx/ \
- --dataset_location=/path/to/ImageNet/ \
- --label_path=/path/to/val.txt/
- --mode=performance \
- --benchmark \
- --diagnose
-```
-
-
-## See quantization data
-
-After script's execution you will see the results in your terminal.
-In the activations summary you can see a table with OP name, MSE (mean squared error), activation minimum and maximum sorted by MSE.
-
-![activations](./imgs/terminal-ops.jpg)
-
-In the weights summary table there are parameters like minimum, maximum, mean, standard deviation and variance for input model. The table is also sorted by MSE.
-
-![weights](./imgs/terminal-weights.jpg)
-
-## How to do diagnosis
-Neural Compressor diagnosis mode provides weights and activation data that includes several useful metrics for diagnosing potential losses of model accuracy.
-
-### Parameter description
-Data is presented in the terminal in form of table where each row describes single OP in the model. We present such quantities measures like:
-
-**MSE - Mean Squared Error** - it is a metric that measures how big is the difference between input and optimized model's weights for specific OP.
-
-$$
-MSE = \frac{1}{n}\sum_{i=1}^{n}(x_i-y_i)^2
-$$
-
-**Input model min** - minimum value of the input OP tensor data
-
-$$
-\min{\vec{x}}
-$$
-
-**Input model max** - maximum value of the input OP tensor data
-
-$$
-\max{\vec{x}}
-$$
-
-**Input model mean** - mean value of the input OP tensor data
-
-$$
-\mu =\frac{1}{n} \sum_{i=1}^{n} x_{i}
-$$
-
-**Input model standard deviation** - standard deviation of the input OP tensor data
-
-$$
-\sigma =\sqrt{\frac{1}{n}\sum\limits_{i=1}^n (x_i - \mu)^2}
-$$
-
-**Input model variance** - variance of the input OP tensor data
-
-$$
-Var = \sigma^2
-$$
-
-where,
-$x_i$ - input OP tensor data,
-$y_i$ - optimized OP tensor data,
-$\mu_x$ - input model mean,
-$\sigma_x$ - input model standard deviation
-
-### Diagnosis suggestions
-1. Check the nodes in MSE order. A high MSE usually means a higher possibility of accuracy loss during quantization, so falling back those Ops may recover some accuracy.
-2. Check the Min-Max data range. A dispersed data range usually means higher accuracy loss, so we can also try to fall back those Ops.
-3. Check the other data to find outliers, then try to fall back some Ops and test the quantization accuracy.
-
-*Note: These debug rules are only a reference and cannot always be trusted; sometimes the accuracy regression is hard to explain.*
-
-### Fallback setting example
-```python
-from neural_compressor import quantization, PostTrainingQuantConfig
-
-op_name_dict = {"v0/cg/conv0/conv2d/Conv2D": {"activation": {"dtype": ["fp32"]}}}
-config = PostTrainingQuantConfig(
- diagnosis=True,
- op_name_dict=op_name_dict,
-)
-q_model = quantization.fit(
- model,
- config,
- calib_dataloader=dataloader,
- eval_func=eval,
-)
-```
-
-## See profiling data
-
-In profiling section there is a table with nodes sorted by total execution time. It is possible to check which operations take the most time.
-
-![profiling](./imgs/terminal-profiling.jpg)
diff --git a/docs/source/distillation.md b/docs/source/distillation.md
deleted file mode 100644
index 7e2d6b063ff..00000000000
--- a/docs/source/distillation.md
+++ /dev/null
@@ -1,129 +0,0 @@
-Distillation
-============
-
-1. [Introduction](#introduction)
-
- 1.1. [Knowledge Distillation](#knowledge-distillation)
-
- 1.2. [Intermediate Layer Knowledge Distillation](#intermediate-layer-knowledge-distillation)
-
- 1.3. [Self Distillation](#self-distillation)
-
-2. [Distillation Support Matrix](#distillation-support-matrix)
-3. [Get Started with Distillation API ](#get-started-with-distillation-api)
-4. [Examples](#examples)
-
-## Introduction
-
-Distillation is one of popular approaches of network compression, which transfers knowledge from a large model to a smaller one without loss of validity. As smaller models are less expensive to evaluate, they can be deployed on less powerful hardware (such as a mobile device). Graph shown below is the workflow of the distillation, the teacher model will take the same input that feed into the student model to produce the output that contains knowledge of the teacher model to instruct the student model.
-
-
-
-
-Intel® Neural Compressor supports Knowledge Distillation, Intermediate Layer Knowledge Distillation and Self Distillation algorithms.
-
-### Knowledge Distillation
-Knowledge distillation is proposed in [Distilling the Knowledge in a Neural Network](https://arxiv.org/abs/1503.02531). It leverages the logits (the input of softmax in the classification tasks) of teacher and student model to minimize the the difference between their predicted class distributions, this can be done by minimizing the below loss function.
-
-$$L_{KD} = D(z_t, z_s)$$
-
-Where $D$ is a distance measurement, e.g. Euclidean distance and Kullback–Leibler divergence, $z_t$ and $z_s$ are the logits of teacher and student model, or predicted distributions from softmax of the logits in case the distance is measured in terms of distribution.
-
-### Intermediate Layer Knowledge Distillation
-
-There are more information contained in the teacher model beside its logits, for example, the output features of the teacher model's intermediate layers often been used to guide the student model, as in [Patient Knowledge Distillation for BERT Model Compression](https://arxiv.org/pdf/1908.09355) and [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984). The general loss function for this approach can be summarized as follow.
-
-$$L_{KD} = \sum\limits_i D(T_t^{n_i}(F_t^{n_i}), T_s^{m_i}(F_s^{m_i}))$$
-
-Where $D$ is a distance measurement as before, $F_t^{n_i}$ the output feature of the $n_i$'s layer of the teacher model, $F_s^{m_i}$ the output feature of the $m_i$'s layer of the student model. Since the dimensions of $F_t^{n_i}$ and $F_s^{m_i}$ are usually different, the transformations $T_t^{n_i}$ and $T_s^{m_i}$ are needed to match dimensions of the two features. Specifically, the transformation can take the forms like identity, linear transformation, 1X1 convolution etc.
-
-### Self Distillation
-
-Self-distillation ia a one-stage training method where the teacher model and student models can be trained together. It attaches several attention modules and shallow classifiers at different depths of neural networks and distills knowledge from the deepest classifier to the shallower classifiers. Different from the conventional knowledge distillation methods where the knowledge of the teacher model is transferred to another student model, self-distillation can be considered as knowledge transfer in the same model, from the deeper layers to the shallower layers.
-The additional classifiers in self-distillation allow the neural network to work in a dynamic manner, which leads to a much higher acceleration.
-
-
-
-
-Architecture from paper [Self-Distillation: Towards Efficient and Compact Neural Networks](https://ieeexplore.ieee.org/document/9381661)
-
-## Distillation Support Matrix
-
-|Distillation Algorithm |PyTorch |TensorFlow |
-|------------------------------------------------|:--------:|:---------:|
-|Knowledge Distillation |✔ |✔ |
-|Intermediate Layer Knowledge Distillation |✔ |Will be supported|
-|Self Distillation |✔ |✖ |
-
-## Get Started with Distillation API
-
-User can pass the customized training/evaluation functions to `Distillation` for flexible scenarios. In this case, distillation process can be done by pre-defined hooks in Neural Compressor. User needs to put those hooks inside the training function.
-
-Neural Compressor defines several hooks for user pass
-
-```
-on_train_begin() : Hook executed before training begins
-on_after_compute_loss(input, student_output, student_loss) : Hook executed after each batch inference of student model
-on_epoch_end() : Hook executed at each epoch end
-```
-
-Following section shows how to use hooks in user pass-in training function:
-
-```python
-def training_func_for_nc(model):
- compression_manager.on_train_begin()
- for epoch in range(epochs):
- compression_manager.on_epoch_begin(epoch)
- for i, batch in enumerate(dataloader):
- compression_manager.on_step_begin(i)
- ......
- output = model(batch)
- loss = ......
- loss = compression_manager.on_after_compute_loss(batch, output, loss)
- loss.backward()
- compression_manager.on_before_optimizer_step()
- optimizer.step()
- compression_manager.on_step_end()
- compression_manager.on_epoch_end()
- compression_manager.on_train_end()
-
-...
-```
-
-In this case, the launcher code for Knowledge Distillation is like the following:
-
-```python
-from neural_compressor.training import prepare_compression
-from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig
-
-distil_loss_conf = KnowledgeDistillationLossConfig()
-conf = DistillationConfig(teacher_model=teacher_model, criterion=distil_loss_conf)
-criterion = nn.CrossEntropyLoss()
-optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
-compression_manager = prepare_compression(model, conf)
-model = compression_manager.model
-
-model = training_func_for_nc(model)
-eval_func(model)
-```
-
-For Intermediate Layer Knowledge Distillation or Self Distillation, the only difference to above launcher code is that `distil_loss_conf` should be set accordingly as shown below. More detailed settings can be found in this [example](../../examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py#L510) for Intermediate Layer Knowledge Distillation and this [example](../../examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py#L344) for Self Distillation.
-
-```python
-from neural_compressor.config import (
- IntermediateLayersKnowledgeDistillationLossConfig,
- SelfKnowledgeDistillationLossConfig,
-)
-
-# for Intermediate Layer Knowledge Distillation
-distil_loss_conf = IntermediateLayersKnowledgeDistillationLossConfig(layer_mappings=layer_mappings)
-
-# for Self Distillation
-distil_loss_conf = SelfKnowledgeDistillationLossConfig(layer_mappings=layer_mappings)
-```
-## Examples
-[Distillation PyTorch Examples](../../examples/README.md#distillation-1)
-
-[Distillation TensorFlow Examples](../../examples/README.md#distillation)
-
-[Distillation Examples Results](./validated_model_list.md#validated-knowledge-distillation-examples)
diff --git a/docs/source/faq.md b/docs/source/faq.md
index b13f1784365..7f2e732022a 100644
--- a/docs/source/faq.md
+++ b/docs/source/faq.md
@@ -17,3 +17,12 @@ ImportError: libGL.so.1: cannot open shared object file: No such file or directo
#### Issue 4:
Conda package *neural-compressor-full* (this binary is only available from v1.13 to v2.1.1) dependency conflict may pending on conda installation for a long time.
**Solution:** run *conda install sqlalchemy=1.4.27 alembic=1.7.7 -c conda-forge* before install *neural-compressor-full*.
+#### Issue 5:
+If you run 3X torch extension API inside a docker container, then you may encounter the following error:
+```shell
+ValueError: No threading layer could be loaded.
+HINT:
+Intel TBB is required, try:
+$ conda/pip install tbb
+```
+**Solution:** It's actually already installed by `requirements_pt.txt`, so you just need to set `export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH`.
diff --git a/docs/source/get_started.md b/docs/source/get_started.md
index 61c22912c41..0ba1e10d111 100644
--- a/docs/source/get_started.md
+++ b/docs/source/get_started.md
@@ -2,35 +2,87 @@
1. [Quick Samples](#quick-samples)
-2. [Validated Models](#validated-models)
+2. [Feature Matrix](#feature-matrix)
## Quick Samples
-### Quantization with Python API
```shell
-# Install Intel Neural Compressor and TensorFlow
-pip install neural-compressor
-pip install tensorflow
-# Prepare fp32 model
-wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb
+# Install Intel Neural Compressor
+pip install neural-compressor-pt
```
```python
-from neural_compressor.data import DataLoader, Datasets
-from neural_compressor.config import PostTrainingQuantConfig
+from transformers import AutoModelForCausalLM
+from neural_compressor.torch.quantization import RTNConfig, prepare, convert
-dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3))
-dataloader = DataLoader(framework="tensorflow", dataset=dataset)
+user_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m")
+quant_config = RTNConfig()
+prepared_model = prepare(model=user_model, quant_config=quant_config)
+quantized_model = convert(model=prepared_model)
+```
-from neural_compressor.quantization import fit
+## Feature Matrix
+Intel Neural Compressor 3.X extends PyTorch and TensorFlow's APIs to support compression techniques.
+The below table provides a quick overview of the APIs available in Intel Neural Compressor 3.X.
+The Intel Neural Compressor 3.X mainly focuses on quantization-related features, especially for algorithms that benefit LLM accuracy and inference.
+It also provides some common modules across different frameworks. For example, auto-tune supports accuracy-driven quantization and mixed precision, and benchmark aims to measure the multi-instance performance of the quantized model.
-q_model = fit(
- model="./mobilenet_v1_1.0_224_frozen.pb",
- conf=PostTrainingQuantConfig(),
- calib_dataloader=dataloader,
-)
-```
+
-## Validated Models
-Intel® Neural Compressor validated the quantization for 10K+ models from popular model hubs (e.g., HuggingFace Transformers, Torchvision, TensorFlow Model Hub, ONNX Model Zoo).
-Over 30 pruning, knowledge distillation and model export samples are also available.
-More details for validated typical models are available [here](/examples/README.md).
+> **Note**:
+> Starting from the 3.0 release, we recommend using the 3.X API. Compression techniques during training, such as QAT, Pruning, and Distillation, are currently only available in the [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md).
diff --git a/docs/source/imgs/dynas.png b/docs/source/imgs/dynas.png
deleted file mode 100644
index e82a1c03160..00000000000
Binary files a/docs/source/imgs/dynas.png and /dev/null differ
diff --git a/docs/source/imgs/release_data.png b/docs/source/imgs/release_data.png
deleted file mode 100644
index 5e06eb067e1..00000000000
Binary files a/docs/source/imgs/release_data.png and /dev/null differ
diff --git a/docs/source/imgs/tensorboard_baseline_v0_cg_conv0.png b/docs/source/imgs/tensorboard_baseline_v0_cg_conv0.png
deleted file mode 100644
index daa3036c516..00000000000
Binary files a/docs/source/imgs/tensorboard_baseline_v0_cg_conv0.png and /dev/null differ
diff --git a/docs/source/imgs/tensorboard_tune_1_v0_cg_conv0.png b/docs/source/imgs/tensorboard_tune_1_v0_cg_conv0.png
deleted file mode 100644
index 6086a6eb837..00000000000
Binary files a/docs/source/imgs/tensorboard_tune_1_v0_cg_conv0.png and /dev/null differ
diff --git a/docs/source/imgs/tensorboard_v0_cg_conv0_histogram.png b/docs/source/imgs/tensorboard_v0_cg_conv0_histogram.png
deleted file mode 100644
index 6f5d52a5cb8..00000000000
Binary files a/docs/source/imgs/tensorboard_v0_cg_conv0_histogram.png and /dev/null differ
diff --git a/docs/source/imgs/terminal-ops.jpg b/docs/source/imgs/terminal-ops.jpg
deleted file mode 100644
index 32bfee29110..00000000000
Binary files a/docs/source/imgs/terminal-ops.jpg and /dev/null differ
diff --git a/docs/source/imgs/terminal-profiling.jpg b/docs/source/imgs/terminal-profiling.jpg
deleted file mode 100644
index 642a64ab446..00000000000
Binary files a/docs/source/imgs/terminal-profiling.jpg and /dev/null differ
diff --git a/docs/source/imgs/terminal-weights.jpg b/docs/source/imgs/terminal-weights.jpg
deleted file mode 100644
index 3fe8501017b..00000000000
Binary files a/docs/source/imgs/terminal-weights.jpg and /dev/null differ
diff --git a/docs/source/imgs/tutorial.png b/docs/source/imgs/tutorial.png
deleted file mode 100644
index 1c57041b0ce..00000000000
Binary files a/docs/source/imgs/tutorial.png and /dev/null differ
diff --git a/docs/source/imgs/workflow.jpg b/docs/source/imgs/workflow.jpg
deleted file mode 100644
index c40f02b99b9..00000000000
Binary files a/docs/source/imgs/workflow.jpg and /dev/null differ
diff --git a/docs/source/infrastructure.md b/docs/source/infrastructure.md
index 96f0ea3fca8..cfa1912c571 100644
--- a/docs/source/infrastructure.md
+++ b/docs/source/infrastructure.md
@@ -182,19 +182,6 @@ Intel® Neural Compressor has unified interfaces which dispatch tasks to differe
-
-
-
-[Neural architecture search](NAS.md):
-|Approach |Framework |
-|------------------------------------------------|:-----------:|
-|Basic |PyTorch |
-|DyNas |PyTorch |
-
-
-
-
-
[Mixed precision](mixed_precision.md):
|Framework | |
|--------------|:-----------:|
diff --git a/docs/source/installation_guide.md b/docs/source/installation_guide.md
index d73f885d403..ca33138b2a4 100644
--- a/docs/source/installation_guide.md
+++ b/docs/source/installation_guide.md
@@ -27,43 +27,49 @@ The following prerequisites and requirements must be satisfied for a successful
> Notes:
> - If you get some build issues, please check [frequently asked questions](faq.md) at first.
-### Install from Binary
-- Install from Pypi
- ```Shell
- # install stable basic version from pypi
- pip install neural-compressor
- ```
- ```Shell
- # [Experimental] install stable basic + PyTorch framework extension API from pypi
- pip install neural-compressor[pt]
- ```
- ```Shell
- # [Experimental] install stable basic + TensorFlow framework extension API from pypi
- pip install neural-compressor[tf]
- ```
+### Install Framework
+#### Install torch for CPU
+```Shell
+pip install torch --index-url https://download.pytorch.org/whl/cpu
+```
+#### Use Docker Image with torch installed for HPU
+https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click
-- Install from test Pypi
- ```Shell
- # install nightly version
- git clone https://github.com/intel/neural-compressor.git
- cd neural-compressor
- pip install -r requirements.txt
- # install nightly basic version from pypi
- pip install -i https://test.pypi.org/simple/ neural-compressor
- ```
+#### Install torch/intel_extension_for_pytorch for Intel GPU
+https://intel.github.io/intel-extension-for-pytorch/index.html#installation
-- Install from Conda
- ```Shell
- # install on Linux OS
- conda install opencv-python-headless -c fastai
- conda install neural-compressor -c conda-forge -c intel
- ```
- ```Shell
- # install on Windows OS
- conda install pycocotools -c esri
- conda install opencv-python-headless -c fastai
- conda install neural-compressor -c conda-forge -c intel
- ```
+#### Install torch for other platform
+https://pytorch.org/get-started/locally
+
+#### Install tensorflow
+```Shell
+pip install tensorflow
+```
+
+### Install from Binary
+- Install from Pypi
+```Shell
+# Install 2.X API + Framework extension API + PyTorch dependency
+pip install neural-compressor[pt]
+```
+```Shell
+# Install 2.X API + Framework extension API + TensorFlow dependency
+pip install neural-compressor[tf]
+```
+```Shell
+# Install 2.X API + Framework extension API
+# With this install CMD, some dependencies for the framework extension API are not installed;
+# you can install them separately with `pip install -r requirements_pt.txt` or `pip install -r requirements_tf.txt`.
+pip install neural-compressor
+```
+```Shell
+# Framework extension API + PyTorch dependency
+pip install neural-compressor-pt
+```
+```Shell
+# Framework extension API + TensorFlow dependency
+pip install neural-compressor-tf
+```
### Install from Source
@@ -71,8 +77,9 @@ The following prerequisites and requirements must be satisfied for a successful
git clone https://github.com/intel/neural-compressor.git
cd neural-compressor
pip install -r requirements.txt
- # build with basic functionality
python setup.py install
+ [optional] pip install -r requirements_pt.txt # for PyTorch framework extension API
+ [optional] pip install -r requirements_tf.txt # for TensorFlow framework extension API
```
### Install from AI Kit
@@ -88,15 +95,20 @@ The AI Kit is distributed through many common channels, including from Intel's w
## System Requirements
### Validated Hardware Environment
+
+#### Intel® Neural Compressor supports HPUs based on heterogeneous architecture with two compute engines (MME and TPC):
+* Intel Gaudi AI Accelerators (Gaudi2)
+
#### Intel® Neural Compressor supports CPUs based on [Intel 64 architecture or compatible processors](https://en.wikipedia.org/wiki/X86-64):
-* Intel Xeon Scalable processor (formerly Skylake, Cascade Lake, Cooper Lake, Ice Lake, and Sapphire Rapids)
-* Intel Xeon CPU Max Series (formerly Sapphire Rapids HBM)
+* Intel Xeon Scalable processor (Skylake, Cascade Lake, Cooper Lake, Ice Lake, and Sapphire Rapids)
+* Intel Xeon CPU Max Series (Sapphire Rapids HBM)
+* Intel Core Ultra Processors (Meteor Lake)
#### Intel® Neural Compressor supports GPUs built on Intel's Xe architecture:
-* Intel Data Center GPU Flex Series (formerly Arctic Sound-M)
-* Intel Data Center GPU Max Series (formerly Ponte Vecchio)
+* Intel Data Center GPU Flex Series (Arctic Sound-M)
+* Intel Data Center GPU Max Series (Ponte Vecchio)
#### Intel® Neural Compressor quantized ONNX models support multiple hardware vendors through ONNX Runtime:
@@ -112,7 +124,6 @@ The AI Kit is distributed through many common channels, including from Intel's w
Framework
TensorFlow
- Intel TensorFlow
Intel® Extension for TensorFlow*
PyTorch
Intel® Extension for PyTorch*
@@ -122,25 +133,26 @@ The AI Kit is distributed through many common channels, including from Intel's w
Version
- 2.15.0
- 2.14.1
- 2.13.1
- 2.14.0
- 2.13.0
- 2.14.0.1
+
+ 2.16.1
+ 2.15.0
+ 2.14.1
+
+ 2.15.0.0
+ 2.14.0.1
2.13.0.0
- 2.2.1
- 2.1.0
- 2.0.1
- 2.2.0
- 2.1.100
- 2.0.100
- 1.17.1
- 1.16.3
- 1.15.1
+
+ 2.3.0
+ 2.2.2
+ 2.1.1
+
+ 2.3.0
+ 2.2.0
+ 2.1.100
+
+ 1.18.0
+ 1.17.3
+ 1.16.3
-
-> **Note:**
-> Set the environment variable ``TF_ENABLE_ONEDNN_OPTS=1`` to enable oneDNN optimizations if you are using TensorFlow before v2.9. oneDNN is the default for TensorFlow since [v2.9](https://github.com/tensorflow/tensorflow/releases/tag/v2.9.0) ([Intel Cascade Lake](https://www.intel.com/content/www/us/en/products/platforms/details/cascade-lake.html) and newer CPUs).
diff --git a/docs/source/llm_recipes.md b/docs/source/llm_recipes.md
index 5f04242516b..8a9c17e7cd7 100644
--- a/docs/source/llm_recipes.md
+++ b/docs/source/llm_recipes.md
@@ -17,8 +17,8 @@ This document aims to publish the specific recipes we achieved for the popular L
| EleutherAI/gpt-j-6b | ✔ | ✔ | ✔ |
| facebook/opt-1.3b | ✔ | ✔ | ✔ |
| facebook/opt-30b | ✔ | ✔ | ✔ |
-| meta-llama/Llama-2-7b-hf | ✔ | ✔ | ✔ |
-| meta-llama/Llama-2-13b-hf | ✔ | ✔ | ✔ |
+| meta-llama/Llama-2-7b-hf | WIP | ✔ | ✔ |
+| meta-llama/Llama-2-13b-hf | WIP | ✔ | ✔ |
| meta-llama/Llama-2-70b-hf | ✔ | ✔ | ✔ |
| tiiuae/falcon-7b | ✔ | ✔ | ✔ |
| tiiuae/falcon-40b | ✔ | ✔ | ✔ |
@@ -29,7 +29,7 @@ This document aims to publish the specific recipes we achieved for the popular L
| databricks/dolly-v2-12b | ✖ | ✔ | ✖ |
| EleutherAI/gpt-neox-20b | ✖ | ✔ | ✔ |
| mistralai/Mistral-7B-v0.1 | ✖ | ✔ | ✔ |
-| THUDM/chatglm2-6b | WIP | ✔ | ✔ |
+| THUDM/chatglm2-6b | WIP | ✔ | WIP |
| THUDM/chatglm3-6b | WIP | ✔ | ✔ |
**Detail recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).**
@@ -40,8 +40,8 @@ This document aims to publish the specific recipes we achieved for the popular L
> - The WIP recipes will be published soon.
## Large Language Models Accuracy
-
-
+
+
Model
lambada_openai
@@ -63,212 +63,210 @@ This document aims to publish the specific recipes we achieved for the popular L
Ratio
ACC
Ratio
-
-
+
baichuan-inc/Baichuan-13B-Chat
67.57%
- 68.23%
- 1.0098
- 67.57%
- 1.0000
- 67.84%
- 1.0040
- NA
- NA
+ 67.86%
+ 1.0043
+ 67.55%
+ 0.9997
+ 67.46%
+ 0.9984
+ N/A
+ N/A
baichuan-inc/Baichuan2-13B-Chat
71.51%
- 70.89%
- 0.9913
- 71.53%
- 1.0003
- 71.76%
- 1.0035
- NA
- NA
+ 75.51%
+ 1.0559
+ 71.57%
+ 1.0008
+ 71.45%
+ 0.9992
+ 70.87%
+ 0.9911
baichuan-inc/Baichuan2-7B-Chat
67.67%
- 67.96%
- 1.0043
- 67.59%
- 0.9988
- 67.24%
- 0.9936
- 67.42%
- 0.9963
+ 67.51%
+ 0.9976
+ 67.61%
+ 0.9991
+ 68.08%
+ 1.0061
+ 67.18%
+ 0.9928
bigscience/bloom-1b7
46.34%
- 47.99%
- 1.0356
- 46.38%
- 1.0009
- 46.19%
- 0.9968
- NA
- NA
+ 47.97%
+ 1.0352
+ 46.21%
+ 0.9972
+ 47.00%
+ 1.0142
+ N/A
+ N/A
databricks/dolly-v2-12b
64.35%
- NA
- NA
- 64.10%
- 0.9961
- NA
- NA
- NA
- NA
+ N/A
+ N/A
+ 63.92%
+ 0.9933
+ N/A
+ N/A
+ N/A
+ N/A
EleutherAI/gpt-j-6b
68.31%
- 68.33%
- 1.0003
+ 68.00%
+ 0.9955
+ 68.27%
+ 0.9994
68.23%
0.9988
- 68.79%
- 1.0070
- 68.43%
- 1.0018
+ 67.40%
+ 0.9867
EleutherAI/gpt-neox-20b
72.33%
- NA
- NA
- 72.25%
- 0.9989
- 71.96%
- 0.9949
- NA
- NA
+ N/A
+ N/A
+ 72.29%
+ 0.9994
+ 72.15%
+ 0.9975
+ N/A
+ N/A
facebook/opt-1.3b
57.89%
- 57.54%
- 0.9940
- 58.08%
- 1.0033
- 58.57%
- 1.0117
- NA
- NA
+ 57.35%
+ 0.9907
+ 58.12%
+ 1.0040
+ 58.01%
+ 1.0021
+ N/A
+ N/A
facebook/opt-30b
71.49%
71.51%
1.0003
- 71.51%
- 1.0003
+ 71.53%
+ 1.0006
71.82%
1.0046
- 72.11%
- 1.0087
+ 71.43%
+ 0.9992
meta-llama/Llama-2-13b-hf
76.77%
- 76.25%
- 0.9932
- 76.75%
- 0.9997
- 77.43%
- 1.0086
- 76.75%
- 0.9997
+ N/A
+ N/A
+ 76.89%
+ 1.0016
+ 76.96%
+ 1.0025
+ N/A
+ N/A
meta-llama/Llama-2-70b-hf
79.64%
- 79.55%
- 0.9989
- 79.57%
- 0.9991
- 80.09%
- 1.0057
- 79.97%
- 1.0041
+ 79.53%
+ 0.9986
+ 79.62%
+ 0.9997
+ 80.05%
+ 1.0051
+ N/A
+ N/A
meta-llama/Llama-2-7b-hf
73.92%
- 73.45%
- 0.9936
- 73.96%
- 1.0005
- 73.45%
- 0.9936
- 73.49%
- 0.9942
+ N/A
+ N/A
+ 73.90%
+ 0.9997
+ 73.51%
+ 0.9945
+ N/A
+ N/A
mistralai/Mistral-7B-v0.1
75.90%
- NA
- NA
+ N/A
+ N/A
75.80%
0.9987
- 76.13%
- 1.0030
- 75.61%
- 0.9962
+ 75.37%
+ 0.9930
+ 75.82%
+ 0.9989
THUDM/chatglm2-6b
53.23%
- NA
- NA
- 53.19%
- 0.9992
- 52.77%
- 0.9914
- 53.35%
- 1.0023
+ N/A
+ N/A
+ 53.00%
+ 0.9957
+ N/A
+ N/A
+ N/A
+ N/A
THUDM/chatglm3-6b
59.09%
- NA
- NA
- 59.01%
- 0.9986
- NA
- NA
- 58.61%
- 0.9919
+ N/A
+ N/A
+ 59.03%
+ 0.9990
+ N/A
+ N/A
+ 58.59%
+ 0.9915
tiiuae/falcon-40b
77.22%
- 77.04%
- 0.9977
- 77.22%
- 1.0000
- 77.94%
- 1.0093
- 78.79%
- 1.0203
+ 77.26%
+ 1.0005
+ 77.18%
+ 0.9995
+ 77.97%
+ 1.0097
+ N/A
+ N/A
tiiuae/falcon-7b
74.67%
- 76.44%
- 1.0237
- 74.77%
- 1.0013
- 75.00%
- 1.0044
- NA
- NA
+ 76.17%
+ 1.0201
+ 74.73%
+ 1.0008
+ 74.79%
+ 1.0016
+ N/A
+ N/A
-
-
+
diff --git a/docs/source/migration.md b/docs/source/migration.md
index ba5f654e41c..b4d087c890f 100644
--- a/docs/source/migration.md
+++ b/docs/source/migration.md
@@ -195,7 +195,6 @@ QuantizationAwareTrainingConfig(
objective="performance", # tuning.objective: same as in the conf.yaml;
performance_only=False, # tuning.performance_only: same as in the conf.yaml;
## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined;
- ## diagnosis: these parameters do not need to specially be defined;
)
```
diff --git a/docs/source/pythonic_style.md b/docs/source/pythonic_style.md
deleted file mode 100644
index d036e9775d5..00000000000
--- a/docs/source/pythonic_style.md
+++ /dev/null
@@ -1,146 +0,0 @@
-Pythonic Style Access for Configurations
-====
-
-1. [Introduction](#introduction)
-2. [Supported Feature Matrix](#supported-feature-matrix)
-3. [Get Started with Pythonic API for Configurations](#get-started-with-pythonic-api-for-configurations)
-
-## Introduction
-To meet the variety of needs arising from various circumstances, INC now provides a
-pythonic style access - Pythonic API - for same purpose of either user or framework configurations.
-
-The Pythonic API for Configuration allows users to specify configurations
-directly in their python codes without referring to
-a separate YAML file. While we support both simultaneously,
-the Pythonic API for Configurations has several advantages over YAML files,
-which one can tell from usages in the context below. Hence, we recommend
-users to use the Pythonic API for Configurations moving forward.
-
-## Supported Feature Matrix
-
-### Pythonic API for User Configurations
-| Optimization Techniques | Pythonic API |
-|-------------------------|:------------:|
-| Quantization | ✔ |
-| Pruning | ✔ |
-| Distillation | ✔ |
-| NAS | ✔ |
-### Pythonic API for Framework Configurations
-
-| Framework | Pythonic API |
-|------------|:------------:|
-| TensorFlow | ✔ |
-| PyTorch | ✔ |
-| ONNX | ✔ |
-| MXNet | ✔ |
-
-## Get Started with Pythonic API for Configurations
-
-### Pythonic API for User Configurations
-Now, let's go through the Pythonic API for Configurations in the order of
-sections similar as in user YAML files.
-
-#### Quantization
-
-To specify quantization configurations, users can use the following
-Pythonic API step by step.
-
-* First, load the ***config*** module
-```python
-from neural_compressor import config
-```
-* Next, assign values to the attributes of *config.quantization* to use specific configurations, and pass the config to *Quantization* API.
-```python
-config.quantization.inputs = ["image"] # list of str
-config.quantization.outputs = ["out"] # list of str
-config.quantization.backend = "onnxrt_integerops" # support tensorflow, tensorflow_itex, pytorch, pytorch_ipex, pytorch_fx, onnxrt_qlinearops, onnxrt_integerops, onnxrt_qdq, onnxrt_qoperator, mxnet
-config.quantization.approach = "post_training_dynamic_quant" # support post_training_static_quant, post_training_dynamic_quant, quant_aware_training
-config.quantization.device = "cpu" # support cpu, gpu
-config.quantization.op_type_dict = {"Conv": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}} # dict
-config.quantization.strategy = "mse" # support basic, mse, bayesian, random, exhaustive
-config.quantization.objective = "accuracy" # support performance, accuracy, modelsize, footprint
-config.quantization.timeout = 100 # int, default is 0
-config.quantization.accuracy_criterion.relative = 0.5 # float, default is 0.01
-config.quantization.reduce_range = (
- False # bool. default value depends on hardware, True if cpu supports VNNI instruction, otherwise is False
-)
-config.quantization.use_bf16 = False # bool
-from neural_compressor.experimental import Quantization
-
-quantizer = Quantization(config)
-```
-
-#### Distillation
-To specify distillation configurations, users can assign values to
-the corresponding attributes.
-```python
-from neural_compressor import config
-
-config.distillation.optimizer = {"SGD": {"learning_rate": 0.0001}}
-
-from neural_compressor.experimental import Distillation
-
-distiller = Distillation(config)
-```
-#### Pruning
-To specify pruning configurations, users can assign values to the corresponding attributes.
-```python
-from neural_compressor import config
-
-config.pruning.weight_compression.initial_sparsity = 0.0
-config.pruning.weight_compression.target_sparsity = 0.9
-config.pruning.weight_compression.max_sparsity_ratio_per_layer = 0.98
-config.pruning.weight_compression.prune_type = "basic_magnitude"
-config.pruning.weight_compression.start_epoch = 0
-config.pruning.weight_compression.end_epoch = 3
-config.pruning.weight_compression.start_step = 0
-config.pruning.weight_compression.end_step = 0
-config.pruning.weight_compression.update_frequency = 1.0
-config.pruning.weight_compression.update_frequency_on_step = 1
-config.pruning.weight_compression.prune_domain = "global"
-config.pruning.weight_compression.pattern = "tile_pattern_1x1"
-
-from neural_compressor.experimental import Pruning
-
-prune = Pruning(config)
-```
-#### NAS
-To specify nas configurations, users can assign values to the
-corresponding attributes.
-
-```python
-from neural_compressor import config
-
-config.nas.approach = "dynas"
-from neural_compressor.experimental import NAS
-
-nas = NAS(config)
-```
-
-
-#### Benchmark
-To specify benchmark configurations, users can assign values to the
-corresponding attributes.
-```python
-from neural_compressor import config
-
-config.benchmark.warmup = 10
-config.benchmark.iteration = 10
-config.benchmark.cores_per_instance = 10
-config.benchmark.num_of_instance = 10
-config.benchmark.inter_num_of_threads = 10
-config.benchmark.intra_num_of_threads = 10
-
-from neural_compressor.experimental import Benchmark
-
-benchmark = Benchmark(config)
-```
-### Pythonic API for Framework Configurations
-Now, let's go through the Pythonic API for Configurations in setting up similar framework
-capabilities as in YAML files. Users can specify a framework's (eg. ONNX Runtime) capability by
-assigning values to corresponding attributes.
-
-```python
-config.onnxruntime.precisions = ["int8", "uint8"]
-config.onnxruntime.graph_optimization_level = "DISABLE_ALL" # only onnxruntime has graph_optimization_level attribute
-```
diff --git a/docs/source/releases_info.md b/docs/source/releases_info.md
index a2b2c15c6ca..7d263a0a8db 100644
--- a/docs/source/releases_info.md
+++ b/docs/source/releases_info.md
@@ -17,8 +17,6 @@ Contact [inc.maintainers@intel.com](mailto:inc.maintainers@intel.com) if you nee
The MSE tuning strategy does not work with the PyTorch adaptor layer. This strategy requires a comparison between the FP32 and INT8 tensors to decide which op impacts the final quantization accuracy. The PyTorch adaptor layer does not implement this inspect tensor interface. Therefore, do not choose the MSE tuning strategy for PyTorch models.
-The diagnosis function does not work with ONNX Runtime 1.13.1 for QDQ format quantization of ONNX models. It can not dump the output value of QDQ pairs since framework limitation.
-
## Incompatible Changes
[Neural Compressor v1.2](https://github.com/intel/neural-compressor/tree/v1.2) introduces incompatible changes in user facing APIs. Please refer to [incompatible changes](incompatible_changes.md) to know which incompatible changes are made in v1.2.
diff --git a/docs/source/tensorboard.md b/docs/source/tensorboard.md
deleted file mode 100644
index 670f7930417..00000000000
--- a/docs/source/tensorboard.md
+++ /dev/null
@@ -1,205 +0,0 @@
-TensorBoard
-===========
-1. [Introduction](#introduction)
-2. [Supported Feature Matrix](#supported-feature-matrix)
-3. [Get Started with Tensorboard](#get-started-with-tensorboard)
-4. [Examples](#examples)
-
-## Introduction
-
-TensorBoard is a suite of web applications that provide measurements and visualizations used to inspect and understand your machine learning workflow for [TensorFlow TensorBoard](https://github.com/tensorflow/tensorboard) and [PyTorch TensorBoard](https://github.com/pytorch/pytorch/tree/master/torch/utils/tensorboard). Intel® Neural Compressor performs accuracy-driven quantization; the tuning process quantizes the tensor and performs graph transformation and optimization to achieve optimal performance under accuracy requirement. If you want to observe the behaviors of the optimizations, or if you want to discover why an accuracy target cannot be met, TensorBoard can provide you with some valuable information. You can inspect the graph and tensor after each tuning run. If a model cannot meet accuracy requirements, you can analyze the comparison of FP32 and the INT8 tensor histogram.
-
-We collect the TensorBoard event summary during evaluation. The first time is on the baseline FP32 model and later on at the end of each tuning runs are based on the quantized model. The TensorBoard log directory is named baseline_acc_ and tune__acc_, to indicate the stage and accuracy of the data that is generated. Users can select their data of interest to observe with TensorBoard.
-
-## Supported Feature Matrix
-| Optimized Framework | Tensorboard Support |
-|---------------------|:-------------------:|
-| PyTorch | ✔ |
-| TensorFlow | ✔ |
-
-## Get Started with TensorBoard
-### PyTorch TensorBoard
-
-PyTorch TensorBoard implementation includes three steps:
-
-* Before evaluation in the _pre_eval_hook() where instruments observers are placed in the model.
-* During evaluation where observers collect tensor information in a dict data structure.
-* After evaluation where the graph and tensor information is dumped with the TensorBoard summary writer in _post_eval_hook().
-
-
-The detailed algorithm can be described by the Pseudo code:
-```python
-
-def evaluate(self, model, dataloader, postprocess=None, \
- metric=None, measurer=None, iteration=-1, tensorboard=False):
-# The tensorboard summary is collected in the evaluation function of adaptor
-
- if tensorboard:
- model = self._pre_eval_hook(model)
- #evaluation code
- ....
- acc = metric.result()
- if tensorboard:
- self._post_eval_hook(model, accuracy=acc, input=input)
-
-def _pre_eval_hook(self, model):
-# Insert observer submodule into each module in whitelist in order to collect tensor information
-
- class _RecordingObserver(ABC, torch.nn.Module):
- # Define the Observer class
-
- def forward(self, x):
- # Record the tensor information in a dict structure
- self.output_tensors_dict[self.current_iter] = x.to("cpu")
-
- @torch.jit.export
- def get_tensor_value(self):
- return self.output_tensors_dict
-
- def _observer_forward_hook(module, input, output):
- #Forward hook that calls observer on the output
- return module.activation_post_process(output)
-
- def _add_observer_(module, op_list=None, prefix=""):
-
- #Add observer for each child module
- for name, child in module.named_children():
- _add_observer_(child, op_list, op_name)
-
- if module is a leaf:
- module.add_module(
- 'activation_post_process',
- module.qconfig.activation())
- module.register_forward_hook(_observer_forward_hook)
-
-def _post_eval_hook(self, model, **args):
- # Dump tensor and graph information with TensorBoard summary writer
- if self.dump_times == 0:
- writer = SummaryWriter('runs/eval/baseline' +
- '_acc' + str(accuracy), model)
- else:
- writer = SummaryWriter('runs/eval/tune_' +
- str(self.dump_times) +
- '_acc' + str(accuracy), model)
-
- if args is not None and 'input' in args and self.dump_times == 0:
- writer.add_graph(model, args['input'])
-
- from torch.quantization import get_observer_dict
- get_observer_dict(model, observer_dict)
- for key in observer_dict:
- ......
- op_name = key.strip(".activation_post_process")
- summary[op_name + ".output"] = observer_dict[key].get_tensor_value()
-
- for iter in summary[op_name + ".output"]:
- #Record output tensor, for fused op only record the parent op output
- ......
- if summary[op_name + ".output"][iter].is_quantized:
- writer.add_histogram(
- op + "/Output/int8",
- torch.dequantize(summary[op_name +
- ".output"][iter]))
- else:
- writer.add_histogram(
- op + "/Output/fp32",
- summary[op_name + ".output"][iter])
-
- state_dict = model.state_dict()
- for key in state_dict:
- # Record weight tensor, fused child tensorBoard tag will be merge
- if state_dict[key].is_quantized:
- writer.add_histogram(op + "/int8",
- torch.dequantize(state_dict[key]))
- else:
- writer.add_histogram(op + "/fp32", state_dict[key])
-
-```
-
-
-#### Usage
-
-1. Add "tensorboard: true" in the yaml file.
-2. Run quantization tuning; a "./runs" folder is generated in the working folder.
-3. Start tensorboard:
-
- ``shell
- tensorboard --bind_all --logdir_spec baseline:./runs/eval/tune_0_acc0.80,tune_1:././runs/eval/tune_1_acc0.79
- ``
-
-
-### TensorFlow Tensorboard
-
-TensorFlow TensorBoard implementation includes four steps:
-
-1. Before evaluation where we create the TensorBoard summary write and write graph, collect FP32 and node names for inspection, and dump the histogram of weights and bias tensor directly from graph_def.
-2. Run get_tensor_by_name_with_import() where we get data output tensors.
-3. Run session.run() to predict and get the inference result of the output tensor list collected in the previous step.
-4. Enumerate the output tensor and write the histogram.
-
-See the [tensorflow.py](https://github.com/intel/neural-compressor/tree/master/neural_compressor/adaptor/tensorflow.py) evaluate() function for details.
-
-#### Usage
-
-1. Add "tensorboard: true" in the yaml file.
-
-2. Run quantization tuning; a "./runs" folder is generated in the working folder. For example:
-
- ```shell
- ls ./runs/eval
- baseline_acc_0.776 tune_1_acc_0.095
- ```
- The baseline_acc_0.776 folder contains the FP32 event log and 0.776 is the FP32 accuracy. tune_1_acc_0.095 contains the evaluation event log of the first run of tuning.
-
-3. Start tensorboard:
-
- ```shell
- tensorboard --bind_all --logdir_spec baseline:./runs_v3/eval/baseline_acc_0.776/,tune_1:./runs_v3/eval/tune_1_acc_0.095/
- ```
-## Examples
-
-### PyTorch Examples
-
-```shell
- examples/pytorch/eager/image_recognition/imagenet/cpu/ptq/run_tuning_dump_tensor.sh
-```
-
-### TensorFlow Examples
-
-1. Add "tensorboard: true" into examples/tensorflow/image_recognition/inceptionv3.yaml. In order to demonstrate the usage of TensorBoard, remove the following lines which are added to skip the quantization of 'v0/cg/conv0/conv2d/Conv2D' to avoid a known limitation.
-
- ```yaml
- op_wise: {
- 'v0/cg/conv0/conv2d/Conv2D': {
- 'activation': {'dtype': ['fp32']},
- }
- }
- ```
-
-2. Run tuning:
-
- ```shell
- bash run_quant.sh --topology=inception_v3 --dataset_location= \
- --input_model=./inceptionv3_fp32_pretrained_model.pb --output_model=./nc_inceptionv3.pb --config=./inceptionv3_dump_tensor.yaml
- ```
-
-3. Start TensorBoard
-
- ```shell
- tensorboard --bind_all --logdir_spec baseline:./runs_v3/eval/baseline_acc_0.776/,tune_1:./runs_v3/eval/tune_1_acc_0.095/
- ```
-
-4. In order to find the reason why tune_1 got such poor accuracy, we can observe the TensorBoard.
-
-* From the **GRAPHS** tab, select "baseline/." in the "Run" box and find the first 'Conv2d' op after 'input' op. The op name is "v0/cg/conv0/Relu":
-
-![TensorBoard Baseline](./imgs/tensorboard_baseline_v0_cg_conv0.png "TensorBoard Baseline")
-
-* From the **GRAPHS** tab, select "tune_1/." in the "Run" box and find the first 'Conv2d' op after 'input' op. The tensor name is 'v0/cg/conv0/conv2d/Conv2D_eightbit_requantize':
-
-![TensorBoard Tuning](./imgs/tensorboard_tune_1_v0_cg_conv0.png "TensorBoard Tuning")
-
-
-* Switch to the **HISTOGRAMS** tab. Click the 'v0/cg/conv0' op name in the search box. TensorBoard groups the tensors with the same op name together so you can compare the tensor of baseline 'v0/cg/conv0/Relu' with the tensor of tune_1 'v0/cg/conv0/conv2d/Conv2D_eightbit_requantize_int8.output'. Note that the tensor name can be changed after quantization, so group the tensor by op name and compare. From the chart below, we can see that the histogram of the first conv2d output tensor are different. This is due to a known TensorFlow issue. After filtering the 'v0/cg/conv0/conv2d/Conv2D' op by adding "op_wise" in the yaml file, the issue disappears.
-
-![TensorBoard Histogram](./imgs/tensorboard_v0_cg_conv0_histogram.png "TensorBoard Histogram")
diff --git a/docs/source/user_guide.md b/docs/source/user_guide.md
deleted file mode 100644
index 662a2ec177c..00000000000
--- a/docs/source/user_guide.md
+++ /dev/null
@@ -1,101 +0,0 @@
-User Guide
-===========================
-
-Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search to help the user optimize their model. The below documents could help you to get familiar with concepts and modules in Intel® Neural Compressor. Learn how to utilize the APIs in Intel® Neural Compressor to conduct quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks.
-
-## Overview
-This part helps user to get a quick understand about design structure and workflow of Intel® Neural Compressor. We provided broad examples to help users get started.
-
-
-## Python-based APIs
-Python-based APIs contains more details about the functional APIs in Intel® Neural Compressor,
-which introduce the mechanism of each function and provides a tutorial to help the user apply in their own cases.
-Please note that we will stop to support Intel Neural Compressor 1.X API in the future.
-So we provide a comprehensive migration document in Code Migration to help the user update their code from previous 1.X version to the new 2.X version.
-In 2.X API, it's very important to create the `DataLoader` and `Metrics` for your examples, so we provide the detail introductions.
-
-
-
-## Neural Coder (Zero-code Optimization)
-Neural Coder shows our special innovation about zero-code optimization to help user quickly apply Intel® Neural Compressor optimization without coding.
-
-
-## Advanced Topics
-This part provides the advanced topics that help user dive deep into Intel® Neural Compressor.
-
-
-## Innovations for Productivity
-We are continue creating some user-friendly applications to improve the productivity. From v2.2 we have `Neural Solution` for distributed quantization and `Neural Insights` for quantization accuracy debugging.
-
diff --git a/docs/source/user_yaml.md b/docs/source/user_yaml.md
deleted file mode 100644
index 14d4157ab78..00000000000
--- a/docs/source/user_yaml.md
+++ /dev/null
@@ -1,166 +0,0 @@
-User YAML Configuration Files
-=====
-1. [Introduction](#introduction)
-2. [Supported Feature Matrix](#supported-feature-matrix)
-3. [Get Started with User YAML Files](#get-started-with-user-yaml-files)
-
-
-## Introduction
-
-Intel® Neural Compressor uses YAML files for quick
-and user-friendly configurations. There are two types of YAML files -
-user YAML files and framework YAML files, which are used in
-running user cases and setting up framework capabilities, respectively.
-
-First, let's take a look at a user YAML file, It defines the model, tuning
-strategies, tuning calibrations and evaluations, and performance benchmarking
-of the passing model vs. original model.
-
-## Supported Feature Matrix
-
-| Optimization Techniques | YAML Configuration Files |
-|-------------------------|:------------------------:|
-| Quantization | ✔ |
-| Pruning | ✔ |
-| Distillation | ✔ |
-
-
-## Get started with User YAML Files
-
-
-A complete user YAML file is organized logically into several sections:
-
-* ***model***: The model specifications define a user model's name, inputs, outputs and framework.
-
-
-```yaml
-model: # mandatory. used to specify model specific information.
- name: mobilenet_v1
- framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.
- inputs: image_tensor # optional. inputs field is only required in tensorflow.
- outputs: num_detections,detection_boxes,detection_scores,detection_classes # optional. outputs field is only required in tensorflow.
-```
-* ***quantization***: The quantization specifications define quantization tuning space and related calibrations. To calibrate, users can
-specify *sampling_size* (optional) and use the subsection *dataloader* to specify
-the dataset location using *root* and transformation using *transform*. To
-implement tuning space constraints, users can use the subsection *model_wise* and *op_wise* for specific configurations.
-
-```yaml
-quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space.
- calibration:
- sampling_size: 20 # optional. default value is 100. used to set how many samples should be used in calibration.
- dataloader:
- dataset:
- ImageRecord:
- root: /path/to/imagenet/ # NOTE: modify to calibration dataset location if needed
- transform:
- BilinearImagenet:
- height: 224
- width: 224
- model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space.
- weight:
- granularity: per_channel
- scheme: asym
- dtype: int8
- algorithm: minmax
- activation:
- granularity: per_tensor
- scheme: asym
- dtype: int8, fp32
- algorithm: minmax, kl
- op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space.
- 'conv1': {
- 'activation': {'dtype': ['uint8', 'fp32'],
- 'algorithm': ['minmax', 'kl'],
- 'scheme':['sym']},
- 'weight': {'dtype': ['int8', 'fp32'],
- 'algorithm': ['minmax']}
- }
- }
-```
-
-* ***pruning***: The pruning specifications define pruning tuning space. To define the training behavior, uses can
-use the subsection *train* to specify the training hyper-parameters and the training dataloader.
-To define the pruning approach, users can use the subsection *approach* to specify
-pruning target, choose the type of pruning algorithm, and the way to apply it
-during training process.
-
-```yaml
-pruning:
- train:
- dataloader:
- ...
- epoch: 40
- optimizer:
- Adam:
- learning_rate: 1e-06
- beta_1: 0.9
- beta_2: 0.999
- epsilon: 1e-07
- criterion:
- SparseCategoricalCrossentropy:
- reduction: sum_over_batch_size
- from_logits: False
- approach:
- weight_compression:
- initial_sparsity: 0.0
- target_sparsity: 0.54
- start_epoch: 0
- end_epoch: 19
- pruners:
- - !Pruner
- start_epoch: 0
- end_epoch: 19
- prune_type: basic_magnitude
-```
-* ***distillation***: The distillation specifications define distillation's tuning
-space. Similar to pruning, to define the training behavior, users can use the
-subsection *train* to specify the training hyper-parameters and the training
-dataloader and it is optional if users implement *train_func* and set the attribute
-of distillation instance to *train_func*. For criterion, Intel® Neural Compressor provides a built-in
-knowledge distillation loss class to calculate distillation loss.
-```yaml
-distillation:
- train:
- start_epoch: 0
- end_epoch: 90
- iteration: 1000
- frequency: 1
- dataloader:
- ...
- optimizer:
- SGD:
- learning_rate: 0.001
- momentum: 0.1
- nesterov: True
- weight_decay: 0.001
- criterion:
- KnowledgeDistillationLoss:
- temperature: 1.0
- loss_types: ['CE', 'CE']
- loss_weights: [0.5, 0.5]
-```
-* ***evaluation***: The evaluation specifications define the dataloader and metric for accuracy evaluation as well as dataloader
-and configurations for performance benchmarking.
-```yaml
-evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization.
- accuracy:
- metric:
- ...
- dataloader:
- ...
-```
-* ***tuning***: The tuning specifications define overall tuning targets. Users can
-use *accuracy_criterion* to specify the target of accuracy loss percentage and use
-*exit_policy* to specify the tuning timeout in seconds. The random
-seed can be specified using *random_seed*.
-
-```yaml
-tuning:
- accuracy_criterion:
- relative: 0.01 # the tuning target of accuracy loss percentage: 1%
- higher_is_better: True
- exit_policy:
- timeout: 0 # tuning timeout (seconds), 0 means early stop
- random_seed: 9527 # random seed
-```
diff --git a/examples/.config/model_params_keras_3x.json b/examples/.config/model_params_keras_3x.json
new file mode 100644
index 00000000000..bac8a06b4a3
--- /dev/null
+++ b/examples/.config/model_params_keras_3x.json
@@ -0,0 +1,18 @@
+{
+ "keras": {
+ "resnetv2_50": {
+ "model_src_dir": "keras/image_recognition/resnet_v2_50/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset2/models/tensorflow/resnetv2_50_keras/saved_model",
+ "main_script": "main.py",
+ "batch_size": 32
+ },
+ "inception_v3": {
+ "model_src_dir": "keras/image_recognition/inception_v3/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset2/models/tensorflow/inception_v3_keras/saved_model",
+ "main_script": "main.py",
+ "batch_size": 32
+ }
+ }
+}
diff --git a/examples/.config/model_params_pytorch_3x.json b/examples/.config/model_params_pytorch_3x.json
new file mode 100644
index 00000000000..c3ae3f6b5be
--- /dev/null
+++ b/examples/.config/model_params_pytorch_3x.json
@@ -0,0 +1,172 @@
+{
+ "pytorch": {
+ "opt_125m_woq_gptq_int4":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "opt_125m_woq_gptq_int4_dq_bnb":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "opt_125m_woq_gptq_int4_dq_ggml":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "llama2_7b_gptq_int4":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "llama2_7b_gptq_int4_dq_bnb":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "llama2_7b_gptq_int4_dq_ggml":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_woq_rtn_int4":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_woq_rtn_int4_dq_bnb":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_woq_rtn_int4_dq_ggml":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_woq_gptq_int4":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_woq_gptq_int4_dq_bnb":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_woq_gptq_int4_dq_ggml":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "gpt_j_ipex":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "gpt_j_ipex_sq":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "llama2_7b_ipex":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "llama2_7b_ipex_sq":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "opt_125m_ipex":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "opt_125m_ipex_sq":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 8
+ },
+ "dlrm_ipex": {
+ "model_src_dir": "recommendation/dlrm/static_quant/ipex",
+ "dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input",
+ "input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt",
+ "main_script": "dlrm_s_pytorch.py",
+ "batch_size": 16384
+ },
+ "resnet18_pt2e_static":{
+ "model_src_dir": "cv/static_quant",
+ "dataset_location": "/tf_dataset/pytorch/ImageNet/raw",
+ "input_model": "",
+ "main_script": "main.py",
+ "batch_size": 1
+ },
+ "resnet18_fp8_static":{
+ "model_src_dir": "cv/fp8_quant",
+ "dataset_location": "/tf_dataset/pytorch/ImageNet/raw",
+ "input_model": "",
+ "main_script": "main.py",
+ "batch_size": 1
+ },
+ "opt_125m_pt2e_static":{
+ "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "run_clm_no_trainer.py",
+ "batch_size": 1
+ },
+ "sdxl_ipex_sq":{
+ "model_src_dir": "diffusion_model/diffusers/stable_diffusion/smooth_quant",
+ "dataset_location": "",
+ "input_model": "",
+ "main_script": "main.py",
+ "batch_size": 1
+ },
+ "resnet18_mixed_precision": {
+ "model_src_dir": "cv/mixed_precision",
+ "dataset_location": "/tf_dataset/pytorch/ImageNet/raw",
+ "input_model": "resnet18",
+ "main_script": "main.py",
+ "batch_size": 20
+ }
+ }
+}
diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json
new file mode 100644
index 00000000000..e2a052656f8
--- /dev/null
+++ b/examples/.config/model_params_tensorflow_3x.json
@@ -0,0 +1,147 @@
+{
+ "tensorflow": {
+ "bert_large_squad_model_zoo": {
+ "model_src_dir": "nlp/bert_large_squad_model_zoo/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/bert/data",
+ "input_model": "/tf_dataset/tensorflow/bert/fp32_bert_squad.pb",
+ "main_script": "main.py",
+ "batch_size": 64,
+ "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb"
+ },
+ "opt_125m_sq": {
+ "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant",
+ "dataset_location": "",
+ "input_model": "facebook/opt-125m",
+ "main_script": "main.py",
+ "batch_size": 16
+ },
+ "gpt2_medium_sq": {
+ "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant",
+ "dataset_location": "",
+ "input_model": "gpt2-medium",
+ "main_script": "main.py",
+ "batch_size": 16
+ },
+ "gpt-j-6B": {
+ "model_src_dir": "nlp/large_language_models/quantization/ptq/gpt-j",
+ "dataset_location": "",
+ "input_model": "/tf_dataset2/models/tensorflow/gpt-j-6B",
+ "main_script": "main.py",
+ "batch_size": 1
+ },
+ "transformer_lt": {
+ "model_src_dir": "nlp/transformer_lt/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/data",
+ "input_model": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb",
+ "main_script": "main.py",
+ "batch_size": 64
+ },
+ "inception_v3": {
+ "model_src_dir": "image_recognition/inception_v3/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset/pre-trained-models/inceptionv3/fp32/freezed_inceptionv3.pb",
+ "main_script": "main.py",
+ "batch_size": 32,
+ "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb"
+ },
+ "mobilenetv2": {
+ "model_src_dir": "image_recognition/mobilenet_v2/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb",
+ "main_script": "main.py",
+ "batch_size": 32
+ },
+ "resnetv2_50": {
+ "model_src_dir": "image_recognition/resnet_v2_50/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_resnet_v2_50.pb",
+ "main_script": "main.py",
+ "batch_size": 32
+ },
+ "vgg16": {
+ "model_src_dir": "image_recognition/vgg16/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb",
+ "main_script": "main.py",
+ "batch_size": 32
+ },
+ "ViT": {
+ "model_src_dir": "image_recognition/vision_transformer/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/imagenet",
+ "input_model": "/tf_dataset/tensorflow/vit/HF-ViT-Base16-Img224-frozen.pb",
+ "main_script": "main.py",
+ "batch_size": 32
+ },
+ "GraphSage": {
+ "model_src_dir": "graph_networks/graphsage/quantization/ptq",
+ "dataset_location": "/tf_dataset/dataset/ppi",
+ "input_model": "/tf_dataset/tensorflow/graphsage/graphsage_frozen_model.pb",
+ "main_script": "main.py",
+ "batch_size": 1000
+ },
+ "yolo_v5": {
+ "model_src_dir": "object_detection/yolo_v5/quantization/ptq",
+ "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco",
+ "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb",
+ "main_script": "main.py",
+ "batch_size": 1
+ },
+ "faster_rcnn_resnet50": {
+ "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
+ "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb",
+ "main_script": "main.py",
+ "batch_size": 10
+ },
+ "mask_rcnn_inception_v2": {
+ "model_src_dir": "object_detection/mask_rcnn_inception_v2/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
+ "input_model": "/tf_dataset/pre-train-model-oob/object_detection/mask_rcnn_inception_v2/frozen_inference_graph.pb",
+ "main_script": "main.py",
+ "batch_size": 10
+ },
+ "mask_rcnn_inception_v2_ckpt": {
+ "model_src_dir": "object_detection/mask_rcnn_inception_v2/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
+ "input_model": "/tf_dataset/pre-train-model-oob/object_detection/mask_rcnn_inception_v2",
+ "main_script": "main.py",
+ "batch_size": 10
+ },
+ "ssd_mobilenet_v1": {
+ "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
+ "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb",
+ "main_script": "main.py",
+ "batch_size": 10
+ },
+ "ssd_mobilenet_v1_ckpt": {
+ "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
+ "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1",
+ "main_script": "main.py",
+ "batch_size": 10
+ },
+ "wide_deep_large_ds": {
+ "model_src_dir": "recommendation/wide_deep_large_ds/quantization/ptq",
+ "dataset_location": "/tf_dataset/tensorflow/wide_deep_large_ds/dataset",
+ "input_model": "/tf_dataset/tensorflow/wide_deep_large_ds/fp32_optimized_graph.pb",
+ "main_script": "main.py",
+ "batch_size": 256,
+ "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/wide_deep_fp32_pretrained_model.pb"
+ },
+ "3dunet-mlperf": {
+ "model_src_dir": "semantic_image_segmentation/3dunet-mlperf/quantization/ptq",
+ "dataset_location": "/tf_dataset2/models/tensorflow/3dunet/build",
+ "input_model": "/tf_dataset2/models/tensorflow/3dunet/3dunet_dynamic_ndhwc.pb",
+ "main_script": "main.py",
+ "batch_size": 100
+ },
+ "style_transfer": {
+ "model_src_dir": "style_transfer/arbitrary_style_transfer/quantization/ptq",
+ "dataset_location": "style_images,content_images",
+ "input_model": "/tf_dataset/tensorflow/style_transfer/arbitrary_style_transfer/model.ckpt",
+ "main_script": "main.py",
+ "batch_size": 1
+ }
+ }
+}
\ No newline at end of file
diff --git a/examples/3.x_api/README.md b/examples/3.x_api/README.md
new file mode 100644
index 00000000000..fd79f210533
--- /dev/null
+++ b/examples/3.x_api/README.md
@@ -0,0 +1,169 @@
+# Examples
+
+Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../docs/source/validated_model_list.md).
+
+
+# PyTorch Examples
+
+## Quantization
+
+
+
+ Model
+ Domain
+ Method
+ Examples
+
+
+
+
+
+ gpt_j
+ Natural Language Processing
+ Weight-Only Quantization
+ link
+
+
+ Static Quantization (IPEX)
+ link
+
+
+ llama2_7b
+ Natural Language Processing
+ Weight-Only Quantization
+ link
+
+
+ Static Quantization (IPEX)
+ link
+
+
+ opt_125m
+ Natural Language Processing
+ Static Quantization (IPEX)
+ link
+
+
+ Static Quantization (PT2E)
+ link
+
+
+ Weight-Only Quantization
+ link
+
+
+ resnet18
+ Image Recognition
+ Mixed Precision
+ link
+
+
+ Static Quantization
+ link
+
+
+
+
+
+# TensorFlow Examples
+
+## Quantization
+
+
+
+
+ Model
+ Domain
+ Method
+ Examples
+
+
+
+
+ bert_large_squad_model_zoo
+ Natural Language Processing
+ Post-Training Static Quantization
+ link
+
+
+ transformer_lt
+ Natural Language Processing
+ Post-Training Static Quantization
+ link
+
+
+ inception_v3
+ Image Recognition
+ Post-Training Static Quantization
+ link
+
+
+ mobilenetv2
+ Image Recognition
+ Post-Training Static Quantization
+ link
+
+
+ resnetv2_50
+ Image Recognition
+ Post-Training Static Quantization
+ link
+
+
+ vgg16
+ Image Recognition
+ Post-Training Static Quantization
+ link
+
+
+ ViT
+ Image Recognition
+ Post-Training Static Quantization
+ link
+
+
+ GraphSage
+ Graph Networks
+ Post-Training Static Quantization
+ link
+
+
+ yolo_v5
+ Object Detection
+ Post-Training Static Quantization
+ link
+
+
+ faster_rcnn_resnet50
+ Object Detection
+ Post-Training Static Quantization
+ link
+
+
+ mask_rcnn_inception_v2
+ Object Detection
+ Post-Training Static Quantization
+ link
+
+
+ ssd_mobilenet_v1
+ Object Detection
+ Post-Training Static Quantization
+ link
+
+
+ wide_deep_large_ds
+ Recommendation
+ Post-Training Static Quantization
+ link
+
+
+ 3dunet-mlperf
+ Semantic Image Segmentation
+ Post-Training Static Quantization
+ link
+
+
+
+
+
diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/README.md b/examples/3.x_api/pytorch/cv/fp8_quant/README.md
new file mode 100644
index 00000000000..72b8eb282b5
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/fp8_quant/README.md
@@ -0,0 +1,28 @@
+# ImageNet FP8 Quantization
+
+This implements FP8 quantization of popular model architectures, such as ResNet on the ImageNet dataset, which is supported by Intel Gaudi2 AI Accelerator.
+
+## Requirements
+
+To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
+```bash
+# Run a container with an interactive shell
+docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
+```
+
+- Install requirements
+- `pip install -r requirements.txt`
+- Download the ImageNet dataset from http://www.image-net.org/
+ - Then, move and extract the training and validation images to labeled subfolders, using [the following shell script](extract_ILSVRC.sh)
+
+## Quantization
+
+To quantize a model and validate accuracy, run `main.py` with the desired model architecture and the path to the ImageNet dataset:
+
+```bash
+python main.py --pretrained -t -a resnet50 -b 30 /path/to/imagenet
+```
+or
+```bash
+bash run_quant.sh --input_model=resnet50 --dataset_location=/path/to/imagenet
+```
diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/extract_ILSVRC.sh b/examples/3.x_api/pytorch/cv/fp8_quant/extract_ILSVRC.sh
new file mode 100644
index 00000000000..3ec05e8f328
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/fp8_quant/extract_ILSVRC.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+#
+# script to extract ImageNet dataset
+# ILSVRC2012_img_train.tar (about 138 GB)
+# ILSVRC2012_img_val.tar (about 6.3 GB)
+# make sure ILSVRC2012_img_train.tar & ILSVRC2012_img_val.tar in your current directory
+#
+# Adapted from:
+# https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
+# https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4
+#
+# imagenet/train/
+# ├── n01440764
+# │ ├── n01440764_10026.JPEG
+# │ ├── n01440764_10027.JPEG
+# │ ├── ......
+# ├── ......
+# imagenet/val/
+# ├── n01440764
+# │ ├── ILSVRC2012_val_00000293.JPEG
+# │ ├── ILSVRC2012_val_00002138.JPEG
+# │ ├── ......
+# ├── ......
+#
+#
+# Make imagenet directory
+#
+mkdir imagenet
+#
+# Extract the training data:
+#
+# Create train directory; move .tar file; change directory
+mkdir imagenet/train && mv ILSVRC2012_img_train.tar imagenet/train/ && cd imagenet/train
+# Extract training set; remove compressed file
+tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
+#
+# At this stage imagenet/train will contain 1000 compressed .tar files, one for each category
+#
+# For each .tar file:
+# 1. create directory with same name as .tar file
+# 2. extract and copy contents of .tar file into directory
+# 3. remove .tar file
+find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
+#
+# This results in a training directory like so:
+#
+# imagenet/train/
+# ├── n01440764
+# │ ├── n01440764_10026.JPEG
+# │ ├── n01440764_10027.JPEG
+# │ ├── ......
+# ├── ......
+#
+# Change back to original directory
+cd ../..
+#
+# Extract the validation data and move images to subfolders:
+#
+# Create validation directory; move .tar file; change directory; extract validation .tar; remove compressed file
+mkdir imagenet/val && mv ILSVRC2012_img_val.tar imagenet/val/ && cd imagenet/val && tar -xvf ILSVRC2012_img_val.tar && rm -f ILSVRC2012_img_val.tar
+# get script from soumith and run; this script creates all class directories and moves images into corresponding directories
+wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
+#
+# This results in a validation directory like so:
+#
+# imagenet/val/
+# ├── n01440764
+# │ ├── ILSVRC2012_val_00000293.JPEG
+# │ ├── ILSVRC2012_val_00002138.JPEG
+# │ ├── ......
+# ├── ......
+#
+#
+# Check total files after extract
+#
+# $ find train/ -name "*.JPEG" | wc -l
+# 1281167
+# $ find val/ -name "*.JPEG" | wc -l
+# 50000
+#
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/main.py b/examples/3.x_api/pytorch/cv/fp8_quant/main.py
new file mode 100644
index 00000000000..dfa7515343c
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/fp8_quant/main.py
@@ -0,0 +1,391 @@
+import argparse
+import os
+import random
+import shutil
+import time
+import warnings
+import sys
+
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.distributed as dist
+import torch.optim
+import torch.multiprocessing as mp
+import torch.utils.data
+import torch.utils.data.distributed
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torchvision.models as models
+from neural_compressor.torch.quantization import (
+ FP8Config,
+ prepare,
+ convert,
+)
+import habana_frameworks.torch.core as htcore
+
+
+model_names = models.list_models(module=models)
+
+parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
+parser.add_argument('data', metavar='DIR',
+ help='path to dataset')
+parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
+ choices=model_names,
+ help='model architecture: ' +
+ ' | '.join(model_names) +
+ ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+ help='number of data loading workers (default: 4)')
+parser.add_argument('--epochs', default=90, type=int, metavar='N',
+ help='number of total epochs to run')
+parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
+ help='manual epoch number (useful on restarts)')
+parser.add_argument('-b', '--batch-size', default=256, type=int,
+ metavar='N',
+ help='mini-batch size (default: 256), this is the total '
+ 'batch size of all GPUs on the current node when '
+ 'using Data Parallel or Distributed Data Parallel')
+parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
+ metavar='LR', help='initial learning rate', dest='lr')
+parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
+ help='momentum')
+parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
+ metavar='W', help='weight decay (default: 1e-4)',
+ dest='weight_decay')
+parser.add_argument('-p', '--print-freq', default=10, type=int,
+ metavar='N', help='print frequency (default: 10)')
+parser.add_argument('--resume', default='', type=str, metavar='PATH',
+ help='path to latest checkpoint (default: none)')
+parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
+ help='evaluate model on validation set')
+parser.add_argument('-t', '--tune', dest='tune', action='store_true',
+ help='tune best int8 model on calibration dataset')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+ help='use pre-trained model')
+parser.add_argument('--world-size', default=-1, type=int,
+ help='number of nodes for distributed training')
+parser.add_argument('--rank', default=-1, type=int,
+ help='node rank for distributed training')
+parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
+ help='url used to set up distributed training')
+parser.add_argument('--dist-backend', default='nccl', type=str,
+ help='distributed backend')
+parser.add_argument('--seed', default=None, type=int,
+ help='seed for initializing training. ')
+parser.add_argument('--gpu', default=None, type=int,
+ help='GPU id to use.')
+parser.add_argument('--ppn', default=1, type=int,
+ help='number of processes on each node of distributed training')
+parser.add_argument('--multiprocessing-distributed', action='store_true',
+ help='Use multi-processing distributed training to launch '
+ 'N processes per node, which has N GPUs. This is the '
+ 'fastest way to use PyTorch for either single node or '
+ 'multi node data parallel training')
+parser.add_argument("--calib_iters", default=10, type=int,
+ help="For calibration only.")
+parser.add_argument('-i', "--iter", default=0, type=int,
+ help='For accuracy measurement only.')
+parser.add_argument('-w', "--warmup_iter", default=5, type=int,
+ help='For benchmark measurement only.')
+parser.add_argument('--performance', dest='performance', action='store_true',
+ help='run benchmark')
+parser.add_argument('-r', "--accuracy", dest='accuracy', action='store_true',
+ help='For accuracy measurement only.')
+parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
+ help='path to checkpoint tuned by Neural Compressor (default: ./)')
+parser.add_argument('--int8', dest='int8', action='store_true',
+ help='run benchmark')
+parser.add_argument('--device', default='hpu', type=str,
+ help='use hpu device for fp8 quantization')
+
+best_acc1 = 0
+
+
+def main():
+ args = parser.parse_args()
+
+ if 'mobilenet' in args.arch:
+ import torchvision.models.quantization as models
+ else:
+ import torchvision.models as models
+
+ if args.seed is not None:
+ random.seed(args.seed)
+ torch.manual_seed(args.seed)
+
+ if args.pretrained:
+ print("=> using pre-trained model '{}'".format(args.arch))
+ model = models.__dict__[args.arch](pretrained=True)
+ else:
+ print("=> creating model '{}'".format(args.arch))
+ model = models.__dict__[args.arch]()
+
+ # define loss function (criterion) and optimizer
+ criterion = nn.CrossEntropyLoss()
+
+ optimizer = torch.optim.SGD(model.parameters(), args.lr,
+ momentum=args.momentum,
+ weight_decay=args.weight_decay)
+
+ # optionally resume from a checkpoint
+ if args.resume:
+ if os.path.isfile(args.resume):
+ print("=> loading checkpoint '{}'".format(args.resume))
+ checkpoint = torch.load(args.resume)
+ args.start_epoch = checkpoint['epoch']
+ best_acc1 = checkpoint['best_acc1']
+ if args.gpu is not None:
+ # best_acc1 may be from a checkpoint from a different GPU
+ best_acc1 = best_acc1.to(args.gpu)
+ model.load_state_dict(checkpoint['state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer'])
+ print("=> loaded checkpoint '{}' (epoch {})"
+ .format(args.resume, checkpoint['epoch']))
+ else:
+ print("=> no checkpoint found at '{}'".format(args.resume))
+
+ # Data loading code
+ traindir = os.path.join(args.data, 'train')
+ valdir = os.path.join(args.data, 'val')
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+
+ train_dataset = datasets.ImageFolder(
+ traindir,
+ transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ normalize,
+ ]))
+
+ train_loader = torch.utils.data.DataLoader(
+ train_dataset, batch_size=args.batch_size, shuffle=True,
+ num_workers=args.workers, pin_memory=True, sampler=None)
+
+ val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ normalize,
+ ]))
+
+ val_loader = torch.utils.data.DataLoader(
+ val_dataset,
+ batch_size=args.batch_size, shuffle=False,
+ num_workers=args.workers, pin_memory=True)
+
+ if args.evaluate:
+ validate(val_loader, model, criterion, args)
+ return
+
+ def eval_func(model):
+ accu = validate(val_loader, model, criterion, args)
+ return float(accu)
+
+ if args.tune:
+ qconfig = FP8Config(fp8_config="E4M3")
+ model = prepare(model, qconfig)
+
+ # Calibrate
+ # model is moved to HPU device automatically after preparing
+ with torch.no_grad():
+ for i, (images, target) in enumerate(train_loader):
+ print("Calibrating batch:", i)
+ if i == args.calib_iters:
+ break
+ images = images.to(args.device)
+ model(images)
+ htcore.mark_step()
+
+ model = convert(model)
+ eval_func(model)
+ # The saving and loading of fp8 quantization are planned in the next release.
+
+ if args.performance or args.accuracy:
+ model.eval()
+ if args.int8:
+ from neural_compressor.utils.pytorch import load
+ new_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)),
+ model,
+ dataloader=val_loader)
+ else:
+ new_model = model
+ if args.performance:
+ from neural_compressor.config import BenchmarkConfig
+ from neural_compressor import benchmark
+ b_conf = BenchmarkConfig(warmup=5,
+ iteration=args.iter,
+ cores_per_instance=4,
+ num_of_instance=1)
+ benchmark.fit(new_model, b_conf, b_dataloader=val_loader)
+ if args.accuracy:
+ validate(val_loader, new_model, criterion, args)
+ return
+
+
+def train(train_loader, model, criterion, optimizer, epoch, args):
+ batch_time = AverageMeter('Time', ':6.3f')
+ data_time = AverageMeter('Data', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1,
+ top5, prefix="Epoch: [{}]".format(epoch))
+
+ # switch to train mode
+ model.train()
+
+ end = time.time()
+ for i, (input, target) in enumerate(train_loader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+
+ if args.gpu is not None:
+ input = input.cuda(args.gpu, non_blocking=True)
+ target = target.cuda(args.gpu, non_blocking=True)
+
+ # compute output
+ output = model(input)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), input.size(0))
+ top1.update(acc1[0], input.size(0))
+ top5.update(acc5[0], input.size(0))
+
+ # compute gradient and do SGD step
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % args.print_freq == 0:
+ progress.print(i)
+
+
+def validate(val_loader, model, criterion, args):
+ batch_time = AverageMeter('Time', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
+ prefix='Test: ')
+
+ # switch to evaluate mode
+ model.eval()
+
+ with torch.no_grad():
+ for i, (input, target) in enumerate(val_loader):
+ if i >= args.warmup_iter:
+ start = time.time()
+ input = input.to(args.device)
+ target = target.to(args.device)
+ if args.gpu is not None:
+ input = input.cuda(args.gpu, non_blocking=True)
+ target = target.cuda(args.gpu, non_blocking=True)
+
+ # compute output
+ output = model(input)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), input.size(0))
+ top1.update(acc1[0], input.size(0))
+ top5.update(acc5[0], input.size(0))
+
+ # measure elapsed time
+ if i >= args.warmup_iter:
+ batch_time.update(time.time() - start)
+
+ if i % args.print_freq == 0:
+ progress.print(i)
+
+ if args.iter > 0 and i >= (args.warmup_iter + args.iter - 1):
+ break
+
+ print('Batch size = %d' % args.batch_size)
+ print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}'
+ .format(top1=(top1.avg / 100), top5=(top5.avg / 100)))
+
+ return top1.avg/100
+
+
+def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
+ torch.save(state, filename)
+ if is_best:
+ shutil.copyfile(filename, 'model_best.pth.tar')
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self, name, fmt=':f'):
+ self.name = name
+ self.fmt = fmt
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def __str__(self):
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
+ return fmtstr.format(**self.__dict__)
+
+
+class ProgressMeter(object):
+ def __init__(self, num_batches, *meters, prefix=""):
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
+ self.meters = meters
+ self.prefix = prefix
+
+ def print(self, batch):
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
+ entries += [str(meter) for meter in self.meters]
+ print('\t'.join(entries))
+
+ def _get_batch_fmtstr(self, num_batches):
+ num_digits = len(str(num_batches // 1))
+ fmt = '{:' + str(num_digits) + 'd}'
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
+
+
+def adjust_learning_rate(optimizer, epoch, args):
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
+ lr = args.lr * (0.1 ** (epoch // 30))
+ for param_group in optimizer.param_groups:
+ param_group['lr'] = lr
+
+
+def accuracy(output, target, topk=(1,)):
+ """Computes the accuracy over the k top predictions for the specified values of k"""
+ with torch.no_grad():
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
+
+
+if __name__ == '__main__':
+ main()
diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/requirements.txt b/examples/3.x_api/pytorch/cv/fp8_quant/requirements.txt
new file mode 100644
index 00000000000..ebd3df6ae7a
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/fp8_quant/requirements.txt
@@ -0,0 +1,3 @@
+torch
+torchvision
+neural-compressor
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/run_quant.sh b/examples/3.x_api/pytorch/cv/fp8_quant/run_quant.sh
new file mode 100644
index 00000000000..4d0047cf2d1
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/fp8_quant/run_quant.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ output_model=saved_results
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ if [ "${topology}" = "resnet18_fp8_static" ]; then
+ input_model="resnet18"
+ output_dir="saved_results"
+ fi
+ python main.py \
+ --pretrained \
+ -t \
+ -a ${input_model} \
+ -b 30 \
+ --tuned_checkpoint ${output_model} \
+ ${dataset_location}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/README.md b/examples/3.x_api/pytorch/cv/mixed_precision/README.md
new file mode 100644
index 00000000000..ede1837b57a
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/mixed_precision/README.md
@@ -0,0 +1,48 @@
+Step-by-Step
+============
+
+This document describes the step-by-step instructions for reproducing PyTorch ResNet18 MixedPrecision results with Intel® Neural Compressor.
+
+# Prerequisite
+
+### 1. Environment
+
+PyTorch 1.8 or higher version is needed with pytorch_fx backend.
+
+```Shell
+cd examples/3.x_api/pytorch/cv/mixed_precision
+pip install -r requirements.txt
+```
+> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### 2. Prepare Dataset
+
+Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/imagenet. The dir includes below folder:
+
+```bash
+ls /path/to/imagenet
+train val
+```
+
+# Run
+
+> Note: All torchvision model names can be passed as long as they are included in `torchvision.models`, below are some examples.
+
+## MixedPrecision
+```Shell
+bash run_autotune.sh --input_model=resnet18 --dataset_location=/path/to/imagenet
+```
+
+## Benchmark
+```Shell
+# run optimized performance
+bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance --batch_size=20 --optimized=true --iters=500
+
+# run optimized accuracy
+bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=accuracy --batch_size=1 --optimized=true
+```
+
+
+
+
+
diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/main.py b/examples/3.x_api/pytorch/cv/mixed_precision/main.py
new file mode 100644
index 00000000000..8ef798f9ac3
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/mixed_precision/main.py
@@ -0,0 +1,367 @@
+import argparse
+import os
+import random
+import shutil
+import time
+import warnings
+import sys
+
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.distributed as dist
+import torch.optim
+import torch.multiprocessing as mp
+import torch.utils.data
+import torch.utils.data.distributed
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torchvision.models as models
+
+model_names = models.list_models(module=models)
+
+parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
+parser.add_argument('data', metavar='DIR',
+ help='path to dataset')
+parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
+ choices=model_names,
+ help='model architecture: ' +
+ ' | '.join(model_names) +
+ ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+ help='number of data loading workers (default: 4)')
+parser.add_argument('--epochs', default=90, type=int, metavar='N',
+ help='number of total epochs to run')
+parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
+ help='manual epoch number (useful on restarts)')
+parser.add_argument('-b', '--batch-size', default=256, type=int,
+ metavar='N',
+ help='mini-batch size (default: 256), this is the total '
+ 'batch size of all GPUs on the current node when '
+ 'using Data Parallel or Distributed Data Parallel')
+parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
+ metavar='LR', help='initial learning rate', dest='lr')
+parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
+ help='momentum')
+parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
+ metavar='W', help='weight decay (default: 1e-4)',
+ dest='weight_decay')
+parser.add_argument('-p', '--print-freq', default=10, type=int,
+ metavar='N', help='print frequency (default: 10)')
+parser.add_argument('--resume', default='', type=str, metavar='PATH',
+ help='path to latest checkpoint (default: none)')
+parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
+ help='evaluate model on validation set')
+parser.add_argument('-t', '--tune', dest='tune', action='store_true',
+ help='tune best optimized model')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+ help='use pre-trained model')
+parser.add_argument('--world-size', default=-1, type=int,
+ help='number of nodes for distributed training')
+parser.add_argument('--rank', default=-1, type=int,
+ help='node rank for distributed training')
+parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
+ help='url used to set up distributed training')
+parser.add_argument('--dist-backend', default='nccl', type=str,
+ help='distributed backend')
+parser.add_argument('--seed', default=None, type=int,
+ help='seed for initializing training. ')
+parser.add_argument('--gpu', default=None, type=int,
+ help='GPU id to use.')
+parser.add_argument('--ppn', default=1, type=int,
+ help='number of processes on each node of distributed training')
+parser.add_argument('--multiprocessing-distributed', action='store_true',
+ help='Use multi-processing distributed training to launch '
+ 'N processes per node, which has N GPUs. This is the '
+ 'fastest way to use PyTorch for either single node or '
+ 'multi node data parallel training')
+parser.add_argument('-i', "--iter", default=0, type=int,
+ help='For accuracy measurement only.')
+parser.add_argument('-w', "--warmup_iter", default=5, type=int,
+ help='For benchmark measurement only.')
+parser.add_argument('--performance', dest='performance', action='store_true',
+ help='run benchmark')
+parser.add_argument('-r', "--accuracy", dest='accuracy', action='store_true',
+ help='For accuracy measurement only.')
+parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
+ help='path to checkpoint tuned by Neural Compressor (default: ./)')
+parser.add_argument('--optimized', dest='optimized', action='store_true',
+ help='run benchmark')
+
+best_acc1 = 0
+
+
+def main():
+ args = parser.parse_args()
+
+ if 'mobilenet_v2' in args.arch:
+ import torchvision.models.quantization as models
+ else:
+ import torchvision.models as models
+
+ if args.seed is not None:
+ random.seed(args.seed)
+ torch.manual_seed(args.seed)
+
+ if args.pretrained:
+ print("=> using pre-trained model '{}'".format(args.arch))
+ model = models.__dict__[args.arch](pretrained=True)
+ else:
+ print("=> creating model '{}'".format(args.arch))
+ model = models.__dict__[args.arch]()
+
+ # define loss function (criterion) and optimizer
+ criterion = nn.CrossEntropyLoss()
+
+ optimizer = torch.optim.SGD(model.parameters(), args.lr,
+ momentum=args.momentum,
+ weight_decay=args.weight_decay)
+
+ # optionally resume from a checkpoint
+ if args.resume:
+ if os.path.isfile(args.resume):
+ print("=> loading checkpoint '{}'".format(args.resume))
+ checkpoint = torch.load(args.resume)
+ args.start_epoch = checkpoint['epoch']
+ best_acc1 = checkpoint['best_acc1']
+ if args.gpu is not None:
+ # best_acc1 may be from a checkpoint from a different GPU
+ best_acc1 = best_acc1.to(args.gpu)
+ model.load_state_dict(checkpoint['state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer'])
+ print("=> loaded checkpoint '{}' (epoch {})"
+ .format(args.resume, checkpoint['epoch']))
+ else:
+ print("=> no checkpoint found at '{}'".format(args.resume))
+
+ # Data loading code
+ traindir = os.path.join(args.data, 'train')
+ valdir = os.path.join(args.data, 'val')
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+
+ train_dataset = datasets.ImageFolder(
+ traindir,
+ transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ normalize,
+ ]))
+
+ train_loader = torch.utils.data.DataLoader(
+ train_dataset, batch_size=args.batch_size, shuffle=True,
+ num_workers=args.workers, pin_memory=True, sampler=None)
+
+ val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ normalize,
+ ]))
+
+ val_loader = torch.utils.data.DataLoader(
+ val_dataset,
+ batch_size=args.batch_size, shuffle=False,
+ num_workers=args.workers, pin_memory=True)
+
+ if args.evaluate:
+ validate(val_loader, model, criterion, args)
+ return
+
+ def eval_func(model):
+ accu = validate(val_loader, model, criterion, args)
+ return float(accu)
+
+ if args.tune:
+ from neural_compressor.torch.quantization import MixedPrecisionConfig, TuningConfig, autotune
+ custom_tune_config = TuningConfig(config_set=[MixedPrecisionConfig(dtype=["fp16", "fp32"])])
+ best_model = autotune(model=model, tune_config=custom_tune_config, eval_fn=eval_func)
+ torch.save(best_model, args.tuned_checkpoint)
+ return
+
+ if args.performance or args.accuracy:
+ model.eval()
+ if args.optimized:
+ new_model = torch.load(args.tuned_checkpoint)
+ else:
+ new_model = model
+ if args.performance or args.accuracy:
+ validate(val_loader, new_model, criterion, args)
+ return
+
+
+def train(train_loader, model, criterion, optimizer, epoch, args):
+ batch_time = AverageMeter('Time', ':6.3f')
+ data_time = AverageMeter('Data', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1,
+ top5, prefix="Epoch: [{}]".format(epoch))
+
+ # switch to train mode
+ model.train()
+
+ end = time.time()
+ for i, (input, target) in enumerate(train_loader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+
+ if args.gpu is not None:
+ input = input.cuda(args.gpu, non_blocking=True)
+ target = target.cuda(args.gpu, non_blocking=True)
+
+ # compute output
+ output = model(input)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), input.size(0))
+ top1.update(acc1[0], input.size(0))
+ top5.update(acc5[0], input.size(0))
+
+ # compute gradient and do SGD step
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % args.print_freq == 0:
+ progress.print(i)
+
+
+def validate(val_loader, model, criterion, args):
+ batch_time = AverageMeter('Time', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
+ prefix='Test: ')
+
+ # switch to evaluate mode
+ model.eval()
+
+ with torch.no_grad():
+ latency_list = []
+ for i, (input, target) in enumerate(val_loader):
+ if i >= args.warmup_iter:
+ start = time.time()
+ if args.gpu is not None:
+ input = input.cuda(args.gpu, non_blocking=True)
+ target = target.cuda(args.gpu, non_blocking=True)
+
+ # compute output
+ perf_start = time.time()
+ output = model(input)
+ perf_end = time.time()
+ latency_list.append(perf_end-perf_start)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), input.size(0))
+ top1.update(acc1[0], input.size(0))
+ top5.update(acc5[0], input.size(0))
+
+ # measure elapsed time
+ if i >= args.warmup_iter:
+ batch_time.update(time.time() - start)
+
+ if i % args.print_freq == 0:
+ progress.print(i)
+
+ if args.iter > 0 and i >= (args.warmup_iter + args.iter - 1):
+ break
+
+ if args.accuracy:
+ print('Batch size = %d' % args.batch_size)
+ print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}'
+ .format(top1=(top1.avg / 100), top5=(top5.avg / 100)))
+ if args.performance:
+ latency = np.array(latency_list[args.warmup_iter:]).mean() / args.batch_size
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+
+ return top1.avg
+
+
+def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
+ torch.save(state, filename)
+ if is_best:
+ shutil.copyfile(filename, 'model_best.pth.tar')
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self, name, fmt=':f'):
+ self.name = name
+ self.fmt = fmt
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def __str__(self):
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
+ return fmtstr.format(**self.__dict__)
+
+
+class ProgressMeter(object):
+ def __init__(self, num_batches, *meters, prefix=""):
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
+ self.meters = meters
+ self.prefix = prefix
+
+ def print(self, batch):
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
+ entries += [str(meter) for meter in self.meters]
+ print('\t'.join(entries))
+
+ def _get_batch_fmtstr(self, num_batches):
+ num_digits = len(str(num_batches // 1))
+ fmt = '{:' + str(num_digits) + 'd}'
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
+
+
+def adjust_learning_rate(optimizer, epoch, args):
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
+ lr = args.lr * (0.1 ** (epoch // 30))
+ for param_group in optimizer.param_groups:
+ param_group['lr'] = lr
+
+
+def accuracy(output, target, topk=(1,)):
+ """Computes the accuracy over the k top predictions for the specified values of k"""
+ with torch.no_grad():
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
+
+
+if __name__ == '__main__':
+ main()
diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/requirements.txt b/examples/3.x_api/pytorch/cv/mixed_precision/requirements.txt
new file mode 100644
index 00000000000..46233c08f4a
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/mixed_precision/requirements.txt
@@ -0,0 +1,4 @@
+neural-compressor
+torch>=1.9.0
+torchvision>=0.10.0
+accelerate
diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/run_autotune.sh b/examples/3.x_api/pytorch/cv/mixed_precision/run_autotune.sh
new file mode 100644
index 00000000000..770671db180
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/mixed_precision/run_autotune.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params: parse --input_model= and --dataset_location= CLI arguments
+function init_params {
+  iters=100  # NOTE(review): unused in this script
+  tuned_checkpoint=saved_results  # NOTE(review): unused in this script
+  batch_size=30  # NOTE(review): unused in this script
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)  # torchvision architecture name, e.g. resnet18
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)  # path to the ImageNet folder (train/ and val/)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+
+# run_benchmark: run autotune (-t) on the pretrained model against the dataset
+function run_benchmark {
+    extra_cmd="${dataset_location}"
+    python main.py \
+           -a ${input_model}\
+           -t\
+           --pretrained\
+           ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh b/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh
new file mode 100644
index 00000000000..28319cc4ffe
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  export ONEDNN_MAX_CPU_ISA=AVX512_CORE_AMX_FP16  # let oneDNN use AMX FP16 kernels
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params: defaults plus CLI overrides of the form --key=value
+function init_params {
+  iters=100
+  tuned_checkpoint=saved_results  # NOTE(review): unused in this script
+  batch_size=20
+  for var in "$@"
+  do
+    case $var in
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)  # "accuracy" or "performance"
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --optimized=*)  # "true" to benchmark the optimized (mixed-precision) model
+          optimized=$(echo ${var} |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+
+# run_benchmark: accuracy runs plain python; performance runs under incbench
+function run_benchmark {
+    if [[ ${mode} == "accuracy" ]]; then
+        mode_cmd=" --accuracy"
+    elif [[ ${mode} == "performance" ]]; then
+        mode_cmd=" --iter ${iters} --performance "
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd="--optimized ${dataset_location}"
+    else
+        extra_cmd="${dataset_location}"
+    fi
+    if [[ ${mode} == "accuracy" ]]; then
+        python main.py \
+                --pretrained \
+                --tuned_checkpoint ${tuned_checkpoint} \
+                -b ${batch_size} \
+                -a ${input_model} \
+                ${mode_cmd} \
+                ${extra_cmd}
+    elif [[ ${mode} == "performance" ]]; then
+        incbench --num_c 4 main.py \
+                --pretrained \
+                --tuned_checkpoint ${tuned_checkpoint} \
+                -b ${batch_size} \
+                -a ${input_model} \
+                ${mode_cmd} \
+                ${extra_cmd}
+    else
+        echo "Error: No such mode: ${mode}"  # unreachable: mode already validated above
+        exit 1
+    fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/cv/static_quant/README.md b/examples/3.x_api/pytorch/cv/static_quant/README.md
new file mode 100644
index 00000000000..172f8b0e12f
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/static_quant/README.md
@@ -0,0 +1,27 @@
+# ImageNet Quantization
+
+This implements quantization of popular model architectures, such as ResNet on the ImageNet dataset.
+
+## Requirements
+
+- Install requirements
+- `pip install -r requirements.txt`
+- Download the ImageNet dataset from http://www.image-net.org/
+ - Then, move and extract the training and validation images to labeled subfolders, using [the following shell script](extract_ILSVRC.sh)
+
+## Quantization
+
+To quantize a model and validate accuracy, run `main.py` with the desired model architecture and the path to the ImageNet dataset:
+
+```bash
+python main.py -a resnet18 [imagenet-folder with train and val folders] -q -e
+```
+
+
+## Use Dummy Data
+
+The ImageNet dataset is large and time-consuming to download. To get started quickly, run `main.py` with dummy data by passing "--dummy". Note that the loss or accuracy is meaningless in this case.
+
+```bash
+python main.py -a resnet18 --dummy -q -e
+```
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/cv/static_quant/extract_ILSVRC.sh b/examples/3.x_api/pytorch/cv/static_quant/extract_ILSVRC.sh
new file mode 100644
index 00000000000..3ec05e8f328
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/static_quant/extract_ILSVRC.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+#
+# script to extract ImageNet dataset
+# ILSVRC2012_img_train.tar (about 138 GB)
+# ILSVRC2012_img_val.tar (about 6.3 GB)
+# make sure ILSVRC2012_img_train.tar & ILSVRC2012_img_val.tar in your current directory
+#
+# Adapted from:
+# https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
+# https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4
+#
+# imagenet/train/
+# ├── n01440764
+# │ ├── n01440764_10026.JPEG
+# │ ├── n01440764_10027.JPEG
+# │ ├── ......
+# ├── ......
+# imagenet/val/
+# ├── n01440764
+# │ ├── ILSVRC2012_val_00000293.JPEG
+# │ ├── ILSVRC2012_val_00002138.JPEG
+# │ ├── ......
+# ├── ......
+#
+#
+# Make imagenet directory
+#
+mkdir imagenet
+#
+# Extract the training data:
+#
+# Create train directory; move .tar file; change directory
+mkdir imagenet/train && mv ILSVRC2012_img_train.tar imagenet/train/ && cd imagenet/train
+# Extract training set; remove compressed file
+tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
+#
+# At this stage imagenet/train will contain 1000 compressed .tar files, one for each category
+#
+# For each .tar file:
+# 1. create directory with same name as .tar file
+# 2. extract and copy contents of .tar file into directory
+# 3. remove .tar file
+find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
+#
+# This results in a training directory like so:
+#
+# imagenet/train/
+# ├── n01440764
+# │ ├── n01440764_10026.JPEG
+# │ ├── n01440764_10027.JPEG
+# │ ├── ......
+# ├── ......
+#
+# Change back to original directory
+cd ../..
+#
+# Extract the validation data and move images to subfolders:
+#
+# Create validation directory; move .tar file; change directory; extract validation .tar; remove compressed file
+mkdir imagenet/val && mv ILSVRC2012_img_val.tar imagenet/val/ && cd imagenet/val && tar -xvf ILSVRC2012_img_val.tar && rm -f ILSVRC2012_img_val.tar
+# get script from soumith and run; this script creates all class directories and moves images into corresponding directories
+wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
+#
+# This results in a validation directory like so:
+#
+# imagenet/val/
+# ├── n01440764
+# │ ├── ILSVRC2012_val_00000293.JPEG
+# │ ├── ILSVRC2012_val_00002138.JPEG
+# │ ├── ......
+# ├── ......
+#
+#
+# Check total files after extract
+#
+# $ find train/ -name "*.JPEG" | wc -l
+# 1281167
+# $ find val/ -name "*.JPEG" | wc -l
+# 50000
+#
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/cv/static_quant/main.py b/examples/3.x_api/pytorch/cv/static_quant/main.py
new file mode 100644
index 00000000000..3d7af7827e3
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/static_quant/main.py
@@ -0,0 +1,437 @@
+import argparse
+import os
+import random
+import shutil
+import time
+import warnings
+import sys
+
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.distributed as dist
+import torch.optim
+import torch.multiprocessing as mp
+import torch.utils.data
+import torch.utils.data.distributed
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torchvision.models as models
+
+model_names = models.list_models(module=models)
+
+parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
+parser.add_argument('data', metavar='DIR',
+ help='path to dataset')
+parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
+ choices=model_names,
+ help='model architecture: ' +
+ ' | '.join(model_names) +
+ ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+ help='number of data loading workers (default: 4)')
+parser.add_argument('--epochs', default=90, type=int, metavar='N',
+ help='number of total epochs to run')
+parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
+ help='manual epoch number (useful on restarts)')
+parser.add_argument('-b', '--batch-size', default=256, type=int,
+ metavar='N',
+ help='mini-batch size (default: 256), this is the total '
+ 'batch size of all GPUs on the current node when '
+ 'using Data Parallel or Distributed Data Parallel')
+parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
+ metavar='LR', help='initial learning rate', dest='lr')
+parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
+ help='momentum')
+parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
+ metavar='W', help='weight decay (default: 1e-4)',
+ dest='weight_decay')
+parser.add_argument('-p', '--print-freq', default=10, type=int,
+ metavar='N', help='print frequency (default: 10)')
+parser.add_argument('--resume', default='', type=str, metavar='PATH',
+ help='path to latest checkpoint (default: none)')
+parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
+ help='evaluate model on validation set')
+parser.add_argument('-t', '--tune', dest='tune', action='store_true',
+ help='tune best int8 model on calibration dataset')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+ help='use pre-trained model')
+parser.add_argument('--world-size', default=-1, type=int,
+ help='number of nodes for distributed training')
+parser.add_argument('--rank', default=-1, type=int,
+ help='node rank for distributed training')
+parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
+ help='url used to set up distributed training')
+parser.add_argument('--dist-backend', default='nccl', type=str,
+ help='distributed backend')
+parser.add_argument('--seed', default=None, type=int,
+ help='seed for initializing training. ')
+parser.add_argument('--gpu', default=None, type=int,
+ help='GPU id to use.')
+parser.add_argument('--ppn', default=1, type=int,
+ help='number of processes on each node of distributed training')
+parser.add_argument('--multiprocessing-distributed', action='store_true',
+ help='Use multi-processing distributed training to launch '
+ 'N processes per node, which has N GPUs. This is the '
+ 'fastest way to use PyTorch for either single node or '
+ 'multi node data parallel training')
+parser.add_argument('-i', "--iter", default=0, type=int,
+ help='For accuracy measurement only.')
+parser.add_argument('-w', "--warmup_iter", default=5, type=int,
+ help='For benchmark measurement only.')
+parser.add_argument('--performance', dest='performance', action='store_true',
+ help='run benchmark')
+parser.add_argument('-r', "--accuracy", dest='accuracy', action='store_true',
+ help='For accuracy measurement only.')
+parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
+ help='path to checkpoint tuned by Neural Compressor (default: ./)')
+parser.add_argument('--int8', dest='int8', action='store_true',
+ help='Load int8 model.')
+parser.add_argument("--calib_iters", default=128, type=int,
+ help="For calibration only.")
+parser.add_argument("--iters", default=100, type=int,
+ help="For benchmark only.")
+
+best_acc1 = 0
+
+
+def main():
+    args = parser.parse_args()
+
+    if 'mobilenet' in args.arch:
+        import torchvision.models.quantization as models  # quantization-ready variants for mobilenet
+    else:
+        import torchvision.models as models  # deliberately shadows the module-level import
+
+    if args.seed is not None:
+        random.seed(args.seed)
+        torch.manual_seed(args.seed)  # NOTE(review): cuDNN determinism not forced; runs only partly reproducible
+
+    if args.pretrained:
+        print("=> using pre-trained model '{}'".format(args.arch))
+        model = models.__dict__[args.arch](pretrained=True)  # NOTE(review): `pretrained=` deprecated in newer torchvision (use `weights=`)
+    else:
+        print("=> creating model '{}'".format(args.arch))
+        model = models.__dict__[args.arch]()
+
+    # define loss function (criterion) and optimizer
+    criterion = nn.CrossEntropyLoss()
+
+    optimizer = torch.optim.SGD(model.parameters(), args.lr,
+                                momentum=args.momentum,
+                                weight_decay=args.weight_decay)
+
+    # optionally resume from a checkpoint
+    if args.resume:
+        if os.path.isfile(args.resume):
+            print("=> loading checkpoint '{}'".format(args.resume))
+            checkpoint = torch.load(args.resume)
+            args.start_epoch = checkpoint['epoch']
+            best_acc1 = checkpoint['best_acc1']  # NOTE(review): local only (no `global best_acc1`); module-level global stays 0
+            if args.gpu is not None:
+                # best_acc1 may be from a checkpoint from a different GPU
+                best_acc1 = best_acc1.to(args.gpu)
+            model.load_state_dict(checkpoint['state_dict'])
+            optimizer.load_state_dict(checkpoint['optimizer'])
+            print("=> loaded checkpoint '{}' (epoch {})"
+                  .format(args.resume, checkpoint['epoch']))
+        else:
+            print("=> no checkpoint found at '{}'".format(args.resume))
+
+    # Data loading code
+    traindir = os.path.join(args.data, 'train')
+    valdir = os.path.join(args.data, 'val')
+    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                     std=[0.229, 0.224, 0.225])  # conventional ImageNet statistics
+
+    train_dataset = datasets.ImageFolder(
+        traindir,
+        transforms.Compose([
+            transforms.RandomResizedCrop(224),
+            transforms.RandomHorizontalFlip(),
+            transforms.ToTensor(),
+            normalize,
+        ]))
+
+    train_loader = torch.utils.data.DataLoader(
+        train_dataset, batch_size=args.batch_size, shuffle=True,
+        num_workers=args.workers, pin_memory=True, sampler=None)
+
+    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        normalize,
+    ]))
+
+    val_loader = torch.utils.data.DataLoader(
+        val_dataset,
+        batch_size=args.batch_size, shuffle=False,
+        num_workers=args.workers, pin_memory=True)
+
+    if args.evaluate:
+        validate(val_loader, model, criterion, args)
+        return
+
+    def eval_func(model):  # NOTE(review): defined but never used in this script
+        accu = validate(val_loader, model, criterion, args)
+        return float(accu)
+
+    if args.tune:
+        from neural_compressor.torch.export import export
+        from neural_compressor.torch.quantization import prepare, convert, get_default_static_config
+
+        # Prepare the float model and example inputs for exporting model
+        x = torch.randn(args.batch_size, 3, 224, 224).contiguous(memory_format=torch.channels_last)
+        example_inputs = (x,)
+
+        # Specify that the first dimension of each input is that batch size
+        from torch.export import Dim
+        print(args.batch_size)
+        batch = Dim("batch", min=16)  # NOTE(review): batches smaller than 16 (e.g. a trailing partial batch) violate this bound
+
+        # Mark dim 0 of input "x" as the dynamic batch dimension
+        dynamic_shapes = {"x": {0: batch}}
+
+        # Export eager model into FX graph model
+        exported_model = export(model=model, example_inputs=example_inputs, dynamic_shapes=dynamic_shapes)
+        # Quantize the model
+        quant_config = get_default_static_config()
+
+        prepared_model = prepare(exported_model, quant_config=quant_config)
+        # Calibrate
+        with torch.no_grad():
+            for i, (images, target) in enumerate(val_loader):
+                if i == args.calib_iters:
+                    break
+                if args.gpu is not None and torch.cuda.is_available():
+                    images = images.cuda(args.gpu, non_blocking=True)
+                if torch.backends.mps.is_available():
+                    images = images.to('mps')
+                    target = target.to('mps')
+                if torch.cuda.is_available():
+                    target = target.cuda(args.gpu, non_blocking=True)
+                # compute output
+                prepared_model(images)
+
+        q_model = convert(prepared_model)
+
+        if args.tuned_checkpoint:
+            q_model.save(example_inputs=example_inputs, output_dir = args.tuned_checkpoint)
+        return
+
+    if args.performance or args.accuracy:
+        if args.int8:
+            from neural_compressor.torch.quantization import load
+            q_model = load(args.tuned_checkpoint)
+
+            # Compile the quantized model and replace the Q/DQ pattern with Q-operator
+            from torch._inductor import config
+
+            config.freezing = True  # freezing lets inductor fold the Q/DQ pattern at compile time
+            opt_model = torch.compile(q_model)
+            new_model = opt_model
+        else:
+            new_model = model
+        new_model.eval()
+        if args.performance:
+            benchmark(val_loader, new_model, args)
+            return
+        if args.accuracy:
+            validate(val_loader, new_model, criterion, args)
+            return
+
+
+def benchmark(val_loader, model, args):
+
+    total_iters = args.iters
+    warmup_iters = args.warmup_iter
+    for i, (images, target) in enumerate(val_loader):  # grab a single batch to reuse for every iteration
+        if args.gpu is not None and torch.cuda.is_available():
+            images = images.cuda(args.gpu, non_blocking=True)
+        if torch.backends.mps.is_available():
+            images = images.to('mps')
+        break
+
+    with torch.no_grad():
+        for i in range(total_iters):
+            if i == total_iters:  # NOTE(review): unreachable — range(total_iters) never yields i == total_iters
+                break
+            if i == warmup_iters:
+                start = time.time()  # NOTE(review): `start` is undefined below if total_iters <= warmup_iters
+
+            # model inference
+            model(images)
+
+            if i % args.print_freq == 0:
+                print(f"benchmarking... {i+1}/{total_iters}")
+
+        end = time.time()
+        latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)  # seconds per sample
+        throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
+        print("Latency: {:.3f} ms".format(latency * 10**3))
+        print("Throughput: {:.3f} samples/sec".format(throughput))
+
+def train(train_loader, model, criterion, optimizer, epoch, args):  # NOTE(review): one epoch of SGD training; not called anywhere in this script
+    batch_time = AverageMeter('Time', ':6.3f')
+    data_time = AverageMeter('Data', ':6.3f')
+    losses = AverageMeter('Loss', ':.4e')
+    top1 = AverageMeter('Acc@1', ':6.2f')
+    top5 = AverageMeter('Acc@5', ':6.2f')
+    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1,
+                             top5, prefix="Epoch: [{}]".format(epoch))
+
+    # switch to train mode
+    model.train()
+
+    end = time.time()
+    for i, (input, target) in enumerate(train_loader):  # `input` shadows the builtin; kept as-is
+        # measure data loading time
+        data_time.update(time.time() - end)
+
+        if args.gpu is not None:
+            input = input.cuda(args.gpu, non_blocking=True)
+            target = target.cuda(args.gpu, non_blocking=True)
+
+        # compute output
+        output = model(input)
+        loss = criterion(output, target)
+
+        # measure accuracy and record loss
+        acc1, acc5 = accuracy(output, target, topk=(1, 5))
+        losses.update(loss.item(), input.size(0))
+        top1.update(acc1[0], input.size(0))
+        top5.update(acc5[0], input.size(0))
+
+        # compute gradient and do SGD step
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        # measure elapsed time
+        batch_time.update(time.time() - end)
+        end = time.time()
+
+        if i % args.print_freq == 0:
+            progress.print(i)
+
+
+def validate(val_loader, model, criterion, args):  # returns top-1 accuracy as a fraction in [0, 1]
+    batch_time = AverageMeter('Time', ':6.3f')
+    losses = AverageMeter('Loss', ':.4e')
+    top1 = AverageMeter('Acc@1', ':6.2f')
+    top5 = AverageMeter('Acc@5', ':6.2f')
+    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
+                             prefix='Test: ')
+
+    # switch to evaluate mode
+    # model.eval()  # NOTE(review): eval mode disabled here — dropout/batchnorm stay as the caller left them; confirm intentional
+
+    with torch.no_grad():
+        for i, (input, target) in enumerate(val_loader):
+            if i >= args.warmup_iter:  # only time post-warmup batches
+                start = time.time()
+            if args.gpu is not None:
+                input = input.cuda(args.gpu, non_blocking=True)
+                target = target.cuda(args.gpu, non_blocking=True)
+
+            # compute output
+            output = model(input)
+            loss = criterion(output, target)
+
+            # measure accuracy and record loss
+            acc1, acc5 = accuracy(output, target, topk=(1, 5))
+            losses.update(loss.item(), input.size(0))
+            top1.update(acc1[0], input.size(0))
+            top5.update(acc5[0], input.size(0))
+
+            # measure elapsed time
+            if i >= args.warmup_iter:
+                batch_time.update(time.time() - start)
+
+            if i % args.print_freq == 0:
+                progress.print(i)
+
+            if args.iter > 0 and i >= (args.warmup_iter + args.iter - 1):  # optional early stop after warmup + iter batches
+                break
+
+    print('Batch size = %d' % args.batch_size)
+    print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}'
+          .format(top1=(top1.avg / 100), top5=(top5.avg / 100)))
+
+    return top1.avg/100  # convert percentage back to a fraction
+
+
+def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):  # persist latest state; duplicate as best when flagged
+    torch.save(state, filename)
+    if is_best:
+        shutil.copyfile(filename, 'model_best.pth.tar')  # keep a copy of the best checkpoint so far
+
+class AverageMeter(object):
+    """Computes and stores the average and current value"""
+    def __init__(self, name, fmt=':f'):  # name: display label; fmt: format spec used by __str__
+        self.name = name
+        self.fmt = fmt
+        self.reset()  # start with all running statistics zeroed
+
+    def reset(self):  # clear current value and running sum/count/average
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):  # record a value observed over n samples
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count  # running mean over all updates so far
+
+    def __str__(self):
+        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'  # e.g. "Loss 0.1234 (0.1100)"
+        return fmtstr.format(**self.__dict__)
+
+
+class ProgressMeter(object):  # joins a batch counter with AverageMeter summaries for console logging
+    def __init__(self, num_batches, *meters, prefix=""):
+        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)  # e.g. "[{:3d}/500]"
+        self.meters = meters
+        self.prefix = prefix
+
+    def print(self, batch):  # emit one tab-separated progress line for this batch index
+        entries = [self.prefix + self.batch_fmtstr.format(batch)]
+        entries += [str(meter) for meter in self.meters]
+        print('\t'.join(entries))
+
+    def _get_batch_fmtstr(self, num_batches):
+        num_digits = len(str(num_batches // 1))  # width of the largest batch index
+        fmt = '{:' + str(num_digits) + 'd}'
+        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
+
+
+def adjust_learning_rate(optimizer, epoch, args):
+    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
+    lr = args.lr * (0.1 ** (epoch // 30))  # step decay: x0.1 at epochs 30, 60, 90, ...
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr  # same LR applied to every parameter group
+
+
+def accuracy(output, target, topk=(1,)):
+    """Computes the accuracy over the k top predictions for the specified values of k"""
+    with torch.no_grad():  # pure metric computation; no gradients needed
+        maxk = max(topk)
+        batch_size = target.size(0)
+
+        _, pred = output.topk(maxk, 1, True, True)  # indices of the maxk highest logits per sample
+        pred = pred.t()  # (maxk, batch): row k-1 holds each sample's k-th guess
+        correct = pred.eq(target.view(1, -1).expand_as(pred))  # boolean hit matrix
+
+        res = []
+        for k in topk:
+            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)  # hits within top-k
+            res.append(correct_k.mul_(100.0 / batch_size))  # as a percentage of the batch
+        return res
+
+
+if __name__ == '__main__':
+ main()
+
diff --git a/examples/3.x_api/pytorch/cv/static_quant/requirements.txt b/examples/3.x_api/pytorch/cv/static_quant/requirements.txt
new file mode 100644
index 00000000000..ebd3df6ae7a
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/static_quant/requirements.txt
@@ -0,0 +1,3 @@
+torch
+torchvision
+neural-compressor
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh b/examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh
new file mode 100644
index 00000000000..6f6b69c35df
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params: defaults plus CLI overrides of the form --key=value
+function init_params {
+  iters=100
+  batch_size=16  # NOTE(review): overridable here but ignored below (-b 30 is hard-coded)
+  tuned_checkpoint=saved_results
+  echo ${max_eval_samples}  # NOTE(review): variable is never set — this echoes an empty line
+  for var in "$@"
+  do
+    case $var in
+      --topology=*)
+          topology=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --int8=*)  # "true" to load and benchmark the quantized model
+          int8=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --config=*)
+          tuned_checkpoint=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+
+# run_benchmark: accuracy runs plain python; performance runs under incbench
+function run_benchmark {
+    extra_cmd=''
+
+    if [[ ${mode} == "accuracy" ]]; then
+        mode_cmd=" --accuracy "
+    elif [[ ${mode} == "performance" ]]; then
+        mode_cmd=" --performance --iters "${iters}
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+    if [[ ${int8} == "true" ]]; then
+        extra_cmd=$extra_cmd" --int8"
+    fi
+    echo $extra_cmd
+
+
+    echo $extra_cmd  # NOTE(review): duplicate of the echo above
+
+    if [ "${topology}" = "resnet18_pt2e_static" ]; then
+        model_name_or_path="resnet18"  # NOTE(review): assigned but unused — `-a resnet18` is hard-coded below
+    fi
+
+    if [[ ${mode} == "accuracy" ]]; then
+        python main.py \
+            --pretrained \
+            -a resnet18 \
+            -b 30 \
+            --tuned_checkpoint ${tuned_checkpoint} \
+            ${dataset_location} \
+            ${extra_cmd} \
+            ${mode_cmd}
+    elif [[ ${mode} == "performance" ]]; then
+        incbench --num_cores_per_instance 4 \
+            main.py \
+            --pretrained \
+            -a resnet18 \
+            -b 30 \
+            --tuned_checkpoint ${tuned_checkpoint} \
+            ${dataset_location} \
+            ${extra_cmd} \
+            ${mode_cmd}
+    else
+        echo "Error: No such mode: ${mode}"  # unreachable: mode already validated above
+        exit 1
+    fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/cv/static_quant/run_quant.sh b/examples/3.x_api/pytorch/cv/static_quant/run_quant.sh
new file mode 100644
index 00000000000..1f4588e933c
--- /dev/null
+++ b/examples/3.x_api/pytorch/cv/static_quant/run_quant.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_tuning
+
+}
+
+# init params: parse --topology/--dataset_location/--input_model/--output_model
+function init_params {
+  tuned_checkpoint="saved_results"
+  for var in "$@"
+  do
+    case $var in
+      --topology=*)
+          topology=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)  # NOTE(review): accepted but unused — `-a resnet18` is hard-coded below
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          tuned_checkpoint=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning: quantize (-t) pretrained resnet18 and save to tuned_checkpoint
+function run_tuning {
+    if [ "${topology}" = "resnet18_pt2e_static" ]; then
+        model_name_or_path="resnet18"  # NOTE(review): assigned but unused
+    fi
+    python main.py \
+        --pretrained \
+        -t \
+        -a resnet18 \
+        -b 30 \
+        --tuned_checkpoint ${tuned_checkpoint} \
+        ${dataset_location}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md
new file mode 100644
index 00000000000..6b37038d0dc
--- /dev/null
+++ b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md
@@ -0,0 +1,83 @@
+Step-by-Step
+============
+This document describes the step-by-step instructions to run [stable diffusion XL model](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) using Smooth Quantization to accelerate inference while maintaining the quality of the output image.
+
+# Prerequisite
+
+## Environment
+Recommend python 3.9 or higher version.
+
+```shell
+pip install -r requirements.txt
+```
+**Note**: IPEX along with torch requires a nightly version (2.4) for compatibility. Please refer to [installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=cpu&version=main&os=linux%2fwsl2&package=source).
+
+# Run
+
+To quantize the model:
+```bash
+python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --alpha 0.44 --output_dir "./saved_results"
+```
+or
+```bash
+sh run_quant.sh --alpha=0.44
+```
+To load a quantized model:
+```bash
+python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --load --int8
+```
+or
+```bash
+sh run_quant.sh --int8=true
+```
+
+# Results
+## Image Generated
+
+With caption `"A brown and white dog runs on some brown grass near a Frisbee that is just sailing above the ground."`, results of fp32 model and int8 model are listed left and right respectively.
+
+
+
+
+
+
+## CLIP evaluation
+We have also evaluated CLIP scores on 5000 samples from COCO2014 validation dataset for FP32 model and INT8 model. CLIP results are listed below.
+
+| Precision | FP32 | INT8 |
+|----------------------|-------|-------|
+| CLIP on COCO2014 val | 32.05 | 31.77 |
+
+We're using the mlperf_sd_inference [repo](https://github.com/ahmadki/mlperf_sd_inference) to evaluate CLIP scores. In order to support evaluation on quantized model,
+we made some modification on the script (`main.py`). Please use as following:
+```bash
+git clone https://github.com/ahmadki/mlperf_sd_inference.git
+cd mlperf_sd_inference
+mv ../main.py ./
+```
+After setting the environment as instructed in the repo, you can execute the modified `main.py` script to generate images:
+```bash
+python main.py \
+ --model-id stabilityai/stable-diffusion-xl-base-1.0 \
+ --quantized-unet ./saved_results \ # quantized model saving path, should include `qconfig.json` and `quantized_model.pt`
+ --precision fp32 \
+ --guidance 8.0 \
+ --steps 20 \
+ --iters 200 \ # change to 5000 for the full 5k dataset
+ --latent-path latents.pt \
+ --base-output-dir ./output
+```
+Then you can compute CLIP score using the images generated by the quantized model:
+```bash
+mv ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/* ./output/ # switch directory
+rm -rf ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/
+
+python clip/clip_score.py \
+ --tsv-file captions_5k.tsv \
+ --image-folder ./output \ # folder with the generated images
+ --device "cpu"
+```
+Or you can use the bash script for all steps above:
+```bash
+sh run_benchmark.sh --mode=accuracy --int8=true
+```
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg
new file mode 100644
index 00000000000..387eed9a802
Binary files /dev/null and b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg differ
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg
new file mode 100644
index 00000000000..9a6d146894e
Binary files /dev/null and b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg differ
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt
new file mode 100644
index 00000000000..208dbc48a1c
Binary files /dev/null and b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt differ
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py
new file mode 100644
index 00000000000..1f5b72fd0f0
--- /dev/null
+++ b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py
@@ -0,0 +1,484 @@
+import os
+import logging
+import tempfile
+import shutil
+import argparse
+import pandas as pd
+import time
+import torch
+import intel_extension_for_pytorch as ipex
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from diffusers import (
+ DDPMScheduler,
+ DDIMScheduler,
+ EulerDiscreteScheduler,
+ EulerAncestralDiscreteScheduler,
+ StableDiffusionXLPipeline,
+ StableDiffusionXLImg2ImgPipeline,
+)
+from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import (
+ deprecate, retrieve_timesteps, rescale_noise_cfg,
+ PipelineImageInput, StableDiffusionXLPipelineOutput
+)
+
+
+class StableDiffusionXLPipelineSQ(StableDiffusionXLPipeline):
+ def _get_add_time_ids(
+ self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None
+ ):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
+
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ timesteps: List[int] = None,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ ip_adapter_image: Optional[PipelineImageInput] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ negative_original_size: Optional[Tuple[int, int]] = None,
+ negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
+ negative_target_size: Optional[Tuple[int, int]] = None,
+ clip_skip: Optional[int] = None,
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+ **kwargs,
+ ):
+
+ callback = kwargs.pop("callback", None)
+ callback_steps = kwargs.pop("callback_steps", None)
+
+ if callback is not None:
+ deprecate(
+ "callback",
+ "1.0.0",
+ "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
+ )
+ if callback_steps is not None:
+ deprecate(
+ "callback_steps",
+ "1.0.0",
+ "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
+ )
+
+ # 0. Default height and width to unet
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ callback_on_step_end_tensor_inputs,
+ )
+
+ self._guidance_scale = guidance_scale
+ self._guidance_rescale = guidance_rescale
+ self._clip_skip = clip_skip
+ self._cross_attention_kwargs = cross_attention_kwargs
+ self._denoising_end = denoising_end
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = 'cpu'
+
+ # 3. Encode input prompt
+ lora_scale = (
+ self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
+ )
+
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=lora_scale,
+ clip_skip=self.clip_skip,
+ )
+
+ # 4. Prepare timesteps
+ timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ if self.text_encoder_2 is None:
+ text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+ else:
+ text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
+
+ add_time_ids = self._get_add_time_ids(
+ original_size,
+ crops_coords_top_left,
+ target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ if negative_original_size is not None and negative_target_size is not None:
+ negative_add_time_ids = self._get_add_time_ids(
+ negative_original_size,
+ negative_crops_coords_top_left,
+ negative_target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ else:
+ negative_add_time_ids = add_time_ids
+
+ if self.do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ if ip_adapter_image is not None:
+ image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
+ if self.do_classifier_free_guidance:
+ image_embeds = torch.cat([negative_image_embeds, image_embeds])
+ image_embeds = image_embeds.to(device)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 8.1 Apply denoising_end
+ if (
+ self.denoising_end is not None
+ and isinstance(self.denoising_end, float)
+ and self.denoising_end > 0
+ and self.denoising_end < 1
+ ):
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (self.denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
+
+ # 9. Optionally get Guidance Scale Embedding
+ timestep_cond = None
+ if self.unet.config.time_cond_proj_dim is not None:
+ guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
+ timestep_cond = self.get_guidance_scale_embedding(
+ guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
+ ).to(device=device, dtype=latents.dtype)
+
+ self._num_timesteps = len(timesteps)
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ if ip_adapter_image is not None:
+ added_cond_kwargs["image_embeds"] = image_embeds
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ added_cond_kwargs=added_cond_kwargs,
+ )['sample']
+
+ # perform guidance
+ if self.do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ if callback_on_step_end is not None:
+ callback_kwargs = {}
+ for k in callback_on_step_end_tensor_inputs:
+ callback_kwargs[k] = locals()[k]
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+ latents = callback_outputs.pop("latents", latents)
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+ negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+ add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds)
+ negative_pooled_prompt_embeds = callback_outputs.pop(
+ "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds
+ )
+ add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
+ negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ step_idx = i // getattr(self.scheduler, "order", 1)
+ callback(step_idx, t, latents)
+
+ if not output_type == "latent":
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+
+ # cast back to fp16 if needed
+ if needs_upcasting:
+ self.vae.to(dtype=torch.float16)
+ else:
+ image = latents
+
+ if not output_type == "latent":
+ # apply watermark if available
+ if self.watermark is not None:
+ image = self.watermark.apply_watermark(image)
+
+ image = self.image_processor.postprocess(image.detach(), output_type=output_type)
+
+ # Offload all models
+ self.maybe_free_model_hooks()
+
+ if not return_dict:
+ return (image,)
+
+ return StableDiffusionXLPipelineOutput(images=image)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--model-id', default="stabilityai/stable-diffusion-xl-base-1.0", type=str)
+parser.add_argument('--precision', default='fp32', type=str)
+parser.add_argument('--base-output-dir', default="./output", type=str)
+parser.add_argument('--quantized-unet', default="./saved_results", type=str)
+parser.add_argument("--int8", action="store_true", help="Load quantized model.")
+parser.add_argument("--load", action="store_true")
+parser.add_argument('--iters', default=5000, type=int, help="Num of image generated.")
+parser.add_argument('--output-dir-name', default=None, type=str)
+parser.add_argument('--output-dir-name-postfix', default=None, type=str)
+parser.add_argument('--captions-fname', default="captions_5k.tsv", type=str)
+parser.add_argument('--guidance', default=8.0, type=float)
+parser.add_argument('--scheduler', default="euler", type=str)
+parser.add_argument('--steps', default=20, type=int)
+parser.add_argument('--negative-prompt', default="normal quality, low quality, worst quality, low res, blurry, nsfw, nude", type=str)
+parser.add_argument('--latent-path', default="latents.pt", type=str)
+parser.add_argument('--generator-seed', default=None, type=int)
+parser.add_argument("--refiner", dest='refiner', action="store_true",
+ help="Whether to add a refiner to the SDXL pipeline."
+ "Applicable only with --model-id=xl")
+parser.add_argument("--no-refiner", dest='refiner', action="store_false",
+ help="Whether to add a refiner to the SDXL pipeline."
+ "Applicable only with --model-id=xl")
+
+args = parser.parse_args()
+
+# Init the logger
+logging.basicConfig(
+ format='%(asctime)s %(levelname)-8s %(message)s',
+ level=logging.INFO,
+ datefmt='%Y-%m-%d %H:%M:%S'
+)
+
+if args.latent_path and args.generator_seed:
+ raise ValueError(
+ "Cannot specify both --latent-path and --generator-seed"
+ )
+
+if args.precision == "fp16":
+ dtype = torch.float16
+elif args.precision == "bf16":
+ dtype = torch.bfloat16
+else:
+ dtype = torch.float32
+
+# Initialize defaults
+device = torch.device('cpu')
+world_size = 1
+rank = 0
+
+# load frozen latent
+latent_noise = None
+if args.latent_path:
+ logging.info(f"[{rank}] loading latent from: {args.latent_path}")
+ latent_noise = torch.load(args.latent_path).to(dtype)
+
+logging.info(f"[{rank}] args: {args}")
+logging.info(f"[{rank}] world_size: {world_size}")
+logging.info(f"[{rank}] device: {device}")
+
+logging.info(f"[{rank}] using captions from: {args.captions_fname}")
+df = pd.read_csv(args.captions_fname, sep='\t')
+logging.info(f"[{rank}] {len(df)} captions loaded")
+
+# split captions among ranks
+df = df[rank::world_size]
+logging.info(f"[{rank}] {len(df)} captions assigned")
+
+# Build the pipeline
+schedulers = {
+ "ddpm": DDPMScheduler.from_pretrained(args.model_id, subfolder="scheduler"),
+ "ddim": DDIMScheduler.from_pretrained(args.model_id, subfolder="scheduler"),
+ "euler_anc": EulerAncestralDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler"),
+ "euler": EulerDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler"),
+}
+pipe = StableDiffusionXLPipelineSQ.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ torch_dtype=dtype,
+ use_safetensors=True,
+)
+pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
+
+if args.refiner:
+ refiner_pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(args.model_id,
+ scheduler=schedulers[args.scheduler],
+ safety_checker=None,
+ add_watermarker=False,
+ variant="fp16" if args.precision == 'fp16' else None,
+ torch_dtype=dtype)
+
+if args.int8 and args.load:
+ from neural_compressor.torch.quantization import load
+ example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype),
+ "timestep": torch.tensor(951.0),
+ "encoder_hidden_states": torch.randn((2, 77, 2048), dtype=dtype),
+ "added_cond_kwargs": {'text_embeds':torch.randn((2, 1280), dtype=dtype),
+ 'time_ids': torch.tensor([[1024., 1024., 0., 0., 1024., 1024.],
+ [1024., 1024., 0., 0., 1024., 1024.]], dtype=dtype)},}
+ q_unet = load(args.quantized_unet)
+ for _ in range(2):
+ q_unet(**example_inputs)
+ print("Loaded Quantized Model")
+ setattr(q_unet, "config", pipe.unet.config)
+ pipe.unet = q_unet
+
+pipe.set_progress_bar_config(disable=True)
+logging.info(f"[{rank}] Pipeline initialized: {pipe}")
+
+if args.refiner:
+ refiner_pipe = refiner_pipe.to(device)
+ refiner_pipe.set_progress_bar_config(disable=True)
+ logging.info(f"[{rank}] Refiner pipeline initialized: {refiner_pipe}")
+
+# Output directory
+output_dir = args.output_dir_name or f"{args.model_id.replace('/','--')}__{args.scheduler}__{args.steps}__{args.guidance}__{args.precision}"
+if args.output_dir_name_postfix is not None:
+ output_dir = f"{output_dir}_{args.output_dir_name_postfix}"
+
+output_dir = os.path.join(args.base_output_dir, output_dir)
+
+# Ensure the output directory exists
+if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+# Create a temporary directory to atomically move the images
+tmp_dir = tempfile.mkdtemp()
+
+# Generate the images
+for index, row in df.iterrows():
+ image_id = row['image_id']
+ caption_id = row['id']
+ caption_text = row['caption']
+
+ destination_path = os.path.join(output_dir, f"{caption_id}.png")
+
+ if index >= args.iters:
+ break
+
+ # Check if the image already exists in the output directory
+ if not os.path.exists(destination_path):
+ # Generate the image
+ print(index, caption_text)
+ tic = time.time()
+ image = pipe(prompt=caption_text,
+ negative_prompt="normal quality, low quality, worst quality, low res, blurry, nsfw, nude",
+ guidance_scale=8.0,
+ generator=torch.Generator(device=device).manual_seed(args.generator_seed) if args.generator_seed else None,
+ latents=latent_noise,
+ num_inference_steps=20).images[0]
+ toc = time.time()
+ print("Time taken : ",toc-tic)
+
+ if args.refiner:
+ image = refiner_pipe(caption_text,
+ image=image).images[0]
+
+ # Save the image
+ image_path_tmp = os.path.join(tmp_dir, f"{caption_id}.png")
+ image.save(image_path_tmp)
+ shutil.move(image_path_tmp, destination_path)
+
+ logging.info(f"[{rank}] Saved image {caption_id}: {caption_text}")
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt
new file mode 100644
index 00000000000..f1fe1f7e20f
--- /dev/null
+++ b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt
@@ -0,0 +1,8 @@
+diffusers
+accelerate
+torch
+transformers
+tensorboard
+intel_extension_for_pytorch
+tqdm
+open-clip-torch
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh
new file mode 100644
index 00000000000..54046faebb1
--- /dev/null
+++ b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0"
+ latent="latents.pt"
+ tuned_checkpoint="./saved_results/"
+ iters=200
+ for var in "$@"
+ do
+ case $var in
+ --iters=*)
+ iters=$(echo $var | cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo $var | cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var | cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ extra_cmd="--load"
+ model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0"
+ precision="fp32"
+ latent="latents.pt"
+ base_output_dir="./output/"
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ if [[ ${mode} == "performance" ]]; then
+ extra_cmd=$extra_cmd" --performance"
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ python -u sdxl_smooth_quant.py \
+ --model_name_or_path ${model_name_or_path} \
+ --latent ${latent} \
+ ${extra_cmd}
+ else
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ python -u sdxl_smooth_quant.py \
+ --model_name_or_path ${model_name_or_path} \
+ --latent ${latent} \
+ ${extra_cmd}
+
+ REPO_URL="https://github.com/ahmadki/mlperf_sd_inference.git"
+ TARGET_DIR="mlperf_sd_inference"
+
+ if [ -d "$TARGET_DIR" ]; then
+ echo "Directory $TARGET_DIR already exists. Skipping git clone."
+ else
+ git clone "$REPO_URL" "$TARGET_DIR"
+ fi
+
+ cd mlperf_sd_inference
+ cp ../main.py ./
+ if [ -d "../saved_results/" ]; then
+ mv ../saved_results/ ./
+ fi
+
+ python -u main.py \
+ --model-id ${model_name_or_path} \
+ --quantized-unet ${tuned_checkpoint} \
+ --precision ${precision} \
+ --latent-path ${latent} \
+ --base-output-dir ${base_output_dir} \
+ --iters ${iters} \
+ ${extra_cmd}
+
+ mv ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/* ./output/
+ rm -rf ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/
+
+ python clip/clip_score.py \
+ --tsv-file captions_5k.tsv \
+ --image-folder ${base_output_dir} \
+ --device "cpu"
+
+ cd ..
+ fi
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh
new file mode 100644
index 00000000000..e24ff49c78b
--- /dev/null
+++ b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --alpha=*)
+ alpha=$(echo $var |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo $var | cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ extra_cmd=""
+ model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0"
+ n_steps=20
+ calib_size=10
+ batch_size=1
+ latent="latents.pt"
+ alpha=0.44
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8 --load"
+ else
+ extra_cmd=$extra_cmd" --quantize"
+ fi
+ echo $extra_cmd
+
+ python -u sdxl_smooth_quant.py \
+ --model_name_or_path ${model_name_or_path} \
+ --n_steps ${n_steps} \
+ --alpha ${alpha} \
+ --latent ${latent} \
+ ${extra_cmd}
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py
new file mode 100644
index 00000000000..984a1696efd
--- /dev/null
+++ b/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py
@@ -0,0 +1,436 @@
+
+import os
+import argparse
+import torch
+import intel_extension_for_pytorch as ipex
+from diffusers import EulerDiscreteScheduler, StableDiffusionXLPipeline
+from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import (
+ deprecate, retrieve_timesteps, rescale_noise_cfg,
+ PipelineImageInput, StableDiffusionXLPipelineOutput
+)
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+def prompts2images(pipeline, prompts, **kwargs):
+ images = pipeline(
+ prompt=prompts,
+ num_inference_steps=kwargs["n_steps"],
+ negative_prompt=[
+ "normal quality, low quality, worst quality, low res, blurry, nsfw, nude"
+ ]
+ * len(prompts),
+ latents=kwargs["latent"],
+ guidance_scale=8.0, # MLPerf requirements
+ ).images
+ return images
+
+def save_images(prompts, images, save_dir, prefix='ref'):
+ for prompt, image in zip(prompts, images):
+ image_name = f"{prefix}_{'_'.join(prompt.replace('/', ' ').split(' '))}.jpg"
+ image.save(os.path.join(save_dir, image_name))
+
+def do_calibration(pipeline, calibration_prompts, **kwargs):
+ for i_th, prompts in enumerate(calibration_prompts):
+ if i_th >= kwargs["calib_size"]:
+ return
+ prompts2images(pipeline, prompts, **kwargs)
+
+class StableDiffusionXLPipelineSQ(StableDiffusionXLPipeline):
+ def _get_add_time_ids(
+ self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None
+ ):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
+
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ timesteps: List[int] = None,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ ip_adapter_image: Optional[PipelineImageInput] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ negative_original_size: Optional[Tuple[int, int]] = None,
+ negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
+ negative_target_size: Optional[Tuple[int, int]] = None,
+ clip_skip: Optional[int] = None,
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+ **kwargs,
+ ):
+
+ callback = kwargs.pop("callback", None)
+ callback_steps = kwargs.pop("callback_steps", None)
+
+ if callback is not None:
+ deprecate(
+ "callback",
+ "1.0.0",
+ "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
+ )
+ if callback_steps is not None:
+ deprecate(
+ "callback_steps",
+ "1.0.0",
+ "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
+ )
+
+ # 0. Default height and width to unet
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ callback_on_step_end_tensor_inputs,
+ )
+
+ self._guidance_scale = guidance_scale
+ self._guidance_rescale = guidance_rescale
+ self._clip_skip = clip_skip
+ self._cross_attention_kwargs = cross_attention_kwargs
+ self._denoising_end = denoising_end
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = "cpu"
+
+ # 3. Encode input prompt
+ lora_scale = (
+ self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
+ )
+
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=lora_scale,
+ clip_skip=self.clip_skip,
+ )
+
+ # 4. Prepare timesteps
+ timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ if self.text_encoder_2 is None:
+ text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+ else:
+ text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
+
+ add_time_ids = self._get_add_time_ids(
+ original_size,
+ crops_coords_top_left,
+ target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ if negative_original_size is not None and negative_target_size is not None:
+ negative_add_time_ids = self._get_add_time_ids(
+ negative_original_size,
+ negative_crops_coords_top_left,
+ negative_target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ else:
+ negative_add_time_ids = add_time_ids
+
+ if self.do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ if ip_adapter_image is not None:
+ image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
+ if self.do_classifier_free_guidance:
+ image_embeds = torch.cat([negative_image_embeds, image_embeds])
+ image_embeds = image_embeds.to(device)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 8.1 Apply denoising_end
+ if (
+ self.denoising_end is not None
+ and isinstance(self.denoising_end, float)
+ and self.denoising_end > 0
+ and self.denoising_end < 1
+ ):
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (self.denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
+
+ # 9. Optionally get Guidance Scale Embedding
+ timestep_cond = None
+ if self.unet.config.time_cond_proj_dim is not None:
+ guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
+ timestep_cond = self.get_guidance_scale_embedding(
+ guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
+ ).to(device=device, dtype=latents.dtype)
+
+ self._num_timesteps = len(timesteps)
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ if ip_adapter_image is not None:
+ added_cond_kwargs["image_embeds"] = image_embeds
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ added_cond_kwargs=added_cond_kwargs,
+ )['sample']
+
+ # perform guidance
+ if self.do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ if callback_on_step_end is not None:
+ callback_kwargs = {}
+ for k in callback_on_step_end_tensor_inputs:
+ callback_kwargs[k] = locals()[k]
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+ latents = callback_outputs.pop("latents", latents)
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+ negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+ add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds)
+ negative_pooled_prompt_embeds = callback_outputs.pop(
+ "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds
+ )
+ add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
+ negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ step_idx = i // getattr(self.scheduler, "order", 1)
+ callback(step_idx, t, latents)
+
+ if not output_type == "latent":
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+
+ # cast back to fp16 if needed
+ if needs_upcasting:
+ self.vae.to(dtype=torch.float16)
+ else:
+ image = latents
+
+ if not output_type == "latent":
+ # apply watermark if available
+ if self.watermark is not None:
+ image = self.watermark.apply_watermark(image)
+
+ image = self.image_processor.postprocess(image.detach(), output_type=output_type)
+
+ # Offload all models
+ self.maybe_free_model_hooks()
+
+ if not return_dict:
+ return (image,)
+
+ return StableDiffusionXLPipelineOutput(images=image)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_name_or_path", type=str, default="stabilityai/stable-diffusion-xl-base-1.0"
+ )
+ parser.add_argument("--quantize", action="store_true")
+ parser.add_argument("--load", action="store_true")
+ parser.add_argument("--int8", action="store_true", help="Load quantized model.")
+ parser.add_argument("--performance", action="store_true")
+ parser.add_argument("--n_steps", type=int, default=20)
+ parser.add_argument("--batch-size", type=int, default=1)
+ parser.add_argument("--calib-size", type=int, default=10)
+ parser.add_argument("--latent", type=str, default="latents.pt")
+ parser.add_argument("--alpha", type=float, default=0.5, help="SmoothQuant Alpha")
+ parser.add_argument("--output_dir", type=str, default="./saved_results", help="output directory")
+ parser.add_argument("--iters", default=10, type=int, help="For performance measurement only.")
+
+ args = parser.parse_args()
+ os.makedirs(args.output_dir, exist_ok=True)
+
+ args.calib_size = args.calib_size // args.batch_size
+
+ dtype = torch.float32
+
+ pipeline = StableDiffusionXLPipelineSQ.from_pretrained(
+ args.model_name_or_path,
+ torch_dtype=dtype,
+ use_safetensors=True,
+ )
+ pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config)
+
+ # This is a list of prompts
+ cali_prompts = [['A brown and white dog runs on some brown grass near a Frisbee that is just sailing above the ground.'],
+ ['The bus is traveling down a two way street.']]
+
+ torch.random.manual_seed(42)
+ if args.latent is not None:
+ init_latent = torch.load(args.latent).to(dtype)
+ else:
+ init_latent = torch.randn((1,4,128,128), dtype=dtype)
+
+ prompts = cali_prompts[0]
+ ref_images = prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent)
+ save_images(prompts, ref_images, args.output_dir, prefix='ref')
+
+ def forward_loop(model):
+ do_calibration(
+ pipeline=pipeline,
+ calibration_prompts=cali_prompts,
+ calib_size=args.calib_size,
+ n_steps=args.n_steps,
+ latent=init_latent,
+ )
+
+ if args.quantize:
+ excluded_precisions = ["bf16"]
+ example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype),
+ "timestep": torch.tensor(951.0),
+ "encoder_hidden_states": torch.randn((2, 77, 2048), dtype=dtype),
+ "added_cond_kwargs": {'text_embeds':torch.randn((2, 1280), dtype=dtype),
+ 'time_ids': torch.tensor([[1024., 1024., 0., 0., 1024., 1024.],
+ [1024., 1024., 0., 0., 1024., 1024.]], dtype=dtype)},}
+
+ from neural_compressor.torch.quantization import SmoothQuantConfig, prepare, convert
+ quant_config = SmoothQuantConfig(alpha=args.alpha, excluded_precisions=excluded_precisions)
+ user_model = prepare(model=pipeline.unet, quant_config=quant_config, example_inputs=example_inputs)
+ forward_loop(user_model)
+ q_unet = convert(user_model)
+ q_unet.save(args.output_dir)
+
+ if args.load:
+ if args.int8:
+ from neural_compressor.torch.quantization import load
+ q_unet = load(os.path.abspath(os.path.expanduser(args.output_dir)))
+ setattr(q_unet, "config", pipeline.unet.config)
+ else:
+ q_unet = pipeline.unet
+
+ pipeline.unet = q_unet
+ quant_images = prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent)
+ save_images(prompts, quant_images, args.output_dir, prefix='quant')
+
+ if args.performance:
+ import time
+
+ total_iters = args.iters * args.batch_size
+ warmup_iters = 5
+ for i in range(total_iters):
+ if i == warmup_iters:
+ start = time.time()
+ prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent)
+ end = time.time()
+
+ latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)
+ throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
+ print("Latency: {:.3f} ms".format(latency * 10**3))
+ print("Throughput: {:.3f} samples/sec".format(throughput))
+ print('Batch size = %d' % args.batch_size)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md
new file mode 100644
index 00000000000..a17eb188d0a
--- /dev/null
+++ b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md
@@ -0,0 +1,100 @@
+Step-by-Step
+============
+
+This document describes the step-by-step instructions for reproducing PyTorch tuning results with Intel® Neural Compressor.
+
+# Prerequisite
+
+## 1. Environment
+
+We verified these examples with the IPEX backend on Python 3.10 (recommended).
+
+```shell
+pip install -r requirements.txt
+```
+
+## 2. Install Intel-Pytorch-Extension
+
+Please refer to [intel/intel-extension-for-pytorch(github.com)](https://github.com/intel/intel-extension-for-pytorch).
+
+### Install IPEX CPU
+
+ > Note: GCC9 compiler is recommended
+
+ ```shell
+ python -m pip install intel_extension_for_pytorch -f https://software.intel.com/ipex-whl-stable
+ ```
+
+### Install IPEX XPU
+Please build an IPEX docker container according to the [official guide](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu&version=v2.1.30%2bxpu&os=linux%2fwsl2&package=docker).
+
+You can run a simple sanity test to confirm that the correct version is installed and that the software stack can read the hardware information on your system. The command should return the installed PyTorch and IPEX versions, as well as information about the detected GPU card(s).
+```bash
+source {DPCPPROOT}/env/vars.sh
+source {MKLROOT}/env/vars.sh
+source {CCLROOT}/env/vars.sh
+source {MPIROOT}/env/vars.sh
+python -c "import torch; import intel_extension_for_pytorch as ipex; print(torch.__version__); print(ipex.__version__); [print(f'[{i}]: {torch.xpu.get_device_properties(i)}') for i in range(torch.xpu.device_count())];"
+```
+Please also refer to this [tutorial](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu&version=v2.1.30%2bxpu&os=linux%2fwsl2&package=conda) to check system requirements and install dependencies.
+
+## 3. Prepare Dataset
+
+Download the raw [ImageNet](http://www.image-net.org/) images to a directory such as /path/to/imagenet. The directory should include the following folders:
+
+```bash
+ls /path/to/imagenet
+train val
+```
+
+# Run with CPU
+
+> Note: All torchvision model names can be passed as long as they are included in `torchvision.models`, below are some examples.
+
+### 1. ResNet18 With Intel PyTorch Extension
+
+```shell
+python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet
+```
+or
+```shell
+bash run_quant.sh --input_model=resnet18 --dataset_location=/path/to/imagenet
+bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false
+```
+
+### 2. ResNet50 With Intel PyTorch Extension
+
+```shell
+python main.py -t -a resnet50 --ipex --pretrained /path/to/imagenet
+```
+or
+```shell
+bash run_quant.sh --input_model=resnet50 --dataset_location=/path/to/imagenet
+bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false
+```
+
+### 3. ResNext101_32x16d With Intel PyTorch Extension
+
+```shell
+python main.py -t -a resnext101_32x16d_wsl --hub --ipex --pretrained /path/to/imagenet
+```
+or
+```shell
+bash run_quant.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet
+bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false
+```
+
+# Run with XPU
+
+> Note: All torchvision model names can be passed as long as they are included in `torchvision.models`, below are some examples.
+
+### 1. ResNet18 With Intel PyTorch Extension
+
+```shell
+python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet --xpu
+```
+or
+```shell
+bash run_quant.sh --input_model=resnet18 --dataset_location=/path/to/imagenet
+bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false --xpu=true/false
+```
diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py
new file mode 100644
index 00000000000..a308aacad35
--- /dev/null
+++ b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py
@@ -0,0 +1,551 @@
+import argparse
+import os
+import random
+import shutil
+import time
+import warnings
+import sys
+
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+use_gpu = False
+if use_gpu:
+ import torch.backends.cudnn as cudnn
+#import torch.backends.cudnn as cudnn
+import torch.distributed as dist
+import torch.optim
+import torch.multiprocessing as mp
+import torch.utils.data
+import torch.utils.data.distributed
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torchvision.models.quantization as quantize_models
+import torchvision.models as models
+from neural_compressor.adaptor.pytorch import get_torch_version
+from packaging.version import Version
+import intel_extension_for_pytorch as ipex
+
+
+model_names = models.list_models(module=models)
+
+torch.hub._validate_not_a_forked_repo=lambda a,b,c: True
+hub_model_names = torch.hub.list('facebookresearch/WSL-Images')
+model_names += hub_model_names
+
+parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
+parser.add_argument('data', metavar='DIR',
+ help='path to dataset')
+parser.add_argument('--hub', action='store_true', default=False,
+ help='use model with torch hub')
+parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
+ choices=model_names,
+ help='model architecture: ' +
+ ' | '.join(model_names) +
+ ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
+ help='number of data loading workers (default: 4)')
+parser.add_argument('--epochs', default=90, type=int, metavar='N',
+ help='number of total epochs to run')
+parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
+ help='manual epoch number (useful on restarts)')
+parser.add_argument('-b', '--batch-size', default=256, type=int,
+ metavar='N',
+ help='mini-batch size (default: 256), this is the total '
+ 'batch size of all GPUs on the current node when '
+ 'using Data Parallel or Distributed Data Parallel')
+parser.add_argument('--steps', default=-1, type=int,
+ help='steps for validation')
+parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
+ metavar='LR', help='initial learning rate', dest='lr')
+parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
+ help='momentum')
+parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
+ metavar='W', help='weight decay (default: 1e-4)',
+ dest='weight_decay')
+parser.add_argument('-p', '--print-freq', default=10, type=int,
+ metavar='N', help='print frequency (default: 10)')
+parser.add_argument('--resume', default='', type=str, metavar='PATH',
+ help='path to latest checkpoint (default: none)')
+parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
+ help='evaluate model on validation set')
+parser.add_argument('-t', '--tune', dest='tune', action='store_true',
+ help='tune best int8 model on calibration dataset')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+ help='use pre-trained model')
+parser.add_argument('--world-size', default=-1, type=int,
+ help='number of nodes for distributed training')
+parser.add_argument('--rank', default=-1, type=int,
+ help='node rank for distributed training')
+parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
+ help='url used to set up distributed training')
+parser.add_argument('--dist-backend', default='nccl', type=str,
+ help='distributed backend')
+parser.add_argument('--seed', default=None, type=int,
+ help='seed for initializing training. ')
+parser.add_argument('--gpu', default=None, type=int,
+ help='GPU id to use.')
+parser.add_argument('--ppn', default=1, type=int,
+ help='number of processes on each node of distributed training')
+parser.add_argument('--multiprocessing-distributed', action='store_true',
+ help='Use multi-processing distributed training to launch '
+ 'N processes per node, which has N GPUs. This is the '
+ 'fastest way to use PyTorch for either single node or '
+ 'multi node data parallel training')
+parser.add_argument('-i', "--iter", default=0, type=int,
+ help='For accuracy measurement only.')
+parser.add_argument('-w', "--warmup_iter", default=5, type=int,
+ help='For benchmark measurement only.')
+parser.add_argument('--performance', dest='performance', action='store_true',
+ help='run benchmark')
+parser.add_argument('-r', "--accuracy", dest='accuracy', action='store_true',
+ help='For accuracy measurement only.')
+parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
+ help='path to checkpoint tuned by Neural Compressor (default: ./)')
+parser.add_argument('--int8', dest='int8', action='store_true',
+ help='run benchmark')
+parser.add_argument('--ipex', dest='ipex', action='store_true',
+ help='tuning or benchmark with Intel PyTorch Extension')
+parser.add_argument("--calib_iters", default=512, type=int,
+ help="calibration iters.")
+parser.add_argument('--xpu', action='store_true',
+ help='whether use xpu')
+
+best_acc1 = 0
+
+
+def main():
+ args = parser.parse_args()
+ print(args)
+
+ if args.seed is not None:
+ random.seed(args.seed)
+ torch.manual_seed(args.seed)
+ cudnn.deterministic = True
+ warnings.warn('You have chosen to seed training. '
+ 'This will turn on the CUDNN deterministic setting, '
+ 'which can slow down your training considerably! '
+ 'You may see unexpected behavior when restarting '
+ 'from checkpoints.')
+
+ if args.gpu is not None:
+ warnings.warn('You have chosen a specific GPU. This will completely '
+ 'disable data parallelism.')
+
+ if args.dist_url == "env://" and args.world_size == -1:
+ args.world_size = int(os.environ["WORLD_SIZE"])
+
+ args.distributed = args.world_size > 1 or args.ppn > 1 or args.multiprocessing_distributed
+
+ if use_gpu:
+ ngpus_per_node = torch.cuda.device_count()
+ else:
+ ngpus_per_node = args.ppn
+
+ #ngpus_per_node = torch.cuda.device_count()
+ if args.multiprocessing_distributed:
+ # Since we have ngpus_per_node processes per node, the total world_size
+ # needs to be adjusted accordingly
+ args.world_size = ngpus_per_node * args.world_size
+ # Use torch.multiprocessing.spawn to launch distributed processes: the
+ # main_worker process function
+ mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
+ else:
+ # Simply call main_worker function
+ main_worker(args.gpu, ngpus_per_node, args)
+
+
+def main_worker(gpu, ngpus_per_node, args):
+ global best_acc1
+ pytorch_version = get_torch_version()
+ #args.gpu = gpu
+ #affinity = subprocess.check_output("lscpu | grep 'NUMA node[0-9]' | awk '{ print $4 }' | awk -F',' '{ print $1 }'", shell=True)
+ #os.environ['OMP_NUM_THREADS'] = '28'
+ #os.environ['KMP_AFFINITY'] = 'proclist=[{}],granularity=thread,explicit'.format(affinity.splitlines()[gpu].decode('utf-8'))
+ #print (os.environ['KMP_AFFINITY'])
+
+ #if args.gpu is not None:
+ # print("Use GPU: {} for training".format(args.gpu))
+ print("Use CPU: {} for training".format(gpu))
+
+ if args.distributed:
+ if args.dist_url == "env://" and args.rank == -1:
+ args.rank = int(os.environ["RANK"])
+ if args.multiprocessing_distributed:
+ # For multiprocessing distributed training, rank needs to be the
+ # global rank among all the processes
+ args.rank = args.rank * ngpus_per_node + gpu
+ dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
+ world_size=args.world_size, rank=args.rank)
+
+ if args.hub:
+ torch.set_flush_denormal(True)
+ model = torch.hub.load('facebookresearch/WSL-Images', args.arch)
+ else:
+ # create model
+ if args.pretrained:
+ print("=> using pre-trained model '{}'".format(args.arch))
+ if args.ipex or pytorch_version >= Version("1.7.0-rc1"):
+ model = models.__dict__[args.arch](pretrained=True)
+ else:
+ model = quantize_models.__dict__[args.arch](pretrained=True, quantize=False)
+ else:
+ print("=> creating model '{}'".format(args.arch))
+ if args.ipex:
+ model = models.__dict__[args.arch]()
+ else:
+ model = quantize_models.__dict__[args.arch]()
+
+ if args.ipex and not args.int8:
+ model = model.to(memory_format=torch.channels_last)
+
+ if not torch.cuda.is_available():
+ print('using CPU...')
+ elif args.distributed:
+ # For multiprocessing distributed, DistributedDataParallel constructor
+ # should always set the single device scope, otherwise,
+ # DistributedDataParallel will use all available devices.
+ if args.gpu is not None:
+ torch.cuda.set_device(args.gpu)
+ model.cuda(args.gpu)
+ # When using a single GPU per process and per
+ # DistributedDataParallel, we need to divide the batch size
+ # ourselves based on the total number of GPUs we have
+ args.batch_size = int(args.batch_size / ngpus_per_node)
+ args.workers = int(args.workers / ngpus_per_node)
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
+ else:
+ #model.cuda()
+ # DistributedDataParallel will divide and allocate batch_size to all
+ # available GPUs if device_ids are not set
+ model = torch.nn.parallel.DistributedDataParallelCPU(model)
+ elif args.gpu is not None:
+ torch.cuda.set_device(args.gpu)
+ model = model.cuda(args.gpu)
+ else:
+ # DataParallel will divide and allocate batch_size to all available GPUs
+ if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
+ model.features = torch.nn.DataParallel(model.features)
+ model.cuda()
+ else:
+ model = torch.nn.DataParallel(model)
+ if args.xpu:
+ model = model.to("xpu")
+ # define loss function (criterion) and optimizer
+ criterion = nn.CrossEntropyLoss()
+ #criterion = nn.CrossEntropyLoss().cuda(args.gpu)
+
+ optimizer = torch.optim.SGD(model.parameters(), args.lr,
+ momentum=args.momentum,
+ weight_decay=args.weight_decay)
+
+ # optionally resume from a checkpoint
+ if args.resume:
+ if os.path.isfile(args.resume):
+ print("=> loading checkpoint '{}'".format(args.resume))
+ checkpoint = torch.load(args.resume)
+ args.start_epoch = checkpoint['epoch']
+ best_acc1 = checkpoint['best_acc1']
+ if args.gpu is not None:
+ # best_acc1 may be from a checkpoint from a different GPU
+ best_acc1 = best_acc1.to(args.gpu)
+ model.load_state_dict(checkpoint['state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer'])
+ print("=> loaded checkpoint '{}' (epoch {})"
+ .format(args.resume, checkpoint['epoch']))
+ else:
+ print("=> no checkpoint found at '{}'".format(args.resume))
+
+ #cudnn.benchmark = True
+
+ # Data loading code
+ traindir = os.path.join(args.data, 'train')
+ valdir = os.path.join(args.data, 'val')
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225])
+
+ train_dataset = datasets.ImageFolder(
+ traindir,
+ transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ normalize,
+ ]))
+
+ if args.distributed:
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
+ else:
+ train_sampler = None
+
+ train_loader = torch.utils.data.DataLoader(
+ train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
+ num_workers=args.workers, pin_memory=True, sampler=train_sampler)
+
+ val_loader = torch.utils.data.DataLoader(
+ datasets.ImageFolder(valdir, transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ normalize,
+ ])),
+ batch_size=args.batch_size, shuffle=False,
+ num_workers=args.workers, pin_memory=True)
+
+ if args.evaluate:
+ validate(val_loader, model, criterion, args)
+
+ def eval_func(model):
+ accu = validate(val_loader, model, criterion, args)
+ return float(accu)
+
+ if args.tune:
+ from neural_compressor.torch.quantization import get_default_static_config
+ quant_config = get_default_static_config()
+
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+ from tqdm import tqdm
+ def run_fn(model):
+ calib_iter = 0
+ for batch in tqdm(val_loader, total=args.calib_iters):
+ batch = move_input_to_device(batch, device=None)
+ if isinstance(batch, tuple) or isinstance(batch, list):
+ model(batch[0])
+ elif isinstance(batch, dict):
+ model(**batch)
+ else:
+ model(batch)
+
+ calib_iter += 1
+ if calib_iter >= args.calib_iters:
+ break
+ return
+
+ from utils import get_example_inputs
+ example_inputs = get_example_inputs(model, val_loader)
+
+ from neural_compressor.torch.quantization import prepare, convert
+ model = prepare(model=model, quant_config=quant_config, example_inputs=example_inputs)
+ run_fn(model)
+ q_model = convert(model)
+ q_model.save(args.tuned_checkpoint)
+ return
+
+ if args.performance or args.accuracy:
+ model.eval()
+ if args.int8:
+ print("load int8 model")
+ from neural_compressor.torch.quantization import load
+ model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)))
+ else:
+ from utils import get_example_inputs
+ example_inputs = get_example_inputs(model, val_loader)
+ model = ipex.optimize(model)
+ with torch.no_grad():
+ model = torch.jit.trace(model, example_inputs)
+ model = torch.jit.freeze(model)
+
+ if args.performance:
+ from neural_compressor.config import BenchmarkConfig
+ from neural_compressor import benchmark
+ b_conf = BenchmarkConfig(warmup=5,
+ iteration=args.iter,
+ cores_per_instance=4,
+ num_of_instance=1)
+ benchmark.fit(model, b_conf, b_dataloader=val_loader)
+ if args.accuracy:
+ validate(val_loader, model, criterion, args)
+ return
+
+ for epoch in range(args.start_epoch, args.epochs):
+ if args.distributed:
+ train_sampler.set_epoch(epoch)
+ adjust_learning_rate(optimizer, epoch, args)
+
+ # train for one epoch
+ train(train_loader, model, criterion, optimizer, epoch, args)
+
+ # evaluate on validation set
+ acc1 = validate(val_loader, model, criterion, args)
+
+ # remember best acc@1 and save checkpoint
+ is_best = acc1 > best_acc1
+ best_acc1 = max(acc1, best_acc1)
+
+ if not args.multiprocessing_distributed or (args.multiprocessing_distributed
+ and args.rank % ngpus_per_node == 0):
+ save_checkpoint({
+ 'epoch': epoch + 1,
+ 'arch': args.arch,
+ 'state_dict': model.state_dict(),
+ 'best_acc1': best_acc1,
+ 'optimizer' : optimizer.state_dict(),
+ }, is_best)
+
+def train(train_loader, model, criterion, optimizer, epoch, args):
+ batch_time = AverageMeter('Time', ':6.3f')
+ data_time = AverageMeter('Data', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1,
+ top5, prefix="Epoch: [{}]".format(epoch))
+
+ # switch to train mode
+ model.train()
+
+ end = time.time()
+ for i, (input, target) in enumerate(train_loader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+
+ if args.gpu is not None:
+ input = input.cuda(args.gpu, non_blocking=True)
+ target = target.cuda(args.gpu, non_blocking=True)
+
+ # compute output
+ output = model(input)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), input.size(0))
+ top1.update(acc1[0], input.size(0))
+ top5.update(acc5[0], input.size(0))
+
+ # compute gradient and do SGD step
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % args.print_freq == 0:
+ progress.print(i)
+
+
+def validate(val_loader, model, criterion, args):
+ batch_time = AverageMeter('Time', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
+ prefix='Test: ')
+
+ # switch to evaluate mode
+ with torch.no_grad():
+ for i, (input, target) in enumerate(val_loader):
+ input = input.contiguous(memory_format=torch.channels_last)
+ if i >= args.warmup_iter:
+ start = time.time()
+ if args.gpu is not None:
+ input = input.cuda(args.gpu, non_blocking=True)
+ target = target.cuda(args.gpu, non_blocking=True)
+ if args.xpu:
+ input = input.to("xpu")
+ target = target.to("xpu")
+
+ # compute output
+ output = model(input)
+
+ # measure elapsed time
+ if i >= args.warmup_iter:
+ batch_time.update(time.time() - start)
+
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), input.size(0))
+ top1.update(acc1[0], input.size(0))
+ top5.update(acc5[0], input.size(0))
+
+
+ if i % args.print_freq == 0:
+ progress.print(i)
+
+ if args.iter > 0 and i >= (args.warmup_iter + args.iter - 1):
+ break
+
+ print('Batch size = %d' % args.batch_size)
+ print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}'
+ .format(top1=(top1.avg / 100), top5=(top5.avg / 100)))
+
+ return top1.avg/100
+
+
+def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
+ torch.save(state, filename)
+ if is_best:
+ shutil.copyfile(filename, 'model_best.pth.tar')
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self, name, fmt=':f'):
+ self.name = name
+ self.fmt = fmt
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def __str__(self):
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
+ return fmtstr.format(**self.__dict__)
+
+
+class ProgressMeter(object):
+ def __init__(self, num_batches, *meters, prefix=""):
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
+ self.meters = meters
+ self.prefix = prefix
+
+ def print(self, batch):
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
+ entries += [str(meter) for meter in self.meters]
+ print('\t'.join(entries))
+
+ def _get_batch_fmtstr(self, num_batches):
+ num_digits = len(str(num_batches // 1))
+ fmt = '{:' + str(num_digits) + 'd}'
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
+
+
+def adjust_learning_rate(optimizer, epoch, args):
+ """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
+ lr = args.lr * (0.1 ** (epoch // 30))
+ for param_group in optimizer.param_groups:
+ param_group['lr'] = lr
+
+
+def accuracy(output, target, topk=(1,)):
+ """Computes the accuracy over the k top predictions for the specified values of k"""
+ with torch.no_grad():
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
+
+
+if __name__ == '__main__':
+ main()
diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt
new file mode 100644
index 00000000000..94f1a7356fe
--- /dev/null
+++ b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt
@@ -0,0 +1,3 @@
+neural-compressor
+torch>=1.9.0
+torchvision>=0.10.0
diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh
new file mode 100644
index 00000000000..f5a2e251554
--- /dev/null
+++ b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=100
+ batch_size=32
+ tuned_checkpoint=saved_results
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --xpu=*)
+ xpu=$(echo ${var} |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ if [[ ${mode} == "accuracy" ]]; then
+ mode_cmd=" --accuracy"
+ elif [[ ${mode} == "performance" ]]; then
+ mode_cmd=" --iter ${iters} --performance "
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+
+ extra_cmd="--ipex"
+ if [ "resnext101_32x16d_wsl_ipex" = "${topology}" ];then
+ extra_cmd=$extra_cmd" --hub"
+ fi
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+
+ if [[ ${xpu} == "true" ]]; then
+ extra_cmd=$extra_cmd" --xpu"
+ fi
+ echo $extra_cmd
+
+
+ python main.py \
+ --pretrained \
+ --tuned_checkpoint ${tuned_checkpoint} \
+ -b ${batch_size} \
+ -a ${input_model} \
+ ${mode_cmd} \
+ ${extra_cmd} \
+ ${dataset_location}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh
new file mode 100644
index 00000000000..5595b069671
--- /dev/null
+++ b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init_params: parse --key=value CLI flags into shell globals; aborts on unknown flags.
+function init_params {
+ output_model=saved_results  # default directory for the tuned (quantized) checkpoint
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning: assemble main.py flags from the parsed globals and launch quantization tuning.
+function run_tuning {
+ extra_cmd="--ipex"  # this example always tunes through Intel Extension for PyTorch
+ if [ -n "$output_model" ];then
+ extra_cmd=$extra_cmd" --tuned_checkpoint ${output_model}"  # where main.py saves the tuned model
+ fi
+ if [[ "${topology}" == "resnext101_32x16d_wsl"* ]];then
+ extra_cmd=$extra_cmd" --hub "  # WSL weights are fetched via torch hub
+ fi
+ extra_cmd=$extra_cmd" ${dataset_location}"  # dataset path is passed as a positional argument
+
+ python main.py \
+ --pretrained \
+ -t \
+ -a $input_model \
+ -b 30 \
+ ${extra_cmd}
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py
new file mode 100644
index 00000000000..76117f8b0b5
--- /dev/null
+++ b/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py
@@ -0,0 +1,47 @@
+import torch
+from collections import UserDict
+from packaging.version import Version
+from neural_compressor.torch.utils import get_torch_version
+
+def get_example_inputs(model, dataloader):  # pick example inputs from the first batch of `dataloader`
+ version = get_torch_version()  # torch version gates the input-format branches below
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+
+ # The dataloader is expected to look like the calibration dataloader used for quantization.
+ if dataloader is None:
+ return None
+ device = next(model.parameters()).device  # place example inputs on the model's device
+ try:
+ for idx, (input, label) in enumerate(dataloader):  # first attempt: batches are (input, label) pairs
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")  # strip the "label" entry from the example inputs
+ if version.release <= Version("2.0.1").release:  # <=2.0.1: positional tuple; newer: keyword dict
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, (list, tuple)):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break  # only the first batch is ever inspected
+ except Exception as e: # pragma: no cover -- batches are not (input, label) pairs; retry treating each batch as the input itself
+ for idx, input in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, list) or isinstance(input, tuple):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ if idx == 0:  # NOTE(review): 'idx' is unbound when the dataloader is empty -- confirm callers always pass a non-empty dataloader
+ assert False, "Please checkout the example_inputs format."
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md
deleted file mode 100644
index 1659ae41e75..00000000000
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md
+++ /dev/null
@@ -1,168 +0,0 @@
-Step-by-Step
-============
-This document describes the step-by-step instructions to run large language models (LLMs) on 4th Gen Intel® Xeon® Scalable Processor (codenamed Sapphire Rapids) with PyTorch and Intel® Extension for PyTorch.
-
-The script `run_clm_no_trainer.py` supports `GPTJ`, `OPT`, `LLaMA2`, `BLOOM` and `Falcon` quantization and validates last word prediction accuracy with [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness.git) now, and we are adding more models.
-
-# Prerequisite
-## 1. Create Environment
-```
-# Installation
-pip install -r requirements.txt
-```
-
-# Run
-
-Here is how to run the scripts:
-
-**Causal Language Modeling (CLM)**
-
-`run_clm_no_trainer.py` quantizes the large language models using the dataset [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) calibration and validates `lambada_openai`, `piqa`, `winogrande`, `hellaswag` and other datasets accuracy provided by lm_eval, an example command is as follows.
-### GPT-J-6b
-
-#### Quantization
-```bash
-# "--sq" is used to enable smooth quant
-python run_clm_no_trainer.py \
- --model EleutherAI/gpt-j-6B \
- --quantize \
- --sq \
- --alpha 1.0 \
- --ipex \
- --output_dir "saved_results"
-```
-**Notes**: Smooth quantization here is based on torch.jit. Without past key value in example_inputs, the quantized model cannot be used for text-generation.
-
-```bash
-# "--approach weight_only" is used to enable weight only quantization.
-# "--woq_algo GPTQ" is used to enable GPTQ algorithms
-# "--double_quant_type BNB_NF4" is used to enable double quant algorithms
-python run_clm_no_trainer.py \
- --model EleutherAI/gpt-j-6B \
- --dataset NeelNanda/pile-10k \
- --quantize \
- --approach weight_only \
- --woq_algo GPTQ \
- --woq_bits 4 \
- --woq_scheme asym \
- --woq_group_size 128 \
- --gptq_max_seq_length 2048 \
- --gptq_use_max_length \
- --accuracy \
- --tasks "lambada_openai" \
- --double_quant_type "BNB_NF4"
-
-# "--woq_algo RTN" is used to enable RTN algorithms
-python run_clm_no_trainer.py \
- --model EleutherAI/gpt-j-6B \
- --dataset NeelNanda/pile-10k \
- --quantize \
- --approach weight_only \
- --woq_algo RTN \
- --woq_bits 4 \
- --woq_scheme asym \
- --woq_group_size 128 \
- --accuracy \
- --tasks "lambada_openai" \
- --double_quant_type "BNB_NF4"
-```
-**Notes**: Weight-only quantization based on fake quantization is previewly supported and supports RTN, GPTQ[1], AWQ[2], TEQ algorithms. For more details, please refer to [link](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization_weight_only.md). Our GPTQ API support various CLMs including GPTJ, OPTs, Blooms, Llamas, Falcons, MPTs, ChatGLMs, etc. Simply replace the "--model" argument with other models to quantize different CLMs with GPTQ.
-
-
-### OPT-125m
-
-#### Quantization
-
-```bash
-# "--sq" is used to enable smooth quant
-python run_clm_no_trainer.py \
- --model facebook/opt-125m \
- --quantize \
- --sq \
- --alpha 0.5 \
- --ipex \
- --output_dir "saved_results"
-
-# "--approach weight_only" is used to enable weight only quantization.
-# "--woq_algo GPTQ" is used to enable GPTQ algorithms
-# "--double_quant_type BNB_NF4" is used to enable double quant algorithms
-python run_clm_no_trainer.py \
- --model facebook/opt-125m \
- --dataset NeelNanda/pile-10k \
- --quantize \
- --approach weight_only \
- --woq_algo GPTQ \
- --woq_bits 4 \
- --woq_scheme asym \
- --woq_group_size 128 \
- --gptq_max_seq_length 2048 \
- --gptq_use_max_length \
- --accuracy \
- --tasks "lambada_openai" \
- --double_quant_type "BNB_NF4"
-
-# "--woq_algo RTN" is used to enable RTN algorithms
-python run_clm_no_trainer.py \
- --model facebook/opt-125m \
- --dataset NeelNanda/pile-10k \
- --quantize \
- --approach weight_only \
- --woq_algo RTN \
- --woq_bits 4 \
- --woq_scheme asym \
- --woq_group_size 128 \
- --accuracy \
- --tasks "lambada_openai" \
- --double_quant_type "BNB_NF4"
-```
-
-### LLAMA2-7b/13b/70b
->Note: LLAMA requires IPEX requirements >= 2.1 to get better accuracy.
-#### Quantization
-
-```bash
-# "--sq" is used to enable smooth quant
-python run_clm_no_trainer.py \
- --model meta-llama/Llama-2-7b-hf \
- --quantize \
- --sq \
- --alpha 0.8 \
- --ipex \
- --output_dir "saved_results"
-
-# "--approach weight_only" is used to enable weight only quantization.
-# "--double_quant_type BNB_NF4" is used to enable double quant algorithms
-# "--woq_algo GPTQ" is used to enable GPTQ algorithms
-python run_clm_no_trainer.py \
- --model meta-llama/Llama-2-7b-hf \
- --dataset NeelNanda/pile-10k \
- --quantize \
- --approach weight_only \
- --woq_algo GPTQ \
- --woq_bits 4 \
- --woq_scheme asym \
- --woq_group_size 128 \
- --gptq_max_seq_length 2048 \
- --gptq_use_max_length \
- --accuracy \
- --tasks "lambada_openai" \
- --double_quant_type "BNB_NF4"
-
-# "--woq_algo RTN" is used to enable RTN algorithms
-python run_clm_no_trainer.py \
- --model meta-llama/Llama-2-7b-hf \
- --dataset NeelNanda/pile-10k \
- --quantize \
- --approach weight_only \
- --woq_algo RTN \
- --woq_bits 4 \
- --woq_scheme asym \
- --woq_group_size 128 \
- --accuracy \
- --tasks "lambada_openai" \
- --double_quant_type "BNB_NF4"
-```
-
-
-[1]. Elias, Frantar, et al. "GPTQ: Accurate Post-training Compression for Generative Pretrained Transformers." arXiv preprint arXiv:2210.17323 (2023).
-[2]. Lin, Ji, et al. "AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration." arXiv preprint arXiv:2306.00978 (2023).
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh
deleted file mode 100644
index 8002b61ad10..00000000000
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-
- init_params "$@"
- run_benchmark
-
-}
-
-# init params
-function init_params {
- iters=100
- batch_size=16
- approach=static
- tuned_checkpoint=saved_results
- task=lambada_openai
- echo ${max_eval_samples}
- for var in "$@"
- do
- case $var in
- --topology=*)
- topology=$(echo $var |cut -f2 -d=)
- ;;
- --dataset_location=*)
- dataset_location=$(echo $var |cut -f2 -d=)
- ;;
- --input_model=*)
- input_model=$(echo $var |cut -f2 -d=)
- ;;
- --mode=*)
- mode=$(echo $var |cut -f2 -d=)
- ;;
- --batch_size=*)
- batch_size=$(echo $var |cut -f2 -d=)
- ;;
- --iters=*)
- iters=$(echo ${var} |cut -f2 -d=)
- ;;
- --int8=*)
- int8=$(echo ${var} |cut -f2 -d=)
- ;;
- --config=*)
- tuned_checkpoint=$(echo $var |cut -f2 -d=)
- ;;
- *)
- echo "Error: No such parameter: ${var}"
- exit 1
- ;;
- esac
- done
-
-}
-
-
-# run_benchmark
-function run_benchmark {
- extra_cmd=''
-
- if [[ ${mode} == "accuracy" ]]; then
- mode_cmd=" --accuracy "
- elif [[ ${mode} == "performance" ]]; then
- mode_cmd=" --performance --iters "${iters}
- else
- echo "Error: No such mode: ${mode}"
- exit 1
- fi
-
- if [[ ${int8} == "true" ]]; then
- extra_cmd=$extra_cmd" --int8"
- fi
- echo $extra_cmd
-
- if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
- model_name_or_path="facebook/opt-125m"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
- model_name_or_path="facebook/opt-125m"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
- model_name_or_path="facebook/opt-125m"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "opt_125m_ipex" ]; then
- model_name_or_path="facebook/opt-125m"
- extra_cmd=$extra_cmd" --ipex"
- elif [ "${topology}" = "opt_125m_ipex_sq" ]; then
- model_name_or_path="facebook/opt-125m"
- extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
- elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "llama2_7b_ipex" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- extra_cmd=$extra_cmd" --ipex"
- elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8"
- elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
- elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "gpt_j_ipex" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- extra_cmd=$extra_cmd" --ipex"
- elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0"
- fi
-
- python -u run_clm_no_trainer.py \
- --model ${model_name_or_path} \
- --approach ${approach} \
- --output_dir ${tuned_checkpoint} \
- --task ${task} \
- --batch_size ${batch_size} \
- ${extra_cmd} ${mode_cmd}
-}
-
-main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
deleted file mode 100644
index e8ee92cb7c0..00000000000
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
+++ /dev/null
@@ -1,434 +0,0 @@
-import argparse
-import os
-import sys
-
-sys.path.append('./')
-import time
-import json
-import re
-import torch
-from datasets import load_dataset
-import datasets
-from torch.nn.functional import pad
-from torch.utils.data import DataLoader
-from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--model", nargs="?", default="EleutherAI/gpt-j-6b"
-)
-parser.add_argument(
- "--trust_remote_code", default=True,
- help="Transformers parameter: use the external repo")
-parser.add_argument(
- "--revision", default=None,
- help="Transformers parameter: set the model hub commit number")
-parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
-parser.add_argument("--output_dir", nargs="?", default="./saved_results")
-parser.add_argument("--quantize", action="store_true")
-parser.add_argument(
- "--int8_bf16_mixed",
- action="store_true",
- help="By default it is int8-fp32 mixed, to enable int8 mixed amp bf16 (work on platforms like SPR)",
-)
-parser.add_argument(
- '--seed',
- type=int, default=42, help='Seed for sampling the calibration data.'
-)
-parser.add_argument("--approach", type=str, default='static',
- help="Select from ['dynamic', 'static', 'weight-only']")
-parser.add_argument("--int8", action="store_true")
-parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.")
-parser.add_argument("--accuracy", action="store_true")
-parser.add_argument("--performance", action="store_true")
-parser.add_argument("--iters", default=100, type=int,
- help="For accuracy measurement only.")
-parser.add_argument("--batch_size", default=1, type=int,
- help="For accuracy measurement only.")
-parser.add_argument("--save_accuracy_path", default=None,
- help="Save accuracy results path.")
-parser.add_argument("--pad_max_length", default=512, type=int,
- help="Pad input ids to max length.")
-parser.add_argument("--calib_iters", default=512, type=int,
- help="calibration iters.")
-parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
- type=str, help="tasks for accuracy validation")
-parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
-# ============SmoothQuant configs==============
-parser.add_argument("--sq", action="store_true")
-parser.add_argument("--alpha", default="auto", help="Smooth quant parameter.")
-# ============WeightOnly configs===============
-parser.add_argument("--woq_algo", default="RTN", choices=['RTN', 'AWQ', 'TEQ', 'GPTQ'],
- help="Weight-only parameter.")
-parser.add_argument("--woq_bits", type=int, default=8)
-parser.add_argument("--woq_dtype", type=str, default="int")
-parser.add_argument("--woq_group_size", type=int, default=-1)
-parser.add_argument("--woq_group_dim", type=int, default=1)
-parser.add_argument("--woq_scheme", default="sym")
-parser.add_argument("--woq_use_mse_search", action="store_true")
-parser.add_argument("--woq_use_full_range", action="store_true")
-# =============GPTQ configs====================
-parser.add_argument("--gptq_actorder", action="store_true",
- help="Whether to apply the activation order GPTQ heuristic.")
-parser.add_argument('--gptq_percdamp', type=float, default=.01,
- help='Percent of the average Hessian diagonal to use for dampening.')
-parser.add_argument('--gptq_block_size', type=int, default=128, help='Block size. sub weight matrix size to run GPTQ.')
-parser.add_argument('--gptq_static_groups', action="store_true",
- help="Whether to calculate group wise quantization parameters in advance. "
- "This option mitigate actorder's extra computational requirements.")
-parser.add_argument('--gptq_nsamples', type=int, default=128, help='Number of calibration data samples.')
-parser.add_argument('--gptq_use_max_length', action="store_true",
- help='Set all sequence length to be same length of args.gptq_max_seq_length')
-parser.add_argument('--gptq_max_seq_length', type=int, default=2048,
- help='Calibration dataset sequence max length, '
- 'this should align with your model config, '
- 'and your dataset builder args: args.pad_max_length')
-
-# =============DoubleQuant configs====================
-parser.add_argument("--double_quant_type",
- type=str,
- default=None,
- choices=['GGML_TYPE_Q4_K', 'BNB_NF4'],
- help="DoubleQuant parameter")
-parser.add_argument("--double_quant_dtype",
- type=str,
- default="fp32",
- help="Data type for double quant scale.")
-parser.add_argument("--double_quant_bits",
- type=int,
- default=8,
- help="Number of bits used to represent double_quant scale.")
-parser.add_argument("--double_quant_use_sym",
- type=bool,
- default=True,
- help="Indicates whether double quant scale are symmetric.")
-parser.add_argument("--double_quant_group_size",
- type=int,
- default=256,
- help="Size of double quant groups.")
-# =======================================
-
-args = parser.parse_args()
-if args.ipex:
- import intel_extension_for_pytorch as ipex
-calib_size = 1
-
-
-class Evaluator:
- def __init__(self, dataset, tokenizer, batch_size=8, pad_val=1, pad_max=196, is_calib=False):
- self.dataset = dataset
- self.tokenizer = tokenizer
- self.batch_size = batch_size
- self.pad_val = pad_val
- self.pad_max = pad_max
- self.is_calib = is_calib
-
- # tokenize the dataset
- self.dataset = self.dataset.map(self.tokenize_function, batched=True)
- self.dataset.set_format(type="torch", columns=["input_ids"])
-
- @torch.no_grad()
- def tokenize_function(self, examples):
- if args.woq_algo in ['TEQ']:
- if self.tokenizer.pad_token is None:
- self.tokenizer.pad_token = self.tokenizer.eos_token
- example = self.tokenizer(examples["text"], padding="max_length", max_length=self.pad_max)
- else:
- example = self.tokenizer(examples["text"])
- return example
-
- @torch.no_grad()
- def collate_batch(self, batch):
-
- input_ids_padded = []
- last_ind = []
-
- for text in batch:
- input_ids = text["input_ids"]
- pad_len = self.pad_max - input_ids.shape[0]
- last_ind.append(input_ids.shape[0] - 1)
- if self.is_calib:
- if args.woq_algo != 'GPTQ':
- input_ids = input_ids[:self.pad_max] if len(input_ids) > self.pad_max else input_ids
- else:
- input_ids = pad(input_ids, (0, pad_len), value=self.pad_val)
- input_ids_padded.append(input_ids)
-
- return (torch.vstack(input_ids_padded), torch.tensor(last_ind))
-
- @torch.no_grad()
- def evaluate(self, model):
- model.eval()
- # The task is to predict the last word of the input.
- total, hit = 0, 0
- latency = 0
- test_dataloader = DataLoader(
- self.dataset,
- batch_size=self.batch_size,
- shuffle=False,
- collate_fn=self.collate_batch,
- )
- for i, (input_ids, last_ind) in enumerate(test_dataloader):
- label = input_ids[torch.arange(len(last_ind)), last_ind]
- input_ids[torch.arange(len(last_ind)), last_ind] = self.pad_val
- pad_len = self.pad_max - last_ind - 1
-
- start = time.time()
- outputs = model(input_ids)
- latency += time.time() - start
-
- last_token_logits = outputs[0][torch.arange(len(last_ind)), -2 - pad_len, :]
- pred = last_token_logits.argmax(dim=-1)
- total += label.size(0)
- hit += (pred == label).sum().item()
- if (i + 1) % 50 == 0:
- print(hit / total)
- print("Processed minibatch:", i)
-
- acc = hit / total
- print("Accuracy: ", acc)
- print("Latency: ", latency)
- return acc
-
-
-def get_user_model():
- torchscript = False
- if args.sq or args.ipex or args.woq_algo in ['AWQ', 'TEQ']:
- torchscript = True
- user_model = AutoModelForCausalLM.from_pretrained(
- args.model,
- torchscript=torchscript, # torchscript will force `return_dict=False` to avoid jit errors
- trust_remote_code=args.trust_remote_code,
- revision=args.revision,
- )
- tokenizer = AutoTokenizer.from_pretrained(args.model)
- if args.approach == 'weight_only':
- user_model = user_model.float()
-
- # Set model's seq_len when GPTQ calibration is enabled.
- if args.woq_algo == 'GPTQ':
- user_model.seqlen = args.gptq_max_seq_length
-
- if args.peft_model_id is not None:
- from peft import PeftModel
- user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
-
- # to channels last
- user_model = user_model.to(memory_format=torch.channels_last)
- user_model.eval()
- return user_model, tokenizer
-
-
-if args.quantize:
- # dataset
- user_model, tokenizer = get_user_model()
- calib_dataset = load_dataset(args.dataset, split="train")
- # calib_dataset = datasets.load_from_disk('/your/local/dataset/pile-10k/') # use this if trouble with connecting to HF
- calib_dataset = calib_dataset.shuffle(seed=args.seed)
- calib_evaluator = Evaluator(calib_dataset, tokenizer, args.batch_size, pad_max=args.pad_max_length, is_calib=True)
- calib_dataloader = DataLoader(
- calib_evaluator.dataset,
- batch_size=calib_size,
- shuffle=False,
- collate_fn=calib_evaluator.collate_batch,
- )
-
- # 3.x api
- if args.approach == 'weight_only':
- from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, prepare, convert, quantize
- from neural_compressor.torch.utils import get_double_quant_config
- weight_sym = True if args.woq_scheme == "sym" else False
- double_quant_config_dict = get_double_quant_config(args.double_quant_type)
-
- if args.woq_algo == "RTN":
- if args.double_quant_type is not None:
- double_quant_config_dict.update(
- {
- # TODO: add group_dim into double quant config?
- "use_full_range": args.woq_use_full_range,
- "use_mse_search": args.woq_use_mse_search,
- }
- )
- quant_config = RTNConfig.from_dict(double_quant_config_dict)
- else:
- quant_config = RTNConfig(
- dtype=args.woq_dtype,
- bits=args.woq_bits,
- use_sym=weight_sym,
- group_size=args.woq_group_size,
- group_dim=args.woq_group_dim,
- use_full_range=args.woq_use_full_range,
- use_mse_search=args.woq_use_mse_search,
- use_double_quant=False,
- double_quant_bits=args.double_quant_bits,
- double_quant_dtype=args.double_quant_dtype,
- double_quant_use_sym=args.double_quant_use_sym,
- double_quant_group_size=args.double_quant_group_size,
- )
- quant_config.set_local("lm_head", RTNConfig(dtype="fp32"))
- user_model = prepare(model=user_model, quant_config=quant_config)
- user_model = convert(model=user_model)
- elif args.woq_algo == "GPTQ":
- from utils import DataloaderPreprocessor
- dataloaderPreprocessor = DataloaderPreprocessor(
- dataloader_original=calib_dataloader,
- use_max_length=args.gptq_use_max_length,
- max_seq_length=args.gptq_max_seq_length,
- )
- dataloader_for_calibration = dataloaderPreprocessor.get_prepared_dataloader()
- from neural_compressor.torch.algorithms.weight_only.utility import move_input_to_device
- from tqdm import tqdm
- def run_fn_for_gptq(model, dataloader_for_calibration, *args):
- for batch in tqdm(dataloader_for_calibration):
- batch = move_input_to_device(batch, device=None)
- try:
- if isinstance(batch, tuple) or isinstance(batch, list):
- model(batch[0])
- elif isinstance(batch, dict):
- model(**batch)
- else:
- model(batch)
- except ValueError:
- pass
- return
- if args.double_quant_type is not None:
- double_quant_config_dict.update(
- {
- "use_mse_search": args.woq_use_mse_search,
- "percdamp": args.gptq_percdamp,
- "act_order": args.gptq_actorder,
- "block_size": args.gptq_block_size,
- "static_groups": args.gptq_static_groups,
- }
- )
- quant_config = GPTQConfig.from_dict(double_quant_config_dict)
- else:
- quant_config = GPTQConfig(
- dtype=args.woq_dtype,
- bits=args.woq_bits,
- use_sym=weight_sym,
- group_size=args.woq_group_size,
- use_mse_search=args.woq_use_mse_search,
- percdamp=args.gptq_percdamp,
- act_order=args.gptq_actorder,
- block_size=args.gptq_block_size,
- static_groups=args.gptq_static_groups,
- use_double_quant=False,
- double_quant_bits=args.double_quant_bits,
- double_quant_dtype=args.double_quant_dtype,
- double_quant_use_sym=args.double_quant_use_sym,
- double_quant_group_size=args.double_quant_group_size,
- )
- quant_config.set_local("lm_head", GPTQConfig(dtype="fp32"))
- user_model = prepare(model=user_model, quant_config=quant_config)
- run_fn_for_gptq(user_model, dataloader_for_calibration)
- user_model = convert(user_model)
- else:
- if args.sq:
- from neural_compressor.torch.quantization import SmoothQuantConfig
-
- # alpha can be a float number of a list of float number.
- args.alpha = args.alpha if args.alpha == "auto" else eval(args.alpha)
- if re.search("falcon", user_model.config.model_type):
- quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False)
- else:
- quant_config = SmoothQuantConfig(alpha=args.alpha, folding=True)
-
- if re.search("gpt", user_model.config.model_type):
- quant_config.set_local(torch.add, SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
- else:
- from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig
-
- quant_config = get_default_static_config()
- if re.search("gpt", user_model.config.model_type):
- quant_config.set_local(torch.add, StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
-
- from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
- from tqdm import tqdm
- def run_fn(model):
- for batch in tqdm(calib_dataloader):
- batch = move_input_to_device(batch, device=None)
- try:
- if isinstance(batch, tuple) or isinstance(batch, list):
- model(batch[0])
- elif isinstance(batch, dict):
- model(**batch)
- else:
- model(batch)
- except ValueError:
- pass
- return
-
- from utils import get_example_inputs
- example_inputs = get_example_inputs(user_model, calib_dataloader)
-
- from neural_compressor.torch.quantization import prepare, convert
- user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
- run_fn(user_model)
- user_model = convert(user_model)
-
- user_model.save(args.output_dir)
-
-
-# TODO: we need run_benchmark.sh for loading and remove --accuracy in run_quant.sh, currently run_quant.sh will get fp32 result
-
-if args.int8 or args.int8_bf16_mixed:
- print("load int8 model")
-
- from neural_compressor.torch.quantization import load
- user_model, _ = get_user_model()
- tokenizer = AutoTokenizer.from_pretrained(args.model)
- config = AutoConfig.from_pretrained(args.model)
- user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model)
- setattr(user_model, "config", config)
-else:
- user_model, tokenizer = get_user_model()
-
-
-if args.accuracy:
- user_model.eval()
- from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
- eval_args = LMEvalParser(
- model="hf",
- user_model=user_model,
- tokenizer=tokenizer,
- batch_size=args.batch_size,
- tasks=args.tasks,
- device="cpu",
- )
- results = evaluate(eval_args)
- for task_name in args.tasks.split(","):
- if task_name == "wikitext":
- acc = results["results"][task_name]["word_perplexity,none"]
- else:
- acc = results["results"][task_name]["acc,none"]
- print("Accuracy: %.5f" % acc)
- print('Batch size = %d' % args.batch_size)
-
-if args.performance:
- user_model.eval()
- from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
- import time
-
- samples = args.iters * args.batch_size
- eval_args = LMEvalParser(
- model="hf",
- user_model=user_model,
- tokenizer=tokenizer,
- batch_size=args.batch_size,
- tasks=args.tasks,
- limit=samples,
- device="cpu",
- )
- start = time.time()
- results = evaluate(eval_args)
- end = time.time()
- for task_name in args.tasks.split(","):
- if task_name == "wikitext":
- acc = results["results"][task_name]["word_perplexity,none"]
- else:
- acc = results["results"][task_name]["acc,none"]
- print("Accuracy: %.5f" % acc)
- print('Throughput: %.3f samples/sec' % (samples / (end - start)))
- print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
- print('Batch size = %d' % args.batch_size)
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh
deleted file mode 100644
index 3f95f44946e..00000000000
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-
- init_params "$@"
- run_tuning
-
-}
-
-# init params
-function init_params {
- for var in "$@"
- do
- case $var in
- --topology=*)
- topology=$(echo $var |cut -f2 -d=)
- ;;
- --dataset_location=*)
- dataset_location=$(echo $var |cut -f2 -d=)
- ;;
- --input_model=*)
- input_model=$(echo $var |cut -f2 -d=)
- ;;
- --output_model=*)
- tuned_checkpoint=$(echo $var |cut -f2 -d=)
- ;;
- *)
- echo "Error: No such parameter: ${var}"
- exit 1
- ;;
- esac
- done
-
-}
-
-# run_tuning
-function run_tuning {
- extra_cmd=''
- batch_size=8
- approach='static'
- DATASET_NAME="NeelNanda/pile-10k"
- tuned_checkpoint="saved_results"
-
- if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
- model_name_or_path="facebook/opt-125m"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
- model_name_or_path="facebook/opt-125m"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
- model_name_or_path="facebook/opt-125m"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "opt_125m_ipex" ]; then
- model_name_or_path="facebook/opt-125m"
- extra_cmd=$extra_cmd" --ipex"
- elif [ "${topology}" = "opt_125m_ipex_sq" ]; then
- model_name_or_path="facebook/opt-125m"
- extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
- elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "llama2_7b_ipex" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- extra_cmd=$extra_cmd" --ipex"
- elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
- model_name_or_path="meta-llama/Llama-2-7b-hf"
- extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8"
- elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
- elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
- elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- approach="weight_only"
- extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
- extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
- elif [ "${topology}" = "gpt_j_ipex" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- extra_cmd=$extra_cmd" --ipex"
- elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
- model_name_or_path="EleutherAI/gpt-j-6b"
- extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0"
- fi
-
- python -u run_clm_no_trainer.py \
- --model ${model_name_or_path} \
- --dataset ${DATASET_NAME} \
- --quantize \
- --approach ${approach} \
- --output_dir ${tuned_checkpoint} \
- --tasks "lambada_openai" \
- --batch_size ${batch_size} \
- ${extra_cmd}
-}
-
-main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/README.md
deleted file mode 100644
index 6608cbcf726..00000000000
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# Run
-
-## Run WOQ MX FP4 model
-``` python
-python run_clm_no_trainer.py --model [model_name_or_id] --quantize --accuracy --tasks lambada_openai --w_dtype fp4 --woq
-```
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/run_clm_no_trainer.py
deleted file mode 100644
index db5b08882e0..00000000000
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/run_clm_no_trainer.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import argparse
-import time
-import json
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--model", nargs="?", default="EleutherAI/gpt-j-6b"
-)
-parser.add_argument(
- "--trust_remote_code", default=True,
- help="Transformers parameter: use the external repo")
-parser.add_argument(
- "--revision", default=None,
- help="Transformers parameter: set the model hub commit number")
-parser.add_argument("--quantize", action="store_true")
-# dynamic only now
-parser.add_argument("--w_dtype", type=str, default="int8",
- choices=["int8", "int4", "int2", "fp8_e5m2", "fp8_e4m3", "fp6_e3m2",
- "fp6_e2m3", "fp4", "float16", "bfloat12"],
- help="weight data type")
-parser.add_argument("--act_dtype", type=str, default="int8",
- choices=["int8", "int4", "int2", "fp8_e5m2", "fp8_e4m3", "fp6_e3m2",
- "fp6_e2m3", "fp4", "float16", "bfloat12"],
- help="input activation data type")
-parser.add_argument("--woq", action="store_true")
-parser.add_argument("--accuracy", action="store_true")
-parser.add_argument("--performance", action="store_true")
-parser.add_argument("--iters", default=100, type=int,
- help="For accuracy measurement only.")
-parser.add_argument("--batch_size", default=1, type=int,
- help="For accuracy measurement only.")
-parser.add_argument("--save_accuracy_path", default=None,
- help="Save accuracy results path.")
-parser.add_argument("--tasks", type=str, default="lambada_openai",
- help="tasks list for accuracy validation")
-parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
-
-args = parser.parse_args()
-
-def get_user_model():
- from transformers import AutoModelForCausalLM, AutoModel, AutoTokenizer
- user_model = AutoModelForCausalLM.from_pretrained(
- args.model,
- trust_remote_code=args.trust_remote_code,
- revision=args.revision,
- )
- tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
-
- if args.peft_model_id is not None:
- from peft import PeftModel
- user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
-
- user_model.eval()
- return user_model, tokenizer
-
-user_model, tokenizer = get_user_model()
-if args.quantize:
- from neural_compressor.torch.quantization import MXQuantConfig, quantize
- quant_config = MXQuantConfig(w_dtype=args.w_dtype, act_dtype=args.act_dtype, weight_only=args.woq)
- user_model = quantize(model=user_model, quant_config=quant_config)
-
-
-if args.accuracy:
- user_model.eval()
- from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
- args = LMEvalParser(
- model="hf",
- user_model=user_model,
- tokenizer=tokenizer,
- batch_size=args.batch_size,
- tasks=args.tasks,
- device="cpu",
- )
- results = evaluate(args)
- dumped = json.dumps(results, indent=2)
- if args.save_accuracy_path:
- with open(args.save_accuracy_path, "w") as f:
- f.write(dumped)
- for task_name in args.tasks:
- if task_name == "wikitext":
- acc = results["results"][task_name]["word_perplexity"]
- else:
- acc = results["results"][task_name]["acc"]
- print("Accuracy: %.5f" % acc)
- print('Batch size = %d' % args.batch_size)
-
-if args.performance:
- user_model.eval()
- from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
- import time
- samples = args.iters * args.batch_size
- start = time.time()
- results = evaluate(
- model="hf",
- tokenizer=tokenizer,
- user_model=user_model,
- batch_size=args.batch_size,
- tasks=args.tasks,
- limit=samples,
- )
- end = time.time()
- for task_name in args.tasks:
- if task_name == "wikitext":
- acc = results["results"][task_name]["word_perplexity"]
- else:
- acc = results["results"][task_name]["acc"]
- print("Accuracy: %.5f" % acc)
- print('Throughput: %.3f samples/sec' % (samples / (end - start)))
- print('Latency: %.3f ms' % ((end - start)*1000 / samples))
- print('Batch size = %d' % args.batch_size)
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md
new file mode 100644
index 00000000000..e61d5a64ade
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md
@@ -0,0 +1,7 @@
+# Run
+
+## Run WOQ MX FP4 model
+
+```bash
+python run_clm_no_trainer.py --model [model_name_or_id] --quantize --accuracy --tasks lambada_openai --w_dtype fp4 --woq
+```
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt
similarity index 100%
rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx/requirements.txt
rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py
new file mode 100644
index 00000000000..40bf217c72e
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py
@@ -0,0 +1,95 @@
+import argparse
+import time
+import json
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--model", nargs="?", default="EleutherAI/gpt-j-6b"
+)
+parser.add_argument(
+ "--trust_remote_code", default=True,
+ help="Transformers parameter: use the external repo")
+parser.add_argument(
+ "--revision", default=None,
+ help="Transformers parameter: set the model hub commit number")
+parser.add_argument("--quantize", action="store_true")
+# dynamic only now
+parser.add_argument("--w_dtype", type=str, default="int8",
+ choices=["int8", "int4", "int2", "fp8_e5m2", "fp8_e4m3", "fp6_e3m2",
+ "fp6_e2m3", "fp4", "float16", "bfloat12"],
+ help="weight data type")
+parser.add_argument("--act_dtype", type=str, default="int8",
+ choices=["int8", "int4", "int2", "fp8_e5m2", "fp8_e4m3", "fp6_e3m2",
+ "fp6_e2m3", "fp4", "float16", "bfloat12"],
+ help="input activation data type")
+parser.add_argument("--woq", action="store_true")
+parser.add_argument("--accuracy", action="store_true")
+parser.add_argument("--performance", action="store_true")
+parser.add_argument("--iters", default=100, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--batch_size", default=1, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--save_accuracy_path", default=None,
+ help="Save accuracy results path.")
+parser.add_argument("--tasks", nargs="+", default=["lambada_openai"], type=str,
+ help="tasks list for accuracy validation"
+)
+parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
+
+args = parser.parse_args()
+
+def get_user_model():
+ from transformers import AutoModelForCausalLM, AutoModel, AutoTokenizer
+ user_model = AutoModelForCausalLM.from_pretrained(
+ args.model,
+ trust_remote_code=args.trust_remote_code,
+ revision=args.revision,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
+
+ if args.peft_model_id is not None:
+ from peft import PeftModel
+ user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
+
+ user_model.eval()
+ return user_model, tokenizer
+
+user_model, tokenizer = get_user_model()
+
+from neural_compressor.torch.quantization import MXQuantConfig, prepare, convert
+quant_config = MXQuantConfig(w_dtype=args.w_dtype, act_dtype=args.act_dtype, weight_only=args.woq)
+user_model = prepare(model=user_model, quant_config=quant_config)
+user_model = convert(model=user_model)
+user_model.eval()
+
+from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=','.join(args.tasks),
+ device="cpu",
+)
+
+results = evaluate(eval_args)
+dumped = json.dumps(results, indent=2)
+if args.save_accuracy_path:
+ with open(args.save_accuracy_path, "w") as f:
+ f.write(dumped)
+
+eval_acc = 0
+for task_name in args.tasks:
+ if task_name == "wikitext":
+ print("Accuracy for %s is: %s" %
+ (task_name, results["results"][task_name]["word_perplexity,none"]))
+ eval_acc += results["results"][task_name]["word_perplexity,none"]
+ else:
+ print("Accuracy for %s is: %s" %
+ (task_name, results["results"][task_name]["acc,none"]))
+ eval_acc += results["results"][task_name]["acc,none"]
+
+if len(args.tasks) != 0:
+ eval_acc /= len(args.tasks)
+print("Accuracy: %.5f" % eval_acc)
+print('Batch size = %d' % args.batch_size)
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md
new file mode 100644
index 00000000000..8900ea9fd9b
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md
@@ -0,0 +1,64 @@
+Step-by-Step
+============
+This document describes the step-by-step instructions to run large language models (LLMs) using Smooth Quantization on 4th Gen Intel® Xeon® Scalable Processor (codenamed Sapphire Rapids) with PyTorch and Intel® Extension for PyTorch.
+
+The script `run_clm_no_trainer.py` supports `GPTJ`, `OPT`, `LLaMA2`, `BLOOM` and `Falcon` quantization and validates last-word-prediction accuracy with [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness.git); support for more models is being added.
+
+# Prerequisite
+## 1. Create Environment
+```
+# Installation
+pip install -r requirements.txt
+```
+
+# Run
+
+Here is how to run the scripts:
+
+**Causal Language Modeling (CLM)**
+
+`run_clm_no_trainer.py` quantizes the large language models using the [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) dataset for calibration, and validates accuracy on `lambada_openai`, `piqa`, `winogrande`, `hellaswag` and other datasets provided by lm_eval. An example command is shown below.
+### GPT-J-6b
+
+#### Quantization
+```bash
+# "--sq" is used to enable smooth quant
+python run_clm_no_trainer.py \
+ --model EleutherAI/gpt-j-6B \
+ --quantize \
+ --sq \
+ --alpha 1.0 \
+ --ipex \
+ --output_dir "saved_results"
+```
+**Note**: Smooth quantization here is based on torch.jit. Without past key values in `example_inputs`, the quantized model cannot be used for text generation.
+
+### OPT-125m
+
+#### Quantization
+
+```bash
+# "--sq" is used to enable smooth quant
+python run_clm_no_trainer.py \
+ --model facebook/opt-125m \
+ --quantize \
+ --sq \
+ --alpha 0.5 \
+ --ipex \
+ --output_dir "saved_results"
+```
+
+### LLAMA2-7b/13b/70b
+>Note: LLaMA models require IPEX >= 2.1 for better accuracy.
+#### Quantization
+
+```bash
+# "--sq" is used to enable smooth quant
+python run_clm_no_trainer.py \
+ --model meta-llama/Llama-2-7b-hf \
+ --quantize \
+ --sq \
+ --alpha 0.8 \
+ --ipex \
+ --output_dir "saved_results"
+```
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt
new file mode 100644
index 00000000000..d4155dfbf75
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt
@@ -0,0 +1,14 @@
+accelerate
+protobuf
+sentencepiece != 0.1.92
+datasets >= 1.1.3
+torch >= 1.10
+transformers
+pytest
+wandb
+einops
+neural-compressor
+intel-extension-for-transformers
+lm_eval==0.4.2
+peft
+optimum-intel
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh
new file mode 100644
index 00000000000..7b60727b047
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=100
+ batch_size=16
+ approach=static
+ tuned_checkpoint=saved_results
+ task=lambada_openai
+ echo ${max_eval_samples}
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --config=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ extra_cmd=''
+
+ if [[ ${mode} == "accuracy" ]]; then
+ mode_cmd=" --accuracy "
+ extra_cmd=$extra_cmd" --load"
+ elif [[ ${mode} == "performance" ]]; then
+ mode_cmd=" --performance --iters "${iters}
+ extra_cmd=$extra_cmd" --load"
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ if [ "${topology}" = "opt_125m_ipex_sq" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --ipex"
+ elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --ipex"
+ elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --ipex"
+ fi
+
+ if [[ ${mode} == "accuracy" ]]; then
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --approach ${approach} \
+ --output_dir ${tuned_checkpoint} \
+ --task ${task} \
+ --batch_size ${batch_size} \
+ ${extra_cmd} ${mode_cmd}
+ elif [[ ${mode} == "performance" ]]; then
+ incbench --num_cores_per_instance 4 run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --approach ${approach} \
+ --batch_size ${batch_size} \
+ --output_dir ${tuned_checkpoint} \
+ ${extra_cmd} ${mode_cmd}
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py
new file mode 100644
index 00000000000..694c0505ea4
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py
@@ -0,0 +1,272 @@
+import argparse
+import os
+import sys
+
+sys.path.append("./")
+import time
+import re
+import torch
+from datasets import load_dataset
+from torch.nn.functional import pad
+from torch.utils.data import DataLoader
+from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--model", nargs="?", default="EleutherAI/gpt-j-6b")
+parser.add_argument("--trust_remote_code", default=True, help="Transformers parameter: use the external repo")
+parser.add_argument(
+ "--revision", default=None, help="Transformers parameter: set the model hub commit number"
+)
+parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
+parser.add_argument("--output_dir", nargs="?", default="./saved_results")
+parser.add_argument("--quantize", action="store_true")
+parser.add_argument(
+ "--int8_bf16_mixed",
+ action="store_true",
+ help="By default it is int8-fp32 mixed, to enable int8 mixed amp bf16 (work on platforms like SPR)",
+)
+parser.add_argument("--seed", type=int, default=42, help="Seed for sampling the calibration data.")
+parser.add_argument(
+ "--approach", type=str, default="static", help="Select from ['dynamic', 'static', 'weight-only']"
+)
+parser.add_argument("--int8", action="store_true")
+parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.")
+parser.add_argument("--load", action="store_true", help="Load quantized model.")
+parser.add_argument("--accuracy", action="store_true")
+parser.add_argument("--performance", action="store_true")
+parser.add_argument("--iters", default=100, type=int, help="For accuracy measurement only.")
+parser.add_argument("--batch_size", default=1, type=int, help="For accuracy measurement only.")
+parser.add_argument("--save_accuracy_path", default=None, help="Save accuracy results path.")
+parser.add_argument("--pad_max_length", default=512, type=int, help="Pad input ids to max length.")
+parser.add_argument("--calib_iters", default=512, type=int, help="calibration iters.")
+parser.add_argument(
+ "--tasks",
+ default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
+ type=str,
+ help="tasks for accuracy validation",
+)
+parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
+# ============SmoothQuant configs==============
+parser.add_argument("--sq", action="store_true")
+parser.add_argument("--alpha", default="auto", help="Smooth quant parameter.")
+
+args = parser.parse_args()
+if args.ipex:
+ import intel_extension_for_pytorch as ipex
+calib_size = 1
+
+
+class Evaluator:
+ def __init__(self, dataset, tokenizer, batch_size=8, pad_val=1, pad_max=196, is_calib=False):
+ self.dataset = dataset
+ self.tokenizer = tokenizer
+ self.batch_size = batch_size
+ self.pad_val = pad_val
+ self.pad_max = pad_max
+ self.is_calib = is_calib
+
+ # tokenize the dataset
+ self.dataset = self.dataset.map(self.tokenize_function, batched=True)
+ self.dataset.set_format(type="torch", columns=["input_ids"])
+
+ @torch.no_grad()
+ def tokenize_function(self, examples):
+ return self.tokenizer(examples["text"])
+
+ @torch.no_grad()
+ def collate_batch(self, batch):
+
+ input_ids_padded = []
+ last_ind = []
+
+ for text in batch:
+ input_ids = text["input_ids"]
+ pad_len = self.pad_max - input_ids.shape[0]
+ last_ind.append(input_ids.shape[0] - 1)
+ if self.is_calib:
+ input_ids = input_ids[: self.pad_max] if len(input_ids) > self.pad_max else input_ids
+ else:
+ input_ids = pad(input_ids, (0, pad_len), value=self.pad_val)
+ input_ids_padded.append(input_ids)
+
+ return (torch.vstack(input_ids_padded), torch.tensor(last_ind))
+
+ @torch.no_grad()
+ def evaluate(self, model):
+ model.eval()
+ # The task is to predict the last word of the input.
+ total, hit = 0, 0
+ latency = 0
+ test_dataloader = DataLoader(
+ self.dataset,
+ batch_size=self.batch_size,
+ shuffle=False,
+ collate_fn=self.collate_batch,
+ )
+ for i, (input_ids, last_ind) in enumerate(test_dataloader):
+ label = input_ids[torch.arange(len(last_ind)), last_ind]
+ input_ids[torch.arange(len(last_ind)), last_ind] = self.pad_val
+ pad_len = self.pad_max - last_ind - 1
+
+ start = time.time()
+ outputs = model(input_ids)
+ latency += time.time() - start
+
+ last_token_logits = outputs[0][torch.arange(len(last_ind)), -2 - pad_len, :]
+ pred = last_token_logits.argmax(dim=-1)
+ total += label.size(0)
+ hit += (pred == label).sum().item()
+ if (i + 1) % 50 == 0:
+ print(hit / total)
+ print("Processed minibatch:", i)
+
+ acc = hit / total
+ print("Accuracy: ", acc)
+ print("Latency: ", latency)
+ return acc
+
+
+def get_user_model():
+ user_model = AutoModelForCausalLM.from_pretrained(
+ args.model,
+ torchscript=True, # torchscript will force `return_dict=False` to avoid jit errors
+ trust_remote_code=args.trust_remote_code,
+ revision=args.revision,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+
+ if args.peft_model_id is not None:
+ from peft import PeftModel
+
+ user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
+
+ # to channels last
+ user_model = user_model.to(memory_format=torch.channels_last)
+ user_model.eval()
+ return user_model, tokenizer
+
+
+if args.quantize:
+ # dataset
+ user_model, tokenizer = get_user_model()
+ calib_dataset = load_dataset(args.dataset, split="train")
+ # calib_dataset = datasets.load_from_disk('/your/local/dataset/pile-10k/') # use this if trouble with connecting to HF
+ calib_dataset = calib_dataset.shuffle(seed=args.seed)
+ calib_evaluator = Evaluator(
+ calib_dataset, tokenizer, args.batch_size, pad_max=args.pad_max_length, is_calib=True
+ )
+ calib_dataloader = DataLoader(
+ calib_evaluator.dataset,
+ batch_size=calib_size,
+ shuffle=False,
+ collate_fn=calib_evaluator.collate_batch,
+ )
+
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+ from tqdm import tqdm
+
+ def run_fn(model):
+ calib_iter = 0
+ for batch in tqdm(calib_dataloader, total=args.calib_iters):
+ batch = move_input_to_device(batch, device=None)
+ if isinstance(batch, tuple) or isinstance(batch, list):
+ model(batch[0])
+ elif isinstance(batch, dict):
+ model(**batch)
+ else:
+ model(batch)
+
+ calib_iter += 1
+ if calib_iter >= args.calib_iters:
+ break
+ return
+
+ def eval_func(model):
+ config = AutoConfig.from_pretrained(args.model)
+ setattr(model, "config", config)
+
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ device="cpu",
+ )
+ results = evaluate(eval_args)
+ if args.tasks == "wikitext":
+ return results["results"][args.tasks]["word_perplexity,none"]
+ else:
+ return results["results"][args.tasks]["acc,none"]
+
+ from utils import get_example_inputs
+
+ example_inputs = get_example_inputs(user_model, calib_dataloader)
+
+ from neural_compressor.torch.quantization import SmoothQuantConfig, autotune, TuningConfig
+ tune_config = TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning())
+ user_model = autotune(
+ user_model,
+ tune_config=tune_config,
+ eval_fn=eval_func,
+ run_fn=run_fn,
+ example_inputs=example_inputs,
+ )
+ user_model.save(args.output_dir)
+
+
+if args.load:
+ # TODO: we need run_benchmark.sh for loading and remove --accuracy in run_quant.sh, currently run_quant.sh will get fp32 result
+ if args.int8 or args.int8_bf16_mixed:
+ print("load int8 model")
+ from neural_compressor.torch.quantization import load
+
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+ config = AutoConfig.from_pretrained(args.model)
+ user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
+ setattr(user_model, "config", config)
+ else:
+ user_model, tokenizer = get_user_model()
+
+
+if args.accuracy:
+ user_model.eval()
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ device="cpu",
+ )
+ results = evaluate(eval_args)
+ for task_name in args.tasks.split(","):
+ if task_name == "wikitext":
+ print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["word_perplexity,none"]))
+ else:
+ print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["acc,none"]))
+
+
+if args.performance:
+ user_model.eval()
+ batch_size, input_leng = args.batch_size, 512
+ example_inputs = torch.ones((batch_size, input_leng), dtype=torch.long)
+ print("Batch size = {:d}".format(batch_size))
+ print("The length of input tokens = {:d}".format(input_leng))
+ import time
+
+ total_iters = args.iters
+ warmup_iters = 5
+ with torch.no_grad():
+ for i in range(total_iters):
+ if i == warmup_iters:
+ start = time.time()
+ user_model(example_inputs)
+ end = time.time()
+ latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)
+ throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
+ print("Latency: {:.3f} ms".format(latency * 10**3))
+ print("Throughput: {:.3f} samples/sec".format(throughput))
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh
new file mode 100644
index 00000000000..774bb73b6f1
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ extra_cmd=''
+ batch_size=8
+ approach='static'
+ DATASET_NAME="NeelNanda/pile-10k"
+ tuned_checkpoint="saved_results"
+
+ if [ "${topology}" = "opt_125m_ipex_sq" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
+ elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8"
+ elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0"
+ fi
+
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --dataset ${DATASET_NAME} \
+ --quantize \
+ --approach ${approach} \
+ --output_dir ${tuned_checkpoint} \
+ --tasks "lambada_openai" \
+ --batch_size ${batch_size} \
+ ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py
new file mode 100644
index 00000000000..76117f8b0b5
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py
@@ -0,0 +1,47 @@
+import torch
+from collections import UserDict
+from packaging.version import Version
+from neural_compressor.torch.utils import get_torch_version
+
+def get_example_inputs(model, dataloader):
+ version = get_torch_version()
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+
+ # Suggest set dataloader like calib_dataloader
+ if dataloader is None:
+ return None
+ device = next(model.parameters()).device
+ try:
+ for idx, (input, label) in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, (list, tuple)):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ except Exception as e: # pragma: no cover
+ for idx, input in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, list) or isinstance(input, tuple):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ if idx == 0:
+ assert False, "Please checkout the example_inputs format."
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md
new file mode 100644
index 00000000000..8ecdc6c5110
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md
@@ -0,0 +1,57 @@
+Step-by-Step
+============
+This document describes the step-by-step instructions to run large language models (LLMs) using Static Quantization on 4th Gen Intel® Xeon® Scalable Processor (codenamed Sapphire Rapids) with PyTorch and Intel® Extension for PyTorch.
+
+The script `run_clm_no_trainer.py` currently supports quantization of `GPTJ`, `OPT`, `LLaMA2`, `BLOOM` and `Falcon`, and validates last-word-prediction accuracy with [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness.git); more models are being added.
+
+# Prerequisite
+## 1. Create Environment
+```
+# Installation
+pip install -r requirements.txt
+```
+
+# Run
+
+Here is how to run the scripts:
+
+**Causal Language Modeling (CLM)**
+
+`run_clm_no_trainer.py` quantizes the large language models using the dataset [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) for calibration, and validates accuracy on `lambada_openai`, `piqa`, `winogrande`, `hellaswag` and other datasets provided by lm_eval; an example command is as follows.
+### GPT-J-6b
+
+#### Quantization
+```bash
+python run_clm_no_trainer.py \
+ --model EleutherAI/gpt-j-6B \
+ --quantize \
+ --alpha 1.0 \
+ --ipex \
+ --output_dir "saved_results"
+```
+
+### OPT-125m
+
+#### Quantization
+
+```bash
+python run_clm_no_trainer.py \
+ --model facebook/opt-125m \
+ --quantize \
+ --alpha 0.5 \
+ --ipex \
+ --output_dir "saved_results"
+```
+
+### LLAMA2-7b/13b/70b
+>Note: LLaMA requires IPEX >= 2.1 to achieve better accuracy.
+#### Quantization
+
+```bash
+python run_clm_no_trainer.py \
+ --model meta-llama/Llama-2-7b-hf \
+ --quantize \
+ --alpha 0.8 \
+ --ipex \
+ --output_dir "saved_results"
+```
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt
similarity index 100%
rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh
new file mode 100644
index 00000000000..b62a6381b20
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=100
+ batch_size=16
+ approach=static
+ tuned_checkpoint=saved_results
+ task=lambada_openai
+ echo ${max_eval_samples}
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --config=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ extra_cmd=''
+
+ if [[ ${mode} == "accuracy" ]]; then
+ mode_cmd=" --accuracy "
+ extra_cmd=$extra_cmd" --load"
+ elif [[ ${mode} == "performance" ]]; then
+ mode_cmd=" --performance --iters "${iters}
+ extra_cmd=$extra_cmd" --load"
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ if [ "${topology}" = "opt_125m_ipex" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --ipex"
+ elif [ "${topology}" = "llama2_7b_ipex" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --ipex"
+ elif [ "${topology}" = "gpt_j_ipex" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --ipex"
+ fi
+
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --approach ${approach} \
+ --output_dir ${tuned_checkpoint} \
+ --task ${task} \
+ --batch_size ${batch_size} \
+ ${extra_cmd} ${mode_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py
new file mode 100644
index 00000000000..b56c01f20f5
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py
@@ -0,0 +1,259 @@
+import argparse
+import os
+import sys
+
+sys.path.append('./')
+import time
+import re
+import torch
+from datasets import load_dataset
+from torch.nn.functional import pad
+from torch.utils.data import DataLoader
+from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--model", nargs="?", default="EleutherAI/gpt-j-6b"
+)
+parser.add_argument(
+ "--trust_remote_code", default=True,
+ help="Transformers parameter: use the external repo")
+parser.add_argument(
+ "--revision", default=None,
+ help="Transformers parameter: set the model hub commit number")
+parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
+parser.add_argument("--output_dir", nargs="?", default="./saved_results")
+parser.add_argument("--quantize", action="store_true")
+parser.add_argument(
+ "--int8_bf16_mixed",
+ action="store_true",
+ help="By default it is int8-fp32 mixed, to enable int8 mixed amp bf16 (work on platforms like SPR)",
+)
+parser.add_argument(
+ '--seed',
+ type=int, default=42, help='Seed for sampling the calibration data.'
+)
+parser.add_argument("--approach", type=str, default='static',
+ help="Select from ['dynamic', 'static', 'weight-only']")
+parser.add_argument("--int8", action="store_true")
+parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.")
+parser.add_argument("--load", action="store_true", help="Load quantized model.")
+parser.add_argument("--accuracy", action="store_true")
+parser.add_argument("--performance", action="store_true")
+parser.add_argument("--iters", default=100, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--batch_size", default=1, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--save_accuracy_path", default=None,
+ help="Save accuracy results path.")
+parser.add_argument("--pad_max_length", default=512, type=int,
+ help="Pad input ids to max length.")
+parser.add_argument("--calib_iters", default=512, type=int,
+ help="calibration iters.")
+parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
+ type=str, help="tasks for accuracy validation")
+parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
+
+args = parser.parse_args()
+if args.ipex:
+ import intel_extension_for_pytorch as ipex
+calib_size = 1
+
+
+class Evaluator:
+ def __init__(self, dataset, tokenizer, batch_size=8, pad_val=1, pad_max=196, is_calib=False):
+ self.dataset = dataset
+ self.tokenizer = tokenizer
+ self.batch_size = batch_size
+ self.pad_val = pad_val
+ self.pad_max = pad_max
+ self.is_calib = is_calib
+
+ # tokenize the dataset
+ self.dataset = self.dataset.map(self.tokenize_function, batched=True)
+ self.dataset.set_format(type="torch", columns=["input_ids"])
+
+ @torch.no_grad()
+ def tokenize_function(self, examples):
+ return self.tokenizer(examples["text"])
+
+ @torch.no_grad()
+ def collate_batch(self, batch):
+
+ input_ids_padded = []
+ last_ind = []
+
+ for text in batch:
+ input_ids = text["input_ids"]
+ pad_len = self.pad_max - input_ids.shape[0]
+ last_ind.append(input_ids.shape[0] - 1)
+ if self.is_calib:
+ input_ids = input_ids[:self.pad_max] if len(input_ids) > self.pad_max else input_ids
+ else:
+ input_ids = pad(input_ids, (0, pad_len), value=self.pad_val)
+ input_ids_padded.append(input_ids)
+
+ return (torch.vstack(input_ids_padded), torch.tensor(last_ind))
+
+ @torch.no_grad()
+ def evaluate(self, model):
+ model.eval()
+ # The task is to predict the last word of the input.
+ total, hit = 0, 0
+ latency = 0
+ test_dataloader = DataLoader(
+ self.dataset,
+ batch_size=self.batch_size,
+ shuffle=False,
+ collate_fn=self.collate_batch,
+ )
+ for i, (input_ids, last_ind) in enumerate(test_dataloader):
+ label = input_ids[torch.arange(len(last_ind)), last_ind]
+ input_ids[torch.arange(len(last_ind)), last_ind] = self.pad_val
+ pad_len = self.pad_max - last_ind - 1
+
+ start = time.time()
+ outputs = model(input_ids)
+ latency += time.time() - start
+
+ last_token_logits = outputs[0][torch.arange(len(last_ind)), -2 - pad_len, :]
+ pred = last_token_logits.argmax(dim=-1)
+ total += label.size(0)
+ hit += (pred == label).sum().item()
+ if (i + 1) % 50 == 0:
+ print(hit / total)
+ print("Processed minibatch:", i)
+
+ acc = hit / total
+ print("Accuracy: ", acc)
+ print("Latency: ", latency)
+ return acc
+
+
+def get_user_model():
+ user_model = AutoModelForCausalLM.from_pretrained(
+ args.model,
+ torchscript=True, # torchscript will force `return_dict=False` to avoid jit errors
+ trust_remote_code=args.trust_remote_code,
+ revision=args.revision,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+
+ if args.peft_model_id is not None:
+ from peft import PeftModel
+ user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
+
+ # to channels last
+ user_model = user_model.to(memory_format=torch.channels_last)
+ user_model.eval()
+ return user_model, tokenizer
+
+
+if args.quantize:
+ # dataset
+ user_model, tokenizer = get_user_model()
+ calib_dataset = load_dataset(args.dataset, split="train")
+ # calib_dataset = datasets.load_from_disk('/your/local/dataset/pile-10k/') # use this if trouble with connecting to HF
+ calib_dataset = calib_dataset.shuffle(seed=args.seed)
+ calib_evaluator = Evaluator(calib_dataset, tokenizer, args.batch_size, pad_max=args.pad_max_length, is_calib=True)
+ calib_dataloader = DataLoader(
+ calib_evaluator.dataset,
+ batch_size=calib_size,
+ shuffle=False,
+ collate_fn=calib_evaluator.collate_batch,
+ )
+
+
+ from neural_compressor.torch.quantization import StaticQuantConfig
+ excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
+ quant_config = StaticQuantConfig(excluded_precisions=excluded_precisions)
+ if re.search("gpt", user_model.config.model_type):
+ quant_config.set_local("add", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
+
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+ from tqdm import tqdm
+ def run_fn(model):
+ calib_iter = 0
+ for batch in tqdm(calib_dataloader, total=args.calib_iters):
+ batch = move_input_to_device(batch, device=None)
+ if isinstance(batch, tuple) or isinstance(batch, list):
+ model(batch[0])
+ elif isinstance(batch, dict):
+ model(**batch)
+ else:
+ model(batch)
+
+ calib_iter += 1
+ if calib_iter >= args.calib_iters:
+ break
+ return
+
+ from utils import get_example_inputs
+ example_inputs = get_example_inputs(user_model, calib_dataloader)
+
+ from neural_compressor.torch.quantization import prepare, convert
+ user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
+ run_fn(user_model)
+ user_model = convert(user_model)
+ user_model.save(args.output_dir)
+
+if args.load:
+ # TODO: we need run_benchmark.sh for loading and remove --accuracy in run_quant.sh, currently run_quant.sh will get fp32 result
+ if args.int8 or args.int8_bf16_mixed:
+ print("load int8 model")
+ from neural_compressor.torch.quantization import load
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+ config = AutoConfig.from_pretrained(args.model)
+ user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
+ setattr(user_model, "config", config)
+ else:
+ user_model, tokenizer = get_user_model()
+
+
+if args.accuracy:
+ user_model.eval()
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ device="cpu",
+ )
+ results = evaluate(eval_args)
+ for task_name in args.tasks.split(","):
+ if task_name == "wikitext":
+ acc = results["results"][task_name]["word_perplexity,none"]
+ else:
+ acc = results["results"][task_name]["acc,none"]
+ print("Accuracy: %.5f" % acc)
+ print('Batch size = %d' % args.batch_size)
+
+if args.performance:
+ user_model.eval()
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+ import time
+
+ samples = args.iters * args.batch_size
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ limit=samples,
+ device="cpu",
+ )
+ start = time.time()
+ results = evaluate(eval_args)
+ end = time.time()
+ for task_name in args.tasks.split(","):
+ if task_name == "wikitext":
+ acc = results["results"][task_name]["word_perplexity,none"]
+ else:
+ acc = results["results"][task_name]["acc,none"]
+ print("Accuracy: %.5f" % acc)
+ print('Throughput: %.3f samples/sec' % (samples / (end - start)))
+ print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
+ print('Batch size = %d' % args.batch_size)
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh
new file mode 100644
index 00000000000..a93d8220d64
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ extra_cmd=''
+ batch_size=8
+ approach='static'
+ DATASET_NAME="NeelNanda/pile-10k"
+ tuned_checkpoint="saved_results"
+
+ if [ "${topology}" = "opt_125m_ipex" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --ipex"
+ elif [ "${topology}" = "llama2_7b_ipex" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --ipex"
+ elif [ "${topology}" = "gpt_j_ipex" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --ipex"
+ fi
+
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --dataset ${DATASET_NAME} \
+ --quantize \
+ --approach ${approach} \
+ --output_dir ${tuned_checkpoint} \
+ --tasks "lambada_openai" \
+ --batch_size ${batch_size} \
+ ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py
new file mode 100644
index 00000000000..76117f8b0b5
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py
@@ -0,0 +1,47 @@
+import torch
+from collections import UserDict
+from packaging.version import Version
+from neural_compressor.torch.utils import get_torch_version
+
+def get_example_inputs(model, dataloader):
+ version = get_torch_version()
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+
+ # Suggest set dataloader like calib_dataloader
+ if dataloader is None:
+ return None
+ device = next(model.parameters()).device
+ try:
+ for idx, (input, label) in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, (list, tuple)):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ except Exception as e: # pragma: no cover
+ for idx, input in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, list) or isinstance(input, tuple):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ if idx == 0:
+ assert False, "Please checkout the example_inputs format."
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md
new file mode 100644
index 00000000000..7ad8b76bd1e
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md
@@ -0,0 +1,27 @@
+Step-by-Step
+============
+This document describes the step-by-step instructions to run large language models (LLMs) on 4th Gen Intel® Xeon® Scalable Processor (codenamed Sapphire Rapids) with PyTorch 2 Export Quantization.
+
+Currently, users can use `run_clm_no_trainer.py` to quantize the `OPT` series models and validate the last word prediction accuracy with [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness.git). We will add more models in the near future.
+
+# Prerequisite
+## 1. Create Environment
+```
+# Installation
+pip install -r requirements.txt
+```
+
+# Run
+
+Here is how to run the scripts:
+
+**Causal Language Modeling (CLM)**
+
+`run_clm_no_trainer.py` quantizes the large language models using the dataset [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) for calibration, and validates accuracy on `lambada_openai`, `piqa`, `winogrande`, `hellaswag` and other datasets provided by lm_eval; an example command is as follows.
+### OPT-125m
+
+#### Quantization
+
+```bash
+python run_clm_no_trainer.py --model facebook/opt-125m --quantize --accuracy
+```
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt
new file mode 100644
index 00000000000..b6d9b6c55de
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt
@@ -0,0 +1,7 @@
+transformers
+torch
+sentencepiece
+neural-compressor
+intel-extension-for-transformers >= 1.4.1
+lm-eval==0.4.2
+peft
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh
new file mode 100644
index 00000000000..169142cddb8
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=100
+ batch_size=16
+ tuned_checkpoint=saved_results
+ task=lambada_openai
+ echo ${max_eval_samples}
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --config=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ extra_cmd=''
+
+ if [[ ${mode} == "accuracy" ]]; then
+ mode_cmd=" --accuracy "
+ extra_cmd=$extra_cmd
+ elif [[ ${mode} == "performance" ]]; then
+ mode_cmd=" --performance --iters "${iters}
+ extra_cmd=$extra_cmd
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ echo $extra_cmd
+
+ if [ "${topology}" = "opt_125m_pt2e_static" ]; then
+ model_name_or_path="facebook/opt-125m"
+ fi
+ if [[ ${mode} == "accuracy" ]]; then
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --output_dir ${tuned_checkpoint} \
+ --task ${task} \
+ --batch_size ${batch_size} \
+ ${extra_cmd} ${mode_cmd}
+ elif [[ ${mode} == "performance" ]]; then
+ incbench --num_cores_per_instance 4 run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --batch_size ${batch_size} \
+ --output_dir ${tuned_checkpoint} \
+ ${extra_cmd} ${mode_cmd}
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
new file mode 100644
index 00000000000..395bc6f9b57
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
@@ -0,0 +1,155 @@
+import argparse
+import time
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--model", nargs="?", default="facebook/opt-125m"
+)
+parser.add_argument(
+ "--trust_remote_code", default=True,
+ help="Transformers parameter: use the external repo")
+parser.add_argument(
+ "--revision", default=None,
+ help="Transformers parameter: set the model hub commit number")
+parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
+parser.add_argument("--output_dir", nargs="?", default="")
+parser.add_argument("--quantize", action="store_true")
+parser.add_argument("--approach", type=str, default='static',
+ help="Select from ['dynamic', 'static', 'weight-only']")
+parser.add_argument("--int8", action="store_true")
+parser.add_argument("--accuracy", action="store_true")
+parser.add_argument("--performance", action="store_true")
+parser.add_argument("--calib_iters", default=2, type=int,
+ help="For calibration only.")
+parser.add_argument("--iters", default=100, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--batch_size", default=1, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
+ type=str, help="tasks for accuracy validation")
+parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
+# =======================================
+
+args = parser.parse_args()
+
+
+def get_user_model():
+ torchscript = False
+ user_model = AutoModelForCausalLM.from_pretrained(
+ args.model,
+ torchscript=torchscript, # torchscript will force `return_dict=False` to avoid jit errors
+ trust_remote_code=args.trust_remote_code,
+ revision=args.revision,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+
+ if args.peft_model_id is not None:
+ from peft import PeftModel
+ user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
+
+ # to channels last
+ user_model = user_model.to(memory_format=torch.channels_last)
+ user_model.eval()
+ return user_model, tokenizer
+
+user_model, tokenizer = get_user_model()
+if args.quantize:
+
+ from neural_compressor.torch.quantization import (
+ convert,
+ get_default_static_config,
+ prepare,
+ )
+ from neural_compressor.torch.export import export
+ from torch.export import Dim
+ def get_example_inputs(tokenizer):
+ text = "Hello, welcome to LLM world."
+ encoded_input = tokenizer(text, return_tensors="pt")
+
+ example_inputs = encoded_input
+ input_ids = example_inputs["input_ids"]
+ input_ids_batch = torch.cat((input_ids, input_ids), dim=0)
+ print(f"input_ids_batch shape: {input_ids_batch.shape}")
+ tuple_inputs = (input_ids_batch,)
+ return tuple_inputs
+ # torch._dynamo.config.cache_size_limit = 4 # set limitation if out of memory
+ batch = Dim(name="batch_size")
+ seq_len = Dim(name="seq_len")
+ dynamic_shapes = {"input_ids": (batch, seq_len)}
+ example_inputs = get_example_inputs(tokenizer)
+ exported_model = export(user_model, example_inputs=example_inputs, dynamic_shapes=dynamic_shapes)
+
+ quant_config = get_default_static_config()
+ # prepare
+ prepare_model = prepare(exported_model, quant_config)
+
+ # calibrate
+ for i in range(args.calib_iters):
+ prepare_model(*example_inputs)
+ # convert
+ converted_model = convert(prepare_model)
+
+ # save
+ if args.output_dir:
+ converted_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
+
+
+
+if args.int8:
+ if args.output_dir:
+ print("Load int8 model.")
+ from neural_compressor.torch.quantization import load
+ model = load(args.output_dir)
+
+ model.config = user_model.config # for lm eval
+
+ # Compile the quantized model and replace the Q/DQ pattern with Q-operator
+ from torch._inductor import config
+
+ config.freezing = True
+ opt_model = torch.compile(model)
+
+ opt_model.config = user_model.config # for lm eval
+ user_model = opt_model
+
+if args.accuracy:
+
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ device="cpu",
+ )
+ results = evaluate(eval_args)
+ for task_name in args.tasks.split(","):
+ if task_name == "wikitext":
+ acc = results["results"][task_name]["word_perplexity,none"]
+ else:
+ acc = results["results"][task_name]["acc,none"]
+ print("Accuracy: %.5f" % acc)
+ print('Batch size = %d' % args.batch_size)
+
+if args.performance:
+ batch_size, input_leng = args.batch_size, 512
+ example_inputs = torch.ones((batch_size, input_leng), dtype=torch.long)
+ print("Batch size = {:d}".format(batch_size))
+ print("The length of input tokens = {:d}".format(input_leng))
+ import time
+
+ total_iters = args.iters
+ warmup_iters = 5
+ with torch.no_grad():
+ for i in range(total_iters):
+ if i == warmup_iters:
+ start = time.time()
+ user_model(example_inputs)
+ end = time.time()
+ latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)
+ throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
+ print("Latency: {:.3f} ms".format(latency * 10**3))
+ print("Throughput: {:.3f} samples/sec".format(throughput))
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh
new file mode 100644
index 00000000000..9e995ec8869
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+
+ if [ "${topology}" = "opt_125m_pt2e_static" ]; then
+ model_name_or_path="facebook/opt-125m"
+ output_dir="saved_results"
+ fi
+ python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --output_dir ${output_dir} --tasks "lambada_openai"
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md
new file mode 100644
index 00000000000..889d7b42682
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md
@@ -0,0 +1,130 @@
+Step-by-Step
+============
+This document describes the step-by-step instructions to run large language models (LLMs) on 4th Gen Intel® Xeon® Scalable Processor (codenamed Sapphire Rapids) with PyTorch and Intel® Extension for PyTorch.
+
+The script `run_clm_no_trainer.py` supports `GPTJ`, `OPT`, `LLaMA2`, `BLOOM` and `Falcon` quantization and validates last word prediction accuracy with [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness.git) now, and we are adding more models.
+
+# Prerequisite
+## 1. Create Environment
+```
+# Installation
+pip install -r requirements.txt
+```
+
+# Run
+
+Here is how to run the scripts:
+
+**Causal Language Modeling (CLM)**
+
+`run_clm_no_trainer.py` quantizes the large language models using the dataset [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) calibration and validates `lambada_openai`, `piqa`, `winogrande`, `hellaswag` and other datasets accuracy provided by lm_eval, an example command is as follows.
+### GPT-J-6b
+
+#### Quantization
+
+```bash
+# "--woq_algo GPTQ" is used to enable GPTQ algorithms
+# "--double_quant_type BNB_NF4" is used to enable double quant algorithms
+python run_clm_no_trainer.py \
+ --model EleutherAI/gpt-j-6B \
+ --dataset NeelNanda/pile-10k \
+ --quantize \
+ --woq_algo GPTQ \
+ --woq_bits 4 \
+ --woq_scheme asym \
+ --woq_group_size 128 \
+ --gptq_max_seq_length 2048 \
+ --gptq_use_max_length \
+ --accuracy \
+ --tasks "lambada_openai" \
+ --double_quant_type "BNB_NF4"
+
+# "--woq_algo RTN" is used to enable RTN algorithms
+python run_clm_no_trainer.py \
+ --model EleutherAI/gpt-j-6B \
+ --dataset NeelNanda/pile-10k \
+ --quantize \
+ --woq_algo RTN \
+ --woq_bits 4 \
+ --woq_scheme asym \
+ --woq_group_size 128 \
+ --accuracy \
+ --tasks "lambada_openai" \
+ --double_quant_type "BNB_NF4"
+```
+**Notes**: Weight-only quantization based on fake quantization is supported as a preview feature and supports RTN, GPTQ[1], AWQ[2], TEQ algorithms. For more details, please refer to [link](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization_weight_only.md). Our GPTQ API supports various CLMs including GPTJ, OPTs, Blooms, Llamas, Falcons, MPTs, ChatGLMs, etc. Simply replace the "--model" argument with other models to quantize different CLMs with GPTQ.
+
+
+### OPT-125m
+
+#### Quantization
+
+```bash
+# "--woq_algo GPTQ" is used to enable GPTQ algorithms
+# "--double_quant_type BNB_NF4" is used to enable double quant algorithms
+python run_clm_no_trainer.py \
+ --model facebook/opt-125m \
+ --dataset NeelNanda/pile-10k \
+ --quantize \
+ --woq_algo GPTQ \
+ --woq_bits 4 \
+ --woq_scheme asym \
+ --woq_group_size 128 \
+ --gptq_max_seq_length 2048 \
+ --gptq_use_max_length \
+ --accuracy \
+ --tasks "lambada_openai" \
+ --double_quant_type "BNB_NF4"
+
+# "--woq_algo RTN" is used to enable RTN algorithms
+python run_clm_no_trainer.py \
+ --model facebook/opt-125m \
+ --dataset NeelNanda/pile-10k \
+ --quantize \
+ --woq_algo RTN \
+ --woq_bits 4 \
+ --woq_scheme asym \
+ --woq_group_size 128 \
+ --accuracy \
+ --tasks "lambada_openai" \
+ --double_quant_type "BNB_NF4"
+```
+
+### LLAMA2-7b/13b/70b
+>Note: LLAMA requires IPEX requirements >= 2.1 to get better accuracy.
+#### Quantization
+
+```bash
+# "--double_quant_type BNB_NF4" is used to enable double quant algorithms
+# "--woq_algo GPTQ" is used to enable GPTQ algorithms
+python run_clm_no_trainer.py \
+ --model meta-llama/Llama-2-7b-hf \
+ --dataset NeelNanda/pile-10k \
+ --quantize \
+ --woq_algo GPTQ \
+ --woq_bits 4 \
+ --woq_scheme asym \
+ --woq_group_size 128 \
+ --gptq_max_seq_length 2048 \
+ --gptq_use_max_length \
+ --accuracy \
+ --tasks "lambada_openai" \
+ --double_quant_type "BNB_NF4"
+
+# "--woq_algo RTN" is used to enable RTN algorithms
+python run_clm_no_trainer.py \
+ --model meta-llama/Llama-2-7b-hf \
+ --dataset NeelNanda/pile-10k \
+ --quantize \
+ --woq_algo RTN \
+ --woq_bits 4 \
+ --woq_scheme asym \
+ --woq_group_size 128 \
+ --accuracy \
+ --tasks "lambada_openai" \
+ --double_quant_type "BNB_NF4"
+```
+
+
+[1]. Frantar, Elias, et al. "GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers." arXiv preprint arXiv:2210.17323 (2022).
+[2]. Lin, Ji, et al. "AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration." arXiv preprint arXiv:2306.00978 (2023).
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt
new file mode 100644
index 00000000000..9688a4f6cb3
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt
@@ -0,0 +1,15 @@
+accelerate
+protobuf
+sentencepiece != 0.1.92
+datasets >= 1.1.3
+torch >= 1.10
+transformers
+pytest
+wandb
+einops
+neural-compressor
+intel-extension-for-transformers
+lm_eval==0.4.2
+peft
+auto_round
+intel_extension_for_pytorch
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh
new file mode 100644
index 00000000000..9e1d766128e
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=100
+ batch_size=16
+ tuned_checkpoint=saved_results
+ task=lambada_openai
+ echo ${max_eval_samples}
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --config=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ extra_cmd=''
+
+ if [[ ${mode} == "accuracy" ]]; then
+ mode_cmd=" --accuracy "
+ elif [[ ${mode} == "performance" ]]; then
+ mode_cmd=" --performance --iters "${iters}
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ echo $extra_cmd
+
+ if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
+ elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then
+        model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then
+        model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ fi
+
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --output_dir ${tuned_checkpoint} \
+ --task ${task} \
+ --batch_size ${batch_size} \
+ ${extra_cmd} ${mode_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py
new file mode 100644
index 00000000000..abd8228354e
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py
@@ -0,0 +1,379 @@
+import argparse
+import os
+import sys
+
+sys.path.append('./')
+import time
+import json
+import re
+import torch
+from datasets import load_dataset
+import datasets
+from torch.nn.functional import pad
+from torch.utils.data import DataLoader
+from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--model", nargs="?", default="EleutherAI/gpt-j-6b"
+)
+parser.add_argument(
+ "--trust_remote_code", default=True,
+ help="Transformers parameter: use the external repo")
+parser.add_argument(
+ "--revision", default=None,
+ help="Transformers parameter: set the model hub commit number")
+parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
+parser.add_argument("--output_dir", nargs="?", default="./saved_results")
+parser.add_argument("--quantize", action="store_true")
+parser.add_argument(
+ "--int8_bf16_mixed",
+ action="store_true",
+ help="By default it is int8-fp32 mixed, to enable int8 mixed amp bf16 (work on platforms like SPR)",
+)
+parser.add_argument(
+ '--seed',
+ type=int, default=42, help='Seed for sampling the calibration data.'
+)
+parser.add_argument("--int8", action="store_true")
+parser.add_argument("--accuracy", action="store_true")
+parser.add_argument("--performance", action="store_true")
+parser.add_argument("--iters", default=100, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--batch_size", default=1, type=int,
+ help="For accuracy measurement only.")
+parser.add_argument("--save_accuracy_path", default=None,
+ help="Save accuracy results path.")
+parser.add_argument("--pad_max_length", default=512, type=int,
+ help="Pad input ids to max length.")
+parser.add_argument("--calib_iters", default=512, type=int,
+ help="calibration iters.")
+parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
+ type=str, help="tasks for accuracy validation")
+parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
+# ============WeightOnly configs===============
+parser.add_argument("--woq_algo", default="RTN", choices=['RTN', 'AWQ', 'TEQ', 'GPTQ'],
+ help="Weight-only parameter.")
+parser.add_argument("--woq_bits", type=int, default=8)
+parser.add_argument("--woq_dtype", type=str, default="int")
+parser.add_argument("--woq_group_size", type=int, default=-1)
+parser.add_argument("--woq_group_dim", type=int, default=1)
+parser.add_argument("--woq_scheme", default="sym")
+parser.add_argument("--woq_use_mse_search", action="store_true")
+parser.add_argument("--woq_use_full_range", action="store_true")
+# =============GPTQ configs====================
+parser.add_argument("--gptq_actorder", action="store_true",
+ help="Whether to apply the activation order GPTQ heuristic.")
+parser.add_argument('--gptq_percdamp', type=float, default=.01,
+ help='Percent of the average Hessian diagonal to use for dampening.')
+parser.add_argument('--gptq_block_size', type=int, default=128, help='Block size. sub weight matrix size to run GPTQ.')
+parser.add_argument('--gptq_static_groups', action="store_true",
+ help="Whether to calculate group wise quantization parameters in advance. "
+ "This option mitigate actorder's extra computational requirements.")
+parser.add_argument('--gptq_nsamples', type=int, default=128, help='Number of calibration data samples.')
+parser.add_argument('--gptq_use_max_length', action="store_true",
+ help='Set all sequence length to be same length of args.gptq_max_seq_length')
+parser.add_argument('--gptq_max_seq_length', type=int, default=2048,
+ help='Calibration dataset sequence max length, '
+ 'this should align with your model config, '
+ 'and your dataset builder args: args.pad_max_length')
+
+# =============DoubleQuant configs====================
+parser.add_argument("--double_quant_type",
+ type=str,
+ default=None,
+ choices=['GGML_TYPE_Q4_K', 'BNB_NF4'],
+ help="DoubleQuant parameter")
+parser.add_argument("--double_quant_dtype",
+ type=str,
+ default="fp32",
+ help="Data type for double quant scale.")
+parser.add_argument("--double_quant_bits",
+ type=int,
+ default=8,
+ help="Number of bits used to represent double_quant scale.")
+parser.add_argument("--double_quant_use_sym",
+ type=bool,
+ default=True,
+ help="Indicates whether double quant scale are symmetric.")
+parser.add_argument("--double_quant_group_size",
+ type=int,
+ default=256,
+ help="Size of double quant groups.")
+# =======================================
+
+args = parser.parse_args()
+calib_size = 1
+
+
+class Evaluator:
+ def __init__(self, dataset, tokenizer, batch_size=8, pad_val=1, pad_max=196, is_calib=False):
+ self.dataset = dataset
+ self.tokenizer = tokenizer
+ self.batch_size = batch_size
+ self.pad_val = pad_val
+ self.pad_max = pad_max
+ self.is_calib = is_calib
+
+ # tokenize the dataset
+ self.dataset = self.dataset.map(self.tokenize_function, batched=True)
+ self.dataset.set_format(type="torch", columns=["input_ids"])
+
+ @torch.no_grad()
+ def tokenize_function(self, examples):
+ if args.woq_algo in ['TEQ']:
+ if self.tokenizer.pad_token is None:
+ self.tokenizer.pad_token = self.tokenizer.eos_token
+ example = self.tokenizer(examples["text"], padding="max_length", max_length=self.pad_max)
+ else:
+ example = self.tokenizer(examples["text"])
+ return example
+
+ @torch.no_grad()
+ def collate_batch(self, batch):
+
+ input_ids_padded = []
+ last_ind = []
+
+ for text in batch:
+ input_ids = text["input_ids"]
+ pad_len = self.pad_max - input_ids.shape[0]
+ last_ind.append(input_ids.shape[0] - 1)
+ if self.is_calib:
+ if args.woq_algo != 'GPTQ':
+ input_ids = input_ids[:self.pad_max] if len(input_ids) > self.pad_max else input_ids
+ else:
+ input_ids = pad(input_ids, (0, pad_len), value=self.pad_val)
+ input_ids_padded.append(input_ids)
+
+ return (torch.vstack(input_ids_padded), torch.tensor(last_ind))
+
+ @torch.no_grad()
+ def evaluate(self, model):
+ model.eval()
+ # The task is to predict the last word of the input.
+ total, hit = 0, 0
+ latency = 0
+ test_dataloader = DataLoader(
+ self.dataset,
+ batch_size=self.batch_size,
+ shuffle=False,
+ collate_fn=self.collate_batch,
+ )
+ for i, (input_ids, last_ind) in enumerate(test_dataloader):
+ label = input_ids[torch.arange(len(last_ind)), last_ind]
+ input_ids[torch.arange(len(last_ind)), last_ind] = self.pad_val
+ pad_len = self.pad_max - last_ind - 1
+
+ start = time.time()
+ outputs = model(input_ids)
+ latency += time.time() - start
+
+ last_token_logits = outputs[0][torch.arange(len(last_ind)), -2 - pad_len, :]
+ pred = last_token_logits.argmax(dim=-1)
+ total += label.size(0)
+ hit += (pred == label).sum().item()
+ if (i + 1) % 50 == 0:
+ print(hit / total)
+ print("Processed minibatch:", i)
+
+ acc = hit / total
+ print("Accuracy: ", acc)
+ print("Latency: ", latency)
+ return acc
+
+
+def get_user_model():
+ torchscript = False
+ if args.woq_algo in ['AWQ', 'TEQ']:
+ torchscript = True
+ user_model = AutoModelForCausalLM.from_pretrained(
+ args.model,
+ torchscript=torchscript, # torchscript will force `return_dict=False` to avoid jit errors
+ trust_remote_code=args.trust_remote_code,
+ revision=args.revision,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+ user_model = user_model.float()
+
+ # Set model's seq_len when GPTQ calibration is enabled.
+ if args.woq_algo == 'GPTQ':
+ user_model.seqlen = args.gptq_max_seq_length
+
+ if args.peft_model_id is not None:
+ from peft import PeftModel
+ user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
+
+ # to channels last
+ user_model = user_model.to(memory_format=torch.channels_last)
+ user_model.eval()
+ return user_model, tokenizer
+
+
+if args.quantize:
+ # dataset
+ user_model, tokenizer = get_user_model()
+ calib_dataset = load_dataset(args.dataset, split="train")
+ # calib_dataset = datasets.load_from_disk('/your/local/dataset/pile-10k/') # use this if trouble with connecting to HF
+ calib_dataset = calib_dataset.shuffle(seed=args.seed)
+ calib_evaluator = Evaluator(calib_dataset, tokenizer, args.batch_size, pad_max=args.pad_max_length, is_calib=True)
+ calib_dataloader = DataLoader(
+ calib_evaluator.dataset,
+ batch_size=calib_size,
+ shuffle=False,
+ collate_fn=calib_evaluator.collate_batch,
+ )
+
+ # 3.x api
+ from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, prepare, convert, quantize
+ from neural_compressor.torch.utils import get_double_quant_config_dict
+ weight_sym = True if args.woq_scheme == "sym" else False
+ if args.double_quant_type is not None:
+ double_quant_config_dict = get_double_quant_config_dict(args.double_quant_type)
+
+ if args.woq_algo == "RTN":
+ if args.double_quant_type is not None:
+ double_quant_config_dict.update(
+ {
+ # TODO: add group_dim into double quant config?
+ "use_full_range": args.woq_use_full_range,
+ "use_mse_search": args.woq_use_mse_search,
+ }
+ )
+ quant_config = RTNConfig.from_dict(double_quant_config_dict)
+ else:
+ quant_config = RTNConfig(
+ dtype=args.woq_dtype,
+ bits=args.woq_bits,
+ use_sym=weight_sym,
+ group_size=args.woq_group_size,
+ group_dim=args.woq_group_dim,
+ use_full_range=args.woq_use_full_range,
+ use_mse_search=args.woq_use_mse_search,
+ use_double_quant=False,
+ double_quant_bits=args.double_quant_bits,
+ double_quant_dtype=args.double_quant_dtype,
+ double_quant_use_sym=args.double_quant_use_sym,
+ double_quant_group_size=args.double_quant_group_size,
+ )
+ quant_config.set_local("lm_head", RTNConfig(dtype="fp32"))
+ user_model = prepare(model=user_model, quant_config=quant_config)
+ user_model = convert(model=user_model)
+ elif args.woq_algo == "GPTQ":
+ from utils import DataloaderPreprocessor
+ dataloaderPreprocessor = DataloaderPreprocessor(
+ dataloader_original=calib_dataloader,
+ use_max_length=args.gptq_use_max_length,
+ max_seq_length=args.gptq_max_seq_length,
+ )
+ dataloader_for_calibration = dataloaderPreprocessor.get_prepared_dataloader()
+ from neural_compressor.torch.algorithms.weight_only.utility import move_input_to_device
+ from tqdm import tqdm
+ def run_fn_for_gptq(model, dataloader_for_calibration, *args):
+ for batch in tqdm(dataloader_for_calibration):
+ batch = move_input_to_device(batch, device=None)
+ if isinstance(batch, tuple) or isinstance(batch, list):
+ model(batch[0])
+ elif isinstance(batch, dict):
+ model(**batch)
+ else:
+ model(batch)
+ return
+ if args.double_quant_type is not None:
+ double_quant_config_dict.update(
+ {
+ "use_mse_search": args.woq_use_mse_search,
+ "percdamp": args.gptq_percdamp,
+ "act_order": args.gptq_actorder,
+ "block_size": args.gptq_block_size,
+ "static_groups": args.gptq_static_groups,
+ }
+ )
+ quant_config = GPTQConfig.from_dict(double_quant_config_dict)
+ else:
+ quant_config = GPTQConfig(
+ dtype=args.woq_dtype,
+ bits=args.woq_bits,
+ use_sym=weight_sym,
+ group_size=args.woq_group_size,
+ use_mse_search=args.woq_use_mse_search,
+ percdamp=args.gptq_percdamp,
+ act_order=args.gptq_actorder,
+ block_size=args.gptq_block_size,
+ static_groups=args.gptq_static_groups,
+ use_double_quant=False,
+ double_quant_bits=args.double_quant_bits,
+ double_quant_dtype=args.double_quant_dtype,
+ double_quant_use_sym=args.double_quant_use_sym,
+ double_quant_group_size=args.double_quant_group_size,
+ )
+ quant_config.set_local("lm_head", GPTQConfig(dtype="fp32"))
+ user_model = prepare(model=user_model, quant_config=quant_config)
+ run_fn_for_gptq(user_model, dataloader_for_calibration)
+ user_model = convert(user_model)
+
+ user_model.save(args.output_dir)
+
+
+# TODO: we need run_benchmark.sh for loading and remove --accuracy in run_quant.sh, currently run_quant.sh will get fp32 result
+
+if args.int8 or args.int8_bf16_mixed:
+ print("load int8 model")
+
+ from neural_compressor.torch.quantization import load
+ user_model, _ = get_user_model()
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
+ config = AutoConfig.from_pretrained(args.model)
+ user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model)
+ setattr(user_model, "config", config)
+else:
+ user_model, tokenizer = get_user_model()
+
+
+if args.accuracy:
+ user_model.eval()
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ device="cpu",
+ )
+ results = evaluate(eval_args)
+ for task_name in args.tasks.split(","):
+ if task_name == "wikitext":
+ acc = results["results"][task_name]["word_perplexity,none"]
+ else:
+ acc = results["results"][task_name]["acc,none"]
+ print("Accuracy: %.5f" % acc)
+ print('Batch size = %d' % args.batch_size)
+
+if args.performance:
+ user_model.eval()
+ from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+ import time
+
+ samples = args.iters * args.batch_size
+ eval_args = LMEvalParser(
+ model="hf",
+ user_model=user_model,
+ tokenizer=tokenizer,
+ batch_size=args.batch_size,
+ tasks=args.tasks,
+ limit=samples,
+ device="cpu",
+ )
+ start = time.time()
+ results = evaluate(eval_args)
+ end = time.time()
+ for task_name in args.tasks.split(","):
+ if task_name == "wikitext":
+ acc = results["results"][task_name]["word_perplexity,none"]
+ else:
+ acc = results["results"][task_name]["acc,none"]
+ print("Accuracy: %.5f" % acc)
+ print('Throughput: %.3f samples/sec' % (samples / (end - start)))
+ print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
+ print('Batch size = %d' % args.batch_size)
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh
new file mode 100644
index 00000000000..a860712b697
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ extra_cmd=''
+ batch_size=8
+ DATASET_NAME="NeelNanda/pile-10k"
+ tuned_checkpoint="saved_results"
+
+ if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
+ model_name_or_path="facebook/opt-125m"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.8 --gptq_actorder"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then
+ model_name_or_path="meta-llama/Llama-2-7b-hf"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
+ elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
+ elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then
+ model_name_or_path="EleutherAI/gpt-j-6b"
+ extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
+ extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
+ fi
+
+ python -u run_clm_no_trainer.py \
+ --model ${model_name_or_path} \
+ --dataset ${DATASET_NAME} \
+ --quantize \
+ --output_dir ${tuned_checkpoint} \
+ --tasks "lambada_openai" \
+ --batch_size ${batch_size} \
+ ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/utils.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py
similarity index 100%
rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/utils.py
rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md
new file mode 100644
index 00000000000..b035249baac
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md
@@ -0,0 +1,57 @@
+Step-by-Step
+============
+This document describes the step-by-step instructions for reproducing Huggingface models with IPEX backend tuning results with Intel® Neural Compressor.
+> Note: IPEX version >= 1.10
+
+# Prerequisite
+
+## 1. Environment
+Python 3.6 or a higher version is recommended.
+```shell
+pip install -r requirements.txt
+pip install torch
+pip install intel_extension_for_pytorch
+```
+
+# Quantization
+
+## 1. Quantization with CPU
+If IPEX version is equal or higher than 1.12, please install transformers 4.19.0.
+```shell
+python run_qa.py \
+ --model_name_or_path bert-large-uncased-whole-word-masking-finetuned-squad \
+ --dataset_name squad \
+ --do_eval \
+ --max_seq_length 384 \
+ --doc_stride 128 \
+ --no_cuda \
+ --tune \
+ --output_dir ./savedresult
+```
+
+## 2. Quantization with XPU
+### 2.1 Environment Setting
+Please build an IPEX docker container according to the [official guide](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu&version=v2.1.30%2bxpu&os=linux%2fwsl2&package=docker).
+
+You can run a simple sanity test to double confirm if the correct version is installed, and if the software stack can get correct hardware information onboard your system. The command should return PyTorch and IPEX versions installed, as well as GPU card(s) information detected.
+```bash
+source {DPCPPROOT}/env/vars.sh
+source {MKLROOT}/env/vars.sh
+source {CCLROOT}/env/vars.sh
+source {MPIROOT}/env/vars.sh
+python -c "import torch; import intel_extension_for_pytorch as ipex; print(torch.__version__); print(ipex.__version__); [print(f'[{i}]: {torch.xpu.get_device_properties(i)}') for i in range(torch.xpu.device_count())];"
+```
+Please also refer to this [tutorial](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu&version=v2.1.30%2bxpu&os=linux%2fwsl2&package=conda) to check system requirements and install dependencies.
+
+### 2.2 Quantization Command
+```shell
+python run_qa.py \
+ --model_name_or_path bert-large-uncased-whole-word-masking-finetuned-squad \
+ --dataset_name squad \
+ --do_eval \
+ --max_seq_length 384 \
+ --doc_stride 128 \
+ --xpu \
+ --tune \
+ --output_dir ./savedresult
+```
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt
new file mode 100644
index 00000000000..2bb000d2deb
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt
@@ -0,0 +1,5 @@
+accelerate
+datasets>=1.8.0
+transformers>=4.34.1
+tensorboard
+tqdm
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh
new file mode 100644
index 00000000000..2f646afacdb
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@" # parse --key=value CLI flags into globals
+ run_benchmark # launch the accuracy or performance run
+
+}
+
+# init params
+function init_params { # parse --key=value CLI flags into globals; unknown flags abort
+ tuned_checkpoint=saved_results # default dir holding the tuned int8 model
+ tokenizer_name=bert-large-uncased-whole-word-masking-finetuned-squad # NOTE(review): set but unused in this script
+ iters=100 # default benchmark iteration count
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --config=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ --xpu=*)
+ xpu=$(echo ${var} |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}" # fail fast on typos
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark: validate mode, resolve topology to a model id, launch run_qa.py.
+# Globals consumed: mode, iters, int8, xpu, topology, batch_size, tuned_checkpoint.
+function run_benchmark {
+    if [[ ${mode} == "accuracy" ]]; then
+        mode_cmd=" --accuracy_only"
+    elif [[ ${mode} == "performance" ]]; then
+        mode_cmd=" --benchmark --iters ${iters}"
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+
+    # Optional flags forwarded verbatim to run_qa.py.
+    extra_cmd=""
+    if [[ ${int8} == "true" ]]; then
+        extra_cmd="${extra_cmd} --int8"
+    fi
+    if [[ ${xpu} == "true" ]]; then
+        extra_cmd="${extra_cmd} --xpu"
+    fi
+    echo $extra_cmd
+
+    # Map the topology name to its HuggingFace model id. Unknown names now
+    # fail fast instead of silently benchmarking nothing and exiting 0.
+    if [[ "${topology}" == "bert_large_ipex" ]]; then
+        model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad"
+    elif [[ "${topology}" == "distilbert_base_ipex" ]]; then
+        model_name_or_path="distilbert-base-uncased-distilled-squad"
+    else
+        echo "Error: No such topology: ${topology}"
+        exit 1
+    fi
+
+    # Single shared invocation (previously duplicated per topology).
+    # mode_cmd/extra_cmd are intentionally unquoted so they word-split into args.
+    python run_qa.py \
+        --model_name_or_path $model_name_or_path \
+        --dataset_name squad \
+        --do_eval \
+        --max_seq_length 384 \
+        --no_cuda \
+        --output_dir $tuned_checkpoint \
+        --per_gpu_eval_batch_size $batch_size \
+        $mode_cmd \
+        ${extra_cmd}
+}
+
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py
new file mode 100644
index 00000000000..079c0749994
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py
@@ -0,0 +1,738 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2020 The HuggingFace Team All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Fine-tuning the library models for question answering using a slightly adapted version of the 🤗 Trainer.
+"""
+# You can also adapt this script on your own question answering task. Pointers for this are left as comments.
+
+import datasets
+import logging
+import os
+import sys
+import timeit
+import transformers
+from dataclasses import dataclass, field
+from datasets import load_dataset, load_metric
+from trainer_qa import QuestionAnsweringTrainer
+from transformers import (
+ AutoConfig,
+ AutoModelForQuestionAnswering,
+ AutoTokenizer,
+ DataCollatorWithPadding,
+ EvalPrediction,
+ HfArgumentParser,
+ PreTrainedTokenizerFast,
+ TrainingArguments,
+ default_data_collator,
+ set_seed,
+)
+from transformers.trainer_utils import get_last_checkpoint
+from transformers.utils import check_min_version
+from transformers.utils.versions import require_version
+from typing import Optional
+from utils_qa import postprocess_qa_predictions
+from neural_compressor.utils.utility import LazyImport
+try:
+ import intel_extension_for_pytorch as ipex
+ from intel_extension_for_pytorch.quantization import prepare, convert
+ from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig
+except:
+ assert False, "transformers 4.19.0 requests IPEX version higher or equal to 1.12"
+torch = LazyImport("torch")
+
+
+# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
+check_min_version("4.12.0")
+
+require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
+
+logger = logging.getLogger(__name__)
+
+os.environ["WANDB_DISABLED"] = "true"
+
+
+@dataclass
+class ModelArguments:
+ """
+ Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
+ """
+
+ model_name_or_path: str = field(
+ metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
+ )
+ config_name: Optional[str] = field(
+ default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
+ )
+ tokenizer_name: Optional[str] = field(
+ default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
+ )
+ cache_dir: Optional[str] = field(
+ default=None,
+ metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"},
+ )
+ model_revision: str = field(
+ default="main",
+ metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
+ )
+ use_auth_token: bool = field(
+ default=False,
+ metadata={
+ "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
+ "with private models)."
+ },
+ )
+ tune: bool = field(
+ default=False,
+ metadata={"help": "Whether or not to apply quantization."},
+ )
+ int8: bool = field(
+ default=False, metadata={"help": "use int8 model to get accuracy or benchmark"}
+ )
+ benchmark: bool = field(
+ default=False, metadata={"help": "get benchmark instead of accuracy"}
+ )
+ accuracy_only: bool = field(
+ default=False, metadata={"help": "get accuracy"}
+ )
+ iters: int = field(
+ default=100,
+ metadata={
+ "help": "The inference iterations to run for benchmark."
+ },
+ )
+ xpu: bool = field(
+ default=False, metadata={"help": "whether to use xpu"}
+ )
+ calib_iters: int = field(
+ default=512,
+ metadata={
+ "help": "The inference iterations to calibration."
+ },
+ )
+
+
+@dataclass
+class DataTrainingArguments:
+ """
+ Arguments pertaining to what data we are going to input our model for training and eval.
+ """
+
+ dataset_name: Optional[str] = field(
+ default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+ )
+ dataset_config_name: Optional[str] = field(
+ default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+ )
+ train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
+ validation_file: Optional[str] = field(
+ default=None,
+ metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
+ )
+ test_file: Optional[str] = field(
+ default=None,
+ metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."},
+ )
+ overwrite_cache: bool = field(
+ default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
+ )
+ preprocessing_num_workers: Optional[int] = field(
+ default=None,
+ metadata={"help": "The number of processes to use for the preprocessing."},
+ )
+ max_seq_length: int = field(
+ default=384,
+ metadata={
+ "help": "The maximum total input sequence length after tokenization. Sequences longer "
+ "than this will be truncated, sequences shorter will be padded."
+ },
+ )
+ pad_to_max_length: bool = field(
+ default=True,
+ metadata={
+ "help": "Whether to pad all samples to `max_seq_length`. "
+ "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can "
+ "be faster on GPU but will be slower on TPU)."
+ },
+ )
+ max_train_samples: Optional[int] = field(
+ default=None,
+ metadata={
+ "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+ "value if set."
+ },
+ )
+ max_eval_samples: Optional[int] = field(
+ default=None,
+ metadata={
+ "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+ "value if set."
+ },
+ )
+ max_predict_samples: Optional[int] = field(
+ default=None,
+ metadata={
+ "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
+ "value if set."
+ },
+ )
+ version_2_with_negative: bool = field(
+ default=False, metadata={"help": "If true, some of the examples do not have an answer."}
+ )
+ null_score_diff_threshold: float = field(
+ default=0.0,
+ metadata={
+ "help": "The threshold used to select the null answer: if the best answer has a score that is less than "
+ "the score of the null answer minus this threshold, the null answer is selected for this example. "
+ "Only useful when `version_2_with_negative=True`."
+ },
+ )
+ doc_stride: int = field(
+ default=128,
+ metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."},
+ )
+ n_best_size: int = field(
+ default=20,
+ metadata={"help": "The total number of n-best predictions to generate when looking for an answer."},
+ )
+ max_answer_length: int = field(
+ default=30,
+ metadata={
+ "help": "The maximum length of an answer that can be generated. This is needed because the start "
+ "and end predictions are not conditioned on one another."
+ },
+ )
+
+ def __post_init__(self):
+ if (
+ self.dataset_name is None
+ and self.train_file is None
+ and self.validation_file is None
+ and self.test_file is None
+ ):
+ raise ValueError("Need either a dataset name or a training/validation file/test_file.")
+ else:
+ if self.train_file is not None:
+ extension = self.train_file.split(".")[-1]
+ assert extension in ["csv", "json"], "`train_file` should be a csv or a json file."
+ if self.validation_file is not None:
+ extension = self.validation_file.split(".")[-1]
+ assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
+ if self.test_file is not None:
+ extension = self.test_file.split(".")[-1]
+ assert extension in ["csv", "json"], "`test_file` should be a csv or a json file."
+
+def main():
+ # See all possible arguments in src/transformers/training_args.py
+ # or by passing the --help flag to this script.
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
+
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+ # If we pass only one argument to the script and it's the path to a json file,
+ # let's parse it to get our arguments.
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+ else:
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ # Setup logging
+ logging.basicConfig(
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+ datefmt="%m/%d/%Y %H:%M:%S",
+ handlers=[logging.StreamHandler(sys.stdout)],
+ )
+
+ log_level = training_args.get_process_log_level()
+ logger.setLevel(log_level)
+ datasets.utils.logging.set_verbosity(log_level)
+ transformers.utils.logging.set_verbosity(log_level)
+ transformers.utils.logging.enable_default_handler()
+ transformers.utils.logging.enable_explicit_format()
+
+ # Log on each process the small summary:
+ logger.warning(
+ f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+ + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ )
+ logger.info(f"Training/evaluation parameters {training_args}")
+
+ # Detecting last checkpoint.
+ last_checkpoint = None
+ if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+ last_checkpoint = get_last_checkpoint(training_args.output_dir)
+ if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+ raise ValueError(
+ f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+ "Use --overwrite_output_dir to overcome."
+ )
+ elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
+ logger.info(
+ f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+ "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+ )
+
+ # Set seed before initializing model.
+ set_seed(training_args.seed)
+
+ # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
+ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
+ # (the dataset will be downloaded automatically from the datasets Hub).
+ #
+ # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
+ # 'text' is found. You can easily tweak this behavior (see below).
+ #
+ # In distributed training, the load_dataset function guarantee that only one local process can concurrently
+ # download the dataset.
+ if data_args.dataset_name is not None:
+ # Downloading and loading a dataset from the hub.
+ raw_datasets = load_dataset(
+ data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir
+ )
+ else:
+ data_files = {}
+ if data_args.train_file is not None:
+ data_files["train"] = data_args.train_file
+ extension = data_args.train_file.split(".")[-1]
+
+ if data_args.validation_file is not None:
+ data_files["validation"] = data_args.validation_file
+ extension = data_args.validation_file.split(".")[-1]
+ if data_args.test_file is not None:
+ data_files["test"] = data_args.test_file
+ extension = data_args.test_file.split(".")[-1]
+ raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir)
+ # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
+ # https://huggingface.co/docs/datasets/loading_datasets.html.
+
+ # Load pretrained model and tokenizer
+ #
+ # Distributed training:
+ # The .from_pretrained methods guarantee that only one local process can concurrently
+ # download model & vocab.
+ config = AutoConfig.from_pretrained(
+ model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+ cache_dir=model_args.cache_dir,
+ revision=model_args.model_revision,
+ use_auth_token=True if model_args.use_auth_token else None,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(
+ model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+ cache_dir=model_args.cache_dir,
+ use_fast=True,
+ revision=model_args.model_revision,
+ use_auth_token=True if model_args.use_auth_token else None,
+ )
+
+ model = AutoModelForQuestionAnswering.from_pretrained(
+ model_args.model_name_or_path,
+ from_tf=bool(".ckpt" in model_args.model_name_or_path),
+ config=config,
+ cache_dir=model_args.cache_dir,
+ revision=model_args.model_revision,
+ use_auth_token=True if model_args.use_auth_token else None,
+ )
+
+ # Tokenizer check: this script requires a fast tokenizer.
+ if not isinstance(tokenizer, PreTrainedTokenizerFast):
+ raise ValueError(
+ "This example script only works for models that have a fast tokenizer. Checkout the big table of models "
+ "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this "
+ "requirement"
+ )
+
+ # Preprocessing the datasets.
+ # Preprocessing is slightly different for training and evaluation.
+ if training_args.do_train:
+ column_names = raw_datasets["train"].column_names
+ elif training_args.do_eval:
+ column_names = raw_datasets["validation"].column_names
+ else:
+ column_names = raw_datasets["test"].column_names
+ question_column_name = "question" if "question" in column_names else column_names[0]
+ context_column_name = "context" if "context" in column_names else column_names[1]
+ answer_column_name = "answers" if "answers" in column_names else column_names[2]
+
+ # Padding side determines if we do (question|context) or (context|question).
+ pad_on_right = tokenizer.padding_side == "right"
+
+ if data_args.max_seq_length > tokenizer.model_max_length:
+ logger.warning(
+ f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+ f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
+ )
+ max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
+
+ # Training preprocessing
+ def prepare_train_features(examples):
+ # Some of the questions have lots of whitespace on the left, which is not useful and will make the
+ # truncation of the context fail (the tokenized question will take a lots of space). So we remove that
+ # left whitespace
+ examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]]
+
+ # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results
+ # in one example possible giving several features when a context is long, each of those features having a
+ # context that overlaps a bit the context of the previous feature.
+ tokenized_examples = tokenizer(
+ examples[question_column_name if pad_on_right else context_column_name],
+ examples[context_column_name if pad_on_right else question_column_name],
+ truncation="only_second" if pad_on_right else "only_first",
+ max_length=max_seq_length,
+ stride=data_args.doc_stride,
+ return_overflowing_tokens=True,
+ return_offsets_mapping=True,
+ padding="max_length" if data_args.pad_to_max_length else False,
+ )
+
+ # Since one example might give us several features if it has a long context, we need a map from a feature to
+ # its corresponding example. This key gives us just that.
+ sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
+ # The offset mappings will give us a map from token to character position in the original context. This will
+ # help us compute the start_positions and end_positions.
+ offset_mapping = tokenized_examples.pop("offset_mapping")
+
+ # Let's label those examples!
+ tokenized_examples["start_positions"] = []
+ tokenized_examples["end_positions"] = []
+
+ for i, offsets in enumerate(offset_mapping):
+ # We will label impossible answers with the index of the CLS token.
+ input_ids = tokenized_examples["input_ids"][i]
+ cls_index = input_ids.index(tokenizer.cls_token_id)
+
+ # Grab the sequence corresponding to that example (to know what is the context and what is the question).
+ sequence_ids = tokenized_examples.sequence_ids(i)
+
+ # One example can give several spans, this is the index of the example containing this span of text.
+ sample_index = sample_mapping[i]
+ answers = examples[answer_column_name][sample_index]
+ # If no answers are given, set the cls_index as answer.
+ if len(answers["answer_start"]) == 0:
+ tokenized_examples["start_positions"].append(cls_index)
+ tokenized_examples["end_positions"].append(cls_index)
+ else:
+ # Start/end character index of the answer in the text.
+ start_char = answers["answer_start"][0]
+ end_char = start_char + len(answers["text"][0])
+
+ # Start token index of the current span in the text.
+ token_start_index = 0
+ while sequence_ids[token_start_index] != (1 if pad_on_right else 0):
+ token_start_index += 1
+
+ # End token index of the current span in the text.
+ token_end_index = len(input_ids) - 1
+ while sequence_ids[token_end_index] != (1 if pad_on_right else 0):
+ token_end_index -= 1
+
+ # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index).
+ if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char):
+ tokenized_examples["start_positions"].append(cls_index)
+ tokenized_examples["end_positions"].append(cls_index)
+ else:
+ # Otherwise move the token_start_index and token_end_index to the two ends of the answer.
+ # Note: we could go after the last offset if the answer is the last word (edge case).
+ while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
+ token_start_index += 1
+ tokenized_examples["start_positions"].append(token_start_index - 1)
+ while offsets[token_end_index][1] >= end_char:
+ token_end_index -= 1
+ tokenized_examples["end_positions"].append(token_end_index + 1)
+
+ return tokenized_examples
+
+ if training_args.do_train:
+ if "train" not in raw_datasets:
+ raise ValueError("--do_train requires a train dataset")
+ train_dataset = raw_datasets["train"]
+ if data_args.max_train_samples is not None:
+ # We will select sample from whole data if argument is specified
+ max_train_samples = min(len(train_dataset), data_args.max_train_samples)
+ train_dataset = train_dataset.select(range(max_train_samples))
+ # Create train feature from dataset
+ with training_args.main_process_first(desc="train dataset map pre-processing"):
+ train_dataset = train_dataset.map(
+ prepare_train_features,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ remove_columns=column_names,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc="Running tokenizer on train dataset",
+ )
+ if data_args.max_train_samples is not None:
+ # Number of samples might increase during Feature Creation, We select only specified max samples
+ max_train_samples = min(len(train_dataset), data_args.max_train_samples)
+ train_dataset = train_dataset.select(range(max_train_samples))
+
+ # Validation preprocessing
+ def prepare_validation_features(examples):
+ # Some of the questions have lots of whitespace on the left, which is not useful and will make the
+ # truncation of the context fail (the tokenized question will take a lots of space). So we remove that
+ # left whitespace
+ examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]]
+
+ # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results
+ # in one example possible giving several features when a context is long, each of those features having a
+ # context that overlaps a bit the context of the previous feature.
+ tokenized_examples = tokenizer(
+ examples[question_column_name if pad_on_right else context_column_name],
+ examples[context_column_name if pad_on_right else question_column_name],
+ truncation="only_second" if pad_on_right else "only_first",
+ max_length=max_seq_length,
+ stride=data_args.doc_stride,
+ return_overflowing_tokens=True,
+ return_offsets_mapping=True,
+ padding="max_length" if data_args.pad_to_max_length else False,
+ )
+
+ # Since one example might give us several features if it has a long context, we need a map from a feature to
+ # its corresponding example. This key gives us just that.
+ sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
+
+ # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the
+ # corresponding example_id and we will store the offset mappings.
+ tokenized_examples["example_id"] = []
+
+ for i in range(len(tokenized_examples["input_ids"])):
+ # Grab the sequence corresponding to that example (to know what is the context and what is the question).
+ sequence_ids = tokenized_examples.sequence_ids(i)
+ context_index = 1 if pad_on_right else 0
+
+ # One example can give several spans, this is the index of the example containing this span of text.
+ sample_index = sample_mapping[i]
+ tokenized_examples["example_id"].append(examples["id"][sample_index])
+
+ # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token
+ # position is part of the context or not.
+ tokenized_examples["offset_mapping"][i] = [
+ (o if sequence_ids[k] == context_index else None)
+ for k, o in enumerate(tokenized_examples["offset_mapping"][i])
+ ]
+
+ return tokenized_examples
+
+ if training_args.do_eval:
+ if "validation" not in raw_datasets:
+ raise ValueError("--do_eval requires a validation dataset")
+ eval_examples = raw_datasets["validation"]
+ if data_args.max_eval_samples is not None:
+ # We will select sample from whole data
+ max_eval_samples = min(len(eval_examples), data_args.max_eval_samples)
+ eval_examples = eval_examples.select(range(max_eval_samples))
+ # Validation Feature Creation
+ with training_args.main_process_first(desc="validation dataset map pre-processing"):
+ eval_dataset = eval_examples.map(
+ prepare_validation_features,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ remove_columns=column_names,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc="Running tokenizer on validation dataset",
+ )
+ if data_args.max_eval_samples is not None:
+ # During Feature creation dataset samples might increase, we will select required samples again
+ max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
+ eval_dataset = eval_dataset.select(range(max_eval_samples))
+
+ if training_args.do_predict:
+ if "test" not in raw_datasets:
+ raise ValueError("--do_predict requires a test dataset")
+ predict_examples = raw_datasets["test"]
+ if data_args.max_predict_samples is not None:
+ # We will select sample from whole data
+ predict_examples = predict_examples.select(range(data_args.max_predict_samples))
+ # Predict Feature Creation
+ with training_args.main_process_first(desc="prediction dataset map pre-processing"):
+ predict_dataset = predict_examples.map(
+ prepare_validation_features,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ remove_columns=column_names,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc="Running tokenizer on prediction dataset",
+ )
+ if data_args.max_predict_samples is not None:
+ # During Feature creation dataset samples might increase, we will select required samples again
+ max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples)
+ predict_dataset = predict_dataset.select(range(max_predict_samples))
+
+ # Data collator
+ # We have already padded to max length if the corresponding flag is True, otherwise we need to pad in the data
+ # collator.
+ data_collator = (
+ default_data_collator
+ if data_args.pad_to_max_length
+ else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
+ )
+
+ # Post-processing:
+ def post_processing_function(examples, features, predictions, stage="eval"):
+ # Post-processing: we match the start logits and end logits to answers in the original context.
+ predictions = postprocess_qa_predictions(
+ examples=examples,
+ features=features,
+ predictions=predictions,
+ version_2_with_negative=data_args.version_2_with_negative,
+ n_best_size=data_args.n_best_size,
+ max_answer_length=data_args.max_answer_length,
+ null_score_diff_threshold=data_args.null_score_diff_threshold,
+ output_dir=training_args.output_dir,
+ log_level=log_level,
+ prefix=stage,
+ )
+ # Format the result to the format the metric expects.
+ if data_args.version_2_with_negative:
+ formatted_predictions = [
+ {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items()
+ ]
+ else:
+ formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()]
+
+ references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
+ return EvalPrediction(predictions=formatted_predictions, label_ids=references)
+
+ metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad", trust_remote_code=True)
+
+ def compute_metrics(p: EvalPrediction):
+ return metric.compute(predictions=p.predictions, references=p.label_ids)
+
+ # Initialize our Trainer
+ trainer = QuestionAnsweringTrainer(
+ model=model,
+ args=training_args,
+ train_dataset=train_dataset if training_args.do_train else None,
+ eval_dataset=eval_dataset if training_args.do_eval else None,
+ eval_examples=eval_examples if training_args.do_eval else None,
+ tokenizer=tokenizer,
+ data_collator=data_collator,
+ post_process_function=post_processing_function,
+ compute_metrics=compute_metrics,
+ )
+
+ eval_dataloader = trainer.get_eval_dataloader()
+ # transformer issue #1
+ # for transformers 4.31.0: accelerate dataloader
+ # *** ValueError: batch_size attribute should not be set
+ # after DataLoaderShard is initialized
+ if eval_dataloader.batch_size is None:
+ def _build_inc_dataloader(dataloader):
+ class INCDataLoader:
+ __iter__ = dataloader.__iter__
+ def __init__(self) -> None:
+ self.dataloader = dataloader
+ self.batch_size = dataloader.total_batch_size
+ return INCDataLoader()
+ eval_dataloader = _build_inc_dataloader(eval_dataloader)
+ batch_size = eval_dataloader.batch_size
+ metric_name = "eval_f1"
+
+ def take_eval_steps(model, trainer, metric_name, save_metrics=False):
+ trainer.model = model
+ start_time = timeit.default_timer()
+ metrics = trainer.evaluate()
+ evalTime = timeit.default_timer() - start_time
+ max_eval_samples = data_args.max_eval_samples \
+ if data_args.max_eval_samples is not None else len(eval_dataset)
+ eval_samples = min(max_eval_samples, len(eval_dataset))
+ samples = eval_samples - (eval_samples % batch_size) \
+ if training_args.dataloader_drop_last else eval_samples
+ if save_metrics:
+ trainer.save_metrics("eval", metrics)
+ logger.info("metrics keys: {}".format(metrics.keys()))
+ print('Batch size = %d' % batch_size)
+ print("Finally Eval {} Accuracy: {}".format(metric_name, metrics.get(metric_name)))
+ print("Latency: %.3f ms" % (evalTime / samples * 1000))
+ print("Throughput: {} samples/sec".format(samples / evalTime))
+ return metrics.get(metric_name)
+
+ def eval_func(model):
+ return take_eval_steps(model, trainer, metric_name)
+
+ if model_args.tune:
+ ipex.nn.utils._model_convert.replace_dropout_with_identity(model)
+ from neural_compressor.torch.quantization import get_default_static_config
+ quant_config = get_default_static_config()
+ dummy_input_ids = torch.ones((training_args.per_device_eval_batch_size, data_args.max_seq_length), dtype=torch.long)
+ dummy_token_type_ids = torch.ones((training_args.per_device_eval_batch_size, data_args.max_seq_length), dtype=torch.long)
+ dummy_attention_mask = torch.ones((training_args.per_device_eval_batch_size, data_args.max_seq_length), dtype=torch.long)
+ if model.config.model_type == "distilbert":
+ example_inputs = (dummy_input_ids, dummy_attention_mask)
+ elif model.config.model_type == "bert":
+ example_inputs = (dummy_input_ids, dummy_attention_mask, dummy_token_type_ids)
+ else:
+ example_inputs = None # please provide correct example_inputs if necessary.
+
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+ from tqdm import tqdm
+ def run_fn(model):
+ calib_iter = 0
+ for batch in tqdm(eval_dataloader, total=model_args.calib_iters):
+ batch = move_input_to_device(batch, device=None)
+ if isinstance(batch, tuple) or isinstance(batch, list):
+ model(batch[0])
+ elif isinstance(batch, dict):
+ model(**batch)
+ else:
+ model(batch)
+
+ calib_iter += 1
+ if calib_iter >= model_args.calib_iters:
+ break
+ return
+
+ from neural_compressor.torch.quantization import prepare, convert
+ model = prepare(model=model, quant_config=quant_config, example_inputs=example_inputs)
+ run_fn(model)
+ q_model = convert(model)
+ q_model.save(training_args.output_dir)
+ return
+
+ model.eval()
+ if model_args.int8:
+ print("load int8 model")
+ from neural_compressor.torch.quantization import load
+ model = load(os.path.abspath(os.path.expanduser(training_args.output_dir)))
+ else:
+ from utils_qa import get_example_inputs
+ example_inputs = get_example_inputs(model, eval_dataloader)
+ model = ipex.optimize(model)
+ with torch.no_grad():
+ if isinstance(example_inputs, dict):
+ model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False)
+ else:
+ model = torch.jit.trace(model, example_inputs, strict=False)
+ model = torch.jit.freeze(model)
+
+ if model_args.benchmark or model_args.accuracy_only:
+ if model_args.benchmark:
+ from neural_compressor.config import BenchmarkConfig
+ from neural_compressor import benchmark
+ b_conf = BenchmarkConfig(backend="ipex",
+ warmup=5,
+ iteration=model_args.iters,
+ cores_per_instance=4,
+ num_of_instance=1)
+ if model_args.xpu:
+ b_conf.device = "xpu"
+ benchmark.fit(model, b_conf, b_dataloader=eval_dataloader)
+ else:
+ eval_func(model)
+
+def _mp_fn(index):
+ # For xla_spawn (TPUs)
+ main()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh
new file mode 100644
index 00000000000..ae49ed79f5f
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+# init params
+function init_params {
+ tuned_checkpoint=saved_results
+ tokenizer_name=bert-large-uncased-whole-word-masking-finetuned-squad
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+}
+
+
+# run_tuning
+function run_tuning {
+ if [[ "${topology}" == "bert_large_ipex" ]]; then
+ model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad"
+ python run_qa.py \
+ --model_name_or_path $model_name_or_path \
+ --dataset_name squad \
+ --do_eval \
+ --max_seq_length 384 \
+ --no_cuda \
+ --tune \
+ --output_dir $tuned_checkpoint
+ fi
+ if [[ "${topology}" == "distilbert_base_ipex" ]]; then
+ model_name_or_path="distilbert-base-uncased-distilled-squad"
+ python run_qa.py \
+ --model_name_or_path $model_name_or_path \
+ --dataset_name squad \
+ --do_eval \
+ --max_seq_length 384 \
+ --no_cuda \
+ --tune \
+ --output_dir $tuned_checkpoint
+ fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py
new file mode 100644
index 00000000000..7f98eba236c
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py
@@ -0,0 +1,105 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Team All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+A subclass of `Trainer` specific to Question-Answering tasks
+"""
+
+from transformers import Trainer, is_torch_tpu_available
+from transformers.trainer_utils import PredictionOutput
+
+
+if is_torch_tpu_available():
+ import torch_xla.core.xla_model as xm
+ import torch_xla.debug.metrics as met
+
+
+class QuestionAnsweringTrainer(Trainer):
+ def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.eval_examples = eval_examples
+ self.post_process_function = post_process_function
+
+ def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"):
+ eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset
+ eval_dataloader = self.get_eval_dataloader(eval_dataset)
+ eval_examples = self.eval_examples if eval_examples is None else eval_examples
+
+ # Temporarily disable metric computation, we will do it in the loop here.
+ compute_metrics = self.compute_metrics
+ self.compute_metrics = None
+ eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
+ try:
+ output = eval_loop(
+ eval_dataloader,
+ description="Evaluation",
+ # No point gathering the predictions if there are no metrics, otherwise we defer to
+ # self.args.prediction_loss_only
+ prediction_loss_only=True if compute_metrics is None else None,
+ ignore_keys=ignore_keys,
+ )
+ finally:
+ self.compute_metrics = compute_metrics
+
+ if self.post_process_function is not None and self.compute_metrics is not None:
+ eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions)
+ metrics = self.compute_metrics(eval_preds)
+
+ # Prefix all keys with metric_key_prefix + '_'
+ for key in list(metrics.keys()):
+ if not key.startswith(f"{metric_key_prefix}_"):
+ metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
+
+ self.log(metrics)
+ else:
+ metrics = {}
+
+ if self.args.tpu_metrics_debug or self.args.debug:
+ # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
+ xm.master_print(met.metrics_report())
+
+ self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics)
+ return metrics
+
+ def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"):
+ predict_dataloader = self.get_test_dataloader(predict_dataset)
+
+ # Temporarily disable metric computation, we will do it in the loop here.
+ compute_metrics = self.compute_metrics
+ self.compute_metrics = None
+ eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
+ try:
+ output = eval_loop(
+ predict_dataloader,
+ description="Prediction",
+ # No point gathering the predictions if there are no metrics, otherwise we defer to
+ # self.args.prediction_loss_only
+ prediction_loss_only=True if compute_metrics is None else None,
+ ignore_keys=ignore_keys,
+ )
+ finally:
+ self.compute_metrics = compute_metrics
+
+ if self.post_process_function is None or self.compute_metrics is None:
+ return output
+
+ predictions = self.post_process_function(predict_examples, predict_dataset, output.predictions, "predict")
+ metrics = self.compute_metrics(predictions)
+
+ # Prefix all keys with metric_key_prefix + '_'
+ for key in list(metrics.keys()):
+ if not key.startswith(f"{metric_key_prefix}_"):
+ metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
+
+ return PredictionOutput(predictions=predictions.predictions, label_ids=predictions.label_ids, metrics=metrics)
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py
new file mode 100644
index 00000000000..6514e6ba7ad
--- /dev/null
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py
@@ -0,0 +1,481 @@
+# coding=utf-8
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Post-processing utilities for question answering.
+"""
+import collections
+import json
+import logging
+import os
+import torch
+from typing import Optional, Tuple
+from collections import UserDict
+from packaging.version import Version
+from neural_compressor.torch.utils import get_torch_version
+
+import numpy as np
+from tqdm.auto import tqdm
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_example_inputs(model, dataloader):
+ version = get_torch_version()
+ from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
+
+ # Suggest set dataloader like calib_dataloader
+ if dataloader is None:
+ return None
+ device = next(model.parameters()).device
+ try:
+ for idx, (input, label) in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, (list, tuple)):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ except Exception as e: # pragma: no cover
+ for idx, input in enumerate(dataloader):
+ input = move_input_to_device(input, device)
+ if isinstance(input, (dict, UserDict)): # pragma: no cover
+ assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0"
+ if "label" in input.keys():
+ input.pop("label")
+ if version.release <= Version("2.0.1").release:
+ return tuple(input.values())
+ else:
+ return dict(input)
+ if isinstance(input, list) or isinstance(input, tuple):
+ return tuple(input)
+ if isinstance(input, torch.Tensor):
+ return input
+ break
+ if idx == 0:
+ assert False, "Please checkout the example_inputs format."
+
+
+def postprocess_qa_predictions(
+ examples,
+ features,
+ predictions: Tuple[np.ndarray, np.ndarray],
+ version_2_with_negative: bool = False,
+ n_best_size: int = 20,
+ max_answer_length: int = 30,
+ null_score_diff_threshold: float = 0.0,
+ output_dir: Optional[str] = None,
+ prefix: Optional[str] = None,
+ log_level: Optional[int] = logging.WARNING,
+):
+ """
+ Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the
+original contexts. This is the base postprocessing function for models that only return start and end logits.
+
+ Args:
+ examples: The non-preprocessed dataset (see the main script for more information).
+ features: The processed dataset (see the main script for more information).
+ predictions (:obj:`Tuple[np.ndarray, np.ndarray]`):
+ The predictions of the model: two arrays containing the start logits and the end logits respectively. Its
+ first dimension must match the number of elements of :obj:`features`.
+ version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`):
+ Whether or not the underlying dataset contains examples with no answers.
+ n_best_size (:obj:`int`, `optional`, defaults to 20):
+ The total number of n-best predictions to generate when looking for an answer.
+ max_answer_length (:obj:`int`, `optional`, defaults to 30):
+ The maximum length of an answer that can be generated. This is needed because the start and end predictions
+ are not conditioned on one another.
+ null_score_diff_threshold (:obj:`float`, `optional`, defaults to 0):
+ The threshold used to select the null answer: if the best answer has a score that is less than the score of
+ the null answer minus this threshold, the null answer is selected for this example (note that the score of
+ the null answer for an example giving several features is the minimum of the scores for the null answer on
+ each feature: all features must be aligned on the fact they `want` to predict a null answer).
+
+ Only useful when :obj:`version_2_with_negative` is :obj:`True`.
+ output_dir (:obj:`str`, `optional`):
+ If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if
+ :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null
+ answers, are saved in `output_dir`.
+ prefix (:obj:`str`, `optional`):
+ If provided, the dictionaries mentioned above are saved with `prefix` added to their names.
+ log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``):
+ ``logging`` log level (e.g., ``logging.WARNING``)
+ """
+ if len(predictions) != 2:
+ raise ValueError("`predictions` should be a tuple with two elements (start_logits, end_logits).")
+ all_start_logits, all_end_logits = predictions
+
+ if len(predictions[0]) != len(features):
+ raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.")
+
+ # Build a map example to its corresponding features.
+ example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
+ features_per_example = collections.defaultdict(list)
+ for i, feature in enumerate(features):
+ features_per_example[example_id_to_index[feature["example_id"]]].append(i)
+
+ # The dictionaries we have to fill.
+ all_predictions = collections.OrderedDict()
+ all_nbest_json = collections.OrderedDict()
+ if version_2_with_negative:
+ scores_diff_json = collections.OrderedDict()
+
+ # Logging.
+ logger.setLevel(log_level)
+ logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.")
+
+ # Let's loop over all the examples!
+ for example_index, example in enumerate(tqdm(examples)):
+ # Those are the indices of the features associated to the current example.
+ feature_indices = features_per_example[example_index]
+
+ min_null_prediction = None
+ prelim_predictions = []
+
+ # Looping through all the features associated to the current example.
+ for feature_index in feature_indices:
+ # We grab the predictions of the model for this feature.
+ start_logits = all_start_logits[feature_index]
+ end_logits = all_end_logits[feature_index]
+            # This is what will allow us to map some of the positions in our logits to spans of text in the original
+            # context.
+ offset_mapping = features[feature_index]["offset_mapping"]
+ # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context
+ # available in the current feature.
+ token_is_max_context = features[feature_index].get("token_is_max_context", None)
+
+ # Update minimum null prediction.
+ feature_null_score = start_logits[0] + end_logits[0]
+ if min_null_prediction is None or min_null_prediction["score"] > feature_null_score:
+ min_null_prediction = {
+ "offsets": (0, 0),
+ "score": feature_null_score,
+ "start_logit": start_logits[0],
+ "end_logit": end_logits[0],
+ }
+
+ # Go through all possibilities for the `n_best_size` greater start and end logits.
+ start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
+ end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
+ for start_index in start_indexes:
+ for end_index in end_indexes:
+ # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond
+ # to part of the input_ids that are not in the context.
+ if (
+ start_index >= len(offset_mapping)
+ or end_index >= len(offset_mapping)
+ or offset_mapping[start_index] is None
+ or len(offset_mapping[start_index]) < 2
+ or offset_mapping[end_index] is None
+ or len(offset_mapping[end_index]) < 2
+ ):
+ continue
+ # Don't consider answers with a length that is either < 0 or > max_answer_length.
+ if end_index < start_index or end_index - start_index + 1 > max_answer_length:
+ continue
+                    # Don't consider answers that don't have the maximum context available (if such information is
+                    # provided).
+ if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False):
+ continue
+ prelim_predictions.append(
+ {
+ "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]),
+ "score": start_logits[start_index] + end_logits[end_index],
+ "start_logit": start_logits[start_index],
+ "end_logit": end_logits[end_index],
+ }
+ )
+ if version_2_with_negative:
+ # Add the minimum null prediction
+ prelim_predictions.append(min_null_prediction)
+ null_score = min_null_prediction["score"]
+
+ # Only keep the best `n_best_size` predictions.
+ predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size]
+
+ # Add back the minimum null prediction if it was removed because of its low score.
+ if version_2_with_negative and not any(p["offsets"] == (0, 0) for p in predictions):
+ predictions.append(min_null_prediction)
+
+ # Use the offsets to gather the answer text in the original context.
+ context = example["context"]
+ for pred in predictions:
+ offsets = pred.pop("offsets")
+ pred["text"] = context[offsets[0] : offsets[1]]
+
+ # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid
+ # failure.
+ if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""):
+ predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0})
+
+ # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using
+ # the LogSumExp trick).
+ scores = np.array([pred.pop("score") for pred in predictions])
+ exp_scores = np.exp(scores - np.max(scores))
+ probs = exp_scores / exp_scores.sum()
+
+ # Include the probabilities in our predictions.
+ for prob, pred in zip(probs, predictions):
+ pred["probability"] = prob
+
+ # Pick the best prediction. If the null answer is not possible, this is easy.
+ if not version_2_with_negative:
+ all_predictions[example["id"]] = predictions[0]["text"]
+ else:
+ # Otherwise we first need to find the best non-empty prediction.
+ i = 0
+ while predictions[i]["text"] == "":
+ i += 1
+ best_non_null_pred = predictions[i]
+
+ # Then we compare to the null prediction using the threshold.
+ score_diff = null_score - best_non_null_pred["start_logit"] - best_non_null_pred["end_logit"]
+ scores_diff_json[example["id"]] = float(score_diff) # To be JSON-serializable.
+ if score_diff > null_score_diff_threshold:
+ all_predictions[example["id"]] = ""
+ else:
+ all_predictions[example["id"]] = best_non_null_pred["text"]
+
+ # Make `predictions` JSON-serializable by casting np.float32 back to float.
+ all_nbest_json[example["id"]] = [
+ {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()}
+ for pred in predictions
+ ]
+
+ # If we have an output_dir, let's save all those dicts.
+ if output_dir is not None:
+ if not os.path.isdir(output_dir):
+ raise EnvironmentError(f"{output_dir} is not a directory.")
+
+ prediction_file = os.path.join(
+ output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
+ )
+ nbest_file = os.path.join(
+ output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json"
+ )
+ if version_2_with_negative:
+ null_odds_file = os.path.join(
+ output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json"
+ )
+
+ logger.info(f"Saving predictions to {prediction_file}.")
+ with open(prediction_file, "w") as writer:
+ writer.write(json.dumps(all_predictions, indent=4) + "\n")
+ logger.info(f"Saving nbest_preds to {nbest_file}.")
+ with open(nbest_file, "w") as writer:
+ writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
+ if version_2_with_negative:
+ logger.info(f"Saving null_odds to {null_odds_file}.")
+ with open(null_odds_file, "w") as writer:
+ writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
+
+ return all_predictions
+
+
+def postprocess_qa_predictions_with_beam_search(
+ examples,
+ features,
+ predictions: Tuple[np.ndarray, np.ndarray],
+ version_2_with_negative: bool = False,
+ n_best_size: int = 20,
+ max_answer_length: int = 30,
+ start_n_top: int = 5,
+ end_n_top: int = 5,
+ output_dir: Optional[str] = None,
+ prefix: Optional[str] = None,
+ log_level: Optional[int] = logging.WARNING,
+):
+ """
+ Post-processes the predictions of a question-answering model with beam search to convert them to answers that are substrings of the
+original contexts. This is the postprocessing function for models that return start and end logits, indices, as well as
+ cls token predictions.
+
+ Args:
+ examples: The non-preprocessed dataset (see the main script for more information).
+ features: The processed dataset (see the main script for more information).
+ predictions (:obj:`Tuple[np.ndarray, np.ndarray]`):
+ The predictions of the model: two arrays containing the start logits and the end logits respectively. Its
+ first dimension must match the number of elements of :obj:`features`.
+ version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`):
+ Whether or not the underlying dataset contains examples with no answers.
+ n_best_size (:obj:`int`, `optional`, defaults to 20):
+ The total number of n-best predictions to generate when looking for an answer.
+ max_answer_length (:obj:`int`, `optional`, defaults to 30):
+ The maximum length of an answer that can be generated. This is needed because the start and end predictions
+ are not conditioned on one another.
+ start_n_top (:obj:`int`, `optional`, defaults to 5):
+        The number of top start logits to keep when searching for the :obj:`n_best_size` predictions.
+ end_n_top (:obj:`int`, `optional`, defaults to 5):
+        The number of top end logits to keep when searching for the :obj:`n_best_size` predictions.
+ output_dir (:obj:`str`, `optional`):
+ If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if
+ :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null
+ answers, are saved in `output_dir`.
+ prefix (:obj:`str`, `optional`):
+ If provided, the dictionaries mentioned above are saved with `prefix` added to their names.
+ log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``):
+ ``logging`` log level (e.g., ``logging.WARNING``)
+ """
+ if len(predictions) != 5:
+ raise ValueError("`predictions` should be a tuple with five elements.")
+ start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions
+
+ if len(predictions[0]) != len(features):
+ raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.")
+
+ # Build a map example to its corresponding features.
+ example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
+ features_per_example = collections.defaultdict(list)
+ for i, feature in enumerate(features):
+ features_per_example[example_id_to_index[feature["example_id"]]].append(i)
+
+ # The dictionaries we have to fill.
+ all_predictions = collections.OrderedDict()
+ all_nbest_json = collections.OrderedDict()
+ scores_diff_json = collections.OrderedDict() if version_2_with_negative else None
+
+ # Logging.
+ logger.setLevel(log_level)
+ logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.")
+
+ # Let's loop over all the examples!
+ for example_index, example in enumerate(tqdm(examples)):
+ # Those are the indices of the features associated to the current example.
+ feature_indices = features_per_example[example_index]
+
+ min_null_score = None
+ prelim_predictions = []
+
+ # Looping through all the features associated to the current example.
+ for feature_index in feature_indices:
+ # We grab the predictions of the model for this feature.
+ start_log_prob = start_top_log_probs[feature_index]
+ start_indexes = start_top_index[feature_index]
+ end_log_prob = end_top_log_probs[feature_index]
+ end_indexes = end_top_index[feature_index]
+ feature_null_score = cls_logits[feature_index]
+            # This is what will allow us to map some of the positions in our logits to spans of text in the original
+            # context.
+ offset_mapping = features[feature_index]["offset_mapping"]
+ # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context
+ # available in the current feature.
+ token_is_max_context = features[feature_index].get("token_is_max_context", None)
+
+ # Update minimum null prediction
+ if min_null_score is None or feature_null_score < min_null_score:
+ min_null_score = feature_null_score
+
+ # Go through all possibilities for the `n_start_top`/`n_end_top` greater start and end logits.
+ for i in range(start_n_top):
+ for j in range(end_n_top):
+ start_index = int(start_indexes[i])
+ j_index = i * end_n_top + j
+ end_index = int(end_indexes[j_index])
+ # Don't consider out-of-scope answers (last part of the test should be unnecessary because of the
+ # p_mask but let's not take any risk)
+ if (
+ start_index >= len(offset_mapping)
+ or end_index >= len(offset_mapping)
+ or offset_mapping[start_index] is None
+ or offset_mapping[end_index] is None
+ ):
+ continue
+ # Don't consider answers with a length negative or > max_answer_length.
+ if end_index < start_index or end_index - start_index + 1 > max_answer_length:
+ continue
+                    # Don't consider answers that don't have the maximum context available (if such information is
+                    # provided).
+ if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False):
+ continue
+ prelim_predictions.append(
+ {
+ "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]),
+ "score": start_log_prob[i] + end_log_prob[j_index],
+ "start_log_prob": start_log_prob[i],
+ "end_log_prob": end_log_prob[j_index],
+ }
+ )
+
+ # Only keep the best `n_best_size` predictions.
+ predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size]
+
+ # Use the offsets to gather the answer text in the original context.
+ context = example["context"]
+ for pred in predictions:
+ offsets = pred.pop("offsets")
+ pred["text"] = context[offsets[0] : offsets[1]]
+
+ # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid
+ # failure.
+ if len(predictions) == 0:
+ predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6})
+
+ # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using
+ # the LogSumExp trick).
+ scores = np.array([pred.pop("score") for pred in predictions])
+ exp_scores = np.exp(scores - np.max(scores))
+ probs = exp_scores / exp_scores.sum()
+
+ # Include the probabilities in our predictions.
+ for prob, pred in zip(probs, predictions):
+ pred["probability"] = prob
+
+ # Pick the best prediction and set the probability for the null answer.
+ all_predictions[example["id"]] = predictions[0]["text"]
+ if version_2_with_negative:
+ scores_diff_json[example["id"]] = float(min_null_score)
+
+ # Make `predictions` JSON-serializable by casting np.float32 back to float.
+ all_nbest_json[example["id"]] = [
+ {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()}
+ for pred in predictions
+ ]
+
+ # If we have an output_dir, let's save all those dicts.
+ if output_dir is not None:
+ if not os.path.isdir(output_dir):
+ raise EnvironmentError(f"{output_dir} is not a directory.")
+
+ prediction_file = os.path.join(
+ output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
+ )
+ nbest_file = os.path.join(
+ output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json"
+ )
+ if version_2_with_negative:
+ null_odds_file = os.path.join(
+ output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json"
+ )
+
+ logger.info(f"Saving predictions to {prediction_file}.")
+ with open(prediction_file, "w") as writer:
+ writer.write(json.dumps(all_predictions, indent=4) + "\n")
+ logger.info(f"Saving nbest_preds to {nbest_file}.")
+ with open(nbest_file, "w") as writer:
+ writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
+ if version_2_with_negative:
+ logger.info(f"Saving null_odds to {null_odds_file}.")
+ with open(null_odds_file, "w") as writer:
+ writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
+
+ return all_predictions, scores_diff_json
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000000..0f7ad8bfc17
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md
@@ -0,0 +1,5 @@
+# Code of Conduct
+
+Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
+Please read the [full text](https://code.fb.com/codeofconduct/)
+so that you can understand what actions will and will not be tolerated.
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md
new file mode 100644
index 00000000000..cc013a17ec8
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md
@@ -0,0 +1,36 @@
+# Contributing to DLRM
+We want to make contributing to this project as easy and transparent as
+possible.
+
+## Pull Requests
+We actively welcome your pull requests.
+
+1. Fork the repo and create your branch from `master`.
+2. If you've added code that should be tested, add tests.
+3. If you've changed APIs, update the documentation.
+4. Ensure the test suite passes.
+5. Make sure your code lints.
+6. If you haven't already, complete the Contributor License Agreement ("CLA").
+
+## Contributor License Agreement ("CLA")
+In order to accept your pull request, we need you to submit a CLA. You only need
+to do this once to work on any of Facebook's open source projects.
+
+Complete your CLA here:
+
+## Issues
+We use GitHub issues to track public bugs. Please ensure your description is
+clear and has sufficient instructions to be able to reproduce the issue.
+
+Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
+disclosure of security bugs. In those cases, please go through the process
+outlined on that page and do not file a public issue.
+
+## Coding Style
+* 4 spaces for indentation rather than tabs
+* 80 character line length
+* in general, please maintain a consistent style with the rest of the code
+
+## License
+By contributing to DLRM, you agree that your contributions will be licensed
+under the LICENSE file in the root directory of this source tree.
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE
new file mode 100644
index 00000000000..b96dcb0480a
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Facebook, Inc. and its affiliates.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/README.md b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/README.md
new file mode 100644
index 00000000000..918cc1edc23
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/README.md
@@ -0,0 +1,90 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the PyTorch DLRM tuning zoo result. The original DLRM README is available at [DLRM README](https://github.com/facebookresearch/dlrm/blob/master/README.md)
+
+> **Note**
+>
+> Please ensure your PC has >370G of memory to run DLRM
+> IPEX version >= 1.11
+
+# Prerequisite
+
+### 1. Environment
+
+PyTorch 1.11 or higher version is needed with pytorch_fx backend.
+
+ ```shell
+ # Install dependency
+ cd examples/pytorch/recommendation/dlrm/quantization/ptq/ipex
+ pip install -r requirements.txt
+ ```
+> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### 2. Prepare Dataset
+
+ The code supports interface with the [Criteo Terabyte Dataset](https://labs.criteo.com/2013/12/download-terabyte-click-logs/)
+
+ 1. download the raw data files day_0.gz, ...,day_23.gz and unzip them.
+ 2. Specify the location of the unzipped text files day_0, ...,day_23, using --raw-data-file= (the day number will be appended automatically); please refer to the "Run" command.
+
+### 3. Prepare pretrained model
+
+ Download the DLRM PyTorch weights (`tb00_40M.pt`, 90GB) from the
+[MLPerf repo](https://github.com/mlcommons/inference/tree/master/recommendation/dlrm/pytorch#more-information-about-the-model-weights)
+
+# Run
+### tune with INC
+ ```shell
+ cd examples/pytorch/recommendation/dlrm/quantization/ptq/ipex
+ bash run_quant.sh --input_model="/path/of/pretrained/model" --dataset_location="/path/of/dataset"
+ ```
+
+### benchmark
+```shell
+bash run_benchmark.sh --input_model="/path/of/pretrained/model" --dataset_location="/path/of/dataset" --mode=accuracy --int8=true
+```
+
+
+Examples of enabling Intel® Neural Compressor
+=========================
+
+This is a tutorial of how to enable DLRM model with Intel® Neural Compressor.
+
+
+### Code update
+
+We need to update dlrm_s_pytorch.py as shown below
+
+```python
+# evaluation
+def eval_func(model):
+ args.int8 = model.is_quantized
+ with torch.no_grad():
+ return inference(
+ args,
+ model,
+ best_acc_test,
+ best_auc_test,
+ test_ld,
+ trace=args.int8
+ )
+
+# calibration
+def calib_fn(model):
+ calib_number = 0
+ for X_test, lS_o_test, lS_i_test, T in train_ld:
+ if calib_number < 102400:
+ model(X_test, lS_o_test, lS_i_test)
+ calib_number += 1
+
+from neural_compressor.torch.quantization import SmoothQuantConfig, autotune, TuningConfig
+tune_config = TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning())
+dlrm = autotune(
+ dlrm,
+ tune_config=tune_config,
+ eval_fn=eval_func,
+ run_fn=calib_fn,
+)
+dlrm.save("saved_results")
+```
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py
new file mode 100644
index 00000000000..5bc0c4d3aab
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py
@@ -0,0 +1,388 @@
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+import numpy as np
+from torch.utils.data import Dataset
+import torch
+import time
+import math
+from tqdm import tqdm
+import argparse
+import extend_distributed as ext_dist
+
+
+class DataLoader:
+ """
+ DataLoader dedicated for the Criteo Terabyte Click Logs dataset
+ """
+
+ def __init__(
+ self,
+ data_filename,
+ data_directory,
+ days,
+ batch_size,
+ max_ind_range=-1,
+ split="train",
+ drop_last_batch=False
+ ):
+ self.data_filename = data_filename
+ self.data_directory = data_directory
+ self.days = days
+ self.batch_size = batch_size
+ self.max_ind_range = max_ind_range
+
+ total_file = os.path.join(
+ data_directory,
+ data_filename + "_day_count.npz"
+ )
+ with np.load(total_file) as data:
+ total_per_file = data["total_per_file"][np.array(days)]
+
+ self.length = sum(total_per_file)
+ if split == "test" or split == "val":
+ self.length = int(np.ceil(self.length / 2.))
+ self.split = split
+ self.drop_last_batch = drop_last_batch
+
+ def __iter__(self):
+ return iter(
+ _batch_generator(
+ self.data_filename, self.data_directory, self.days,
+ self.batch_size, self.split, self.drop_last_batch, self.max_ind_range
+ )
+ )
+
+ def __len__(self):
+ if self.drop_last_batch:
+ return self.length // self.batch_size
+ else:
+ return math.ceil(self.length / self.batch_size)
+
+
+def _transform_features(
+ x_int_batch, x_cat_batch, y_batch, max_ind_range, flag_input_torch_tensor=False
+):
+ if max_ind_range > 0:
+ x_cat_batch = x_cat_batch % max_ind_range
+
+ if flag_input_torch_tensor:
+ x_int_batch = torch.log(x_int_batch.clone().detach().type(torch.float) + 1)
+ x_cat_batch = x_cat_batch.clone().detach().type(torch.long)
+ y_batch = y_batch.clone().detach().type(torch.float32).view(-1, 1)
+ else:
+ x_int_batch = torch.log(torch.tensor(x_int_batch, dtype=torch.float) + 1)
+ x_cat_batch = torch.tensor(x_cat_batch, dtype=torch.long)
+ y_batch = torch.tensor(y_batch, dtype=torch.float32).view(-1, 1)
+
+ batch_size = x_cat_batch.shape[0]
+ feature_count = x_cat_batch.shape[1]
+ lS_o = torch.arange(batch_size).reshape(1, -1).repeat(feature_count, 1)
+
+ return x_int_batch, lS_o, x_cat_batch.t(), y_batch.view(-1, 1)
+
+
+def _batch_generator(
+ data_filename, data_directory, days, batch_size, split, drop_last, max_ind_range
+):
+ previous_file = None
+ for day in days:
+ filepath = os.path.join(
+ data_directory,
+ data_filename + "_{}_reordered.npz".format(day)
+ )
+
+ # print('Loading file: ', filepath)
+ with np.load(filepath) as data:
+ x_int = data["X_int"]
+ x_cat = data["X_cat"]
+ y = data["y"]
+
+ samples_in_file = y.shape[0]
+ batch_start_idx = 0
+ if split == "test" or split == "val":
+ length = int(np.ceil(samples_in_file / 2.))
+ if split == "test":
+ samples_in_file = length
+ elif split == "val":
+ batch_start_idx = samples_in_file - length
+
+ while batch_start_idx < samples_in_file - batch_size:
+
+ missing_samples = batch_size
+ if previous_file is not None:
+ missing_samples -= previous_file['y'].shape[0]
+
+ current_slice = slice(batch_start_idx, batch_start_idx + missing_samples)
+
+ x_int_batch = x_int[current_slice]
+ x_cat_batch = x_cat[current_slice]
+ y_batch = y[current_slice]
+
+ if previous_file is not None:
+ x_int_batch = np.concatenate(
+ [previous_file['x_int'], x_int_batch],
+ axis=0
+ )
+ x_cat_batch = np.concatenate(
+ [previous_file['x_cat'], x_cat_batch],
+ axis=0
+ )
+ y_batch = np.concatenate([previous_file['y'], y_batch], axis=0)
+ previous_file = None
+
+ if x_int_batch.shape[0] != batch_size:
+ raise ValueError('should not happen')
+
+ yield _transform_features(x_int_batch, x_cat_batch, y_batch, max_ind_range)
+
+ batch_start_idx += missing_samples
+ if batch_start_idx != samples_in_file:
+ current_slice = slice(batch_start_idx, samples_in_file)
+ if previous_file is not None:
+ previous_file = {
+ 'x_int' : np.concatenate(
+ [previous_file['x_int'], x_int[current_slice]],
+ axis=0
+ ),
+ 'x_cat' : np.concatenate(
+ [previous_file['x_cat'], x_cat[current_slice]],
+ axis=0
+ ),
+ 'y' : np.concatenate([previous_file['y'], y[current_slice]], axis=0)
+ }
+ else:
+ previous_file = {
+ 'x_int' : x_int[current_slice],
+ 'x_cat' : x_cat[current_slice],
+ 'y' : y[current_slice]
+ }
+
+ if not drop_last:
+ yield _transform_features(
+ previous_file['x_int'],
+ previous_file['x_cat'],
+ previous_file['y'],
+ max_ind_range
+ )
+
+
+def _test():
+ generator = _batch_generator(
+ data_filename='day',
+ data_directory='./input',
+ days=range(23),
+ split="train",
+ batch_size=2048,
+ drop_last=True,
+ max_ind_range=-1
+ )
+ t1 = time.time()
+ for x_int, lS_o, x_cat, y in generator:
+ t2 = time.time()
+ time_diff = t2 - t1
+ t1 = t2
+ print(
+ "time {} x_int.shape: {} lS_o.shape: {} x_cat.shape: {} y.shape: {}".format(
+ time_diff, x_int.shape, lS_o.shape, x_cat.shape, y.shape
+ )
+ )
+
+
+class CriteoBinDataset(Dataset):
+ """Binary version of criteo dataset."""
+
+ def __init__(self, data_file, counts_file,
+ batch_size=1, max_ind_range=-1, bytes_per_feature=4):
+ # dataset
+ self.tar_fea = 1 # single target
+ self.den_fea = 13 # 13 dense features
+ self.spa_fea = 26 # 26 sparse features
+ self.tad_fea = self.tar_fea + self.den_fea
+ self.tot_fea = self.tad_fea + self.spa_fea
+
+ self.batch_size = batch_size
+ self.max_ind_range = max_ind_range
+ self.bytes_per_entry = (bytes_per_feature * self.tot_fea * batch_size)
+
+ self.num_entries = math.ceil(os.path.getsize(data_file) / self.bytes_per_entry)
+
+ data_file_size = os.path.getsize(data_file)
+ bytes_per_sample = bytes_per_feature * self.tot_fea
+ if ext_dist.my_size > 1:
+ self.bytes_per_rank = self.bytes_per_entry // ext_dist.my_size
+ else:
+ self.bytes_per_rank = self.bytes_per_entry
+
+ if ext_dist.my_size > 1 and self.num_entries * self.bytes_per_entry > data_file_size:
+ last_batch = (data_file_size % self.bytes_per_entry) // bytes_per_sample
+ self.bytes_last_batch = last_batch // ext_dist.my_size * bytes_per_sample
+ else:
+ self.bytes_last_batch = self.bytes_per_rank
+
+ if self.bytes_last_batch == 0:
+ self.num_entries = self.num_entries - 1
+ self.bytes_last_batch = self.bytes_per_rank
+
+ print('data file:', data_file, 'number of batches:', self.num_entries)
+ self.file = open(data_file, 'rb')
+
+ with np.load(counts_file) as data:
+ self.counts = data["counts"]
+
+ # hardcoded for now
+ self.m_den = 13
+
+ def __len__(self):
+ return self.num_entries
+
+ def __getitem__(self, idx):
+ my_rank = ext_dist.dist.get_rank() if ext_dist.my_size > 1 else 0
+ rank_size = self.bytes_last_batch if idx == (self.num_entries - 1) else self.bytes_per_rank
+ self.file.seek(idx * self.bytes_per_entry + rank_size * my_rank, 0)
+ raw_data = self.file.read(rank_size)
+ array = np.frombuffer(raw_data, dtype=np.int32)
+ tensor = torch.from_numpy(array).view((-1, self.tot_fea))
+
+ return _transform_features(x_int_batch=tensor[:, 1:14],
+ x_cat_batch=tensor[:, 14:],
+ y_batch=tensor[:, 0],
+ max_ind_range=self.max_ind_range,
+ flag_input_torch_tensor=True)
+
+ def __del__(self):
+ self.file.close()
+
+
+def numpy_to_binary(input_files, output_file_path, split='train'):
+ """Convert the data to a binary format to be read with CriteoBinDataset."""
+
+ # WARNING - both categorical and numerical data must fit into int32 for
+ # the following code to work correctly
+
+ with open(output_file_path, 'wb') as output_file:
+ if split == 'train':
+ for input_file in input_files:
+ print('Processing file: ', input_file)
+
+ np_data = np.load(input_file)
+ np_data = np.concatenate([np_data['y'].reshape(-1, 1),
+ np_data['X_int'],
+ np_data['X_cat']], axis=1)
+ np_data = np_data.astype(np.int32)
+
+ output_file.write(np_data.tobytes())
+ else:
+ assert len(input_files) == 1
+ np_data = np.load(input_files[0])
+ np_data = np.concatenate([np_data['y'].reshape(-1, 1),
+ np_data['X_int'],
+ np_data['X_cat']], axis=1)
+ np_data = np_data.astype(np.int32)
+
+ samples_in_file = np_data.shape[0]
+ midpoint = int(np.ceil(samples_in_file / 2.))
+ if split == "test":
+ begin = 0
+ end = midpoint
+ elif split == "val":
+ begin = midpoint
+ end = samples_in_file
+ else:
+ raise ValueError('Unknown split value: ', split)
+
+ output_file.write(np_data[begin:end].tobytes())
+
+
+def _preprocess(args):
+ train_files = ['{}_{}_reordered.npz'.format(args.input_data_prefix, day) for
+ day in range(0, 23)]
+
+ test_valid_file = args.input_data_prefix + '_23_reordered.npz'
+
+ os.makedirs(args.output_directory, exist_ok=True)
+ for split in ['train', 'val', 'test']:
+ print('Running preprocessing for split =', split)
+
+ output_file = os.path.join(args.output_directory,
+ '{}_data.bin'.format(split))
+
+ input_files = train_files if split == 'train' else [test_valid_file]
+ numpy_to_binary(input_files=input_files,
+ output_file_path=output_file,
+ split=split)
+
+
+def _test_bin():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--output_directory', required=True)
+ parser.add_argument('--input_data_prefix', required=True)
+ parser.add_argument('--split', choices=['train', 'test', 'val'],
+ required=True)
+ args = parser.parse_args()
+
+ _preprocess(args)
+
+ binary_data_file = os.path.join(args.output_directory,
+ '{}_data.bin'.format(args.split))
+
+ counts_file = os.path.join(args.output_directory, 'day_fea_count.npz')
+ dataset_binary = CriteoBinDataset(data_file=binary_data_file,
+ counts_file=counts_file,
+ batch_size=2048,)
+ from dlrm_data_pytorch import CriteoDataset
+ from dlrm_data_pytorch import collate_wrapper_criteo_offset as collate_wrapper_criteo
+
+ binary_loader = torch.utils.data.DataLoader(
+ dataset_binary,
+ batch_size=None,
+ shuffle=False,
+ num_workers=0,
+ collate_fn=None,
+ pin_memory=False,
+ drop_last=False,
+ )
+
+ original_dataset = CriteoDataset(
+ dataset='terabyte',
+ max_ind_range=10 * 1000 * 1000,
+ sub_sample_rate=1,
+ randomize=True,
+ split=args.split,
+ raw_path=args.input_data_prefix,
+ pro_data='dummy_string',
+ memory_map=True
+ )
+
+ original_loader = torch.utils.data.DataLoader(
+ original_dataset,
+ batch_size=2048,
+ shuffle=False,
+ num_workers=0,
+ collate_fn=collate_wrapper_criteo,
+ pin_memory=False,
+ drop_last=False,
+ )
+
+ assert len(dataset_binary) == len(original_loader)
+ for i, (old_batch, new_batch) in tqdm(enumerate(zip(original_loader,
+ binary_loader)),
+ total=len(dataset_binary)):
+
+ for j in range(len(new_batch)):
+ if not np.array_equal(old_batch[j], new_batch[j]):
+ raise ValueError('FAILED: Datasets not equal')
+ if i > len(dataset_binary):
+ break
+ print('PASSED')
+
+
+if __name__ == '__main__':
+ _test()
+ _test_bin()
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py
new file mode 100644
index 00000000000..6ceef9517df
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py
@@ -0,0 +1,1292 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Description: generate inputs and targets for the DLRM benchmark
+#
+# Utility function(s) to download and pre-process public data sets
+# - Criteo Kaggle Display Advertising Challenge Dataset
+# https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset
+# - Criteo Terabyte Dataset
+# https://labs.criteo.com/2013/12/download-terabyte-click-logs
+#
+# After downloading dataset, run:
+# getCriteoAdData(
+# datafile="",
+# o_filename=kaggleAdDisplayChallenge_processed.npz,
+# max_ind_range=-1,
+# sub_sample_rate=0.0,
+# days=7,
+# data_split='train',
+# randomize='total',
+# criteo_kaggle=True,
+# memory_map=False
+# )
+# getCriteoAdData(
+# datafile="",
+# o_filename=terabyte_processed.npz,
+# max_ind_range=-1,
+# sub_sample_rate=0.0,
+# days=24,
+# data_split='train',
+# randomize='total',
+# criteo_kaggle=False,
+# memory_map=False
+# )
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys
+# import os
+from os import path
+from multiprocessing import Process, Manager
+# import io
+# from io import StringIO
+# import collections as coll
+
+import numpy as np
+
+
+def convertUStringToDistinctIntsDict(mat, convertDicts, counts):
+ # Converts matrix of unicode strings into distinct integers.
+ #
+ # Inputs:
+ # mat (np.array): array of unicode strings to convert
+ # convertDicts (list): dictionary for each column
+ # counts (list): number of different categories in each column
+ #
+ # Outputs:
+ # out (np.array): array of output integers
+ # convertDicts (list): dictionary for each column
+ # counts (list): number of different categories in each column
+
+ # check if convertDicts and counts match correct length of mat
+ if len(convertDicts) != mat.shape[1] or len(counts) != mat.shape[1]:
+ print("Length of convertDicts or counts does not match input shape")
+ print("Generating convertDicts and counts...")
+
+ convertDicts = [{} for _ in range(mat.shape[1])]
+ counts = [0 for _ in range(mat.shape[1])]
+
+ # initialize output
+ out = np.zeros(mat.shape)
+
+ for j in range(mat.shape[1]):
+ for i in range(mat.shape[0]):
+ # add to convertDict and increment count
+ if mat[i, j] not in convertDicts[j]:
+ convertDicts[j][mat[i, j]] = counts[j]
+ counts[j] += 1
+ out[i, j] = convertDicts[j][mat[i, j]]
+
+ return out, convertDicts, counts
+
+
+def convertUStringToDistinctIntsUnique(mat, mat_uni, counts):
+ # mat is an array of 0,...,# samples, with each being 26 categorical features
+
+ # check if mat_unique and counts match correct length of mat
+ if len(mat_uni) != mat.shape[1] or len(counts) != mat.shape[1]:
+ print("Length of mat_unique or counts does not match input shape")
+ print("Generating mat_unique and counts...")
+
+ mat_uni = [np.array([]) for _ in range(mat.shape[1])]
+ counts = [0 for _ in range(mat.shape[1])]
+
+ # initialize output
+ out = np.zeros(mat.shape)
+ ind_map = [np.array([]) for _ in range(mat.shape[1])]
+
+ # find out and assign unique ids to features
+ for j in range(mat.shape[1]):
+ m = mat_uni[j].size
+ mat_concat = np.concatenate((mat_uni[j], mat[:, j]))
+ mat_uni[j], ind_map[j] = np.unique(mat_concat, return_inverse=True)
+ out[:, j] = ind_map[j][m:]
+ counts[j] = mat_uni[j].size
+
+ return out, mat_uni, counts
+
+
+def processCriteoAdData(d_path, d_file, npzfile, i, convertDicts, pre_comp_counts):
+ # Process Kaggle Display Advertising Challenge or Terabyte Dataset
+ # by converting unicode strings in X_cat to integers and
+ # converting negative integer values in X_int.
+ #
+ # Loads data in the form "{kaggle|terabyte}_day_i.npz" where i is the day.
+ #
+ # Inputs:
+ # d_path (str): path for {kaggle|terabyte}_day_i.npz files
+ # i (int): splits in the dataset (typically 0 to 7 or 0 to 24)
+
+ # process data if not all files exist
+ filename_i = npzfile + "_{0}_processed.npz".format(i)
+
+ if path.exists(filename_i):
+ print("Using existing " + filename_i, end="\n")
+ else:
+ print("Not existing " + filename_i)
+ with np.load(npzfile + "_{0}.npz".format(i)) as data:
+ # categorical features
+ '''
+ # Approach 1a: using empty dictionaries
+ X_cat, convertDicts, counts = convertUStringToDistinctIntsDict(
+ data["X_cat"], convertDicts, counts
+ )
+ '''
+ '''
+ # Approach 1b: using empty np.unique
+ X_cat, convertDicts, counts = convertUStringToDistinctIntsUnique(
+ data["X_cat"], convertDicts, counts
+ )
+ '''
+ # Approach 2a: using pre-computed dictionaries
+ X_cat_t = np.zeros(data["X_cat_t"].shape)
+ for j in range(26):
+ for k, x in enumerate(data["X_cat_t"][j, :]):
+ X_cat_t[j, k] = convertDicts[j][x]
+ # continuous features
+ X_int = data["X_int"]
+ X_int[X_int < 0] = 0
+ # targets
+ y = data["y"]
+
+ np.savez_compressed(
+ filename_i,
+ # X_cat = X_cat,
+ X_cat=np.transpose(X_cat_t), # transpose of the data
+ X_int=X_int,
+ y=y,
+ )
+ print("Processed " + filename_i, end="\n")
+ # sanity check (applicable only if counts have been pre-computed & are re-computed)
+ # for j in range(26):
+ # if pre_comp_counts[j] != counts[j]:
+ # sys.exit("ERROR: Sanity check on counts has failed")
+ # print("\nSanity check on counts passed")
+
+ return
+
+
+def concatCriteoAdData(
+ d_path,
+ d_file,
+ npzfile,
+ trafile,
+ days,
+ data_split,
+ randomize,
+ total_per_file,
+ total_count,
+ memory_map,
+ o_filename
+):
+ # Concatenates different days and saves the result.
+ #
+ # Inputs:
+ # days (int): total number of days in the dataset (typically 7 or 24)
+ # d_path (str): path for {kaggle|terabyte}_day_i.npz files
+ # o_filename (str): output file name
+ #
+ # Output:
+ # o_file (str): output file path
+
+ if memory_map:
+ # dataset break up per fea
+ # tar_fea = 1 # single target
+ den_fea = 13 # 13 dense features
+ spa_fea = 26 # 26 sparse features
+ # tad_fea = tar_fea + den_fea
+ # tot_fea = tad_fea + spa_fea
+ # create offset per file
+ offset_per_file = np.array([0] + [x for x in total_per_file])
+ for i in range(days):
+ offset_per_file[i + 1] += offset_per_file[i]
+
+ '''
+ # Approach 1, 2 and 3 use indices, while Approach 4 does not use them
+ # create indices
+ indices = np.arange(total_count)
+ if data_split == "none":
+ if randomize == "total":
+ indices = np.random.permutation(indices)
+ else:
+ indices = np.array_split(indices, offset_per_file[1:-1])
+
+ # randomize train data (per day)
+ if randomize == "day": # or randomize == "total":
+ for i in range(len(indices) - 1):
+ indices[i] = np.random.permutation(indices[i])
+ print("Randomized indices per day ...")
+
+ train_indices = np.concatenate(indices[:-1])
+ test_indices = indices[-1]
+
+ # randomize train data (across days)
+ if randomize == "total":
+ train_indices = np.random.permutation(train_indices)
+ print("Randomized indices across days ...")
+
+ indices = np.concatenate((train_indices, test_indices))
+ # no reordering
+ # indices = np.arange(total_count)
+ '''
+ '''
+ # Approach 1: simple and slow (no grouping is used)
+ # check if data already exists
+ recreate_flag = False
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered.npy".format(j)
+ if path.exists(filename_j):
+ print("Using existing " + filename_j)
+ else:
+ recreate_flag = True
+ # load, reorder and concatenate data (memmap all reordered files per feature)
+ if recreate_flag:
+ # init reordered files (.npy appended automatically)
+ z = np.zeros((total_count))
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered".format(j)
+ np.save(filename_j, z)
+ print("Creating " + filename_j)
+
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_processed.npz".format(i)
+ with np.load(filename_i) as data:
+ X_cat_t = np.transpose(data["X_cat"])
+ X_int_t = np.transpose(data["X_int"])
+ y = data["y"]
+ size = len(y)
+ # sanity check
+ if total_per_file[i] != size:
+ sys.exit("ERROR: sanity check on number of samples failed")
+ # setup start and end ranges
+ start = offset_per_file[i]
+ end = offset_per_file[i + 1]
+ # print(filename_i)
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " diff=" + str(end - start) + "=" + str(total_per_file[i]))
+
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered.npy".format(j)
+ fj = np.load(filename_j, mmap_mode='r+')
+ if j < tar_fea:
+ fj[indices[start:end]] = y
+ elif tar_fea <= j and j < tad_fea:
+ fj[indices[start:end]] = X_int_t[j - tar_fea, :]
+ else:
+ fj[indices[start:end]] = X_cat_t[j - tad_fea, :]
+ del fj
+ else:
+ print("Reordered fea files already exist, skipping ...")
+
+ # check if data already exists
+ recreate_flag = False
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_reordered.npz".format(i)
+ if path.exists(filename_i):
+ print("Using existing " + filename_i)
+ else:
+ recreate_flag = True
+ # split reordered data by files (memmap all reordered files per feature)
+ # on the day boundary del the file object and memmap again
+ if recreate_flag:
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_reordered.npz".format(i)
+ size = total_per_file[i]
+ X_int_t = np.zeros((den_fea, size))
+ X_cat_t = np.zeros((spa_fea, size))
+ # setup start and end ranges
+ start = offset_per_file[i]
+ end = offset_per_file[i + 1]
+ print("Creating " + filename_i)
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " diff=" + str(end - start) + "=" + str(total_per_file[i]))
+
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered.npy".format(j)
+ fj = np.load(filename_j, mmap_mode='r')
+ if j < tar_fea:
+ y = fj[start:end]
+ elif tar_fea <= j and j < tad_fea:
+ X_int_t[j - tar_fea, :] = fj[start:end]
+ else:
+ X_cat_t[j - tad_fea, :] = fj[start:end]
+ del fj
+
+ np.savez_compressed(
+ filename_i,
+ X_cat=np.transpose(X_cat_t), # transpose of the data
+ X_int=np.transpose(X_int_t), # transpose of the data
+ y=y,
+ )
+ else:
+ print("Reordered day files already exist, skipping ...")
+ '''
+ '''
+ # Approach 2: group days
+ # check if data already exists
+ recreate_flag = False
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered.npy".format(j)
+ if path.exists(filename_j):
+ print("Using existing " + filename_j)
+ else:
+ recreate_flag = True
+ # load, reorder and concatenate data (memmap all reordered files per feature)
+ if recreate_flag:
+ # init reordered files (.npy appended automatically)
+ z = np.zeros((total_count))
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered".format(j)
+ np.save(filename_j, z)
+ print("Creating " + filename_j)
+
+ group_day = 3 # e.g. 8, 4 or 3
+ group_num = days // group_day
+ file_group = [i*group_day for i in range(group_num)] + [days]
+ for ii in range(group_num):
+ # for last may be group_size != group_num, therefore reset it below
+ group_size = file_group[ii + 1] - file_group[ii]
+ X_cat_t = [0]*group_size
+ X_int_t = [0]*group_size
+ y = [0]*group_size
+ start = [0]*group_size
+ end = [0]*group_size
+ for ig in range(group_size):
+ i = file_group[ii] + ig
+ filename_i = d_path + npzfile + "_{0}_processed.npz".format(i)
+ # setup start and end ranges
+ start[ig] = offset_per_file[i]
+ end[ig] = offset_per_file[i + 1]
+ # print(filename_i)
+ # load a group of files
+ with np.load(filename_i) as data:
+ X_cat_t[ig] = np.transpose(data["X_cat"])
+ X_int_t[ig] = np.transpose(data["X_int"])
+ y[ig] = data["y"]
+ # sanity check
+ if total_per_file[i] != len(y[ig]):
+ sys.exit("ERROR: sanity check on number of samples failed")
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " diff=" + str(end[ig]-start[ig]) + "=" + str(total_per_file[i]))
+
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered.npy".format(j)
+ fj = np.load(filename_j, mmap_mode='r+')
+ for ig in range(group_size):
+ if j < tar_fea:
+ fj[indices[start[ig]:end[ig]]] = y[ig]
+ elif tar_fea <= j and j < tad_fea:
+ fj[indices[start[ig]:end[ig]]] = X_int_t[ig][j - tar_fea, :]
+ else:
+ fj[indices[start[ig]:end[ig]]] = X_cat_t[ig][j - tad_fea, :]
+ del fj
+ else:
+ print("Reordered fea files already exist, skipping ...")
+
+ # check if data already exists
+ recreate_flag = False
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_reordered.npz".format(i)
+ if path.exists(filename_i):
+ print("Using existing " + filename_i)
+ else:
+ recreate_flag = True
+ # split reordered data by files (memmap all reordered files per feature)
+ # on the day boundary del the file object and memmap again
+ if recreate_flag:
+ for ii in range(group_num):
+ # for last may be group_size != group_num, therefore reset it below
+ group_size = file_group[ii + 1] - file_group[ii]
+ X_cat_t= []; X_int_t = []
+ for ig in range(group_size):
+ i = file_group[ii] + ig
+ X_int_t.append(np.zeros((den_fea, total_per_file[i])))
+ X_cat_t.append(np.zeros((spa_fea, total_per_file[i])))
+ y = [0]*group_size
+ start = [0]*group_size
+ end = [0]*group_size
+
+ for j in range(tot_fea):
+ filename_j = trafile + "_{0}_reordered.npy".format(j)
+ fj = np.load(filename_j, mmap_mode='r')
+ # load a group of files
+ for ig in range(group_size):
+ i = file_group[ii] + ig
+ # setup start and end ranges
+ start[ig] = offset_per_file[i]
+ end[ig] = offset_per_file[i + 1]
+ # load data for the group of files
+ if j < tar_fea:
+ y[ig] = fj[start[ig]:end[ig]]
+ elif tar_fea <= j and j < tad_fea:
+ X_int_t[ig][j - tar_fea, :] = fj[start[ig]:end[ig]]
+ else:
+ X_cat_t[ig][j - tad_fea, :] = fj[start[ig]:end[ig]]
+ del fj
+
+ for ig in range(group_size):
+ i = file_group[ii] + ig
+ filename_i = d_path + npzfile + "_{0}_reordered.npz".format(i)
+ print("Creating " + filename_i)
+ np.savez_compressed(
+ filename_i,
+ X_cat=np.transpose(X_cat_t[ig]), # transpose of the data
+ X_int=np.transpose(X_int_t[ig]), # transpose of the data
+ y=y[ig],
+ )
+ else:
+ print("Reordered day files already exist, skipping ...")
+ '''
+ '''
+ # Approach 3: group features
+ # check if data already exists
+ group_fea = 5 # e.g. 8, 5 or 4
+ group_num = tot_fea // group_fea
+ if tot_fea % group_fea != 0: # sanity check
+ sys.exit("ERROR: the group_fea must divided tot_fea evenly.")
+ recreate_flag = False
+ for jn in range(group_num):
+ filename_j = trafile + "_{0}_reordered{1}.npy".format(
+ jn, group_fea
+ )
+ if path.exists(filename_j):
+ print("Using existing " + filename_j)
+ else:
+ recreate_flag = True
+ # load, reorder and concatenate data (memmap all reordered files per feature)
+ if recreate_flag:
+ # init reordered files (.npy appended automatically)
+ z = np.zeros((group_fea, total_count))
+ for jn in range(group_num):
+ filename_j = trafile + "_{0}_reordered{1}".format(
+ jn, group_fea
+ )
+ np.save(filename_j, z)
+ print("Creating " + filename_j)
+
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_processed.npz".format(i)
+ with np.load(filename_i) as data:
+ X_cat_t = np.transpose(data["X_cat"])
+ X_int_t = np.transpose(data["X_int"])
+ y = data["y"]
+ size = len(y)
+ # sanity check
+ if total_per_file[i] != size:
+ sys.exit("ERROR: sanity check on number of samples failed")
+ # setup start and end ranges
+ start = offset_per_file[i]
+ end = offset_per_file[i + 1]
+ # print(filename_i)
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " diff=" + str(end - start) + "=" + str(total_per_file[i]))
+
+ for jn in range(group_num):
+ filename_j = trafile + "_{0}_reordered{1}.npy".format(
+ jn, group_fea
+ )
+ fj = np.load(filename_j, mmap_mode='r+')
+ for jg in range(group_fea):
+ j = jn * group_fea + jg
+ # print("j=" + str(j) + " jn=" + str(jn) + " jg=" + str(jg))
+ if j < tar_fea:
+ fj[jg, indices[start:end]] = y
+ elif tar_fea <= j and j < tad_fea:
+ fj[jg, indices[start:end]] = X_int_t[j - tar_fea, :]
+ else:
+ fj[jg, indices[start:end]] = X_cat_t[j - tad_fea, :]
+ del fj
+ else:
+ print("Reordered fea files already exist, skipping ...")
+
+ # check if data already exists
+ recreate_flag = False
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_reordered.npz".format(i)
+ if path.exists(filename_i):
+ print("Using existing" + filename_i)
+ else:
+ recreate_flag = True
+ # split reordered data by files (memmap all reordered files per feature)
+ # on the day boundary del the file object and memmap again
+ if recreate_flag:
+ for i in range(days):
+ filename_i = d_path + npzfile + "_{0}_reordered.npz".format(i)
+ size = total_per_file[i]
+ X_int_t = np.zeros((den_fea, size))
+ X_cat_t = np.zeros((spa_fea, size))
+ # setup start and end ranges
+ start = offset_per_file[i]
+ end = offset_per_file[i + 1]
+ print("Creating " + filename_i)
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " diff=" + str(end - start) + "=" + str(total_per_file[i]))
+
+ for jn in range(group_num):
+ filename_j = trafile + "_{0}_reordered{1}.npy".format(
+ jn, group_fea
+ )
+ fj = np.load(filename_j, mmap_mode='r')
+ for jg in range(group_fea):
+ j = jn * group_fea + jg
+ # print("j=" + str(j) + " jn=" + str(jn) + " jg=" + str(jg))
+ if j < tar_fea:
+ y = fj[jg, start:end]
+ elif tar_fea <= j and j < tad_fea:
+ X_int_t[j - tar_fea, :] = fj[jg, start:end]
+ else:
+ X_cat_t[j - tad_fea, :] = fj[jg, start:end]
+ del fj
+
+ np.savez_compressed(
+ filename_i,
+ X_cat=np.transpose(X_cat_t), # transpose of the data
+ X_int=np.transpose(X_int_t), # transpose of the data
+ y=y,
+ )
+
+ else:
+ print("Reordered day files already exist, skipping ...")
+ '''
+
+ # Approach 4: Fisher-Yates-Rao (FYR) shuffle algorithm
+ # 1st pass of FYR shuffle
+ # check if data already exists
+ recreate_flag = False
+ for j in range(days):
+ filename_j_y = npzfile + "_{0}_intermediate_y.npy".format(j)
+ filename_j_d = npzfile + "_{0}_intermediate_d.npy".format(j)
+ filename_j_s = npzfile + "_{0}_intermediate_s.npy".format(j)
+ if (
+ path.exists(filename_j_y)
+ and path.exists(filename_j_d)
+ and path.exists(filename_j_s)
+ ):
+ print(
+ "Using existing\n"
+ + filename_j_y + "\n"
+ + filename_j_d + "\n"
+ + filename_j_s
+ )
+ else:
+ recreate_flag = True
+ # reorder across buckets using sampling
+ if recreate_flag:
+ # init intermediate files (.npy appended automatically)
+ for j in range(days):
+ filename_j_y = npzfile + "_{0}_intermediate_y".format(j)
+ filename_j_d = npzfile + "_{0}_intermediate_d".format(j)
+ filename_j_s = npzfile + "_{0}_intermediate_s".format(j)
+ np.save(filename_j_y, np.zeros((total_per_file[j])))
+ np.save(filename_j_d, np.zeros((total_per_file[j], den_fea)))
+ np.save(filename_j_s, np.zeros((total_per_file[j], spa_fea)))
+ # start processing files
+ total_counter = [0] * days
+ for i in range(days):
+ filename_i = npzfile + "_{0}_processed.npz".format(i)
+ with np.load(filename_i) as data:
+ X_cat = data["X_cat"]
+ X_int = data["X_int"]
+ y = data["y"]
+ size = len(y)
+ # sanity check
+ if total_per_file[i] != size:
+ sys.exit("ERROR: sanity check on number of samples failed")
+ # debug prints
+ print("Reordering (1st pass) " + filename_i)
+
+ # create buckets using sampling of random ints
+ # from (discrete) uniform distribution
+ buckets = []
+ for _j in range(days):
+ buckets.append([])
+ counter = [0] * days
+ days_to_sample = days if data_split == "none" else days - 1
+ if randomize == "total":
+ rand_u = np.random.randint(low=0, high=days_to_sample, size=size)
+ for k in range(size):
+ # sample and make sure elements per buckets do not overflow
+ if data_split == "none" or i < days - 1:
+ # choose bucket
+ p = rand_u[k]
+ # retry of the bucket is full
+ while total_counter[p] + counter[p] >= total_per_file[p]:
+ p = np.random.randint(low=0, high=days_to_sample)
+ else: # preserve the last day/bucket if needed
+ p = i
+ buckets[p].append(k)
+ counter[p] += 1
+ else: # randomize is day or none
+ for k in range(size):
+ # do not sample, preserve the data in this bucket
+ p = i
+ buckets[p].append(k)
+ counter[p] += 1
+
+ # sanity check
+ if np.sum(counter) != size:
+ sys.exit("ERROR: sanity check on number of samples failed")
+ # debug prints
+ # print(counter)
+ # print(str(np.sum(counter)) + " = " + str(size))
+ # print([len(x) for x in buckets])
+ # print(total_counter)
+
+ # partially feel the buckets
+ for j in range(days):
+ filename_j_y = npzfile + "_{0}_intermediate_y.npy".format(j)
+ filename_j_d = npzfile + "_{0}_intermediate_d.npy".format(j)
+ filename_j_s = npzfile + "_{0}_intermediate_s.npy".format(j)
+ start = total_counter[j]
+ end = total_counter[j] + counter[j]
+ # target buckets
+ fj_y = np.load(filename_j_y, mmap_mode='r+')
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " end - start=" + str(end - start) + " "
+ # + str(fj_y[start:end].shape) + " "
+ # + str(len(buckets[j])))
+ fj_y[start:end] = y[buckets[j]]
+ del fj_y
+ # dense buckets
+ fj_d = np.load(filename_j_d, mmap_mode='r+')
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " end - start=" + str(end - start) + " "
+ # + str(fj_d[start:end, :].shape) + " "
+ # + str(len(buckets[j])))
+ fj_d[start:end, :] = X_int[buckets[j], :]
+ del fj_d
+ # sparse buckets
+ fj_s = np.load(filename_j_s, mmap_mode='r+')
+ # print("start=" + str(start) + " end=" + str(end)
+ # + " end - start=" + str(end - start) + " "
+ # + str(fj_s[start:end, :].shape) + " "
+ # + str(len(buckets[j])))
+ fj_s[start:end, :] = X_cat[buckets[j], :]
+ del fj_s
+ # update counters for next step
+ total_counter[j] += counter[j]
+
+ # 2nd pass of FYR shuffle
+ # check if data already exists
+ for j in range(days):
+ filename_j = npzfile + "_{0}_reordered.npz".format(j)
+ if path.exists(filename_j):
+ print("Using existing " + filename_j)
+ else:
+ recreate_flag = True
+ # reorder within buckets
+ if recreate_flag:
+ for j in range(days):
+ filename_j_y = npzfile + "_{0}_intermediate_y.npy".format(j)
+ filename_j_d = npzfile + "_{0}_intermediate_d.npy".format(j)
+ filename_j_s = npzfile + "_{0}_intermediate_s.npy".format(j)
+ fj_y = np.load(filename_j_y)
+ fj_d = np.load(filename_j_d)
+ fj_s = np.load(filename_j_s)
+
+ indices = range(total_per_file[j])
+ if randomize == "day" or randomize == "total":
+ if data_split == "none" or j < days - 1:
+ indices = np.random.permutation(range(total_per_file[j]))
+
+ filename_r = npzfile + "_{0}_reordered.npz".format(j)
+ print("Reordering (2nd pass) " + filename_r)
+ np.savez_compressed(
+ filename_r,
+ X_cat=fj_s[indices, :],
+ X_int=fj_d[indices, :],
+ y=fj_y[indices],
+ )
+
+ '''
+ # sanity check (under no reordering norms should be zero)
+ for i in range(days):
+ filename_i_o = npzfile + "_{0}_processed.npz".format(i)
+ print(filename_i_o)
+ with np.load(filename_i_o) as data_original:
+ X_cat_o = data_original["X_cat"]
+ X_int_o = data_original["X_int"]
+ y_o = data_original["y"]
+ filename_i_r = npzfile + "_{0}_reordered.npz".format(i)
+ print(filename_i_r)
+ with np.load(filename_i_r) as data_reordered:
+ X_cat_r = data_reordered["X_cat"]
+ X_int_r = data_reordered["X_int"]
+ y_r = data_reordered["y"]
+ print(np.linalg.norm(y_o - y_r))
+ print(np.linalg.norm(X_int_o - X_int_r))
+ print(np.linalg.norm(X_cat_o - X_cat_r))
+ '''
+
+ else:
+ print("Concatenating multiple days into %s.npz file" % str(d_path + o_filename))
+
+ # load and concatenate data
+ for i in range(days):
+ filename_i = npzfile + "_{0}_processed.npz".format(i)
+ with np.load(filename_i) as data:
+ if i == 0:
+ X_cat = data["X_cat"]
+ X_int = data["X_int"]
+ y = data["y"]
+ else:
+ X_cat = np.concatenate((X_cat, data["X_cat"]))
+ X_int = np.concatenate((X_int, data["X_int"]))
+ y = np.concatenate((y, data["y"]))
+ print("Loaded day:", i, "y = 1:", len(y[y == 1]), "y = 0:", len(y[y == 0]))
+
+ with np.load(d_path + d_file + "_fea_count.npz") as data:
+ counts = data["counts"]
+ print("Loaded counts!")
+
+ np.savez_compressed(
+ d_path + o_filename + ".npz",
+ X_cat=X_cat,
+ X_int=X_int,
+ y=y,
+ counts=counts,
+ )
+
+ return d_path + o_filename + ".npz"
+
+
def transformCriteoAdData(X_cat, X_int, y, days, data_split, randomize, total_per_file):
    """Transform Criteo Kaggle or Terabyte data for model consumption.

    Applies a log transformation to the dense features, casts the sparse
    features to 64-bit integers, and — depending on ``data_split`` — carves
    the samples into training/validation/test subsets.

    Args:
        X_cat (ndarray): preprocessed categorical (sparse) features.
        X_int (ndarray): dense integer features.
        y (ndarray): labels (0/1).
        days (int): number of days/splits the data was divided into.
        data_split (str): 'train' to produce train/val/test subsets; any
            other value returns the whole (optionally shuffled) dataset.
        randomize (str): randomization scheme —
            "none": no randomization;
            "day": randomizes each day's data (only used when splitting);
            "total": randomizes across the whole training set / dataset.
        total_per_file (list[int]): per-day sample counts; used to build
            day-boundary offsets for splitting.

    Returns:
        A 9-tuple (X_cat_train, X_int_train, y_train,
        X_cat_val, X_int_val, y_val, X_cat_test, X_int_test, y_test).
        When ``data_split != 'train'`` the first three entries hold the
        full dataset and the remaining six are empty lists.
    """
    # define initial set of indices
    indices = np.arange(len(y))

    # create offset per file (prefix sums of per-day counts)
    offset_per_file = np.array([0] + [x for x in total_per_file])
    for i in range(days):
        offset_per_file[i + 1] += offset_per_file[i]

    # split dataset
    if data_split == 'train':
        # one index array per day
        indices = np.array_split(indices, offset_per_file[1:-1])

        # randomize train data (per day)
        if randomize == "day":  # or randomize == "total":
            for i in range(len(indices) - 1):
                indices[i] = np.random.permutation(indices[i])
            print("Randomized indices per day ...")

        # the last day is held out and halved into test/validation
        train_indices = np.concatenate(indices[:-1])
        test_indices = indices[-1]
        test_indices, val_indices = np.array_split(test_indices, 2)

        print("Defined training and testing indices...")

        # randomize train data (across days)
        if randomize == "total":
            train_indices = np.random.permutation(train_indices)
            print("Randomized indices across days ...")

        # create training, validation, and test sets
        X_cat_train = X_cat[train_indices]
        X_int_train = X_int[train_indices]
        y_train = y[train_indices]

        X_cat_val = X_cat[val_indices]
        X_int_val = X_int[val_indices]
        y_val = y[val_indices]

        X_cat_test = X_cat[test_indices]
        X_int_test = X_int[test_indices]
        y_test = y[test_indices]

        print("Split data according to indices...")

        # NOTE: np.long was deprecated in NumPy 1.20 and removed in 1.24;
        # np.int64 is the explicit, equivalent dtype.
        X_cat_train = X_cat_train.astype(np.int64)
        X_int_train = np.log(X_int_train.astype(np.float32) + 1)
        y_train = y_train.astype(np.float32)

        X_cat_val = X_cat_val.astype(np.int64)
        X_int_val = np.log(X_int_val.astype(np.float32) + 1)
        y_val = y_val.astype(np.float32)

        X_cat_test = X_cat_test.astype(np.int64)
        X_int_test = np.log(X_int_test.astype(np.float32) + 1)
        y_test = y_test.astype(np.float32)

        print("Converted to tensors...done!")

        return (
            X_cat_train,
            X_int_train,
            y_train,
            X_cat_val,
            X_int_val,
            y_val,
            X_cat_test,
            X_int_test,
            y_test,
        )

    else:

        # randomize data
        if randomize == "total":
            indices = np.random.permutation(indices)
            print("Randomized indices...")

        X_cat = X_cat[indices].astype(np.int64)
        X_int = np.log(X_int[indices].astype(np.float32) + 1)
        y = y[indices].astype(np.float32)

        print("Converted to tensors...done!")

        return (X_cat, X_int, y, [], [], [], [], [], [])
+
+
def getCriteoAdData(
    datafile,
    o_filename,
    max_ind_range=-1,
    sub_sample_rate=0.0,
    days=7,
    data_split='train',
    randomize='total',
    criteo_kaggle=True,
    memory_map=False,
    dataset_multiprocessing=False,
):
    """Parse raw Criteo data, build categorical dictionaries, and emit processed files.

    Passes through the entire dataset, counts samples per day/split, builds
    per-feature dictionaries mapping raw hashed categorical values to dense
    ids, then delegates final per-day processing and concatenation to
    processCriteoAdData / concatCriteoAdData (defined elsewhere in this file).
    Intermediate results are cached on disk and reused on re-runs.

    Args:
        datafile: path to the downloaded raw data file (Kaggle: a single
            train.txt; Terabyte: day_<i> files sharing this prefix).
        o_filename (str): base name for the final output file.
        max_ind_range (int): if > 0, categorical values are taken modulo this.
        sub_sample_rate (float): probability of dropping zero-target samples.
        days (int): number of days/splits.
        data_split (str): 'train'/'none' — forwarded to concatCriteoAdData.
        randomize (str): 'none'/'day'/'total' — forwarded to concatCriteoAdData.
        criteo_kaggle (bool): True for the Kaggle dataset layout, False for Terabyte.
        memory_map (bool): forwarded to concatCriteoAdData (collection-of-files mode).
        dataset_multiprocessing (bool): process days in parallel worker processes.

    Returns:
        o_file (str): path of the final output file produced by concatCriteoAdData.
    """
    # Passes through entire dataset and defines dictionaries for categorical
    # features and determines the number of total categories.
    #
    # Inputs:
    #   datafile : path to downloaded raw data file
    #   o_filename (str): saves results under o_filename if filename is not ""
    #
    # Output:
    #   o_file (str): output file path

    # split the datafile into path and filename
    lstr = datafile.split("/")
    d_path = "/".join(lstr[0:-1]) + "/"
    d_file = lstr[-1].split(".")[0] if criteo_kaggle else lstr[-1]
    npzfile = d_path + ((d_file + "_day") if criteo_kaggle else d_file)
    trafile = d_path + ((d_file + "_fea") if criteo_kaggle else "fea")

    # count number of datapoints in training set (cached in *_day_count.npz)
    total_file = d_path + d_file + "_day_count.npz"
    if path.exists(total_file):
        with np.load(total_file) as data:
            total_per_file = list(data["total_per_file"])
        total_count = np.sum(total_per_file)
        print("Skipping counts per file (already exist)")
    else:
        total_count = 0
        total_per_file = []
        if criteo_kaggle:
            # WARNING: The raw data consists of a single train.txt file
            # Each line in the file is a sample, consisting of 13 continuous and
            # 26 categorical features (an extra space indicates that feature is
            # missing and will be interpreted as 0).
            if path.exists(datafile):
                print("Reading data from path=%s" % (datafile))
                # first pass: count lines
                with open(str(datafile)) as f:
                    for _ in f:
                        total_count += 1
                total_per_file.append(total_count)
                # reset total per file due to split: distribute the remainder
                # over the first `extras` days so counts differ by at most 1
                num_data_per_split, extras = divmod(total_count, days)
                total_per_file = [num_data_per_split] * days
                for j in range(extras):
                    total_per_file[j] += 1
                # split into days (simplifies code later on)
                file_id = 0
                boundary = total_per_file[file_id]
                nf = open(npzfile + "_" + str(file_id), "w")
                with open(str(datafile)) as f:
                    for j, line in enumerate(f):
                        if j == boundary:
                            nf.close()
                            file_id += 1
                            nf = open(npzfile + "_" + str(file_id), "w")
                            boundary += total_per_file[file_id]
                        nf.write(line)
                nf.close()
            else:
                sys.exit("ERROR: Criteo Kaggle Display Ad Challenge Dataset path is invalid; please download from https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset")
        else:
            # WARNING: The raw data consist of day_0.gz,... ,day_23.gz text files
            # Each line in the file is a sample, consisting of 13 continuous and
            # 26 categorical features (an extra space indicates that feature is
            # missing and will be interpreted as 0).
            for i in range(days):
                datafile_i = datafile + "_" + str(i)  # + ".gz"
                if path.exists(str(datafile_i)):
                    print("Reading data from path=%s" % (str(datafile_i)))
                    # file day_<i>
                    total_per_file_count = 0
                    with open(str(datafile_i)) as f:
                        for _ in f:
                            total_per_file_count += 1
                        total_per_file.append(total_per_file_count)
                        total_count += total_per_file_count
                else:
                    sys.exit("ERROR: Criteo Terabyte Dataset path is invalid; please download from https://labs.criteo.com/2013/12/download-terabyte-click-logs")

    # process a file worth of data and reinitialize data
    # note that a file may contain a single or multiple splits
    def process_one_file(
        datfile,
        npzfile,
        split,
        num_data_in_split,
        dataset_multiprocessing,
        convertDictsDay=None,
        resultDay=None
    ):
        # Parse one day's text file into y / X_int / X_cat arrays and save
        # them as <npzfile>_<split>.npz. In multiprocessing mode results are
        # communicated back through the Manager dicts resultDay/convertDictsDay
        # instead of return values.
        if dataset_multiprocessing:
            convertDicts_day = [{} for _ in range(26)]

        with open(str(datfile)) as f:
            y = np.zeros(num_data_in_split, dtype="i4")  # 4 byte int
            X_int = np.zeros((num_data_in_split, 13), dtype="i4")  # 4 byte int
            X_cat = np.zeros((num_data_in_split, 26), dtype="i4")  # 4 byte int
            if sub_sample_rate == 0.0:
                rand_u = 1.0
            else:
                rand_u = np.random.uniform(low=0.0, high=1.0, size=num_data_in_split)

            i = 0
            percent = 0
            for k, line in enumerate(f):
                # process a line (data point)
                line = line.split('\t')
                # set missing values to zero
                for j in range(len(line)):
                    if (line[j] == '') or (line[j] == '\n'):
                        line[j] = '0'
                # sub-sample data by dropping zero targets, if needed
                target = np.int32(line[0])
                if target == 0 and \
                        (rand_u if sub_sample_rate == 0.0 else rand_u[k]) < sub_sample_rate:
                    continue

                y[i] = target
                X_int[i] = np.array(line[1:14], dtype=np.int32)
                # categorical fields are hex strings; optionally fold them
                # into [0, max_ind_range)
                if max_ind_range > 0:
                    X_cat[i] = np.array(
                        list(map(lambda x: int(x, 16) % max_ind_range, line[14:])),
                        dtype=np.int32
                    )
                else:
                    X_cat[i] = np.array(
                        list(map(lambda x: int(x, 16), line[14:])),
                        dtype=np.int32
                    )

                # count uniques
                if dataset_multiprocessing:
                    for j in range(26):
                        convertDicts_day[j][X_cat[i][j]] = 1
                    # debug prints
                    if float(i) / num_data_in_split * 100 > percent + 1:
                        percent = int(float(i) / num_data_in_split * 100)
                        print(
                            "Load %d/%d (%d%%) Split: %d Label True: %d Stored: %d"
                            % (
                                i,
                                num_data_in_split,
                                percent,
                                split,
                                target,
                                y[i],
                            ),
                            end="\n",
                        )
                else:
                    for j in range(26):
                        convertDicts[j][X_cat[i][j]] = 1
                    # debug prints
                    print(
                        "Load %d/%d Split: %d Label True: %d Stored: %d"
                        % (
                            i,
                            num_data_in_split,
                            split,
                            target,
                            y[i],
                        ),
                        end="\r",
                    )
                i += 1

            # store num_data_in_split samples or extras at the end of file
            # count uniques
            # X_cat_t = np.transpose(X_cat)
            # for j in range(26):
            #     for x in X_cat_t[j,:]:
            #         convertDicts[j][x] = 1
            # store parsed
            filename_s = npzfile + "_{0}.npz".format(split)
            if path.exists(filename_s):
                print("\nSkip existing " + filename_s)
            else:
                np.savez_compressed(
                    filename_s,
                    X_int=X_int[0:i, :],
                    # X_cat=X_cat[0:i, :],
                    X_cat_t=np.transpose(X_cat[0:i, :]),  # transpose of the data
                    y=y[0:i],
                )
                print("\nSaved " + npzfile + "_{0}.npz!".format(split))

        if dataset_multiprocessing:
            resultDay[split] = i
            convertDictsDay[split] = convertDicts_day
            return
        else:
            return i

    # create all splits (reuse existing files if possible)
    recreate_flag = False
    convertDicts = [{} for _ in range(26)]
    # WARNING: to get reproducible sub-sampling results you must reset the seed below
    # np.random.seed(123)
    # in this case there is a single split in each day
    for i in range(days):
        npzfile_i = npzfile + "_{0}.npz".format(i)
        npzfile_p = npzfile + "_{0}_processed.npz".format(i)
        if path.exists(npzfile_i):
            print("Skip existing " + npzfile_i)
        elif path.exists(npzfile_p):
            print("Skip existing " + npzfile_p)
        else:
            recreate_flag = True

    if recreate_flag:
        if dataset_multiprocessing:
            # Manager/Process come from the file's multiprocessing imports
            # (outside this view); Manager dicts collect per-day results.
            resultDay = Manager().dict()
            convertDictsDay = Manager().dict()
            processes = [Process(target=process_one_file,
                                 name="process_one_file:%i" % i,
                                 args=(npzfile + "_{0}".format(i),
                                       npzfile,
                                       i,
                                       total_per_file[i],
                                       dataset_multiprocessing,
                                       convertDictsDay,
                                       resultDay,
                                       )
                                 ) for i in range(0, days)]
            for process in processes:
                process.start()
            for process in processes:
                process.join()
            # merge per-day dictionaries into the global convertDicts
            for day in range(days):
                total_per_file[day] = resultDay[day]
                print("Constructing convertDicts Split: {}".format(day))
                convertDicts_tmp = convertDictsDay[day]
                for i in range(26):
                    for j in convertDicts_tmp[i]:
                        convertDicts[i][j] = 1
        else:
            for i in range(days):
                total_per_file[i] = process_one_file(
                    npzfile + "_{0}".format(i),
                    npzfile,
                    i,
                    total_per_file[i],
                    dataset_multiprocessing,
                )

    # report and save total into a file
    total_count = np.sum(total_per_file)
    if not path.exists(total_file):
        np.savez_compressed(total_file, total_per_file=total_per_file)
    print("Total number of samples:", total_count)
    print("Divided into days/splits:\n", total_per_file)

    # dictionary files: map each raw categorical value to a dense index
    counts = np.zeros(26, dtype=np.int32)
    if recreate_flag:
        # create dictionaries
        for j in range(26):
            for i, x in enumerate(convertDicts[j]):
                convertDicts[j][x] = i
            dict_file_j = d_path + d_file + "_fea_dict_{0}.npz".format(j)
            if not path.exists(dict_file_j):
                np.savez_compressed(
                    dict_file_j,
                    unique=np.array(list(convertDicts[j]), dtype=np.int32)
                )
            counts[j] = len(convertDicts[j])
        # store (uniques and) counts
        count_file = d_path + d_file + "_fea_count.npz"
        if not path.exists(count_file):
            np.savez_compressed(count_file, counts=counts)
    else:
        # create dictionaries (from existing files)
        for j in range(26):
            with np.load(d_path + d_file + "_fea_dict_{0}.npz".format(j)) as data:
                unique = data["unique"]
            for i, x in enumerate(unique):
                convertDicts[j][x] = i
        # load (uniques and) counts
        with np.load(d_path + d_file + "_fea_count.npz") as data:
            counts = data["counts"]

    # process all splits via processCriteoAdData (defined elsewhere in this file)
    if dataset_multiprocessing:
        processes = [Process(target=processCriteoAdData,
                             name="processCriteoAdData:%i" % i,
                             args=(d_path,
                                   d_file,
                                   npzfile,
                                   i,
                                   convertDicts,
                                   counts,
                                   )
                             ) for i in range(0, days)]
        for process in processes:
            process.start()
        for process in processes:
            process.join()

    else:
        for i in range(days):
            processCriteoAdData(d_path, d_file, npzfile, i, convertDicts, counts)

    # final concatenation/reordering step produces the returned output file
    o_file = concatCriteoAdData(
        d_path,
        d_file,
        npzfile,
        trafile,
        days,
        data_split,
        randomize,
        total_per_file,
        total_count,
        memory_map,
        o_filename
    )

    return o_file
+
+
def loadDataset(
    dataset,
    max_ind_range,
    sub_sample_rate,
    randomize,
    data_split,
    raw_path="",
    pro_data="",
    memory_map=False
):
    """Resolve the processed Criteo data file, generating it if necessary.

    Args:
        dataset (str): "kaggle" or "terabyte".
        max_ind_range, sub_sample_rate, randomize, data_split: forwarded to
            getCriteoAdData when preprocessing is required.
        raw_path (str): path to the raw data file.
        pro_data (str): path to an already-processed data file, if any.
        memory_map (bool): whether data is stored as per-day reordered files.

    Returns:
        (file, days): path to the (pre)processed data and the day count.

    Raises:
        ValueError: if ``dataset`` is neither "kaggle" nor "terabyte".
    """
    # map dataset name to its day count and canonical output filename
    presets = {
        "kaggle": (7, "kaggleAdDisplayChallenge_processed"),
        "terabyte": (24, "terabyte_processed"),
    }
    if dataset not in presets:
        raise ValueError("Data set option is not supported")
    days, o_filename = presets[dataset]

    # split the datafile into path and filename
    parts = raw_path.split("/")
    d_path = "/".join(parts[:-1]) + "/"
    d_file = parts[-1].split(".")[0] if dataset == "kaggle" else parts[-1]
    npzfile = (d_file + "_day") if dataset == "kaggle" else d_file
    # trafile = d_path + ((d_file + "_fea") if dataset == "kaggle" else "fea")

    # check if pre-processed data is available
    data_ready = True
    if memory_map:
        # memory-map mode needs every per-day reordered file
        for day in range(days):
            candidate = d_path + npzfile + "_{0}_reordered.npz".format(day)
            if not path.exists(str(candidate)):
                data_ready = False
    elif not path.exists(str(pro_data)):
        data_ready = False

    # pre-process data if needed
    # WARNNING: when memory mapping is used we get a collection of files
    if data_ready:
        print("Reading pre-processed data=%s" % (str(pro_data)))
        file = str(pro_data)
    else:
        print("Reading raw data=%s" % (str(raw_path)))
        file = getCriteoAdData(
            raw_path,
            o_filename,
            max_ind_range,
            sub_sample_rate,
            days,
            data_split,
            randomize,
            dataset == "kaggle",
            memory_map
        )

    return file, days
+
+
+if __name__ == "__main__":
+ ### import packages ###
+ import argparse
+
+ ### parse arguments ###
+ parser = argparse.ArgumentParser(
+ description="Preprocess Criteo dataset"
+ )
+ # model related parameters
+ parser.add_argument("--max-ind-range", type=int, default=-1)
+ parser.add_argument("--data-sub-sample-rate", type=float, default=0.0) # in [0, 1]
+ parser.add_argument("--data-randomize", type=str, default="total") # or day or none
+ parser.add_argument("--memory-map", action="store_true", default=False)
+ parser.add_argument("--data-set", type=str, default="kaggle") # or terabyte
+ parser.add_argument("--raw-data-file", type=str, default="")
+ parser.add_argument("--processed-data-file", type=str, default="")
+ args = parser.parse_args()
+
+ loadDataset(
+ args.data_set,
+ args.max_ind_range,
+ args.data_sub_sample_rate,
+ args.data_randomize,
+ "train",
+ args.raw_data_file,
+ args.processed_data_file,
+ args.memory_map
+ )
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py
new file mode 100644
index 00000000000..f6f30f8e663
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py
@@ -0,0 +1,575 @@
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Description: generate inputs and targets for the dlrm benchmark
+# The inpts and outputs are generated according to the following three option(s)
+# 1) random distribution
+# 2) synthetic distribution, based on unique accesses and distances between them
+# i) R. Hassan, A. Harris, N. Topham and A. Efthymiou "Synthetic Trace-Driven
+# Simulation of Cache Memory", IEEE AINAM'07
+# 3) public data set
+# i) Criteo Kaggle Display Advertising Challenge Dataset
+# https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset
+# ii) Criteo Terabyte Dataset
+# https://labs.criteo.com/2013/12/download-terabyte-click-logs
+
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+# others
+from os import path
+import sys
+
+import data_utils
+
+# numpy
+import numpy as np
+from numpy import random as ra
+
+
+# pytorch
+import torch
+from torch.utils.data import Dataset, RandomSampler
+
+import data_loader_terabyte
+
+
+# Kaggle Display Advertising Challenge Dataset
+# dataset (str): name of dataset (Kaggle or Terabyte)
+# randomize (str): determines randomization scheme
+# "none": no randomization
+# "day": randomizes each day"s data (only works if split = True)
+# "total": randomizes total dataset
+# split (bool) : to split into train, test, validation data-sets
class CriteoDataset(Dataset):
    """PyTorch Dataset over the (preprocessed) Criteo Kaggle/Terabyte data.

    In memory-map mode the per-day reordered .npz files are loaded lazily,
    one day at a time, as __getitem__ crosses day boundaries; otherwise the
    whole processed dataset is loaded into memory and split into
    train/val/test index lists up front.
    """

    def __init__(
        self,
        dataset,
        max_ind_range,
        sub_sample_rate,
        randomize,
        split="train",
        raw_path="",
        pro_data="",
        memory_map=False,
        dataset_multiprocessing=False,
    ):
        # dataset
        # tar_fea = 1   # single target
        den_fea = 13  # 13 dense features
        # spa_fea = 26  # 26 sparse features
        # tad_fea = tar_fea + den_fea
        # tot_fea = tad_fea + spa_fea
        if dataset == "kaggle":
            days = 7
            out_file = "kaggleAdDisplayChallenge_processed"
        elif dataset == "terabyte":
            days = 24
            out_file = "terabyte_processed"
        else:
            raise(ValueError("Data set option is not supported"))
        self.max_ind_range = max_ind_range
        self.memory_map = memory_map

        # split the datafile into path and filename
        lstr = raw_path.split("/")
        self.d_path = "/".join(lstr[0:-1]) + "/"
        self.d_file = lstr[-1].split(".")[0] if dataset == "kaggle" else lstr[-1]
        self.npzfile = self.d_path + (
            (self.d_file + "_day") if dataset == "kaggle" else self.d_file
        )
        self.trafile = self.d_path + (
            (self.d_file + "_fea") if dataset == "kaggle" else "fea"
        )

        # check if pre-processed data is available
        data_ready = True
        if memory_map:
            for i in range(days):
                reo_data = self.npzfile + "_{0}_reordered.npz".format(i)
                if not path.exists(str(reo_data)):
                    data_ready = False
        else:
            if not path.exists(str(pro_data)):
                data_ready = False

        # pre-process data if needed
        # WARNNING: when memory mapping is used we get a collection of files
        if data_ready:
            print("Reading pre-processed data=%s" % (str(pro_data)))
            file = str(pro_data)
        else:
            print("Reading raw data=%s" % (str(raw_path)))
            file = data_utils.getCriteoAdData(
                raw_path,
                out_file,
                max_ind_range,
                sub_sample_rate,
                days,
                split,
                randomize,
                dataset == "kaggle",
                memory_map,
                dataset_multiprocessing,
            )

        # get a number of samples per day
        total_file = self.d_path + self.d_file + "_day_count.npz"
        with np.load(total_file) as data:
            total_per_file = data["total_per_file"]
        # compute offsets per file (prefix sums of per-day counts)
        self.offset_per_file = np.array([0] + [x for x in total_per_file])
        for i in range(days):
            self.offset_per_file[i + 1] += self.offset_per_file[i]
        # print(self.offset_per_file)

        # setup data
        if memory_map:
            # setup the training/testing split
            self.split = split
            if split == 'none' or split == 'train':
                self.day = 0
                self.max_day_range = days if split == 'none' else days - 1
            elif split == 'test' or split == 'val':
                # last day is halved: first half test, second half validation
                self.day = days - 1
                num_samples = self.offset_per_file[days] - \
                    self.offset_per_file[days - 1]
                self.test_size = int(np.ceil(num_samples / 2.))
                self.val_size = num_samples - self.test_size
            else:
                sys.exit("ERROR: dataset split is neither none, nor train or test.")

            '''
            # text
            print("text")
            for i in range(days):
                fi = self.npzfile + "_{0}".format(i)
                with open(fi) as data:
                    ttt = 0; nnn = 0
                    for _j, line in enumerate(data):
                        ttt +=1
                        if np.int32(line[0]) > 0:
                            nnn +=1
                    print("day=" + str(i) + " total=" + str(ttt) + " non-zeros="
                          + str(nnn) + " ratio=" +str((nnn * 100.) / ttt) + "%")
            # processed
            print("processed")
            for i in range(days):
                fi = self.npzfile + "_{0}_processed.npz".format(i)
                with np.load(fi) as data:
                    yyy = data["y"]
                    ttt = len(yyy)
                    nnn = np.count_nonzero(yyy)
                    print("day=" + str(i) + " total=" + str(ttt) + " non-zeros="
                          + str(nnn) + " ratio=" +str((nnn * 100.) / ttt) + "%")
            # reordered
            print("reordered")
            for i in range(days):
                fi = self.npzfile + "_{0}_reordered.npz".format(i)
                with np.load(fi) as data:
                    yyy = data["y"]
                    ttt = len(yyy)
                    nnn = np.count_nonzero(yyy)
                    print("day=" + str(i) + " total=" + str(ttt) + " non-zeros="
                          + str(nnn) + " ratio=" +str((nnn * 100.) / ttt) + "%")
            '''

            # load unique counts
            with np.load(self.d_path + self.d_file + "_fea_count.npz") as data:
                self.counts = data["counts"]
            self.m_den = den_fea  # X_int.shape[1]
            self.n_emb = len(self.counts)
            print("Sparse features= %d, Dense features= %d" % (self.n_emb, self.m_den))

            # Load the test data
            # Only a single day is used for testing
            if self.split == 'test' or self.split == 'val':
                # only a single day is used for testing
                fi = self.npzfile + "_{0}_reordered.npz".format(
                    self.day
                )
                with np.load(fi) as data:
                    self.X_int = data["X_int"]  # continuous feature
                    self.X_cat = data["X_cat"]  # categorical feature
                    self.y = data["y"]  # target

        else:
            # load and preprocess data
            with np.load(file) as data:
                X_int = data["X_int"]  # continuous feature
                X_cat = data["X_cat"]  # categorical feature
                y = data["y"]  # target
                self.counts = data["counts"]
            self.m_den = X_int.shape[1]  # den_fea
            self.n_emb = len(self.counts)
            print("Sparse fea = %d, Dense fea = %d" % (self.n_emb, self.m_den))

            # create reordering
            indices = np.arange(len(y))

            if split == "none":
                # randomize all data
                if randomize == "total":
                    indices = np.random.permutation(indices)
                    print("Randomized indices...")

                # NOTE(review): in-place fancy-index scatter permutes the
                # arrays; relies on numpy scatter-assignment semantics with a
                # permutation index — confirm no aliasing issue for your numpy
                X_int[indices] = X_int
                X_cat[indices] = X_cat
                y[indices] = y

            else:
                # split per day; last day is halved into test/validation
                indices = np.array_split(indices, self.offset_per_file[1:-1])

                # randomize train data (per day)
                if randomize == "day":  # or randomize == "total":
                    for i in range(len(indices) - 1):
                        indices[i] = np.random.permutation(indices[i])
                    print("Randomized indices per day ...")

                train_indices = np.concatenate(indices[:-1])
                test_indices = indices[-1]
                test_indices, val_indices = np.array_split(test_indices, 2)

                print("Defined %s indices..." % (split))

                # randomize train data (across days)
                if randomize == "total":
                    train_indices = np.random.permutation(train_indices)
                    print("Randomized indices across days ...")

                # create training, validation, and test sets
                if split == 'train':
                    self.X_int = [X_int[i] for i in train_indices]
                    self.X_cat = [X_cat[i] for i in train_indices]
                    self.y = [y[i] for i in train_indices]
                elif split == 'val':
                    self.X_int = [X_int[i] for i in val_indices]
                    self.X_cat = [X_cat[i] for i in val_indices]
                    self.y = [y[i] for i in val_indices]
                elif split == 'test':
                    self.X_int = [X_int[i] for i in test_indices]
                    self.X_cat = [X_cat[i] for i in test_indices]
                    self.y = [y[i] for i in test_indices]

            print("Split data according to indices...")

    def __getitem__(self, index):
        """Return (X_int, X_cat, y) for one sample; supports slice indices."""

        if isinstance(index, slice):
            return [
                self[idx] for idx in range(
                    index.start or 0, index.stop or len(self), index.step or 1
                )
            ]

        if self.memory_map:
            if self.split == 'none' or self.split == 'train':
                # check if need to switch to next day and load data
                # NOTE(review): self.day_boundary is only assigned when index
                # hits a day boundary — appears to assume sequential access
                # starting at index 0; confirm with the sampler used
                if index == self.offset_per_file[self.day]:
                    # print("day_boundary switch", index)
                    self.day_boundary = self.offset_per_file[self.day]
                    fi = self.npzfile + "_{0}_reordered.npz".format(
                        self.day
                    )
                    # print('Loading file: ', fi)
                    with np.load(fi) as data:
                        self.X_int = data["X_int"]  # continuous feature
                        self.X_cat = data["X_cat"]  # categorical feature
                        self.y = data["y"]  # target
                    self.day = (self.day + 1) % self.max_day_range

                i = index - self.day_boundary
            elif self.split == 'test' or self.split == 'val':
                # only a single day is used for testing
                i = index + (0 if self.split == 'test' else self.test_size)
            else:
                sys.exit("ERROR: dataset split is neither none, nor train or test.")
        else:
            i = index

        if self.max_ind_range > 0:
            # fold categorical ids into the embedding-table range
            return self.X_int[i], self.X_cat[i] % self.max_ind_range, self.y[i]
        else:
            return self.X_int[i], self.X_cat[i], self.y[i]

    def _default_preprocess(self, X_int, X_cat, y):
        """Log-transform dense features and convert a sample to torch tensors."""
        X_int = torch.log(torch.tensor(X_int, dtype=torch.float) + 1)
        if self.max_ind_range > 0:
            X_cat = torch.tensor(X_cat % self.max_ind_range, dtype=torch.long)
        else:
            X_cat = torch.tensor(X_cat, dtype=torch.long)
        y = torch.tensor(y.astype(np.float32))

        return X_int, X_cat, y

    def __len__(self):
        """Number of samples in the selected split."""
        if self.memory_map:
            if self.split == 'none':
                return self.offset_per_file[-1]
            elif self.split == 'train':
                # all days except the held-out last one
                return self.offset_per_file[-2]
            elif self.split == 'test':
                return self.test_size
            elif self.split == 'val':
                return self.val_size
            else:
                sys.exit("ERROR: dataset split is neither none, nor train nor test.")
        else:
            return len(self.y)
+
+
def collate_wrapper_criteo_offset(list_of_tuples):
    """Collate (X_int, X_cat, y) samples into DLRM batch tensors.

    Dense features are log(1 + x) transformed; sparse features are returned
    in the offset/index (lS_o, lS_i) format, one row per sparse feature,
    with one index per sample (offsets are simply 0..batch_size-1).
    """
    # where each tuple is (X_int, X_cat, y)
    dense_raw, sparse_raw, target_raw = zip(*list_of_tuples)
    X_int = torch.log(torch.tensor(dense_raw, dtype=torch.float) + 1)
    X_cat = torch.tensor(sparse_raw, dtype=torch.long)
    T = torch.tensor(target_raw, dtype=torch.float32).view(-1, 1)

    batch_size, feature_count = X_cat.shape[0], X_cat.shape[1]

    # one index tensor and one trivial offset tensor per sparse feature
    lS_i = torch.stack([X_cat[:, f] for f in range(feature_count)])
    lS_o = torch.stack(
        [torch.tensor(range(batch_size)) for _ in range(feature_count)]
    )

    return X_int, lS_o, lS_i, T
+
+
def ensure_dataset_preprocessed(args, d_path):
    """Run Criteo preprocessing and pack the result into per-split .bin files.

    Constructing the train/test CriteoDataset objects triggers their
    preprocessing side effect (writing "<raw>_<day>_reordered.npz" files);
    the dataset objects themselves are discarded.  Days 0-22 form the train
    split; day 23 serves both val and test.
    """
    # side effect only: force preprocessing of the train split
    _ = CriteoDataset(
        args.data_set,
        args.max_ind_range,
        args.data_sub_sample_rate,
        args.data_randomize,
        "train",
        args.raw_data_file,
        args.processed_data_file,
        args.memory_map,
        args.dataset_multiprocessing
    )

    # side effect only: force preprocessing of the test split
    _ = CriteoDataset(
        args.data_set,
        args.max_ind_range,
        args.data_sub_sample_rate,
        args.data_randomize,
        "test",
        args.raw_data_file,
        args.processed_data_file,
        args.memory_map,
        args.dataset_multiprocessing
    )

    for split in ['train', 'val', 'test']:
        print('Running preprocessing for split =', split)

        # days 0-22 -> train; the last day (23) is shared by val and test
        train_files = ['{}_{}_reordered.npz'.format(args.raw_data_file, day)
                       for
                       day in range(0, 23)]

        test_valid_file = args.raw_data_file + '_23_reordered.npz'

        output_file = d_path + '_{}.bin'.format(split)

        input_files = train_files if split == 'train' else [test_valid_file]
        data_loader_terabyte.numpy_to_binary(input_files=input_files,
                                             output_file_path=output_file,
                                             split=split)
+
+
+# Conversion from offset to length
def offset_to_length_converter(lS_o, lS_i):
    """Convert per-table EmbeddingBag offsets into per-bag lengths.

    For each table, appends the table's total index count to its offset
    vector and takes adjacent differences, yielding one length per bag.
    """
    lengths = []
    for table, offsets in enumerate(lS_o):
        total = torch.tensor(lS_i[table].shape)
        boundaries = torch.cat((offsets, total)).int()
        lengths.append(boundaries[1:] - boundaries[:-1])
    return torch.stack(lengths)
+
+
def collate_wrapper_criteo_length(list_of_tuples):
    """Collate (X_int, X_cat, y) samples into a DLRM batch in length format.

    Identical to collate_wrapper_criteo_offset except the per-table offsets
    are converted into per-bag lengths via offset_to_length_converter.
    """
    dense, sparse, targets = zip(*list_of_tuples)
    X_int = torch.log(torch.tensor(dense, dtype=torch.float) + 1)
    X_cat = torch.tensor(sparse, dtype=torch.long)
    T = torch.tensor(targets, dtype=torch.float32).view(-1, 1)

    num_samples, num_tables = X_cat.shape

    lS_i = torch.stack([X_cat[:, t] for t in range(num_tables)])
    lS_o = torch.stack(
        [torch.tensor(range(num_samples)) for _ in range(num_tables)]
    )

    return X_int, offset_to_length_converter(lS_o, lS_i), lS_i, T
+
+
def make_criteo_data_and_loaders(args, offset_to_length_converter=False):
    """Build (train_data, train_loader, test_data, test_loader) for Criteo.

    Three paths:
      * terabyte + memory_map + mlperf_bin_loader: pack the dataset into
        binary files (ensure_dataset_preprocessed) and serve them through
        CriteoBinDataset with batch_size=None loaders (batching happens
        inside the dataset).
      * terabyte + memory_map: per-day npz files served by
        data_loader_terabyte.DataLoader (days 0-22 train, day 23 test).
      * otherwise: plain CriteoDataset with standard DataLoaders and a
        collate wrapper -- offset format by default, length format when
        offset_to_length_converter=True.

    NOTE(review): the `offset_to_length_converter` parameter shadows the
    module-level function of the same name; within this function the name is
    a bool flag.
    """
    if args.memory_map and args.data_set == "terabyte":
        # more efficient for larger batches
        data_directory = path.dirname(args.raw_data_file)

        if args.mlperf_bin_loader:
            # derive the binary-file prefix from the processed-data path
            lstr = args.processed_data_file.split("/")
            d_path = "/".join(lstr[0:-1]) + "/" + lstr[-1].split(".")[0]
            train_file = d_path + "_train.bin"
            test_file = d_path + "_test.bin"
            # val_file = d_path + "_val.bin"
            counts_file = args.raw_data_file + '_fea_count.npz'
            # (re)generate the binary files if any are missing
            if any(not path.exists(p) for p in [train_file,
                                                test_file,
                                                counts_file]):
                ensure_dataset_preprocessed(args, d_path)

            train_data = data_loader_terabyte.CriteoBinDataset(
                data_file=train_file,
                counts_file=counts_file,
                batch_size=args.mini_batch_size,
                max_ind_range=args.max_ind_range
            )

            # batch_size=None: the dataset already yields full batches
            train_loader = torch.utils.data.DataLoader(
                train_data,
                batch_size=None,
                batch_sampler=None,
                shuffle=False,
                num_workers=0,
                collate_fn=None,
                pin_memory=False,
                drop_last=False,
                sampler=RandomSampler(train_data) if args.mlperf_bin_shuffle else None
            )

            test_data = data_loader_terabyte.CriteoBinDataset(
                data_file=test_file,
                counts_file=counts_file,
                batch_size=args.test_mini_batch_size,
                max_ind_range=args.max_ind_range
            )

            test_loader = torch.utils.data.DataLoader(
                test_data,
                batch_size=None,
                batch_sampler=None,
                shuffle=False,
                num_workers=0,
                collate_fn=None,
                pin_memory=False,
                drop_last=False,
            )
        else:
            data_filename = args.raw_data_file.split("/")[-1]

            train_data = CriteoDataset(
                args.data_set,
                args.max_ind_range,
                args.data_sub_sample_rate,
                args.data_randomize,
                "train",
                args.raw_data_file,
                args.processed_data_file,
                args.memory_map,
                args.dataset_multiprocessing
            )

            test_data = CriteoDataset(
                args.data_set,
                args.max_ind_range,
                args.data_sub_sample_rate,
                args.data_randomize,
                "test",
                args.raw_data_file,
                args.processed_data_file,
                args.memory_map,
                args.dataset_multiprocessing
            )

            # custom day-aware loader: days 0-22 train, day 23 test
            train_loader = data_loader_terabyte.DataLoader(
                data_directory=data_directory,
                data_filename=data_filename,
                days=list(range(23)),
                batch_size=args.mini_batch_size,
                max_ind_range=args.max_ind_range,
                split="train"
            )

            test_loader = data_loader_terabyte.DataLoader(
                data_directory=data_directory,
                data_filename=data_filename,
                days=[23],
                batch_size=args.test_mini_batch_size,
                max_ind_range=args.max_ind_range,
                split="test"
            )
    else:
        train_data = CriteoDataset(
            args.data_set,
            args.max_ind_range,
            args.data_sub_sample_rate,
            args.data_randomize,
            "train",
            args.raw_data_file,
            args.processed_data_file,
            args.memory_map,
            args.dataset_multiprocessing,
        )

        test_data = CriteoDataset(
            args.data_set,
            args.max_ind_range,
            args.data_sub_sample_rate,
            args.data_randomize,
            "test",
            args.raw_data_file,
            args.processed_data_file,
            args.memory_map,
            args.dataset_multiprocessing,
        )

        # select batch format: offsets (default) or lengths
        collate_wrapper_criteo = collate_wrapper_criteo_offset
        if offset_to_length_converter:
            collate_wrapper_criteo = collate_wrapper_criteo_length

        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.mini_batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            collate_fn=collate_wrapper_criteo,
            pin_memory=False,
            drop_last=False,  # True
        )

        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=args.test_mini_batch_size,
            shuffle=False,
            num_workers=args.test_num_workers,
            collate_fn=collate_wrapper_criteo,
            pin_memory=False,
            drop_last=False,  # True
        )

    return train_data, train_loader, test_data, test_loader
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py
new file mode 100644
index 00000000000..2af63ea4b98
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py
@@ -0,0 +1,1154 @@
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Description: an implementation of a deep learning recommendation model (DLRM)
+# The model input consists of dense and sparse features. The former is a vector
+# of floating point values. The latter is a list of sparse indices into
+# embedding tables, which consist of vectors of floating point values.
+# The selected vectors are passed to mlp networks denoted by triangles,
+# in some cases the vectors are interacted through operators (Ops).
+#
+# output:
+# vector of values
+# model: |
+# /\
+# /__\
+# |
+# _____________________> Op <___________________
+# / | \
+# /\ /\ /\
+# /__\ /__\ ... /__\
+# | | |
+# | Op Op
+# | ____/__\_____ ____/__\____
+# | |_Emb_|____|__| ... |_Emb_|__|___|
+# input:
+# [ dense features ] [sparse indices] , ..., [sparse indices]
+#
+# More precise definition of model layers:
+# 1) fully connected layers of an mlp
+# z = f(y)
+# y = Wx + b
+#
+# 2) embedding lookup (for a list of sparse indices p=[p1,...,pk])
+# z = Op(e1,...,ek)
+# obtain vectors e1=E[:,p1], ..., ek=E[:,pk]
+#
+# 3) Operator Op can be one of the following
+# Sum(e1,...,ek) = e1 + ... + ek
+# Dot(e1,...,ek) = [e1'e1, ..., e1'ek, ..., ek'e1, ..., ek'ek]
+# Cat(e1,...,ek) = [e1', ..., ek']'
+# where ' denotes transpose operation
+#
+# References:
+# [1] Maxim Naumov, Dheevatsa Mudigere, Hao-Jun Michael Shi, Jianyu Huang,
+# Narayanan Sundaram, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu,
+# Alisson G. Azzolini, Dmytro Dzhulgakov, Andrey Mallevich, Ilia Cherniavskii,
+# Yinghai Lu, Raghuraman Krishnamoorthi, Ansha Yu, Volodymyr Kondratenko,
+# Stephanie Pereira, Xianjie Chen, Wenlin Chen, Vijay Rao, Bill Jia, Liang Xiong,
+# Misha Smelyanskiy, "Deep Learning Recommendation Model for Personalization and
+# Recommendation Systems", CoRR, arXiv:1906.00091, 2019
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import argparse
+
+# miscellaneous
+import builtins
+import datetime
+import sys
+import time
+
+
+# data generation
+import dlrm_data_pytorch as dp
+
+# numpy
+import numpy as np
+import sklearn.metrics
+
+# pytorch
+import torch
+import torch.nn as nn
+from torch._ops import ops
+from torch.autograd.profiler import record_function
+from torch.nn.parallel.parallel_apply import parallel_apply
+from torch.nn.parallel.replicate import replicate
+from torch.nn.parallel.scatter_gather import gather, scatter
+from torch.nn.parameter import Parameter
+from torch.optim.lr_scheduler import _LRScheduler
+from torch.utils import ThroughputBenchmark
+# For distributed run
+import extend_distributed as ext_dist
+
+
+try:
+ import intel_extension_for_pytorch as ipex
+except:
+ assert False, "please install intel-extension-for-pytorch, support version higher than 1.10"
+
+
+exc = getattr(builtins, "IOError", "FileNotFoundError")
+
def freeze(model):
    """Freeze a scripted module's parameters and return the frozen module.

    NOTE(review): relies on private APIs (torch.jit._recursive,
    torch._C._freeze_module, model._c) that can change between torch
    releases; the public torch.jit.freeze is used elsewhere in this file.
    """
    return torch.jit._recursive.wrap_cpp_module(torch._C._freeze_module(model._c, preserveParameters=True))
+
+
def time_wrap():
    """Return the current wall-clock time in seconds (time.time)."""
    return time.time()
+
+
def dlrm_wrap(X, *emb_args):
    """Forward pass through the module-level `dlrm` model, labeled
    "DLRM forward" for the profiler."""
    with record_function("DLRM forward"):
        return dlrm(X, *emb_args)
+
+
def loss_fn_wrap(Z, T):
    """Compute the model's loss on predictions Z vs targets T, labeled
    "DLRM loss compute" for the profiler.  Uses the module-level `dlrm`."""
    with record_function("DLRM loss compute"):
        return dlrm.loss_fn(Z, T)
+
+# The following function is a wrapper to avoid checking this multiple times in th
+# loop below.
def unpack_batch(b):
    """Expand a (X, lS_o, lS_i, T) batch into the 6-tuple the loops expect.

    Sample weights default to all-ones (unweighted samples) and the final
    CBPP slot to None.
    """
    X, lS_o, lS_i, T = b[0], b[1], b[2], b[3]
    sample_weights = torch.ones(T.size())
    return X, lS_o, lS_i, T, sample_weights, None
+
+
class LRPolicyScheduler(_LRScheduler):
    """Learning-rate schedule: linear warmup, constant plateau, then
    quadratic decay to a small floor.

    Phases by optimizer step count:
      1. step < num_warmup_steps: lr ramps linearly up to base_lr
      2. warmup end .. decay_start_step: lr held at the last warmup value
         (or at base_lr when num_decay_steps == 0)
      3. decay_start_step .. decay_start_step + num_decay_steps: quadratic
         decay, floored at 1e-7
      4. afterwards: held at the last decayed value
    """

    def __init__(self, optimizer, num_warmup_steps, decay_start_step, num_decay_steps):
        self.num_warmup_steps = num_warmup_steps
        self.decay_start_step = decay_start_step
        self.decay_end_step = decay_start_step + num_decay_steps
        self.num_decay_steps = num_decay_steps

        # warmup and decay windows must not overlap
        if self.decay_start_step < self.num_warmup_steps:
            sys.exit("Learning rate warmup must finish before the decay starts")

        super(LRPolicyScheduler, self).__init__(optimizer)

    def get_lr(self):
        """Return the per-param-group learning rates for the current step."""
        step_count = self._step_count
        if step_count < self.num_warmup_steps:
            # warmup
            scale = 1.0 - (self.num_warmup_steps - step_count) / self.num_warmup_steps
            lr = [base_lr * scale for base_lr in self.base_lrs]
            self.last_lr = lr
        elif self.decay_start_step <= step_count and step_count < self.decay_end_step:
            # decay
            decayed_steps = step_count - self.decay_start_step
            scale = ((self.num_decay_steps - decayed_steps) / self.num_decay_steps) ** 2
            min_lr = 0.0000001
            lr = [max(min_lr, base_lr * scale) for base_lr in self.base_lrs]
            self.last_lr = lr
        else:
            if self.num_decay_steps > 0:
                # freeze at last, either because we're after decay
                # or because we're between warmup and decay
                # NOTE(review): if num_warmup_steps == 0 and we land here
                # before the decay window, self.last_lr has never been set
                # and this raises AttributeError -- confirm a non-zero
                # warmup is always configured alongside decay.
                lr = self.last_lr
            else:
                # do not adjust
                lr = self.base_lrs
        return lr
+
+
+### define dlrm in PyTorch ###
class DLRM_Net(nn.Module):
    """Deep Learning Recommendation Model.

    Dense features go through a bottom MLP; sparse features go through
    per-table EmbeddingBag lookups; the results are combined by a feature
    interaction op; a top MLP produces the click probability.  Supports
    single-process execution and ext_dist distributed execution, where the
    embedding tables are sharded across ranks and exchanged via alltoall.
    """

    def create_mlp(self, ln, sigmoid_layer):
        """Build an MLP from the layer-size vector `ln`; layer index
        `sigmoid_layer` gets a Sigmoid activation, all others ReLU."""
        # build MLP layer by layer
        layers = nn.ModuleList()
        for i in range(0, ln.size - 1):
            n = ln[i]
            m = ln[i + 1]

            # construct fully connected operator
            LL = nn.Linear(int(n), int(m), bias=True)

            # initialize the weights
            # with torch.no_grad():
            # custom Xavier input, output or two-sided fill
            mean = 0.0  # std_dev = np.sqrt(variance)
            std_dev = np.sqrt(2 / (m + n))  # np.sqrt(1 / m) # np.sqrt(1 / n)
            W = np.random.normal(mean, std_dev, size=(m, n)).astype(np.float32)
            std_dev = np.sqrt(1 / m)  # np.sqrt(2 / (m + 1))
            bt = np.random.normal(mean, std_dev, size=m).astype(np.float32)
            # approach 1
            LL.weight.data = torch.tensor(W, requires_grad=True)
            LL.bias.data = torch.tensor(bt, requires_grad=True)
            # approach 2
            # LL.weight.data.copy_(torch.tensor(W))
            # LL.bias.data.copy_(torch.tensor(bt))
            # approach 3
            # LL.weight = Parameter(torch.tensor(W),requires_grad=True)
            # LL.bias = Parameter(torch.tensor(bt),requires_grad=True)
            layers.append(LL)

            # construct sigmoid or relu operator
            if i == sigmoid_layer:
                layers.append(nn.Sigmoid())
            else:
                layers.append(nn.ReLU())

        # approach 1: use ModuleList
        # return layers
        # approach 2: use Sequential container to wrap all layers
        return torch.nn.Sequential(*layers)

    def create_emb(self, m, ln, local_ln_emb=None):
        """Create sum-mode EmbeddingBag tables of width `m`; `local_ln_emb`
        optionally restricts creation to this rank's shard of tables.

        NOTE(review): reads the module-level `args` global
        (args.inference_only) -- confirm run() has parsed args before the
        model is constructed.
        """
        emb_l = nn.ModuleList()
        n_embs = ln.size if local_ln_emb is None else len(local_ln_emb)
        for i in range(n_embs):
            if local_ln_emb is None:
                n = ln[i]
            else:
                n = ln[local_ln_emb[i]]
            EE = nn.EmbeddingBag(n, m, mode="sum", sparse=True)
            # initialize embeddings
            if not args.inference_only:
                nn.init.uniform_(EE.weight, a=-np.sqrt(1 / n), b=np.sqrt(1 / n))
            emb_l.append(EE)
        return emb_l

    def __init__(
        self,
        m_spa=None,
        ln_emb=None,
        ln_bot=None,
        ln_top=None,
        sigmoid_bot=-1,
        sigmoid_top=-1,
        weighted_pooling=None,
        loss_threshold=0.0,
    ):
        """Assemble the embedding tables and bottom/top MLPs.

        m_spa: embedding vector width; ln_emb: rows per embedding table;
        ln_bot / ln_top: MLP layer sizes; sigmoid_bot / sigmoid_top: index
        of the layer that gets a Sigmoid (-1 for none); loss_threshold:
        clamp margin applied to the output probability.
        NOTE(review): `weighted_pooling` is accepted but not used in this
        implementation.
        """
        super(DLRM_Net, self).__init__()
        self.loss_threshold = loss_threshold
        # If running distributed, get local slice of embedding tables
        if ext_dist.my_size > 1:
            n_emb = len(ln_emb)
            self.n_global_emb = n_emb
            self.rank = ext_dist.dist.get_rank()
            self.ln_emb = [i for i in range(n_emb)]
            self.n_local_emb, self.n_emb_per_rank = ext_dist.get_split_lengths(n_emb)
            self.local_ln_emb_slice = ext_dist.get_my_slice(n_emb)
            self.local_ln_emb = self.ln_emb[self.local_ln_emb_slice]
        else:
            self.local_ln_emb = None
        self.emb_l = self.create_emb(m_spa, ln_emb, self.local_ln_emb)
        self.bot_l = self.create_mlp(ln_bot, sigmoid_bot)
        self.top_l = self.create_mlp(ln_top, sigmoid_top)
        self.loss_fn = torch.nn.BCELoss(reduction="mean")

    def apply_mlp(self, x, layers):
        """Run `x` through an MLP built by create_mlp."""
        # approach 1: use ModuleList
        # for layer in layers:
        #     x = layer(x)
        # return x
        # approach 2: use Sequential container to wrap all layers
        return layers(x)

    def apply_emb(self, emb_l, *emb_args):
        """Look up sparse features, returning one pooled vector per table
        (or the merged result for MergedEmbeddingBagWithSGD)."""
        # WARNING: notice that we are processing the batch at once. We implicitly
        # assume that the data is laid out such that:
        # 1. each embedding is indexed with a group of sparse indices,
        #    corresponding to a single lookup
        # 2. for each embedding the lookups are further organized into a batch
        # 3. for a list of embedding tables there is a list of batched lookups
        if isinstance(emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
            return emb_l(emb_args, self.need_linearize_indices_and_offsets)
        lS_o, lS_i = emb_args
        ly = []
        for k, sparse_index_group_batch in enumerate(lS_i):
            sparse_offset_group_batch = lS_o[k]

            # embedding lookup
            # We are using EmbeddingBag, which implicitly uses sum operator.
            # The embeddings are represented as tall matrices, with sum
            # happening vertically across 0 axis, resulting in a row vector
            E = emb_l[k]
            V = E(
                sparse_index_group_batch,
                sparse_offset_group_batch,
            )

            ly.append(V)

        return ly

    def interact_features(self, x, ly):
        """Combine the dense vector `x` with embedding outputs `ly`: ipex
        fused interaction when args.ipex_interaction, otherwise pairwise dot
        products with the lower-triangular part concatenated to `x`."""
        if args.ipex_interaction:
            T = [x] + list(ly)
            R = ipex.nn.functional.interaction(*T)
        else:
            # concatenate dense and sparse features
            (batch_size, d) = x.shape
            T = torch.cat([x] + ly, dim=1).view((batch_size, -1, d))
            # perform a dot product
            Z = torch.bmm(T, torch.transpose(T, 1, 2))
            # append dense feature with the interactions (into a row vector)
            # approach 1: all
            # Zflat = Z.view((batch_size, -1))
            # approach 2: unique
            _, ni, nj = Z.shape
            # approach 1: tril_indices
            # offset = -1
            # li, lj = torch.tril_indices(ni, nj, offset=offset)
            # approach 2: custom
            offset = 0
            li = torch.tensor([i for i in range(ni) for j in range(i + offset)])
            lj = torch.tensor([j for i in range(nj) for j in range(i + offset)])
            Zflat = Z[:, li, lj]
            # concatenate dense features and interactions
            R = torch.cat([x] + [Zflat], dim=1)
        return R

    def forward(self, dense_x, *emb_args):
        """Dispatch to the distributed or single-process forward path."""
        if ext_dist.my_size > 1:
            return self.distributed_forward(dense_x, *emb_args)
        else:
            return self.sequential_forward(dense_x, *emb_args)

    def distributed_forward(self, dense_x, *emb_args):
        """Forward pass with embedding tables sharded across ranks: local
        lookups, alltoall exchange overlapped with the bottom MLP, then
        interaction and top MLP on the gathered slices."""
        batch_size = dense_x.size()[0]
        vector_lenght = self.emb_l.weights[0].size()[1]
        # WARNING: # of ranks must be <= batch size in distributed_forward call
        if batch_size < ext_dist.my_size:
            sys.exit("ERROR: batch_size (%d) must be larger than number of ranks (%d)" % (batch_size, ext_dist.my_size))

        # embeddings
        ly = self.apply_emb(self.emb_l, *emb_args)
        a2a_req = ext_dist.alltoall(ly, self.n_emb_per_rank)
        # bottom mlp
        x = self.apply_mlp(dense_x, self.bot_l)
        ly = a2a_req.wait()
        # split each gathered buffer back into per-table vectors
        _ly = []
        for item in ly:
            _ly += [item[:, emb_id * vector_lenght: (emb_id + 1) * vector_lenght] for emb_id in range(self.emb_l.n_tables)]
        # interactions
        z = self.interact_features(x, _ly)
        # top mlp
        p = self.apply_mlp(z, self.top_l)
        # clamp output if needed
        if 0.0 < self.loss_threshold and self.loss_threshold < 1.0:
            z = torch.clamp(
                p, min=self.loss_threshold, max=(1.0 - self.loss_threshold)
            )
        else:
            z = p
        return z

    def sequential_forward(self, dense_x, *emb_args):
        """Single-process forward: bottom MLP, embedding lookups,
        interaction, top MLP, optional output clamping."""
        # process dense features (using bottom mlp), resulting in a row vector
        x = self.apply_mlp(dense_x, self.bot_l)
        # debug prints
        # print("intermediate")
        # print(x.detach().cpu().numpy())

        # process sparse features(using embeddings), resulting in a list of row vectors
        ly = self.apply_emb(self.emb_l, *emb_args)
        # for y in ly:
        #     print(y.detach().cpu().numpy())

        # interact features (dense and sparse)
        z = self.interact_features(x, ly)
        # print(z.detach().cpu().numpy())

        # obtain probability of a click (using top mlp)
        p = self.apply_mlp(z, self.top_l)

        # clamp output if needed
        if 0.0 < self.loss_threshold and self.loss_threshold < 1.0:
            z = torch.clamp(p, min=self.loss_threshold, max=(1.0 - self.loss_threshold))
        else:
            z = p

        return z
+
+
def dash_separated_ints(value):
    """argparse `type=` validator for dash-separated integer lists, e.g.
    "4-3-2".  Returns the original string; raises ArgumentTypeError if any
    token is not an integer."""
    for token in value.split("-"):
        try:
            int(token)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "%s is not a valid dash separated list of ints" % value
            )
    return value
+
+
def trace_or_load_model(args, dlrm, test_ld, inplace=True):
    """Prepare `dlrm` for inference and return a jit-traced, frozen module.

    Per-dtype preparation:
      * bf16: ipex-optimize to bfloat16 (embedding weights cast first in
        inference-only mode -- see inline comment).
      * int8 without --tune: load the neural-compressor-saved model.
      * int8 with --tune: keep the model as-is (the tuning flow handles it).
      * otherwise: fp32 ipex-optimize with auto kernel selection.
    The first batch from `test_ld` supplies the tracing example inputs, and
    the traced module is warmed up with two forward calls before returning.
    """
    dlrm.eval()
    for j, inputBatch in enumerate(test_ld):
        X, lS_o, lS_i, _, _, _ = unpack_batch(inputBatch)
        if args.bf16:
            # at::GradMode::is_enabled() will query a threadlocal flag
            # but new thread generate from throughputbench mark will
            # init this flag to true, so we temporal cast embedding's
            # weight to bfloat16 for now
            if args.inference_only:
                dlrm.emb_l.bfloat16()
            dlrm = ipex.optimize(dlrm, dtype=torch.bfloat16, inplace=inplace)
        elif args.int8 and not args.tune:
            if args.num_cpu_cores != 0:
                torch.set_num_threads(args.num_cpu_cores)
            from neural_compressor.torch.quantization import load
            dlrm = load(args.save_model)
        elif args.int8 and args.tune:
            dlrm = dlrm
        else:
            dlrm = ipex.optimize(dlrm, dtype=torch.float, inplace=True, auto_kernel_selection=True)
        with torch.cpu.amp.autocast(enabled=args.bf16):
            dlrm = torch.jit.trace(dlrm, (X, lS_o, lS_i), check_trace=True)
            dlrm = torch.jit.freeze(dlrm)
        dlrm(X, lS_o, lS_i)
        dlrm(X, lS_o, lS_i)
        # return after the first batch; the loop is only a way to pull one
        # example batch from the loader
        return dlrm
+
+
def run_throughput_benchmark(args, dlrm, test_ld):
    """Measure multi-instance (shared-weight) inference throughput.

    Feeds up to args.num_batches batches into torch ThroughputBenchmark,
    runs it with args.share_weight_instance calling threads, prints the
    resulting throughput/latency, then terminates the process via exit(0)
    -- this function never returns.
    """
    bench = ThroughputBenchmark(dlrm)
    for j, inputBatch in enumerate(test_ld):
        X, lS_o, lS_i, T, W, CBPP = unpack_batch(inputBatch)
        bench.add_input(X, lS_o, lS_i)
        if args.num_batches > 0 and j == args.num_batches:
            break
    # when no limit was given, benchmark over every batch that was added
    args.num_batches = args.num_batches if args.num_batches > 0 else j
    stats = bench.benchmark(
        num_calling_threads=args.share_weight_instance,
        num_warmup_iters=100,
        num_iters=args.num_batches * args.share_weight_instance,
    )
    print(stats)
    latency = stats.latency_avg_ms
    # scale per-call latency to samples/sec across all instances
    throughput = (1 / latency) * 1000 * test_ld.dataset.batch_size * args.share_weight_instance
    print("throughput: {:.3f} fps".format(throughput))
    print("latency: {:.5f} ms".format(1/throughput * 1000))
    exit(0)
+
+
def inference(
    args,
    dlrm,
    best_acc_test,
    best_auc_test,
    test_ld,
    trace=True
):
    """Evaluate `dlrm` over `test_ld`, printing accuracy (and sklearn AUC /
    precision / recall metrics when args.print_auc).

    Returns (model_metrics_dict, is_best) in training runs
    (not args.inference_only) and the ROC-AUC value in inference-only runs.
    Relies on the module-level globals `nbatches` / `nbatches_test` set by
    run().  May delegate to run_throughput_benchmark, which exits the
    process.
    """
    test_accu = 0
    test_samp = 0

    if args.print_auc:
        scores = []
        targets = []

    total_time = 0
    total_iter = 0
    # optionally trace / load the quantized model before timing anything
    if args.inference_only and trace:
        dlrm = trace_or_load_model(args, dlrm, test_ld)
    if args.share_weight_instance != 0:
        # multi-instance benchmark; never returns (exit(0) inside)
        run_throughput_benchmark(args, dlrm, test_ld)
    with torch.cpu.amp.autocast(enabled=args.bf16):
        for i, testBatch in enumerate(test_ld):
            # periodic latency report (inference-only runs)
            should_print = ((i + 1) % args.print_freq == 0 or i + 1 == len(test_ld)) and args.inference_only
            if should_print:
                # NOTE(review): if this fires on the very first batch
                # (print_freq == 1), total_iter is still 0 and this divides
                # by zero -- confirm print_freq > 1 in inference-only runs.
                gT = 1000.0 * total_time / total_iter
                print(
                    "Finished {} it {}/{}, {:.2f} ms/it,".format(
                        "inference", i + 1, len(test_ld), gT
                    ),
                    flush=True,
                )
                total_time = 0
                total_iter = 0
            # early exit if nbatches was set by the user and was exceeded
            if args.inference_only and nbatches > 0 and i >= nbatches:
                break

            X_test, lS_o_test, lS_i_test, T_test, W_test, CBPP_test = unpack_batch(
                testBatch
            )

            # forward pass

            # MergedEmbeddingBagWithSGD takes linearized indices/offsets
            if not args.inference_only and isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
                n_tables = lS_i_test.shape[0]
                idx = [lS_i_test[i] for i in range(n_tables)]
                offset = [lS_o_test[i] for i in range(n_tables)]
                include_last = [False for i in range(n_tables)]
                indices, offsets, indices_with_row_offsets = dlrm.emb_l.linearize_indices_and_offsets(idx, offset, include_last)

            start = time_wrap()
            if not args.inference_only and isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
                Z_test = dlrm(X_test, indices, offsets, indices_with_row_offsets)
            else:
                Z_test = dlrm(X_test, lS_o_test, lS_i_test)

            total_time += (time_wrap() - start)
            total_iter += 1

            if args.print_auc:
                # defer metric computation: just collect scores and targets
                S_test = Z_test.detach().cpu().float().numpy()  # numpy array
                T_test = T_test.detach().cpu().float().numpy()  # numpy array
                scores.append(S_test)
                targets.append(T_test)
            elif not args.inference_only:
                with record_function("DLRM accuracy compute"):
                    # compute loss and accuracy
                    S_test = Z_test.detach().cpu().float().numpy()  # numpy array
                    T_test = T_test.detach().cpu().float().numpy()  # numpy array

                    mbs_test = T_test.shape[0]  # = mini_batch_size except last
                    A_test = np.sum((np.round(S_test, 0) == T_test).astype(np.uint8))

                    test_accu += A_test
                    test_samp += mbs_test
            else:
                # do nothing to save time
                pass

    if args.print_auc:
        with record_function("DLRM mlperf sklearn metrics compute"):
            scores = np.concatenate(scores, axis=0)
            targets = np.concatenate(targets, axis=0)

            metrics = {
                "recall": lambda y_true, y_score: sklearn.metrics.recall_score(
                    y_true=y_true, y_pred=np.round(y_score)
                ),
                "precision": lambda y_true, y_score: sklearn.metrics.precision_score(
                    y_true=y_true, y_pred=np.round(y_score)
                ),
                "f1": lambda y_true, y_score: sklearn.metrics.f1_score(
                    y_true=y_true, y_pred=np.round(y_score)
                ),
                "ap": sklearn.metrics.average_precision_score,
                "roc_auc": sklearn.metrics.roc_auc_score,
                "accuracy": lambda y_true, y_score: sklearn.metrics.accuracy_score(
                    y_true=y_true, y_pred=np.round(y_score)
                ),
            }

            validation_results = {}
            for metric_name, metric_function in metrics.items():
                validation_results[metric_name] = metric_function(targets, scores)
            acc_test = validation_results["accuracy"]
    elif not args.inference_only:
        acc_test = test_accu / test_samp
    else:
        pass

    model_metrics_dict = {
        "nepochs": args.nepochs,
        "nbatches": nbatches,
        "nbatches_test": nbatches_test,
    }
    if not args.inference_only:
        model_metrics_dict["test_acc"] = acc_test

    if args.print_auc:
        is_best = validation_results["roc_auc"] > best_auc_test
        if is_best:
            best_auc_test = validation_results["roc_auc"]
            model_metrics_dict["test_auc"] = best_auc_test
        print(
            "recall {:.4f}, precision {:.4f},".format(
                validation_results["recall"],
                validation_results["precision"],
            )
            + " f1 {:.4f}, ap {:.4f},".format(
                validation_results["f1"], validation_results["ap"]
            )
            + " auc {:.4f}, best auc {:.4f},".format(
                validation_results["roc_auc"], best_auc_test
            )
            + " accuracy {:3.3f} %, best accuracy {:3.3f} %".format(
                validation_results["accuracy"] * 100, best_acc_test * 100
            ),
            flush=True,
        )
        print("Accuracy: {:.34} ".format(validation_results["roc_auc"]))
    elif not args.inference_only:
        is_best = acc_test > best_acc_test
        if is_best:
            best_acc_test = acc_test
        print(
            " accuracy {:3.3f} %, best {:3.3f} %".format(
                acc_test * 100, best_acc_test * 100
            ),
            flush=True,
        )
    else:
        pass
    if not args.inference_only:
        return model_metrics_dict, is_best
    else:
        return validation_results["roc_auc"]
+
+
+def run():
+ ### parse arguments ###
+ parser = argparse.ArgumentParser(
+ description="Train Deep Learning Recommendation Model (DLRM)"
+ )
+ # model related parameters
+ parser.add_argument("--arch-sparse-feature-size", type=int, default=2)
+ parser.add_argument(
+ "--arch-embedding-size", type=dash_separated_ints, default="4-3-2"
+ )
+ # j will be replaced with the table number
+ parser.add_argument("--arch-mlp-bot", type=dash_separated_ints, default="4-3-2")
+ parser.add_argument("--arch-mlp-top", type=dash_separated_ints, default="4-2-1")
+ # activations and loss
+ parser.add_argument("--activation-function", type=str, default="relu")
+ parser.add_argument("--loss-threshold", type=float, default=0.0) # 1.0e-7
+ parser.add_argument("--round-targets", type=bool, default=False)
+ # data
+ parser.add_argument("--num-batches", type=int, default=0)
+ parser.add_argument("--data-set", type=str, default="kaggle") # or terabyte
+ parser.add_argument("--raw-data-file", type=str, default="")
+ parser.add_argument("--processed-data-file", type=str, default="")
+ parser.add_argument("--max-ind-range", type=int, default=-1)
+ parser.add_argument("--memory-map", action="store_true", default=False)
+ parser.add_argument("--data-sub-sample-rate", type=float, default=0.0) # in [0, 1]
+ parser.add_argument("--data-randomize", type=str, default="total") # or day or none
+ parser.add_argument(
+ "--dataset-multiprocessing",
+ action="store_true",
+ default=False,
+ help="The Kaggle dataset can be multiprocessed in an environment \
+ with more than 7 CPU cores and more than 20 GB of memory. \n \
+ The Terabyte dataset can be multiprocessed in an environment \
+ with more than 24 CPU cores and at least 1 TB of memory.",
+ )
+ # training
+ parser.add_argument("--mini-batch-size", type=int, default=1)
+ parser.add_argument("--nepochs", type=int, default=1)
+ parser.add_argument("--learning-rate", type=float, default=0.01)
+ parser.add_argument("--print-precision", type=int, default=5)
+ parser.add_argument("--numpy-rand-seed", type=int, default=123)
+ # inference
+ parser.add_argument("--inference-only", action="store_true", default=False)
+ # store/load model
+ parser.add_argument("--save-model", type=str, default="")
+ parser.add_argument("--load-model", type=str, default="")
+ # debugging and profiling
+ parser.add_argument("--print-freq", type=int, default=1)
+ parser.add_argument("--test-freq", type=int, default=-1)
+ parser.add_argument("--test-mini-batch-size", type=int, default=-1)
+ parser.add_argument("--print-time", action="store_true", default=False)
+ parser.add_argument("--print-wall-time", action="store_true", default=False)
+ parser.add_argument("--enable-profiling", action="store_true", default=False)
+ # stop at target AUC Terabyte (no subsampling) 0.8025
+ parser.add_argument("--mlperf-auc-threshold", type=float, default=0.0)
+ parser.add_argument("--mlperf-bin-loader", action="store_true", default=False)
+ parser.add_argument("--mlperf-bin-shuffle", action="store_true", default=False)
+ # LR policy
+ parser.add_argument("--lr-num-warmup-steps", type=int, default=0)
+ parser.add_argument("--lr-decay-start-step", type=int, default=0)
+ parser.add_argument("--lr-num-decay-steps", type=int, default=0)
+ # intel
+ parser.add_argument("--print-auc", action="store_true", default=False)
+ parser.add_argument("--should-test", action="store_true", default=False)
+ parser.add_argument("--bf16", action="store_true", default=False)
+ parser.add_argument("--share-weight-instance", type=int, default=0)
+ parser.add_argument("--num-cpu-cores", type=int, default=0)
+ parser.add_argument("--ipex-interaction", action="store_true", default=False)
+ parser.add_argument("--ipex-merged-emb", action="store_true", default=False)
+ parser.add_argument("--num-warmup-iters", type=int, default=1000)
+ parser.add_argument("--int8", action="store_true", default=False)
+ parser.add_argument("--dist-backend", type=str, default="ccl")
+ parser.add_argument("--tune", action="store_true", default=False)
+ parser.add_argument("--benchmark", action="store_true", default=False)
+ parser.add_argument("--accuracy_only", action="store_true", default=False)
+
+ global args
+ global nbatches
+ global nbatches_test
+ args = parser.parse_args()
+ ext_dist.init_distributed(backend=args.dist_backend)
+
+
+ ### some basic setup ###
+ np.random.seed(args.numpy_rand_seed)
+ np.set_printoptions(precision=args.print_precision)
+ torch.set_printoptions(precision=args.print_precision)
+ torch.manual_seed(args.numpy_rand_seed)
+
+ if args.test_mini_batch_size < 0:
+ # if the parameter is not set, use the training batch size
+ args.test_mini_batch_size = args.mini_batch_size
+
+ device = torch.device("cpu")
+ print("Using CPU...")
+
+ ### prepare training data ###
+ ln_bot = np.fromstring(args.arch_mlp_bot, dtype=int, sep="-")
+ # input data
+ train_data, train_ld, test_data, test_ld = dp.make_criteo_data_and_loaders(args)
+ nbatches = args.num_batches if args.num_batches > 0 else len(train_ld)
+ nbatches_test = len(test_ld)
+
+ ln_emb = train_data.counts
+ # enforce maximum limit on number of vectors per embedding
+ if args.max_ind_range > 0:
+ ln_emb = np.array(
+ list(
+ map(
+ lambda x: x if x < args.max_ind_range else args.max_ind_range,
+ ln_emb,
+ )
+ )
+ )
+ else:
+ ln_emb = np.array(ln_emb)
+ m_den = train_data.m_den
+ ln_bot[0] = m_den
+
+ args.ln_emb = ln_emb.tolist()
+
+ ### parse command line arguments ###
+ m_spa = args.arch_sparse_feature_size
+ ln_emb = np.asarray(ln_emb)
+ num_fea = ln_emb.size + 1 # num sparse + num dense features
+
+ m_den_out = ln_bot[ln_bot.size - 1]
+ # approach 1: all
+ # num_int = num_fea * num_fea + m_den_out
+ # approach 2: unique
+ num_int = (num_fea * (num_fea - 1)) // 2 + m_den_out
+
+ arch_mlp_top_adjusted = str(num_int) + "-" + args.arch_mlp_top
+ ln_top = np.fromstring(arch_mlp_top_adjusted, dtype=int, sep="-")
+
+ ### construct the neural network specified above ###
+ # WARNING: to obtain exactly the same initialization for
+ # the weights we need to start from the same random seed.
+ # np.random.seed(args.numpy_rand_seed)
+ global dlrm
+ dlrm = DLRM_Net(
+ m_spa,
+ ln_emb,
+ ln_bot,
+ ln_top,
+ sigmoid_bot=-1,
+ sigmoid_top=ln_top.size - 2,
+ loss_threshold=args.loss_threshold,
+ )
+ if args.ipex_merged_emb:
+ dlrm.emb_l = ipex.nn.modules.MergedEmbeddingBagWithSGD.from_embeddingbag_list(dlrm.emb_l, lr=args.learning_rate)
+ dlrm.need_linearize_indices_and_offsets = torch.BoolTensor([False])
+
+ if not args.inference_only:
+ optimizer = torch.optim.SGD(dlrm.parameters(), lr=args.learning_rate)
+ lr_scheduler = LRPolicyScheduler(
+ optimizer,
+ args.lr_num_warmup_steps,
+ args.lr_decay_start_step,
+ args.lr_num_decay_steps,
+ )
+
+ ### main loop ###
+
+ # training or inference
+ best_acc_test = 0
+ best_auc_test = 0
+ skip_upto_epoch = 0
+ skip_upto_batch = 0
+ total_time = 0
+ total_loss = 0
+ total_iter = 0
+ total_samp = 0
+
+    # Load model if specified
+ if not (args.load_model == ""):
+ print("Loading saved model {}".format(args.load_model))
+ ld_model = torch.load(args.load_model, map_location=torch.device("cpu"))
+ dlrm.load_state_dict(ld_model["state_dict"])
+ ld_j = ld_model["iter"]
+ ld_k = ld_model["epoch"]
+ ld_nepochs = ld_model["nepochs"]
+ ld_nbatches = ld_model["nbatches"]
+ ld_nbatches_test = ld_model["nbatches_test"]
+ ld_train_loss = ld_model["train_loss"]
+ ld_total_loss = ld_model["total_loss"]
+ ld_acc_test = ld_model["test_acc"]
+ if not args.inference_only:
+ optimizer.load_state_dict(ld_model["opt_state_dict"])
+ best_acc_test = ld_acc_test
+ total_loss = ld_total_loss
+ skip_upto_epoch = ld_k # epochs
+ skip_upto_batch = ld_j # batches
+ else:
+ args.print_freq = ld_nbatches
+ args.test_freq = 0
+
+ print(
+ "Saved at: epoch = {:d}/{:d}, batch = {:d}/{:d}, ntbatch = {:d}".format(
+ ld_k, ld_nepochs, ld_j, ld_nbatches, ld_nbatches_test
+ )
+ )
+ print(
+ "Training state: loss = {:.6f}".format(
+ ld_train_loss,
+ )
+ )
+ print("Testing state: accuracy = {:3.3f} %".format(ld_acc_test * 100))
+ del(ld_model)
+
+ ext_dist.barrier()
+ print("time/loss/accuracy (if enabled):")
+
+ if args.tune:
+ # evaluation
+ def eval_func(model):
+ args.int8 = getattr(model, "is_quantized", False)
+ with torch.no_grad():
+ return inference(
+ args,
+ model,
+ best_acc_test,
+ best_auc_test,
+ test_ld,
+ trace=args.int8
+ )
+
+ # calibration
+ def calib_fn(model):
+ calib_iter = 0
+ for X_test, lS_o_test, lS_i_test, T in train_ld:
+ if calib_iter < 100:
+ model(X_test, lS_o_test, lS_i_test)
+ calib_iter += 1
+ else:
+ break
+
+ X_test, lS_o_test, lS_i_test, T = next(iter(train_ld))
+ example_inputs = (X_test, lS_o_test, lS_i_test)
+ assert args.inference_only, "Please set inference_only in arguments"
+ from neural_compressor.torch.quantization import StaticQuantConfig, autotune, TuningConfig
+ tune_config = TuningConfig(config_set=StaticQuantConfig.get_config_set_for_tuning())
+
+ dlrm = autotune(
+ dlrm,
+ tune_config=tune_config,
+ eval_fn=eval_func,
+ run_fn=calib_fn,
+ example_inputs=example_inputs,
+ )
+ dlrm.save(args.save_model)
+ exit(0)
+ if args.benchmark:
+ dlrm = trace_or_load_model(args, dlrm, test_ld, inplace=True)
+ import time
+ X_test, lS_o_test, lS_i_test, T = next(iter(test_ld))
+ total_iters = 100
+ warmup_iters = 5
+ with torch.no_grad():
+ for i in range(total_iters):
+ if i == warmup_iters:
+ start = time.time()
+ dlrm(X_test, lS_o_test, lS_i_test)
+ end = time.time()
+ latency = (end - start) / ((total_iters - warmup_iters) * args.mini_batch_size)
+ throughput = ((total_iters - warmup_iters) * args.mini_batch_size) / (end - start)
+ print('Batch size = {:d}'.format(args.mini_batch_size))
+ print('Latency: {:.3f} ms'.format(latency * 10**3))
+ print('Throughput: {:.3f} samples/sec'.format(throughput))
+ exit(0)
+
+ if args.accuracy_only:
+ with torch.no_grad():
+ inference(
+ args,
+ dlrm,
+ best_acc_test,
+ best_auc_test,
+ test_ld
+ )
+ exit(0)
+
+
+ if args.bf16 and not args.inference_only:
+ for j, inputBatch in enumerate(train_ld):
+ X, lS_o, lS_i, T, W, CBPP = unpack_batch(inputBatch)
+ if ext_dist.my_size > 1:
+ local_bs = X.size()[0] // ext_dist.my_size
+ rank_id = dlrm.rank
+ X = X[rank_id * local_bs: (rank_id + 1) * local_bs]
+ T = T[rank_id * local_bs: (rank_id + 1) * local_bs]
+ global_bs = local_bs * ext_dist.my_size
+ lS_o = lS_o[:, :global_bs]
+ lS_i = lS_i[:, :global_bs]
+
+ if isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
+ if ext_dist.my_size > 1:
+ batch_size = X.size()[0]
+ g_i = lS_i[dlrm.local_ln_emb]
+ g_o = lS_o[dlrm.local_ln_emb]
+ n_tables = g_i.shape[0]
+ idx = [g_i[i] for i in range(n_tables)]
+ offset = [g_o[i] for i in range(n_tables)]
+ include_last = [False for i in range(n_tables)]
+ indices, offsets, indices_with_row_offsets = dlrm.emb_l.linearize_indices_and_offsets(idx, offset, include_last)
+ else:
+ n_tables = lS_i.shape[0]
+ idx = [lS_i[i] for i in range(n_tables)]
+ offset = [lS_o[i] for i in range(n_tables)]
+ include_last = [False for i in range(n_tables)]
+ indices, offsets, indices_with_row_offsets = dlrm.emb_l.linearize_indices_and_offsets(idx, offset, include_last)
+ if isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
+ sample_input = (X, indices, offsets, indices_with_row_offsets)
+ else:
+ sample_input = (X, lS_o, lS_i)
+ break
+ dlrm, optimizer = ipex.optimize(dlrm, dtype=torch.bfloat16, optimizer=optimizer, inplace=True, sample_input=sample_input)
+
+ if args.ipex_merged_emb:
+ dlrm.emb_l.to_bfloat16_train()
+ for i in range(len(dlrm.top_l)):
+ if isinstance(dlrm.top_l[i], ipex.nn.utils._weight_prepack._IPEXLinear):
+ if isinstance(dlrm.top_l[i+1], torch.nn.ReLU):
+ dlrm.top_l[i] = ipex.nn.modules.IPEXLinearEltwise(dlrm.top_l[i], 'relu')
+ else:
+ dlrm.top_l[i] = ipex.nn.modules.IPEXLinearEltwise(dlrm.top_l[i], 'sigmoid')
+ dlrm.top_l[i + 1] = torch.nn.Identity()
+ for i in range(len(dlrm.bot_l)):
+ if isinstance(dlrm.bot_l[i], ipex.nn.utils._weight_prepack._IPEXLinear):
+ if isinstance(dlrm.bot_l[i+1], torch.nn.ReLU):
+ dlrm.bot_l[i] = ipex.nn.modules.IPEXLinearEltwise(dlrm.bot_l[i], 'relu')
+ else:
+ dlrm.bot_l[i] = ipex.nn.modules.IPEXLinearEltwise(dlrm.bot_l[i], 'sigmoid')
+ dlrm.bot_l[i + 1] = torch.nn.Identity()
+
+ if ext_dist.my_size > 1:
+ dlrm.bot_l = ext_dist.DDP(dlrm.bot_l)
+ dlrm.top_l = ext_dist.DDP(dlrm.top_l)
+ training_record = [0, 0]
+ def update_training_performance(time, iters, training_record=training_record):
+ if iters > args.num_warmup_iters:
+ training_record[0] += time
+ training_record[1] += 1
+
+ def print_training_performance(training_record=training_record):
+ if training_record[0] == 0:
+ print("num-batches larger than warm up iters, please increase num-batches or decrease warmup iters")
+ exit()
+ total_samples = training_record[1] * args.mini_batch_size
+ throughput = total_samples / training_record[0] * 1000
+ print("throughput: {:.3f} fps".format(throughput))
+
+ test_freq = args.test_freq if args.test_freq != -1 else nbatches // 20
+ with torch.autograd.profiler.profile(
+ enabled=args.enable_profiling, use_cuda=False, record_shapes=False
+ ) as prof:
+ if not args.inference_only:
+ k = 0
+ while k < args.nepochs:
+
+ if k < skip_upto_epoch:
+ continue
+
+ for j, inputBatch in enumerate(train_ld):
+
+ if j < skip_upto_batch:
+ continue
+
+ X, lS_o, lS_i, T, W, CBPP = unpack_batch(inputBatch)
+ if ext_dist.my_size > 1:
+ local_bs = X.size()[0] // ext_dist.my_size
+ rank_id = dlrm.rank
+ X = X[rank_id * local_bs: (rank_id + 1) * local_bs]
+ T = T[rank_id * local_bs: (rank_id + 1) * local_bs]
+ global_bs = local_bs * ext_dist.my_size
+ lS_o = lS_o[:, :global_bs]
+ lS_i = lS_i[:, :global_bs]
+
+ if isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
+ if ext_dist.my_size > 1:
+ batch_size = X.size()[0]
+ g_i = lS_i[dlrm.local_ln_emb]
+ g_o = lS_o[dlrm.local_ln_emb]
+ n_tables = g_i.shape[0]
+ idx = [g_i[i] for i in range(n_tables)]
+ offset = [g_o[i] for i in range(n_tables)]
+ include_last = [False for i in range(n_tables)]
+ indices, offsets, indices_with_row_offsets = dlrm.emb_l.linearize_indices_and_offsets(idx, offset, include_last)
+ else:
+ n_tables = lS_i.shape[0]
+ idx = [lS_i[i] for i in range(n_tables)]
+ offset = [lS_o[i] for i in range(n_tables)]
+ include_last = [False for i in range(n_tables)]
+ indices, offsets, indices_with_row_offsets = dlrm.emb_l.linearize_indices_and_offsets(idx, offset, include_last)
+
+ t1 = time_wrap()
+
+ # early exit if nbatches was set by the user and has been exceeded
+ if nbatches > 0 and j >= nbatches:
+ break
+
+ mbs = T.shape[0] # = args.mini_batch_size except maybe for last
+
+ # forward pass
+ with torch.cpu.amp.autocast(enabled=args.bf16):
+ if isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
+ Z = dlrm_wrap(
+ X,
+ indices,
+ offsets,
+ indices_with_row_offsets
+ ).float()
+ else:
+ Z = dlrm_wrap(
+ X,
+ lS_o,
+ lS_i,
+ ).float()
+
+ # loss
+ E = loss_fn_wrap(Z, T)
+
+ # compute loss and accuracy
+ L = E.detach().cpu().numpy() # numpy array
+
+ with record_function("DLRM backward"):
+ # scaled error gradient propagation
+ # (where we do not accumulate gradients across mini-batches)
+ optimizer.zero_grad(set_to_none=True)
+ # backward pass
+ E.backward()
+
+ with record_function("DLRM update"):
+ # optimizer
+ optimizer.step()
+ lr_scheduler.step()
+ if isinstance(dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
+ dlrm.emb_l.sgd_args = dlrm.emb_l.sgd_args._replace(lr=lr_scheduler.get_last_lr()[0])
+
+ t2 = time_wrap()
+ total_time += t2 - t1
+
+ total_loss += L * mbs
+ total_iter += 1
+ total_samp += mbs
+
+ should_print = ((j + 1) % args.print_freq == 0) or (
+ j + 1 == nbatches
+ )
+ should_test = (
+ (args.should_test)
+ and (((j + 1) % test_freq == 0) or (j + 1 == nbatches))
+ )
+
+ # print time, loss and accuracy
+ if should_print or should_test:
+ gT = 1000.0 * total_time / total_iter if args.print_time else -1
+ total_time = 0
+
+ train_loss = total_loss / total_samp
+ total_loss = 0
+
+ str_run_type = (
+ "inference" if args.inference_only else "training"
+ )
+
+ wall_time = ""
+ if args.print_wall_time:
+ wall_time = " ({})".format(time.strftime("%H:%M"))
+
+ print(
+ "Finished {} it {}/{} of epoch {}, {:.2f} ms/it,".format(
+ str_run_type, j + 1, nbatches, k, gT
+ )
+ + " loss {:.6f}".format(train_loss)
+ + wall_time,
+ flush=True,
+ )
+ update_training_performance(gT, j)
+
+ total_iter = 0
+ total_samp = 0
+
+ # testing
+ if should_test:
+ model_metrics_dict, is_best = inference(
+ args,
+ dlrm,
+ best_acc_test,
+ best_auc_test,
+ test_ld,
+ )
+
+ if (
+ is_best
+ and not (args.save_model == "")
+ and not args.inference_only
+ ):
+ model_metrics_dict["epoch"] = k
+ model_metrics_dict["iter"] = j + 1
+ model_metrics_dict["train_loss"] = train_loss
+ model_metrics_dict["total_loss"] = total_loss
+ model_metrics_dict[
+ "opt_state_dict"
+ ] = optimizer.state_dict()
+ print("Saving model to {}".format(args.save_model))
+ torch.save(model_metrics_dict, args.save_model)
+
+ if (
+ (args.mlperf_auc_threshold > 0)
+ and (best_auc_test > args.mlperf_auc_threshold)
+ ):
+ print(
+ "MLPerf testing auc threshold "
+ + str(args.mlperf_auc_threshold)
+ + " reached, stop training"
+ )
+ k += 1 # nepochs
+ else:
+ print("Testing for inference only")
+ with torch.no_grad():
+ inference(
+ args,
+ dlrm,
+ best_acc_test,
+ best_auc_test,
+ test_ld
+ )
+
+ # profiling
+ if not args.inference_only:
+ print_training_performance()
+
+ if args.enable_profiling:
+ time_stamp = str(datetime.datetime.now()).replace(" ", "_")
+ with open("dlrm_s_pytorch" + time_stamp + "_shape.prof", "w") as prof_f:
+ prof_f.write(
+ prof.key_averages(group_by_input_shape=True).table(
+ sort_by="self_cpu_time_total"
+ )
+ )
+ with open("dlrm_s_pytorch" + time_stamp + "_total.prof", "w") as prof_f:
+ prof_f.write(prof.key_averages().table(sort_by="self_cpu_time_total"))
+ prof.export_chrome_trace("dlrm_s_pytorch" + time_stamp + ".json")
+ exit(0)
+
+if __name__ == "__main__":
+ run()
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py
new file mode 100644
index 00000000000..0b117975b25
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py
@@ -0,0 +1,424 @@
+import os
+import builtins
+import numpy as np
+import torch
+from torch.autograd import Function
+from torch.nn.parallel import DistributedDataParallel as DDP
+import torch.distributed as dist
+try:
+ import torch_ccl
+except ImportError as e:
+ #print(e)
+ torch_ccl = False
+
+my_rank = -1
+my_size = -1
+my_local_rank = -1
+my_local_size = -1
+alltoall_supported = False
+allgatherv_supported = False
+a2a_impl = os.environ.get('DLRM_ALLTOALL_IMPL', '')
+
+myreq = None
+
+def env2int(env_list, default = -1):
+ for e in env_list:
+ val = int(os.environ.get(e, -1))
+ if val >= 0: return val
+ return default
+
+def get_my_slice(n):
+ my_size = dist.get_world_size()
+ my_rank = dist.get_rank()
+ k, m = divmod(n, my_size)
+ return slice(my_rank * k + min(my_rank, m), (my_rank+1) * k + min(my_rank+1, m), 1)
+
+def get_split_lengths(n):
+ my_size = dist.get_world_size()
+ k, m = divmod(n, my_size)
+ if m == 0:
+ splits = None
+ my_len = k
+ else:
+ my_rank = dist.get_rank()
+ splits = [(k+1) if i < m else k for i in range(my_size)]
+ my_len = splits[my_rank]
+ return (my_len, splits)
+
+def init_distributed(rank = -1, size = -1, backend=''):
+ global myreq
+ #global my_rank
+ global my_size
+ global my_local_rank
+ global my_local_size
+ global a2a_impl
+ global alltoall_supported
+ global allgatherv_supported
+ # guess MPI ranks from env (works for IMPI, OMPI and MVAPICH2)
+ num_mpi_ranks = env2int(['PMI_SIZE', 'OMPI_COMM_WORLD_SIZE', 'MV2_COMM_WORLD_SIZE', 'WORLD_SIZE'])
+ if backend == '' and num_mpi_ranks > 1:
+ if torch_ccl and env2int(['CCL_WORKER_COUNT']) > 0:
+ backend = 'ccl'
+ elif dist.is_mpi_available():
+ backend = 'mpi'
+ else:
+ print("WARNING: MPI multi-process launch detected but PyTorch MPI backend not available.")
+ backend = 'gloo'
+ if backend != '':
+ #guess Rank and size
+ if rank == -1:
+ rank = env2int(['PMI_RANK', 'OMPI_COMM_WORLD_RANK', 'MV2_COMM_WORLD_RANK', 'RANK'], 0)
+ if size == -1:
+ size = env2int(['PMI_SIZE', 'OMPI_COMM_WORLD_SIZE', 'MV2_COMM_WORLD_SIZE', 'WORLD_SIZE'], 1)
+ if not os.environ.get('RANK', None) and rank != -1: os.environ['RANK'] = str(rank)
+ if not os.environ.get('WORLD_SIZE', None) and size != -1: os.environ['WORLD_SIZE'] = str(size)
+ if not os.environ.get('MASTER_PORT', None): os.environ['MASTER_PORT'] = '29500'
+ if not os.environ.get('MASTER_ADDR', None):
+ local_size = env2int(['MPI_LOCALNRANKS', 'OMPI_COMM_WORLD_LOCAL_SIZE', 'MV2_COMM_WORLD_LOCAL_SIZE'], 1)
+ if local_size != size and backend != 'mpi':
+ print("Warning: Looks like distributed multinode run but MASTER_ADDR env not set, using '127.0.0.1' as default")
+ print("If this run hangs, try exporting rank 0's hostname as MASTER_ADDR")
+ os.environ['MASTER_ADDR'] = '127.0.0.1'
+ if size > 1:
+ dist.init_process_group(backend, rank=rank, world_size=size)
+ my_rank = dist.get_rank()
+ my_size = dist.get_world_size()
+ my_local_rank = env2int(['MPI_LOCALRANKID', 'OMPI_COMM_WORLD_LOCAL_RANK', 'MV2_COMM_WORLD_LOCAL_RANK'], 0)
+ my_local_size = env2int(['MPI_LOCALNRANKS', 'OMPI_COMM_WORLD_LOCAL_SIZE', 'MV2_COMM_WORLD_LOCAL_SIZE'], 1)
+ if my_rank == 0: print("Running on %d ranks using %s backend" % (my_size, backend))
+ if backend == 'ccl':
+ print("Using CCL_ATL_TRANSPORT=%s" % os.environ.get('CCL_ATL_TRANSPORT', '(default)'))
+ print("Using CCL_ATL_SHM=%s" % os.environ.get('CCL_ATL_SHM', '(default)'))
+ if hasattr(dist, 'all_to_all_single'):
+ try:
+ # dist.all_to_all_single(torch.empty([0]), torch.empty([0]))
+ alltoall_supported = True
+ except RuntimeError:
+ pass
+ if a2a_impl == 'alltoall' and alltoall_supported == False:
+ print("Requested DLRM_ALLTOALL_IMPL=%s but backend %s does not support it, use scatter/gather based alltoall" % (a2a_impl, backend))
+ a2a_impl = 'scatter'
+ if a2a_impl != '': print("Using DLRM_ALLTOALL_IMPL=%s" % a2a_impl)
+ try:
+ x = torch.ones([my_rank])
+ y = torch.zeros([(my_size*(my_size-1))//2])
+ y = list(y.split([r for r in range(my_size)]))
+ dist.all_gather(y, x)
+ allgatherv_supported = True
+ except RuntimeError:
+ pass
+ else:
+ my_rank = 0
+ my_size = 1
+ my_local_rank = 0
+ my_local_size = 1
+ myreq = Request()
+
+class Request(object):
+ def __init__(self):
+ self.req = None
+ self.tensor = None
+ self.WaitFunction = All2All_Scatter_Wait
+
+ def wait(self):
+ ret = self.WaitFunction.apply(*self.tensor)
+ self.req = None
+ self.tensor = None
+ return ret
+
+class All2All_ScatterList_Req(Function):
+ @staticmethod
+ def forward(ctx, a2ai, *inputs):
+ global myreq
+ my_rank = dist.get_rank()
+ #print("All2All_ScatterList_Req:forward")
+ mb_split_lengths = a2ai.gNS if a2ai.gNS else a2ai.lN
+ emb_split_lengths = a2ai.gSS if a2ai.gSS else [a2ai.lS] * my_size
+ gather_list = []
+ req_list = []
+ for i in range(my_size):
+ for j in range(emb_split_lengths[i]):
+ out_tensor = inputs[0].new_empty([a2ai.lN, a2ai.E])
+ scatter_list = list(inputs[j].split(mb_split_lengths, dim = 0)) if i == my_rank else []
+ req = dist.scatter(out_tensor, scatter_list, src=i, async_op=True)
+ gather_list.append(out_tensor)
+ req_list.append(req)
+ myreq.req = req_list
+ myreq.tensor = tuple(gather_list)
+ myreq.a2ai = a2ai
+ return myreq.tensor
+
+ @staticmethod
+ def backward(ctx, *grad_output):
+ global myreq
+ #print("All2All_ScatterList_Req:backward")
+ for r in myreq.req:
+ r.wait()
+ myreq.req = None
+ grad_inputs = myreq.tensor
+ myreq.tensor = None
+ return (None, *grad_inputs)
+
+
+class All2All_ScatterList_Wait(Function):
+ @staticmethod
+ def forward(ctx, *output):
+ global myreq
+ #print("All2All_Scatter_Wait:forward")
+ ctx.a2ai = myreq.a2ai
+ for r in myreq.req:
+ r.wait()
+ myreq.req = None
+ myreq.tensor = None
+ return output
+
+ @staticmethod
+ def backward(ctx, *grad_output):
+ global myreq
+ my_rank = dist.get_rank()
+ a2ai = ctx.a2ai
+ grad_output = [t.contiguous() for t in grad_output]
+ mb_split_lengths = a2ai.gNS if a2ai.gNS else [a2ai.lN] * my_size
+ per_rank_split_lengths = a2ai.gSS if a2ai.gSS else [a2ai.lS] * my_size
+ grad_inputs = [grad_output[0].new_empty([ctx.a2ai.N, ctx.a2ai.E]) for _ in range(a2ai.lS)]
+ req_list = []
+ ind = 0
+ for i in range(my_size):
+ for j in range(per_rank_split_lengths[i]):
+ gather_list = list(grad_inputs[j].split(mb_split_lengths, dim = 0)) if i == my_rank else None
+ req = dist.gather(grad_output[ind], gather_list, dst = i, async_op=True)
+ req_list.append(req)
+ ind += 1
+ myreq.req = req_list
+ myreq.tensor = grad_inputs
+ return tuple(grad_output)
+
+
+
+class All2All_Scatter_Req(Function):
+ @staticmethod
+ def forward(ctx, a2ai, *inputs):
+ global myreq
+ #print("All2All_Scatter_Req:forward")
+ my_rank = dist.get_rank()
+ mb_split_lengths = a2ai.gNS if a2ai.gNS else a2ai.lN
+ emb_split_lengths = a2ai.gSS if a2ai.gSS else [a2ai.lS] * my_size
+ input = torch.cat(inputs, dim=1)
+ scatter_list = list(input.split(mb_split_lengths, dim=0))
+ gather_list = []
+ req_list = []
+ for i in range(my_size):
+ out_tensor = input.new_empty([a2ai.lN, emb_split_lengths[i] * a2ai.E])
+ req = dist.scatter(out_tensor, scatter_list if i == my_rank else [], src=i, async_op=True)
+ gather_list.append(out_tensor)
+ req_list.append(req)
+ myreq.req = req_list
+ myreq.tensor = tuple(gather_list)
+ myreq.a2ai = a2ai
+ ctx.a2ai = a2ai
+ return myreq.tensor
+
+ @staticmethod
+ def backward(ctx, *grad_output):
+ global myreq
+ #print("All2All_Scatter_Req:backward")
+ for r in myreq.req:
+ r.wait()
+ myreq.req = None
+ grad_input = myreq.tensor
+ grad_inputs = grad_input.split(ctx.a2ai.E, dim=1)
+ myreq.tensor = None
+ return (None, *grad_inputs)
+
+
+class All2All_Scatter_Wait(Function):
+ @staticmethod
+ def forward(ctx, *output):
+ global myreq
+ #print("All2All_Scatter_Wait:forward")
+ ctx.a2ai = myreq.a2ai
+ for r in myreq.req:
+ r.wait()
+ myreq.req = None
+ myreq.tensor = None
+ return output
+
+ @staticmethod
+ def backward(ctx, *grad_output):
+ global myreq
+ my_rank = dist.get_rank()
+ #print("All2All_Scatter_Wait:backward")
+ assert len(grad_output) == my_size
+ scatter_list = [t.contiguous() for t in grad_output]
+ a2ai = ctx.a2ai
+ mb_split_lengths = a2ai.gNS if a2ai.gNS else a2ai.lN
+ emb_split_lengths = a2ai.gSS if a2ai.gSS else [a2ai.lS] * my_size
+ grad_input = grad_output[0].new_empty([a2ai.N, a2ai.E*a2ai.lS])
+ gather_list = list(grad_input.split(mb_split_lengths, dim=0))
+ req_list = []
+ for i in range(my_size):
+ #req = dist.scatter(gather_list[i], scatter_list if i == my_rank else [], src=i, async_op=True)
+ req = dist.gather(scatter_list[i], gather_list if i == my_rank else [], dst=i, async_op=True)
+ req_list.append(req)
+ myreq.req = req_list
+ myreq.tensor = grad_input
+ return grad_output
+
+
+class All2All_Req(Function):
+ @staticmethod
+ def forward(ctx, a2ai, *inputs):
+ global myreq
+ #print("All2All_Req:forward")
+ mb_split_lengths = a2ai.gNS
+ if mb_split_lengths: mb_split_lengths = [m * a2ai.lS * a2ai.E for m in mb_split_lengths]
+ emb_split_lengths = a2ai.gSS
+ if emb_split_lengths: emb_split_lengths = [a2ai.lN * e * a2ai.E for e in emb_split_lengths]
+ input = torch.cat(inputs, dim=1).view([-1])
+ output = input.new_empty([a2ai.S*a2ai.lN*a2ai.E])
+ req = dist.all_to_all_single(output, input, emb_split_lengths, mb_split_lengths, async_op=True)
+ myreq.req = req
+ myreq.tensor = []
+ myreq.tensor.append(output)
+ myreq.tensor = tuple(myreq.tensor)
+ a2ai.mb_split_lengths = mb_split_lengths
+ a2ai.emb_split_lengths = emb_split_lengths
+ myreq.a2ai = a2ai
+ ctx.a2ai = a2ai
+ return myreq.tensor
+
+ @staticmethod
+ def backward(ctx, *grad_output):
+ global myreq
+ #print("All2All_Req:backward")
+ a2ai = ctx.a2ai
+ myreq.req.wait()
+ myreq.req = None
+ grad_input = myreq.tensor
+ grad_inputs = grad_input.view([a2ai.N, -1]).split(a2ai.E, dim=1)
+ grad_inputs = [gin.contiguous() for gin in grad_inputs]
+ myreq.tensor = None
+ return (None, *grad_inputs)
+
+
+class All2All_Wait(Function):
+ @staticmethod
+ def forward(ctx, *output):
+ global myreq
+ #print("All2All_Wait:forward")
+ a2ai = myreq.a2ai
+ ctx.a2ai = a2ai
+ myreq.req.wait()
+ myreq.req = None
+ myreq.tensor = None
+ emb_split_lengths = a2ai.emb_split_lengths if a2ai.emb_split_lengths else a2ai.lS * a2ai.lN * a2ai.E
+ outputs = output[0].split(emb_split_lengths)
+ outputs = tuple([out.view([a2ai.lN, -1]) for out in outputs])
+ return outputs
+
+ @staticmethod
+ def backward(ctx, *grad_outputs):
+ global myreq
+ #print("All2All_Wait:backward")
+ a2ai = ctx.a2ai
+ grad_outputs = [gout.contiguous().view([-1]) for gout in grad_outputs]
+ grad_output = torch.cat(grad_outputs)
+ grad_input = grad_output.new_empty([a2ai.N * a2ai.lS * a2ai.E])
+ req = dist.all_to_all_single(grad_input, grad_output, a2ai.mb_split_lengths, a2ai.emb_split_lengths, async_op=True)
+ myreq.req = req
+ myreq.tensor = grad_input
+ return (grad_output,)
+
+class AllGather(Function):
+
+ @staticmethod
+ def forward(ctx, input, global_lengths, dim=0):
+ if not isinstance(global_lengths, (list, tuple)):
+ global_lengths = [global_lengths] * my_size
+ my_rank = dist.get_rank()
+ assert(len(global_lengths) == my_size)
+ assert(global_lengths[my_rank] == input.size(dim))
+ local_start = sum(global_lengths[:my_rank])
+
+ output_size = list(input.size())
+
+ ctx.dim = dim
+ ctx.local_start = local_start
+ ctx.local_length = global_lengths[my_rank]
+
+ input = input.contiguous()
+ if dim == 0:
+ out_len = sum(global_lengths)
+ output_size[dim] = out_len
+ output = input.new_empty(output_size)
+ gather_list = list(output.split(global_lengths, dim=0))
+ else:
+ gather_list = [torch.empty_like(input) for _ in range(my_size)]
+ gather_list = []
+ for l in global_lengths:
+ output_size[dim] = l
+ gather_list.append(input.new_empty(output_size))
+
+ dist.all_gather(gather_list, input)
+
+ if dim != 0:
+ output = torch.cat(gather_list, dim=dim)
+
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ # print("Inside All2AllBackward")
+ dim = ctx.dim
+ start = ctx.local_start
+ length = ctx.local_length
+
+ grad_input = grad_output.narrow(dim, start, length)
+
+ return (grad_input, None, None)
+
+class All2AllInfo(object):
+ pass
+
+def alltoall(inputs, per_rank_split_lengths):
+ global myreq
+ N, E = inputs[0].size()
+ a2ai = All2AllInfo()
+ a2ai.lS = len(inputs)
+ a2ai.gSS = per_rank_split_lengths
+ a2ai.lN, a2ai.gNS = get_split_lengths(N)
+ a2ai.E = E
+ a2ai.N = N
+ a2ai.S = sum(per_rank_split_lengths) if per_rank_split_lengths else a2ai.lS * my_size
+ if a2a_impl == '' and alltoall_supported or a2a_impl == 'alltoall':
+ output = All2All_Req.apply(a2ai, *inputs)
+ myreq.WaitFunction = All2All_Wait
+ elif a2a_impl == '' or a2a_impl == 'scatter':
+ #print("Using All2All_Scatter_Req")
+ output = All2All_Scatter_Req.apply(a2ai, *inputs)
+ myreq.WaitFunction = All2All_Scatter_Wait
+ elif a2a_impl == 'scatter_list':
+ #print("Using All2All_ScatterList_Req")
+ output = All2All_ScatterList_Req.apply(a2ai, *inputs)
+ myreq.WaitFunction = All2All_ScatterList_Wait
+ else:
+ print("Unknown value set for DLRM_ALLTOALL_IMPL (%s), please use one of [alltoall, scatter, scatter_list]" % a2a_impl)
+ return myreq
+
+def shuffle_data(inputs):
+ input = torch.cat(inputs)
+ output = input.new_empty(input.size())
+ req = dist.all_to_all_single(output, input)
+ output = output.reshape(my_size, -1)
+ return output
+
+
+def all_gather(input, lengths, dim=0):
+ #print("lengths: ", lengths)
+ if not lengths: lengths = [input.size(0)] * my_size
+ return AllGather.apply(input, lengths, dim)
+
+def barrier():
+ if my_size > 1:
+ dist.barrier()
+
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt
new file mode 100644
index 00000000000..859bbfc346b
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt
@@ -0,0 +1,8 @@
+future
+numpy
+pydot
+neural-compressor
+scikit-learn
+tqdm
+torch>=1.11.0
+intel_extension_for_pytorch>=1.11.0
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh
new file mode 100755
index 00000000000..dc593308678
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ tuned_checkpoint=saved_results
+ batch_size=16384
+ iters=100
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo ${var} |cut -f2 -d=)
+ ;;
+ --config=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ MODEL_SCRIPT=dlrm_s_pytorch.py
+
+ # Create the output directory in case it doesn't already exist
+ mkdir -p ${tuned_checkpoint}/dlrm_inference_accuracy_log
+
+ LOG=${tuned_checkpoint}/dlrm_inference_accuracy_log
+
+ CORES=`lscpu | grep Core | awk '{print $4}'`
+
+ ARGS=""
+ if [[ ${int8} == "true" ]]; then
+ echo "running int8 path"
+ ARGS="$ARGS --int8"
+ else
+ echo "running fp32 path"
+ fi
+
+ if [[ ${mode} == "accuracy" ]]; then
+ python -u $MODEL_SCRIPT \
+ --raw-data-file=${dataset_location}/day --processed-data-file=${dataset_location}/terabyte_processed.npz \
+ --data-set=terabyte \
+ --memory-map --mlperf-bin-loader --round-targets=True --learning-rate=1.0 \
+ --arch-mlp-bot=13-512-256-128 --arch-mlp-top=1024-1024-512-256-1 \
+ --arch-sparse-feature-size=128 --max-ind-range=40000000 \
+ --numpy-rand-seed=727 --inference-only --ipex-interaction \
+ --print-freq=100 --print-time --mini-batch-size=2048 --test-mini-batch-size=16384 \
+ --save-model ${tuned_checkpoint} --test-freq=2048 --print-auc $ARGS \
+ --load-model=${input_model} --accuracy_only
+ elif [[ ${mode} == "performance" ]]; then
+ incbench --num_cores_per_instance 4 -u $MODEL_SCRIPT \
+ --raw-data-file=${dataset_location}/day --processed-data-file=${dataset_location}/terabyte_processed.npz \
+ --data-set=terabyte --benchmark \
+ --memory-map --mlperf-bin-loader --round-targets=True --learning-rate=1.0 \
+ --arch-mlp-bot=13-512-256-128 --arch-mlp-top=1024-1024-512-256-1 \
+ --arch-sparse-feature-size=128 --max-ind-range=40000000 --ipex-interaction \
+ --numpy-rand-seed=727 --inference-only --num-batches=1000 \
+ --print-freq=10 --print-time --mini-batch-size=128 --test-mini-batch-size=${batch_size} \
+ --save-model ${tuned_checkpoint}
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh
new file mode 100755
index 00000000000..58d8b1fe491
--- /dev/null
+++ b/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ tuned_checkpoint=saved_results
+ for var in "$@"
+ do
+ case $var in
+ --topology=*)
+ topology=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ tuned_checkpoint=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+CORES=`lscpu | grep Core | awk '{print $4}'`
+# use first socket
+numa_cmd="numactl -C 0-$((CORES-1)) "
+echo "will run on core 0-$((CORES-1)) on socket 0"
+
+export OMP_NUM_THREADS=$CORES
+
+# run_tuning
+function run_tuning {
+ MODEL_SCRIPT=dlrm_s_pytorch.py
+
+ # Create the output directory in case it doesn't already exist
+ mkdir -p ${tuned_checkpoint}/dlrm_inference_accuracy_log
+
+ LOG=${tuned_checkpoint}/dlrm_inference_accuracy_log
+ CORES=`lscpu | grep Core | awk '{print $4}'`
+ ARGS=""
+
+ $numa_cmd python -u $MODEL_SCRIPT \
+ --raw-data-file=${dataset_location}/day --processed-data-file=${dataset_location}/terabyte_processed.npz \
+ --data-set=terabyte \
+ --memory-map --mlperf-bin-loader --round-targets=True --learning-rate=1.0 \
+ --arch-mlp-bot=13-512-256-128 --arch-mlp-top=1024-1024-512-256-1 \
+ --arch-sparse-feature-size=128 --max-ind-range=40000000 \
+ --numpy-rand-seed=727 --inference-only --ipex-interaction \
+ --print-freq=100 --print-time --mini-batch-size=2048 --test-mini-batch-size=16384 \
+ --test-freq=2048 --print-auc --tune --save-model=${tuned_checkpoint} $ARGS \
+ --load-model=${input_model} --num-cpu-cores=${CORES} | tee $LOG
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/README.md b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/README.md
new file mode 100644
index 00000000000..057b3559756
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/README.md
@@ -0,0 +1,109 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the TensorFlow GraphSAGE model tuning results. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Python 3.6 or a higher version is recommended.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Installation Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Model
+Download Frozen graph:
+```shell
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_12_0/graphsage_frozen_model.pb
+```
+
+## 3. Prepare Dataset
+
+```shell
+wget https://snap.stanford.edu/graphsage/ppi.zip
+unzip ppi.zip
+```
+
+# Run
+
+## 1. Quantization
+
+ ```shell
+ # The cmd of running graphsage
+ bash run_quant.sh --input_model=./graphsage_frozen_model.pb --output_model=./nc_graphsage_int8_model.pb --dataset_location=./ppi
+ ```
+
+## 2. Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./nc_graphsage_int8_model.pb --dataset_location=./ppi --mode=performance
+ ```
+
+Details of enabling Intel® Neural Compressor on graphsage for Tensorflow.
+=========================
+
+This is a tutorial of how to enable graphsage model with Intel® Neural Compressor.
+## User Code Analysis
+User specifies fp32 *model*, calibration dataset *calib_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself.
+
+For graphsage, our philosophy is to enable the model with minimal changes. Hence we need to make two changes to the original code: implement a calibration dataloader and make the necessary changes to *eval_func*.
+
+### Code update
+
+After the preparation step is done, we just need to update main.py as below.
+```python
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+ from neural_compressor.tensorflow.utils import BaseDataLoader
+
+ dataset = CustomDataset()
+ calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1, collate_fn=collate_function)
+ quant_config = StaticQuantConfig()
+ q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+ q_model.save(args.output_graph)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ elif args.mode == 'accuracy':
+ acc_result = evaluate(args.input_graph)
+ print("Batch size = %d" % args.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+
+```
+
+The quantize_model() function will return the best quantized model found within the timeout constraint.
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py
new file mode 100644
index 00000000000..e2a1d28d7d7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py
@@ -0,0 +1,80 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import random
+import json
+import sys
+import os
+
+import networkx as nx
+from networkx.readwrite import json_graph
+
+
+def load_data(prefix, normalize=True, load_walks=False):
+ G_data = json.load(open(prefix + "-G.json"))
+ G = json_graph.node_link_graph(G_data)
+ if isinstance(list(G.nodes())[0], int):
+ conversion = lambda n : int(n)
+ else:
+ conversion = lambda n : n
+
+ if os.path.exists(prefix + "-feats.npy"):
+ feats = np.load(prefix + "-feats.npy")
+ else:
+ print("No features present.. Only identity features will be used.")
+ feats = None
+ id_map = json.load(open(prefix + "-id_map.json"))
+ id_map = {conversion(k):int(v) for k,v in id_map.items()}
+ walks = []
+ class_map = json.load(open(prefix + "-class_map.json"))
+ if isinstance(list(class_map.values())[0], list):
+ lab_conversion = lambda n : n
+ else:
+ lab_conversion = lambda n : int(n)
+
+ class_map = {conversion(k):lab_conversion(v) for k,v in class_map.items()}
+
+ ## Remove all nodes that do not have val/test annotations
+ ## (necessary because of networkx weirdness with the Reddit data)
+ broken_count = 0
+ for node in G.nodes():
+ if not 'val' in G.nodes[node] or not 'test' in G.nodes[node]:
+ G.remove_node(node)
+ broken_count += 1
+ print("Removed {:d} nodes that lacked proper annotations due to networkx versioning issues".format(broken_count))
+
+ ## Make sure the graph has edge train_removed annotations
+ ## (some datasets might already have this..)
+ print("Loaded data.. now preprocessing..")
+ for edge in G.edges():
+ if (G.nodes[edge[0]]['val'] or G.nodes[edge[1]]['val'] or
+ G.nodes[edge[0]]['test'] or G.nodes[edge[1]]['test']):
+ G[edge[0]][edge[1]]['train_removed'] = True
+ else:
+ G[edge[0]][edge[1]]['train_removed'] = False
+
+ if normalize and not feats is None:
+ from sklearn.preprocessing import StandardScaler
+ train_ids = np.array([id_map[n] for n in G.nodes() if not G.nodes[n]['val'] and not G.nodes[n]['test']])
+ train_feats = feats[train_ids]
+ scaler = StandardScaler()
+ scaler.fit(train_feats)
+ feats = scaler.transform(feats)
+
+ return G, feats, id_map, walks, class_map
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/main.py b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/main.py
new file mode 100644
index 00000000000..87837510d3d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/main.py
@@ -0,0 +1,189 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import time
+import utils
+import dataloader
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.python.platform import tf_logging
+from tensorflow.core.protobuf import rewriter_config_pb2
+
+from argparse import ArgumentParser
+
+np.random.seed(123)
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+arg_parser = ArgumentParser(description='Parse args')
+arg_parser.add_argument('-g', "--input-graph",
+ help='Specify the input graph for the transform tool',
+ dest='input_graph')
+arg_parser.add_argument("--output-graph",
+ help='Specify tune result model save dir',
+ dest='output_graph')
+arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
+arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
+arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
+arg_parser.add_argument('--dataset_location', dest='dataset_location',
+ help='location of calibration dataset and evaluate dataset')
+arg_parser.add_argument('-e', "--num-inter-threads",
+ help='The number of inter-thread.',
+ dest='num_inter_threads', type=int, default=0)
+
+arg_parser.add_argument('-a', "--num-intra-threads",
+ help='The number of intra-thread.',
+ dest='num_intra_threads', type=int, default=0)
+arg_parser.add_argument('--batch_size', type=int, default=1000, dest='batch_size', help='batch_size of benchmark')
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations')
+args = arg_parser.parse_args()
+
+def prepare_Dataset():
+ data_location = args.dataset_location
+ pretrained_model = args.input_graph
+ data = dataloader.load_data(prefix=data_location+'/ppi')
+ G = data[0]
+ features = data[1]
+ id_map = data[2]
+ class_map = data[4]
+ if isinstance(list(class_map.values())[0], list):
+ num_classes = len(list(class_map.values())[0])
+ else:
+ num_classes = len(set(class_map.values()))
+
+ context_pairs = data[3]
+ placeholders = utils.construct_placeholders(num_classes)
+ minibatch = utils.NodeMinibatchIterator(G,
+ id_map,
+ placeholders,
+ class_map,
+ num_classes,
+ batch_size=args.batch_size,
+ context_pairs = context_pairs)
+ return minibatch
+
+class CustomDataset(object):
+ def __init__(self):
+ self.batch1 = []
+ self.batch_labels = []
+ minibatch = prepare_Dataset()
+ self.parse_minibatch(minibatch)
+
+ def parse_minibatch(self, minibatch):
+ iter_num = 0
+ finished = False
+ while not finished:
+ feed_dict_val, batch_labels, finished, _ = minibatch.incremental_node_val_feed_dict(args.batch_size, iter_num, test=True)
+ self.batch1.append(feed_dict_val['batch1:0'])
+ self.batch_labels.append(batch_labels)
+ iter_num += 1
+
+ def __getitem__(self, index):
+ return (self.batch1[index], len(self.batch1[index])), self.batch_labels[index]
+
+ def __len__(self):
+ return len(self.batch1)
+
+def evaluate(model):
+ """Custom evaluate function to estimate the accuracy of the model.
+
+ Args:
+ model (tf.Graph_def): The input model graph
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ from neural_compressor.tensorflow import Model
+ model = Model(model)
+ output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+ model.output_tensor[0]
+ iteration = -1
+ minibatch = prepare_Dataset()
+ if args.benchmark and args.mode == 'performance':
+ iteration = args.iters
+
+ #output_tensor = model.sess.graph.get_tensor_by_name('Sigmoid:0')
+ def eval_func(size, output_tensor, minibatch, test):
+ t_test = time.time()
+ val_losses = []
+ val_preds = []
+ labels = []
+ iter_num = 0
+ finished = False
+ total_time = 0
+ while not finished:
+ feed_dict_val, batch_labels, finished, _ = minibatch.incremental_node_val_feed_dict(size, iter_num, test=True)
+ tf_logging.warn('\n---> Start iteration {0}'.format(str(iter_num)))
+ start_time = time.time()
+ node_outs_val = model.sess.run([output_tensor],feed_dict=feed_dict_val)
+ time_consume = time.time() - start_time
+ val_preds.append(node_outs_val[0].astype(float))
+ labels.append(batch_labels)
+ iter_num += 1
+ total_time += time_consume
+ if iteration != -1 and iter_num >= iteration:
+ break
+ tf_logging.warn('\n---> Stop iteration {0}'.format(str(iter_num)))
+ val_preds = np.vstack(val_preds)
+ labels = np.vstack(labels)
+ f1_scores = utils.calc_f1(labels, val_preds)
+ time_average = total_time / iter_num
+ return f1_scores, (time.time() - t_test)/iter_num, time_average
+
+ test_f1_micro, duration, time_average = eval_func(args.batch_size, output_tensor, minibatch, test=True)
+ if args.benchmark and args.mode == 'performance':
+ latency = time_average / args.batch_size
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ return test_f1_micro
+
+def collate_function(batch):
+ return (batch[0][0][0], batch[0][0][1]), batch[0][1]
+
+class eval_graphsage_optimized_graph:
+ """Evaluate image classifier with optimized TensorFlow graph."""
+
+ def run(self):
+ """This is neural_compressor function include tuning, export and benchmark option."""
+ from neural_compressor.common import set_random_seed
+ set_random_seed(9527)
+
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+ from neural_compressor.tensorflow.utils import BaseDataLoader
+
+ dataset = CustomDataset()
+ calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1, collate_fn=collate_function)
+ quant_config = StaticQuantConfig()
+ q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+ q_model.save(args.output_graph)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ elif args.mode == 'accuracy':
+ acc_result = evaluate(args.input_graph)
+ print("Batch size = %d" % args.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+ evaluate_opt_graph = eval_graphsage_optimized_graph()
+ evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..a6c2afe448c
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+networkx
+scikit-learn
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..89c7cc19b6e
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=1000
+ iters=100
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location "${dataset_location}" \
+ --batch_size ${batch_size} \
+ --iters ${iters} \
+ --benchmark
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..f7046cc3df7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo "$var" |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input-graph "${input_model}" \
+ --output-graph "${output_model}" \
+ --dataset_location "${dataset_location}" \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py
new file mode 100644
index 00000000000..babe7146f5c
--- /dev/null
+++ b/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import random
+import json
+import sys
+import os
+import json
+import networkx as nx
+from networkx.readwrite import json_graph
+import tensorflow as tf
+from sklearn import metrics
+
+def calc_f1(y_true, y_pred):
+ y_pred[y_pred > 0.5] = 1
+ y_pred[y_pred <= 0.5] = 0
+ return metrics.f1_score(y_true, y_pred, average="micro")
+
+def construct_placeholders(num_classes):
+ # Define placeholders
+ tf.compat.v1.disable_eager_execution()
+ placeholders = {
+ 'labels' : tf.compat.v1.placeholder(tf.float32, shape=(None, num_classes), name='labels'),
+ 'batch' : tf.compat.v1.placeholder(tf.int32, shape=(None), name='batch1'),
+ 'batch_size' : tf.compat.v1.placeholder(tf.int32, name='batch_size'),
+ }
+ return placeholders
+
+
+class NodeMinibatchIterator(object):
+
+ """
+ This minibatch iterator iterates over nodes for supervised learning.
+
+ G -- networkx graph
+ id2idx -- dict mapping node ids to integer values indexing feature tensor
+ placeholders -- standard tensorflow placeholders object for feeding
+ label_map -- map from node ids to class values (integer or list)
+ num_classes -- number of output classes
+ batch_size -- size of the minibatches
+ max_degree -- maximum size of the downsampled adjacency lists
+ """
+ # (G,
+ # id_map,
+ # placeholders,
+ # class_map,
+ # num_classes,
+ # batch_size=FLAGS.batch_size,
+ # max_degree=FLAGS.max_degree,
+ # context_pairs = context_pairs)
+ def __init__(self, G, id2idx,
+ placeholders, label_map, num_classes,
+ batch_size=100, max_degree=25,
+ **kwargs):
+
+ self.G = G
+ self.nodes = G.nodes()
+ self.id2idx = id2idx
+ self.placeholders = placeholders
+ self.batch_size = batch_size
+ self.max_degree = max_degree
+ self.batch_num = 0
+ self.label_map = label_map
+ self.num_classes = num_classes
+ self.test_nodes = [n for n in self.G.nodes() if self.G.nodes[n]['test']]
+
+ def _make_label_vec(self, node):
+ label = self.label_map[node]
+ if isinstance(label, list):
+ label_vec = np.array(label)
+ else:
+ label_vec = np.zeros((self.num_classes))
+ class_ind = self.label_map[node]
+ label_vec[class_ind] = 1
+ return label_vec
+ def batch_feed_dict(self, batch_nodes, val=False):
+ batch1id = batch_nodes
+ batch1 = [self.id2idx[n] for n in batch1id]
+
+ labels = np.vstack([self._make_label_vec(node) for node in batch1id])
+ feed_dict = dict()
+ feed_dict.update({'batch1:0': batch1})
+ feed_dict.update({'batch_size:0' : len(batch1)})
+ return feed_dict, labels
+
+
+ def incremental_node_val_feed_dict(self, size, iter_num, test=False):
+ if test:
+ val_nodes = self.test_nodes
+ else:
+ val_nodes = self.val_nodes
+ val_node_subset = val_nodes[iter_num*size:min((iter_num+1)*size,
+ len(val_nodes))]
+
+ # add a dummy neighbor
+ ret_val = self.batch_feed_dict(val_node_subset)
+ return ret_val[0], ret_val[1], (iter_num+1)*size >= len(val_nodes), val_node_subset
diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md
new file mode 100644
index 00000000000..34eb64fcf74
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md
@@ -0,0 +1,75 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the inception_v3 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The models is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Python 3.9 or a higher version is recommended.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare pre-trained model
+
+ Download pre-trained PB
+ ```shell
+ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb
+ ```
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run
+
+## 1. Quantization
+
+ ```shell
+ bash run_quant.sh --input_model=/PATH/TO/inceptionv3_fp32_pretrained_model.pb \
+ --output_model=./nc_inception_v3.pb --dataset_location=/path/to/ImageNet/
+ ```
+
+## 2. Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./nc_inception_v3.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32
+ bash run_benchmark.sh --input_model=./nc_inception_v3.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1
+ ```
diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..ecfca2348cd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py
@@ -0,0 +1,511 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+ """Parse features in Example proto.
+
+ Returns:
+ tuple of parsed image and label
+ """
+
+ def __call__(self, sample):
+ """Parse features in example."""
+ # Dense features in Example proto.
+ feature_map = {
+ "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+ "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+ }
+
+ sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+ # Sparse features in Example proto.
+ feature_map.update(
+ {
+ k: sparse_float32
+ for k in [
+ "image/object/bbox/xmin",
+ "image/object/bbox/ymin",
+ "image/object/bbox/xmax",
+ "image/object/bbox/ymax",
+ ]
+ }
+ )
+
+ features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+ label = tf.cast(features["image/class/label"], dtype=tf.int32)
+ image = features["image/encoded"]
+ image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+ return (image, label)
+
+
+class BilinearImagenetTransform(object):
+ """Combination of a series of transforms which is applicable to images in Imagenet.
+
+ Args:
+ height: Height of the result
+ width:Width of the result
+ central_fraction(float, default=0.875):fraction of size to crop
+ mean_value(list, default=[0.0,0.0,0.0]):means for each channel
+ scale(float, default=1.0):std value
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0):
+ """Initialize `BilinearImagenetTransform` class."""
+ self.height = height
+ self.width = width
+ self.mean_value = mean_value
+ self.scale = scale
+ self.central_fraction = central_fraction
+
+ # sample is (images, labels)
+ def __call__(self, sample):
+ """Convert `BilinearImagenetTransform` feature."""
+ image, label = sample
+ if image.dtype is not tf.float32:
+ image = tf.image.convert_image_dtype(image, dtype=tf.float32)
+ # Crop the central region of the image containing 87.5% area of the original image.
+ if self.central_fraction:
+ image = tf.image.central_crop(image, central_fraction=self.central_fraction)
+
+ if self.height and self.width:
+ # Resize the image to the specified height and width.
+ image = tf.expand_dims(image, 0)
+ image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR)
+ image = tf.squeeze(image, [0])
+
+ image = tf.subtract(image, 0.5)
+ image = tf.multiply(image, 2.0)
+ means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+ image = (image - means) * self.scale
+ return (image, label)
+
+
+class ComposeTransform(object):
+ """Composes several transforms together.
+
+ Args:
+ transform_list (list of Transform objects): list of transforms to compose
+
+ Returns:
+ sample (tuple): tuple of processed image and label
+ """
+
+ def __init__(self, transform_list):
+ """Initialize `ComposeTransform` class."""
+ self.transform_list = transform_list
+
+ def __call__(self, sample):
+ """Call transforms in transform_list."""
+ for transform in self.transform_list:
+ sample = transform(sample)
+ return sample
+
+
+class ShiftRescale(object):
+ """Label shift by 1 and rescale.
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __call__(self, sample):
+ image, label = sample
+ label -= 1
+ image = (image - 127.5) / 127.5
+ return (image, label)
+
+
+class ImageRecordDataset(object):
+ """Tensorflow imageNet database in tf record format.
+
+ Please arrange data in this way:
+ root/validation-000-of-100
+ root/validation-001-of-100
+ ...
+ root/validation-099-of-100
+ The file name needs to follow this pattern: '* - * -of- *'
+
+ Args: root (str): Root directory of dataset.
+ transform (transform object, default=None): transform to process input data.
+ filter (Filter objects, default=None): filter out examples according
+ to specific conditions.
+ """
+
+ """Configuration for Imagenet dataset."""
+
+ def __new__(cls, root, transform=None, filter=None):
+ """Build a new object of TensorflowImageRecord class."""
+ from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module
+
+ glob_pattern = os.path.join(root, "*-*-of-*")
+ file_names = gfile.Glob(glob_pattern)
+ if not file_names:
+ raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.data.experimental import parallel_interleave
+
+ ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+ ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+ if transform is not None:
+ transform.transform_list.insert(0, ParseDecodeImagenet())
+ else:
+ transform = ParseDecodeImagenet()
+ ds = ds.map(transform, num_parallel_calls=None)
+ ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned
+ return ds
+
+
+class BaseMetric(object):
+    """The base class of Metric.
+
+    Subclasses implement update/reset/result; this base only stores the
+    wrapped metric class and optional Horovod handle.
+    """
+
+    def __init__(self, metric, single_output=False, hvd=None):
+        """Initialize the basic metric.
+
+        Args:
+            metric: The metric class.
+            single_output: Whether the output is single or not, defaults to False.
+            hvd: The Horovod class for distributed training, defaults to None.
+        """
+        self._metric_cls = metric
+        # NOTE(review): _single_output is stored but never read in this class;
+        # presumably consumed by subclasses or callers — confirm before removing.
+        self._single_output = single_output
+        self._hvd = hvd
+
+    def __call__(self, *args, **kwargs):
+        """Evaluate the model predictions, and the reference.
+
+        Instantiates the wrapped metric class with the given arguments and
+        caches the instance on ``self._metric``.
+
+        Returns:
+            The class itself.
+        """
+        self._metric = self._metric_cls(*args, **kwargs)
+        return self
+
+    @abstractmethod
+    def update(self, preds, labels=None, sample_weight=None):
+        """Update the state that need to be evaluated.
+
+        Args:
+            preds: The prediction result.
+            labels: The reference. Defaults to None.
+            sample_weight: The sampling weight. Defaults to None.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self):
+        """Clear the predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def result(self):
+        """Evaluate the difference between predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @property
+    def metric(self):
+        """Return its metric class.
+
+        Note: this returns the wrapped metric *class*, not the instance
+        created by ``__call__`` (which is stored on ``self._metric``).
+
+        Returns:
+            The metric class.
+        """
+        return self._metric_cls
+
+    @property
+    def hvd(self):
+        """Return its hvd class.
+
+        Returns:
+            The hvd class.
+        """
+        return self._hvd
+
+    @hvd.setter
+    def hvd(self, hvd):
+        """Set its hvd.
+
+        Args:
+            hvd: The Horovod class for distributed training.
+        """
+        self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+    """Compute Top-k Accuracy classification score for Tensorflow model.
+
+    This metric computes the number of times where the correct label is among
+    the top k labels predicted.
+
+    Attributes:
+        k (int): The number of most likely outcomes considered to find the correct label.
+        num_correct: The number of predictions that were correct classified.
+        num_sample: The total number of predictions.
+    """
+
+    def __init__(self, k=1):
+        """Initialize the k, number of samples and correct predictions.
+
+        Note: BaseMetric.__init__ is intentionally not called, so base
+        attributes such as ``_hvd`` are unset; ``result`` guards this with
+        ``getattr(self, "_hvd", None)``.
+
+        Args:
+            k: The number of most likely outcomes considered to find the correct label.
+        """
+        self.k = k
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def update(self, preds, labels, sample_weight=None):
+        """Add the predictions and labels.
+
+        Builds a fresh TF graph and a v1 Session per call to evaluate
+        ``tf.nn.in_top_k`` — correct but costly; acceptable for offline eval.
+
+        Args:
+            preds: The predictions.
+            labels: The labels corresponding to the predictions.
+            sample_weight: The sample weight.
+        """
+        preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+        labels = labels.reshape([len(labels)])
+        with tf.Graph().as_default() as acc_graph:
+            topk = tf.nn.in_top_k(
+                predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+            )
+            fp32_topk = tf.cast(topk, tf.float32)
+            correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+            with tf.compat.v1.Session() as acc_sess:
+                correct = acc_sess.run(correct_tensor)
+
+        self.num_sample += len(labels)
+        self.num_correct += correct
+
+    def reset(self):
+        """Reset the number of samples and correct predictions."""
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def result(self):
+        """Compute the top-k score.
+
+        Returns:
+            The top-k score (num_correct / num_sample); 0 when no samples seen.
+        """
+        if self.num_sample == 0:
+            logger.warning("Sample num during evaluation is 0.")
+            return 0
+        elif getattr(self, "_hvd", None) is not None:  # pragma: no cover
+            # Distributed path: aggregate counts from all Horovod workers.
+            allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+            allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+            return allgather_num_correct / allgather_num_sample
+        return self.num_correct / self.num_sample
+
+    @staticmethod
+    def _topk_shape_validate(preds, labels):
+        """Normalize preds to shape (N, class_num) and labels to (N, 1).
+
+        One-hot labels are reduced to a sparse index column via argsort.
+        """
+        # preds shape can be Nxclass_num or class_num(N=1 by default)
+        # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+        if isinstance(preds, int):
+            preds = [preds]
+            preds = np.array(preds)
+        elif isinstance(preds, np.ndarray):
+            preds = np.array(preds)
+        elif isinstance(preds, list):
+            preds = np.array(preds)
+            preds = preds.reshape((-1, preds.shape[-1]))
+
+        # consider labels just int value 1x1
+        if isinstance(labels, int):
+            labels = [labels]
+            labels = np.array(labels)
+        elif isinstance(labels, tuple):
+            labels = np.array([labels])
+            labels = labels.reshape((labels.shape[-1], -1))
+        elif isinstance(labels, list):
+            if isinstance(labels[0], int):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[0], 1))
+            elif isinstance(labels[0], tuple):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[-1], -1))
+            else:
+                labels = np.array(labels)
+        # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+        # only support 2 dimension one-shot labels
+        # or 1 dimension one-hot class_num will confuse with N
+
+        if len(preds.shape) == 1:
+            N = 1
+            class_num = preds.shape[0]
+            preds = preds.reshape([-1, class_num])
+        elif len(preds.shape) >= 2:
+            N = preds.shape[0]
+            preds = preds.reshape([N, -1])
+            class_num = preds.shape[1]
+
+        label_N = labels.shape[0]
+        assert label_N == N, "labels batch size should same with preds"
+        labels = labels.reshape([N, -1])
+        # one-hot labels will have 2 dimension not equal 1
+        if labels.shape[1] != 1:
+            labels = labels.argsort()[..., -1:]
+        return preds, labels
+
+
+class TFDataLoader(object):  # pragma: no cover
+    """Tensorflow dataloader class.
+
+    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+    method to do session run, this dataloader is designed to satisfy the usage of feed dict
+    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+    Args:
+        dataset: obj. wrapper of needed data.
+        batch_size: int. batch size
+    """
+
+    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+        """Initialize `TFDataDataLoader` class."""
+        self.dataset = dataset
+        self.last_batch = last_batch
+        self.batch_size = batch_size
+        # NOTE(review): this rebinds the *local* name only — the batched dataset
+        # is discarded and self.dataset stays unbatched. Batching actually
+        # happens in _generate_dataloader; confirm this line is intentional.
+        dataset = dataset.batch(batch_size)
+
+    def batch(self, batch_size, last_batch="rollover"):
+        """Dataset return data per batch.
+
+        Args:
+            batch_size: number of samples per batch.
+            last_batch: "rollover" keeps the final partial batch; anything else drops it.
+        """
+        drop_last = False if last_batch == "rollover" else True
+        self.batch_size = batch_size
+        self.dataset = self.dataset.batch(batch_size, drop_last)
+
+    def __iter__(self):
+        """Iterate dataloader."""
+        return self._generate_dataloader(
+            self.dataset,
+            batch_size=self.batch_size,
+            last_batch=self.last_batch,
+        )
+
+    def _generate_dataloader(
+        self,
+        dataset,
+        batch_size=1,
+        last_batch="rollover",
+        collate_fn=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=None,
+        pin_memory=None,
+        distributed=False,
+    ):
+        """Yield data batch by batch.
+
+        Note: collate_fn, sampler, batch_sampler, num_workers, pin_memory and
+        distributed are accepted for interface parity but ignored in this body.
+        """
+        drop_last = False if last_batch == "rollover" else True
+
+        def check_dynamic_shape(element_spec):
+            # True when any TensorSpec in the (possibly nested) spec has an
+            # unknown element count, i.e. a dynamic shape.
+            if isinstance(element_spec, collections.abc.Sequence):
+                return any([check_dynamic_shape(ele) for ele in element_spec])
+            elif isinstance(element_spec, tf.TensorSpec):
+                return True if element_spec.shape.num_elements() is None else False
+            else:
+                raise ValueError("unrecognized element spec...")
+
+        def squeeze_output(output):
+            # Drop the leading batch-of-1 axis added by dataset.batch(1).
+            if isinstance(output, collections.abc.Sequence):
+                return [squeeze_output(ele) for ele in output]
+            elif isinstance(output, np.ndarray):
+                return np.squeeze(output, axis=0)
+            else:
+                raise ValueError("not supported output format....")
+
+        if tf.executing_eagerly():
+            # Eager path: collect `batch_size` (input, label) samples, then collate.
+            index = 0
+            outputs = []
+            for iter_tensors in dataset:
+                samples = []
+                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+                if isinstance(iter_inputs, tf.Tensor):
+                    samples.append(iter_inputs.numpy())
+                else:
+                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+                if isinstance(iter_labels, tf.Tensor):
+                    samples.append(iter_labels.numpy())
+                else:
+                    samples.append([np.array(l) for l in iter_labels])
+                index += 1
+                outputs.append(samples)
+                if index == batch_size:
+                    outputs = default_collate(outputs)
+                    yield outputs
+                    outputs = []
+                    index = 0
+            # Trailing partial batch ("rollover" behavior) — note drop_last is
+            # not consulted on this eager path.
+            if len(outputs) > 0:
+                outputs = default_collate(outputs)
+                yield outputs
+        else:
+            # Graph path: for dynamic shapes, run one sample at a time and
+            # collate manually; otherwise let tf.data batch for us.
+            try_single_batch = check_dynamic_shape(dataset.element_spec)
+            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+            iter_tensors = ds_iterator.get_next()
+            data_config = tf.compat.v1.ConfigProto()
+            data_config.use_per_session_threads = 1
+            data_config.intra_op_parallelism_threads = 1
+            data_config.inter_op_parallelism_threads = 16
+            data_sess = tf.compat.v1.Session(config=data_config)
+            # pylint: disable=no-name-in-module
+            from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+            while True:
+                if not try_single_batch:
+                    try:
+                        outputs = data_sess.run(iter_tensors)
+                        yield outputs
+                    except OutOfRangeError:
+                        data_sess.close()
+                        return
+                else:
+                    try:
+                        outputs = []
+                        for i in range(0, batch_size):
+                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+                        outputs = default_collate(outputs)
+                        yield outputs
+                    except OutOfRangeError:
+                        # Dataset exhausted mid-batch: emit the partial batch
+                        # (if any) before closing the session.
+                        if len(outputs) == 0:
+                            data_sess.close()
+                            return
+                        else:
+                            outputs = default_collate(outputs)
+                            yield outputs
+                            data_sess.close()
+                            return
diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py
new file mode 100644
index 00000000000..9b0f737b619
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py
@@ -0,0 +1,144 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+import tensorflow as tf
+import numpy as np
+
+from argparse import ArgumentParser
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ BilinearImagenetTransform,
+ TFDataLoader,
+ TopKMetric,
+)
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+# CLI definition shared by run_quant.sh (--tune) and run_benchmark.sh (--benchmark).
+arg_parser = ArgumentParser(description='Parse args')
+arg_parser.add_argument('-g', "--input-graph",
+                        help='Specify the input graph for the transform tool',
+                        dest='input_graph')
+arg_parser.add_argument("--output-graph",
+                        help='Specify tune result model save dir',
+                        dest='output_graph')
+arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
+arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
+arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
+arg_parser.add_argument('--dataset_location', dest='dataset_location',
+                        help='location of calibration dataset and evaluate dataset')
+arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark')
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+args = arg_parser.parse_args()
+
+def evaluate(model, eval_dataloader, metric, postprocess=None):
+    """Custom evaluate function to estimate the accuracy of the model.
+
+    Args:
+        model (tf.Graph_def): The input model graph
+        eval_dataloader: iterable yielding (inputs, labels) pairs.
+        metric: metric object with update()/result(), e.g. TopKMetric.
+        postprocess: accepted for interface parity but never used in this body.
+
+    Returns:
+        accuracy (float): evaluation result, the larger is better.
+    """
+    from neural_compressor.tensorflow import Model
+    model = Model(model)
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+        model.output_tensor[0]
+    # -1 means iterate the whole dataset; a positive cap applies only to
+    # performance benchmarking.
+    iteration = -1
+    if args.benchmark and args.mode == 'performance':
+        iteration = args.iters
+
+    def eval_func(dataloader):
+        latency_list = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # dataloader should keep the order and len of inputs same with input_tensor
+            inputs = np.array([inputs])
+            feed_dict = dict(zip(input_tensor, inputs))
+
+            start = time.time()
+            predictions = model.sess.run(output_tensor, feed_dict)
+            end = time.time()
+
+            metric.update(predictions, labels)
+            latency_list.append(end-start)
+            if idx + 1 == iteration:
+                break
+        # Per-image latency; assumes the dataloader batch size equals
+        # args.batch_size — TODO confirm for custom dataloaders.
+        latency = np.array(latency_list).mean() / args.batch_size
+        return latency
+
+    latency = eval_func(eval_dataloader)
+    if args.benchmark and args.mode == 'performance':
+        print("Batch size = {}".format(args.batch_size))
+        print("Latency: {:.3f} ms".format(latency * 1000))
+        print("Throughput: {:.3f} images/sec".format(1. / latency))
+    acc = metric.result()
+    return acc
+
+class eval_classifier_optimized_graph:
+    """Evaluate image classifier with optimized TensorFlow graph."""
+
+    def run(self):
+        """Run quantization tuning and/or benchmarking based on CLI flags."""
+        from neural_compressor import set_random_seed
+        set_random_seed(9527)
+
+        if args.tune:
+            from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+            # Calibration dataset: ImageNet TF records resized to 299x299
+            # (InceptionV3 input size).
+            dataset = ImageRecordDataset(
+                root=args.dataset_location,
+                transform=ComposeTransform(transform_list= [
+                        BilinearImagenetTransform(height=299, width=299),
+                    ]
+                )
+            )
+            calib_dataloader = TFDataLoader(dataset=dataset)
+
+            # Keep this first Conv2D in fp32 via a per-op override.
+            # NOTE(review): bf16 may also need to be excluded — confirm.
+            quant_config = StaticQuantConfig()
+            conv_config = StaticQuantConfig(weight_dtype="fp32", act_dtype="fp32")
+            quant_config.set_local("v0/cg/conv0/conv2d/Conv2D", conv_config)
+            q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+            q_model.save(args.output_graph)
+
+        if args.benchmark:
+            dataset = ImageRecordDataset(
+                root=args.dataset_location,
+                transform=ComposeTransform(transform_list= [
+                        BilinearImagenetTransform(height=299, width=299),
+                    ]
+                )
+            )
+            dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size)
+            # NOTE(review): `eval` shadows the Python builtin; harmless here
+            # but consider renaming (e.g. eval_model).
+            def eval(model):
+                top1 = TopKMetric(k=1)
+                return evaluate(model, dataloader, top1)
+
+            if args.mode == 'performance':
+                # Accuracy result is intentionally discarded; evaluate() prints
+                # latency/throughput in this mode.
+                eval(args.input_graph)
+            elif args.mode == 'accuracy':
+                acc_result = eval(args.input_graph)
+                print("Batch size = %d" % dataloader.batch_size)
+                print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+ evaluate_opt_graph = eval_classifier_optimized_graph()
+ evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2755e1a41ac
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+neural-compressor
diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..8ecac837cf7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Benchmark driver: parses CLI flags and runs main.py with --benchmark.
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  batch_size=32
+  iters=100
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run benchmark with the parsed parameters
+function run_benchmark {
+
+    python main.py \
+            --input-graph ${input_model} \
+            --mode ${mode} \
+            --dataset_location ${dataset_location} \
+            --batch_size ${batch_size} \
+            --benchmark \
+            --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# Quantization driver: parses CLI flags and runs main.py with --tune.
+# Usage: bash run_quant.sh --input_model=... --output_model=... --dataset_location=...
+set -x
+
+function main {
+  init_params "$@"
+  run_tuning
+
+}
+
+# init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+    python main.py \
+            --input-graph ${input_model} \
+            --output-graph ${output_model} \
+            --dataset_location ${dataset_location} \
+            --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md
new file mode 100644
index 00000000000..25755074a06
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md
@@ -0,0 +1,108 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce mobilenet_v2 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Recommend python 3.9 or higher version.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare pre-trained model
+
+The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+ ```shell
+ wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz
+ tar -xvf mobilenet_v2_1.4_224.tgz
+ ```
+
+ 2. Exporting the Inference Graph
+ ```shell
+ git clone https://github.com/tensorflow/models
+ cd models/research/slim
+ python export_inference_graph.py \
+ --alsologtostderr \
+ --model_name=mobilenet_v2 \
+ --output_file=/tmp/mobilenet_v2_inf_graph.pb
+ ```
+ Make sure to use intel-tensorflow v1.15, and pip install tf_slim.
+ #### Install Intel Tensorflow 1.15 up2
+ Check your python version and use pip install 1.15.0 up2 from links below:
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+ > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need to add the `--labels_offset=1` flag in the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for mobilenet_v2 the output layer name is `MobilenetV2/Predictions/Reshape_1`
+
+ 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+ --input_graph=/tmp/mobilenet_v2_inf_graph.pb \
+ --input_checkpoint=./mobilenet_v2.ckpt \
+ --input_binary=true \
+ --output_graph=./frozen_mobilenet_v2.pb \
+ --output_node_names=MobilenetV2/Predictions/Reshape_1
+ ```
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, you can use the command below to convert it to the tf records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run
+
+## 1. Quantization
+
+ ```shell
+ bash run_quant.sh --input_model=/PATH/TO/frozen_mobilenet_v2.pb \
+ --output_model=./nc_mobilenet_v2.pb --dataset_location=/path/to/ImageNet/
+ ```
+
+## 2. Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./nc_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32
+ bash run_benchmark.sh --input_model=./nc_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1
+ ```
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..ecfca2348cd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py
@@ -0,0 +1,511 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+    """Parse features in Example proto.
+
+    Returns:
+        tuple of parsed image and label
+    """
+
+    def __call__(self, sample):
+        """Parse one serialized Example into a decoded (image, label) pair."""
+        # Dense features in Example proto.
+        feature_map = {
+            "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+            "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+        }
+
+        sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+        # Sparse features in Example proto.
+        # NOTE: bbox features are declared for parsing but not used in the
+        # returned tuple.
+        feature_map.update(
+            {
+                k: sparse_float32
+                for k in [
+                    "image/object/bbox/xmin",
+                    "image/object/bbox/ymin",
+                    "image/object/bbox/xmax",
+                    "image/object/bbox/ymax",
+                ]
+            }
+        )
+
+        features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+        # Label keeps shape [1] as declared by FixedLenFeature([1], ...).
+        label = tf.cast(features["image/class/label"], dtype=tf.int32)
+        image = features["image/encoded"]
+        image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+        return (image, label)
+
+
+class BilinearImagenetTransform(object):
+    """Combination of a series of transforms which is applicable to images in Imagenet.
+
+    Args:
+        height: Height of the result
+        width: Width of the result
+        central_fraction (float, default=0.875): fraction of size to crop
+        mean_value (list, default=[0.0, 0.0, 0.0]): means for each channel
+        scale (float, default=1.0): std value
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    # NOTE(review): mutable default argument; safe here because mean_value is
+    # only read, never mutated.
+    def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0):
+        """Initialize `BilinearImagenetTransform` class."""
+        self.height = height
+        self.width = width
+        self.mean_value = mean_value
+        self.scale = scale
+        self.central_fraction = central_fraction
+
+    # sample is (images, labels)
+    def __call__(self, sample):
+        """Convert `BilinearImagenetTransform` feature."""
+        image, label = sample
+        if image.dtype is not tf.float32:
+            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
+        # Crop the central region of the image containing 87.5% area of the original image.
+        if self.central_fraction:
+            image = tf.image.central_crop(image, central_fraction=self.central_fraction)
+
+        if self.height and self.width:
+            # Resize the image to the specified height and width.
+            image = tf.expand_dims(image, 0)
+            image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR)
+            image = tf.squeeze(image, [0])
+
+        # Map [0, 1] floats to [-1, 1], then apply per-channel mean/scale.
+        image = tf.subtract(image, 0.5)
+        image = tf.multiply(image, 2.0)
+        means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+        image = (image - means) * self.scale
+        return (image, label)
+
+
+class ComposeTransform(object):
+    """Composes several transforms together.
+
+    Args:
+        transform_list (list of Transform objects): list of transforms to compose
+
+    Returns:
+        sample (tuple): tuple of processed image and label
+    """
+
+    def __init__(self, transform_list):
+        """Initialize `ComposeTransform` class."""
+        self.transform_list = transform_list
+
+    def __call__(self, sample):
+        """Apply each transform to the sample, in list order."""
+        for transform in self.transform_list:
+            sample = transform(sample)
+        return sample
+
+
+class ShiftRescale(object):
+    """Label shift by 1 and rescale.
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __call__(self, sample):
+        """Shift the label down by one and rescale pixel values.
+
+        (image - 127.5) / 127.5 maps uint8-range [0, 255] input to [-1, 1]
+        — assumes uint8-range input, TODO confirm at call sites.
+        """
+        image, label = sample
+        label -= 1
+        image = (image - 127.5) / 127.5
+        return (image, label)
+
+
+class ImageRecordDataset(object):
+    """Tensorflow imageNet database in tf record format.
+
+    Note: ``__new__`` returns a ``tf.data.Dataset``, not an
+    ``ImageRecordDataset`` instance — the class is a dataset factory.
+
+    Please arrange data in this way:
+        root/validation-000-of-100
+        root/validation-001-of-100
+        ...
+        root/validation-099-of-100
+    The file name needs to follow this pattern: '* - * -of- *'
+
+    Args: root (str): Root directory of dataset.
+          transform (transform object, default=None): transform to process input data.
+          filter (Filter objects, default=None): filter out examples according
+                  to specific conditions.
+    """
+
+    # NOTE(review): stray bare string below is a no-op statement, not a docstring.
+    """Configuration for Imagenet dataset."""
+
+    def __new__(cls, root, transform=None, filter=None):
+        """Build a new object of TensorflowImageRecord class."""
+        from tensorflow.python.platform import gfile  # pylint: disable=no-name-in-module
+
+        glob_pattern = os.path.join(root, "*-*-of-*")
+        file_names = gfile.Glob(glob_pattern)
+        if not file_names:
+            raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+        # pylint: disable=no-name-in-module
+        from tensorflow.python.data.experimental import parallel_interleave
+
+        ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+        ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+        # Record parsing always runs first; user transforms are applied after.
+        if transform is not None:
+            transform.transform_list.insert(0, ParseDecodeImagenet())
+        else:
+            transform = ParseDecodeImagenet()
+        ds = ds.map(transform, num_parallel_calls=None)
+        ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)  # this number can be tuned
+        return ds
+
+
+class BaseMetric(object):
+    """The base class of Metric.
+
+    Subclasses implement update/reset/result; this base only stores the
+    wrapped metric class and optional Horovod handle.
+    """
+
+    def __init__(self, metric, single_output=False, hvd=None):
+        """Initialize the basic metric.
+
+        Args:
+            metric: The metric class.
+            single_output: Whether the output is single or not, defaults to False.
+            hvd: The Horovod class for distributed training, defaults to None.
+        """
+        self._metric_cls = metric
+        # NOTE(review): _single_output is stored but never read in this class;
+        # presumably consumed by subclasses or callers — confirm before removing.
+        self._single_output = single_output
+        self._hvd = hvd
+
+    def __call__(self, *args, **kwargs):
+        """Evaluate the model predictions, and the reference.
+
+        Instantiates the wrapped metric class with the given arguments and
+        caches the instance on ``self._metric``.
+
+        Returns:
+            The class itself.
+        """
+        self._metric = self._metric_cls(*args, **kwargs)
+        return self
+
+    @abstractmethod
+    def update(self, preds, labels=None, sample_weight=None):
+        """Update the state that need to be evaluated.
+
+        Args:
+            preds: The prediction result.
+            labels: The reference. Defaults to None.
+            sample_weight: The sampling weight. Defaults to None.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self):
+        """Clear the predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def result(self):
+        """Evaluate the difference between predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @property
+    def metric(self):
+        """Return its metric class.
+
+        Note: this returns the wrapped metric *class*, not the instance
+        created by ``__call__`` (which is stored on ``self._metric``).
+
+        Returns:
+            The metric class.
+        """
+        return self._metric_cls
+
+    @property
+    def hvd(self):
+        """Return its hvd class.
+
+        Returns:
+            The hvd class.
+        """
+        return self._hvd
+
+    @hvd.setter
+    def hvd(self, hvd):
+        """Set its hvd.
+
+        Args:
+            hvd: The Horovod class for distributed training.
+        """
+        self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+    """Compute Top-k Accuracy classification score for Tensorflow model.
+
+    This metric computes the number of times where the correct label is among
+    the top k labels predicted.
+
+    Attributes:
+        k (int): The number of most likely outcomes considered to find the correct label.
+        num_correct: The number of predictions that were correct classified.
+        num_sample: The total number of predictions.
+    """
+
+    def __init__(self, k=1):
+        """Initialize the k, number of samples and correct predictions.
+
+        Note: BaseMetric.__init__ is intentionally not called, so base
+        attributes such as ``_hvd`` are unset; ``result`` guards this with
+        ``getattr(self, "_hvd", None)``.
+
+        Args:
+            k: The number of most likely outcomes considered to find the correct label.
+        """
+        self.k = k
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def update(self, preds, labels, sample_weight=None):
+        """Add the predictions and labels.
+
+        Builds a fresh TF graph and a v1 Session per call to evaluate
+        ``tf.nn.in_top_k`` — correct but costly; acceptable for offline eval.
+
+        Args:
+            preds: The predictions.
+            labels: The labels corresponding to the predictions.
+            sample_weight: The sample weight.
+        """
+        preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+        labels = labels.reshape([len(labels)])
+        with tf.Graph().as_default() as acc_graph:
+            topk = tf.nn.in_top_k(
+                predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+            )
+            fp32_topk = tf.cast(topk, tf.float32)
+            correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+            with tf.compat.v1.Session() as acc_sess:
+                correct = acc_sess.run(correct_tensor)
+
+        self.num_sample += len(labels)
+        self.num_correct += correct
+
+    def reset(self):
+        """Reset the number of samples and correct predictions."""
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def result(self):
+        """Compute the top-k score.
+
+        Returns:
+            The top-k score (num_correct / num_sample); 0 when no samples seen.
+        """
+        if self.num_sample == 0:
+            logger.warning("Sample num during evaluation is 0.")
+            return 0
+        elif getattr(self, "_hvd", None) is not None:  # pragma: no cover
+            # Distributed path: aggregate counts from all Horovod workers.
+            allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+            allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+            return allgather_num_correct / allgather_num_sample
+        return self.num_correct / self.num_sample
+
+    @staticmethod
+    def _topk_shape_validate(preds, labels):
+        """Normalize preds to shape (N, class_num) and labels to (N, 1).
+
+        One-hot labels are reduced to a sparse index column via argsort.
+        """
+        # preds shape can be Nxclass_num or class_num(N=1 by default)
+        # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+        if isinstance(preds, int):
+            preds = [preds]
+            preds = np.array(preds)
+        elif isinstance(preds, np.ndarray):
+            preds = np.array(preds)
+        elif isinstance(preds, list):
+            preds = np.array(preds)
+            preds = preds.reshape((-1, preds.shape[-1]))
+
+        # consider labels just int value 1x1
+        if isinstance(labels, int):
+            labels = [labels]
+            labels = np.array(labels)
+        elif isinstance(labels, tuple):
+            labels = np.array([labels])
+            labels = labels.reshape((labels.shape[-1], -1))
+        elif isinstance(labels, list):
+            if isinstance(labels[0], int):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[0], 1))
+            elif isinstance(labels[0], tuple):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[-1], -1))
+            else:
+                labels = np.array(labels)
+        # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+        # only support 2 dimension one-shot labels
+        # or 1 dimension one-hot class_num will confuse with N
+
+        if len(preds.shape) == 1:
+            N = 1
+            class_num = preds.shape[0]
+            preds = preds.reshape([-1, class_num])
+        elif len(preds.shape) >= 2:
+            N = preds.shape[0]
+            preds = preds.reshape([N, -1])
+            class_num = preds.shape[1]
+
+        label_N = labels.shape[0]
+        assert label_N == N, "labels batch size should same with preds"
+        labels = labels.reshape([N, -1])
+        # one-hot labels will have 2 dimension not equal 1
+        if labels.shape[1] != 1:
+            labels = labels.argsort()[..., -1:]
+        return preds, labels
+
+
+class TFDataLoader(object):  # pragma: no cover
+    """Tensorflow dataloader class.
+
+    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+    method to do session run, this dataloader is designed to satisfy the usage of feed dict
+    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+    Args:
+        dataset: obj. wrapper of needed data.
+        batch_size: int. batch size
+    """
+
+    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+        """Initialize `TFDataLoader` class."""
+        self.dataset = dataset
+        self.last_batch = last_batch
+        self.batch_size = batch_size
+        # NOTE(review): this rebinds the local name only; self.dataset stays
+        # unbatched. Batching actually happens in batch() or
+        # _generate_dataloader(), so this line appears to be dead code.
+        dataset = dataset.batch(batch_size)
+
+    def batch(self, batch_size, last_batch="rollover"):
+        """Dataset return data per batch."""
+        # "rollover" keeps the final partial batch; any other value drops it.
+        drop_last = False if last_batch == "rollover" else True
+        self.batch_size = batch_size
+        self.dataset = self.dataset.batch(batch_size, drop_last)
+
+    def __iter__(self):
+        """Iterate dataloader."""
+        return self._generate_dataloader(
+            self.dataset,
+            batch_size=self.batch_size,
+            last_batch=self.last_batch,
+        )
+
+    def _generate_dataloader(
+        self,
+        dataset,
+        batch_size=1,
+        last_batch="rollover",
+        collate_fn=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=None,
+        pin_memory=None,
+        distributed=False,
+    ):
+        """Yield data.
+
+        Eager mode groups raw samples into python-side batches; graph mode
+        batches via tf.data, falling back to per-sample fetches when the
+        element spec contains dynamic shapes.
+        """
+        drop_last = False if last_batch == "rollover" else True
+
+        def check_dynamic_shape(element_spec):
+            # True when any TensorSpec in the (possibly nested) spec has an
+            # unknown dimension (shape.num_elements() is None).
+            if isinstance(element_spec, collections.abc.Sequence):
+                return any([check_dynamic_shape(ele) for ele in element_spec])
+            elif isinstance(element_spec, tf.TensorSpec):
+                return True if element_spec.shape.num_elements() is None else False
+            else:
+                raise ValueError("unrecognized element spec...")
+
+        def squeeze_output(output):
+            # Drop the leading batch-of-1 axis added by dataset.batch(1).
+            if isinstance(output, collections.abc.Sequence):
+                return [squeeze_output(ele) for ele in output]
+            elif isinstance(output, np.ndarray):
+                return np.squeeze(output, axis=0)
+            else:
+                raise ValueError("not supported output format....")
+
+        if tf.executing_eagerly():
+            # Eager path: iterate raw samples and group them into batches of
+            # batch_size on the python side, collating with default_collate.
+            index = 0
+            outputs = []
+            for iter_tensors in dataset:
+                samples = []
+                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+                if isinstance(iter_inputs, tf.Tensor):
+                    samples.append(iter_inputs.numpy())
+                else:
+                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+                if isinstance(iter_labels, tf.Tensor):
+                    samples.append(iter_labels.numpy())
+                else:
+                    samples.append([np.array(l) for l in iter_labels])
+                index += 1
+                outputs.append(samples)
+                if index == batch_size:
+                    outputs = default_collate(outputs)
+                    yield outputs
+                    outputs = []
+                    index = 0
+            # Emit the final partial batch (rollover semantics).
+            if len(outputs) > 0:
+                outputs = default_collate(outputs)
+                yield outputs
+        else:
+            try_single_batch = check_dynamic_shape(dataset.element_spec)
+            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+            iter_tensors = ds_iterator.get_next()
+            data_config = tf.compat.v1.ConfigProto()
+            data_config.use_per_session_threads = 1
+            data_config.intra_op_parallelism_threads = 1
+            data_config.inter_op_parallelism_threads = 16
+            data_sess = tf.compat.v1.Session(config=data_config)
+            # pylint: disable=no-name-in-module
+            from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+            while True:
+                if not try_single_batch:
+                    try:
+                        outputs = data_sess.run(iter_tensors)
+                        yield outputs
+                    except OutOfRangeError:
+                        data_sess.close()
+                        return
+                else:
+                    # Dynamic shapes: fetch batch_size singleton batches and
+                    # collate them on the python side.
+                    try:
+                        outputs = []
+                        for i in range(0, batch_size):
+                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+                        outputs = default_collate(outputs)
+                        yield outputs
+                    except OutOfRangeError:
+                        if len(outputs) == 0:
+                            data_sess.close()
+                            return
+                        else:
+                            outputs = default_collate(outputs)
+                            yield outputs
+                            data_sess.close()
+                            return
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py
new file mode 100644
index 00000000000..fd3a07937de
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py
@@ -0,0 +1,142 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+import tensorflow as tf
+import numpy as np
+
+from argparse import ArgumentParser
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ BilinearImagenetTransform,
+ TFDataLoader,
+ TopKMetric,
+)
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+arg_parser = ArgumentParser(description='Parse args')
+arg_parser.add_argument('-g', "--input-graph",
+ help='Specify the input graph for the transform tool',
+ dest='input_graph')
+arg_parser.add_argument("--output-graph",
+ help='Specify tune result model save dir',
+ dest='output_graph')
+arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
+arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
+arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
+arg_parser.add_argument('--dataset_location', dest='dataset_location',
+ help='location of calibration dataset and evaluate dataset')
+arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark')
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations')
+args = arg_parser.parse_args()
+
+def evaluate(model, eval_dataloader, metric, postprocess=None):
+ """Custom evaluate function to estimate the accuracy of the model.
+
+ Args:
+ model (tf.Graph_def): The input model graph
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ from neural_compressor.tensorflow import Model
+ model = Model(model)
+ input_tensor = model.input_tensor
+ output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+ model.output_tensor[0]
+ iteration = -1
+ if args.benchmark and args.mode == 'performance':
+ iteration = args.iters
+
+ def eval_func(dataloader):
+ latency_list = []
+ for idx, (inputs, labels) in enumerate(dataloader):
+ # dataloader should keep the order and len of inputs same with input_tensor
+ inputs = np.array([inputs])
+ feed_dict = dict(zip(input_tensor, inputs))
+
+ start = time.time()
+ predictions = model.sess.run(output_tensor, feed_dict)
+ end = time.time()
+
+ metric.update(predictions, labels)
+ latency_list.append(end-start)
+ if idx + 1 == iteration:
+ break
+ latency = np.array(latency_list).mean() / args.batch_size
+ return latency
+
+ latency = eval_func(eval_dataloader)
+ if args.benchmark and args.mode == 'performance':
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+class eval_classifier_optimized_graph:
+ """Evaluate image classifier with optimized TensorFlow graph."""
+
+ def run(self):
+ """This is neural_compressor function include tuning, export and benchmark option."""
+ from neural_compressor.common import set_random_seed
+ set_random_seed(9527)
+
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+ dataset = ImageRecordDataset(
+ root=args.dataset_location,
+ transform=ComposeTransform(transform_list= [
+ BilinearImagenetTransform(height=224, width=224),
+ ]
+ )
+ )
+ calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10)
+
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+ q_model.save(args.output_graph)
+
+ if args.benchmark:
+ dataset = ImageRecordDataset(
+ root=args.dataset_location,
+ transform=ComposeTransform(transform_list= [
+ BilinearImagenetTransform(height=224, width=224),
+ ]
+ )
+ )
+ dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size)
+
+ def eval(model):
+ top1 = TopKMetric(k=1)
+ return evaluate(model, dataloader, top1)
+
+ if args.mode == 'performance':
+ eval(args.input_graph)
+ elif args.mode == 'accuracy':
+ acc_result = eval(args.input_graph)
+ print("Batch size = %d" % dataloader.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+ evaluate_opt_graph = eval_classifier_optimized_graph()
+ evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2755e1a41ac
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+neural-compressor
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..8ecac837cf7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location ${dataset_location} \
+ --batch_size ${batch_size} \
+ --benchmark \
+ --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# Quantize the input model with neural_compressor and save the result.
+set -x
+
+function main {
+  init_params "$@"
+  run_tuning
+
+}
+
+# Parse --key=value style arguments.
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo "$var" | cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo "$var" | cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo "$var" | cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# Run quantization; expansions are quoted so paths with spaces survive
+# word splitting.
+function run_tuning {
+    python main.py \
+            --input-graph "${input_model}" \
+            --output-graph "${output_model}" \
+            --dataset_location "${dataset_location}" \
+            --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh b/examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh
new file mode 100644
index 00000000000..4aad5d69a3f
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Convert a prepared raw ImageNet directory tree into TFRecord files.
+# set -x
+
+OUTPUT_DIR="./data"
+SUBSET="validation"
+SHARDS=1
+
+help()
+{
+   cat <<- EOF
+   Desc: Convert prepared raw imagenet dataset to tfrecord
+   -h --help              help info
+   --output_dir           Output data directory
+                          default: './data'
+   --raw_dir              Raw data directory
+   --shards               Number of shards in TFRecord files.
+                          default: '1'
+   --subset               Subset of imagenet, can be validation/train.
+                          default: 'validation'
+EOF
+   exit 0
+}
+
+function main {
+  init_params "$@"
+  convert_dataset
+}
+
+# Parse --key=value arguments; unknown flags abort with an error.
+function init_params {
+  for var in "$@"
+  do
+    case $var in
+      --output_dir=*)
+          OUTPUT_DIR=$(echo "$var" | cut -f2 -d=)
+      ;;
+      --raw_dir=*)
+          RAW_DIR=$(echo "$var" | cut -f2 -d=)
+      ;;
+      --shards=*)
+          SHARDS=$(echo "$var" | cut -f2 -d=)
+      ;;
+      --subset=*)
+          SUBSET=$(echo "$var" | cut -f2 -d=)
+      ;;
+      -h|--help) help
+      ;;
+      *)
+      echo "Error: No such parameter: ${var}"
+      exit 1
+      ;;
+    esac
+  done
+}
+
+# Build TFRecords with the reference ImageNet conversion script; expansions
+# are quoted so paths containing spaces work. (Fixed "imagnet" typo in the
+# help text above.)
+function convert_dataset {
+  if [ ! -d "${OUTPUT_DIR}" ]; then
+    mkdir "${OUTPUT_DIR}"
+  fi
+  python imagenet_prepare/build_imagenet_data.py \
+    --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \
+    --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \
+    --output_directory "${OUTPUT_DIR}" \
+    --subset "${SUBSET}" \
+    --raw_directory "${RAW_DIR}" \
+    --shards "${SHARDS}"
+}
+
+main "$@"
+
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md
new file mode 100644
index 00000000000..bc07e651f96
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md
@@ -0,0 +1,107 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the resnet_v2_50 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in the validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Recommend python 3.9 or higher version.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare pre-trained model
+The resnet_v2_50 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+ ```shell
+ wget http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz
+ tar -xvf resnet_v2_50_2017_04_14.tar.gz
+ ```
+
+ 2. Exporting the Inference Graph
+ ```shell
+ git clone https://github.com/tensorflow/models
+ cd models/research/slim
+ python export_inference_graph.py \
+ --alsologtostderr \
+ --model_name=resnet_v2_50 \
+ --output_file=/tmp/resnet_v2_50_inf_graph.pb
+ ```
+ Make sure to use intel-tensorflow v1.15, and pip install tf_slim.
+ #### Install Intel Tensorflow 1.15 up2
+ Check your python version and use pip install 1.15.0 up2 from links below:
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+ > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need to add the `--labels_offset=1` flag in the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of the inference graph pb; for resnet_v2_50 the output layer name is `resnet_v2_50/predictions/Reshape_1`
+
+ 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+ --input_graph=/tmp/resnet_v2_50_inf_graph.pb \
+ --input_checkpoint=./resnet_v2_50.ckpt \
+ --input_binary=true \
+ --output_graph=./frozen_resnet_v2_50.pb \
+ --output_node_names=resnet_v2_50/predictions/Reshape_1
+ ```
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run
+
+## 1. Quantization
+
+ ```shell
+ bash run_quant.sh --input_model=/PATH/TO/frozen_resnet_v2_50.pb \
+ --output_model=./nc_resnet_v2_50.pb --dataset_location=/path/to/ImageNet/
+ ```
+
+## 2. Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./nc_resnet_v2_50.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32
+ bash run_benchmark.sh --input_model=./nc_resnet_v2_50.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1
+ ```
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..ecfca2348cd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py
@@ -0,0 +1,511 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+    """Parse features in Example proto.
+
+    Returns:
+        tuple of parsed image and label
+    """
+
+    def __call__(self, sample):
+        """Parse features in example."""
+        # Dense features in Example proto.
+        feature_map = {
+            "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+            "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+        }
+
+        sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+        # Sparse features in Example proto.
+        feature_map.update(
+            {
+                k: sparse_float32
+                for k in [
+                    "image/object/bbox/xmin",
+                    "image/object/bbox/ymin",
+                    "image/object/bbox/xmax",
+                    "image/object/bbox/ymax",
+                ]
+            }
+        )
+
+        features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+        label = tf.cast(features["image/class/label"], dtype=tf.int32)
+        image = features["image/encoded"]
+        # fancy_upscaling=False and dct_method="INTEGER_FAST" favor decode
+        # speed over fidelity (see tf.image.decode_jpeg docs).
+        image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+        return (image, label)
+
+
+class BilinearImagenetTransform(object):
+    """Combination of a series of transforms which is applicable to images in Imagenet.
+
+    Args:
+        height: Height of the result
+        width: Width of the result
+        central_fraction(float, default=0.875): fraction of size to crop
+        mean_value(list, default=[0.0,0.0,0.0]): means for each channel
+        scale(float, default=1.0): std value
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0):
+        """Initialize `BilinearImagenetTransform` class."""
+        # NOTE(review): mutable default for mean_value is shared across
+        # instances; harmless here because it is never mutated.
+        self.height = height
+        self.width = width
+        self.mean_value = mean_value
+        self.scale = scale
+        self.central_fraction = central_fraction
+
+    # sample is (images, labels)
+    def __call__(self, sample):
+        """Convert `BilinearImagenetTransform` feature."""
+        image, label = sample
+        if image.dtype is not tf.float32:
+            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
+        # Crop the central region of the image containing 87.5% area of the original image.
+        if self.central_fraction:
+            image = tf.image.central_crop(image, central_fraction=self.central_fraction)
+
+        if self.height and self.width:
+            # Resize the image to the specified height and width.
+            image = tf.expand_dims(image, 0)
+            image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR)
+            image = tf.squeeze(image, [0])
+
+        # Rescale from [0, 1] to [-1, 1], then subtract per-channel means and
+        # apply the std scaling factor.
+        image = tf.subtract(image, 0.5)
+        image = tf.multiply(image, 2.0)
+        means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+        image = (image - means) * self.scale
+        return (image, label)
+
+
+class ComposeTransform(object):
+    """Composes several transforms together.
+
+    Args:
+        transform_list (list of Transform objects): list of transforms to compose
+
+    Returns:
+        sample (tuple): tuple of processed image and label
+    """
+
+    def __init__(self, transform_list):
+        """Initialize `ComposeTransform` class."""
+        self.transform_list = transform_list
+
+    def __call__(self, sample):
+        """Call transforms in transform_list."""
+        # Each transform consumes and returns the full sample tuple in order.
+        for transform in self.transform_list:
+            sample = transform(sample)
+        return sample
+
+
+class ShiftRescale(object):
+    """Label shift by 1 and rescale.
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __call__(self, sample):
+        """Shift the label down by one and rescale the image.
+
+        The rescale maps pixel values to [-1, 1] assuming inputs lie in
+        [0, 255] — TODO(review): confirm against the producing dataset.
+        """
+        image, label = sample
+        label -= 1
+        image = (image - 127.5) / 127.5
+        return (image, label)
+
+
+class ImageRecordDataset(object):
+    """Tensorflow imageNet database in tf record format.
+
+    Please arrange data in this way:
+        root/validation-000-of-100
+        root/validation-001-of-100
+        ...
+        root/validation-099-of-100
+    The file name needs to follow this pattern: '* - * -of- *'
+
+    Args: root (str): Root directory of dataset.
+          transform (transform object, default=None): transform to process input data.
+          filter (Filter objects, default=None): filter out examples according
+                  to specific conditions.
+    """
+
+    """Configuration for Imagenet dataset."""
+
+    def __new__(cls, root, transform=None, filter=None):
+        """Build a new object of TensorflowImageRecord class.
+
+        NOTE: returns a tf.data.Dataset, not an ImageRecordDataset instance.
+        The `filter` argument is accepted but unused in this implementation.
+        """
+        from tensorflow.python.platform import gfile  # pylint: disable=no-name-in-module
+
+        glob_pattern = os.path.join(root, "*-*-of-*")
+        file_names = gfile.Glob(glob_pattern)
+        if not file_names:
+            raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+        # pylint: disable=no-name-in-module
+        from tensorflow.python.data.experimental import parallel_interleave
+
+        ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+        ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+        # Decode raw Example protos first, then apply the user transforms.
+        if transform is not None:
+            transform.transform_list.insert(0, ParseDecodeImagenet())
+        else:
+            transform = ParseDecodeImagenet()
+        ds = ds.map(transform, num_parallel_calls=None)
+        ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)  # this number can be tuned
+        return ds
+
+
+class BaseMetric(object):
+    """The base class of Metric."""
+
+    def __init__(self, metric, single_output=False, hvd=None):
+        """Initialize the basic metric.
+
+        Args:
+            metric: The metric class.
+            single_output: Whether the output is single or not, defaults to False.
+            hvd: The Horovod class for distributed training, defaults to None.
+        """
+        self._metric_cls = metric
+        self._single_output = single_output
+        self._hvd = hvd
+
+    def __call__(self, *args, **kwargs):
+        """Evaluate the model predictions, and the reference.
+
+        Returns:
+            The class itself.
+        """
+        # Lazily instantiate the wrapped metric class with the caller's args.
+        self._metric = self._metric_cls(*args, **kwargs)
+        return self
+
+    @abstractmethod
+    def update(self, preds, labels=None, sample_weight=None):
+        """Update the state that need to be evaluated.
+
+        Args:
+            preds: The prediction result.
+            labels: The reference. Defaults to None.
+            sample_weight: The sampling weight. Defaults to None.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self):
+        """Clear the predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def result(self):
+        """Evaluate the difference between predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @property
+    def metric(self):
+        """Return its metric class.
+
+        Returns:
+            The metric class.
+        """
+        return self._metric_cls
+
+    @property
+    def hvd(self):
+        """Return its hvd class.
+
+        Returns:
+            The hvd class.
+        """
+        return self._hvd
+
+    @hvd.setter
+    def hvd(self, hvd):
+        """Set its hvd.
+
+        Args:
+            hvd: The Horovod class for distributed training.
+        """
+        self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+    """Compute Top-k Accuracy classification score for Tensorflow model.
+
+    This metric computes the number of times where the correct label is among
+    the top k labels predicted.
+
+    Attributes:
+        k (int): The number of most likely outcomes considered to find the correct label.
+        num_correct: The number of predictions that were correct classified.
+        num_sample: The total number of predictions.
+    """
+
+    def __init__(self, k=1):
+        """Initialize the k, number of samples and correct predictions.
+
+        Args:
+            k: The number of most likely outcomes considered to find the correct label.
+        """
+        # NOTE(review): BaseMetric.__init__ is intentionally not called, so
+        # _hvd only exists if assigned externally; result() guards with getattr.
+        self.k = k
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def update(self, preds, labels, sample_weight=None):
+        """Add the predictions and labels.
+
+        Args:
+            preds: The predictions.
+            labels: The labels corresponding to the predictions.
+            sample_weight: The sample weight (currently ignored).
+        """
+        preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+        labels = labels.reshape([len(labels)])
+        # NOTE(review): a fresh tf.Graph and tf.compat.v1.Session are built on
+        # every call, which is correct but slow for large evaluations.
+        with tf.Graph().as_default() as acc_graph:
+            topk = tf.nn.in_top_k(
+                predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+            )
+            fp32_topk = tf.cast(topk, tf.float32)
+            correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+            with tf.compat.v1.Session() as acc_sess:
+                correct = acc_sess.run(correct_tensor)
+
+        # `correct` is the count of samples in this batch whose true label is
+        # within the top-k predictions; accumulate the running totals.
+        self.num_sample += len(labels)
+        self.num_correct += correct
+
+    def reset(self):
+        """Reset the number of samples and correct predictions."""
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def result(self):
+        """Compute the top-k score.
+
+        Returns:
+            The top-k score (fraction of samples whose true label is within
+            the top k predictions), or 0 when no samples have been seen.
+        """
+        if self.num_sample == 0:
+            logger.warning("Sample num during evaluation is 0.")
+            return 0
+        elif getattr(self, "_hvd", None) is not None:  # pragma: no cover
+            # Distributed case: aggregate counts across all Horovod workers
+            # before computing the global accuracy.
+            allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+            allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+            return allgather_num_correct / allgather_num_sample
+        return self.num_correct / self.num_sample
+
+    @staticmethod
+    def _topk_shape_validate(preds, labels):
+        """Normalize `preds` and `labels` to 2-D arrays with matching batch size."""
+        # preds shape can be Nxclass_num or class_num(N=1 by default)
+        # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+        if isinstance(preds, int):
+            preds = [preds]
+            preds = np.array(preds)
+        elif isinstance(preds, np.ndarray):
+            preds = np.array(preds)
+        elif isinstance(preds, list):
+            preds = np.array(preds)
+            preds = preds.reshape((-1, preds.shape[-1]))
+
+        # consider labels just int value 1x1
+        if isinstance(labels, int):
+            labels = [labels]
+            labels = np.array(labels)
+        elif isinstance(labels, tuple):
+            labels = np.array([labels])
+            labels = labels.reshape((labels.shape[-1], -1))
+        elif isinstance(labels, list):
+            if isinstance(labels[0], int):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[0], 1))
+            elif isinstance(labels[0], tuple):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[-1], -1))
+            else:
+                labels = np.array(labels)
+        # labels must have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+        # only support 2 dimension one-hot labels
+        # or 1 dimension one-hot class_num will confuse with N
+
+        if len(preds.shape) == 1:
+            N = 1
+            class_num = preds.shape[0]
+            preds = preds.reshape([-1, class_num])
+        elif len(preds.shape) >= 2:
+            N = preds.shape[0]
+            preds = preds.reshape([N, -1])
+            class_num = preds.shape[1]
+
+        label_N = labels.shape[0]
+        assert label_N == N, "labels batch size should same with preds"
+        labels = labels.reshape([N, -1])
+        # one-hot labels will have 2 dimension not equal 1
+        if labels.shape[1] != 1:
+            # argsort()[..., -1:] picks the max index (== argmax) to convert
+            # one-hot rows back to sparse label indices.
+            labels = labels.argsort()[..., -1:]
+        return preds, labels
+
+
+class TFDataLoader(object):  # pragma: no cover
+    """Tensorflow dataloader class.
+
+    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+    method to do session run, this dataloader is designed to satisfy the usage of feed dict
+    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+    Args:
+        dataset: obj. wrapper of needed data.
+        batch_size: int. batch size
+    """
+
+    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+        """Initialize `TFDataLoader` class."""
+        self.dataset = dataset
+        self.last_batch = last_batch
+        self.batch_size = batch_size
+        # NOTE(review): this rebinds the local name only; self.dataset stays
+        # unbatched. Batching actually happens in batch() or
+        # _generate_dataloader(), so this line appears to be dead code.
+        dataset = dataset.batch(batch_size)
+
+    def batch(self, batch_size, last_batch="rollover"):
+        """Dataset return data per batch."""
+        # "rollover" keeps the final partial batch; any other value drops it.
+        drop_last = False if last_batch == "rollover" else True
+        self.batch_size = batch_size
+        self.dataset = self.dataset.batch(batch_size, drop_last)
+
+    def __iter__(self):
+        """Iterate dataloader."""
+        return self._generate_dataloader(
+            self.dataset,
+            batch_size=self.batch_size,
+            last_batch=self.last_batch,
+        )
+
+    def _generate_dataloader(
+        self,
+        dataset,
+        batch_size=1,
+        last_batch="rollover",
+        collate_fn=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=None,
+        pin_memory=None,
+        distributed=False,
+    ):
+        """Yield data.
+
+        Eager mode groups raw samples into python-side batches; graph mode
+        batches via tf.data, falling back to per-sample fetches when the
+        element spec contains dynamic shapes.
+        """
+        drop_last = False if last_batch == "rollover" else True
+
+        def check_dynamic_shape(element_spec):
+            # True when any TensorSpec in the (possibly nested) spec has an
+            # unknown dimension (shape.num_elements() is None).
+            if isinstance(element_spec, collections.abc.Sequence):
+                return any([check_dynamic_shape(ele) for ele in element_spec])
+            elif isinstance(element_spec, tf.TensorSpec):
+                return True if element_spec.shape.num_elements() is None else False
+            else:
+                raise ValueError("unrecognized element spec...")
+
+        def squeeze_output(output):
+            # Drop the leading batch-of-1 axis added by dataset.batch(1).
+            if isinstance(output, collections.abc.Sequence):
+                return [squeeze_output(ele) for ele in output]
+            elif isinstance(output, np.ndarray):
+                return np.squeeze(output, axis=0)
+            else:
+                raise ValueError("not supported output format....")
+
+        if tf.executing_eagerly():
+            # Eager path: iterate raw samples and group them into batches of
+            # batch_size on the python side, collating with default_collate.
+            index = 0
+            outputs = []
+            for iter_tensors in dataset:
+                samples = []
+                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+                if isinstance(iter_inputs, tf.Tensor):
+                    samples.append(iter_inputs.numpy())
+                else:
+                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+                if isinstance(iter_labels, tf.Tensor):
+                    samples.append(iter_labels.numpy())
+                else:
+                    samples.append([np.array(l) for l in iter_labels])
+                index += 1
+                outputs.append(samples)
+                if index == batch_size:
+                    outputs = default_collate(outputs)
+                    yield outputs
+                    outputs = []
+                    index = 0
+            # Emit the final partial batch (rollover semantics).
+            if len(outputs) > 0:
+                outputs = default_collate(outputs)
+                yield outputs
+        else:
+            try_single_batch = check_dynamic_shape(dataset.element_spec)
+            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+            iter_tensors = ds_iterator.get_next()
+            data_config = tf.compat.v1.ConfigProto()
+            data_config.use_per_session_threads = 1
+            data_config.intra_op_parallelism_threads = 1
+            data_config.inter_op_parallelism_threads = 16
+            data_sess = tf.compat.v1.Session(config=data_config)
+            # pylint: disable=no-name-in-module
+            from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+            while True:
+                if not try_single_batch:
+                    try:
+                        outputs = data_sess.run(iter_tensors)
+                        yield outputs
+                    except OutOfRangeError:
+                        data_sess.close()
+                        return
+                else:
+                    # Dynamic shapes: fetch batch_size singleton batches and
+                    # collate them on the python side.
+                    try:
+                        outputs = []
+                        for i in range(0, batch_size):
+                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+                        outputs = default_collate(outputs)
+                        yield outputs
+                    except OutOfRangeError:
+                        if len(outputs) == 0:
+                            data_sess.close()
+                            return
+                        else:
+                            outputs = default_collate(outputs)
+                            yield outputs
+                            data_sess.close()
+                            return
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py
new file mode 100644
index 00000000000..bb82476fced
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py
@@ -0,0 +1,143 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+import tensorflow as tf
+import numpy as np
+
+from argparse import ArgumentParser
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ BilinearImagenetTransform,
+ TFDataLoader,
+ TopKMetric,
+)
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+arg_parser = ArgumentParser(description='Parse args')
+arg_parser.add_argument('-g', "--input-graph",
+ help='Specify the input graph for the transform tool',
+ dest='input_graph')
+arg_parser.add_argument("--output-graph",
+ help='Specify tune result model save dir',
+ dest='output_graph')
+arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
+arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
+arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
+arg_parser.add_argument('--diagnose', dest='diagnose', action='store_true', help='use Neural Insights to diagnose tuning and benchmark.')
+arg_parser.add_argument('--dataset_location', dest='dataset_location',
+ help='location of calibration dataset and evaluate dataset')
+arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark')
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations')
+args = arg_parser.parse_args()
+
+def evaluate(model, eval_dataloader, metric, postprocess=None):
+    """Custom evaluate function to estimate the accuracy of the model.
+
+    Args:
+        model (tf.Graph_def): The input model graph
+        eval_dataloader: iterable yielding (inputs, labels) pairs.
+        metric: metric object with update()/result() (e.g. TopKMetric).
+        postprocess: unused; kept for interface compatibility.
+
+    Returns:
+        accuracy (float): evaluation result, the larger is better.
+    """
+    from neural_compressor.tensorflow import Model
+    model = Model(model)
+    input_tensor = model.input_tensor
+    # Single-output graphs are unwrapped so sess.run returns the tensor directly.
+    output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+        model.output_tensor[0]
+    # -1 means iterate the whole dataloader; a positive value caps the number
+    # of measured iterations in performance mode.
+    iteration = -1
+    if args.benchmark and args.mode == 'performance':
+        iteration = args.iters
+
+    def eval_func(dataloader):
+        # Runs inference over the dataloader, updating `metric` and timing
+        # each session run; returns the mean per-image latency in seconds.
+        latency_list = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # dataloader should keep the order and len of inputs same with input_tensor
+            inputs = np.array([inputs])
+            feed_dict = dict(zip(input_tensor, inputs))
+
+            start = time.time()
+            predictions = model.sess.run(output_tensor, feed_dict)
+            end = time.time()
+
+            metric.update(predictions, labels)
+            latency_list.append(end-start)
+            if idx + 1 == iteration:
+                break
+        # Each timed run processes one batch, so divide by the batch size.
+        latency = np.array(latency_list).mean() / args.batch_size
+        return latency
+
+    latency = eval_func(eval_dataloader)
+    if args.benchmark and args.mode == 'performance':
+        print("Batch size = {}".format(args.batch_size))
+        print("Latency: {:.3f} ms".format(latency * 1000))
+        print("Throughput: {:.3f} images/sec".format(1. / latency))
+    acc = metric.result()
+    return acc
+
+class eval_classifier_optimized_graph:
+    """Evaluate image classifier with optimized TensorFlow graph."""
+
+    def run(self):
+        """Run Neural Compressor tuning and/or benchmarking based on CLI args.
+
+        With --tune: quantize args.input_graph with a static post-training
+        quantization config and save it to args.output_graph.
+        With --benchmark: evaluate args.input_graph in either 'performance'
+        or 'accuracy' mode.
+        """
+        from neural_compressor.common import set_random_seed
+        set_random_seed(9527)
+
+        if args.tune:
+            from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+            # Calibration data: ImageNet TF-record dataset resized to 224x224.
+            dataset = ImageRecordDataset(
+                root=args.dataset_location,
+                transform=ComposeTransform(transform_list= [
+                        BilinearImagenetTransform(height=224, width=224),
+                    ]
+                )
+            )
+            calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10)
+
+            quant_config = StaticQuantConfig()
+            q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+            q_model.save(args.output_graph)
+
+        if args.benchmark:
+            dataset = ImageRecordDataset(
+                root=args.dataset_location,
+                transform=ComposeTransform(transform_list= [
+                        BilinearImagenetTransform(height=224, width=224),
+                    ]
+                )
+            )
+            dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size)
+
+            # NOTE(review): local name shadows the `eval` builtin (function scope only).
+            def eval(model):
+                top1 = TopKMetric(k=1)
+                return evaluate(model, dataloader, top1)
+
+            if args.mode == 'performance':
+                eval(args.input_graph)
+            elif args.mode == 'accuracy':
+                acc_result = eval(args.input_graph)
+                print("Batch size = %d" % dataloader.batch_size)
+                print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+    evaluate_opt_graph = eval_classifier_optimized_graph()
+    evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2755e1a41ac
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+neural-compressor
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..8ecac837cf7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+# Entry point: parse --key=value flags, then launch the benchmark.
+function main {
+
+    init_params "$@"
+    run_benchmark
+
+}
+
+# init params: parse --key=value style arguments into shell variables.
+function init_params {
+    batch_size=32
+    iters=100
+
+    for var in "$@"
+    do
+        case $var in
+            --input_model=*)
+                input_model=$(echo "$var" | cut -f2 -d=)
+            ;;
+            --mode=*)
+                mode=$(echo "$var" | cut -f2 -d=)
+            ;;
+            --dataset_location=*)
+                dataset_location=$(echo "$var" | cut -f2 -d=)
+            ;;
+            --batch_size=*)
+                batch_size=$(echo "$var" | cut -f2 -d=)
+            ;;
+            --iters=*)
+                iters=$(echo "$var" | cut -f2 -d=)
+            ;;
+        esac
+    done
+
+}
+
+# run_benchmark: invoke main.py in benchmark mode.
+# Expansions are quoted so paths containing spaces survive word splitting.
+function run_benchmark {
+
+    python main.py \
+        --input-graph "${input_model}" \
+        --mode "${mode}" \
+        --dataset_location "${dataset_location}" \
+        --batch_size "${batch_size}" \
+        --benchmark \
+        --iters "${iters}"
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+# Entry point: parse --key=value flags, then launch quantization tuning.
+function main {
+    init_params "$@"
+    run_tuning
+
+}
+
+# init params: parse --key=value style arguments into shell variables.
+function init_params {
+
+    for var in "$@"
+    do
+        case $var in
+            --input_model=*)
+                input_model=$(echo "$var" | cut -f2 -d=)
+            ;;
+            --output_model=*)
+                output_model=$(echo "$var" | cut -f2 -d=)
+            ;;
+            --dataset_location=*)
+                dataset_location=$(echo "$var" | cut -f2 -d=)
+            ;;
+        esac
+    done
+
+}
+
+# run_tuning: invoke main.py with --tune.
+# Expansions are quoted so paths containing spaces survive word splitting.
+function run_tuning {
+    python main.py \
+        --input-graph "${input_model}" \
+        --output-graph "${output_model}" \
+        --dataset_location "${dataset_location}" \
+        --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md
new file mode 100644
index 00000000000..00e00c7846d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md
@@ -0,0 +1,108 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce vgg16 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Recommend python 3.9 or higher version.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare pre-trained model
+
+The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+ ```shell
+ wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
+ tar -xvf vgg_16_2016_08_28.tar.gz
+ ```
+
+ 2. Exporting the Inference Graph
+ ```shell
+ git clone https://github.com/tensorflow/models
+ cd models/research/slim
+ python export_inference_graph.py \
+ --alsologtostderr \
+ --model_name=vgg_16 \
+ --output_file=/tmp/vgg_16_inf_graph.pb
+ ```
+ Make sure to use intel-tensorflow v1.15, and pip install tf_slim.
+ #### Install Intel Tensorflow 1.15 up2
+ Check your python version and use pip install 1.15.0 up2 from links below:
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+    > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need to add the `--labels_offset=1` flag in the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed`
+
+ 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+ --input_graph=/tmp/vgg_16_inf_graph.pb \
+ --input_checkpoint=./vgg_16.ckpt \
+ --input_binary=true \
+ --output_graph=./frozen_vgg16.pb \
+ --output_node_names=vgg_16/fc8/squeezed
+ ```
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+  We also prepared related scripts in `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run
+
+## 1. Quantization
+
+ ```shell
+ bash run_quant.sh --input_model=/PATH/TO/frozen_vgg16.pb \
+ --output_model=./nc_vgg16.pb --dataset_location=/path/to/ImageNet/
+ ```
+
+## 2. Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./nc_vgg16.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32
+ bash run_benchmark.sh --input_model=./nc_vgg16.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1
+ ```
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..17b4d9cec5e
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py
@@ -0,0 +1,581 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+    """Parse features in Example proto.
+
+    Decodes serialized ImageNet TF-record Example protos into a decoded
+    JPEG image tensor and an int32 label.
+
+    Returns:
+        tuple of parsed image and label
+    """
+
+    def __call__(self, sample):
+        """Parse features in example.
+
+        Args:
+            sample: a serialized tf.train.Example proto (scalar string tensor).
+
+        Returns:
+            (image, label): decoded uint8 HxWx3 image tensor and int32 label.
+        """
+        # Dense features in Example proto.
+        feature_map = {
+            "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+            "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+        }
+
+        sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+        # Sparse features in Example proto (bounding boxes; parsed but unused here).
+        feature_map.update(
+            {
+                k: sparse_float32
+                for k in [
+                    "image/object/bbox/xmin",
+                    "image/object/bbox/ymin",
+                    "image/object/bbox/xmax",
+                    "image/object/bbox/ymax",
+                ]
+            }
+        )
+
+        features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+        label = tf.cast(features["image/class/label"], dtype=tf.int32)
+        image = features["image/encoded"]
+        # Decode JPEG; fancy_upscaling off and INTEGER_FAST DCT chosen for speed.
+        image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+        return (image, label)
+
+
+class ResizeCropImagenet(object):
+    """Combination of a series of transforms which is applicable to images in Imagenet.
+
+    Pipeline: aspect-preserving resize (short side -> resize_side), center or
+    random crop to (height, width), optional horizontal flip, then mean
+    subtraction and scaling.
+
+    Args:
+        height (int): Height of the result
+        width (int): Width of the result
+        random_crop (bool, default=False): whether to random crop
+        resize_side (int, default=256): desired shape after resize operation
+        random_flip_left_right (bool, default=False): whether to random flip left and right
+        mean_value (list, default=[0.0,0.0,0.0]): means for each channel
+        scale (float, default=1.0): std value
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(
+        self,
+        height,
+        width,
+        random_crop=False,
+        resize_side=256,
+        resize_method="bilinear",
+        random_flip_left_right=False,
+        mean_value=[0.0, 0.0, 0.0],
+        scale=1.0,
+        data_format="channels_last",
+        subpixels="RGB",
+    ):
+        """Initialize `TensorflowResizeCropImagenetTransform` class."""
+        self.height = height
+        self.width = width
+        self.mean_value = mean_value
+        self.scale = scale
+        self.random_crop = random_crop
+        self.random_flip_left_right = random_flip_left_right
+        self.resize_side = resize_side
+        self.resize_method = resize_method
+        self.data_format = data_format
+        self.subpixels = subpixels
+
+    # sample is (images, labels)
+    def __call__(self, sample):
+        """Convert `TensorflowResizeCropImagenetTransform` feature."""
+        image, label = sample
+        shape = tf.shape(input=image)
+
+        height = (
+            tf.cast(shape[0], dtype=tf.float32)
+            if self.data_format == "channels_last"
+            else tf.cast(shape[1], dtype=tf.float32)
+        )
+        width = (
+            tf.cast(shape[1], dtype=tf.float32)
+            if self.data_format == "channels_last"
+            else tf.cast(shape[2], dtype=tf.float32)
+        )
+        # Scale factor that maps the shorter side to resize_side (aspect preserved).
+        scale = tf.cond(
+            pred=tf.greater(height, width),
+            true_fn=lambda: self.resize_side / width,
+            false_fn=lambda: self.resize_side / height,
+        )
+
+        scale = tf.cast(scale, dtype=tf.float32)
+        new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32)
+        new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32)
+
+        if self.subpixels == "BGR" and self.data_format == "channels_first":
+            # 'RGB'->'BGR'
+            image = tf.cond(
+                tf.equal(tf.rank(image), 3),
+                lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1),
+                lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1),
+            )
+        elif self.subpixels == "BGR":
+            # 'RGB'->'BGR'
+            image = image[..., ::-1]
+        # Resize expects a batch dimension; add it, resize, then remove it.
+        image = tf.expand_dims(image, 0)
+        image = tf.image.resize(image, [new_height, new_width], method=self.resize_method)
+        image = tf.squeeze(image)
+        shape = tf.shape(input=image)
+        if self.random_crop:
+            y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32)
+            x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32)
+        else:
+            # Center crop.
+            y0 = (shape[0] - self.height) // 2
+            x0 = (shape[1] - self.width) // 2
+
+        image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width)
+        image.set_shape([self.height, self.width, 3])
+        if self.random_flip_left_right:
+            image = tf.image.random_flip_left_right(image)
+        # Normalize: subtract per-channel mean, then multiply by scale.
+        means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+        image = (image - means) * self.scale
+        return (image, label)
+
+
+class ComposeTransform(object):
+    """Composes several transforms together.
+
+    Args:
+        transform_list (list of Transform objects): list of transforms to compose
+
+    Returns:
+        sample (tuple): tuple of processed image and label
+    """
+
+    def __init__(self, transform_list):
+        """Initialize `ComposeTransform` class."""
+        self.transform_list = transform_list
+
+    def __call__(self, sample):
+        """Call transforms in transform_list.
+
+        Each transform's output is fed to the next, in list order.
+        """
+        for transform in self.transform_list:
+            sample = transform(sample)
+        return sample
+
+
+class LabelShift(object):
+    """Convert label to label - label_shift.
+
+    Used to reconcile the 1001-class TF-record labels with 1000-output models.
+
+    Args:
+        label_shift(int, default=0): number of label shift
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, label_shift=0):
+        """Initialize `LabelShift` class."""
+        self.label_shift = label_shift
+
+    def __call__(self, sample):
+        """Convert label to label_shift.
+
+        Handles the label containers a dataloader may yield: ndarray, list of
+        tuples / ndarrays / scalars, or a bare scalar/sequence.
+        """
+        images, labels = sample
+        if isinstance(labels, np.ndarray):
+            labels = labels - self.label_shift
+        elif isinstance(labels, list):
+            if isinstance(labels[0], tuple):
+                labels = [tuple(np.array(label) - self.label_shift) for label in labels]
+            elif isinstance(labels[0], np.ndarray):
+                labels = [label - self.label_shift for label in labels]
+            else:
+                # Plain scalars: shift via numpy, then restore the list type.
+                labels = np.array(labels) - self.label_shift
+                labels = labels.tolist()
+        else:
+            labels = np.array(labels) - self.label_shift
+        return images, labels
+
+
+class ImageRecordDataset(object):
+    """Tensorflow imageNet database in tf record format.
+
+    Please arrange data in this way:
+        root/validation-000-of-100
+        root/validation-001-of-100
+        ...
+        root/validation-099-of-100
+    The file name needs to follow this pattern: '* - * -of- *'
+
+    Args: root (str): Root directory of dataset.
+        transform (transform object, default=None): transform to process input data.
+        filter (Filter objects, default=None): filter out examples according
+            to specific conditions.
+    """
+
+    """Configuration for Imagenet dataset."""
+
+    def __new__(cls, root, transform=None, filter=None):
+        """Build a new object of TensorflowImageRecord class.
+
+        NOTE: returns a tf.data.Dataset, not an instance of this class —
+        the constructor is effectively a dataset factory.  The `filter`
+        argument is accepted but currently unused.
+        """
+        from tensorflow.python.platform import gfile  # pylint: disable=no-name-in-module
+
+        glob_pattern = os.path.join(root, "*-*-of-*")
+        file_names = gfile.Glob(glob_pattern)
+        if not file_names:
+            raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+        # pylint: disable=no-name-in-module
+        from tensorflow.python.data.experimental import parallel_interleave
+
+        ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+        ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+        if transform is not None:
+            # Always decode raw Example protos before user transforms run.
+            transform.transform_list.insert(0, ParseDecodeImagenet())
+        else:
+            transform = ParseDecodeImagenet()
+        ds = ds.map(transform, num_parallel_calls=None)
+        ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)  # this number can be tuned
+        return ds
+
+
+class BaseMetric(object):
+    """The base class of Metric.
+
+    Subclasses implement update()/reset()/result(); this base holds the
+    wrapped metric class and optional Horovod handle for distributed runs.
+    """
+
+    def __init__(self, metric, single_output=False, hvd=None):
+        """Initialize the basic metric.
+
+        Args:
+            metric: The metric class.
+            single_output: Whether the output is single or not, defaults to False.
+            hvd: The Horovod class for distributed training, defaults to None.
+        """
+        self._metric_cls = metric
+        self._single_output = single_output
+        self._hvd = hvd
+
+    def __call__(self, *args, **kwargs):
+        """Evaluate the model predictions, and the reference.
+
+        Instantiates the wrapped metric class with the given arguments.
+
+        Returns:
+            The class itself.
+        """
+        self._metric = self._metric_cls(*args, **kwargs)
+        return self
+
+    @abstractmethod
+    def update(self, preds, labels=None, sample_weight=None):
+        """Update the state that need to be evaluated.
+
+        Args:
+            preds: The prediction result.
+            labels: The reference. Defaults to None.
+            sample_weight: The sampling weight. Defaults to None.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self):
+        """Clear the predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def result(self):
+        """Evaluate the difference between predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @property
+    def metric(self):
+        """Return its metric class.
+
+        Returns:
+            The metric class.
+        """
+        return self._metric_cls
+
+    @property
+    def hvd(self):
+        """Return its hvd class.
+
+        Returns:
+            The hvd class.
+        """
+        return self._hvd
+
+    @hvd.setter
+    def hvd(self, hvd):
+        """Set its hvd.
+
+        Args:
+            hvd: The Horovod class for distributed training.
+        """
+        self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+    """Compute Top-k Accuracy classification score for Tensorflow model.
+
+    This metric computes the number of times where the correct label is among
+    the top k labels predicted.
+
+    Attributes:
+        k (int): The number of most likely outcomes considered to find the correct label.
+        num_correct: The number of predictions that were correct classified.
+        num_sample: The total number of predictions.
+    """
+
+    def __init__(self, k=1):
+        """Initialize the k, number of samples and correct predictions.
+
+        Args:
+            k: The number of most likely outcomes considered to find the correct label.
+        """
+        self.k = k
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def update(self, preds, labels, sample_weight=None):
+        """Add the predictions and labels.
+
+        Args:
+            preds: The predictions.
+            labels: The labels corresponding to the predictions.
+            sample_weight: The sample weight.
+        """
+        preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+        labels = labels.reshape([len(labels)])
+        # A small graph is built per update() call to run in_top_k; only the
+        # running totals are kept, not per-batch results.
+        with tf.Graph().as_default() as acc_graph:
+            topk = tf.nn.in_top_k(
+                predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+            )
+            fp32_topk = tf.cast(topk, tf.float32)
+            correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+            with tf.compat.v1.Session() as acc_sess:
+                correct = acc_sess.run(correct_tensor)
+
+        self.num_sample += len(labels)
+        self.num_correct += correct
+
+    def reset(self):
+        """Reset the number of samples and correct predictions."""
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def result(self):
+        """Compute the top-k score.
+
+        Returns:
+            The top-k score (num_correct / num_sample), or 0 when no samples
+            have been seen.
+        """
+        if self.num_sample == 0:
+            logger.warning("Sample num during evaluation is 0.")
+            return 0
+        elif getattr(self, "_hvd", None) is not None:  # pragma: no cover
+            # Distributed evaluation: aggregate counts across Horovod workers.
+            allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+            allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+            return allgather_num_correct / allgather_num_sample
+        return self.num_correct / self.num_sample
+
+    @staticmethod
+    def _topk_shape_validate(preds, labels):
+        # Normalizes preds to (N, class_num) and labels to (N, 1) sparse form.
+        # preds shape can be Nxclass_num or class_num(N=1 by default)
+        # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+        if isinstance(preds, int):
+            preds = [preds]
+            preds = np.array(preds)
+        elif isinstance(preds, np.ndarray):
+            preds = np.array(preds)
+        elif isinstance(preds, list):
+            preds = np.array(preds)
+            preds = preds.reshape((-1, preds.shape[-1]))
+
+        # consider labels just int value 1x1
+        if isinstance(labels, int):
+            labels = [labels]
+            labels = np.array(labels)
+        elif isinstance(labels, tuple):
+            labels = np.array([labels])
+            labels = labels.reshape((labels.shape[-1], -1))
+        elif isinstance(labels, list):
+            if isinstance(labels[0], int):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[0], 1))
+            elif isinstance(labels[0], tuple):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[-1], -1))
+            else:
+                labels = np.array(labels)
+        # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+        # only support 2 dimension one-shot labels
+        # or 1 dimension one-hot class_num will confuse with N
+
+        if len(preds.shape) == 1:
+            N = 1
+            class_num = preds.shape[0]
+            preds = preds.reshape([-1, class_num])
+        elif len(preds.shape) >= 2:
+            N = preds.shape[0]
+            preds = preds.reshape([N, -1])
+            class_num = preds.shape[1]
+
+        label_N = labels.shape[0]
+        assert label_N == N, "labels batch size should same with preds"
+        labels = labels.reshape([N, -1])
+        # one-hot labels will have 2 dimension not equal 1
+        if labels.shape[1] != 1:
+            labels = labels.argsort()[..., -1:]
+        return preds, labels
+
+
+class TFDataLoader(object):  # pragma: no cover
+    """Tensorflow dataloader class.
+
+    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+    method to do session run, this dataloader is designed to satisfy the usage of feed dict
+    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+    Args:
+        dataset: obj. wrapper of needed data.
+        batch_size: int. batch size
+    """
+
+    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+        """Initialize `TFDataDataLoader` class."""
+        self.dataset = dataset
+        self.last_batch = last_batch
+        self.batch_size = batch_size
+        # NOTE(review): this rebinds the local name only — `self.dataset`
+        # stays unbatched; batching is applied inside `_generate_dataloader`.
+        dataset = dataset.batch(batch_size)
+
+    def batch(self, batch_size, last_batch="rollover"):
+        """Dataset return data per batch.
+
+        Rebatches `self.dataset`; "rollover" keeps the final partial batch.
+        """
+        drop_last = False if last_batch == "rollover" else True
+        self.batch_size = batch_size
+        self.dataset = self.dataset.batch(batch_size, drop_last)
+
+    def __iter__(self):
+        """Iterate dataloader."""
+        return self._generate_dataloader(
+            self.dataset,
+            batch_size=self.batch_size,
+            last_batch=self.last_batch,
+        )
+
+    def _generate_dataloader(
+        self,
+        dataset,
+        batch_size=1,
+        last_batch="rollover",
+        collate_fn=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=None,
+        pin_memory=None,
+        distributed=False,
+    ):
+        """Yield data.
+
+        Generator over batches.  Eager mode: batches are assembled manually
+        from per-sample numpy values.  Graph mode: a one-shot iterator is
+        driven by a dedicated tf.compat.v1.Session.
+        """
+        drop_last = False if last_batch == "rollover" else True
+
+        def check_dynamic_shape(element_spec):
+            # True if any tensor in the element spec has an unknown dimension.
+            if isinstance(element_spec, collections.abc.Sequence):
+                return any([check_dynamic_shape(ele) for ele in element_spec])
+            elif isinstance(element_spec, tf.TensorSpec):
+                return True if element_spec.shape.num_elements() is None else False
+            else:
+                raise ValueError("unrecognized element spec...")
+
+        def squeeze_output(output):
+            # Drop the leading batch-of-1 axis added by dataset.batch(1).
+            if isinstance(output, collections.abc.Sequence):
+                return [squeeze_output(ele) for ele in output]
+            elif isinstance(output, np.ndarray):
+                return np.squeeze(output, axis=0)
+            else:
+                raise ValueError("not supported output format....")
+
+        if tf.executing_eagerly():
+            index = 0
+            outputs = []
+            for iter_tensors in dataset:
+                samples = []
+                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+                if isinstance(iter_inputs, tf.Tensor):
+                    samples.append(iter_inputs.numpy())
+                else:
+                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+                if isinstance(iter_labels, tf.Tensor):
+                    samples.append(iter_labels.numpy())
+                else:
+                    samples.append([np.array(l) for l in iter_labels])
+                index += 1
+                outputs.append(samples)
+                if index == batch_size:
+                    outputs = default_collate(outputs)
+                    yield outputs
+                    outputs = []
+                    index = 0
+            # Rollover semantics: emit the final partial batch if any remain.
+            if len(outputs) > 0:
+                outputs = default_collate(outputs)
+                yield outputs
+        else:
+            # Graph mode: batch to size 1 when shapes are dynamic, else to
+            # batch_size, and pull batches through a dedicated session.
+            try_single_batch = check_dynamic_shape(dataset.element_spec)
+            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+            iter_tensors = ds_iterator.get_next()
+            data_config = tf.compat.v1.ConfigProto()
+            data_config.use_per_session_threads = 1
+            data_config.intra_op_parallelism_threads = 1
+            data_config.inter_op_parallelism_threads = 16
+            data_sess = tf.compat.v1.Session(config=data_config)
+            # pylint: disable=no-name-in-module
+            from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+            while True:
+                if not try_single_batch:
+                    try:
+                        outputs = data_sess.run(iter_tensors)
+                        yield outputs
+                    except OutOfRangeError:
+                        data_sess.close()
+                        return
+                else:
+                    # Dynamic shapes: pull batch_size single-sample batches
+                    # and collate them manually; the final partial batch is
+                    # emitted from the OutOfRangeError handler.
+                    try:
+                        outputs = []
+                        for i in range(0, batch_size):
+                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+                        outputs = default_collate(outputs)
+                        yield outputs
+                    except OutOfRangeError:
+                        if len(outputs) == 0:
+                            data_sess.close()
+                            return
+                        else:
+                            outputs = default_collate(outputs)
+                            yield outputs
+                            data_sess.close()
+                            return
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py
new file mode 100644
index 00000000000..ffe960e1b1e
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py
@@ -0,0 +1,146 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+import tensorflow as tf
+import numpy as np
+
+from argparse import ArgumentParser
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ ResizeCropImagenet,
+ LabelShift,
+ TFDataLoader,
+ TopKMetric
+)
+
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+arg_parser = ArgumentParser(description='Parse args')
+arg_parser.add_argument('-g', "--input-graph",
+ help='Specify the input graph for the transform tool',
+ dest='input_graph')
+arg_parser.add_argument("--output-graph",
+ help='Specify tune result model save dir',
+ dest='output_graph')
+arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
+arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
+arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
+arg_parser.add_argument('--dataset_location', dest='dataset_location',
+ help='location of calibration dataset and evaluate dataset')
+arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark')
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations')
+args = arg_parser.parse_args()
+
+def evaluate(model, eval_dataloader, metric, postprocess=None):
+ """Custom evaluate function to estimate the accuracy of the model.
+
+ Args:
+ model (tf.Graph_def): The input model graph
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ from neural_compressor.tensorflow import Model
+ model = Model(model)
+ input_tensor = model.input_tensor
+ output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+ model.output_tensor[0]
+ iteration = -1
+ if args.benchmark and args.mode == 'performance':
+ iteration = args.iters
+
+ def eval_func(dataloader):
+ latency_list = []
+ for idx, (inputs, labels) in enumerate(dataloader):
+ # dataloader should keep the order and len of inputs same with input_tensor
+ inputs = np.array([inputs])
+ feed_dict = dict(zip(input_tensor, inputs))
+
+ start = time.time()
+ predictions = model.sess.run(output_tensor, feed_dict)
+ end = time.time()
+ if postprocess:
+ predictions, labels = postprocess((predictions, labels))
+ metric.update(predictions, labels)
+ latency_list.append(end-start)
+ if idx + 1 == iteration:
+ break
+ latency = np.array(latency_list).mean() / args.batch_size
+ return latency
+
+ latency = eval_func(eval_dataloader)
+ if args.benchmark and args.mode == 'performance':
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+class eval_classifier_optimized_graph:
+ """Evaluate image classifier with optimized TensorFlow graph."""
+
+ def run(self):
+ """This is neural_compressor function include tuning, export and benchmark option."""
+ from neural_compressor.common import set_random_seed
+ set_random_seed(9527)
+
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+ dataset = ImageRecordDataset(
+ root=args.dataset_location,
+ transform=ComposeTransform(transform_list= [
+ ResizeCropImagenet(height=224, width=224, mean_value=[123.68, 116.78, 103.94]),
+ ]
+ )
+ )
+ calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10)
+
+ quant_config = StaticQuantConfig()
+ q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+ q_model.save(args.output_graph)
+
+ if args.benchmark:
+ dataset = ImageRecordDataset(
+ root=args.dataset_location,
+ transform=ComposeTransform(transform_list= [
+ ResizeCropImagenet(height=224, width=224, mean_value=[123.68, 116.78, 103.94]),
+ ]
+ )
+ )
+ dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size)
+
+ def eval(model):
+ top1 = TopKMetric(k=1)
+ postprocess = LabelShift(label_shift=1)
+ return evaluate(model, dataloader, top1, postprocess)
+
+ if args.mode == 'performance':
+ eval(args.input_graph)
+ elif args.mode == 'accuracy':
+ acc_result = eval(args.input_graph)
+ print("Batch size = %d" % dataloader.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+ evaluate_opt_graph = eval_classifier_optimized_graph()
+ evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2755e1a41ac
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+neural-compressor
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..8ecac837cf7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location ${dataset_location} \
+ --batch_size ${batch_size} \
+ --benchmark \
+ --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+function main {
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input-graph ${input_model} \
+ --output-graph ${output_model} \
+ --dataset_location ${dataset_location} \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md
new file mode 100644
index 00000000000..0d4fa041690
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md
@@ -0,0 +1,79 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce Vision Transformer model tuning results via Neural Compressor.
+
+# Prerequisite
+
+## 1. Environment
+
+### Install Dependency Package
+
+```
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Pretrained model
+
+```
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_11_0/HF-ViT-Base16-Img224-frozen.pb
+```
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also provide related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. Once you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of each image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./vision_transformer/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./vision_transformer/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset, which resides in JPEG files, should be in the following directory structure. Taking the validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run
+
+## 1. Quantization
+
+```shell
+bash run_quant.sh --input_model=./HF-ViT-Base16-Img224-frozen.pb --output_model=./output --dataset_location=
+```
+
+
+## 2. Benchmark
+
+### Benchmark the fp32 model
+
+```shell
+bash run_benchmark.sh --input_model=./HF-ViT-Base16-Img224-frozen.pb --mode=accuracy --dataset_location= --batch_size=32
+```
+
+### Benchmark the int8 model
+
+```shell
+bash run_benchmark.sh --input_model=./output.pb --mode=accuracy --dataset_location= --batch_size=32 --int8=true
+```
\ No newline at end of file
diff --git a/neural_solution/examples/custom_models_optimized/tf_example1/model/.gitkeep b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py
similarity index 100%
rename from neural_solution/examples/custom_models_optimized/tf_example1/model/.gitkeep
rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py
diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..8d28e4a3e17
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py
@@ -0,0 +1,576 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+ """Parse features in Example proto.
+
+ Returns:
+ tuple of parsed image and label
+ """
+
+ def __call__(self, sample):
+ """Parse features in example."""
+ # Dense features in Example proto.
+ feature_map = {
+ "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+ "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+ }
+
+ sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+ # Sparse features in Example proto.
+ feature_map.update(
+ {
+ k: sparse_float32
+ for k in [
+ "image/object/bbox/xmin",
+ "image/object/bbox/ymin",
+ "image/object/bbox/xmax",
+ "image/object/bbox/ymax",
+ ]
+ }
+ )
+
+ features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+ label = tf.cast(features["image/class/label"], dtype=tf.int32)
+ image = features["image/encoded"]
+ image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+ return (image, label)
+
+
+class ResizeCropImagenet(object):
+ """Combination of a series of transforms which is applicable to images in Imagenet.
+
+ Args:
+ height (int): Height of the result
+ width (int): Width of the result
+ random_crop (bool, default=False): whether to random crop
+ resize_side (int, default=256):desired shape after resize operation
+ random_flip_left_right (bool, default=False): whether to random flip left and right
+ mean_value (list, default=[0.0,0.0,0.0]):means for each channel
+ scale (float, default=1.0):std value
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(
+ self,
+ height,
+ width,
+ random_crop=False,
+ resize_side=256,
+ resize_method="bilinear",
+ random_flip_left_right=False,
+ mean_value=[0.0, 0.0, 0.0],
+ scale=1.0,
+ data_format="channels_last",
+ subpixels="RGB",
+ ):
+ """Initialize `TensorflowResizeCropImagenetTransform` class."""
+ self.height = height
+ self.width = width
+ self.mean_value = mean_value
+ self.scale = scale
+ self.random_crop = random_crop
+ self.random_flip_left_right = random_flip_left_right
+ self.resize_side = resize_side
+ self.resize_method = resize_method
+ self.data_format = data_format
+ self.subpixels = subpixels
+
+ # sample is (images, labels)
+ def __call__(self, sample):
+ """Convert `TensorflowResizeCropImagenetTransform` feature."""
+ image, label = sample
+ shape = tf.shape(input=image)
+
+ height = (
+ tf.cast(shape[0], dtype=tf.float32)
+ if self.data_format == "channels_last"
+ else tf.cast(shape[1], dtype=tf.float32)
+ )
+ width = (
+ tf.cast(shape[1], dtype=tf.float32)
+ if self.data_format == "channels_last"
+ else tf.cast(shape[2], dtype=tf.float32)
+ )
+ scale = tf.cond(
+ pred=tf.greater(height, width),
+ true_fn=lambda: self.resize_side / width,
+ false_fn=lambda: self.resize_side / height,
+ )
+
+ scale = tf.cast(scale, dtype=tf.float32)
+ new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32)
+ new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32)
+
+ if self.subpixels == "BGR" and self.data_format == "channels_first":
+ # 'RGB'->'BGR'
+ image = tf.cond(
+ tf.equal(tf.rank(image), 3),
+ lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1),
+ lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1),
+ )
+ elif self.subpixels == "BGR":
+ # 'RGB'->'BGR'
+ image = image[..., ::-1]
+ image = tf.expand_dims(image, 0)
+ image = tf.image.resize(image, [new_height, new_width], method=self.resize_method)
+ image = tf.squeeze(image)
+ shape = tf.shape(input=image)
+ if self.random_crop:
+ y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32)
+ x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32)
+ else:
+ y0 = (shape[0] - self.height) // 2
+ x0 = (shape[1] - self.width) // 2
+
+ image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width)
+ image.set_shape([self.height, self.width, 3])
+ if self.random_flip_left_right:
+ image = tf.image.random_flip_left_right(image)
+ means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+ image = (image - means) * self.scale
+ return (image, label)
+
+
+class TransposeLastChannel(object):
+ """Transpose NHWC to NCHW.
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __call__(self, sample):
+ image, label = sample
+ image = tf.transpose(image, perm=[2, 0, 1])
+ return (image, label)
+
+
+class ComposeTransform(object):
+ """Composes several transforms together.
+
+ Args:
+ transform_list (list of Transform objects): list of transforms to compose
+
+ Returns:
+ sample (tuple): tuple of processed image and label
+ """
+
+ def __init__(self, transform_list):
+ """Initialize `ComposeTransform` class."""
+ self.transform_list = transform_list
+
+ def __call__(self, sample):
+ """Call transforms in transform_list."""
+ for transform in self.transform_list:
+ sample = transform(sample)
+ return sample
+
+
+class ShiftRescale(object):
+ """Label shift by 1 and rescale.
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __call__(self, sample):
+ image, label = sample
+ label -= 1
+ image = (image - 127.5) / 127.5
+ return (image, label)
+
+
+class ImageRecordDataset(object):
+ """Tensorflow imageNet database in tf record format.
+
+ Please arrange data in this way:
+ root/validation-000-of-100
+ root/validation-001-of-100
+ ...
+ root/validation-099-of-100
+ The file name needs to follow this pattern: '* - * -of- *'
+
+ Args: root (str): Root directory of dataset.
+ transform (transform object, default=None): transform to process input data.
+ filter (Filter objects, default=None): filter out examples according
+ to specific conditions.
+ """
+
+ """Configuration for Imagenet dataset."""
+
+ def __new__(cls, root, transform=None, filter=None):
+ """Build a new object of TensorflowImageRecord class."""
+ from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module
+
+ glob_pattern = os.path.join(root, "*-*-of-*")
+ file_names = gfile.Glob(glob_pattern)
+ if not file_names:
+ raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.data.experimental import parallel_interleave
+
+ ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+ ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+ if transform is not None:
+ transform.transform_list.insert(0, ParseDecodeImagenet())
+ else:
+ transform = ParseDecodeImagenet()
+ ds = ds.map(transform, num_parallel_calls=None)
+ ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned
+ return ds
+
+
+class BaseMetric(object):
+ """The base class of Metric."""
+
+ def __init__(self, metric, single_output=False, hvd=None):
+ """Initialize the basic metric.
+
+ Args:
+ metric: The metric class.
+ single_output: Whether the output is single or not, defaults to False.
+ hvd: The Horovod class for distributed training, defaults to None.
+ """
+ self._metric_cls = metric
+ self._single_output = single_output
+ self._hvd = hvd
+
+ def __call__(self, *args, **kwargs):
+ """Evaluate the model predictions, and the reference.
+
+ Returns:
+ The class itself.
+ """
+ self._metric = self._metric_cls(*args, **kwargs)
+ return self
+
+ @abstractmethod
+ def update(self, preds, labels=None, sample_weight=None):
+ """Update the state that need to be evaluated.
+
+ Args:
+ preds: The prediction result.
+ labels: The reference. Defaults to None.
+ sample_weight: The sampling weight. Defaults to None.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def reset(self):
+ """Clear the predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def result(self):
+ """Evaluate the difference between predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @property
+ def metric(self):
+ """Return its metric class.
+
+ Returns:
+ The metric class.
+ """
+ return self._metric_cls
+
+ @property
+ def hvd(self):
+ """Return its hvd class.
+
+ Returns:
+ The hvd class.
+ """
+ return self._hvd
+
+ @hvd.setter
+ def hvd(self, hvd):
+ """Set its hvd.
+
+ Args:
+ hvd: The Horovod class for distributed training.
+ """
+ self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+ """Compute Top-k Accuracy classification score for Tensorflow model.
+
+ This metric computes the number of times where the correct label is among
+ the top k labels predicted.
+
+ Attributes:
+ k (int): The number of most likely outcomes considered to find the correct label.
+ num_correct: The number of predictions that were correct classified.
+ num_sample: The total number of predictions.
+ """
+
+ def __init__(self, k=1):
+ """Initialize the k, number of samples and correct predictions.
+
+ Args:
+ k: The number of most likely outcomes considered to find the correct label.
+ """
+ self.k = k
+ self.num_correct = 0
+ self.num_sample = 0
+
+ def update(self, preds, labels, sample_weight=None):
+ """Add the predictions and labels.
+
+ Args:
+ preds: The predictions.
+ labels: The labels corresponding to the predictions.
+ sample_weight: The sample weight.
+ """
+ preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+ labels = labels.reshape([len(labels)])
+ with tf.Graph().as_default() as acc_graph:
+ topk = tf.nn.in_top_k(
+ predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+ )
+ fp32_topk = tf.cast(topk, tf.float32)
+ correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+ with tf.compat.v1.Session() as acc_sess:
+ correct = acc_sess.run(correct_tensor)
+
+ self.num_sample += len(labels)
+ self.num_correct += correct
+
+ def reset(self):
+ """Reset the number of samples and correct predictions."""
+ self.num_correct = 0
+ self.num_sample = 0
+
+ def result(self):
+ """Compute the top-k score.
+
+ Returns:
+ The top-k score.
+ """
+ if self.num_sample == 0:
+ logger.warning("Sample num during evaluation is 0.")
+ return 0
+ elif getattr(self, "_hvd", None) is not None: # pragma: no cover
+ allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+ allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+ return allgather_num_correct / allgather_num_sample
+ return self.num_correct / self.num_sample
+
+ @staticmethod
+ def _topk_shape_validate(preds, labels):
+ # preds shape can be Nxclass_num or class_num(N=1 by default)
+ # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+ if isinstance(preds, int):
+ preds = [preds]
+ preds = np.array(preds)
+ elif isinstance(preds, np.ndarray):
+ preds = np.array(preds)
+ elif isinstance(preds, list):
+ preds = np.array(preds)
+ preds = preds.reshape((-1, preds.shape[-1]))
+
+ # consider labels just int value 1x1
+ if isinstance(labels, int):
+ labels = [labels]
+ labels = np.array(labels)
+ elif isinstance(labels, tuple):
+ labels = np.array([labels])
+ labels = labels.reshape((labels.shape[-1], -1))
+ elif isinstance(labels, list):
+ if isinstance(labels[0], int):
+ labels = np.array(labels)
+ labels = labels.reshape((labels.shape[0], 1))
+ elif isinstance(labels[0], tuple):
+ labels = np.array(labels)
+ labels = labels.reshape((labels.shape[-1], -1))
+ else:
+ labels = np.array(labels)
+ # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+ # only support 2 dimension one-shot labels
+ # or 1 dimension one-hot class_num will confuse with N
+
+ if len(preds.shape) == 1:
+ N = 1
+ class_num = preds.shape[0]
+ preds = preds.reshape([-1, class_num])
+ elif len(preds.shape) >= 2:
+ N = preds.shape[0]
+ preds = preds.reshape([N, -1])
+ class_num = preds.shape[1]
+
+ label_N = labels.shape[0]
+ assert label_N == N, "labels batch size should same with preds"
+ labels = labels.reshape([N, -1])
+ # one-hot labels will have 2 dimension not equal 1
+ if labels.shape[1] != 1:
+ labels = labels.argsort()[..., -1:]
+ return preds, labels
+
+
+class TFDataLoader(object): # pragma: no cover
+ """Tensorflow dataloader class.
+
+ In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+ method to do session run, this dataloader is designed to satisfy the usage of feed dict
+ in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+ Args:
+ dataset: obj. wrapper of needed data.
+ batch_size: int. batch size
+ """
+
+ def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+ """Initialize `TFDataDataLoader` class."""
+ self.dataset = dataset
+ self.last_batch = last_batch
+ self.batch_size = batch_size
+ dataset = dataset.batch(batch_size)
+
+ def batch(self, batch_size, last_batch="rollover"):
+ """Dataset return data per batch."""
+ drop_last = False if last_batch == "rollover" else True
+ self.batch_size = batch_size
+ self.dataset = self.dataset.batch(batch_size, drop_last)
+
+ def __iter__(self):
+ """Iterate dataloader."""
+ return self._generate_dataloader(
+ self.dataset,
+ batch_size=self.batch_size,
+ last_batch=self.last_batch,
+ )
+
+ def _generate_dataloader(
+ self,
+ dataset,
+ batch_size=1,
+ last_batch="rollover",
+ collate_fn=None,
+ sampler=None,
+ batch_sampler=None,
+ num_workers=None,
+ pin_memory=None,
+ distributed=False,
+ ):
+ """Yield data."""
+ drop_last = False if last_batch == "rollover" else True
+
+ def check_dynamic_shape(element_spec):
+ if isinstance(element_spec, collections.abc.Sequence):
+ return any([check_dynamic_shape(ele) for ele in element_spec])
+ elif isinstance(element_spec, tf.TensorSpec):
+ return True if element_spec.shape.num_elements() is None else False
+ else:
+ raise ValueError("unrecognized element spec...")
+
+ def squeeze_output(output):
+ if isinstance(output, collections.abc.Sequence):
+ return [squeeze_output(ele) for ele in output]
+ elif isinstance(output, np.ndarray):
+ return np.squeeze(output, axis=0)
+ else:
+ raise ValueError("not supported output format....")
+
+ if tf.executing_eagerly():
+ index = 0
+ outputs = []
+ for iter_tensors in dataset:
+ samples = []
+ iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+ if isinstance(iter_inputs, tf.Tensor):
+ samples.append(iter_inputs.numpy())
+ else:
+ samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+ if isinstance(iter_labels, tf.Tensor):
+ samples.append(iter_labels.numpy())
+ else:
+ samples.append([np.array(l) for l in iter_labels])
+ index += 1
+ outputs.append(samples)
+ if index == batch_size:
+ outputs = default_collate(outputs)
+ yield outputs
+ outputs = []
+ index = 0
+ if len(outputs) > 0:
+ outputs = default_collate(outputs)
+ yield outputs
+ else:
+ try_single_batch = check_dynamic_shape(dataset.element_spec)
+ dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+ ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+ iter_tensors = ds_iterator.get_next()
+ data_config = tf.compat.v1.ConfigProto()
+ data_config.use_per_session_threads = 1
+ data_config.intra_op_parallelism_threads = 1
+ data_config.inter_op_parallelism_threads = 16
+ data_sess = tf.compat.v1.Session(config=data_config)
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+ while True:
+ if not try_single_batch:
+ try:
+ outputs = data_sess.run(iter_tensors)
+ yield outputs
+ except OutOfRangeError:
+ data_sess.close()
+ return
+ else:
+ try:
+ outputs = []
+ for i in range(0, batch_size):
+ outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+ outputs = default_collate(outputs)
+ yield outputs
+ except OutOfRangeError:
+ if len(outputs) == 0:
+ data_sess.close()
+ return
+ else:
+ outputs = default_collate(outputs)
+ yield outputs
+ data_sess.close()
+ return
diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py
new file mode 100644
index 00000000000..92b2ea0fb2a
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py
@@ -0,0 +1,186 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
+from tensorflow.python.framework import dtypes
+from tensorflow.core.protobuf import saved_model_pb2
+
+from argparse import ArgumentParser
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ ResizeCropImagenet,
+ TransposeLastChannel,
+ ShiftRescale,
+ TFDataLoader,
+)
+
+
+INPUTS = 'inputs'
+OUTPUTS = 'Identity'
+
+RESNET_IMAGE_SIZE = 224
+IMAGENET_VALIDATION_IMAGES = 50000
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+arg_parser = ArgumentParser(description='Parse args')
+arg_parser.add_argument('-g', "--input-graph",
+ help='Specify the input graph for the transform tool',
+ dest='input_graph')
+arg_parser.add_argument("--output-graph",
+ help='Specify tune result model save dir',
+ dest='output_graph')
+arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark')
+arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode')
+arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.')
+arg_parser.add_argument('--diagnose', dest='diagnose', action='store_true', help='use Neural Insights to diagnose tuning and benchmark.')
+arg_parser.add_argument('--dataset_location', dest='dataset_location',
+ help='location of calibration dataset and evaluate dataset')
+arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark')
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--int8', dest='int8', action='store_true', help='whether to use int8 model for benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model, eval_dataloader, preprocess=None):
+ """Custom evaluate function to estimate the accuracy of the model.
+
+ Args:
+ model (tf.Graph_def): The input model graph
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ from data_process import TopKMetric
+ from neural_compressor.tensorflow import Model
+ model = Model(model)
+ input_tensor = model.input_tensor
+ output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+ model.output_tensor[0]
+ iteration = -1
+ metric = TopKMetric()
+ if args.benchmark and args.mode == 'performance':
+ iteration = args.iters
+
+ def eval_func(dataloader):
+ latency_list = []
+ for idx, (inputs, labels) in enumerate(dataloader):
+ # shift the label and rescale the inputs
+ inputs, labels = preprocess((inputs, labels))
+ # dataloader should keep the order and len of inputs same with input_tensor
+ inputs = np.array([inputs])
+ feed_dict = dict(zip(input_tensor, inputs))
+
+ start = time.time()
+ predictions = model.sess.run(output_tensor, feed_dict)
+ end = time.time()
+
+ if isinstance(predictions, list):
+ if len(model.output_tensor_names) == 1:
+ predictions = predictions[0]
+ elif len(model.output_tensor_names) > 1:
+ predictions = predictions[1]
+ metric.update(predictions, labels)
+ latency_list.append(end-start)
+ if idx + 1 == iteration:
+ break
+ latency = np.array(latency_list).mean() / args.batch_size
+ return latency
+
+ latency = eval_func(eval_dataloader)
+ if args.benchmark and args.mode == 'performance':
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+class eval_classifier_optimized_graph:
+ """Evaluate image classifier with optimized TensorFlow graph."""
+
+ def run(self):
+ """This is neural_compressor function include tuning, export and benchmark option."""
+ from neural_compressor.common import set_random_seed
+ set_random_seed(9527)
+
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+ dataset = ImageRecordDataset(
+ root=args.dataset_location,
+ transform=ComposeTransform(transform_list= [
+ ResizeCropImagenet(height=224, width=224),
+ TransposeLastChannel(),
+ ]
+ )
+ )
+ calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10)
+
+ quant_config = StaticQuantConfig()
+ matmul_config = StaticQuantConfig(weight_dtype="fp32", act_dtype="fp32")
+ conv_config = StaticQuantConfig(weight_dtype="fp32", act_dtype="fp32")
+ quant_config.set_local("StatefulPartitionedCall/vit/encoder/layer_._9/output/dense/Tensordot/MatMul", matmul_config)
+ quant_config.set_local("Conv2D", conv_config)
+
+ sm = saved_model_pb2.SavedModel()
+ with tf.io.gfile.GFile(args.input_graph, "rb") as f:
+ sm.ParseFromString(f.read())
+ graph_def = sm.meta_graphs[0].graph_def
+
+ q_model = quantize_model(graph_def, quant_config, calib_dataloader)
+ q_model.save(args.output_graph)
+
+ if args.benchmark:
+ dataset = ImageRecordDataset(
+ root=args.dataset_location,
+ transform=ComposeTransform(transform_list= [
+ ResizeCropImagenet(height=224, width=224),
+ TransposeLastChannel(),
+ ]
+ )
+ )
+ dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size)
+
+ if args.int8 or args.input_graph.endswith("-tune.pb"):
+ input_graph = args.input_graph
+ else:
+ sm = saved_model_pb2.SavedModel()
+ with tf.io.gfile.GFile(args.input_graph, "rb") as f:
+ sm.ParseFromString(f.read())
+ graph_def = sm.meta_graphs[0].graph_def
+ input_graph = graph_def
+
+ def eval(model):
+ preprocess = ShiftRescale()
+ return evaluate(model, dataloader, preprocess)
+
+ if args.mode == 'performance':
+ eval(input_graph)
+ elif args.mode == 'accuracy':
+ acc_result = eval(input_graph)
+ print("Batch size = %d" % dataloader.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+ evaluate_opt_graph = eval_classifier_optimized_graph()
+ evaluate_opt_graph.run()
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..d86161032c2
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+neural-compressor
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..2348865d66e
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+ if [[ ${int8} == "true" ]]; then
+ extra_cmd=$extra_cmd" --int8"
+ fi
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location ${dataset_location} \
+ --batch_size ${batch_size} \
+ --benchmark \
+ --iters ${iters} \
+ ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+function main {
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input-graph ${input_model} \
+ --output-graph ${output_model} \
+ --dataset_location ${dataset_location} \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py
new file mode 100644
index 00000000000..c52d2bd4218
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py
@@ -0,0 +1,567 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Converts ImageNet data to TFRecords file format with Example protos.
+
+The raw ImageNet data set is expected to reside in JPEG files located in the
+following directory structure.
+
+ data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
+ data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
+ ...
+
+where 'n01440764' is the unique synset label associated with
+these images.
+
+The training data set consists of 1000 sub-directories (i.e. labels)
+each containing 1200 JPEG images for a total of 1.2M JPEG images.
+
+The evaluation data set consists of 1000 sub-directories (i.e. labels)
+each containing 50 JPEG images for a total of 50K JPEG images.
+
+This TensorFlow script converts the training and evaluation data into
+a sharded data set consisting of 1024 and 128 TFRecord files, respectively.
+
+ train_directory/train-00000-of-01024
+ train_directory/train-00001-of-01024
+ ...
+ train_directory/train-00127-of-01024
+
+and
+
+ validation_directory/validation-00000-of-00128
+ validation_directory/validation-00001-of-00128
+ ...
+ validation_directory/validation-00127-of-00128
+
+Each validation TFRecord file contains ~390 records. Each training TFREcord
+file contains ~1250 records. Each record within the TFRecord file is a
+serialized Example proto. The Example proto contains the following fields:
+
+ image/encoded: string containing JPEG encoded image in RGB colorspace
+ image/height: integer, image height in pixels
+ image/width: integer, image width in pixels
+ image/colorspace: string, specifying the colorspace, always 'RGB'
+ image/channels: integer, specifying the number of channels, always 3
+  image/format: string, specifying the format, always 'JPEG'
+
+ image/filename: string containing the basename of the image file
+ e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
+ image/class/label: integer specifying the index in a classification layer.
+ The label ranges from [1, 1000] where 0 is not used.
+ image/class/synset: string specifying the unique ID of the label,
+ e.g. 'n01440764'
+ image/class/text: string specifying the human-readable version of the label
+ e.g. 'red fox, Vulpes vulpes'
+
+Note that the length of xmin is identical to the length of xmax, ymin and ymax
+for each example.
+
+Running this script using 16 threads may take around ~2.5 hours on a HP Z420.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from datetime import datetime
+import os
+import random
+import sys
+import threading
+
+import numpy as np
+from six.moves import xrange # pylint: disable=redefined-builtin
+import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
+
+tf.compat.v1.app.flags.DEFINE_string('raw_directory', None,
+ 'Raw data directory')
+
+tf.compat.v1.app.flags.DEFINE_string('output_directory', None,
+ 'Output data directory')
+
+tf.compat.v1.app.flags.DEFINE_integer('shards', 1,
+ 'Number of shards in TFRecord files.')
+
+tf.compat.v1.app.flags.DEFINE_string('subset', 'validation',
+ 'Subset of imagenet, can be validation/train')
+
+tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1,
+ 'Number of threads to preprocess the images.')
+
+# The labels file contains a list of valid labels are held in this file.
+# Assumes that the file contains entries as such:
+# n01440764
+# n01443537
+# n01484850
+# where each line corresponds to a label expressed as a synset. We map
+# each synset contained in the file to an integer (based on the alphabetical
+# ordering). See below for details.
+tf.compat.v1.app.flags.DEFINE_string('labels_file',
+ 'imagenet_lsvrc_2015_synsets.txt',
+ 'Labels file')
+
+# This file containing mapping from synset to human-readable label.
+# Assumes each line of the file looks like:
+#
+# n02119247 black fox
+# n02119359 silver fox
+# n02119477 red fox, Vulpes fulva
+#
+# where each line corresponds to a unique mapping. Note that each line is
+# formatted as <synset>\t<human readable label>.
+tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file',
+ 'imagenet_metadata.txt',
+ 'ImageNet metadata file')
+
+FLAGS = tf.compat.v1.app.flags.FLAGS
+
+
+def _int64_feature(value):
+ """Wrapper for inserting int64 features into Example proto."""
+ if not isinstance(value, list):
+ value = [value]
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+
+def _float_feature(value):
+ """Wrapper for inserting float features into Example proto."""
+ if not isinstance(value, list):
+ value = [value]
+ return tf.train.Feature(float_list=tf.train.FloatList(value=value))
+
+
+def _bytes_feature(value):
+ """Wrapper for inserting bytes features into Example proto."""
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+
+def _convert_to_example(filename, image_buffer, label, synset, human,
+ height, width):
+ """Build an Example proto for an example.
+
+ Args:
+ filename: string, path to an image file, e.g., '/path/to/example.JPG'
+ image_buffer: string, JPEG encoding of RGB image
+ label: integer, identifier for the ground truth for the network
+ synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
+ human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
+ height: integer, image height in pixels
+ width: integer, image width in pixels
+ Returns:
+ Example proto
+ """
+
+ colorspace = b'RGB'
+ channels = 3
+ image_format = b'JPEG'
+
+ example = tf.train.Example(features=tf.train.Features(feature={
+ 'image/height': _int64_feature(height),
+ 'image/width': _int64_feature(width),
+ 'image/colorspace': _bytes_feature(colorspace),
+ 'image/channels': _int64_feature(channels),
+ 'image/class/label': _int64_feature(label),
+ 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')),
+ 'image/class/text': _bytes_feature(bytes(human,'utf-8')),
+ 'image/format': _bytes_feature(image_format),
+ 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')),
+ 'image/encoded': _bytes_feature(image_buffer)}))
+ return example
+
+
+class ImageCoder(object):
+ """Helper class that provides TensorFlow image coding utilities."""
+
+ def __init__(self):
+ # Create a single Session to run all image coding calls.
+ self._sess = tf.compat.v1.Session()
+
+ # Initializes function that converts PNG to JPEG data.
+ self._png_data = tf.compat.v1.placeholder(dtype=tf.string)
+ image = tf.image.decode_png(self._png_data, channels=3)
+ self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)
+
+ # Initializes function that converts CMYK JPEG data to RGB JPEG data.
+ self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string)
+ image = tf.image.decode_jpeg(self._cmyk_data, channels=0)
+ self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100)
+
+ # Initializes function that decodes RGB JPEG data.
+ self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string)
+ self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)
+
+ def png_to_jpeg(self, image_data):
+ return self._sess.run(self._png_to_jpeg,
+ feed_dict={self._png_data: image_data})
+
+ def cmyk_to_rgb(self, image_data):
+ return self._sess.run(self._cmyk_to_rgb,
+ feed_dict={self._cmyk_data: image_data})
+
+ def decode_jpeg(self, image_data):
+ image = self._sess.run(self._decode_jpeg,
+ feed_dict={self._decode_jpeg_data: image_data})
+ assert len(image.shape) == 3
+ assert image.shape[2] == 3
+ return image
+
+
+def _is_png(filename):
+ """Determine if a file contains a PNG format image.
+
+ Args:
+ filename: string, path of the image file.
+
+ Returns:
+ boolean indicating if the image is a PNG.
+ """
+ # File list from:
+ # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU
+ return 'n02105855_2933.JPEG' in filename
+
+
+def _is_cmyk(filename):
+ """Determine if file contains a CMYK JPEG format image.
+
+ Args:
+ filename: string, path of the image file.
+
+ Returns:
+ boolean indicating if the image is a JPEG encoded with CMYK color space.
+ """
+ # File list from:
+ # https://github.com/cytsai/ilsvrc-cmyk-image-list
+ blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG',
+ 'n02447366_23489.JPEG', 'n02492035_15739.JPEG',
+ 'n02747177_10752.JPEG', 'n03018349_4028.JPEG',
+ 'n03062245_4620.JPEG', 'n03347037_9675.JPEG',
+ 'n03467068_12171.JPEG', 'n03529860_11437.JPEG',
+ 'n03544143_17228.JPEG', 'n03633091_5218.JPEG',
+ 'n03710637_5125.JPEG', 'n03961711_5286.JPEG',
+ 'n04033995_2932.JPEG', 'n04258138_17003.JPEG',
+ 'n04264628_27969.JPEG', 'n04336792_7448.JPEG',
+ 'n04371774_5854.JPEG', 'n04596742_4225.JPEG',
+ 'n07583066_647.JPEG', 'n13037406_4650.JPEG']
+ return filename.split('/')[-1] in blacklist
+
+
+def _process_image(filename, coder):
+ """Process a single image file.
+
+ Args:
+ filename: string, path to an image file e.g., '/path/to/example.JPG'.
+ coder: instance of ImageCoder to provide TensorFlow image coding utils.
+ Returns:
+ image_buffer: string, JPEG encoding of RGB image.
+ height: integer, image height in pixels.
+ width: integer, image width in pixels.
+ """
+ # Read the image file.
+ image_data = tf.io.gfile.GFile(filename, 'rb').read()
+
+ # Clean the dirty data.
+ if _is_png(filename):
+ # 1 image is a PNG.
+ print('Converting PNG to JPEG for %s' % filename)
+ image_data = coder.png_to_jpeg(image_data)
+ elif _is_cmyk(filename):
+ # 22 JPEG images are in CMYK colorspace.
+ print('Converting CMYK to RGB for %s' % filename)
+ image_data = coder.cmyk_to_rgb(image_data)
+
+ # Decode the RGB JPEG.
+ image = coder.decode_jpeg(image_data)
+
+ # Check that image converted to RGB
+ assert len(image.shape) == 3
+ height = image.shape[0]
+ width = image.shape[1]
+ assert image.shape[2] == 3
+
+ return image_data, height, width
+
+
+def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
+ synsets, labels, humans, num_shards):
+ """Processes and saves list of images as TFRecord in 1 thread.
+
+ Args:
+ coder: instance of ImageCoder to provide TensorFlow image coding utils.
+ thread_index: integer, unique batch to run index is within [0, len(ranges)).
+ ranges: list of pairs of integers specifying ranges of each batches to
+ analyze in parallel.
+ name: string, unique identifier specifying the data set
+ filenames: list of strings; each string is a path to an image file
+ synsets: list of strings; each string is a unique WordNet ID
+ labels: list of integer; each integer identifies the ground truth
+ humans: list of strings; each string is a human-readable label
+ num_shards: integer number of shards for this data set.
+ """
+ # Each thread produces N shards where N = int(num_shards / num_threads).
+ # For instance, if num_shards = 128, and the num_threads = 2, then the first
+ # thread would produce shards [0, 64).
+ num_threads = len(ranges)
+ assert not num_shards % num_threads
+ num_shards_per_batch = int(num_shards / num_threads)
+
+ shard_ranges = np.linspace(ranges[thread_index][0],
+ ranges[thread_index][1],
+ num_shards_per_batch + 1).astype(int)
+ num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]
+
+ counter = 0
+ for s in xrange(num_shards_per_batch):
+ # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
+ shard = thread_index * num_shards_per_batch + s
+ output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
+ output_file = os.path.join(FLAGS.output_directory, output_filename)
+ writer = tf.io.TFRecordWriter(output_file)
+
+ shard_counter = 0
+      files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
+ for i in files_in_shard:
+ filename = filenames[i]
+ label = labels[i]
+ synset = synsets[i]
+ human = humans[i]
+
+ image_buffer, height, width = _process_image(filename, coder)
+
+ example = _convert_to_example(filename, image_buffer, label, synset, human, height, width)
+ writer.write(example.SerializeToString())
+ shard_counter += 1
+ counter += 1
+
+ if not counter % 1000:
+ print('%s [thread %d]: Processed %d of %d images in thread batch.' %
+ (datetime.now(), thread_index, counter, num_files_in_thread))
+ sys.stdout.flush()
+
+ writer.close()
+ print('%s [thread %d]: Wrote %d images to %s' %
+ (datetime.now(), thread_index, shard_counter, output_file))
+ sys.stdout.flush()
+ shard_counter = 0
+ print('%s [thread %d]: Wrote %d images to %d shards.' %
+ (datetime.now(), thread_index, counter, num_files_in_thread))
+ sys.stdout.flush()
+
+
+def _process_image_files(name, filenames, synsets, labels, humans, num_shards):
+ """Process and save list of images as TFRecord of Example protos.
+
+ Args:
+ name: string, unique identifier specifying the data set
+ filenames: list of strings; each string is a path to an image file
+ synsets: list of strings; each string is a unique WordNet ID
+ labels: list of integer; each integer identifies the ground truth
+ humans: list of strings; each string is a human-readable label
+ num_shards: integer number of shards for this data set.
+ """
+ assert len(filenames) == len(synsets)
+ assert len(filenames) == len(labels)
+ assert len(filenames) == len(humans)
+
+ # Break all images into batches with a [ranges[i][0], ranges[i][1]].
+  spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(int)
+ ranges = []
+ threads = []
+ for i in xrange(len(spacing) - 1):
+ ranges.append([spacing[i], spacing[i+1]])
+
+ # Launch a thread for each batch.
+ print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
+ sys.stdout.flush()
+
+ # Create a mechanism for monitoring when all threads are finished.
+ coord = tf.train.Coordinator()
+
+ # Create a generic TensorFlow-based utility for converting all image codings.
+ coder = ImageCoder()
+
+ threads = []
+ for thread_index in xrange(len(ranges)):
+ args = (coder, thread_index, ranges, name, filenames,
+ synsets, labels, humans, num_shards)
+ t = threading.Thread(target=_process_image_files_batch, args=args)
+ t.start()
+ threads.append(t)
+
+ # Wait for all the threads to terminate.
+ coord.join(threads)
+ print('%s: Finished writing all %d images in data set.' %
+ (datetime.now(), len(filenames)))
+ sys.stdout.flush()
+
+
+def _find_image_files(data_dir, labels_file):
+ """Build a list of all images files and labels in the data set.
+
+ Args:
+ data_dir: string, path to the root directory of images.
+
+ Assumes that the ImageNet data set resides in JPEG files located in
+ the following directory structure.
+
+ data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
+ data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
+
+ where 'n01440764' is the unique synset label associated with these images.
+
+ labels_file: string, path to the labels file.
+
+ The list of valid labels are held in this file. Assumes that the file
+ contains entries as such:
+ n01440764
+ n01443537
+ n01484850
+ where each line corresponds to a label expressed as a synset. We map
+ each synset contained in the file to an integer (based on the alphabetical
+ ordering) starting with the integer 1 corresponding to the synset
+ contained in the first line.
+
+ The reason we start the integer labels at 1 is to reserve label 0 as an
+ unused background class.
+
+ Returns:
+ filenames: list of strings; each string is a path to an image file.
+ synsets: list of strings; each string is a unique WordNet ID.
+ labels: list of integer; each integer identifies the ground truth.
+ """
+ print('Determining list of input files and labels from %s.' % data_dir)
+ challenge_synsets = [l.strip() for l in
+ tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()]
+
+ labels = []
+ filenames = []
+ synsets = []
+
+ # Leave label index 0 empty as a background class.
+ label_index = 1
+
+ # Construct the list of JPEG files and labels.
+ for synset in challenge_synsets:
+ jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset)
+ matching_files = tf.io.gfile.glob(jpeg_file_path)
+
+ labels.extend([label_index] * len(matching_files))
+ synsets.extend([synset] * len(matching_files))
+ filenames.extend(matching_files)
+
+ if not label_index % 100:
+ print('Finished finding files in %d of %d classes.' % (
+ label_index, len(challenge_synsets)))
+ label_index += 1
+
+ # Shuffle the ordering of all image files in order to guarantee
+ # random ordering of the images with respect to label in the
+ # saved TFRecord files. Make the randomization repeatable.
+  shuffled_index = list(range(len(filenames)))
+  random.seed(12345)
+
+  random.shuffle(shuffled_index)
+
+ filenames = [filenames[i] for i in shuffled_index]
+ synsets = [synsets[i] for i in shuffled_index]
+ labels = [labels[i] for i in shuffled_index]
+
+ print('Found %d JPEG files across %d labels inside %s.' %
+ (len(filenames), len(challenge_synsets), data_dir))
+ return filenames, synsets, labels
+
+
+def _find_human_readable_labels(synsets, synset_to_human):
+ """Build a list of human-readable labels.
+
+ Args:
+ synsets: list of strings; each string is a unique WordNet ID.
+ synset_to_human: dict of synset to human labels, e.g.,
+ 'n02119022' --> 'red fox, Vulpes vulpes'
+
+ Returns:
+ List of human-readable strings corresponding to each synset.
+ """
+ humans = []
+ for s in synsets:
+ assert s in synset_to_human, ('Failed to find: %s' % s)
+ humans.append(synset_to_human[s])
+ return humans
+
+
+def _process_dataset(name, directory, num_shards, synset_to_human):
+ """Process a complete data set and save it as a TFRecord.
+
+ Args:
+ name: string, unique identifier specifying the data set.
+ directory: string, root path to the data set.
+ num_shards: integer number of shards for this data set.
+ synset_to_human: dict of synset to human labels, e.g.,
+ 'n02119022' --> 'red fox, Vulpes vulpes'
+ """
+ filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file)
+ humans = _find_human_readable_labels(synsets, synset_to_human)
+
+ _process_image_files(name, filenames, synsets, labels,
+ humans, num_shards)
+
+
+def _build_synset_lookup(imagenet_metadata_file):
+ """Build lookup for synset to human-readable label.
+
+ Args:
+ imagenet_metadata_file: string, path to file containing mapping from
+ synset to human-readable label.
+
+ Assumes each line of the file looks like:
+
+ n02119247 black fox
+ n02119359 silver fox
+ n02119477 red fox, Vulpes fulva
+
+  where each line corresponds to a unique mapping. Note that each line is
+  formatted as <synset>\t<human readable label>.
+
+ Returns:
+ Dictionary of synset to human labels, such as:
+ 'n02119022' --> 'red fox, Vulpes vulpes'
+ """
+ lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines()
+ synset_to_human = {}
+ for l in lines:
+ if l:
+ parts = l.strip().split('\t')
+ assert len(parts) == 2
+ synset = parts[0]
+ human = parts[1]
+ synset_to_human[synset] = human
+ return synset_to_human
+
+
+def main(unused_argv):
+ assert not FLAGS.shards % FLAGS.num_threads, (
+ 'Please make the FLAGS.num_threads commensurate with FLAGS.shards')
+
+ print('Saving results to %s' % FLAGS.output_directory)
+
+ # Build a map from synset to human-readable label.
+ synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file)
+
+  if FLAGS.raw_directory is not None:
+ _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human)
+
+if __name__ == '__main__':
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh
new file mode 100644
index 00000000000..f9baa85ab07
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Script to download and preprocess ImageNet Challenge 2012
+# training and validation data set.
+#
+# The final output of this script are sharded TFRecord files containing
+# serialized Example protocol buffers. See build_imagenet_data.py for
+# details of how the Example protocol buffers contain the ImageNet data.
+#
+# The final output of this script appears as such:
+#
+# data_dir/train-00000-of-01024
+# data_dir/train-00001-of-01024
+# ...
+# data_dir/train-00127-of-01024
+#
+# and
+#
+# data_dir/validation-00000-of-00128
+# data_dir/validation-00001-of-00128
+# ...
+# data_dir/validation-00127-of-00128
+#
+# Note that this script may take several hours to run to completion. The
+# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending
+# on the speed of your machine. Please be patient.
+#
+# **IMPORTANT**
+# To download the raw images, the user must create an account with image-net.org
+# and generate a username and access_key. The latter two are required for
+# downloading the raw images.
+#
+
+set -e
+
+if [ -z "$1" ]; then
+ echo "usage download_and_convert_imagenet.sh [data dir]"
+ exit
+fi
+
+# Create the output and temporary directories.
+DATA_DIR="${1%/}"
+SCRATCH_DIR="${DATA_DIR}/raw-data/"
+mkdir -p "${DATA_DIR}"
+mkdir -p "${SCRATCH_DIR}"
+WORK_DIR="$0.runfiles/__main__"
+
+# Download the ImageNet data.
+LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt"
+DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh"
+"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}"
+
+# Note the locations of the train and validation data.
+TRAIN_DIRECTORY="${SCRATCH_DIR}train/"
+VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/"
+
+# Preprocess the validation data by moving the images into the appropriate
+# sub-directory based on the label (synset) of the image.
+echo "Organizing the validation data into sub-directories."
+PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py"
+VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt"
+
+"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}"
+
+# Convert the XML files for bounding box annotations into a single CSV.
+echo "Extracting bounding box information from XML."
+BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py"
+BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv"
+BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/"
+
+"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \
+ | sort >"${BOUNDING_BOX_FILE}"
+echo "Finished downloading and preprocessing the ImageNet data."
+
+# Build the TFRecords version of the ImageNet data.
+BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data"
+OUTPUT_DIRECTORY="${DATA_DIR}"
+IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt"
+
+"${BUILD_SCRIPT}" \
+ --train_directory="${TRAIN_DIRECTORY}" \
+ --validation_directory="${VALIDATION_DIRECTORY}" \
+ --output_directory="${OUTPUT_DIRECTORY}" \
+ --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \
+ --labels_file="${LABELS_FILE}" \
+ --bounding_box_file="${BOUNDING_BOX_FILE}"
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh
new file mode 100644
index 00000000000..c780e179f93
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Script to download ImageNet Challenge 2012 training and validation data set.
+#
+# Downloads and decompresses raw images and bounding boxes.
+#
+# **IMPORTANT**
+# To download the raw images, the user must create an account with image-net.org
+# and generate a username and access_key. The latter two are required for
+# downloading the raw images.
+#
+# usage:
+# ./download_imagenet.sh [dirname]
+set -e
+
+if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then
+ cat < Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Pretrained model
+
+The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). To prepare the model, run as follows:
+ ```
+python prepare_model.py --output_model=./inception_v3_keras
+ ```
+`--output_model`: the path where the model will be saved, in SavedModel or H5 format.
+
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of each image, you can use the command below to convert it to TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/keras/cv/
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ cd inception_v3/quantization/ptq
+ ```
+> **Note**:
+> The raw ImageNet dataset, stored as JPEG files, should be organized in the following directory structure. Taking the validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run Command
+
+## Quantization
+ ```shell
+ bash run_quant.sh --input_model=./inception_v3_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset
+ ```
+
+## Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1
+ bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32
+ ```
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..b8cd01593c6
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py
@@ -0,0 +1,543 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+ """Parse features in Example proto.
+
+ Returns:
+ tuple of parsed image and label
+ """
+
+ def __call__(self, sample):
+ """Parse features in example."""
+ # Dense features in Example proto.
+ feature_map = {
+ "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+ "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+ }
+
+ sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+ # Sparse features in Example proto.
+ feature_map.update(
+ {
+ k: sparse_float32
+ for k in [
+ "image/object/bbox/xmin",
+ "image/object/bbox/ymin",
+ "image/object/bbox/xmax",
+ "image/object/bbox/ymax",
+ ]
+ }
+ )
+
+ features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+ label = tf.cast(features["image/class/label"], dtype=tf.int32)
+ image = features["image/encoded"]
+ image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+ return (image, label)
+
+
+class BilinearImagenetTransform(object):
+ """Combination of a series of transforms which is applicable to images in Imagenet.
+
+ Args:
+ height: Height of the result
+ width:Width of the result
+ central_fraction(float, default=0.875):fraction of size to crop
+ mean_value(list, default=[0.0,0.0,0.0]):means for each channel
+ scale(float, default=1.0):std value
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0):
+ """Initialize `BilinearImagenetTransform` class."""
+ self.height = height
+ self.width = width
+ self.mean_value = mean_value
+ self.scale = scale
+ self.central_fraction = central_fraction
+
+ # sample is (images, labels)
+ def __call__(self, sample):
+ """Convert `BilinearImagenetTransform` feature."""
+ image, label = sample
+ if image.dtype is not tf.float32:
+ image = tf.image.convert_image_dtype(image, dtype=tf.float32)
+ # Crop the central region of the image containing 87.5% area of the original image.
+ if self.central_fraction:
+ image = tf.image.central_crop(image, central_fraction=self.central_fraction)
+
+ if self.height and self.width:
+ # Resize the image to the specified height and width.
+ image = tf.expand_dims(image, 0)
+ image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR)
+ image = tf.squeeze(image, [0])
+
+ image = tf.subtract(image, 0.5)
+ image = tf.multiply(image, 2.0)
+ means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+ image = (image - means) * self.scale
+ return (image, label)
+
+
+class ComposeTransform(object):
+ """Composes several transforms together.
+
+ Args:
+ transform_list (list of Transform objects): list of transforms to compose
+
+ Returns:
+ sample (tuple): tuple of processed image and label
+ """
+
+ def __init__(self, transform_list):
+ """Initialize `ComposeTransform` class."""
+ self.transform_list = transform_list
+
+ def __call__(self, sample):
+ """Call transforms in transform_list."""
+ for transform in self.transform_list:
+ sample = transform(sample)
+ return sample
+
+
+class ShiftRescale(object):
+ """Label shift by 1 and rescale.
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __call__(self, sample):
+ image, label = sample
+ label -= 1
+ image = (image - 127.5) / 127.5
+ return (image, label)
+
+
+class LabelShift(object):
+ """Convert label to label - label_shift.
+
+ Args:
+ label_shift(int, default=0): number of label shift
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, label_shift=0):
+ """Initialize `LabelShift` class."""
+ self.label_shift = label_shift
+
+ def __call__(self, sample):
+ """Convert label to label_shift."""
+ images, labels = sample
+ if isinstance(labels, np.ndarray):
+ labels = labels - self.label_shift
+ elif isinstance(labels, list):
+ if isinstance(labels[0], tuple):
+ labels = [tuple(np.array(label) - self.label_shift) for label in labels]
+ elif isinstance(labels[0], np.ndarray):
+ labels = [label - self.label_shift for label in labels]
+ else:
+ labels = np.array(labels) - self.label_shift
+ labels = labels.tolist()
+ else:
+ labels = np.array(labels) - self.label_shift
+ return images, labels
+
+
+class ImageRecordDataset(object):
+ """Tensorflow imageNet database in tf record format.
+
+ Please arrange data in this way:
+ root/validation-000-of-100
+ root/validation-001-of-100
+ ...
+ root/validation-099-of-100
+ The file name needs to follow this pattern: '* - * -of- *'
+
+ Args: root (str): Root directory of dataset.
+ transform (transform object, default=None): transform to process input data.
+ filter (Filter objects, default=None): filter out examples according
+ to specific conditions.
+ """
+
+ """Configuration for Imagenet dataset."""
+
+ def __new__(cls, root, transform=None, filter=None):
+ """Build a new object of TensorflowImageRecord class."""
+ from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module
+
+ glob_pattern = os.path.join(root, "*-*-of-*")
+ file_names = gfile.Glob(glob_pattern)
+ if not file_names:
+ raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.data.experimental import parallel_interleave
+
+ ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+ ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+ if transform is not None:
+ transform.transform_list.insert(0, ParseDecodeImagenet())
+ else:
+ transform = ParseDecodeImagenet()
+ ds = ds.map(transform, num_parallel_calls=None)
+ ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned
+ return ds
+
+
+class BaseMetric(object):
+ """The base class of Metric."""
+
+ def __init__(self, metric, single_output=False, hvd=None):
+ """Initialize the basic metric.
+
+ Args:
+ metric: The metric class.
+ single_output: Whether the output is single or not, defaults to False.
+ hvd: The Horovod class for distributed training, defaults to None.
+ """
+ self._metric_cls = metric
+ self._single_output = single_output
+ self._hvd = hvd
+
+ def __call__(self, *args, **kwargs):
+ """Evaluate the model predictions, and the reference.
+
+ Returns:
+ The class itself.
+ """
+ self._metric = self._metric_cls(*args, **kwargs)
+ return self
+
+ @abstractmethod
+ def update(self, preds, labels=None, sample_weight=None):
+ """Update the state that need to be evaluated.
+
+ Args:
+ preds: The prediction result.
+ labels: The reference. Defaults to None.
+ sample_weight: The sampling weight. Defaults to None.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def reset(self):
+ """Clear the predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def result(self):
+ """Evaluate the difference between predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @property
+ def metric(self):
+ """Return its metric class.
+
+ Returns:
+ The metric class.
+ """
+ return self._metric_cls
+
+ @property
+ def hvd(self):
+ """Return its hvd class.
+
+ Returns:
+ The hvd class.
+ """
+ return self._hvd
+
+ @hvd.setter
+ def hvd(self, hvd):
+ """Set its hvd.
+
+ Args:
+ hvd: The Horovod class for distributed training.
+ """
+ self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+ """Compute Top-k Accuracy classification score for Tensorflow model.
+
+ This metric computes the number of times where the correct label is among
+ the top k labels predicted.
+
+ Attributes:
+ k (int): The number of most likely outcomes considered to find the correct label.
+ num_correct: The number of predictions that were correct classified.
+ num_sample: The total number of predictions.
+ """
+
+ def __init__(self, k=1):
+ """Initialize the k, number of samples and correct predictions.
+
+ Args:
+ k: The number of most likely outcomes considered to find the correct label.
+ """
+ self.k = k
+ self.num_correct = 0
+ self.num_sample = 0
+
+ def update(self, preds, labels, sample_weight=None):
+ """Add the predictions and labels.
+
+ Args:
+ preds: The predictions.
+ labels: The labels corresponding to the predictions.
+ sample_weight: The sample weight.
+ """
+ preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+ labels = labels.reshape([len(labels)])
+ with tf.Graph().as_default() as acc_graph:
+ topk = tf.nn.in_top_k(
+ predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+ )
+ fp32_topk = tf.cast(topk, tf.float32)
+ correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+ with tf.compat.v1.Session() as acc_sess:
+ correct = acc_sess.run(correct_tensor)
+
+ self.num_sample += len(labels)
+ self.num_correct += correct
+
+ def reset(self):
+ """Reset the number of samples and correct predictions."""
+ self.num_correct = 0
+ self.num_sample = 0
+
+ def result(self):
+ """Compute the top-k score.
+
+ Returns:
+ The top-k score.
+ """
+ if self.num_sample == 0:
+ logger.warning("Sample num during evaluation is 0.")
+ return 0
+ elif getattr(self, "_hvd", None) is not None: # pragma: no cover
+ allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+ allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+ return allgather_num_correct / allgather_num_sample
+ return self.num_correct / self.num_sample
+
+ @staticmethod
+ def _topk_shape_validate(preds, labels):
+ # preds shape can be Nxclass_num or class_num(N=1 by default)
+ # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+ if isinstance(preds, int):
+ preds = [preds]
+ preds = np.array(preds)
+ elif isinstance(preds, np.ndarray):
+ preds = np.array(preds)
+ elif isinstance(preds, list):
+ preds = np.array(preds)
+ preds = preds.reshape((-1, preds.shape[-1]))
+
+ # consider labels just int value 1x1
+ if isinstance(labels, int):
+ labels = [labels]
+ labels = np.array(labels)
+ elif isinstance(labels, tuple):
+ labels = np.array([labels])
+ labels = labels.reshape((labels.shape[-1], -1))
+ elif isinstance(labels, list):
+ if isinstance(labels[0], int):
+ labels = np.array(labels)
+ labels = labels.reshape((labels.shape[0], 1))
+ elif isinstance(labels[0], tuple):
+ labels = np.array(labels)
+ labels = labels.reshape((labels.shape[-1], -1))
+ else:
+ labels = np.array(labels)
+ # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+ # only support 2 dimension one-shot labels
+ # or 1 dimension one-hot class_num will confuse with N
+
+ if len(preds.shape) == 1:
+ N = 1
+ class_num = preds.shape[0]
+ preds = preds.reshape([-1, class_num])
+ elif len(preds.shape) >= 2:
+ N = preds.shape[0]
+ preds = preds.reshape([N, -1])
+ class_num = preds.shape[1]
+
+ label_N = labels.shape[0]
+ assert label_N == N, "labels batch size should same with preds"
+ labels = labels.reshape([N, -1])
+ # one-hot labels will have 2 dimension not equal 1
+ if labels.shape[1] != 1:
+ labels = labels.argsort()[..., -1:]
+ return preds, labels
+
+
+class TFDataLoader(object): # pragma: no cover
+ """Tensorflow dataloader class.
+
+ In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+ method to do session run, this dataloader is designed to satisfy the usage of feed dict
+ in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+ Args:
+ dataset: obj. wrapper of needed data.
+ batch_size: int. batch size
+ """
+
+ def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+ """Initialize `TFDataDataLoader` class."""
+ self.dataset = dataset
+ self.last_batch = last_batch
+ self.batch_size = batch_size
+ dataset = dataset.batch(batch_size)
+
+ def batch(self, batch_size, last_batch="rollover"):
+ """Dataset return data per batch."""
+ drop_last = False if last_batch == "rollover" else True
+ self.batch_size = batch_size
+ self.dataset = self.dataset.batch(batch_size, drop_last)
+
+ def __iter__(self):
+ """Iterate dataloader."""
+ return self._generate_dataloader(
+ self.dataset,
+ batch_size=self.batch_size,
+ last_batch=self.last_batch,
+ )
+
+ def _generate_dataloader(
+ self,
+ dataset,
+ batch_size=1,
+ last_batch="rollover",
+ collate_fn=None,
+ sampler=None,
+ batch_sampler=None,
+ num_workers=None,
+ pin_memory=None,
+ distributed=False,
+ ):
+ """Yield data."""
+ drop_last = False if last_batch == "rollover" else True
+
+ def check_dynamic_shape(element_spec):
+ if isinstance(element_spec, collections.abc.Sequence):
+ return any([check_dynamic_shape(ele) for ele in element_spec])
+ elif isinstance(element_spec, tf.TensorSpec):
+ return True if element_spec.shape.num_elements() is None else False
+ else:
+ raise ValueError("unrecognized element spec...")
+
+ def squeeze_output(output):
+ if isinstance(output, collections.abc.Sequence):
+ return [squeeze_output(ele) for ele in output]
+ elif isinstance(output, np.ndarray):
+ return np.squeeze(output, axis=0)
+ else:
+ raise ValueError("not supported output format....")
+
+ if tf.executing_eagerly():
+ index = 0
+ outputs = []
+ for iter_tensors in dataset:
+ samples = []
+ iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+ if isinstance(iter_inputs, tf.Tensor):
+ samples.append(iter_inputs.numpy())
+ else:
+ samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+ if isinstance(iter_labels, tf.Tensor):
+ samples.append(iter_labels.numpy())
+ else:
+ samples.append([np.array(l) for l in iter_labels])
+ index += 1
+ outputs.append(samples)
+ if index == batch_size:
+ outputs = default_collate(outputs)
+ yield outputs
+ outputs = []
+ index = 0
+ if len(outputs) > 0:
+ outputs = default_collate(outputs)
+ yield outputs
+ else:
+ try_single_batch = check_dynamic_shape(dataset.element_spec)
+ dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+ ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+ iter_tensors = ds_iterator.get_next()
+ data_config = tf.compat.v1.ConfigProto()
+ data_config.use_per_session_threads = 1
+ data_config.intra_op_parallelism_threads = 1
+ data_config.inter_op_parallelism_threads = 16
+ data_sess = tf.compat.v1.Session(config=data_config)
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+ while True:
+ if not try_single_batch:
+ try:
+ outputs = data_sess.run(iter_tensors)
+ yield outputs
+ except OutOfRangeError:
+ data_sess.close()
+ return
+ else:
+ try:
+ outputs = []
+ for i in range(0, batch_size):
+ outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+ outputs = default_collate(outputs)
+ yield outputs
+ except OutOfRangeError:
+ if len(outputs) == 0:
+ data_sess.close()
+ return
+ else:
+ outputs = default_collate(outputs)
+ yield outputs
+ data_sess.close()
+ return
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py
new file mode 100644
index 00000000000..5f8b08d6e11
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py
@@ -0,0 +1,144 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from neural_compressor.utils import logger
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ BilinearImagenetTransform,
+ TFDataLoader,
+ TopKMetric,
+ LabelShift,
+)
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+flags = tf.compat.v1.flags
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ 'input_model', None, 'Run inference with specified keras model.')
+
+flags.DEFINE_string(
+ 'output_model', None, 'The output quantized model.')
+
+flags.DEFINE_string(
+ 'mode', 'performance', 'define benchmark mode for accuracy or performance')
+
+flags.DEFINE_bool(
+ 'tune', False, 'whether to tune the model')
+
+flags.DEFINE_bool(
+ 'benchmark', False, 'whether to benchmark the model')
+
+flags.DEFINE_string(
+ 'calib_data', None, 'location of calibration dataset')
+
+flags.DEFINE_string(
+ 'eval_data', None, 'location of evaluate dataset')
+
+flags.DEFINE_integer('batch_size', 32, 'batch_size')
+
+flags.DEFINE_integer(
+ 'iters', 100, 'maximum iteration when evaluating performance')
+
+height = width = 299
+eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \
+ [BilinearImagenetTransform(height=height, width=width)]))
+
+eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size)
+
+if FLAGS.calib_data:
+ calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \
+ ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)]))
+ calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10)
+
+def evaluate(model):
+ """
+ Custom evaluate function to inference the model for specified metric on validation dataset.
+
+ Args:
+ model (tf.keras.Model): The input model will be the objection of tf.keras.Model.
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ latency_list = []
+ metric = TopKMetric()
+ postprocess = LabelShift(label_shift=1)
+
+ def eval_func(dataloader, metric):
+ warmup = 5
+ iteration = None
+ if FLAGS.benchmark and FLAGS.mode == 'performance':
+ iteration = FLAGS.iters
+ for idx, (inputs, labels) in enumerate(dataloader):
+ start = time.time()
+ predictions = model.predict_on_batch(inputs)
+ end = time.time()
+ latency_list.append(end - start)
+ predictions, labels = postprocess((predictions, labels))
+ metric.update(predictions, labels)
+ if iteration and idx >= iteration:
+ break
+ latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size
+ return latency
+
+ latency = eval_func(eval_dataloader, metric)
+ if FLAGS.benchmark:
+ logger.info("\n{} mode benchmark result:".format(FLAGS.mode))
+ for i, res in enumerate(latency_list):
+ logger.debug("Iteration {} result {}:".format(i, res))
+ if FLAGS.benchmark and FLAGS.mode == 'performance':
+ logger.info("Batch size = {}".format(eval_dataloader.batch_size))
+ logger.info("Latency: {:.3f} ms".format(latency * 1000))
+ logger.info("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+def main(_):
+ if FLAGS.tune:
+ from neural_compressor.common import set_random_seed
+ from neural_compressor.tensorflow import quantize_model
+ from neural_compressor.tensorflow.keras import StaticQuantConfig
+
+ set_random_seed(9527)
+ quant_config = StaticQuantConfig()
+ q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader)
+ q_model.save(FLAGS.output_model)
+ logger.info("Save quantized model to {}.".format(FLAGS.output_model))
+
+ if FLAGS.benchmark:
+ from neural_compressor.tensorflow import Model
+
+ inc_model = Model(FLAGS.input_model)
+ if FLAGS.mode == 'performance':
+ evaluate(inc_model.model)
+ else:
+ accuracy = evaluate(inc_model.model)
+ logger.info('Batch size = %d' % FLAGS.batch_size)
+ logger.info("Accuracy: %.5f" % accuracy)
+
+
+if __name__ == "__main__":
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py
new file mode 100644
index 00000000000..abf63dc93b4
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py
@@ -0,0 +1,35 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+from tensorflow.keras.applications.inception_v3 import InceptionV3
+def get_inception_v3_model(saved_path):
+ model = InceptionV3(weights='imagenet')
+ model.save(saved_path)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='Export pretained keras model',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument(
+ '--output_model',
+ type=str,
+ help='path to exported model file')
+
+ args = parser.parse_args()
+ get_inception_v3_model(args.output_model)
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2f0697d8502
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+intel-extension-for-tensorflow[cpu]
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..43b1636c839
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input_model ${input_model} \
+ --benchmark \
+ --mode ${mode} \
+ --eval_data ${dataset_location} \
+ --batch_size ${batch_size} \
+ --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..7e3ed727f71
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+set -x
+
+function main {
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input_model ${input_model} \
+ --output_model ${output_model} \
+ --eval_data ${dataset_location} \
+ --calib_data ${dataset_location} \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh b/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh
new file mode 100644
index 00000000000..4aad5d69a3f
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# set -x
+
+OUTPUT_DIR="./data"
+SUBSET="validation"
+SHARDS=1
+
+help()
+{
+ cat <<- EOF
+ Desc: Convert prepared raw imagnet dataset to tfrecord
+ -h --help help info
+ --output_dir Output data directory
+ default: './data'
+ --raw_dir Raw data directory
+ --shards Number of shards in TFRecord files.
+ default: '1'
+ --subset Subset of imagenet, can be validation/train.
+ default: 'validation'
+EOF
+ exit 0
+}
+
+function main {
+ init_params "$@"
+ convert_dataset
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --output_dir=*)
+ OUTPUT_DIR=$(echo $var |cut -f2 -d=)
+ ;;
+ --raw_dir=*)
+ RAW_DIR=$(echo $var |cut -f2 -d=)
+ ;;
+ --shards=*)
+ SHARDS=$(echo $var |cut -f2 -d=)
+ ;;
+ --subset=*)
+ SUBSET=$(echo $var |cut -f2 -d=)
+ ;;
+ -h|--help) help
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+}
+
+# convert dataset
+function convert_dataset {
+ if [ ! -d ${OUTPUT_DIR} ]; then
+ mkdir ${OUTPUT_DIR}
+ fi
+ python imagenet_prepare/build_imagenet_data.py \
+ --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \
+ --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \
+ --output_directory ${OUTPUT_DIR} \
+ --subset ${SUBSET} \
+ --raw_directory ${RAW_DIR} \
+ --shards ${SHARDS}
+}
+
+main "$@"
+
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md
new file mode 100644
index 00000000000..a276ef7cd0d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md
@@ -0,0 +1,65 @@
+Step-by-Step
+============
+
+This document is used to enable Tensorflow Keras models using Intel® Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### Install Requirements
+TensorFlow and Intel Extension for TensorFlow are mandatory to be installed to run this example.
+The Intel Extension for TensorFlow for Intel CPUs is installed by default.
+```shell
+pip install -r requirements.txt
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Pretrained model
+
+The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). To prepare the model, run as follows:
+ ```
+python prepare_model.py --output_model=./resnetv2_50_keras
+ ```
+`--output_model ` the model should be saved as SavedModel format or H5 format.
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/keras/image_recognition/
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ cd resnetv2_50/quantization/ptq
+ ```
+> **Note**:
+> The raw ImageNet dataset resides in JPEG files that should be arranged in the following directory structure. Taking the validation set as an example:
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+> /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images.
+
+# Run Command
+
+## Quantization
+ ```shell
+ bash run_quant.sh --input_model=./resnetv2_50_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset
+ ```
+
+## Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32
+ bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1
+ ```
+
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..b8cd01593c6
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py
@@ -0,0 +1,543 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+class ParseDecodeImagenet:
+ """Parse features in Example proto.
+
+ Returns:
+ tuple of parsed image and label
+ """
+
+ def __call__(self, sample):
+ """Parse features in example."""
+ # Dense features in Example proto.
+ feature_map = {
+ "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""),
+ "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
+ }
+
+ sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+ # Sparse features in Example proto.
+ feature_map.update(
+ {
+ k: sparse_float32
+ for k in [
+ "image/object/bbox/xmin",
+ "image/object/bbox/ymin",
+ "image/object/bbox/xmax",
+ "image/object/bbox/ymax",
+ ]
+ }
+ )
+
+ features = tf.io.parse_single_example(serialized=sample, features=feature_map)
+ label = tf.cast(features["image/class/label"], dtype=tf.int32)
+ image = features["image/encoded"]
+ image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST")
+ return (image, label)
+
+
+class BilinearImagenetTransform(object):
+ """Combination of a series of transforms which is applicable to images in Imagenet.
+
+ Args:
+ height: Height of the result
+ width:Width of the result
+ central_fraction(float, default=0.875):fraction of size to crop
+ mean_value(list, default=[0.0,0.0,0.0]):means for each channel
+ scale(float, default=1.0):std value
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0):
+ """Initialize `BilinearImagenetTransform` class."""
+ self.height = height
+ self.width = width
+ self.mean_value = mean_value
+ self.scale = scale
+ self.central_fraction = central_fraction
+
+ # sample is (images, labels)
+ def __call__(self, sample):
+ """Convert `BilinearImagenetTransform` feature."""
+ image, label = sample
+ if image.dtype is not tf.float32:
+ image = tf.image.convert_image_dtype(image, dtype=tf.float32)
+ # Crop the central region of the image containing 87.5% area of the original image.
+ if self.central_fraction:
+ image = tf.image.central_crop(image, central_fraction=self.central_fraction)
+
+ if self.height and self.width:
+ # Resize the image to the specified height and width.
+ image = tf.expand_dims(image, 0)
+ image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR)
+ image = tf.squeeze(image, [0])
+
+ image = tf.subtract(image, 0.5)
+ image = tf.multiply(image, 2.0)
+ means = tf.broadcast_to(self.mean_value, tf.shape(input=image))
+ image = (image - means) * self.scale
+ return (image, label)
+
+
+class ComposeTransform(object):
+ """Composes several transforms together.
+
+ Args:
+ transform_list (list of Transform objects): list of transforms to compose
+
+ Returns:
+ sample (tuple): tuple of processed image and label
+ """
+
+ def __init__(self, transform_list):
+ """Initialize `ComposeTransform` class."""
+ self.transform_list = transform_list
+
+ def __call__(self, sample):
+ """Call transforms in transform_list."""
+ for transform in self.transform_list:
+ sample = transform(sample)
+ return sample
+
+
+class ShiftRescale(object):
+ """Label shift by 1 and rescale.
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __call__(self, sample):
+ image, label = sample
+ label -= 1
+ image = (image - 127.5) / 127.5
+ return (image, label)
+
+
+class LabelShift(object):
+ """Convert label to label - label_shift.
+
+ Args:
+ label_shift(int, default=0): number of label shift
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, label_shift=0):
+ """Initialize `LabelShift` class."""
+ self.label_shift = label_shift
+
+ def __call__(self, sample):
+ """Convert label to label_shift."""
+ images, labels = sample
+ if isinstance(labels, np.ndarray):
+ labels = labels - self.label_shift
+ elif isinstance(labels, list):
+ if isinstance(labels[0], tuple):
+ labels = [tuple(np.array(label) - self.label_shift) for label in labels]
+ elif isinstance(labels[0], np.ndarray):
+ labels = [label - self.label_shift for label in labels]
+ else:
+ labels = np.array(labels) - self.label_shift
+ labels = labels.tolist()
+ else:
+ labels = np.array(labels) - self.label_shift
+ return images, labels
+
+
+class ImageRecordDataset(object):
+ """Tensorflow imageNet database in tf record format.
+
+ Please arrange data in this way:
+ root/validation-000-of-100
+ root/validation-001-of-100
+ ...
+ root/validation-099-of-100
+ The file name needs to follow this pattern: '* - * -of- *'
+
+ Args: root (str): Root directory of dataset.
+ transform (transform object, default=None): transform to process input data.
+ filter (Filter objects, default=None): filter out examples according
+ to specific conditions.
+ """
+
+ """Configuration for Imagenet dataset."""
+
+ def __new__(cls, root, transform=None, filter=None):
+ """Build a new object of TensorflowImageRecord class."""
+ from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module
+
+ glob_pattern = os.path.join(root, "*-*-of-*")
+ file_names = gfile.Glob(glob_pattern)
+ if not file_names:
+ raise ValueError("Found no files in --root matching: {}".format(glob_pattern))
+
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.data.experimental import parallel_interleave
+
+ ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False)
+ ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names)))
+
+ if transform is not None:
+ transform.transform_list.insert(0, ParseDecodeImagenet())
+ else:
+ transform = ParseDecodeImagenet()
+ ds = ds.map(transform, num_parallel_calls=None)
+ ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned
+ return ds
+
+
+class BaseMetric(object):
+ """The base class of Metric."""
+
+ def __init__(self, metric, single_output=False, hvd=None):
+ """Initialize the basic metric.
+
+ Args:
+ metric: The metric class.
+ single_output: Whether the output is single or not, defaults to False.
+ hvd: The Horovod class for distributed training, defaults to None.
+ """
+ self._metric_cls = metric
+ self._single_output = single_output
+ self._hvd = hvd
+
+ def __call__(self, *args, **kwargs):
+ """Evaluate the model predictions, and the reference.
+
+ Returns:
+ The class itself.
+ """
+ self._metric = self._metric_cls(*args, **kwargs)
+ return self
+
+ @abstractmethod
+ def update(self, preds, labels=None, sample_weight=None):
+ """Update the state that need to be evaluated.
+
+ Args:
+ preds: The prediction result.
+ labels: The reference. Defaults to None.
+ sample_weight: The sampling weight. Defaults to None.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def reset(self):
+ """Clear the predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def result(self):
+ """Evaluate the difference between predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @property
+ def metric(self):
+ """Return its metric class.
+
+ Returns:
+ The metric class.
+ """
+ return self._metric_cls
+
+ @property
+ def hvd(self):
+ """Return its hvd class.
+
+ Returns:
+ The hvd class.
+ """
+ return self._hvd
+
+ @hvd.setter
+ def hvd(self, hvd):
+ """Set its hvd.
+
+ Args:
+ hvd: The Horovod class for distributed training.
+ """
+ self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+ """Compute Top-k Accuracy classification score for Tensorflow model.
+
+ This metric computes the number of times where the correct label is among
+ the top k labels predicted.
+
+ Attributes:
+ k (int): The number of most likely outcomes considered to find the correct label.
+ num_correct: The number of predictions that were correct classified.
+ num_sample: The total number of predictions.
+ """
+
+ def __init__(self, k=1):
+ """Initialize the k, number of samples and correct predictions.
+
+ Args:
+ k: The number of most likely outcomes considered to find the correct label.
+ """
+ self.k = k
+ self.num_correct = 0
+ self.num_sample = 0
+
+ def update(self, preds, labels, sample_weight=None):
+ """Add the predictions and labels.
+
+ Args:
+ preds: The predictions.
+ labels: The labels corresponding to the predictions.
+ sample_weight: The sample weight.
+ """
+ preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+ labels = labels.reshape([len(labels)])
+ with tf.Graph().as_default() as acc_graph:
+ topk = tf.nn.in_top_k(
+ predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+ )
+ fp32_topk = tf.cast(topk, tf.float32)
+ correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+ with tf.compat.v1.Session() as acc_sess:
+ correct = acc_sess.run(correct_tensor)
+
+ self.num_sample += len(labels)
+ self.num_correct += correct
+
+ def reset(self):
+ """Reset the number of samples and correct predictions."""
+ self.num_correct = 0
+ self.num_sample = 0
+
+ def result(self):
+ """Compute the top-k score.
+
+ Returns:
+ The top-k score.
+ """
+ if self.num_sample == 0:
+ logger.warning("Sample num during evaluation is 0.")
+ return 0
+ elif getattr(self, "_hvd", None) is not None: # pragma: no cover
+ allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+ allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+ return allgather_num_correct / allgather_num_sample
+ return self.num_correct / self.num_sample
+
+ @staticmethod
+ def _topk_shape_validate(preds, labels):
+ # preds shape can be Nxclass_num or class_num(N=1 by default)
+ # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+ if isinstance(preds, int):
+ preds = [preds]
+ preds = np.array(preds)
+ elif isinstance(preds, np.ndarray):
+ preds = np.array(preds)
+ elif isinstance(preds, list):
+ preds = np.array(preds)
+ preds = preds.reshape((-1, preds.shape[-1]))
+
+ # consider labels just int value 1x1
+ if isinstance(labels, int):
+ labels = [labels]
+ labels = np.array(labels)
+ elif isinstance(labels, tuple):
+ labels = np.array([labels])
+ labels = labels.reshape((labels.shape[-1], -1))
+ elif isinstance(labels, list):
+ if isinstance(labels[0], int):
+ labels = np.array(labels)
+ labels = labels.reshape((labels.shape[0], 1))
+ elif isinstance(labels[0], tuple):
+ labels = np.array(labels)
+ labels = labels.reshape((labels.shape[-1], -1))
+ else:
+ labels = np.array(labels)
+ # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+ # only support 2 dimension one-shot labels
+ # or 1 dimension one-hot class_num will confuse with N
+
+ if len(preds.shape) == 1:
+ N = 1
+ class_num = preds.shape[0]
+ preds = preds.reshape([-1, class_num])
+ elif len(preds.shape) >= 2:
+ N = preds.shape[0]
+ preds = preds.reshape([N, -1])
+ class_num = preds.shape[1]
+
+ label_N = labels.shape[0]
+ assert label_N == N, "labels batch size should same with preds"
+ labels = labels.reshape([N, -1])
+ # one-hot labels will have 2 dimension not equal 1
+ if labels.shape[1] != 1:
+ labels = labels.argsort()[..., -1:]
+ return preds, labels
+
+
+class TFDataLoader(object): # pragma: no cover
+ """Tensorflow dataloader class.
+
+ In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+ method to do session run, this dataloader is designed to satisfy the usage of feed dict
+ in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+ Args:
+ dataset: obj. wrapper of needed data.
+ batch_size: int. batch size
+ """
+
+ def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+ """Initialize `TFDataDataLoader` class."""
+ self.dataset = dataset
+ self.last_batch = last_batch
+ self.batch_size = batch_size
+ dataset = dataset.batch(batch_size)
+
+ def batch(self, batch_size, last_batch="rollover"):
+ """Dataset return data per batch."""
+ drop_last = False if last_batch == "rollover" else True
+ self.batch_size = batch_size
+ self.dataset = self.dataset.batch(batch_size, drop_last)
+
+ def __iter__(self):
+ """Iterate dataloader."""
+ return self._generate_dataloader(
+ self.dataset,
+ batch_size=self.batch_size,
+ last_batch=self.last_batch,
+ )
+
+ def _generate_dataloader(
+ self,
+ dataset,
+ batch_size=1,
+ last_batch="rollover",
+ collate_fn=None,
+ sampler=None,
+ batch_sampler=None,
+ num_workers=None,
+ pin_memory=None,
+ distributed=False,
+ ):
+ """Yield data."""
+ drop_last = False if last_batch == "rollover" else True
+
+ def check_dynamic_shape(element_spec):
+ if isinstance(element_spec, collections.abc.Sequence):
+ return any([check_dynamic_shape(ele) for ele in element_spec])
+ elif isinstance(element_spec, tf.TensorSpec):
+ return True if element_spec.shape.num_elements() is None else False
+ else:
+ raise ValueError("unrecognized element spec...")
+
+ def squeeze_output(output):
+ if isinstance(output, collections.abc.Sequence):
+ return [squeeze_output(ele) for ele in output]
+ elif isinstance(output, np.ndarray):
+ return np.squeeze(output, axis=0)
+ else:
+ raise ValueError("not supported output format....")
+
+ if tf.executing_eagerly():
+ index = 0
+ outputs = []
+ for iter_tensors in dataset:
+ samples = []
+ iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+ if isinstance(iter_inputs, tf.Tensor):
+ samples.append(iter_inputs.numpy())
+ else:
+ samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+ if isinstance(iter_labels, tf.Tensor):
+ samples.append(iter_labels.numpy())
+ else:
+ samples.append([np.array(l) for l in iter_labels])
+ index += 1
+ outputs.append(samples)
+ if index == batch_size:
+ outputs = default_collate(outputs)
+ yield outputs
+ outputs = []
+ index = 0
+ if len(outputs) > 0:
+ outputs = default_collate(outputs)
+ yield outputs
+ else:
+ try_single_batch = check_dynamic_shape(dataset.element_spec)
+ dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+ ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+ iter_tensors = ds_iterator.get_next()
+ data_config = tf.compat.v1.ConfigProto()
+ data_config.use_per_session_threads = 1
+ data_config.intra_op_parallelism_threads = 1
+ data_config.inter_op_parallelism_threads = 16
+ data_sess = tf.compat.v1.Session(config=data_config)
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+ while True:
+ if not try_single_batch:
+ try:
+ outputs = data_sess.run(iter_tensors)
+ yield outputs
+ except OutOfRangeError:
+ data_sess.close()
+ return
+ else:
+ try:
+ outputs = []
+ for i in range(0, batch_size):
+ outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+ outputs = default_collate(outputs)
+ yield outputs
+ except OutOfRangeError:
+ if len(outputs) == 0:
+ data_sess.close()
+ return
+ else:
+ outputs = default_collate(outputs)
+ yield outputs
+ data_sess.close()
+ return
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py
new file mode 100644
index 00000000000..7fc6a2cdf10
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py
@@ -0,0 +1,143 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from neural_compressor.utils import logger
+from data_process import (
+ ImageRecordDataset,
+ ComposeTransform,
+ BilinearImagenetTransform,
+ TFDataLoader,
+ TopKMetric,
+ LabelShift
+)
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+flags = tf.compat.v1.flags
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ 'input_model', None, 'Run inference with specified keras model.')
+
+flags.DEFINE_string(
+ 'output_model', None, 'The output quantized model.')
+
+flags.DEFINE_string(
+ 'mode', 'performance', 'define benchmark mode for accuracy or performance')
+
+flags.DEFINE_bool(
+ 'tune', False, 'whether to tune the model')
+
+flags.DEFINE_bool(
+ 'benchmark', False, 'whether to benchmark the model')
+
+flags.DEFINE_string(
+ 'calib_data', None, 'location of calibration dataset')
+
+flags.DEFINE_string(
+ 'eval_data', None, 'location of evaluate dataset')
+
+flags.DEFINE_integer('batch_size', 32, 'batch_size')
+
+flags.DEFINE_integer(
+ 'iters', 100, 'maximum iteration when evaluating performance')
+
+height = width = 224
+eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \
+ [BilinearImagenetTransform(height=height, width=width)]))
+
+eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size)
+
+if FLAGS.calib_data:
+ calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \
+ ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)]))
+ calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10)
+
+def evaluate(model):
+ """
+ Custom evaluate function to inference the model for specified metric on validation dataset.
+
+ Args:
+ model (tf.keras.Model): The input model will be the objection of tf.keras.Model.
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ latency_list = []
+ metric = TopKMetric()
+ postprocess = LabelShift(label_shift=1)
+
+ def eval_func(dataloader, metric):
+ warmup = 5
+ iteration = None
+ if FLAGS.benchmark and FLAGS.mode == 'performance':
+ iteration = FLAGS.iters
+ for idx, (inputs, labels) in enumerate(dataloader):
+ start = time.time()
+ predictions = model.predict_on_batch(inputs)
+ end = time.time()
+ latency_list.append(end - start)
+ predictions, labels = postprocess((predictions, labels))
+ metric.update(predictions, labels)
+ if iteration and idx >= iteration:
+ break
+ latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size
+ return latency
+
+ latency = eval_func(eval_dataloader, metric)
+ if FLAGS.benchmark:
+ logger.info("\n{} mode benchmark result:".format(FLAGS.mode))
+ for i, res in enumerate(latency_list):
+ logger.debug("Iteration {} result {}:".format(i, res))
+ if FLAGS.benchmark and FLAGS.mode == 'performance':
+ logger.info("Batch size = {}".format(eval_dataloader.batch_size))
+ logger.info("Latency: {:.3f} ms".format(latency * 1000))
+ logger.info("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+def main(_):
+ if FLAGS.tune:
+ from neural_compressor.common import set_random_seed
+ from neural_compressor.tensorflow import quantize_model
+ from neural_compressor.tensorflow.keras import StaticQuantConfig
+
+ set_random_seed(9527)
+ quant_config = StaticQuantConfig()
+ q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader)
+ q_model.save(FLAGS.output_model)
+ logger.info("Save quantized model to {}.".format(FLAGS.output_model))
+
+ if FLAGS.benchmark:
+ from neural_compressor.tensorflow import Model
+
+ inc_model = Model(FLAGS.input_model)
+ if FLAGS.mode == 'performance':
+ evaluate(inc_model.model)
+ else:
+ accuracy = evaluate(inc_model.model)
+ logger.info('Batch size = %d' % FLAGS.batch_size)
+ logger.info("Accuracy: %.5f" % accuracy)
+
+if __name__ == "__main__":
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py
new file mode 100644
index 00000000000..f8cd505f965
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py
@@ -0,0 +1,35 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import tensorflow as tf
+def get_resnet50_v2_model(saved_path):
+ model = tf.keras.applications.ResNet50V2(weights='imagenet')
+ model.save(saved_path)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='Export pretained keras model',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument(
+ '--output_model',
+ type=str,
+ help='path to exported model file')
+
+ args = parser.parse_args()
+ get_resnet50_v2_model(args.output_model)
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..8b7b47da969
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow>=2.11.1
+intel-extension-for-tensorflow[cpu]
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..d464b019f8e
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input_model ${input_model} \
+ --benchmark \
+ --mode ${mode} \
+ --eval_data ${dataset_location} \
+ --batch_size ${batch_size} \
+ --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..7e3ed727f71
--- /dev/null
+++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+set -x
+
+function main {
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input_model ${input_model} \
+ --output_model ${output_model} \
+ --eval_data ${dataset_location} \
+ --calib_data ${dataset_location} \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md
new file mode 100644
index 00000000000..41a673fc834
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md
@@ -0,0 +1,92 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning result of Intel® Model Zoo bert large model on squad v1.1 task.
+This example can run on Intel CPUs and GPUs.
+
+
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Pretrained model
+```shell
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb
+```
+
+## 3. Prepare Dataset
+Please choose one way to prepare the dataset: either the manual approach or the automatic approach.
+### Manual approach
+```shell
+wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip
+```
+
+```shell
+unzip wwm_uncased_L-24_H-1024_A-16.zip
+```
+
+```shell
+wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -P wwm_uncased_L-24_H-1024_A-16
+```
+wwm_uncased_L-24_H-1024_A-16 folder will be located on your data path.
+
+#### Automatic dataset download
+Run the `prepare_dataset.sh` script located in `examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq`.
+
+Usage:
+```shell
+cd examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq
+bash prepare_dataset.sh --output_dir=./data
+```
+
+### Convert the dataset to TF Record format
+After the dataset is downloaded by either of ways above, the dataset should be converted to files of TF Record format.
+```shell
+python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=data/eval.tf_record
+```
+
+# Run Command
+ Please make sure the command below is executed with the same TensorFlow runtime version as in the step above.
+
+## Quantization
+ ```shell
+ bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=data
+ ```
+
+## Benchmark
+ ```shell
+ bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=data --batch_size=64
+ bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=data --batch_size=64
+ ```
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py
new file mode 100644
index 00000000000..8adecb971fd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py
@@ -0,0 +1,475 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Create masked LM/next sentence masked_lm TF examples for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import random
+import tokenization
+import tensorflow as tf
+
+from absl import app
+#from absl import flags
+from absl import logging
+flags = tf.compat.v1.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None,
+ "Input raw text file (or comma-separated list of files).")
+
+flags.DEFINE_string(
+ "output_file", None,
+ "Output TF example file (or comma-separated list of files).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_bool(
+ "do_whole_word_mask", False,
+ "Whether to use whole word masking rather than per-WordPiece masking.")
+
+flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.")
+
+flags.DEFINE_integer("max_predictions_per_seq", 20,
+ "Maximum number of masked LM predictions per sequence.")
+
+flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.")
+
+flags.DEFINE_integer(
+ "dupe_factor", 10,
+ "Number of times to duplicate the input data (with different masks).")
+
+flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.")
+
+flags.DEFINE_float(
+ "short_seq_prob", 0.1,
+ "Probability of creating sequences which are shorter than the "
+ "maximum length.")
+
+
+class TrainingInstance(object):
+  """A single training instance (sentence pair)."""
+
+  def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
+               is_random_next):
+    self.tokens = tokens  # WordPiece tokens, including [CLS]/[SEP]/[MASK]
+    self.segment_ids = segment_ids  # 0 for segment A tokens, 1 for segment B
+    self.is_random_next = is_random_next  # next-sentence-prediction label
+    self.masked_lm_positions = masked_lm_positions  # indices of masked tokens
+    self.masked_lm_labels = masked_lm_labels  # original tokens at those indices
+
+  def __str__(self):  # human-readable dump of all fields, used for debug logging
+    s = ""
+    s += "tokens: %s\n" % (" ".join(
+        [tokenization.printable_text(x) for x in self.tokens]))
+    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
+    s += "is_random_next: %s\n" % self.is_random_next
+    s += "masked_lm_positions: %s\n" % (" ".join(
+        [str(x) for x in self.masked_lm_positions]))
+    s += "masked_lm_labels: %s\n" % (" ".join(
+        [tokenization.printable_text(x) for x in self.masked_lm_labels]))
+    s += "\n"
+    return s
+
+  def __repr__(self):
+    return self.__str__()
+
+
+def write_instance_to_example_files(instances, tokenizer, max_seq_length,
+                                    max_predictions_per_seq, output_files):
+  """Create TF example files from `TrainingInstance`s."""
+  writers = []
+  for output_file in output_files:
+    writers.append(tf.io.TFRecordWriter(output_file))  # one writer per output shard
+
+  writer_index = 0  # round-robin cursor over the output shards
+
+  total_written = 0
+  for (inst_index, instance) in enumerate(instances):
+    input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
+    input_mask = [1] * len(input_ids)  # 1 for real tokens, 0 for padding
+    segment_ids = list(instance.segment_ids)
+    assert len(input_ids) <= max_seq_length
+
+    while len(input_ids) < max_seq_length:  # zero-pad up to the fixed length
+      input_ids.append(0)
+      input_mask.append(0)
+      segment_ids.append(0)
+
+    assert len(input_ids) == max_seq_length
+    assert len(input_mask) == max_seq_length
+    assert len(segment_ids) == max_seq_length
+
+    masked_lm_positions = list(instance.masked_lm_positions)
+    masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
+    masked_lm_weights = [1.0] * len(masked_lm_ids)  # weight 0.0 marks padded prediction slots
+
+    while len(masked_lm_positions) < max_predictions_per_seq:  # pad LM slots to fixed count
+      masked_lm_positions.append(0)
+      masked_lm_ids.append(0)
+      masked_lm_weights.append(0.0)
+
+    next_sentence_label = 1 if instance.is_random_next else 0
+
+    features = collections.OrderedDict()
+    features["input_ids"] = create_int_feature(input_ids)
+    features["input_mask"] = create_int_feature(input_mask)
+    features["segment_ids"] = create_int_feature(segment_ids)
+    features["masked_lm_positions"] = create_int_feature(masked_lm_positions)
+    features["masked_lm_ids"] = create_int_feature(masked_lm_ids)
+    features["masked_lm_weights"] = create_float_feature(masked_lm_weights)
+    features["next_sentence_labels"] = create_int_feature([next_sentence_label])
+
+    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+
+    writers[writer_index].write(tf_example.SerializeToString())
+    writer_index = (writer_index + 1) % len(writers)  # distribute instances across shards
+
+    total_written += 1
+
+    if inst_index < 20:  # log only the first 20 instances for manual inspection
+      tf.compat.v1.logging.info("*** Example ***")
+      tf.compat.v1.logging.info("tokens: %s" % " ".join(
+          [tokenization.printable_text(x) for x in instance.tokens]))
+
+      for feature_name in features.keys():
+        feature = features[feature_name]
+        values = []
+        if feature.int64_list.value:
+          values = feature.int64_list.value
+        elif feature.float_list.value:
+          values = feature.float_list.value
+        tf.compat.v1.logging.info(
+            "%s: %s" % (feature_name, " ".join([str(x) for x in values])))
+
+  for writer in writers:
+    writer.close()  # flush and close every shard
+
+  tf.compat.v1.logging.info("Wrote %d total instances", total_written)
+
+
+def create_int_feature(values):  # Wrap an iterable of ints as a tf.train.Feature(int64_list).
+  feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+  return feature
+
+
+def create_float_feature(values):  # Wrap an iterable of floats as a tf.train.Feature(float_list).
+  feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
+  return feature
+
+
+def create_training_instances(input_files, tokenizer, max_seq_length,
+                              dupe_factor, short_seq_prob, masked_lm_prob,
+                              max_predictions_per_seq, rng):
+  """Create `TrainingInstance`s from raw text."""
+  all_documents = [[]]  # list of documents; each document is a list of token lists
+
+  # Input file format:
+  # (1) One sentence per line. These should ideally be actual sentences, not
+  # entire paragraphs or arbitrary spans of text. (Because we use the
+  # sentence boundaries for the "next sentence prediction" task).
+  # (2) Blank lines between documents. Document boundaries are needed so
+  # that the "next sentence prediction" task doesn't span between documents.
+  for input_file in input_files:
+    with tf.io.gfile.GFile(input_file, "r") as reader:
+      while True:
+        line = tokenization.convert_to_unicode(reader.readline())
+        if not line:  # EOF: readline() returned the empty string
+          break
+        line = line.strip()
+
+        # Empty lines are used as document delimiters
+        if not line:  # blank line => start a new document
+          all_documents.append([])
+        tokens = tokenizer.tokenize(line)
+        if tokens:  # skip lines that tokenize to nothing (incl. the delimiter lines)
+          all_documents[-1].append(tokens)
+
+  # Remove empty documents
+  all_documents = [x for x in all_documents if x]
+  rng.shuffle(all_documents)
+
+  vocab_words = list(tokenizer.vocab.keys())
+  instances = []
+  for _ in range(dupe_factor):  # each pass re-masks the data differently
+    for document_index in range(len(all_documents)):
+      instances.extend(
+          create_instances_from_document(
+              all_documents, document_index, max_seq_length, short_seq_prob,
+              masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
+
+  rng.shuffle(instances)
+  return instances
+
+
+def create_instances_from_document(
+    all_documents, document_index, max_seq_length, short_seq_prob,
+    masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
+  """Creates `TrainingInstance`s for a single document."""
+  document = all_documents[document_index]
+
+  # Account for [CLS], [SEP], [SEP]
+  max_num_tokens = max_seq_length - 3
+
+  # We *usually* want to fill up the entire sequence since we are padding
+  # to `max_seq_length` anyways, so short sequences are generally wasted
+  # computation. However, we *sometimes*
+  # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+  # sequences to minimize the mismatch between pre-training and fine-tuning.
+  # The `target_seq_length` is just a rough target however, whereas
+  # `max_seq_length` is a hard limit.
+  target_seq_length = max_num_tokens
+  if rng.random() < short_seq_prob:
+    target_seq_length = rng.randint(2, max_num_tokens)
+
+  # We DON'T just concatenate all of the tokens from a document into a long
+  # sequence and choose an arbitrary split point because this would make the
+  # next sentence prediction task too easy. Instead, we split the input into
+  # segments "A" and "B" based on the actual "sentences" provided by the user
+  # input.
+  instances = []
+  current_chunk = []  # sentences accumulated for the current instance
+  current_length = 0  # total token count of current_chunk
+  i = 0
+  while i < len(document):
+    segment = document[i]
+    current_chunk.append(segment)
+    current_length += len(segment)
+    if i == len(document) - 1 or current_length >= target_seq_length:
+      if current_chunk:
+        # `a_end` is how many segments from `current_chunk` go into the `A`
+        # (first) sentence.
+        a_end = 1
+        if len(current_chunk) >= 2:
+          a_end = rng.randint(1, len(current_chunk) - 1)
+
+        tokens_a = []
+        for j in range(a_end):
+          tokens_a.extend(current_chunk[j])
+
+        tokens_b = []
+        # Random next
+        is_random_next = False
+        if len(current_chunk) == 1 or rng.random() < 0.5:  # 50% of the time B is random
+          is_random_next = True
+          target_b_length = target_seq_length - len(tokens_a)
+
+          # This should rarely go for more than one iteration for large
+          # corpora. However, just to be careful, we try to make sure that
+          # the random document is not the same as the document
+          # we're processing.
+          for _ in range(10):  # bounded retries; may still pick the same doc for tiny corpora
+            random_document_index = rng.randint(0, len(all_documents) - 1)
+            if random_document_index != document_index:
+              break
+
+          random_document = all_documents[random_document_index]
+          random_start = rng.randint(0, len(random_document) - 1)
+          for j in range(random_start, len(random_document)):
+            tokens_b.extend(random_document[j])
+            if len(tokens_b) >= target_b_length:
+              break
+          # We didn't actually use these segments so we "put them back" so
+          # they don't go to waste.
+          num_unused_segments = len(current_chunk) - a_end
+          i -= num_unused_segments  # rewind the outer loop over the document
+        # Actual next
+        else:
+          is_random_next = False
+          for j in range(a_end, len(current_chunk)):
+            tokens_b.extend(current_chunk[j])
+        truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
+
+        assert len(tokens_a) >= 1
+        assert len(tokens_b) >= 1
+
+        tokens = []
+        segment_ids = []
+        tokens.append("[CLS]")
+        segment_ids.append(0)
+        for token in tokens_a:
+          tokens.append(token)
+          segment_ids.append(0)
+
+        tokens.append("[SEP]")
+        segment_ids.append(0)
+
+        for token in tokens_b:
+          tokens.append(token)
+          segment_ids.append(1)
+        tokens.append("[SEP]")
+        segment_ids.append(1)
+
+        (tokens, masked_lm_positions,
+         masked_lm_labels) = create_masked_lm_predictions(
+             tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
+        instance = TrainingInstance(
+            tokens=tokens,
+            segment_ids=segment_ids,
+            is_random_next=is_random_next,
+            masked_lm_positions=masked_lm_positions,
+            masked_lm_labels=masked_lm_labels)
+        instances.append(instance)
+      current_chunk = []  # reset accumulator for the next instance
+      current_length = 0
+    i += 1
+
+  return instances
+
+
+MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
+                                          ["index", "label"])  # (position, original token) for one masked slot
+
+
+def create_masked_lm_predictions(tokens, masked_lm_prob,
+                                 max_predictions_per_seq, vocab_words, rng):
+  """Creates the predictions for the masked LM objective."""
+
+  cand_indexes = []  # candidate positions; each entry is a list of indexes (whole-word groups)
+  for (i, token) in enumerate(tokens):
+    if token == "[CLS]" or token == "[SEP]":  # special tokens are never masked
+      continue
+    # Whole Word Masking means that if we mask all of the wordpieces
+    # corresponding to an original word. When a word has been split into
+    # WordPieces, the first token does not have any marker and any subsequence
+    # tokens are prefixed with ##. So whenever we see the ## token, we
+    # append it to the previous set of word indexes.
+    #
+    # Note that Whole Word Masking does *not* change the training code
+    # at all -- we still predict each WordPiece independently, softmaxed
+    # over the entire vocabulary.
+    if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and
+        token.startswith("##")):
+      cand_indexes[-1].append(i)  # group continuation piece with its word
+    else:
+      cand_indexes.append([i])
+
+  rng.shuffle(cand_indexes)  # randomizes which candidates get masked
+
+  output_tokens = list(tokens)  # copy; original `tokens` is kept for the labels
+
+  num_to_predict = min(max_predictions_per_seq,
+                       max(1, int(round(len(tokens) * masked_lm_prob))))
+
+  masked_lms = []
+  covered_indexes = set()
+  for index_set in cand_indexes:
+    if len(masked_lms) >= num_to_predict:
+      break
+    # If adding a whole-word mask would exceed the maximum number of
+    # predictions, then just skip this candidate.
+    if len(masked_lms) + len(index_set) > num_to_predict:
+      continue
+    is_any_index_covered = False
+    for index in index_set:
+      if index in covered_indexes:
+        is_any_index_covered = True
+        break
+    if is_any_index_covered:  # never mask the same position twice
+      continue
+    for index in index_set:
+      covered_indexes.add(index)
+
+      masked_token = None
+      # 80% of the time, replace with [MASK]
+      if rng.random() < 0.8:
+        masked_token = "[MASK]"
+      else:
+        # 10% of the time, keep original
+        if rng.random() < 0.5:
+          masked_token = tokens[index]
+        # 10% of the time, replace with random word
+        else:
+          masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+
+      output_tokens[index] = masked_token
+
+      masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+  assert len(masked_lms) <= num_to_predict
+  masked_lms = sorted(masked_lms, key=lambda x: x.index)  # restore positional order
+
+  masked_lm_positions = []
+  masked_lm_labels = []
+  for p in masked_lms:
+    masked_lm_positions.append(p.index)
+    masked_lm_labels.append(p.label)
+
+  return (output_tokens, masked_lm_positions, masked_lm_labels)
+
+
+def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
+  """Truncates a pair of sequences to a maximum sequence length."""
+  while True:
+    total_length = len(tokens_a) + len(tokens_b)
+    if total_length <= max_num_tokens:
+      break
+
+    trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b  # always shrink the longer list (in place)
+    assert len(trunc_tokens) >= 1
+
+    # We want to sometimes truncate from the front and sometimes from the
+    # back to add more randomness and avoid biases.
+    if rng.random() < 0.5:
+      del trunc_tokens[0]
+    else:
+      trunc_tokens.pop()
+
+
+def main(_):
+  """Tokenizes the input corpus and writes masked-LM TF example shards."""
+
+  tokenizer = tokenization.FullTokenizer(
+      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+  input_files = []
+  for input_pattern in FLAGS.input_file.split(","):  # comma-separated glob patterns
+    input_files.extend(tf.io.gfile.glob(input_pattern))
+
+  tf.compat.v1.logging.info("*** Reading from input files ***")
+  for input_file in input_files:
+    tf.compat.v1.logging.info("  %s", input_file)
+
+  rng = random.Random(FLAGS.random_seed)  # seeded for reproducible masking
+  instances = create_training_instances(
+      input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
+      FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
+      rng)
+
+  output_files = FLAGS.output_file.split(",")  # one TFRecord shard per listed path
+  tf.compat.v1.logging.info("*** Writing to output files ***")
+  for output_file in output_files:
+    tf.compat.v1.logging.info("  %s", output_file)
+
+  write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
+                                  FLAGS.max_predictions_per_seq, output_files)
+
+
+if __name__ == "__main__":
+  flags.mark_flag_as_required("input_file")
+  flags.mark_flag_as_required("output_file")
+  flags.mark_flag_as_required("vocab_file")
+  tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py
new file mode 100644
index 00000000000..12c6486283d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py
@@ -0,0 +1,509 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""generate bert dataset"""
+
+import collections
+import json
+import os
+import tokenization
+import six
+import tensorflow as tf
+
+from absl import app
+#from absl import flags
+from absl import logging
+
+flags = tf.compat.v1.flags
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_string(
+ "predict_file", None,
+ "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
+
+flags.DEFINE_string(
+ "output_file", None, "The output tf_record for usage.")
+
+class SquadExample(object):
+  """A single training/test example for simple sequence classification.
+
+     For examples without an answer, the start and end position are -1.
+  """
+
+  def __init__(self,
+               qas_id,
+               question_text,
+               doc_tokens,
+               orig_answer_text=None,
+               start_position=None,
+               end_position=None,
+               is_impossible=False):
+    self.qas_id = qas_id  # unique SQuAD question id
+    self.question_text = question_text
+    self.doc_tokens = doc_tokens  # whitespace-tokenized context words
+    self.orig_answer_text = orig_answer_text
+    self.start_position = start_position  # word index of answer start (None for eval)
+    self.end_position = end_position  # word index of answer end (None for eval)
+    self.is_impossible = is_impossible
+    #self.startpb = 0
+
+  def __str__(self):
+    return self.__repr__()
+
+  def __repr__(self):
+    s = ""
+    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
+    s += ", question_text: %s" % (
+        tokenization.printable_text(self.question_text))
+    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
+    if self.start_position:  # NOTE(review): all three guards test start_position (mirrors upstream BERT) — confirm intent
+      s += ", start_position: %d" % (self.start_position)
+    if self.start_position:
+      s += ", end_position: %d" % (self.end_position)
+    if self.start_position:
+      s += ", is_impossible: %r" % (self.is_impossible)
+    return s
+
+
+class InputFeatures(object):
+  """A single set of features of data."""
+
+  def __init__(self,
+               unique_id,
+               example_index,
+               doc_span_index,
+               tokens,
+               token_to_orig_map,
+               token_is_max_context,
+               input_ids,
+               input_mask,
+               segment_ids,
+               start_position=None,
+               end_position=None,
+               is_impossible=None):
+    self.unique_id = unique_id  # globally unique id for this feature
+    self.example_index = example_index  # index of the source SquadExample
+    self.doc_span_index = doc_span_index  # which sliding-window span of the doc
+    self.tokens = tokens
+    self.token_to_orig_map = token_to_orig_map  # token index -> original word index
+    self.token_is_max_context = token_is_max_context  # token index -> bool (see _check_is_max_context)
+    self.input_ids = input_ids
+    self.input_mask = input_mask  # 1 for real tokens, 0 for padding
+    self.segment_ids = segment_ids
+    self.start_position = start_position
+    self.end_position = end_position
+    self.is_impossible = is_impossible
+
+
+def read_squad_examples(input_file, is_training=None):
+  """Read a SQuAD json file into a list of SquadExample."""
+  with tf.io.gfile.GFile(input_file, "r") as reader:
+    input_data = json.load(reader)["data"]
+
+  def is_whitespace(c):  # treats 0x202F (narrow no-break space) as whitespace too
+    if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
+      return True
+    return False
+
+  examples = []
+  for entry in input_data:
+    for paragraph in entry["paragraphs"]:
+      paragraph_text = paragraph["context"]
+      doc_tokens = []  # whitespace-delimited words of the context
+      char_to_word_offset = []  # maps every char of the context to its word index
+      prev_is_whitespace = True
+      for c in paragraph_text:
+        if is_whitespace(c):
+          prev_is_whitespace = True
+        else:
+          if prev_is_whitespace:
+            doc_tokens.append(c)  # start a new word
+          else:
+            doc_tokens[-1] += c  # extend the current word
+          prev_is_whitespace = False
+        char_to_word_offset.append(len(doc_tokens) - 1)
+
+      for qa in paragraph["qas"]:
+        qas_id = qa["id"]
+        question_text = qa["question"]
+        start_position = None
+        end_position = None
+        orig_answer_text = None
+        is_impossible = False
+        if is_training:
+
+          if FLAGS.version_2_with_negative:  # NOTE(review): this flag is never defined in this file; AttributeError if is_training is truthy — confirm
+            is_impossible = qa["is_impossible"]
+          if (len(qa["answers"]) != 1) and (not is_impossible):
+            raise ValueError(
+                "For training, each question should have exactly 1 answer.")
+          if not is_impossible:
+            answer = qa["answers"][0]
+            orig_answer_text = answer["text"]
+            answer_offset = answer["answer_start"]
+            answer_length = len(orig_answer_text)
+            start_position = char_to_word_offset[answer_offset]
+            end_position = char_to_word_offset[answer_offset + answer_length -
+                                               1]
+            # Only add answers where the text can be exactly recovered from the
+            # document. If this CAN'T happen it's likely due to weird Unicode
+            # stuff so we will just skip the example.
+            #
+            # Note that this means for training mode, every example is NOT
+            # guaranteed to be preserved.
+            actual_text = " ".join(
+                doc_tokens[start_position:(end_position + 1)])
+            cleaned_answer_text = " ".join(
+                tokenization.whitespace_tokenize(orig_answer_text))
+            if actual_text.find(cleaned_answer_text) == -1:
+              tf.compat.v1.logging.warning("Could not find answer: '%s' vs. '%s'",
+                                           actual_text, cleaned_answer_text)
+              continue
+          else:
+            start_position = -1  # unanswerable question sentinel
+            end_position = -1
+            orig_answer_text = ""
+
+        example = SquadExample(
+            qas_id=qas_id,
+            question_text=question_text,
+            doc_tokens=doc_tokens,
+            orig_answer_text=orig_answer_text,
+            start_position=start_position,
+            end_position=end_position,
+            is_impossible=is_impossible)
+        examples.append(example)
+
+  return examples
+
+
+def convert_examples_to_features(examples, tokenizer, max_seq_length,
+                                 doc_stride, max_query_length, is_training=None,
+                                 output_fn=None):
+  """Loads a data file into a list of `InputBatch`s."""
+
+  unique_id = 1000000000  # large base so feature ids never collide with example ids
+
+  for (example_index, example) in enumerate(examples):
+    query_tokens = tokenizer.tokenize(example.question_text)
+
+    if len(query_tokens) > max_query_length:  # truncate over-long questions
+      query_tokens = query_tokens[0:max_query_length]
+
+    tok_to_orig_index = []  # wordpiece index -> original word index
+    orig_to_tok_index = []  # original word index -> first wordpiece index
+    all_doc_tokens = []  # wordpiece tokens of the whole context
+    for (i, token) in enumerate(example.doc_tokens):
+      orig_to_tok_index.append(len(all_doc_tokens))
+      sub_tokens = tokenizer.tokenize(token)
+      for sub_token in sub_tokens:
+        tok_to_orig_index.append(i)
+        all_doc_tokens.append(sub_token)
+
+    tok_start_position = None
+    tok_end_position = None
+    if is_training and example.is_impossible:
+      tok_start_position = -1
+      tok_end_position = -1
+    if is_training and not example.is_impossible:
+      tok_start_position = orig_to_tok_index[example.start_position]
+      if example.end_position < len(example.doc_tokens) - 1:
+        tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+      else:
+        tok_end_position = len(all_doc_tokens) - 1
+      (tok_start_position, tok_end_position) = _improve_answer_span(
+          all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
+          example.orig_answer_text)
+
+    # The -3 accounts for [CLS], [SEP] and [SEP]
+    max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
+
+    # We can have documents that are longer than the maximum sequence length.
+    # To deal with this we do a sliding window approach, where we take chunks
+    # of the up to our max length with a stride of `doc_stride`.
+    _DocSpan = collections.namedtuple(  # pylint: disable=invalid-name
+        "DocSpan", ["start", "length"])
+    doc_spans = []
+    start_offset = 0
+    while start_offset < len(all_doc_tokens):
+      length = len(all_doc_tokens) - start_offset
+      if length > max_tokens_for_doc:
+        length = max_tokens_for_doc
+      doc_spans.append(_DocSpan(start=start_offset, length=length))
+      if start_offset + length == len(all_doc_tokens):
+        break
+      start_offset += min(length, doc_stride)
+
+    for (doc_span_index, doc_span) in enumerate(doc_spans):  # one feature per window
+      tokens = []
+      token_to_orig_map = {}
+      token_is_max_context = {}
+      segment_ids = []
+      tokens.append("[CLS]")
+      segment_ids.append(0)
+      for token in query_tokens:  # segment A = question
+        tokens.append(token)
+        segment_ids.append(0)
+      tokens.append("[SEP]")
+      segment_ids.append(0)
+
+      for i in range(doc_span.length):  # segment B = context window
+        split_token_index = doc_span.start + i
+        token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
+
+        is_max_context = _check_is_max_context(doc_spans, doc_span_index,
+                                               split_token_index)
+        token_is_max_context[len(tokens)] = is_max_context
+        tokens.append(all_doc_tokens[split_token_index])
+        segment_ids.append(1)
+      tokens.append("[SEP]")
+      segment_ids.append(1)
+
+      input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+      # The mask has 1 for real tokens and 0 for padding tokens. Only real
+      # tokens are attended to.
+      input_mask = [1] * len(input_ids)
+
+      # Zero-pad up to the sequence length.
+      while len(input_ids) < max_seq_length:
+        input_ids.append(0)
+        input_mask.append(0)
+        segment_ids.append(0)
+
+      assert len(input_ids) == max_seq_length
+      assert len(input_mask) == max_seq_length
+      assert len(segment_ids) == max_seq_length
+
+      start_position = None
+      end_position = None
+      if is_training and not example.is_impossible:
+        # For training, if our document chunk does not contain an annotation
+        # we throw it out, since there is nothing to predict.
+        doc_start = doc_span.start
+        doc_end = doc_span.start + doc_span.length - 1
+        out_of_span = False
+        if not (tok_start_position >= doc_start and
+                tok_end_position <= doc_end):
+          out_of_span = True
+        if out_of_span:
+          start_position = 0  # points at [CLS], i.e. "no answer in this span"
+          end_position = 0
+        else:
+          doc_offset = len(query_tokens) + 2  # [CLS] + question + [SEP]
+          start_position = tok_start_position - doc_start + doc_offset
+          end_position = tok_end_position - doc_start + doc_offset
+
+      if is_training and example.is_impossible:
+        start_position = 0
+        end_position = 0
+
+      if example_index < 1:  # log only the very first example's features
+        tf.compat.v1.logging.info("*** Example ***")
+        tf.compat.v1.logging.info("unique_id: %s" % (unique_id))
+        tf.compat.v1.logging.info("example_index: %s" % (example_index))
+        tf.compat.v1.logging.info("doc_span_index: %s" % (doc_span_index))
+        tf.compat.v1.logging.info("tokens: %s" % " ".join(
+            [tokenization.printable_text(x) for x in tokens]))
+        tf.compat.v1.logging.info("token_to_orig_map: %s" % " ".join(
+            ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
+        tf.compat.v1.logging.info("token_is_max_context: %s" % " ".join([
+            "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
+        ]))
+        tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+        tf.compat.v1.logging.info(
+            "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+        tf.compat.v1.logging.info(
+            "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+        if is_training and example.is_impossible:
+          tf.compat.v1.logging.info("impossible example")
+        if is_training and not example.is_impossible:
+          answer_text = " ".join(tokens[start_position:(end_position + 1)])
+          tf.compat.v1.logging.info("start_position: %d" % (start_position))
+          tf.compat.v1.logging.info("end_position: %d" % (end_position))
+          tf.compat.v1.logging.info(
+              "answer: %s" % (tokenization.printable_text(answer_text)))
+
+      feature = InputFeatures(
+          unique_id=unique_id,
+          example_index=example_index,
+          doc_span_index=doc_span_index,
+          tokens=tokens,
+          token_to_orig_map=token_to_orig_map,
+          token_is_max_context=token_is_max_context,
+          input_ids=input_ids,
+          input_mask=input_mask,
+          segment_ids=segment_ids,
+          start_position=start_position,
+          end_position=end_position,
+          is_impossible=example.is_impossible)
+
+      # Run callback
+      output_fn(feature)
+
+      unique_id += 1
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+                         orig_answer_text):
+  """Returns tokenized answer spans that better match the annotated answer."""
+
+  # The SQuAD annotations are character based. We first project them to
+  # whitespace-tokenized words. But then after WordPiece tokenization, we can
+  # often find a "better match". For example:
+  #
+  # Question: What year was John Smith born?
+  # Context: The leader was John Smith (1895-1943).
+  # Answer: 1895
+  #
+  # The original whitespace-tokenized answer will be "(1895-1943).". However
+  # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+  # the exact answer, 1895.
+  #
+  # However, this is not always possible. Consider the following:
+  #
+  # Question: What country is the top exporter of electornics?
+  # Context: The Japanese electronics industry is the lagest in the world.
+  # Answer: Japan
+  #
+  # In this case, the annotator chose "Japan" as a character sub-span of
+  # the word "Japanese". Since our WordPiece tokenizer does not split
+  # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+  # in SQuAD, but does happen.
+  tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
+
+  for new_start in range(input_start, input_end + 1):  # try every sub-span, widest end first
+    for new_end in range(input_end, new_start - 1, -1):
+      text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
+      if text_span == tok_answer_text:
+        return (new_start, new_end)
+
+  return (input_start, input_end)  # no tighter match found; keep original span
+
+
+def _check_is_max_context(doc_spans, cur_span_index, position):
+  """Check if this is the 'max context' doc span for the token."""
+
+  # Because of the sliding window approach taken to scoring documents, a single
+  # token can appear in multiple documents. E.g.
+  #  Doc: the man went to the store and bought a gallon of milk
+  #  Span A: the man went to the
+  #  Span B: to the store and bought
+  #  Span C: and bought a gallon of
+  #  ...
+  #
+  # Now the word 'bought' will have two scores from spans B and C. We only
+  # want to consider the score with "maximum context", which we define as
+  # the *minimum* of its left and right context (the *sum* of left and
+  # right context will always be the same, of course).
+  #
+  # In the example the maximum context for 'bought' would be span C since
+  # it has 1 left context and 3 right context, while span B has 4 left context
+  # and 0 right context.
+  best_score = None
+  best_span_index = None
+  for (span_index, doc_span) in enumerate(doc_spans):
+    end = doc_span.start + doc_span.length - 1
+    if position < doc_span.start:  # token not inside this span
+      continue
+    if position > end:
+      continue
+    num_left_context = position - doc_span.start
+    num_right_context = end - position
+    score = min(num_left_context, num_right_context) + 0.01 * doc_span.length  # small tiebreak favoring longer spans
+    if best_score is None or score > best_score:
+      best_score = score
+      best_span_index = span_index
+
+  return cur_span_index == best_span_index
+
+class FeatureWriter(object):
+  """Writes InputFeature to TF example file."""
+
+  def __init__(self, filename, is_training):
+    self.is_training = is_training  # training mode adds label features
+    self.num_features = 0
+    self.filename = filename
+    self._writer = tf.io.TFRecordWriter(self.filename)  # caller must close() to flush
+
+  def process_feature(self, feature):
+    """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
+    self.num_features += 1
+
+    def create_int_feature(values):  # local helper: ints -> int64_list Feature
+      feature = tf.train.Feature(
+          int64_list=tf.train.Int64List(value=list(values)))
+      return feature
+
+    features = collections.OrderedDict()
+    features["unique_ids"] = create_int_feature([feature.unique_id])
+    features["input_ids"] = create_int_feature(feature.input_ids)
+    features["input_mask"] = create_int_feature(feature.input_mask)
+    features["segment_ids"] = create_int_feature(feature.segment_ids)
+
+    if self.is_training:  # labels only exist for training data
+      features["start_positions"] = create_int_feature([feature.start_position])
+      features["end_positions"] = create_int_feature([feature.end_position])
+      impossible = 0
+      if feature.is_impossible:
+        impossible = 1
+      features["is_impossible"] = create_int_feature([impossible])
+
+    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+    self._writer.write(tf_example.SerializeToString())
+
+  def close(self):
+    self._writer.close()
+
+  def rm_tmp_file(self):  # deletes the written record file from disk
+    os.remove(self.filename)
+
+def main(_):
+  """Converts the SQuAD dev set into a TFRecord file of eval features."""
+
+  tokenizer = tokenization.FullTokenizer(
+      vocab_file=FLAGS.vocab_file, do_lower_case=True)
+
+  eval_examples = read_squad_examples(
+      input_file=FLAGS.predict_file, is_training=False)
+
+  eval_writer = FeatureWriter(
+      filename=FLAGS.output_file, is_training=False)
+
+  eval_features = []
+  def append_feature(feature):  # collects features and streams them to the TFRecord file
+    eval_features.append(feature)
+    eval_writer.process_feature(feature)
+  convert_examples_to_features(
+      examples=eval_examples,
+      tokenizer=tokenizer,
+      max_seq_length=384,
+      doc_stride=128,
+      max_query_length=64,
+      is_training=False,
+      output_fn=append_feature)  # NOTE(review): eval_writer.close() is never called; records may not be flushed to disk — confirm
+
+
+if __name__ == "__main__":
+  flags.mark_flag_as_required("vocab_file")
+  flags.mark_flag_as_required("predict_file")
+  flags.mark_flag_as_required("output_file")
+  tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..6e9d169ada5
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py
@@ -0,0 +1,936 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import json
+import re
+import sys
+import string
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from collections import Counter
+from neural_compressor.tensorflow.utils.data import default_collate, BaseDataLoader, BatchSampler, IterableFetcher
+
def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    """Return the best score of ``prediction`` against any ground truth.

    Evaluates ``metric_fn(prediction, ground_truth)`` for every entry in
    ``ground_truths`` and returns the maximum.

    Args:
        metric_fn: Callable scoring a (prediction, ground_truth) pair.
        prediction: The prediction result.
        ground_truths: A non-empty list of correct answers.

    Returns:
        The max metric. Float point number.
    """
    return max(metric_fn(prediction, truth) for truth in ground_truths)
+
def normalize_answer(text: str) -> str:
    """Normalize answer text for SQuAD-style comparison.

    Lower-cases the text, strips punctuation, removes the English articles
    "a"/"an"/"the", and collapses all whitespace runs (spaces, tabs,
    newlines) into single spaces.

    Args:
        text: The text to be normalized.

    Returns:
        The normalized text.
    """
    lowered = text.lower()
    # Drop every punctuation character.
    no_punc = "".join(ch for ch in lowered if ch not in string.punctuation)
    # Replace standalone articles with spaces; collapsed below.
    no_articles = re.sub(r"\b(a|an|the)\b", " ", no_punc)
    # split() with no argument treats any whitespace run as one separator.
    return " ".join(no_articles.split())
+
def exact_match_score(prediction, ground_truth):
    """Check whether prediction and ground truth match after normalization.

    Args:
        prediction: The result of predictions to be evaluated.
        ground_truth: The ground truth.

    Returns:
        True when the normalized texts are identical, False otherwise.
    """
    normalized_prediction = normalize_answer(prediction)
    normalized_truth = normalize_answer(ground_truth)
    return normalized_prediction == normalized_truth
+
def f1_score(prediction, ground_truth):
    """Calculate the token-level F1 score between prediction and ground truth.

    Args:
        prediction: The predicted result.
        ground_truth: The ground truth.

    Returns:
        The F1 score of prediction. Float point number.
    """
    pred_tokens = normalize_answer(prediction).split()
    truth_tokens = normalize_answer(ground_truth).split()
    # Multiset intersection counts tokens shared between both answers.
    overlap = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())
    if overlap == 0:
        return 0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(truth_tokens)
    return (2 * precision * recall) / (precision + recall)
+
def evaluate(dataset, predictions):
    """Evaluate the average F1 score and the exact match score for Question-Answering results.

    Args:
        dataset: The dataset to evaluate the prediction. A list instance of articles.
            An article contains a list of paragraphs, a paragraph contains a list of
            question-and-answers (qas), and a question-and-answer contains an id, a
            question, and a list of correct answers.
        predictions: The result of predictions to be evaluated. A dict mapping the id of
            a question to the predicted answer of the question.

    Returns:
        A dict with the exact match score ("exact_match") and the F1 score ("f1"),
        both as percentages in [0, 100].
    """
    f1 = exact_match = total = 0
    for article in dataset:
        for paragraph in article["paragraphs"]:
            for qa in paragraph["qas"]:
                total += 1
                if qa["id"] not in predictions:
                    message = "Unanswered question " + qa["id"] + " will receive score 0."
                    print(message, file=sys.stderr)
                    continue
                ground_truths = list(map(lambda x: x["text"], qa["answers"]))
                prediction = predictions[qa["id"]]
                exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
                f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)

    # Guard against an empty dataset: report zeros instead of dividing by zero.
    if total == 0:
        return {"exact_match": 0.0, "f1": 0.0}

    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total

    return {"exact_match": exact_match, "f1": f1}
+
+
class BaseMetric(object):
    """Abstract base wrapper around a metric class."""

    def __init__(self, metric, single_output=False, hvd=None):
        """Store the wrapped metric class and evaluation options.

        Args:
            metric: The metric class.
            single_output: Whether the output is single or not, defaults to False.
            hvd: The Horovod class for distributed training, defaults to None.
        """
        self._metric_cls = metric
        self._single_output = single_output
        self._hvd = hvd

    def __call__(self, *args, **kwargs):
        """Instantiate the wrapped metric class with the given arguments.

        Returns:
            The class itself.
        """
        self._metric = self._metric_cls(*args, **kwargs)
        return self

    @abstractmethod
    def update(self, preds, labels=None, sample_weight=None):
        """Accumulate predictions and references; subclasses must implement.

        Args:
            preds: The prediction result.
            labels: The reference. Defaults to None.
            sample_weight: The sampling weight. Defaults to None.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    @abstractmethod
    def reset(self):
        """Clear accumulated predictions and labels; subclasses must implement.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    @abstractmethod
    def result(self):
        """Compute the final metric value; subclasses must implement.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    @property
    def metric(self):
        """The instantiated metric object (set by __call__)."""
        return self._metric

    @property
    def hvd(self):
        """The Horovod handle used for distributed evaluation, or None."""
        return self._hvd

    @hvd.setter
    def hvd(self, hvd):
        """Set the Horovod handle.

        Args:
            hvd: The Horovod class for distributed training.
        """
        self._hvd = hvd
+
+
class SquadF1(BaseMetric):
    """Evaluate for v1.1 of the SQuAD dataset."""

    def __init__(self):
        """Initialize the score list."""
        # SQuAD scoring is only meaningful once predictions for the whole
        # dataset have been collected, so F1 values are buffered per update().
        self._score_list = []

    def update(self, preds, labels, sample_weight=None):
        """Score a set of predictions against labels and buffer the F1 value.

        Args:
            preds: The predictions.
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight (unused).
        """
        if not preds:
            return
        hvd = getattr(self, "_hvd", None)
        if hvd is not None:
            # Gather predictions/labels from every worker before scoring.
            gathered_preds = hvd.allgather_object(preds)
            gathered_labels = hvd.allgather_object(labels)
            merged_preds, merged_labels = [], []
            for rank in range(hvd.size()):
                merged_preds += gathered_preds[rank]
                merged_labels += gathered_labels[rank]
            preds, labels = merged_preds, merged_labels
        self._score_list.append(evaluate(labels, preds)["f1"])

    def reset(self):
        """Reset the score list."""
        self._score_list = []

    def result(self):
        """Return the mean of all buffered F1 scores (0.0 when empty)."""
        if not self._score_list:
            return 0.0
        return np.array(self._score_list).mean()
+
+
class ParseDecodeBert:
    """Parse three-input BERT features from a serialized tf.train.Example.

    Helper for TensorflowModelZooBertDataset.
    """

    def __call__(self, sample):
        """Extract input_ids/input_mask/segment_ids from one record.

        Args:
            sample: A serialized tf.train.Example to be parsed.

        Returns:
            Tuple of (input_ids, input_mask, segment_ids) values.
        """
        # All three features are variable-length int64 lists.
        feature_map = {
            name: tf.compat.v1.VarLenFeature(dtype=tf.int64)
            for name in ("input_ids", "input_mask", "segment_ids")
        }
        parsed = tf.io.parse_single_example(sample, feature_map)
        return (
            parsed["input_ids"].values,
            parsed["input_mask"].values,
            parsed["segment_ids"].values,
        )
+
+
class TFDataLoader(object):  # pragma: no cover
    """Tensorflow dataloader class.

    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
    method to do session run, this dataloader is designed to satisfy the usage of feed dict
    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.

    Args:
        dataset: obj. wrapper of needed data.
        batch_size: int. batch size
    """

    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
        """Initialize `TFDataDataLoader` class."""
        self.dataset = dataset
        self.last_batch = last_batch
        self.batch_size = batch_size
        # NOTE(review): the batched dataset below is bound to the local name
        # only and then discarded; self.dataset stays unbatched (batching is
        # redone inside _generate_dataloader). Confirm this is intentional.
        dataset = dataset.batch(batch_size)

    def batch(self, batch_size, last_batch="rollover"):
        """Dataset return data per batch."""
        # "rollover" keeps the final partial batch; anything else drops it.
        drop_last = False if last_batch == "rollover" else True
        self.batch_size = batch_size
        self.dataset = self.dataset.batch(batch_size, drop_last)

    def __iter__(self):
        """Iterate dataloader."""
        return self._generate_dataloader(
            self.dataset,
            batch_size=self.batch_size,
            last_batch=self.last_batch,
        )

    def _generate_dataloader(
        self,
        dataset,
        batch_size=1,
        last_batch="rollover",
        collate_fn=None,
        sampler=None,
        batch_sampler=None,
        num_workers=None,
        pin_memory=None,
        distributed=False,
    ):
        """Yield data batches from `dataset`.

        Eager mode: iterates samples and collates them manually.
        Graph mode: runs a one-shot iterator inside a dedicated session;
        datasets with dynamic shapes are batched one sample at a time.
        """
        drop_last = False if last_batch == "rollover" else True

        def check_dynamic_shape(element_spec):
            # True when any tensor in the spec has an unknown dimension.
            if isinstance(element_spec, collections.abc.Sequence):
                return any([check_dynamic_shape(ele) for ele in element_spec])
            elif isinstance(element_spec, tf.TensorSpec):
                return True if element_spec.shape.num_elements() is None else False
            else:
                raise ValueError("unrecognized element spec...")

        def squeeze_output(output):
            # Drop the leading batch-of-one axis added by dataset.batch(1).
            if isinstance(output, collections.abc.Sequence):
                return [squeeze_output(ele) for ele in output]
            elif isinstance(output, np.ndarray):
                return np.squeeze(output, axis=0)
            else:
                raise ValueError("not supported output format....")

        if tf.executing_eagerly():
            # Eager path: accumulate `batch_size` samples, then collate.
            index = 0
            outputs = []
            for iter_tensors in dataset:
                samples = []
                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
                if isinstance(iter_inputs, tf.Tensor):
                    samples.append(iter_inputs.numpy())
                else:
                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
                if isinstance(iter_labels, tf.Tensor):
                    samples.append(iter_labels.numpy())
                else:
                    samples.append([np.array(l) for l in iter_labels])
                index += 1
                outputs.append(samples)
                if index == batch_size:
                    outputs = default_collate(outputs)
                    yield outputs
                    outputs = []
                    index = 0
            # Flush the final partial batch.
            if len(outputs) > 0:
                outputs = default_collate(outputs)
                yield outputs
        else:
            # Graph path: dynamic shapes force batch(1) + manual collation.
            try_single_batch = check_dynamic_shape(dataset.element_spec)
            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
            iter_tensors = ds_iterator.get_next()
            data_config = tf.compat.v1.ConfigProto()
            data_config.use_per_session_threads = 1
            data_config.intra_op_parallelism_threads = 1
            data_config.inter_op_parallelism_threads = 16
            data_sess = tf.compat.v1.Session(config=data_config)
            # pylint: disable=no-name-in-module
            from tensorflow.python.framework.errors_impl import OutOfRangeError

            while True:
                if not try_single_batch:
                    try:
                        outputs = data_sess.run(iter_tensors)
                        yield outputs
                    except OutOfRangeError:
                        data_sess.close()
                        return
                else:
                    try:
                        outputs = []
                        for i in range(0, batch_size):
                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
                        outputs = default_collate(outputs)
                        yield outputs
                    except OutOfRangeError:
                        if len(outputs) == 0:
                            data_sess.close()
                            return
                        else:
                            # Yield the final partial batch before closing.
                            outputs = default_collate(outputs)
                            yield outputs
                            data_sess.close()
                            return
+
+
class ModelZooBertDataset(object):
    """Tensorflow dataset for three-input Bert in tf record format.

    Root is a full path to tfrecord file, which contains the file name.
    Please use Resize transform when batch_size > 1
    Args: root (str): path of dataset.
        label_file (str): path of label file.
        task (str, default='squad'): task type of model.
        model_type (str, default='bert'): model type, support 'bert'.
        transform (transform object, default=None): transform to process input data.
        filter (Filter objects, default=None): filter out examples according.
    """

    def __init__(self, root, label_file, task="squad", model_type="bert", transform=None, filter=None, num_cores=28):
        """Initialize the attributes of class."""
        with open(label_file) as lf:
            label_json = json.load(lf)
            # Only SQuAD v1.1 label files are supported.
            assert label_json["version"] == "1.1", "only support squad 1.1"
            self.label = label_json["data"]

        # Sanity-check the first record to ensure it carries BERT features.
        record_iterator = tf.compat.v1.python_io.tf_record_iterator(root)
        example = tf.train.SequenceExample()
        for element in record_iterator:
            example.ParseFromString(element)
            break
        feature = example.context.feature
        if len(feature["input_ids"].int64_list.value) == 0 and len(feature["input_mask"].int64_list.value) == 0:
            raise ValueError(
                "Tfrecord format is incorrect, please refer\
            'https://github.com/tensorflow/models/blob/master/research/\
            object_detection/dataset_tools/' to create correct tfrecord"
            )
        # pylint: disable=no-name-in-module
        from tensorflow.python.data.experimental import parallel_interleave

        tfrecord_paths = [root]
        ds = tf.data.TFRecordDataset.list_files(tfrecord_paths)
        # Read records in parallel across num_cores readers.
        ds = ds.apply(
            parallel_interleave(
                tf.data.TFRecordDataset,
                cycle_length=num_cores,
                block_length=5,
                sloppy=True,
                buffer_output_elements=10000,
                prefetch_input_elements=10000,
            )
        )
        if transform is not None:
            # Decode raw records before any user-supplied transforms run.
            transform.transform_list.insert(0, ParseDecodeBert())
        else:
            transform = ParseDecodeBert()
        ds = ds.map(transform, num_parallel_calls=None)
        if filter is not None:
            ds = ds.filter(filter)
        ds = ds.prefetch(buffer_size=1000)
        # Materialize every sample eagerly so __getitem__ can index directly.
        ds = TFDataLoader(ds)
        self.root = []
        for inputs in ds:
            self.root.append(inputs)
        self.transform = transform
        self.filter = filter

    def __getitem__(self, index):
        """Magic method.

        x[i] is roughly equivalent to type(x).__getitem__(x, index)
        """
        return self.root[index], self.label

    def __len__(self):
        """Length of the dataset."""
        return len(self.root)
+
+
class TFSquadV1PostTransform(object):
    """Postprocess the predictions of bert on SQuAD.

    Args:
        label_file (str): path of label file
        vocab_file(str): path of vocabulary file
        n_best_size (int, default=20):
            The total number of n-best predictions to generate in nbest_predictions.json
        max_seq_length (int, default=384):
            The maximum total input sequence length after WordPiece tokenization.
            Sequences longer than this will be truncated, shorter than this will be padded
        max_query_length (int, default=64):
            The maximum number of tokens for the question.
            Questions longer than this will be truncated to this length
        max_answer_length (int, default=30):
            The maximum length of an answer that can be generated. This is needed because
            the start and end predictions are not conditioned on one another
        do_lower_case (bool, default=True):
            Whether to lower case the input text.
            Should be True for uncased models and False for cased models
        doc_stride (int, default=128):
            When splitting up a long document into chunks,
            how much stride to take between chunks

    Returns:
        tuple of processed prediction and label
    """

    def __init__(
        self,
        label_file,
        vocab_file,
        n_best_size=20,
        max_seq_length=384,
        max_query_length=64,
        max_answer_length=30,
        do_lower_case=True,
        doc_stride=128,
    ):
        """Initialize `TFSquadV1PostTransform` class."""
        from tokenization import FullTokenizer
        from create_tf_record import read_squad_examples, convert_examples_to_features
        self.eval_examples = read_squad_examples(label_file)
        tokenizer = FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

        self.eval_features = []

        def append_feature(feature):
            # Features are only needed in memory for postprocessing.
            self.eval_features.append(feature)

        convert_examples_to_features(
            examples=self.eval_examples,
            tokenizer=tokenizer,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride,
            max_query_length=max_query_length,
            output_fn=append_feature,
        )

        self.n_best_size = n_best_size
        self.max_answer_length = max_answer_length
        self.do_lower_case = do_lower_case
        # Raw model output for one feature: id plus start/end logit vectors.
        self.RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])

    def process_result(self, results):
        """Get the processed results.

        Converts (unique_ids, start_logits, end_logits) arrays into a list
        of RawResult tuples with plain Python int/float values.
        """
        processed_results = []
        # notice the result list sequence
        for unique_id, start_logits, end_logits in zip(*results):
            processed_results.append(
                self.RawResult(
                    unique_id=int(unique_id),
                    start_logits=[float(x) for x in start_logits.flat],
                    end_logits=[float(x) for x in end_logits.flat],
                )
            )

        return processed_results

    def get_postprocess_result(self, sample):
        """Get the post processed results.

        Decodes start/end logits into answer strings per SQuAD example and
        returns (all_predictions, label). Returns (None, None) while the
        collector has not yet gathered the full dataset.
        """
        if sample == (None, None):
            return (None, None)
        all_results, label = sample
        all_results = self.process_result(all_results)
        # Group features by the example they were derived from.
        example_index_to_features = collections.defaultdict(list)
        for feature in self.eval_features:
            example_index_to_features[feature.example_index].append(feature)

        unique_id_to_result = {}
        for result in all_results:
            unique_id_to_result[result.unique_id] = result

        _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
            "PrelimPrediction", ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]
        )

        all_predictions = collections.OrderedDict()
        for example_index, example in enumerate(self.eval_examples):
            features = example_index_to_features[example_index]

            prelim_predictions = []
            # keep track of the minimum score of null start+end of position 0
            score_null = 1000000  # large and positive
            min_null_feature_index = 0  # the paragraph slice with min mull score
            null_start_logit = 0  # the start logit at the slice with min null score
            null_end_logit = 0  # the end logit at the slice with min null score
            for feature_index, feature in enumerate(features):
                # skip the case that is not predicted
                if feature.unique_id not in unique_id_to_result:
                    all_predictions[example.qas_id] = "*#skip this example#*"
                    continue
                result = unique_id_to_result[feature.unique_id]
                start_indexes = TFSquadV1PostTransform._get_best_indexes(result.start_logits, self.n_best_size)
                end_indexes = TFSquadV1PostTransform._get_best_indexes(result.end_logits, self.n_best_size)

                for start_index in start_indexes:
                    for end_index in end_indexes:
                        # We could hypothetically create invalid predictions, e.g., predict
                        # that the start of the span is in the question. We throw out all
                        # invalid predictions.
                        if start_index >= len(feature.tokens):
                            continue
                        if end_index >= len(feature.tokens):
                            continue
                        if start_index not in feature.token_to_orig_map:
                            continue
                        if end_index not in feature.token_to_orig_map:
                            continue
                        if not feature.token_is_max_context.get(start_index, False):
                            continue
                        if end_index < start_index:
                            continue
                        length = end_index - start_index + 1
                        if length > self.max_answer_length:
                            continue
                        prelim_predictions.append(
                            _PrelimPrediction(
                                feature_index=feature_index,
                                start_index=start_index,
                                end_index=end_index,
                                start_logit=result.start_logits[start_index],
                                end_logit=result.end_logits[end_index],
                            )
                        )

            # Rank candidate spans by their combined start+end logit.
            prelim_predictions = sorted(
                prelim_predictions, key=lambda x: (x.start_logit + x.end_logit), reverse=True
            )
            _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
                "NbestPrediction", ["text", "start_logit", "end_logit"]
            )

            seen_predictions = {}
            nbest = []
            for pred in prelim_predictions:
                if len(nbest) >= self.n_best_size:
                    break
                feature = features[pred.feature_index]
                if pred.start_index > 0:  # this is a non-null prediction
                    tok_tokens = feature.tokens[pred.start_index : (pred.end_index + 1)]
                    orig_doc_start = feature.token_to_orig_map[pred.start_index]
                    orig_doc_end = feature.token_to_orig_map[pred.end_index]
                    orig_tokens = example.doc_tokens[orig_doc_start : (orig_doc_end + 1)]
                    tok_text = " ".join(tok_tokens)

                    # De-tokenize WordPieces that have been split off.
                    tok_text = tok_text.replace(" ##", "")
                    tok_text = tok_text.replace("##", "")

                    # Clean whitespace
                    tok_text = tok_text.strip()
                    tok_text = " ".join(tok_text.split())
                    orig_text = " ".join(orig_tokens)

                    final_text = TFSquadV1PostTransform.get_final_text(tok_text, orig_text, self.do_lower_case)
                    if final_text in seen_predictions:
                        continue

                    seen_predictions[final_text] = True
                else:
                    final_text = ""
                    seen_predictions[final_text] = True

                nbest.append(
                    _NbestPrediction(text=final_text, start_logit=pred.start_logit, end_logit=pred.end_logit)
                )

            # In very rare edge cases we could have no valid predictions. So we
            # just create a nonce prediction in this case to avoid failure.
            if not nbest:
                nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

            assert len(nbest) >= 1

            total_scores = []
            best_non_null_entry = None
            for entry in nbest:
                total_scores.append(entry.start_logit + entry.end_logit)
                if not best_non_null_entry:
                    if entry.text:
                        best_non_null_entry = entry
            probs = TFSquadV1PostTransform._compute_softmax(total_scores)

            nbest_json = []
            for i, entry in enumerate(nbest):
                output = collections.OrderedDict()
                output["text"] = entry.text
                output["probability"] = probs[i]
                output["start_logit"] = entry.start_logit
                output["end_logit"] = entry.end_logit
                nbest_json.append(output)

            assert len(nbest_json) >= 1
            # The answer for this example is the text of the top n-best entry.
            all_predictions[example.qas_id] = nbest_json[0]["text"]
        return (all_predictions, label)

    @staticmethod
    def _get_best_indexes(logits, n_best_size):
        """Get the n-best logits from a list."""
        index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)

        best_indexes = []
        for i in range(len(index_and_score)):
            if i >= n_best_size:
                break
            best_indexes.append(index_and_score[i][0])
        return best_indexes

    @staticmethod
    def _compute_softmax(scores):
        """Compute softmax probability over raw logits."""
        import math

        if not scores:
            return []

        # Subtract the max score for numerical stability.
        max_score = None
        for score in scores:
            if max_score is None or score > max_score:
                max_score = score

        exp_scores = []
        total_sum = 0.0
        for score in scores:
            x = math.exp(score - max_score)
            exp_scores.append(x)
            total_sum += x

        probs = []
        for score in exp_scores:
            probs.append(score / total_sum)
        return probs

    @staticmethod
    def get_final_text(pred_text, orig_text, do_lower_case):
        """Project the tokenized prediction back to the original text."""
        import six

        from tokenization import BasicTokenizer

        def _strip_spaces(text):
            # Returns text without spaces plus a map from stripped index to
            # original index.
            ns_chars = []
            ns_to_s_map = collections.OrderedDict()
            for i, c in enumerate(text):
                if c == " ":
                    continue
                ns_to_s_map[len(ns_chars)] = i
                ns_chars.append(c)
            ns_text = "".join(ns_chars)
            return (ns_text, ns_to_s_map)

        tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        tok_text = " ".join(tokenizer.tokenize(orig_text))
        start_position = tok_text.find(pred_text)
        if start_position == -1:
            # Fall back to the original text when alignment fails.
            return orig_text
        end_position = start_position + len(pred_text) - 1

        (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
        (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)

        if len(orig_ns_text) != len(tok_ns_text):
            return orig_text

        tok_s_to_ns_map = {}
        for i, tok_index in six.iteritems(tok_ns_to_s_map):
            tok_s_to_ns_map[tok_index] = i

        orig_start_position = None
        if start_position in tok_s_to_ns_map:
            ns_start_position = tok_s_to_ns_map[start_position]
            if ns_start_position in orig_ns_to_s_map:
                orig_start_position = orig_ns_to_s_map[ns_start_position]

        if orig_start_position is None:
            return orig_text

        orig_end_position = None
        if end_position in tok_s_to_ns_map:
            ns_end_position = tok_s_to_ns_map[end_position]
            if ns_end_position in orig_ns_to_s_map:
                orig_end_position = orig_ns_to_s_map[ns_end_position]

        if orig_end_position is None:
            return orig_text

        output_text = orig_text[orig_start_position : (orig_end_position + 1)]
        return output_text

    def __call__(self, sample):
        """Call the get_postprocess_result."""
        return self.get_postprocess_result(sample)
+
+
class CollectTransform(object):
    """Accumulate per-sample logits until a full dataset worth is collected."""

    def __init__(self, length=10833):
        """Initialize `CollectTransform` class.

        Args:
            length: Total number of samples expected for the dataset.
        """
        self.length = length
        self.unique_id = []
        self.start_logits = []
        self.end_logits = []
        self.all_sample = (None, None)
        # Synthetic unique ids start at 1e9, mirroring the feature writer.
        self.idx = 1000000000

    def __call__(self, sample):
        """Collect one batch of raw results; return (None, None) until full."""
        all_results, label = sample
        for raw in all_results:
            if len(self.unique_id) >= self.length:
                continue
            # Add a leading batch axis, then move the logit axis to the front.
            stacked = np.expand_dims(raw, 0).transpose(2, 0, 1)
            self.unique_id.append(self.idx)
            self.start_logits.append(stacked[0])
            self.end_logits.append(stacked[1])
            self.idx += 1
        if len(self.unique_id) == self.length:
            self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label)
        return self.all_sample
+
+
class TFModelZooCollectTransform(CollectTransform):
    """Collect postprocess data for Model Zoo style (start, end) logit outputs."""

    def __call__(self, sample):
        """Buffer start/end logits from one batch; return (None, None) until full.

        Args:
            sample: Tuple of (all_results, label) where all_results holds the
                start logits and end logits for the batch.
        """
        all_results, label = sample
        if len(all_results) == 1:
            # A single fused output: split into (start/end, batch=1, seq=384).
            all_results = all_results.reshape((2, 1, 384))
        for start_logits, end_logits in zip(all_results[0], all_results[1]):
            if len(self.unique_id) >= self.length:
                break
            self.unique_id.append(self.idx)
            self.start_logits.append(start_logits)
            self.end_logits.append(end_logits)
            self.idx += 1
        if len(self.unique_id) == self.length:
            self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label)
        return self.all_sample
+
+
class TFSquadV1ModelZooPostTransform(TFSquadV1PostTransform):
    """Postprocess the predictions of bert on SQuADV1.1.

    Same contract as TFSquadV1PostTransform, but raw per-batch outputs are
    first accumulated with TFModelZooCollectTransform before decoding.
    """

    def __init__(
        self,
        label_file,
        vocab_file,
        n_best_size=20,
        max_seq_length=384,
        max_query_length=64,
        max_answer_length=30,
        do_lower_case=True,
        doc_stride=128,
    ):
        """Initialize `TFSquadV1ModelZooPostTransform` class."""
        super().__init__(
            label_file,
            vocab_file,
            n_best_size,
            max_seq_length,
            max_query_length,
            max_answer_length,
            do_lower_case,
            doc_stride,
        )
        # The collector needs the full feature count to signal completion.
        self.length = len(self.eval_features)
        self.collect_data = TFModelZooCollectTransform(length=self.length)

    def __call__(self, sample):
        """Collect one batch and run postprocessing once all data is gathered."""
        return self.get_postprocess_result(self.collect_data(sample))
+
+
class ModelZooBertDataLoader(BaseDataLoader):  # pragma: no cover
    """This dataloader is designed to satisfy the usage of Model Zoo Bert models."""

    def _generate_dataloader(
        self,
        dataset,
        batch_size,
        last_batch,
        collate_fn,
        sampler,
        batch_sampler,
        num_workers,
        pin_memory,
        shuffle,
        distributed,
    ):
        # Collate [(inputs, label), ...] into ([input_ids, input_mask,
        # segment_ids], label); the first element's label is reused for the
        # whole batch (all elements share the same SQuAD label data).
        def bert_collate_fn(batch):
            input_ids = []
            input_mask = []
            segment_ids = []
            for elem in batch:
                input_ids.append(elem[0][0][0])
                input_mask.append(elem[0][1][0])
                segment_ids.append(elem[0][2][0])
            inputs = [input_ids, input_mask, segment_ids]
            return inputs, batch[0][1]

        # "rollover" keeps the final partial batch; anything else drops it.
        drop_last = False if last_batch == "rollover" else True
        sampler = self._generate_sampler(dataset, distributed)
        self.batch_sampler = BatchSampler(sampler, batch_size, drop_last)
        self.fetcher = IterableFetcher(dataset, bert_collate_fn, drop_last, distributed)

        inputs = []
        for batched_indices in self.batch_sampler:
            try:
                data = self.fetcher(batched_indices)
                yield data
            except StopIteration:
                return
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py
new file mode 100644
index 00000000000..7f99b3507fc
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py
@@ -0,0 +1,138 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Run BERT on SQuAD 1.1 and SQuAD 2.0."""
+import os
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from data_process import SquadF1, ModelZooBertDataset, TFSquadV1ModelZooPostTransform, ModelZooBertDataLoader
+
+flags = tf.compat.v1.flags
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ 'input_model', None, 'Run inference with specified pb graph.')
+
+flags.DEFINE_string(
+ 'output_model', None, 'The output model of the quantized model.')
+
+flags.DEFINE_string(
+ 'mode', 'performance', 'define benchmark mode for accuracy or performance')
+
+flags.DEFINE_bool(
+ 'tune', False, 'whether to tune the model')
+
+flags.DEFINE_bool(
+ 'benchmark', False, 'whether to benchmark the model')
+
+flags.DEFINE_bool(
+ 'strip_iterator', False, 'whether to strip the iterator of the model')
+
+flags.DEFINE_string('dataset_location', None,
+ 'location of calibration dataset and evaluate dataset')
+
+flags.DEFINE_integer("batch_size", 64, "run batch size")
+
+flags.DEFINE_integer("iters", 100, "The iteration used for benchmark.")
+
+
def evaluate(model, dataloader, metric, postprocess):
    """Custom evaluate function to estimate the accuracy of the bert model.

    Args:
        model (tf.Graph_def): The input model graph.
        dataloader: Dataloader yielding (inputs, labels) batches.
        metric: Metric object accumulating scores across batches.
        postprocess: Transform applied to (predictions, labels) before scoring.

    Returns:
        accuracy (float): evaluation result, the larger is better.
    """
    from neural_compressor.tensorflow.utils import Model, BaseModel
    if not isinstance(model, BaseModel):
        model = Model(model)
    model.input_tensor_names = ['input_ids', 'input_mask', 'segment_ids']
    model.output_tensor_names = ['start_logits', 'end_logits']
    input_tensor = model.input_tensor
    output_tensor = model.output_tensor if len(model.output_tensor) > 1 else \
        model.output_tensor[0]
    # Only cap the number of iterations when benchmarking performance.
    iteration = -1
    if FLAGS.benchmark and FLAGS.mode == 'performance':
        iteration = FLAGS.iters

    warmup = 5
    latency_list = []
    for idx, (inputs, labels) in enumerate(dataloader):
        # dataloader should keep the order and len of inputs same with input_tensor
        assert len(input_tensor) == len(inputs), \
            'inputs len must equal with input_tensor'
        feed_dict = dict(zip(input_tensor, inputs))
        start_time = time.time()
        predictions = model.sess.run(output_tensor, feed_dict)
        latency_list.append(time.time() - start_time)
        predictions, labels = postprocess((predictions, labels))
        metric.update(predictions, labels)
        if idx + 1 == iteration:
            break

    # Discard warmup iterations and report per-sample latency.
    latency = np.array(latency_list[warmup:]).mean() / FLAGS.batch_size

    if FLAGS.benchmark and FLAGS.mode == 'performance':
        print("Batch size = {}".format(FLAGS.batch_size))
        print("Latency: {:.3f} ms".format(latency * 1000))
        # Text workload: report samples/sec (was mislabeled "images/sec").
        print("Throughput: {:.3f} samples/sec".format(1. / latency))
    acc = metric.result()
    return acc
+
def main(_):
    """Entry point: benchmark or quantize the BERT SQuAD model per FLAGS."""
    tf.compat.v1.disable_eager_execution()
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    # The dataset folder is expected to hold the eval TFRecord, the SQuAD
    # dev-set json and the BERT vocabulary file.
    data_path = os.path.join(FLAGS.dataset_location, 'eval.tf_record')
    label_path = os.path.join(FLAGS.dataset_location, 'dev-v1.1.json')
    vocab_path = os.path.join(FLAGS.dataset_location, 'vocab.txt')

    dataset = ModelZooBertDataset(root=data_path, label_file=label_path)
    dataloader = ModelZooBertDataLoader(dataset=dataset, batch_size=FLAGS.batch_size)

    def eval(model):
        # Build a fresh metric/postprocess pair for each evaluation run.
        metric = SquadF1()
        postprocess = TFSquadV1ModelZooPostTransform(label_file=label_path, vocab_file=vocab_path)
        return evaluate(model, dataloader, metric, postprocess)

    if FLAGS.benchmark:
        if FLAGS.mode == 'performance':
            eval(FLAGS.input_model)
        elif FLAGS.mode == 'accuracy':
            acc_result = eval(FLAGS.input_model)
            print("Batch size = %d" % dataloader.batch_size)
            print("Accuracy: %.5f" % acc_result)

    elif FLAGS.tune:
        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model

        # Static post-training quantization calibrated with the dataloader.
        model = Model(FLAGS.input_model)
        model.input_tensor_names = ['input_ids', 'input_mask', 'segment_ids']
        model.output_tensor_names = ['start_logits', 'end_logits']
        quant_config = StaticQuantConfig()
        q_model = quantize_model(model, quant_config, dataloader)
        q_model.save(FLAGS.output_model)

if __name__ == "__main__":
    tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh
new file mode 100644
index 00000000000..acae8ce944d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# set -x
+
+OUTPUT_DIR="./data"
+
+help()
+{
+ cat <<- EOF
+ Desc: Prepare bert dataset
+ -h --help help info
+ --output_dir Output data directory
+ default: './data'
+EOF
+ exit 0
+}
+
+function main {
+ init_params "$@"
+ convert_dataset
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --output_dir=*)
+ OUTPUT_DIR=$(echo $var |cut -f2 -d=)
+ ;;
+ -h|--help) help
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+}
+
+# convert dataset
+function convert_dataset {
+  if [ -d "${OUTPUT_DIR}" ]; then
+    echo "${OUTPUT_DIR} already exists, please check..."
+  fi
+ wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip
+ unzip wwm_uncased_L-24_H-1024_A-16.zip
+ wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -P wwm_uncased_L-24_H-1024_A-16
+ mv wwm_uncased_L-24_H-1024_A-16 ${OUTPUT_DIR}
+
+}
+
+main "$@"
+
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..d42132a4e87
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt
@@ -0,0 +1 @@
+intel-tensorflow>=2.12.0
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..aa8d269a79a
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=100
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input_model=${input_model} \
+ --mode=${mode} \
+ --dataset_location=${dataset_location} \
+ --batch_size=${batch_size} \
+      --benchmark
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..ddc30b40177
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# set -x
+
+function main {
+
+ init_params "$@"
+
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ batch_size=64
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input_model=${input_model} \
+ --output_model=${output_model} \
+ --dataset_location=${dataset_location} \
+ --batch_size=${batch_size} \
+      --tune
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py
new file mode 100644
index 00000000000..77c3175db07
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py
@@ -0,0 +1,402 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+import unicodedata
+import six
+import tensorflow as tf
+
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+ """Checks whether the casing config is consistent with the checkpoint name."""
+
+ # The casing has to be passed in by the user and there is no explicit check
+ # as to whether it matches the checkpoint. The casing information probably
+ # should have been stored in the bert_config.json file, but it's not, so
+ # we have to heuristically detect it to validate.
+
+ if not init_checkpoint:
+ return
+
+ m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+ if m is None:
+ return
+
+ model_name = m.group(1)
+
+ lower_models = [
+ "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+ "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
+ ]
+
+ cased_models = [
+ "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+ "multi_cased_L-12_H-768_A-12"
+ ]
+
+ is_bad_config = False
+ if model_name in lower_models and not do_lower_case:
+ is_bad_config = True
+ actual_flag = "False"
+ case_name = "lowercased"
+ opposite_flag = "True"
+
+ if model_name in cased_models and do_lower_case:
+ is_bad_config = True
+ actual_flag = "True"
+ case_name = "cased"
+ opposite_flag = "False"
+
+ if is_bad_config:
+ raise ValueError(
+ "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+ "However, `%s` seems to be a %s model, so you "
+ "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+ "how the model was pre-training. If this error is wrong, please "
+ "just comment out this check." % (actual_flag, init_checkpoint,
+ model_name, case_name, opposite_flag))
+
+
+def convert_to_unicode(text):
+ """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
+ if six.PY3:
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, bytes):
+ return text.decode("utf-8", "ignore")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ elif six.PY2:
+ if isinstance(text, str):
+ return text.decode("utf-8", "ignore")
+ elif isinstance(text, unicode):
+ return text
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ else:
+ raise ValueError("Not running on Python2 or Python 3?")
+
+
+def printable_text(text):
+ """Returns text encoded in a way suitable for print or `tf.logging`."""
+
+ # These functions want `str` for both Python2 and Python3, but in one case
+ # it's a Unicode string and in the other it's a byte string.
+ if six.PY3:
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, bytes):
+ return text.decode("utf-8", "ignore")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ elif six.PY2:
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, unicode):
+ return text.encode("utf-8")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ else:
+ raise ValueError("Not running on Python2 or Python 3?")
+
+
+def load_vocab(vocab_file):
+ """Loads a vocabulary file into a dictionary."""
+ vocab = collections.OrderedDict()
+ index = 0
+ with tf.io.gfile.GFile(vocab_file, "r") as reader:
+ while True:
+ token = convert_to_unicode(reader.readline())
+ if not token:
+ break
+ token = token.strip()
+ vocab[token] = index
+ index += 1
+ return vocab
+
+
+def convert_by_vocab(vocab, items):
+ """Converts a sequence of [tokens|ids] using the vocab."""
+ output = []
+ for item in items:
+ output.append(vocab[item])
+ return output
+
+
+def convert_tokens_to_ids(vocab, tokens):
+ return convert_by_vocab(vocab, tokens)
+
+
+def convert_ids_to_tokens(inv_vocab, ids):
+ return convert_by_vocab(inv_vocab, ids)
+
+
+def whitespace_tokenize(text):
+ """Runs basic whitespace cleaning and splitting on a piece of text."""
+ text = text.strip()
+ if not text:
+ return []
+ tokens = text.split()
+ return tokens
+
+
+class FullTokenizer(object):
+ """Runs end-to-end tokenziation."""
+
+ def __init__(self, vocab_file, do_lower_case=True):
+ self.vocab = load_vocab(vocab_file)
+ self.inv_vocab = {v: k for k, v in self.vocab.items()}
+ self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+ self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+ def tokenize(self, text):
+ split_tokens = []
+ for token in self.basic_tokenizer.tokenize(text):
+ for sub_token in self.wordpiece_tokenizer.tokenize(token):
+ split_tokens.append(sub_token)
+
+ return split_tokens
+
+ def convert_tokens_to_ids(self, tokens):
+ return convert_by_vocab(self.vocab, tokens)
+
+ def convert_ids_to_tokens(self, ids):
+ return convert_by_vocab(self.inv_vocab, ids)
+
+
+class BasicTokenizer(object):
+ """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+ def __init__(self, do_lower_case=True):
+ """Constructs a BasicTokenizer.
+
+ Args:
+ do_lower_case: Whether to lower case the input.
+ """
+ self.do_lower_case = do_lower_case
+
+ def tokenize(self, text):
+ """Tokenizes a piece of text."""
+ text = convert_to_unicode(text)
+ text = self._clean_text(text)
+
+ # This was added on November 1st, 2018 for the multilingual and Chinese
+ # models. This is also applied to the English models now, but it doesn't
+ # matter since the English models were not trained on any Chinese data
+ # and generally don't have any Chinese data in them (there are Chinese
+ # characters in the vocabulary because Wikipedia does have some Chinese
+ # words in the English Wikipedia.).
+ text = self._tokenize_chinese_chars(text)
+
+ orig_tokens = whitespace_tokenize(text)
+ split_tokens = []
+ for token in orig_tokens:
+ if self.do_lower_case:
+ token = token.lower()
+ token = self._run_strip_accents(token)
+ split_tokens.extend(self._run_split_on_punc(token))
+
+ output_tokens = whitespace_tokenize(" ".join(split_tokens))
+ return output_tokens
+
+ def _run_strip_accents(self, text):
+ """Strips accents from a piece of text."""
+ text = unicodedata.normalize("NFD", text)
+ output = []
+ for char in text:
+ cat = unicodedata.category(char)
+ if cat == "Mn":
+ continue
+ output.append(char)
+ return "".join(output)
+
+ def _run_split_on_punc(self, text):
+ """Splits punctuation on a piece of text."""
+ chars = list(text)
+ i = 0
+ start_new_word = True
+ output = []
+ while i < len(chars):
+ char = chars[i]
+ if _is_punctuation(char):
+ output.append([char])
+ start_new_word = True
+ else:
+ if start_new_word:
+ output.append([])
+ start_new_word = False
+ output[-1].append(char)
+ i += 1
+
+ return ["".join(x) for x in output]
+
+ def _tokenize_chinese_chars(self, text):
+ """Adds whitespace around any CJK character."""
+ output = []
+ for char in text:
+ cp = ord(char)
+ if self._is_chinese_char(cp):
+ output.append(" ")
+ output.append(char)
+ output.append(" ")
+ else:
+ output.append(char)
+ return "".join(output)
+
+ def _is_chinese_char(self, cp):
+ """Checks whether CP is the codepoint of a CJK character."""
+ # This defines a "chinese character" as anything in the CJK Unicode block:
+ # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+ #
+ # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+ # despite its name. The modern Korean Hangul alphabet is a different block,
+ # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+ # space-separated words, so they are not treated specially and handled
+ # like the all of the other languages.
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
+ (cp >= 0x3400 and cp <= 0x4DBF) or #
+ (cp >= 0x20000 and cp <= 0x2A6DF) or #
+ (cp >= 0x2A700 and cp <= 0x2B73F) or #
+ (cp >= 0x2B740 and cp <= 0x2B81F) or #
+ (cp >= 0x2B820 and cp <= 0x2CEAF) or
+ (cp >= 0xF900 and cp <= 0xFAFF) or #
+ (cp >= 0x2F800 and cp <= 0x2FA1F)): #
+ return True
+
+ return False
+
+ def _clean_text(self, text):
+ """Performs invalid character removal and whitespace cleanup on text."""
+ output = []
+ for char in text:
+ cp = ord(char)
+ if cp == 0 or cp == 0xfffd or _is_control(char):
+ continue
+ if _is_whitespace(char):
+ output.append(" ")
+ else:
+ output.append(char)
+ return "".join(output)
+
+
+class WordpieceTokenizer(object):
+ """Runs WordPiece tokenziation."""
+
+ def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
+ self.vocab = vocab
+ self.unk_token = unk_token
+ self.max_input_chars_per_word = max_input_chars_per_word
+
+ def tokenize(self, text):
+ """Tokenizes a piece of text into its word pieces.
+
+ This uses a greedy longest-match-first algorithm to perform tokenization
+ using the given vocabulary.
+
+ For example:
+ input = "unaffable"
+ output = ["un", "##aff", "##able"]
+
+ Args:
+ text: A single token or whitespace separated tokens. This should have
+        already been passed through `BasicTokenizer`.
+
+ Returns:
+ A list of wordpiece tokens.
+ """
+
+ text = convert_to_unicode(text)
+
+ output_tokens = []
+ for token in whitespace_tokenize(text):
+ chars = list(token)
+ if len(chars) > self.max_input_chars_per_word:
+ output_tokens.append(self.unk_token)
+ continue
+
+ is_bad = False
+ start = 0
+ sub_tokens = []
+ while start < len(chars):
+ end = len(chars)
+ cur_substr = None
+ while start < end:
+ substr = "".join(chars[start:end])
+ if start > 0:
+ substr = "##" + substr
+ if substr in self.vocab:
+ cur_substr = substr
+ break
+ end -= 1
+ if cur_substr is None:
+ is_bad = True
+ break
+ sub_tokens.append(cur_substr)
+ start = end
+
+ if is_bad:
+ output_tokens.append(self.unk_token)
+ else:
+ output_tokens.extend(sub_tokens)
+ return output_tokens
+
+
+def _is_whitespace(char):
+ """Checks whether `chars` is a whitespace character."""
+ # \t, \n, and \r are technically control characters but we treat them
+ # as whitespace since they are generally considered as such.
+ if char == " " or char == "\t" or char == "\n" or char == "\r":
+ return True
+ cat = unicodedata.category(char)
+ if cat == "Zs":
+ return True
+ return False
+
+
+def _is_control(char):
+ """Checks whether `chars` is a control character."""
+ # These are technically control characters but we count them as whitespace
+ # characters.
+ if char == "\t" or char == "\n" or char == "\r":
+ return False
+ cat = unicodedata.category(char)
+ if cat in ("Cc", "Cf"):
+ return True
+ return False
+
+
+def _is_punctuation(char):
+ """Checks whether `chars` is a punctuation character."""
+ cp = ord(char)
+ # We treat all non-letter/number ASCII as punctuation.
+ # Characters such as "^", "$", and "`" are not in the Unicode
+ # Punctuation class but we treat them as punctuation anyways, for
+ # consistency.
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+ (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+ return True
+ cat = unicodedata.category(char)
+ if cat.startswith("P"):
+ return True
+ return False
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md
new file mode 100644
index 00000000000..7a8c22631e0
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md
@@ -0,0 +1,141 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor smooth quantization of language models gpt-j-6B.
+
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+pip install -r requirements.txt
+```
+
+## 2. Prepare Pretrained model
+Run the following script to download gpt-j-6B saved_model to ```./gpt-j-6B```:
+ ```
+bash prepare_model.sh
+ ```
+
+## 3. Install TensorFlow 2.11.dev202242
+Build a TensorFlow pip package from [intel-tensorflow spr_ww42 branch](https://github.com/Intel-tensorflow/tensorflow/tree/spr_ww42) and install it. How to build a TensorFlow pip package from source please refer to this [tutorial](https://www.tensorflow.org/install/source).
+
+The performance of int8 gpt-j-6B would be better once intel-tensorflow for gnr is released.
+
+## 4. Prepare Dataset
+The dataset will be automatically loaded.
+
+# Run
+
+## Smooth Quantization
+
+```shell
+bash run_quant.sh --input_model= --output_model=
+```
+
+## Benchmark
+
+### Evaluate Performance
+
+```shell
+bash run_benchmark.sh --input_model= --mode=performance
+```
+
+### Evaluate Accuracy
+
+```shell
+bash run_benchmark.sh --input_model= --mode=accuracy
+```
+
+
+Details of enabling Intel® Neural Compressor on gpt-j-6B for TensorFlow
+=========================
+
+This is a tutorial of how to enable gpt-j-6B model with Intel® Neural Compressor.
+## User Code Analysis
+
+User specifies fp32 *model*, calibration dataloader *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataloader and metric by itself.
+
+### calib_dataloader Part Adaption
+Below dataloader class uses generator function to provide the model with input.
+
+```python
+class MyDataloader:
+ def __init__(self, dataset, batch_size=1):
+ self.dataset = dataset
+ self.batch_size = batch_size
+ self.length = math.ceil(len(dataset) / self.batch_size)
+
+ def generate_data(self, data, pad_token_id=50256):
+ input_ids = tf.convert_to_tensor([data[:-1]], dtype=tf.int32)
+ cur_len = len(data)-1
+ input_ids_padding = tf.ones((self.batch_size, 1), dtype=tf.int32) * (pad_token_id or 0)
+ generated = tf.concat([input_ids, input_ids_padding], axis=-1)
+ model_kwargs = {'attention_mask': prepare_attention_mask_for_generation(input_ids)}
+ if model_kwargs.get("past_key_values") is None:
+ input_ids = generated[:, :cur_len]
+ else:
+ input_ids = tf.expand_dims(generated[:, cur_len - 1], -1)
+ return model_kwargs['attention_mask'], input_ids
+
+ def __iter__(self):
+ labels = None
+ for _, data in enumerate(self.dataset):
+ cur_input = self.generate_data(data)
+ yield (cur_input, labels)
+
+ def __len__(self):
+ return self.length
+```
+
+
+### Code Update
+After prepare step is done, we add the code for quantization tuning to generate quantized model.
+
+Firstly, let's load a INC inner class model from the path of gpt-j-6B saved_model.
+```python
+ from neural_compressor import Model
+ model = Model(run_args.input_model, modelType='llm_saved_model')
+```
+
+#### Tune
+
+To apply quantization, the function that maps names from AutoTrackable variables to graph nodes must be defined to match names of nodes in a different format.
+```python
+ def weight_name_mapping(name):
+ """The function that maps name from AutoTrackable variables to graph nodes"""
+ name = name.replace('tfgptj_for_causal_lm', 'StatefulPartitionedCall')
+ name = name.replace('kernel:0', 'Tensordot/ReadVariableOp')
+ return name
+```
+
+Please use the recipe to set smooth quantization.
+```python
+ from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune
+ from neural_compressor.tensorflow.quantization import TuningConfig
+ from neural_compressor.tensorflow.utils import BaseDataLoader
+
+ calib_dataloader = MyDataloader(mydata, batch_size=run_args.batch_size)
+ quant_config = [SmoothQuantConfig(alpha=0.52705), StaticQuantConfig(act_dtype="int8", weight_dtype="int8")]
+ tune_config = TuningConfig(config_set=quant_config, max_trials=1)
+ model.weight_name_mapping = weight_name_mapping
+ q_model = autotune(model,
+ tune_config,
+ eval_fn=evaluate,
+ calib_dataloader=calib_dataloader)
+ q_model.save(run_args.output_model)
+```
+#### Benchmark
+```python
+ if run_args.mode == "performance":
+ evaluate(model.model)
+ elif run_args.mode == "accuracy":
+ acc_result = evaluate(model.model)
+ print("Batch size = %d" % run_args.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+```
+
+The Intel® Neural Compressor autotune() function will return a best quantized model under time constraint.
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py
new file mode 100644
index 00000000000..faf54b65bd0
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py
@@ -0,0 +1,349 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import time
+import math
+import numpy as np
+import logging
+import datasets
+import tensorflow as tf
+from typing import Optional
+from itertools import chain
+from datasets import load_dataset
+from collections import defaultdict
+from dataclasses import dataclass, field
+
+import transformers
+from transformers import (
+ TF_MODEL_FOR_CAUSAL_LM_MAPPING,
+ AutoConfig,
+ AutoTokenizer,
+ HfArgumentParser,
+ TFAutoModelForCausalLM,
+ TFTrainingArguments,
+ set_seed,
+)
+from transformers.utils.versions import require_version
+
+logger = logging.getLogger(__name__)
+require_version("datasets>=1.8.0", "To fix: pip install -r requirements.txt")
+MODEL_CONFIG_CLASSES = list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.keys())
+MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
+
+@dataclass
+class ModelArguments:
+ """
+ Arguments pertaining to which model/config/tokenizer we are going to use.
+ """
+
+ model_name_or_path: Optional[str] = field(
+ default="EleutherAI/gpt-j-6B",
+ metadata={
+ "help": (
+ "The model checkpoint for GPT-J weights."
+ )
+ },
+ )
+ config_overrides: Optional[str] = field(
+ default=None,
+ metadata={
+ "help": (
+ "Override some existing default config settings when a model is trained from scratch. Example: "
+ "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
+ )
+ },
+ )
+ checkpoint: Optional[str] = field(
+ default=None,
+ metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
+ )
+ use_fast_tokenizer: bool = field(
+ default=True,
+ metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
+ )
+ precision: Optional[str] = field(
+ default="fp32",
+ metadata={"help": "The precision that we want to run with."},
+ )
+
+
+
+@dataclass
+class DataTrainingArguments:
+ """
+ Arguments pertaining to what data we are going to input our model for evaluation.
+ """
+
+ dataset_name: Optional[str] = field(
+ default="EleutherAI/lambada_openai", metadata={"help": "The name of the dataset to use (via the datasets library)."}
+ )
+ dataset_config_name: Optional[str] = field(
+ default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+ )
+ block_size: Optional[int] = field(
+ default=None,
+ metadata={
+ "help": (
+ "Optional input sequence length after tokenization. "
+ "The training dataset will be truncated in block of this size for training. "
+ "Default to the model max input length for single sentence inputs (take into account special tokens)."
+ )
+ },
+ )
+ preprocessing_num_workers: Optional[int] = field(
+ default=None,
+ metadata={"help": "The number of processes to use for the preprocessing."},
+ )
+
+
+@dataclass
+class RunningArguments:
+ """
+ Arguments for options of running.
+ """
+
+ input_model: Optional[str] = field(
+ default="./gpt-j-6B",
+ metadata={
+ "help": (
+ "The path of input model."
+ )
+ },
+ )
+ output_model: Optional[str] = field(
+ default="./nc_int8_gpt-j-6B",
+ metadata={
+ "help": (
+ "The path save quantized gpt-j-6B int8 model."
+ )
+ },
+ )
+ tune: bool = field(
+ default=False,
+ metadata={"help": "Whether to apply quantization."},
+ )
+ benchmark: bool = field(
+ default=False,
+ metadata={"help": "Whether to apply benchmarking."},
+ )
+ mode: Optional[str] = field(
+ default="performance",
+ metadata={"help": ("Evaluate performance or accuracy benchmark."
+ "Set it to be accuracy or performance.")},
+ )
+ batch_size: Optional[int] = field(
+ default=1,
+ metadata={"help": "The number of processes to use for the preprocessing."},
+ )
+ iteration: Optional[int] = field(
+ default=200,
+ metadata={"help": "The number of processes to use for the preprocessing."},
+ )
+
+
+
+parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments, RunningArguments))
+model_args, data_args, train_args, run_args = parser.parse_args_into_dataclasses()
+
+logger.setLevel(logging.INFO)
+datasets.utils.logging.set_verbosity_warning()
+transformers.utils.logging.set_verbosity_info()
+
+if train_args.seed is not None:
+ set_seed(train_args.seed)
+
+raw_datasets = load_dataset(
+ data_args.dataset_name,
+ data_args.dataset_config_name,
+ cache_dir=model_args.checkpoint,
+ use_auth_token=None,
+ )
+
+config = AutoConfig.from_pretrained(model_args.model_name_or_path)
+tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)
+column_names = raw_datasets["test"].column_names
+text_column_name = "text" if "text" in column_names else column_names[0]
+
+mydata = tokenizer(raw_datasets["test"][text_column_name], return_tensors="np").input_ids
+
+
+def prepare_attention_mask_for_generation(
+ inputs: tf.Tensor,
+ pad_token_id=50256,
+ eos_token_id=50256,
+) -> tf.Tensor:
+ """Generate attention_mask from input_ids.
+
+ Args:
+ inputs (tf.Tensor): The tensor of input_ids.
+
+ Returns:
+ attention_mask (tf.Tensor): The tensor of attention_mask.
+ """
+ is_input_ids = len(inputs.shape) == 2 and inputs.dtype in (tf.int32, tf.int64)
+ is_pad_token_in_inputs = (pad_token_id is not None) and tf.math.reduce_any(inputs == pad_token_id)
+ is_pad_token_not_equal_to_eos_token_id = (eos_token_id is None) or (pad_token_id != eos_token_id)
+
+ # Check if input is input_ids and padded -> only then is attention_mask defined
+ attention_mask = tf.cast(tf.math.not_equal(inputs, pad_token_id), dtype=tf.int32) \
+ if is_input_ids and is_pad_token_in_inputs and is_pad_token_not_equal_to_eos_token_id \
+ else tf.ones(inputs.shape[:2], dtype=tf.int32)
+
+ return attention_mask
+
+class MyDataloader:
+ def __init__(self, dataset, batch_size=1):
+ self.dataset = dataset
+ self.batch_size = batch_size
+ self.length = math.ceil(len(dataset) / self.batch_size)
+
+ def generate_data(self, data, pad_token_id=50256):
+ input_ids = tf.convert_to_tensor([data[:-1]], dtype=tf.int32)
+ cur_len = len(data)-1
+ input_ids_padding = tf.ones((self.batch_size, 1), dtype=tf.int32) * (pad_token_id or 0)
+ generated = tf.concat([input_ids, input_ids_padding], axis=-1)
+ model_kwargs = {'attention_mask': prepare_attention_mask_for_generation(input_ids)}
+ if model_kwargs.get("past_key_values") is None:
+ input_ids = generated[:, :cur_len]
+ else:
+ input_ids = tf.expand_dims(generated[:, cur_len - 1], -1)
+ return model_kwargs['attention_mask'], input_ids
+
+ def __iter__(self):
+ labels = None
+ for _, data in enumerate(self.dataset):
+ cur_input = self.generate_data(data)
+ yield (cur_input, labels)
+
+ def __len__(self):
+ return self.length
+
+def postprocess(outputs, generated, batch_size, cur_len):
+ """The function that processes the inference outputs to prediction"""
+ finished_sequences = tf.convert_to_tensor([False])
+ next_token_logits = outputs['logits'][:, -1]
+ # pre-process distribution
+ next_tokens_scores = next_token_logits
+ # argmax
+ next_tokens = tf.argmax(next_tokens_scores, axis=-1, output_type=tf.int32)
+
+ pad_token_id = 50256
+ eos_token_id = [50256]
+
+ unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
+ next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
+ next_token_is_eos = tf.math.reduce_any(
+ tf.equal(
+ tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
+ ),
+ axis=0,
+ )
+ finished_sequences = finished_sequences | next_token_is_eos
+
+ # update `generated` and `cur_len`
+ update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
+ return tf.tensor_scatter_nd_update(tensor=generated, indices=update_indices, updates=next_tokens)
+
+def evaluate(model, tf_eval_dataset=mydata):
+ """Evaluate function that inference the model to apply calibration or benchmarking.
+
+ Args:
+ model (tf.python.trackable.autotrackable): The model to be evaluated.
+ The object is usually gotten by using tf.saved_model.load(model_dir) API.
+
+ Returns:
+ accuracy (float): The accuracy result.
+ """
+ warmup = 5
+ batch_size = run_args.batch_size
+ pad_token_id = 50256
+ iteration = run_args.iteration
+ correct = 0
+ latency_list = []
+ from neural_compressor.tensorflow.utils import BaseModel
+
+ if isinstance(model, BaseModel):
+ model = model.model
+ infer = model.signatures["serving_default"]
+ for idx, data in enumerate(tf_eval_dataset):
+ input_ids = tf.convert_to_tensor([data[:-1]], dtype=tf.int32)
+ cur_len = len(data)-1
+ input_ids_padding = tf.ones((batch_size, 1), dtype=tf.int32) * (pad_token_id or 0)
+ generated = tf.concat([input_ids, input_ids_padding], axis=-1)
+ input_ids = generated[:, :cur_len]
+ attention_mask = prepare_attention_mask_for_generation(input_ids)
+ inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}
+
+ start = time.time()
+ outputs = infer(**inputs)
+ end = time.time()
+ dur = end-start
+
+ predictions = postprocess(outputs, generated, batch_size, cur_len)
+ if data[-1] == predictions[0][-1].numpy():
+ correct+=1
+
+ latency_list.append(dur)
+ if idx >= iteration:
+ break
+ latency = np.array(latency_list[warmup:]).mean() / 1
+ acc = correct/(iteration+1)
+ if run_args.benchmark and run_args.mode == 'performance':
+ print("Batch size = {}".format(run_args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ return acc
+
+def weight_name_mapping(name):
+ """The function that maps name from AutoTrackable variables to graph nodes"""
+ name = name.replace('tfgptj_for_causal_lm', 'StatefulPartitionedCall')
+ name = name.replace('kernel:0', 'Tensordot/ReadVariableOp')
+ return name
+
+def main():
+ with train_args.strategy.scope():
+ options = tf.data.Options()
+ options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
+ from neural_compressor.tensorflow import Model
+ model = Model(run_args.input_model, modelType='llm_saved_model')
+
+ if run_args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune
+ from neural_compressor.tensorflow.quantization import TuningConfig
+ from neural_compressor.tensorflow.utils import BaseDataLoader
+
+ calib_dataloader = MyDataloader(mydata, batch_size=run_args.batch_size)
+ quant_config = [SmoothQuantConfig(alpha=0.52705), StaticQuantConfig(act_dtype="int8", weight_dtype="int8")]
+ tune_config = TuningConfig(config_set=quant_config, max_trials=1)
+ model.weight_name_mapping = weight_name_mapping
+ q_model = autotune(model,
+ tune_config,
+ eval_fn=evaluate,
+ calib_dataloader=calib_dataloader)
+ q_model.save(run_args.output_model)
+ if run_args.benchmark:
+ if run_args.mode == "performance":
+ evaluate(model.model)
+ elif run_args.mode == "accuracy":
+ acc_result = evaluate(model.model)
+ print("Batch size = %d" % run_args.batch_size)
+ print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py
new file mode 100644
index 00000000000..cb4cd7f3f29
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py
@@ -0,0 +1,23 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from transformers import AutoTokenizer, TFAutoModelForCausalLM
+
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+model = TFAutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
+model.save_pretrained("./gpt-j-6B", saved_model=True)
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh
new file mode 100644
index 00000000000..67e59f983f5
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh
@@ -0,0 +1,6 @@
+pip install transformers==4.25.0
+python prepare_model.py
+mv ./gpt-j-6B/saved_model/1 ./
+rm -r ./gpt-j-6B
+mv ./1 ./gpt-j-6B
+pip install transformers==4.35
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt
new file mode 100644
index 00000000000..23c79d8bbd3
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt
@@ -0,0 +1,4 @@
+tensorflow==2.12
+transformers
+datasets==2.17
+numpy
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh
new file mode 100644
index 00000000000..75196199bce
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=1
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+ python main.py \
+ --input_model ${input_model} \
+ --mode ${mode} \
+ --batch_size ${batch_size} \
+ --benchmark \
+ --output_dir "./outputs"
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh
new file mode 100644
index 00000000000..e8ad1f1dc19
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input_model=${input_model} \
+ --output_model=${output_model} \
+ --output_dir="./outputs" \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md
new file mode 100644
index 00000000000..fa45adbd5ef
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md
@@ -0,0 +1,52 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor quantization and smooth quantization of language models such as OPT and GPT2.
+
+## Prerequisite
+
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+pip install -r requirements.txt
+```
+## Run
+
+
+### Basic quantization
+
+```
+python main.py --model_name_or_path <MODEL_NAME>
+```
+
+`<MODEL_NAME>` can be one of the following:
+
+- gpt2-medium
+- facebook/opt-125m
+
+### Smooth quant
+
+```shell
+bash run_quant.sh --input_model=<MODEL_NAME>
+```
+
+Or you can use
+
+```
+python main.py --model_name_or_path <MODEL_NAME> --sq
+```
+
+## Benchmark
+
+### Get the FP32 performance
+
+```shell
+bash run_benchmark.sh --input_model=<MODEL_NAME>
+```
+
+### Get the INT8 performance
+
+```shell
+bash run_benchmark.sh --input_model=<MODEL_NAME> --int8=true
+```
+
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py
new file mode 100644
index 00000000000..673d50c034f
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py
@@ -0,0 +1,190 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os.path
+import transformers
+import tensorflow as tf
+from tqdm import tqdm
+import sys
+import argparse
+from datasets import load_dataset
+import numpy as np
+import time
+
+sys.path.insert(0, './')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--int8', action='store_true', help="eval fp32 model or int8 model")
+parser.add_argument('--model_name_or_path', type=str, default='facebook/opt-125m')
+parser.add_argument('--batch_size', type=int, default=16)
+parser.add_argument('--warmup', type=int, default=10)
+args = parser.parse_args()
+
+class Evaluator:
+ def __init__(self, dataset, tokenizer, device, batch_size=args.batch_size):
+ self.dataset = dataset
+ self.tokenizer = tokenizer
+ self.device = device
+ self.dataloader = INCDataloader(dataset, tokenizer, batch_size, device)
+
+ def evaluate(self, model):
+ # model.eval()
+ # The task is to predict the last word of the input.
+ total, hit = 0, 0
+ index = 1
+ for input_ids, label, label_indices in tqdm(self.dataloader):
+ # TFCausalLMOutputWithPast len: 2
+ # first element shape (16, 196, 50272)
+ # second element shape (16, 12, 196, 64)
+ outputs = model(input_ids)
+ last_token_logits = outputs[0].numpy()[np.arange(len(label_indices)), label_indices, :]
+ pred = last_token_logits.argmax(axis=-1)
+ total += label.shape[0]
+ hit += (pred == label.numpy()).sum().item()
+ index += 1
+ acc = hit / total
+ print(acc, flush=True)
+ return acc
+
+ def get_attention_mask(self, input_ids):
+ return tf.constant(1 - (input_ids==1).numpy().astype(int))
+
+ def evaluate_tf_v1(self, model):
+ total, hit = 0, 0
+ index = 1
+ infer = model.signatures["serving_default"]
+ overall_infer_duration = 0
+ for input_ids, label, label_indices in tqdm(self.dataloader):
+ attention_mask = self.get_attention_mask(input_ids)
+ input_ids = tf.constant(input_ids.numpy(), dtype=infer.inputs[0].dtype)
+ attention_mask = tf.constant(attention_mask.numpy(), dtype=infer.inputs[0].dtype)
+ start = time.time()
+ results = infer(input_ids=input_ids, attention_mask=attention_mask) # len: 25 Identity: [16, 196, 50272], Identity_1: [16, 12, 196, 64]
+ batch_infer_time = time.time() - start
+ if index > args.warmup:
+ overall_infer_duration += batch_infer_time
+ last_token_logits = results['Identity'].numpy()[np.arange(len(label_indices)), label_indices, :]
+ pred = last_token_logits.argmax(axis=-1)
+ total += label.shape[0]
+ hit += (pred == label.numpy()).sum().item()
+ index += 1
+ acc = hit / total
+ print("\nEvaluation result: ")
+ print(f"Batch size = {args.batch_size}")
+ print(f"Accuracy: {acc}")
+ print(
+ f"Throughput: {(len(self.dataloader) - args.warmup * args.batch_size) / overall_infer_duration} samples/sec"
+ )
+
+class INCDataloader:
+ # for_calib=True in quantization, only input_id is needed, =False in evaluation need label
+ def __init__(self, dataset, tokenizer, batch_size=1, device='cpu', for_calib=False):
+ self.dataset = dataset
+ self.tokenizer = tokenizer
+ self.device = device
+ self.batch_size = batch_size
+ self.for_calib = for_calib
+ import math
+ self.length = math.ceil(len(dataset) / self.batch_size) # batch number
+ self.pad_len = 196
+
+ # tokenize the dataset
+ def tokenize_function(examples):
+ example = self.tokenizer(examples['text'])
+ return example
+
+ self.dataset = self.dataset.map(tokenize_function, batched=True)
+ self.dataset.set_format(type='tensorflow', columns=['input_ids'])
+ def get_attention_mask(self, input_ids):
+ return 1 - (input_ids==1).numpy().astype(int)
+ def pad_input(self, input): # input: a record
+ input_id = input['input_ids']
+ if input_id.numpy().shape[0] > self.pad_len: # truncate the sequence to pad_len if the sequence is longer than pad_len
+ input_id = input_id[:self.pad_len]
+ label = input_id[-1]
+ pad_len = self.pad_len - input_id.numpy().shape[0]
+ label_index = -2 - pad_len # last logit index
+ input_id = tf.pad(input_id, tf.constant([[0,pad_len]]), constant_values=1)
+ input_id = tf.expand_dims(input_id, axis=0)
+ label = tf.expand_dims(label, axis=0)
+ return (input_id, label, label_index)
+
+ def __iter__(self):
+ if self.for_calib:
+ labels = None
+ # label_indices = None
+ for idx, record in enumerate(self.dataset):
+ input_id, label, label_index = self.pad_input(record)
+ attention_mask = self.get_attention_mask(input_id)
+ # compose attention_mask and input_id together
+ # during the calibration, it requires to yield a
+ # cur_input = tf.constant(np.append(attention_mask, input_id.numpy(), axis=0))
+ cur_input = {"input_ids": input_id.numpy(), "attention_mask": attention_mask}
+ assert self.batch_size == 1
+ yield (cur_input, label)
+ else:
+ input_ids = None
+ labels = None
+ label_indices = None
+ for idx, record in enumerate(self.dataset):
+ input_id, label, label_index = self.pad_input(record)
+ if input_ids is None:
+ input_ids = input_id
+ labels = label
+ label_indices = [label_index]
+ else:
+ input_ids = tf.concat([input_ids, input_id], 0)
+ labels = tf.concat([labels, label], 0)
+
+ label_indices.append(label_index)
+
+ if (idx + 1) % self.batch_size == 0:
+ yield (input_ids, labels, label_indices)
+ input_ids = None
+ labels = None
+ label_indices = None
+ if (idx + 1) % self.batch_size != 0:
+ yield (input_ids, labels, label_indices)
+
+ def __len__(self):
+ return self.length
+
+from datasets import load_dataset
+
+model_name = args.model_name_or_path
+tokenizer = transformers.AutoTokenizer.from_pretrained(
+ model_name,
+)
+eval_dataset = load_dataset('lambada', split='validation')
+
+evaluator = Evaluator(eval_dataset, tokenizer, 'cpu')
+
+if args.int8:
+ print("benchmarking int8 model")
+ int8_folder = model_name.split('/')[-1] + "_int8"
+ if not os.path.exists(int8_folder):
+ print(f"could not find int8 folder {int8_folder} ")
+ exit()
+ model = tf.saved_model.load(int8_folder) # tensorflow.python.trackable.autotrackable.AutoTrackable object
+else:
+ print("benchmaking fp32 model")
+ model = transformers.TFAutoModelForCausalLM.from_pretrained(model_name)
+ from neural_compressor.tensorflow import Model
+
+ model = Model(model).model # tensorflow.python.trackable.autotrackable.AutoTrackable object
+
+evaluator.evaluate_tf_v1(model)
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py
new file mode 100644
index 00000000000..8f012ceb404
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py
@@ -0,0 +1,140 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import transformers
+import tensorflow as tf
+from tqdm import tqdm
+import sys
+import argparse
+from datasets import load_dataset
+import numpy as np
+
+sys.path.insert(0, './')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--sq', action='store_true', default=False, help="whether to use smooth quant")
+parser.add_argument('--model_name_or_path', type=str, default="facebook/opt-125m")
+parser.add_argument('--alpha', type=float, default=0.5, help="alpha value for smoothing.")
+parser.add_argument('--log_frequency', type=int, default=100)
+parser.add_argument('--batch_size', type=int, default=16)
+parser.add_argument('--kl', action='store_true', default=False, help="whether to use kl divergence for calibration")
+parser.add_argument('--fallback_add', action='store_true', default=False, help="Whether to add fp32 fallback option" )
+args = parser.parse_args()
+
+class CustomDataloader:
+ # for_calib=True in quantization, only input_id is needed, =False in evaluation need label
+ def __init__(self, dataset, tokenizer, batch_size=1, device='cpu', for_calib=False):
+ self.dataset = dataset
+ self.tokenizer = tokenizer
+ self.device = device
+ self.batch_size = batch_size
+ self.for_calib = for_calib
+ import math
+ self.length = math.ceil(len(dataset) / self.batch_size) # batch number
+ self.pad_len = 196
+
+ # tokenize the dataset
+ def tokenize_function(examples):
+ example = self.tokenizer(examples['text'])
+ return example
+
+ self.dataset = self.dataset.map(tokenize_function, batched=True)
+ self.dataset.set_format(type='tensorflow', columns=['input_ids'])
+ def get_attention_mask(self, input_ids):
+ return 1 - (input_ids==1).numpy().astype(int)
+ def pad_input(self, input): # input: a record
+ input_id = input['input_ids']
+ if input_id.numpy().shape[0] > self.pad_len: # truncate the sequence to pad_len if the sequence is longer than pad_len
+ input_id = input_id[:self.pad_len]
+ label = input_id[-1]
+ pad_len = self.pad_len - input_id.numpy().shape[0]
+ label_index = -2 - pad_len # last logit index
+ input_id = tf.pad(input_id, tf.constant([[0,pad_len]]), constant_values=1) # TODO need to check why pad with 1
+ input_id = tf.expand_dims(input_id, axis=0)
+ label = tf.expand_dims(label, axis=0)
+ return (input_id, label, label_index)
+
+ def __iter__(self):
+ if self.for_calib:
+ labels = None
+ for idx, record in enumerate(self.dataset):
+ input_id, label, label_index = self.pad_input(record)
+ attention_mask = self.get_attention_mask(input_id)
+ cur_input = {"input_ids": input_id.numpy(), "attention_mask": attention_mask}
+ assert self.batch_size == 1
+ yield (cur_input, label)
+ else:
+ input_ids = None
+ labels = None
+ label_indices = None
+ for idx, record in enumerate(self.dataset):
+ input_id, label, label_index = self.pad_input(record)
+ if input_ids is None:
+ input_ids = input_id
+ labels = label
+ label_indices = [label_index]
+ else:
+ input_ids = tf.concat([input_ids, input_id], 0)
+ labels = tf.concat([labels, label], 0)
+
+ label_indices.append(label_index)
+
+ if (idx + 1) % self.batch_size == 0:
+ yield (input_ids, labels, label_indices)
+ input_ids = None
+ labels = None
+ label_indices = None
+ if (idx + 1) % self.batch_size != 0:
+ yield (input_ids, labels, label_indices)
+
+ def __len__(self):
+ return self.length
+
+
+model_name = args.model_name_or_path
+
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+model = transformers.TFAutoModelForCausalLM.from_pretrained(model_name)
+
+calib_dataset = load_dataset('lambada', split='validation')
+calib_dataset = calib_dataset.shuffle(seed=42)
+calib_dataloader = CustomDataloader(calib_dataset, tokenizer, device='cpu', batch_size=1, for_calib=True)
+
+from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model
+
+ptq_config = None
+quant_config = []
+
+if args.sq:
+ quant_config.append(SmoothQuantConfig(alpha=args.alpha))
+if args.kl:
+ ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8", act_algorithm="kl")
+if args.fallback_add:
+ ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8")
+ ptq_config.set_local("Add", StaticQuantConfig(act_dtype="fp32", weight_dtype="fp32"))
+
+if not ptq_config:
+ ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8")
+quant_config.append(ptq_config)
+
+q_model = quantize_model(model,
+ quant_config,
+ calib_dataloader=calib_dataloader)
+
+save_model_name = model_name.split("/")[-1]
+q_model.save(f"{save_model_name}_int8")
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt
new file mode 100644
index 00000000000..3486c09473c
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt
@@ -0,0 +1,3 @@
+tensorflow==2.15
+datasets
+transformers==4.35
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh
new file mode 100644
index 00000000000..b8fad17eebd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ int8=false
+ batch_size=16
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --int8=*)
+ int8=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_benchmark {
+ if [[ "${int8}" == "true" ]]; then
+ python benchmark.py \
+ --model_name_or_path ${input_model} \
+ --batch_size ${batch_size} \
+ --int8
+ else
+ python benchmark.py \
+ --model_name_or_path ${input_model} \
+ --batch_size ${batch_size}
+ fi
+
+}
+
+main "$@"
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh
new file mode 100644
index 00000000000..4295060acb9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --sq=*)
+ sq=$(echo ${var} |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+
+ ext_cmd=""
+ if [[ ${sq} == "True" ]]; then
+ ext_cmd="--sq"
+ fi
+ python main.py \
+ --model_name_or_path ${input_model} \
+ ${ext_cmd}
+}
+
+main "$@"
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md
new file mode 100644
index 00000000000..544e954371e
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md
@@ -0,0 +1,130 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of Transformer-LT. This example can run on Intel CPUs and GPUs.
+
+## Prerequisite
+
+### 1. Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### 2. Install Tensorflow
+```shell
+pip install tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### 3. Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation.
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+### 4. Prepare Dataset & Pretrained model
+
+```shell
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_2_0/transformer-lt-official-fp32-inference.tar.gz
+tar -zxvf transformer-lt-official-fp32-inference.tar.gz
+cd transformer-lt-official-fp32-inference
+tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz
+```
+
+Dataset is in data folder, pretrained model is in graph folder.
+
+#### Automatic dataset & model download
+Run the `prepare_dataset_model.sh` script located in `examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq`.
+
+```shell
+cd examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq
+bash prepare_dataset_model.sh
+```
+
+## Run Command
+### Quantization
+
+```shell
+bash run_quant.sh --input_model=./model/fp32_graphdef.pb --dataset_location=./data --output_model=./model/int8_graphdef.pb
+```
+### Benchmark
+```shell
+bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=performance
+
+bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=accuracy --batch_size=1
+```
+
+Details of enabling Intel® Neural Compressor on transformer-lt for Tensorflow.
+=========================
+
+This is a tutorial of how to enable transformer-lt model with Intel® Neural Compressor.
+
+### q_dataloader Part Adaption
+Below dataset class uses getitem to provide the model with input.
+
+```python
+class Dataset(object):
+ def __init__(self, *args):
+ # initialize dataset related info here
+ ...
+
+ def __getitem__(self, index):
+ data = self.batch[index]
+ label = self.ref_lines[index]
+ return data[0], label
+
+ def __len__(self):
+ return len(self.batch)
+```
+
+### Evaluation Part Adaption
+We evaluate the model with BLEU score, its source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py
+
+Here we set the input tensor and output tensors name into *inputs* and *outputs* args.
+In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object.
+
+### Code update
+After prepare step is done, we add tune code to generate quantized model.
+
+#### Tune
+```python
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
+ calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn)
+
+ quant_config = StaticQuantConfig()
+ model = Model(graph)
+ model.input_tensor_names = ['input_tensor']
+ model.output_tensor_names = ['model/Transformer/strided_slice_19']
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ try:
+ q_model.save(FLAGS.output_model)
+ except Exception as e:
+ print("Failed to save model due to {}".format(str(e)))
+```
+#### Benchmark
+```python
+ if FLAGS.benchmark:
+ assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \
+ "Benchmark only supports performance or accuracy mode."
+ acc = eval_func(graph)
+ if FLAGS.mode == 'accuracy':
+ print('Accuracy is {:.3f}'.format(acc))
+```
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py
new file mode 100644
index 00000000000..58a93090e7a
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py
@@ -0,0 +1,258 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+import re
+import six
+import sys
+import time
+import numpy as np
+import unicodedata
+import pandas as pd
+from absl import app
+import tensorflow as tf
+from argparse import ArgumentParser
+
+from utils import metrics
+from utils import tokenizer
+from utils.tokenizer import Subtokenizer
+from neural_compressor.tensorflow.utils import BaseDataLoader
+
+flags = tf.compat.v1.flags
+FLAGS = flags.FLAGS
+
+flags.DEFINE_integer("batch_size", 64,
+ "run batch size")
+
+flags.DEFINE_string("input_graph", None,
+ "The path of input model file.")
+
+flags.DEFINE_string("inputs_file", None,
+ "File saved to an output file.")
+
+flags.DEFINE_string("reference_file", None,
+ "File containing reference translation.")
+
+flags.DEFINE_string("vocab_file", None,
+ "Path to subtoken vocabulary file.")
+
+flags.DEFINE_string("output_model", None,
+ "The output model of the quantized model.")
+
+flags.DEFINE_bool('tune', False,
+ 'whether to tune the model')
+
+flags.DEFINE_bool('benchmark', False,
+ 'whether to benchmark the model')
+
+flags.DEFINE_string("mode", 'performance',
+ "One of three options: 'performance'/'accuracy'.")
+
+flags.DEFINE_integer("iters", 100,
+ "The iteration used for benchmark.")
+
+class UnicodeRegex(object):
+ def __init__(self):
+ punctuation = self.property_chars("P")
+ self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])")
+ self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])")
+ self.symbol_re = re.compile("([" + self.property_chars("S") + "])")
+
+ def property_chars(self, prefix):
+ return "".join(six.unichr(x) for x in range(sys.maxunicode)
+ if unicodedata.category(six.unichr(x)).startswith(prefix))
+
+uregex = UnicodeRegex()
+
+def bleu_tokenize(string):
+ string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string)
+ string = uregex.punct_nondigit_re.sub(r" \1 \2", string)
+ string = uregex.symbol_re.sub(r" \1 ", string)
+ return string.split()
+
+class bleu(object):
+ def __init__(self):
+ self.translations = []
+ self.labels = []
+
+ def reset(self):
+ self.translations = []
+ self.labels = []
+
+ def update(self, pred, label):
+ if len(label) != len(pred):
+ raise ValueError("Reference and translation files have different number "
+ "of lines. If training only a few steps (100-200), the "
+ "translation may be empty.")
+ label = [x.lower() for x in label]
+ pred = [x.lower() for x in pred]
+ label = [bleu_tokenize(x) for x in label]
+ pred = [bleu_tokenize(x) for x in pred]
+ self.labels.extend(label)
+ self.translations.extend(pred)
+
+ def result(self):
+ return metrics.compute_bleu(self.labels, self.translations) * 100
+
+def collate_fn(batch):
+ """Puts each data field into a pd frame with outer dimension batch size"""
+ elem = batch[0]
+ if isinstance(elem, tuple):
+ batch = zip(*batch)
+ return [collate_fn(samples) for samples in batch]
+ elif isinstance(elem, np.ndarray):
+ return [list(elem) for elem in batch]
+ elif isinstance(elem, str):
+ return batch
+ else:
+ return pd.DataFrame(batch).fillna(0).values.astype(np.int32)
+
+def load_graph(file_name):
+ tf.compat.v1.logging.info('Loading graph from: ' + file_name)
+ with tf.io.gfile.GFile(file_name, "rb") as f:
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(f.read())
+ with tf.Graph().as_default() as graph:
+ tf.import_graph_def(graph_def, name='')
+ return graph
+
+def eval_func(infer_graph, iteration=-1):
+ if isinstance(infer_graph, tf.compat.v1.GraphDef):
+ graph = tf.Graph()
+ with graph.as_default():
+ tf.import_graph_def(infer_graph, name='')
+ infer_graph = graph
+
+ subtokenizer = Subtokenizer(FLAGS.vocab_file)
+ input_tensor = infer_graph.get_tensor_by_name('input_tensor:0')
+ output_tensor = infer_graph.get_tensor_by_name(\
+ 'model/Transformer/strided_slice_19:0')
+
+ ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
+ dataloader = BaseDataLoader(dataset=ds, batch_size=FLAGS.batch_size, collate_fn=collate_fn)
+ config = tf.compat.v1.ConfigProto()
+ config.use_per_session_threads = 1
+ config.inter_op_parallelism_threads = 1
+ sess = tf.compat.v1.Session(graph=infer_graph, config=config)
+ iteration=-1
+ time_list = []
+ bleu_eval = bleu()
+ predictions = []
+ labels = []
+ warmup = 10
+ if FLAGS.benchmark and FLAGS.mode == 'performance':
+ iteration = FLAGS.iters
+ assert iteration >= warmup, 'iteration must be larger than warmup'
+
+ for idx, (input_data, label) in enumerate(dataloader):
+ if idx < iteration or iteration == -1:
+ time_start = time.time()
+ out = sess.run([output_tensor], {input_tensor: input_data})
+ duration = time.time() - time_start
+ time_list.append(duration)
+ predictions.append(out)
+ labels.extend(label)
+ else:
+ break
+
+ latency = np.array(time_list[warmup: ]).mean() / FLAGS.batch_size
+ if FLAGS.benchmark and FLAGS.mode == 'performance':
+ print('Batch size = {}'.format(FLAGS.batch_size))
+ print('Latency: {:.3f} ms'.format(latency * 1000))
+ print('Throughput: {:.3f} items/sec'.format(1./ latency))
+
+ # only calculate accuracy when running out all predictions
+ if iteration == -1:
+ decode = []
+ for i,tr in enumerate(predictions):
+ for j,itr in enumerate(tr):
+ for k, otr in enumerate(itr):
+ try:
+ index = list(otr).index(tokenizer.EOS_ID)
+ decode.append(subtokenizer.decode(otr[:index]))
+ except:
+ decode.append(subtokenizer.decode(otr))
+ bleu_eval.update(decode, labels)
+ return bleu_eval.result()
+
+class Dataset(object):
+ def __init__(self, inputs_file, reference_file, vocab_file):
+ with tf.io.gfile.GFile(inputs_file) as f:
+ records = f.read().split("\n")
+ inputs = [record.strip() for record in records]
+ if not inputs[-1]:
+ inputs.pop()
+
+ self.ref_lines = tokenizer.native_to_unicode(
+ tf.io.gfile.GFile(reference_file).read()).strip().splitlines()
+
+ subtokenizer = Subtokenizer(vocab_file)
+ self.batch = []
+ token_lens=[]
+ for i, line in enumerate(inputs):
+ enc = subtokenizer.encode(line, add_eos=True)
+ token_lens.append((i, len(enc)))
+
+ sorted_by_token_input_lens = sorted(token_lens, key=lambda x: x[1], reverse=True)
+
+ sorted_inputs = [None] * len(sorted_by_token_input_lens)
+ sorted_keys = [0] * len(sorted_by_token_input_lens)
+
+ lines = []
+ for i, (index, _) in enumerate(sorted_by_token_input_lens):
+ sorted_inputs[i] = inputs[index]
+ sorted_keys[index] = i
+ enc=subtokenizer.encode(sorted_inputs[i], add_eos=True)
+ lines.append([enc])
+ for i in sorted_keys:
+ self.batch.append(lines[i])
+
+ def __getitem__(self, index):
+ data = self.batch[index]
+ label = self.ref_lines[index]
+ return data[0], label
+
+ def __len__(self):
+ return len(self.batch)
+
+def main(_):
+ graph = load_graph(FLAGS.input_graph)
+ if FLAGS.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
+ calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn)
+
+ quant_config = StaticQuantConfig()
+ model = Model(graph)
+ model.input_tensor_names = ['input_tensor']
+ model.output_tensor_names = ['model/Transformer/strided_slice_19']
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ try:
+ q_model.save(FLAGS.output_model)
+ except Exception as e:
+ print("Failed to save model due to {}".format(str(e)))
+
+ if FLAGS.benchmark:
+ assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \
+ "Benchmark only supports performance or accuracy mode."
+ acc = eval_func(graph)
+ if FLAGS.mode == 'accuracy':
+ print('Accuracy is {:.3f}'.format(acc))
+
+if __name__ == "__main__":
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh
new file mode 100644
index 00000000000..3d47dbad80c
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# set -x
+
+DATA_DIR="../data"
+MODEL_DIR="../model"
+
# Print usage information and exit.
help()
{
    cat <<- EOF
    Desc: Prepare the Transformer-LT dataset and pretrained model
    -h --help              help info
    --data_dir             Output data directory
                           default: '../data'
    --model_dir            Output model directory
                           default: '../model'
EOF
    exit 0
}
+
# Entry point: parse CLI flags, then download and unpack dataset + model.
main() {
    init_params "$@"
    prepare
}
+
# Parse command-line flags into DATA_DIR / MODEL_DIR.
# Unknown flags are an error; -h/--help prints usage.
function init_params {
    for var in "$@"
    do
        case $var in
            --data_dir=*)
                # Quote "$var" so paths with spaces/globs survive intact.
                DATA_DIR=$(echo "$var" | cut -f2 -d=)
                ;;
            --model_dir=*)
                MODEL_DIR=$(echo "$var" | cut -f2 -d=)
                ;;
            -h|--help) help
                ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
                ;;
        esac
    done
}
+
# prepare data and model
# Download the pretrained transformer-lt archive and move its data/graph
# directories to DATA_DIR / MODEL_DIR.
function prepare {
    # Original code tested "! -d" while printing "already exists" -- the
    # condition was inverted and single quotes suppressed expansion.
    if [ -d "${DATA_DIR}" ]; then
        echo "${DATA_DIR} already exists, please check..."
    fi
    if [ -d "${MODEL_DIR}" ]; then
        echo "${MODEL_DIR} already exists, please check..."
    fi
    wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_2_0/transformer-lt-official-fp32-inference.tar.gz
    tar -zxvf transformer-lt-official-fp32-inference.tar.gz
    cd transformer-lt-official-fp32-inference || exit 1
    tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz
    # NOTE: DATA_DIR/MODEL_DIR are resolved relative to the archive directory
    # entered above (original behavior preserved).
    mv transformer_lt_official_fp32_pretrained_model/data "${DATA_DIR}"
    mv transformer_lt_official_fp32_pretrained_model/graph "${MODEL_DIR}"
}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..87bc4c7d5c1
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# set -x
+
# Entry point: parse flags, validate the requested mode, run the benchmark.
function main {

    init_params "$@"

    # Previously define_mode was defined but never invoked, so an invalid
    # --mode fell through to the Python script; validate it up front.
    define_mode

    run_benchmark

}
+
# init params
# Parse command-line flags; unknown flags are silently ignored.
function init_params {
    # Default iteration count for performance mode; overridable via --iters.
    iters=100
    for var in "$@"
    do
        case $var in
            --dataset_location=*)
                dataset_location=$(echo "$var" | cut -f2 -d=)
                ;;
            --input_model=*)
                input_model=$(echo "$var" | cut -f2 -d=)
                ;;
            --mode=*)
                # "$var" quoted consistently (three branches were unquoted).
                mode=$(echo "$var" | cut -f2 -d=)
                ;;
            --batch_size=*)
                batch_size=$(echo "$var" | cut -f2 -d=)
                ;;
            --iters=*)
                iters=$(echo "$var" | cut -f2 -d=)
                ;;
        esac
    done

}
+
# Validate that ${mode} is one of the two supported benchmark modes.
function define_mode {
    case ${mode} in
        accuracy|performance)
            # Supported mode; leave ${mode} as-is.
            ;;
        *)
            echo "Error: No such mode: ${mode}"
            exit 1
            ;;
    esac
}
+
# run_benchmark
# Launch the Python benchmark with the parsed flags. All expansions are
# quoted so paths containing spaces do not word-split.
function run_benchmark {
    python main.py \
        --input_graph="${input_model}" \
        --inputs_file="${dataset_location}/newstest2014.en" \
        --reference_file="${dataset_location}/newstest2014.de" \
        --vocab_file="${dataset_location}/vocab.txt" \
        --benchmark \
        --mode="${mode}" \
        --iters="${iters}" \
        --batch_size="${batch_size}"
}
+
+main "$@"
+
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..2f2075cf346
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# set -x
+
# Entry point: parse CLI flags, then launch quantization tuning.
main() {
    init_params "$@"
    run_tuning
}
+
# init params
# Parse command-line flags; unknown flags are silently ignored.
function init_params {
    for var in "$@"; do
        case $var in
            --dataset_location=*) dataset_location=$(echo "$var" | cut -f2 -d=) ;;
            --input_model=*)      input_model=$(echo "$var" | cut -f2 -d=) ;;
            --output_model=*)     output_model=$(echo "$var" | cut -f2 -d=) ;;
        esac
    done
}
+
# run_tuning
# Launch quantization tuning. All expansions are quoted so paths containing
# spaces do not word-split (they were unquoted before).
function run_tuning {
    python main.py \
        --input_graph="${input_model}" \
        --inputs_file="${dataset_location}/newstest2014.en" \
        --reference_file="${dataset_location}/newstest2014.de" \
        --vocab_file="${dataset_location}/vocab.txt" \
        --output_model="${output_model}" \
        --tune
}
+
+main "$@"
+
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py
new file mode 100644
index 00000000000..3e41f985c63
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py
@@ -0,0 +1,490 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions for calculating loss, accuracy, and other model metrics.
+
+Metrics:
+ - Padded loss, accuracy, and negative log perplexity. Source:
+ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/metrics.py
+ - BLEU approximation. Source:
+ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py
+ - ROUGE score. Source:
+ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/rouge.py
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import math
+
+import numpy as np
+import six
+from six.moves import xrange # pylint: disable=redefined-builtin
+import tensorflow as tf
+
+
def _pad_tensors_to_same_length(x, y):
  """Pad x and y so that the results have the same length (second dimension).

  Args:
    x: 3-D tensor (has a trailing vocab-like dimension, per its pad spec).
    y: 2-D tensor.

  Returns:
    (x, y) zero-padded along dimension 1 up to max(len_x, len_y).
  """
  with tf.name_scope("pad_to_same_length"):
    x_length = tf.shape(x)[1]
    y_length = tf.shape(y)[1]

    max_length = tf.maximum(x_length, y_length)

    # x gets a third [0, 0] pad spec because of its extra trailing dimension.
    x = tf.pad(x, [[0, 0], [0, max_length - x_length], [0, 0]])
    y = tf.pad(y, [[0, 0], [0, max_length - y_length]])
    return x, y
+
+
def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size):
  """Calculate cross entropy loss while ignoring padding.

  Args:
    logits: Tensor of size [batch_size, length_logits, vocab_size]
    labels: Tensor of size [batch_size, length_labels]
    smoothing: Label smoothing constant, used to determine the on and off values
    vocab_size: int size of the vocabulary
  Returns:
    Returns the cross entropy loss and weight tensors: float32 tensors with
    shape [batch_size, max(length_logits, length_labels)]
  """
  # NOTE(review): relies on TF1-only APIs (tf.name_scope(values=...),
  # tf.to_float, tf.log); requires tf.compat.v1 semantics.
  with tf.name_scope("loss", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)

    # Calculate smoothing cross entropy
    with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]):
      confidence = 1.0 - smoothing
      # Remaining probability mass is spread over the other vocab_size - 1
      # classes.
      low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
      soft_targets = tf.one_hot(
          tf.cast(labels, tf.int32),
          depth=vocab_size,
          on_value=confidence,
          off_value=low_confidence)
      xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(
          logits=logits, labels=soft_targets)

      # Calculate the best (lowest) possible value of cross entropy, and
      # subtract from the cross entropy loss.
      normalizing_constant = -(
          confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) *
          low_confidence * tf.log(low_confidence + 1e-20))
      xentropy -= normalizing_constant

    # Zero-weight the padding positions (label id 0).
    weights = tf.to_float(tf.not_equal(labels, 0))
    return xentropy * weights, weights
+
+
def _convert_to_eval_metric(metric_fn):
  """Wrap a metric fn that returns scores and weights as an eval metric fn.

  The input metric_fn returns values for the current batch. The wrapper
  aggregates the return values collected over all of the batches evaluated.

  Args:
    metric_fn: function that returns scores and weights for the current batch's
      logits and predicted labels.

  Returns:
    function that aggregates the scores and weights from metric_fn.
  """
  def problem_metric_fn(*args):
    """Returns an aggregation of the metric_fn's returned values."""
    (scores, weights) = metric_fn(*args)

    # The tf.metrics.mean function assures correct aggregation.
    # It returns a (value, update_op) streaming-metric pair.
    return tf.metrics.mean(scores, weights)
  return problem_metric_fn
+
+
def get_eval_metrics(logits, labels, params):
  """Return dictionary of model evaluation metrics.

  Args:
    logits: Tensor of size [batch_size, length_logits, vocab_size].
    labels: Tensor of size [batch_size, length_labels].
    params: dict; reads "vocab_size" and "use_tpu".

  Returns:
    Dict mapping "metrics/<name>" to aggregated metric values.
  """
  metrics = {
      "accuracy": _convert_to_eval_metric(padded_accuracy)(logits, labels),
      "accuracy_top5": _convert_to_eval_metric(padded_accuracy_top5)(
          logits, labels),
      "accuracy_per_sequence": _convert_to_eval_metric(
          padded_sequence_accuracy)(logits, labels),
      "neg_log_perplexity": _convert_to_eval_metric(padded_neg_log_perplexity)(
          logits, labels, params["vocab_size"]),
  }

  if not params["use_tpu"]:
    # TPU does not support tf.py_func
    metrics.update({
        "approx_bleu_score": _convert_to_eval_metric(
            bleu_score)(logits, labels),
        "rouge_2_fscore": _convert_to_eval_metric(
            rouge_2_fscore)(logits, labels),
        "rouge_L_fscore": _convert_to_eval_metric(
            rouge_l_fscore)(logits, labels),
    })

  # Prefix each of the metric names with "metrics/". This allows the metric
  # graphs to display under the "metrics" category in TensorBoard.
  metrics = {"metrics/%s" % k: v for k, v in six.iteritems(metrics)}
  return metrics
+
+
def padded_accuracy(logits, labels):
  """Percentage of times that predictions matches labels on non-0s.

  Returns:
    (per-position correctness, per-position weights); padding positions
    (label id 0) receive weight 0.
  """
  with tf.variable_scope("padded_accuracy", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    weights = tf.to_float(tf.not_equal(labels, 0))
    outputs = tf.to_int32(tf.argmax(logits, axis=-1))
    padded_labels = tf.to_int32(labels)
    return tf.to_float(tf.equal(outputs, padded_labels)), weights
+
+
def padded_accuracy_topk(logits, labels, k):
  """Percentage of times that top-k predictions matches labels on non-0s."""
  with tf.variable_scope("padded_accuracy_topk", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    weights = tf.to_float(tf.not_equal(labels, 0))
    # Clamp k so tf.nn.top_k is valid even when k exceeds the vocab dim.
    effective_k = tf.minimum(k, tf.shape(logits)[-1])
    _, outputs = tf.nn.top_k(logits, k=effective_k)
    outputs = tf.to_int32(outputs)
    padded_labels = tf.to_int32(labels)
    # Broadcast labels against the k top predictions via expand + zeros-add.
    padded_labels = tf.expand_dims(padded_labels, axis=-1)
    padded_labels += tf.zeros_like(outputs)  # Pad to same shape.
    same = tf.to_float(tf.equal(outputs, padded_labels))
    # A position scores 1 if the label matches any of the top-k outputs.
    same_topk = tf.reduce_sum(same, axis=-1)
    return same_topk, weights
+
+
def padded_accuracy_top5(logits, labels):
  """Percentage of times the label appears in the top-5 predictions."""
  return padded_accuracy_topk(logits, labels, 5)
+
+
def padded_sequence_accuracy(logits, labels):
  """Percentage of times that predictions matches labels everywhere (non-0)."""
  with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    weights = tf.to_float(tf.not_equal(labels, 0))
    outputs = tf.to_int32(tf.argmax(logits, axis=-1))
    padded_labels = tf.to_int32(labels)
    # Any single mismatch at a non-padding position zeroes the sequence score.
    not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights
    axis = list(range(1, len(outputs.get_shape())))
    correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis))
    return correct_seq, tf.constant(1.0)
+
+
def padded_neg_log_perplexity(logits, labels, vocab_size):
  """Average log-perplexity excluding padding 0s. No smoothing.

  Returns:
    (-cross_entropy, weights): negated so that larger is better.
  """
  num, den = padded_cross_entropy_loss(logits, labels, 0, vocab_size)
  return -num, den
+
+
def bleu_score(logits, labels):
  """Approximate BLEU score computation between labels and predictions.

  An approximate BLEU scoring method since we do not glue word pieces or
  decode the ids and tokenize the output. By default, we use ngram order of 4
  and use brevity penalty. Also, this does not have beam search.

  Args:
    logits: Tensor of size [batch_size, length_logits, vocab_size]
    labels: Tensor of size [batch-size, length_labels]

  Returns:
    bleu: int, approx bleu score
  """
  predictions = tf.to_int32(tf.argmax(logits, axis=-1))
  # TODO: Look into removing use of py_func
  # py_func runs compute_bleu in Python on the host (not TPU-compatible).
  bleu = tf.py_func(compute_bleu, (labels, predictions), tf.float32)
  return bleu, tf.constant(1.0)
+
+
+def _get_ngrams_with_counter(segment, max_order):
+ """Extracts all n-grams up to a given maximum order from an input segment.
+
+ Args:
+ segment: text segment from which n-grams will be extracted.
+ max_order: maximum length in tokens of the n-grams returned by this
+ methods.
+
+ Returns:
+ The Counter containing all n-grams upto max_order in segment
+ with a count of how many times each n-gram occurred.
+ """
+ ngram_counts = collections.Counter()
+ for order in xrange(1, max_order + 1):
+ for i in xrange(0, len(segment) - order + 1):
+ ngram = tuple(segment[i:i + order])
+ ngram_counts[ngram] += 1
+ return ngram_counts
+
+
def compute_bleu(reference_corpus, translation_corpus, max_order=4,
                 use_bp=True):
  """Computes BLEU score of translated segments against one or more references.

  Args:
    reference_corpus: list of references for each translation. Each
        reference should be tokenized into a list of tokens.
    translation_corpus: list of translations to score. Each translation
        should be tokenized into a list of tokens.
    max_order: Maximum n-gram order to use when computing BLEU score.
    use_bp: boolean, whether to apply brevity penalty.

  Returns:
    BLEU score as np.float32 in [0, 1].
  """
  reference_length = 0
  translation_length = 0
  bp = 1.0
  geo_mean = 0

  matches_by_order = [0] * max_order
  possible_matches_by_order = [0] * max_order

  for (references, translations) in zip(reference_corpus, translation_corpus):
    reference_length += len(references)
    translation_length += len(translations)
    ref_ngram_counts = _get_ngrams_with_counter(references, max_order)
    translation_ngram_counts = _get_ngrams_with_counter(translations, max_order)

    # Clipped overlap: each reference n-gram counts at most as often as it
    # appears in the translation.
    overlap = dict((ngram,
                    min(count, translation_ngram_counts[ngram]))
                   for ngram, count in ref_ngram_counts.items())

    for ngram in overlap:
      matches_by_order[len(ngram) - 1] += overlap[ngram]
    for ngram in translation_ngram_counts:
      possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[
          ngram]

  precisions = [0] * max_order
  smooth = 1.0

  for i in range(0, max_order):
    if possible_matches_by_order[i] > 0:
      # The original code assigned the unsmoothed precision first and then
      # immediately overwrote it in the if/else below (dead store); the
      # redundant assignment is removed.
      if matches_by_order[i] > 0:
        precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i]
      else:
        # Exponential smoothing for orders with zero matches, avoiding log(0).
        smooth *= 2
        precisions[i] = 1.0 / (smooth * possible_matches_by_order[i])
    else:
      precisions[i] = 0.0

  if max(precisions) > 0:
    p_log_sum = sum(math.log(p) for p in precisions if p)
    geo_mean = math.exp(p_log_sum / max_order)

  if use_bp:
    # Guard the zero-length cases that previously raised ZeroDivisionError.
    if translation_length == 0:
      bp = 0.0  # Empty translation scores 0.
    elif reference_length == 0:
      bp = 1.0  # Degenerate: nothing to be brief relative to.
    else:
      ratio = translation_length / reference_length
      bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0
  bleu = geo_mean * bp
  return np.float32(bleu)
+
+
def rouge_2_fscore(logits, labels):
  """ROUGE-2 F1 score computation between labels and predictions.

  This is an approximate ROUGE scoring method since we do not glue word pieces
  or decode the ids and tokenize the output.

  Args:
    logits: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    rouge2_fscore: approx rouge-2 f1 score.
  """
  predictions = tf.to_int32(tf.argmax(logits, axis=-1))
  # TODO: Look into removing use of py_func
  # Argument order matches rouge_n(eval_sentences, ref_sentences).
  rouge_2_f_score = tf.py_func(rouge_n, (predictions, labels), tf.float32)
  return rouge_2_f_score, tf.constant(1.0)
+
+
+def _get_ngrams(n, text):
+ """Calculates n-grams.
+
+ Args:
+ n: which n-grams to calculate
+ text: An array of tokens
+
+ Returns:
+ A set of n-grams
+ """
+ ngram_set = set()
+ text_length = len(text)
+ max_index_ngram_start = text_length - n
+ for i in range(max_index_ngram_start + 1):
+ ngram_set.add(tuple(text[i:i + n]))
+ return ngram_set
+
+
def rouge_n(eval_sentences, ref_sentences, n=2):
  """Computes ROUGE-N f1 score of two text collections of sentences.

  Source: https://www.microsoft.com/en-us/research/publication/
  rouge-a-package-for-automatic-evaluation-of-summaries/

  Args:
    eval_sentences: Predicted sentences.
    ref_sentences: Sentences from the reference set
    n: Size of ngram. Defaults to 2.

  Returns:
    f1 score for ROUGE-N
  """
  f1_scores = []
  for hyp, ref in zip(eval_sentences, ref_sentences):
    hyp_ngrams = _get_ngrams(n, hyp)
    ref_ngrams = _get_ngrams(n, ref)

    # Count the n-grams shared between hypothesis and reference.
    overlap = len(hyp_ngrams.intersection(ref_ngrams))

    # Guard empty n-gram sets; not mathematically rigorous, but good enough.
    precision = float(overlap) / len(hyp_ngrams) if hyp_ngrams else 0.0
    recall = float(overlap) / len(ref_ngrams) if ref_ngrams else 0.0

    f1_scores.append(2.0 * ((precision * recall) / (precision + recall + 1e-8)))

  # return overlapping_count / reference_count
  return np.mean(f1_scores, dtype=np.float32)
+
+
def rouge_l_fscore(predictions, labels):
  """ROUGE scores computation between labels and predictions.

  This is an approximate ROUGE scoring method since we do not glue word pieces
  or decode the ids and tokenize the output.

  Args:
    predictions: tensor, model predictions (logits; argmaxed over the last
      axis below despite the name).
    labels: tensor, gold output.

  Returns:
    rouge_l_fscore: approx rouge-l f1 score.
  """
  outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
  rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (outputs, labels),
                               tf.float32)
  return rouge_l_f_score, tf.constant(1.0)
+
+
def rouge_l_sentence_level(eval_sentences, ref_sentences):
  """Computes ROUGE-L (sentence level) of two collections of sentences.

  Source: https://www.microsoft.com/en-us/research/publication/
  rouge-a-package-for-automatic-evaluation-of-summaries/

  Calculated according to:
    R_lcs = LCS(X,Y)/m
    P_lcs = LCS(X,Y)/n
    F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs)
  where X = reference summary, Y = candidate summary, m = len(X), n = len(Y).

  Args:
    eval_sentences: The sentences that have been picked by the summarizer
    ref_sentences: The sentences from the reference set

  Returns:
    A float: F_lcs
  """
  scores = []
  for hyp, ref in zip(eval_sentences, ref_sentences):
    lcs_len = _len_lcs(hyp, ref)
    scores.append(_f_lcs(lcs_len, float(len(ref)), float(len(hyp))))
  return np.mean(scores, dtype=np.float32)
+
+
def _len_lcs(x, y):
  """Returns the length of the Longest Common Subsequence between two seqs.

  Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence

  Args:
    x: sequence of words
    y: sequence of words

  Returns
    integer: Length of LCS between x and y
  """
  # The full DP table's bottom-right cell holds the answer.
  return _lcs(x, y)[len(x), len(y)]
+
+
+def _lcs(x, y):
+ """Computes the length of the LCS between two seqs.
+
+ The implementation below uses a DP programming algorithm and runs
+ in O(nm) time where n = len(x) and m = len(y).
+ Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence
+
+ Args:
+ x: collection of words
+ y: collection of words
+
+ Returns:
+ Table of dictionary of coord and len lcs
+ """
+ n, m = len(x), len(y)
+ table = dict()
+ for i in range(n + 1):
+ for j in range(m + 1):
+ if i == 0 or j == 0:
+ table[i, j] = 0
+ elif x[i - 1] == y[j - 1]:
+ table[i, j] = table[i - 1, j - 1] + 1
+ else:
+ table[i, j] = max(table[i - 1, j], table[i, j - 1])
+ return table
+
+
+def _f_lcs(llcs, m, n):
+ """Computes the LCS-based F-measure score.
+
+ Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/
+ rouge-working-note-v1.3.1.pdf
+
+ Args:
+ llcs: Length of LCS
+ m: number of words in reference summary
+ n: number of words in candidate summary
+
+ Returns:
+ Float. LCS-based F-measure score
+ """
+ r_lcs = llcs / m
+ p_lcs = llcs / n
+ beta = p_lcs / (r_lcs + 1e-12)
+ num = (1 + (beta ** 2)) * r_lcs * p_lcs
+ denom = r_lcs + ((beta ** 2) * p_lcs)
+ f_lcs = num / (denom + 1e-12)
+ return f_lcs
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py
new file mode 100644
index 00000000000..33f144b23fd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py
@@ -0,0 +1,620 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Defines Subtokenizer class to encode and decode strings."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+import sys
+import unicodedata
+
+import numpy as np
+import six
+from six.moves import xrange # pylint: disable=redefined-builtin
+import tensorflow as tf
+
+PAD = ""
+PAD_ID = 0
+EOS = ""
+EOS_ID = 1
+RESERVED_TOKENS = [PAD, EOS]
+
+# Set of characters that will be used in the function _escape_token() (see func
+# docstring for more details).
+# This set is added to the alphabet list to ensure that all escaped tokens can
+# be encoded.
+_ESCAPE_CHARS = set(u"\\_u;0123456789")
+# Regex for the function _unescape_token(), the inverse of _escape_token().
+# This is used to find "\u", "\\", and "\###;" substrings in the token.
+_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);")
+
+_UNDEFINED_UNICODE = u"\u3013"
+
# Set contains all letter and number characters.
# Built once at import time by scanning the entire Unicode range for
# category L* (letters) and N* (numbers); this scan is relatively slow.
_ALPHANUMERIC_CHAR_SET = set(
    six.unichr(i) for i in xrange(sys.maxunicode)
    if (unicodedata.category(six.unichr(i)).startswith("L") or
        unicodedata.category(six.unichr(i)).startswith("N")))

# min_count is the minimum number of times a subtoken must appear in the data
# before it is added to the vocabulary. The value is found using binary
# search to obtain the target vocabulary size.
_MIN_MIN_COUNT = 1  # min value to use when binary searching for min_count
_MAX_MIN_COUNT = 1000  # max value to use when binary searching for min_count
+
+
class Subtokenizer(object):
  """Encodes and decodes strings to/from integer IDs."""

  def __init__(self, vocab_file, reserved_tokens=None):
    """Initializes class, creating a vocab file if data_files is provided."""
    tf.compat.v1.logging.info("Initializing Subtokenizer from file %s." %
                              vocab_file)

    if reserved_tokens is None:
      reserved_tokens = RESERVED_TOKENS

    self.subtoken_list = _load_vocab_file(vocab_file, reserved_tokens)
    self.alphabet = _generate_alphabet_dict(self.subtoken_list)
    self.subtoken_to_id_dict = _list_to_index_dict(self.subtoken_list)

    # Longest subtoken length bounds the search window used by
    # _split_token_to_subtokens.
    self.max_subtoken_length = 0
    for subtoken in self.subtoken_list:
      self.max_subtoken_length = max(self.max_subtoken_length, len(subtoken))

    # Create cache to speed up subtokenization
    # (direct-mapped: one (token, ids) slot per hash bucket).
    self._cache_size = 2 ** 20
    self._cache = [(None, None)] * self._cache_size

  @staticmethod
  def init_from_files(
      vocab_file, files, target_vocab_size, threshold, min_count=None,
      file_byte_limit=1e6, reserved_tokens=None, correct_strip=True):
    """Create subtoken vocabulary based on files, and save vocab to file.

    Args:
      vocab_file: String name of vocab file to store subtoken vocabulary.
      files: List of file paths that will be used to generate vocabulary.
      target_vocab_size: target vocabulary size to generate.
      threshold: int threshold of vocabulary size to accept.
      min_count: int minimum count to use for generating the vocabulary. The min
        count is the minimum number of times a subtoken should appear in the
        files before it is added to the vocabulary. If set to none, this value
        is found using binary search.
      file_byte_limit: (Default 1e6) Maximum number of bytes of sample text that
        will be drawn from the files.
      reserved_tokens: List of string tokens that are guaranteed to be at the
        beginning of the subtoken vocabulary list.
      correct_strip: Whether to convert text to unicode before strip.

    Returns:
      Subtokenizer object
    """
    if reserved_tokens is None:
      reserved_tokens = RESERVED_TOKENS

    if tf.io.gfile.exists(vocab_file):
      # Reuse an existing vocab file rather than regenerating it.
      tf.compat.v1.logging.info("Vocab file already exists (%s)" % vocab_file)
    else:
      tf.compat.v1.logging.info("Begin steps to create subtoken vocabulary...")
      token_counts = _count_tokens(files, file_byte_limit, correct_strip)
      alphabet = _generate_alphabet_dict(token_counts)
      subtoken_list = _generate_subtokens_with_target_vocab_size(
          token_counts, alphabet, target_vocab_size, threshold, min_count,
          reserved_tokens)
      tf.compat.v1.logging.info("Generated vocabulary with %d subtokens." %
                                len(subtoken_list))
      _save_vocab_file(vocab_file, subtoken_list)
    return Subtokenizer(vocab_file)

  def encode(self, raw_string, add_eos=False):
    """Encodes a string into a list of int subtoken ids."""
    ret = []
    tokens = _split_string_to_tokens(native_to_unicode(raw_string))
    for token in tokens:
      ret.extend(self._token_to_subtoken_ids(token))
    if add_eos:
      ret.append(EOS_ID)
    return ret

  def _token_to_subtoken_ids(self, token):
    """Encode a single token into a list of subtoken ids."""
    # Direct-mapped cache lookup keyed by the token's hash.
    cache_location = hash(token) % self._cache_size
    cache_key, cache_value = self._cache[cache_location]
    if cache_key == token:
      return cache_value

    ret = _split_token_to_subtokens(
        _escape_token(token, self.alphabet), self.subtoken_to_id_dict,
        self.max_subtoken_length)
    # NOTE(review): despite the loop-variable name, `ret` holds subtoken
    # strings here; this maps them to their integer ids.
    ret = [self.subtoken_to_id_dict[subtoken_id] for subtoken_id in ret]

    self._cache[cache_location] = (token, ret)
    return ret

  def decode(self, subtokens):
    """Converts list of int subtokens ids into a string."""
    if isinstance(subtokens, np.ndarray):
      # Note that list(subtokens) converts subtokens to a python list, but the
      # items remain as np.int32. This converts both the array and its items.
      subtokens = subtokens.tolist()

    if not subtokens:
      return ""

    assert isinstance(subtokens, list) and isinstance(subtokens[0], int), (
        "Subtokens argument passed into decode() must be a list of integers.")

    return _unicode_to_native(
        _join_tokens_to_string(self._subtoken_ids_to_tokens(subtokens)))

  def _subtoken_ids_to_tokens(self, subtokens):
    """Convert list of int subtoken ids to a list of string tokens."""
    # Ids beyond the vocabulary are silently dropped by the filter below.
    escaped_tokens = "".join([
        self.subtoken_list[s] for s in subtokens
        if s < len(self.subtoken_list)])
    escaped_tokens = escaped_tokens.split("_")

    # All tokens in the vocabulary list have been escaped (see _escape_token())
    # so each token must be unescaped when decoding.
    ret = []
    for token in escaped_tokens:
      if token:
        ret.append(_unescape_token(token))
    return ret
+
+
def _save_vocab_file(vocab_file, subtoken_list):
  """Save subtokens to file, one per line, wrapped in single quotes."""
  with tf.io.gfile.GFile(vocab_file, mode="w") as f:
    for subtoken in subtoken_list:
      # Quotes preserve leading/trailing whitespace; _load_vocab_file strips
      # them back off.
      f.write("'%s'\n" % _unicode_to_native(subtoken))
+
+
def _load_vocab_file(vocab_file, reserved_tokens=None):
  """Load vocabulary while ensuring reserved tokens are at the top."""
  if reserved_tokens is None:
    reserved_tokens = RESERVED_TOKENS

  subtoken_list = []
  with tf.io.gfile.GFile(vocab_file, mode="r") as f:
    for line in f:
      subtoken = native_to_unicode(line.strip())
      subtoken = subtoken[1:-1]  # Remove surrounding single-quotes
      if subtoken in reserved_tokens:
        # Reserved tokens are prepended below; skip file duplicates.
        continue
      subtoken_list.append(native_to_unicode(subtoken))
  return reserved_tokens + subtoken_list
+
+
def native_to_unicode(s):
  """Convert string to unicode (no-op on Python 3; decodes bytes on Python 2)."""
  try:
    text_type = unicode  # noqa: F821 -- only defined on Python 2.
  except NameError:
    # Python 3: str is already unicode; return the value unchanged.
    return s
  return s if isinstance(s, text_type) else s.decode("utf-8")
+
+
+def _unicode_to_native(s):
+ """Convert string from unicode to native format (required in Python 2)."""
+ try: # Python 2
+ return s.encode("utf-8") if isinstance(s, unicode) else s
+ except NameError: # Python 3
+ return s
+
+
def _split_string_to_tokens(text):
  """Splits text to a list of string tokens.

  Tokens are maximal runs of characters that are uniformly alphanumeric or
  uniformly non-alphanumeric; single-space tokens are dropped unless they
  start the string (re-inserted later by _join_tokens_to_string).
  """
  if not text:
    return []
  ret = []
  token_start = 0
  # Classify each character in the input string
  is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
  for pos in xrange(1, len(text)):
    if is_alnum[pos] != is_alnum[pos - 1]:
      # Class change ends the current token.
      token = text[token_start:pos]
      if token != u" " or token_start == 0:
        ret.append(token)
      token_start = pos
  final_token = text[token_start:]
  ret.append(final_token)
  return ret
+
+
def _join_tokens_to_string(tokens):
  """Join a list of string tokens into a single string.

  Inverse of _split_string_to_tokens: a single space is re-inserted only
  between two adjacent alphanumeric tokens.
  """
  pieces = []
  prev_alnum = False
  for tok in tokens:
    cur_alnum = tok[0] in _ALPHANUMERIC_CHAR_SET
    if pieces and prev_alnum and cur_alnum:
      pieces.append(u" ")
    pieces.append(tok)
    prev_alnum = cur_alnum
  return "".join(pieces)
+
+
+def _escape_token(token, alphabet):
+ r"""Replace characters that aren't in the alphabet and append "_" to token.
+
+ Apply three transformations to the token:
+ 1. Replace underline character "_" with "\u", and backslash "\" with "\\".
+ 2. Replace characters outside of the alphabet with "\###;", where ### is the
+ character's Unicode code point.
+ 3. Appends "_" to mark the end of a token.
+
+ Args:
+ token: unicode string to be escaped
+ alphabet: list of all known characters
+
+ Returns:
+ escaped string
+ """
+ token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u")
+ ret = [c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) for c in token]
+ return u"".join(ret) + "_"
+
+
def _unescape_token(token):
  r"""Replaces escaped characters in the token with their unescaped versions.

  Applies inverse transformations as _escape_token():
    1. Replace "\u" with "_", and "\\" with "\".
    2. Replace "\###;" with the unicode character the ### refers to.

  Args:
    token: escaped string

  Returns:
    unescaped string
  """

  def match(m):
    r"""Returns replacement string for matched object.

    Matched objects contain one of the strings that matches the regex pattern:
      r"\\u|\\\\|\\([0-9]+);"
    The strings can be '\u', '\\', or '\###;' (### is any digit number).

    m.group(0) refers to the entire matched string ('\u', '\\', or '\###;').
    m.group(1) refers to the first parenthesized subgroup ('###').

    m.group(0) exists for all match objects, while m.group(1) exists only for
    the string '\###;'.

    This function looks to see if m.group(1) exists. If it doesn't, then the
    matched string must be '\u' or '\\' . In this case, the corresponding
    replacement ('_' and '\') are returned. Note that in python, a single
    backslash is written as '\\', and double backslash as '\\\\'.

    If m.group(1) exists, then use the integer in m.group(1) to return a
    unicode character.

    Args:
      m: match object

    Returns:
      String to replace matched object with.
    """
    # Check if the matched strings are '\u' or '\\'.
    if m.group(1) is None:
      return u"_" if m.group(0) == u"\\u" else u"\\"

    # If m.group(1) exists, try and return unicode character.
    try:
      return six.unichr(int(m.group(1)))
    except (ValueError, OverflowError) as _:
      # Code points outside the valid range map to a placeholder glyph.
      return _UNDEFINED_UNICODE

  # Use match function to replace escaped substrings in the token.
  return _UNESCAPE_REGEX.sub(match, token)
+
+
+def _count_tokens(files, file_byte_limit=1e6, correct_strip=True):
+  """Return token counts of words in the files.
+
+  Samples file_byte_limit bytes from each file, and counts the words that appear
+  in the samples. The samples are semi-evenly distributed across the file.
+
+  Args:
+    files: List of filepaths
+    file_byte_limit: Max number of bytes that will be read from each file.
+    correct_strip: Whether to convert text to unicode before strip. This affects
+      vocabulary generation for PY2. Sets correct_strip to False in PY2 to
+      reproduce previous common public result. Sets correct_strip to True will
+      let PY2 and PY3 get a consistent vocabulary.
+
+  Returns:
+    Dictionary mapping tokens to the number of times they appear in the sampled
+    lines from the files.
+  """
+  token_counts = collections.defaultdict(int)
+
+  for filepath in files:
+    with tf.io.gfile.GFile(filepath, mode="r") as reader:
+      file_byte_budget = file_byte_limit
+      counter = 0
+      # Sample roughly one line out of every (lines_to_skip + 1), so the byte
+      # budget is spread semi-evenly across the whole file instead of being
+      # spent entirely on its beginning.
+      lines_to_skip = int(reader.size() / (file_byte_budget * 2))
+      for line in reader:
+        if counter < lines_to_skip:
+          counter += 1
+        else:
+          if file_byte_budget < 0:
+            break
+          if correct_strip:
+            line = native_to_unicode(line)
+          line = line.strip()
+          # NOTE(review): budget is decremented by character count, which only
+          # approximates bytes for non-ASCII text — confirm this is intended.
+          file_byte_budget -= len(line)
+          counter = 0
+
+          # Add words to token counts
+          for token in _split_string_to_tokens(native_to_unicode(line)):
+            token_counts[token] += 1
+  return token_counts
+
+
+def _list_to_index_dict(lst):
+  """Create dictionary mapping list items to their indices in the list.
+
+  Args:
+    lst: list of hashable items. If an item occurs more than once, the index
+      of its last occurrence wins.
+
+  Returns:
+    Dict mapping each item to its integer position in lst.
+  """
+  return {item: n for n, item in enumerate(lst)}
+
+
+def _split_token_to_subtokens(token, subtoken_dict, max_subtoken_length):
+  """Splits a token into subtokens defined in the subtoken dict.
+
+  Uses greedy longest-match-first: at each position the longest substring
+  (up to max_subtoken_length) present in subtoken_dict is consumed.
+
+  Args:
+    token: escaped string to split.
+    subtoken_dict: dict mapping subtoken strings to ids.
+    max_subtoken_length: int maximum length of any subtoken in subtoken_dict.
+
+  Returns:
+    List of subtokens that concatenate back to the original token.
+
+  Raises:
+    ValueError: if some position in the token matches no subtoken at all.
+  """
+  ret = []
+  start = 0
+  token_len = len(token)
+  while start < token_len:
+    # Find the longest subtoken, so iterate backwards.
+    for end in xrange(min(token_len, start + max_subtoken_length), start, -1):
+      subtoken = token[start:end]
+      if subtoken in subtoken_dict:
+        ret.append(subtoken)
+        start = end
+        break
+    else: # Did not break
+      # If there is no possible encoding of the escaped token then one of the
+      # characters in the token is not in the alphabet. This should be
+      # impossible and would be indicative of a bug.
+      raise ValueError("Was unable to split token \"%s\" into subtokens." %
+                       token)
+  return ret
+
+
+def _generate_subtokens_with_target_vocab_size(
+    token_counts, alphabet, target_size, threshold, min_count=None,
+    reserved_tokens=None):
+  """Generate subtoken vocabulary close to the target size.
+
+  Binary searches over min_count (the minimum number of times a subtoken must
+  appear to be kept) until the resulting vocabulary size is within threshold
+  of target_size, or the search space is exhausted.
+
+  Args:
+    token_counts: dict mapping str tokens to int counts.
+    alphabet: set of characters appearing in the tokens.
+    target_size: int desired vocabulary size.
+    threshold: int allowed deviation of the vocabulary size from target_size.
+    min_count: optional int. If provided, the binary search is skipped and
+      this count is used directly.
+    reserved_tokens: list of tokens prepended to the returned vocabulary.
+      Defaults to RESERVED_TOKENS.
+
+  Returns:
+    Sorted list of subtokens (most frequent first).
+  """
+  if reserved_tokens is None:
+    reserved_tokens = RESERVED_TOKENS
+
+  if min_count is not None:
+    tf.compat.v1.logging.info(
+        "Using min_count=%d to generate vocab with target size %d" %
+        (min_count, target_size))
+    return _generate_subtokens(
+        token_counts, alphabet, min_count, reserved_tokens=reserved_tokens)
+
+  def bisect(min_val, max_val):
+    """Recursive function to binary search for subtoken vocabulary."""
+    cur_count = (min_val + max_val) // 2
+    tf.compat.v1.logging.info("Binary search: trying min_count=%d (%d %d)" %
+                              (cur_count, min_val, max_val))
+    subtoken_list = _generate_subtokens(
+        token_counts, alphabet, cur_count, reserved_tokens=reserved_tokens)
+
+    val = len(subtoken_list)
+    tf.compat.v1.logging.info(
+        "Binary search: min_count=%d resulted in %d tokens" % (cur_count, val))
+
+    within_threshold = abs(val - target_size) < threshold
+    if within_threshold or min_val >= max_val or cur_count < 2:
+      return subtoken_list
+    if val > target_size:
+      # Too many tokens: raise min_count to filter more aggressively.
+      other_subtoken_list = bisect(cur_count + 1, max_val)
+    else:
+      other_subtoken_list = bisect(min_val, cur_count - 1)
+
+    # Return vocabulary dictionary with the closest number of tokens.
+    other_val = len(other_subtoken_list)
+    if abs(other_val - target_size) < abs(val - target_size):
+      return other_subtoken_list
+    return subtoken_list
+
+  tf.compat.v1.logging.info("Finding best min_count to get target size of %d" %
+                            target_size)
+  return bisect(_MIN_MIN_COUNT, _MAX_MIN_COUNT)
+
+
+def _generate_alphabet_dict(iterable, reserved_tokens=None):
+  """Create set of characters that appear in any element in the iterable.
+
+  Args:
+    iterable: iterable of str tokens.
+    reserved_tokens: list of reserved tokens whose characters are also
+      included. Defaults to RESERVED_TOKENS.
+
+  Returns:
+    Set of every character seen in the tokens, the reserved tokens, and the
+    escape characters (_ESCAPE_CHARS).
+  """
+  if reserved_tokens is None:
+    reserved_tokens = RESERVED_TOKENS
+  alphabet = {c for token in iterable for c in token}
+  alphabet |= {c for token in reserved_tokens for c in token}
+  alphabet |= _ESCAPE_CHARS # Add escape characters to alphabet set.
+  return alphabet
+
+
+def _count_and_gen_subtokens(
+    token_counts, alphabet, subtoken_dict, max_subtoken_length):
+  """Count number of times subtokens appear, and generate new subtokens.
+
+  Args:
+    token_counts: dict mapping tokens to the number of times they appear in the
+      original files.
+    alphabet: list of allowed characters. Used to escape the tokens, which
+      guarantees that all tokens can be split into subtokens.
+    subtoken_dict: dict mapping subtokens to ids.
+    max_subtoken_length: maximum length of subtoken in subtoken_dict.
+
+  Returns:
+    A defaultdict mapping subtokens to the number of times they appear in the
+    tokens. The dict may contain new subtokens.
+  """
+  subtoken_counts = collections.defaultdict(int)
+  for token, count in six.iteritems(token_counts):
+    token = _escape_token(token, alphabet)
+    subtokens = _split_token_to_subtokens(
+        token, subtoken_dict, max_subtoken_length)
+
+    # Generate new subtokens by taking substrings from token.
+    # Every substring that begins at a current subtoken boundary is counted
+    # as a candidate, each weighted by the token's count.
+    start = 0
+    for subtoken in subtokens:
+      for end in xrange(start + 1, len(token) + 1):
+        new_subtoken = token[start:end]
+        subtoken_counts[new_subtoken] += count
+      start += len(subtoken)
+
+  return subtoken_counts
+
+
+def _filter_and_bucket_subtokens(subtoken_counts, min_count):
+  """Return a bucketed list of subtokens that are filtered by count.
+
+  Args:
+    subtoken_counts: defaultdict mapping subtokens to their counts
+    min_count: int count used to filter subtokens
+
+  Returns:
+    List of subtoken sets, where subtokens in set i have the same length=i.
+    Buckets for lengths with no surviving subtokens are empty sets (bucket 0
+    is always empty, since every subtoken has length >= 1).
+  """
+  # Create list of buckets, where subtokens in bucket i have length i.
+  subtoken_buckets = []
+  for subtoken, count in six.iteritems(subtoken_counts):
+    if count < min_count: # Filter out subtokens that don't appear enough
+      continue
+    # Grow the bucket list lazily until index len(subtoken) exists.
+    while len(subtoken_buckets) <= len(subtoken):
+      subtoken_buckets.append(set())
+    subtoken_buckets[len(subtoken)].add(subtoken)
+  return subtoken_buckets
+
+
+def _gen_new_subtoken_list(
+    subtoken_counts, min_count, alphabet, reserved_tokens=None):
+  """Generate candidate subtokens ordered by count, and new max subtoken length.
+
+  Add subtokens to the candidate list in order of length (longest subtokens
+  first). When a subtoken is added, the counts of each of its prefixes are
+  decreased. Prefixes that don't appear much outside the subtoken are not added
+  to the candidate list.
+
+  For example:
+    subtoken being added to candidate list: 'translate'
+    subtoken_counts: {'translate':10, 't':40, 'tr':16, 'tra':12, ...}
+    min_count: 5
+
+  When 'translate' is added, subtoken_counts is updated to:
+    {'translate':0, 't':30, 'tr':6, 'tra': 2, ...}
+
+  The subtoken 'tra' will not be added to the candidate list, because it appears
+  twice (less than min_count) outside of 'translate'.
+
+  Args:
+    subtoken_counts: defaultdict mapping str subtokens to int counts
+    min_count: int minimum count requirement for subtokens
+    alphabet: set of characters. Each character is added to the subtoken list to
+      guarantee that all tokens can be encoded.
+    reserved_tokens: list of tokens that will be added to the beginning of the
+      returned subtoken list.
+
+  Returns:
+    List of candidate subtokens in decreasing count order, and maximum subtoken
+    length
+  """
+  if reserved_tokens is None:
+    reserved_tokens = RESERVED_TOKENS
+
+  # Create a list of (count, subtoken) for each candidate subtoken.
+  subtoken_candidates = []
+
+  # Use bucketed list to iterate through subtokens in order of length.
+  # subtoken_buckets[i] = set(subtokens), where each subtoken has length i.
+  subtoken_buckets = _filter_and_bucket_subtokens(subtoken_counts, min_count)
+  max_subtoken_length = len(subtoken_buckets) - 1
+
+  # Go through the list in reverse order to consider longer subtokens first.
+  for subtoken_len in xrange(max_subtoken_length, 0, -1):
+    for subtoken in subtoken_buckets[subtoken_len]:
+      count = subtoken_counts[subtoken]
+
+      # Possible if this subtoken is a prefix of another token.
+      if count < min_count:
+        continue
+
+      # Ignore alphabet/reserved tokens, which will be added manually later.
+      if subtoken not in alphabet and subtoken not in reserved_tokens:
+        subtoken_candidates.append((count, subtoken))
+
+      # Decrement count of the subtoken's prefixes (if a longer subtoken is
+      # added, its prefixes lose priority to be added).
+      for end in xrange(1, subtoken_len):
+        subtoken_counts[subtoken[:end]] -= count
+
+  # Add alphabet subtokens (guarantees that all strings are encodable).
+  subtoken_candidates.extend((subtoken_counts.get(a, 0), a) for a in alphabet)
+
+  # Order subtoken candidates by decreasing count.
+  # Ties on count are broken by the subtoken string, descending (tuples are
+  # compared element-wise with reverse=True).
+  subtoken_list = [t for _, t in sorted(subtoken_candidates, reverse=True)]
+
+  # Add reserved tokens to beginning of the list.
+  subtoken_list = reserved_tokens + subtoken_list
+  return subtoken_list, max_subtoken_length
+
+
+def _generate_subtokens(
+    token_counts, alphabet, min_count, num_iterations=4,
+    reserved_tokens=None):
+  """Create a list of subtokens in decreasing order of frequency.
+
+  Args:
+    token_counts: dict mapping str tokens -> int count
+    alphabet: set of characters
+    min_count: int minimum number of times a subtoken must appear before it is
+      added to the vocabulary.
+    num_iterations: int number of iterations to generate new tokens.
+    reserved_tokens: list of tokens that will be added to the beginning to the
+      returned subtoken list.
+
+  Returns:
+    Sorted list of subtokens (most frequent first), taken from the final
+    iteration.
+  """
+  if reserved_tokens is None:
+    reserved_tokens = RESERVED_TOKENS
+
+  # Use alphabet set to create initial list of subtokens
+  subtoken_list = reserved_tokens + list(alphabet)
+  max_subtoken_length = 1
+
+  # On each iteration, segment all words using the subtokens defined in
+  # subtoken_dict, count how often the resulting subtokens appear, and update
+  # the dictionary with subtokens w/ high enough counts.
+  for i in xrange(num_iterations):
+    tf.compat.v1.logging.info("\tGenerating subtokens: iteration %d" % i)
+    # Generate new subtoken->id dictionary using the new subtoken list.
+    subtoken_dict = _list_to_index_dict(subtoken_list)
+
+    # Create dict mapping subtoken->count, with additional subtokens created
+    # from substrings taken from the tokens.
+    subtoken_counts = _count_and_gen_subtokens(
+        token_counts, alphabet, subtoken_dict, max_subtoken_length)
+
+    # Generate new list of subtokens sorted by subtoken count.
+    subtoken_list, max_subtoken_length = _gen_new_subtoken_list(
+        subtoken_counts, min_count, alphabet, reserved_tokens)
+
+    tf.compat.v1.logging.info("\tVocab size: %d" % len(subtoken_list))
+  return subtoken_list
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py
new file mode 100644
index 00000000000..f757389f30d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py
@@ -0,0 +1,182 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test Subtokenizer and string helper methods."""
+
+import collections
+import tempfile
+
+import tensorflow as tf # pylint: disable=g-bad-import-order
+
+from official.transformer.utils import tokenizer
+
+
+class SubtokenizerTest(tf.test.TestCase):
+  """Tests for Subtokenizer encode/decode behavior."""
+
+  def _init_subtokenizer(self, vocab_list):
+    # Write the vocab to a real file because Subtokenizer reads from a path.
+    # NOTE(review): delete=False means the temp file is never removed after
+    # the test run — confirm this leak is acceptable for the test suite.
+    temp_file = tempfile.NamedTemporaryFile(delete=False)
+    with tf.io.gfile.GFile(temp_file.name, "w") as w:
+      for subtoken in vocab_list:
+        # One quoted subtoken per line, matching the vocab file format.
+        w.write("'%s'" % subtoken)
+        w.write("\n")
+    return tokenizer.Subtokenizer(temp_file.name, reserved_tokens=[])
+
+  def test_encode(self):
+    vocab_list = ["123_", "test", "ing_"]
+    subtokenizer = self._init_subtokenizer(vocab_list)
+    s = "testing 123"
+    encoded_list = subtokenizer.encode(s)
+    self.assertEqual([1, 2, 0], encoded_list)
+
+  def test_decode(self):
+    vocab_list = ["123_", "test", "ing_"]
+    subtokenizer = self._init_subtokenizer(vocab_list)
+    encoded_list = [1, 2, 0]  # testing 123
+    decoded_str = subtokenizer.decode(encoded_list)
+    self.assertEqual("testing 123", decoded_str)
+
+  def test_subtoken_ids_to_tokens(self):
+    vocab_list = ["123_", "test", "ing_"]
+    subtokenizer = self._init_subtokenizer(vocab_list)
+    encoded_list = [1, 2, 0]  # testing 123
+    token_list = subtokenizer._subtoken_ids_to_tokens(encoded_list)
+    self.assertEqual([u"testing", u"123"], token_list)
+
+
+class StringHelperTest(tf.test.TestCase):
+  """Tests for the module-level string helper functions in tokenizer."""
+
+  def test_split_string_to_tokens(self):
+    text = "test? testing 123."
+
+    tokens = tokenizer._split_string_to_tokens(text)
+    self.assertEqual(["test", "? ", "testing", "123", "."], tokens)
+
+  def test_join_tokens_to_string(self):
+    tokens = ["test", "? ", "testing", "123", "."]
+
+    s = tokenizer._join_tokens_to_string(tokens)
+    self.assertEqual("test? testing 123.", s)
+
+  def test_escape_token(self):
+    token = u"abc_\\4"
+    alphabet = set("abc_\\u;")
+
+    escaped_token = tokenizer._escape_token(token, alphabet)
+    self.assertEqual("abc\\u\\\\\\52;_", escaped_token)
+
+  def test_unescape_token(self):
+    escaped_token = u"Underline: \\u, Backslash: \\\\, Unicode: \\52;"
+
+    unescaped_token = tokenizer._unescape_token(escaped_token)
+    self.assertEqual(
+        "Underline: _, Backslash: \\, Unicode: 4", unescaped_token)
+
+  def test_list_to_index_dict(self):
+    lst = ["test", "strings"]
+
+    d = tokenizer._list_to_index_dict(lst)
+    self.assertDictEqual({"test": 0, "strings": 1}, d)
+
+  def test_split_token_to_subtokens(self):
+    token = "abc"
+    subtoken_dict = {"a": 0, "b": 1, "c": 2, "ab": 3}
+    max_subtoken_length = 2
+
+    subtokens = tokenizer._split_token_to_subtokens(
+        token, subtoken_dict, max_subtoken_length)
+    self.assertEqual(["ab", "c"], subtokens)
+
+  def test_generate_alphabet_dict(self):
+    s = ["testing", "123"]
+    reserved_tokens = ["???"]
+
+    alphabet = tokenizer._generate_alphabet_dict(s, reserved_tokens)
+    self.assertIn("?", alphabet)
+    self.assertIn("t", alphabet)
+    self.assertIn("e", alphabet)
+    self.assertIn("s", alphabet)
+    self.assertIn("i", alphabet)
+    self.assertIn("n", alphabet)
+    self.assertIn("g", alphabet)
+    self.assertIn("1", alphabet)
+    self.assertIn("2", alphabet)
+    self.assertIn("3", alphabet)
+
+  def test_count_and_gen_subtokens(self):
+    token_counts = {"abc": 5}
+    alphabet = set("abc_")
+    subtoken_dict = {"a": 0, "b": 1, "c": 2, "_": 3}
+    max_subtoken_length = 2
+
+    subtoken_counts = tokenizer._count_and_gen_subtokens(
+        token_counts, alphabet, subtoken_dict, max_subtoken_length)
+
+    self.assertIsInstance(subtoken_counts, collections.defaultdict)
+    self.assertDictEqual(
+        {"a": 5, "b": 5, "c": 5, "_": 5, "ab": 5, "bc": 5, "c_": 5,
+         "abc": 5, "bc_": 5, "abc_": 5}, subtoken_counts)
+
+  def test_filter_and_bucket_subtokens(self):
+    subtoken_counts = collections.defaultdict(
+        int, {"a": 2, "b": 4, "c": 1, "ab": 6, "ac": 3, "abbc": 5})
+    min_count = 3
+
+    subtoken_buckets = tokenizer._filter_and_bucket_subtokens(
+        subtoken_counts, min_count)
+
+    self.assertEqual(len(subtoken_buckets[0]), 0)
+    self.assertEqual(set("b"), subtoken_buckets[1])
+    self.assertEqual(set(["ab", "ac"]), subtoken_buckets[2])
+    self.assertEqual(len(subtoken_buckets[3]), 0)
+    self.assertEqual(set(["abbc"]), subtoken_buckets[4])
+
+  def test_gen_new_subtoken_list(self):
+    subtoken_counts = collections.defaultdict(
+        int, {"translate": 10, "t": 40, "tr": 16, "tra": 12})
+    min_count = 5
+    alphabet = set("translate")
+    reserved_tokens = ["reserved", "tokens"]
+
+    subtoken_list, max_token_length = tokenizer._gen_new_subtoken_list(
+        subtoken_counts, min_count, alphabet, reserved_tokens)
+
+    # Check that "tra" isn't in the list (its count should be decremented to 2,
+    # so it should not be added to the candidate list).
+    self.assertNotIn("tra", subtoken_list)
+
+    self.assertIn("tr", subtoken_list)
+    self.assertIn("t", subtoken_list)
+
+    self.assertEqual(len("translate"), max_token_length)
+
+  def test_generate_subtokens(self):
+    token_counts = {"ab": 1, "bc": 3, "abc": 5}
+    alphabet = set("abc_")
+    min_count = 100
+    num_iterations = 1
+    reserved_tokens = ["reserved", "tokens"]
+
+    vocab_list = tokenizer._generate_subtokens(
+        token_counts, alphabet, min_count, num_iterations, reserved_tokens)
+
+    # Check that reserved tokens are at the front of the list
+    self.assertEqual(vocab_list[:2], reserved_tokens)
+
+    # Check that each character in alphabet is in the vocab list
+    for c in alphabet:
+      self.assertIn(c, vocab_list)
+
+
+if __name__ == "__main__":
+  # Discover and run all tf.test.TestCase tests defined in this module.
+  tf.test.main()
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md
new file mode 100644
index 00000000000..b7b90b6f8ec
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md
@@ -0,0 +1,133 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Recommend python 3.9 or higher version.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Installation Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+pip install -r requirements.txt
+cd faster_rcnn_resnet50/quantization/ptq
+```
+
+### Install Protocol Buffer Compiler
+
+The `Protocol Buffer Compiler` (version higher than 3.0.0) is a necessary ingredient for automatic COCO dataset preparation. To install it, please follow
+[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Model
+
+```shell
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz
+tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz
+```
+
+## 3. Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+. prepare_dataset.sh
+cd faster_rcnn_resnet50/quantization/ptq
+```
+
+This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to
+tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script.
+
+### Manual dataset download
+Download CoCo Dataset from [Official Website](https://cocodataset.org/#download).
+
+
+# Run
+
+## 1. Quantization
+
+ ```shell
+ bash run_quant.sh --input_model=./faster_rcnn_resnet50_fp32_coco_pretrained_model/frozen_inference_graph.pb --output_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+ ```
+
+## 2. Benchmark
+ ```shell
+ # run performance benchmark
+ bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+
+ # run accuracy benchmark
+ bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy
+ ```
+
+Details of enabling Intel® Neural Compressor on faster_rcnn_resnet50 for Tensorflow.
+=========================
+
+This is a tutorial of how to enable faster_rcnn_resnet50 model with Intel® Neural Compressor.
+## User Code Analysis
+The user specifies the fp32 *model*, a calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself.
+
+For faster_rcnn_resnet50, we follow this approach because our philosophy is to enable the model with minimal changes. Hence we need to make two changes to the original code: implement the *q_dataloader*, and make the necessary changes to *eval_func*.
+
+### Code update
+
+After the preparation step is done, we just need to update main.py as below.
+```python
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ model = Model(args.input_graph)
+ model.input_tensor_names = ['image_tensor']
+ model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ q_model.save(args.output_model)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ else:
+ accuracy = evaluate(args.input_graph)
+ print('Batch size = %d' % args.batch_size)
+ print("Accuracy: %.5f" % accuracy)
+```
+
+The quantize_model() function will return the best quantized model it finds within the timeout constraint.
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Wrappers for third party pycocotools to be used within object_detection.
+
+Note that nothing in this file is tensorflow related and thus cannot
+be called directly as a slim metric, for example.
+
+TODO(jonathanhuang): wrap as a slim metric in metrics.py
+
+
+Usage example: given a set of images with ids in the list image_ids
+and corresponding lists of numpy arrays encoding groundtruth (boxes and classes)
+and detections (boxes, scores and classes), where elements of each list
+correspond to detections/annotations of a single image,
+then evaluation (in multi-class mode) can be invoked as follows:
+
+ groundtruth_dict = coco_tools.ExportGroundtruthToCOCO(
+ image_ids, groundtruth_boxes_list, groundtruth_classes_list,
+ max_num_classes, output_path=None)
+ detections_list = coco_tools.ExportDetectionsToCOCO(
+ image_ids, detection_boxes_list, detection_scores_list,
+ detection_classes_list, output_path=None)
+ groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+ detections = groundtruth.LoadAnnotations(detections_list)
+ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+ agnostic_mode=False)
+ metrics = evaluator.ComputeMetrics()
+"""
+
+import copy
+import time
+from collections import OrderedDict
+from typing import Any, Dict, List, Set, Union
+
+import numpy as np
+from pycocotools import coco, cocoeval, mask
+
+from neural_compressor.utils import logger
+
+
+class COCOWrapper(coco.COCO):
+    """Wrapper for the pycocotools COCO class.
+
+    Attributes:
+      dataset: a dictionary holding bounding box annotations in the COCO format.
+      detection_type: type of detections being wrapped. Can be one of ['bbox',
+        'segmentation']
+    """
+
+    def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"):
+        """Construct a COCOWrapper.
+
+        See http://mscoco.org/dataset/#format for a description of the format.
+        By default, the coco.COCO class constructor reads from a JSON file.
+        This function duplicates the same behavior but loads from a dictionary,
+        allowing us to perform evaluation without writing to external storage.
+
+        Args:
+          dataset: a dictionary holding bounding box annotations in the COCO format.
+          detection_type: type of detections being wrapped. Can be one of ['bbox',
+            'segmentation']
+
+        Raises:
+          ValueError: if detection_type is unsupported.
+        """
+        supported_detection_types = ["bbox", "segmentation"]
+        if detection_type not in supported_detection_types:
+            raise ValueError(
+                "Unsupported detection type: {}. "
+                "Supported values are: {}".format(detection_type, supported_detection_types)
+            )
+        self._detection_type = detection_type
+        coco.COCO.__init__(self)
+        # Assign the in-memory dataset, then build the same lookup indices
+        # coco.COCO would build after parsing a JSON file.
+        self.dataset = dataset
+        self.createIndex()
+
+    def LoadAnnotations(self, annotations: list) -> coco.COCO:
+        """Load annotations dictionary into COCO datastructure.
+
+        See http://mscoco.org/dataset/#format for a description of the annotations
+        format. As above, this function replicates the default behavior of the API
+        but does not require writing to external storage.
+
+        Args:
+          annotations: python list holding object detection results where each
+            detection is encoded as a dict with required keys ['image_id',
+            'category_id', 'score'] and one of ['bbox', 'segmentation'] based on
+            `detection_type`.
+
+        Returns:
+          a coco.COCO datastructure holding object detection annotations results
+
+        Raises:
+          ValueError: if (1) annotations is not a list or annotations do not
+            correspond to the images contained in self.
+        """
+        results = coco.COCO()
+        # Shallow-copy the image list from the groundtruth dataset.
+        results.dataset["images"] = [img for img in self.dataset["images"]]
+
+        logger.info("Load and prepare annotation results.")
+        tic = time.time()
+
+        if not isinstance(annotations, list):
+            raise ValueError("annotations is not a list of objects")
+        annotation_img_ids = [ann["image_id"] for ann in annotations]
+        # Every annotation's image_id must refer to a known groundtruth image.
+        if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())):
+            raise ValueError("Results do not correspond to current coco set")
+        results.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+        # Fill in the area/id/iscrowd fields that pycocotools expects on each
+        # annotation; ids are 1-based.
+        if self._detection_type == "bbox":
+            for idx, ann in enumerate(annotations):
+                bb = ann["bbox"]
+                # bbox is [x, y, width, height]; area = width * height.
+                ann["area"] = bb[2] * bb[3]
+                ann["id"] = idx + 1
+                ann["iscrowd"] = 0
+        elif self._detection_type == "segmentation":
+            for idx, ann in enumerate(annotations):
+                ann["area"] = mask.area(ann["segmentation"])
+                ann["bbox"] = mask.toBbox(ann["segmentation"])
+                ann["id"] = idx + 1
+                ann["iscrowd"] = 0
+        logger.info("DONE (t=%0.2fs)", (time.time() - tic))
+
+        results.dataset["annotations"] = annotations
+        results.createIndex()
+        return results
+
+
+class COCOEvalWrapper(cocoeval.COCOeval):
+ """Wrapper for the pycocotools COCOeval class.
+
+ To evaluate, create two objects (groundtruth_dict and detections_list)
+ using the conventions listed at http://mscoco.org/dataset/#format.
+ Then call evaluation as follows:
+
+ groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+ detections = groundtruth.LoadAnnotations(detections_list)
+ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+ agnostic_mode=False)
+ metrics = evaluator.ComputeMetrics()
+ """
+
+    def __init__(
+        self,
+        groundtruth: coco.COCO = None,
+        detections: coco.COCO = None,
+        agnostic_mode=False,
+        iou_type: str = "bbox",
+        iou_thrs: Union[str, float] = None,
+        map_points=None,
+    ):
+        """Construct a COCOEvalWrapper.
+
+        Note that for the area-based metrics to be meaningful, detection and
+        groundtruth boxes must be in image coordinates measured in pixels.
+
+        Args:
+          groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding
+            groundtruth annotations
+          detections: a coco.COCO (or coco_tools.COCOWrapper) object holding
+            detections
+          agnostic_mode: boolean (default: False). If True, evaluation ignores
+            class labels, treating all detections as proposals.
+          iou_thrs: Minimal value for intersection over union that allows to
+            make decision that prediction bounding box is true positive.
+            You can specify one float value between 0 to 1 or
+            string "0.5:0.05:0.95" for standard COCO thresholds.
+          iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`.
+          map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
+            11-point interpolated AP, 0 for area under PR curve.
+        """
+        cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type)
+        if agnostic_mode:
+            self.params.useCats = 0
+        if iou_thrs == "0.5:0.05:0.95":
+            # Standard COCO IoU thresholds: 0.5, 0.55, ..., 0.95.
+            self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+        elif isinstance(iou_thrs, float):
+            self.params.iouThrs = [iou_thrs]
+
+        if map_points == 101:
+            # 101-point interpolation (COCO default): recall 0, 0.01, ..., 1.
+            self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+        if map_points == 11:
+            # 11-point interpolation: recall thresholds 0, 0.1, ..., 1.
+            self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True)
+        if map_points == 0:
+            # Area-under-PR-curve mode; -1 is the sentinel for "no threshold".
+            self.params.recThrs = [-1]
+
+    def GetCategory(self, category_id: int) -> dict:
+        """Fetch dictionary holding category information given category id.
+
+        Args:
+          category_id: integer id
+
+        Returns:
+          dictionary holding 'id', 'name'.
+
+        Raises:
+          KeyError: if category_id is not present in the groundtruth categories.
+        """
+        return self.cocoGt.cats[category_id]
+
+    def GetAgnosticMode(self) -> bool:
+        """Return whether COCO Eval is configured to evaluate in agnostic mode.
+
+        Agnostic mode corresponds to params.useCats == 0 (class labels ignored).
+        """
+        return self.params.useCats == 0
+
+    def GetCategoryIdList(self) -> List[int]:
+        """Return the list of IDs of all valid categories (params.catIds)."""
+        return self.params.catIds
+
+ def accumulate(self, p: cocoeval.Params = None):
+ """Accumulate evaluation results per image and store it to self.eval.
+
+ Args:
+ p: input params for evaluation
+ """
+ print("Accumulating evaluation results...")
+ tic = time.time()
+ if not self.evalImgs:
+ print("Please run evaluate() first")
+ # allows input customized parameters
+ if p is None:
+ p = self.params
+ p.catIds = p.catIds if p.useCats == 1 else [-1]
+ T = len(p.iouThrs)
+ R = len(p.recThrs)
+ K = len(p.catIds) if p.useCats else 1
+ A = len(p.areaRng)
+ M = len(p.maxDets)
+ precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories
+ recall = -np.ones((T, K, A, M))
+ scores = -np.ones((T, R, K, A, M))
+
+ # create dictionary for future indexing
+ _pe = self._paramsEval
+ print("-pe", _pe)
+ catIds = _pe.catIds if _pe.useCats else [-1]
+ setK = set(catIds)
+ setA = set(map(tuple, _pe.areaRng))
+ setM = set(_pe.maxDets)
+ setI = set(_pe.imgIds)
+ # get inds to evaluate
+ k_list = [n for n, k in enumerate(p.catIds) if k in setK]
+ m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
+ a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
+ i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
+ I0 = len(_pe.imgIds)
+ A0 = len(_pe.areaRng)
+ # retrieve E at each category, area range, and max number of detections
+ for k, k0 in enumerate(k_list):
+ Nk = k0 * A0 * I0
+ for a, a0 in enumerate(a_list):
+ Na = a0 * I0
+ for m, maxDet in enumerate(m_list):
+ E = [self.evalImgs[Nk + Na + i] for i in i_list]
+ E = [e for e in E if e is not None]
+ if len(E) == 0:
+ continue
+ dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
+
+ # different sorting method generates slightly different results.
+ # mergesort is used to be consistent as Matlab implementation.
+ inds = np.argsort(-dtScores, kind="mergesort")
+ dtScoresSorted = dtScores[inds]
+
+ dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ gtIg = np.concatenate([e["gtIgnore"] for e in E])
+ npig = np.count_nonzero(gtIg == 0)
+ if npig == 0:
+ continue
+ tps = np.logical_and(dtm, np.logical_not(dtIg))
+ fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
+
+ tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32)
+ fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32)
+ for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+ tp = np.array(tp)
+ fp = np.array(fp)
+ nd = len(tp)
+ rc = tp / npig
+ pr = tp / (fp + tp + np.spacing(1))
+
+ # calculate precision
+ if R == 1:
+ rc = np.concatenate(([0.0], rc, [1.0]))
+ pr = np.concatenate(([0.0], pr, [0.0]))
+
+ # compute the precision envelope
+ for i in range(pr.size - 1, 0, -1):
+ pr[i - 1] = np.maximum(pr[i - 1], pr[i])
+
+ # to calculate area under PR curve, look for points
+ # where X axis (recall) changes value
+ change_point = np.where(rc[1:] != rc[:-1])[0]
+ # and sum (\Delta recall) * recall
+ res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1])
+ precision[t, :, k, a, m] = np.array([res])
+ else:
+ q = np.zeros((R,))
+
+ # numpy is slow without cython optimization for accessing elements
+ # use python array gets significant speed improvement
+ pr = pr.tolist()
+ q = q.tolist()
+
+ for i in range(nd - 1, 0, -1):
+ if pr[i] > pr[i - 1]:
+ pr[i - 1] = pr[i]
+
+ inds = np.searchsorted(rc, p.recThrs, side="left")
+ try:
+ for ri, pi in enumerate(inds):
+ q[ri] = pr[pi]
+ except:
+ pass
+ precision[t, :, k, a, m] = np.array(q)
+
+ # calculate recall
+ if nd:
+ recall[t, k, a, m] = rc[-1]
+ else:
+ recall[t, k, a, m] = 0
+
+ # calculate score
+ ss = np.zeros((R,))
+ inds = np.searchsorted(rc, p.recThrs, side="left")
+ try:
+ for ri, pi in enumerate(inds):
+ ss[ri] = dtScoresSorted[pi]
+ except:
+ pass
+ scores[t, :, k, a, m] = np.array(ss)
+ # exit(0)
+ self.eval = {
+ "params": p,
+ "counts": [T, R, K, A, M],
+ "precision": precision,
+ "recall": recall,
+ "scores": scores,
+ }
+ toc = time.time()
+ print("DONE (t={:0.2f}s).".format(toc - tic))
+
    def ComputeMetrics(
        self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False
    ): # pragma: no cover
        """Compute detection metrics.

        Runs evaluate(), accumulate() and summarize() on this COCOeval
        wrapper, then repackages ``self.stats`` into named metrics.

        Args:
            include_metrics_per_category: Whether include metrics per category.
            all_metrics_per_category: Whether include all the summary metrics for
                each category in per_category_ap. Be careful with setting it to true if
                you have more than handful of categories, because it will pollute
                your mldash.

        Returns:
            A tuple of (summary_metrics, per_category_ap), in which
            (1) summary_metrics is a dictionary holding:
                'Precision/mAP': mean average precision over classes averaged over IOU
                    thresholds ranging from .5 to .95 with .05 increments;
                'Precision/mAP@.50IOU': mean average precision at 50% IOU;
                'Precision/mAP@.75IOU': mean average precision at 75% IOU;
                'Precision/mAP (small)': mean average precision for small objects
                    (area < 32^2 pixels);
                'Precision/mAP (medium)': mean average precision for medium sized
                    objects (32^2 pixels < area < 96^2 pixels);
                'Precision/mAP (large)': mean average precision for large objects
                    (96^2 pixels < area < 10000^2 pixels);
                'Recall/AR@1': average recall with 1 detection;
                'Recall/AR@10': average recall with 10 detections;
                'Recall/AR@100': average recall with 100 detections;
                'Recall/AR@100 (small)': average recall for small objects with 100
                    detections;
                'Recall/AR@100 (medium)': average recall for medium objects with 100
                    detections;
                'Recall/AR@100 (large)': average recall for large objects with 100
                    detections;
            and (2) per_category_ap is a dictionary holding category specific results with
            keys of the form: 'Precision mAP ByCategory/category'
            (without the supercategory part if no supercategories exist).

            For backward compatibility 'PerformanceByCategory' is included in the
            output regardless of all_metrics_per_category. If evaluating class-agnostic
            mode, per_category_ap is an empty dictionary.

        Raises:
            ValueError: If category_stats does not exist.
        """
        self.evaluate()
        self.accumulate()
        self.summarize()

        # self.stats is laid out by COCOeval.summarize(): 6 precision entries
        # followed by 6 recall entries.
        summary_metrics = OrderedDict(
            [
                ("Precision/mAP", self.stats[0]),
                ("Precision/mAP@.50IOU", self.stats[1]),
                ("Precision/mAP@.75IOU", self.stats[2]),
                ("Precision/mAP (small)", self.stats[3]),
                ("Precision/mAP (medium)", self.stats[4]),
                ("Precision/mAP (large)", self.stats[5]),
                ("Recall/AR@1", self.stats[6]),
                ("Recall/AR@10", self.stats[7]),
                ("Recall/AR@100", self.stats[8]),
                ("Recall/AR@100 (small)", self.stats[9]),
                ("Recall/AR@100 (medium)", self.stats[10]),
                ("Recall/AR@100 (large)", self.stats[11]),
            ]
        )
        if not include_metrics_per_category:
            return summary_metrics, {}
        if not hasattr(self, "category_stats"):
            raise ValueError("Category stats do not exist")
        per_category_ap = OrderedDict([])
        # Per-category numbers are meaningless when categories are ignored.
        if self.GetAgnosticMode():
            return summary_metrics, per_category_ap
        for category_index, category_id in enumerate(self.GetCategoryIdList()):
            category = self.GetCategory(category_id)["name"]
            # Kept for backward compatibility
            # pylint: disable=no-member
            per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index]
            if all_metrics_per_category:
                per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index]
                per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][
                    category_index
                ]
                per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][
                    category_index
                ]
                per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][
                    category_index
                ]
                per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][
                    category_index
                ]
                per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][
                    category_index
                ]
                per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = self.category_stats[6][category_index]
                per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index]
                per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index]
                per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][
                    category_index
                ]
                per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][
                    category_index
                ]
                per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][
                    category_index
                ]

        return summary_metrics, per_category_ap
+
+
+def _ConvertBoxToCOCOFormat(box):
+ """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format.
+
+ This is a utility function for converting from our internal
+ [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
+ i.e., [xmin, ymin, width, height].
+
+ Args:
+ box: a numpy array in format of [ymin, xmin, ymax, xmax]
+
+ Returns:
+ A list of floats, in COCO format, representing [xmin, ymin, width, height]
+ """
+ return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])]
+
+
def _RleCompress(masks):
    """Run-length encode a binary mask via pycocotools.

    Args:
        masks: uint8 numpy array of shape [mask_height, mask_width] with
            values in {0, 1}.

    Returns:
        The pycocotools run-length encoding of the mask.
    """
    fortran_mask = np.asfortranarray(masks)
    return mask.encode(fortran_mask)
+
+
def ExportSingleImageGroundtruthToCoco(
    image_id: Union[int, str],
    next_annotation_id: int,
    category_id_set: Set[str],
    groundtruth_boxes: np.array,
    groundtruth_classes: np.array,
    groundtruth_masks: Union[np.array, None] = None,
    groundtruth_is_crowd: Union[np.array, None] = None,
) -> list:
    """Export groundtruth of a single image to COCO format.

    Converts groundtruth annotations given as numpy arrays into dicts the
    COCO evaluation API can ingest. The image_id must match the one passed
    to ExportSingleImageDetectionsToCoco; boxes and classes are assumed to
    be in correspondence (groundtruth_boxes[i, :] goes with
    groundtruth_classes[i]). "area" is always the groundtruth box area.

    Args:
        image_id: a unique image identifier either of type integer or string.
        next_annotation_id: integer specifying the first id to use for the
            groundtruth annotations; ids are assigned consecutively from it.
        category_id_set: A set of valid class ids. Groundtruth with classes
            not in category_id_set are dropped.
        groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
            in [ymin, xmin, ymax, xmax] order.
        groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
        groundtruth_masks: optional uint8 numpy array of shape
            [num_detections, image_height, image_width] containing masks.
        groundtruth_is_crowd: optional numpy array (int) with shape
            [num_gt_boxes] indicating whether boxes are crowd.

    Returns:
        A list of groundtruth annotations for a single image in COCO format.

    Raises:
        ValueError: if the input arrays disagree in length or rank.
    """
    if len(groundtruth_classes.shape) != 1:
        raise ValueError("groundtruth_classes is expected to be of rank 1.")
    if len(groundtruth_boxes.shape) != 2:
        raise ValueError("groundtruth_boxes is expected to be of rank 2.")
    if groundtruth_boxes.shape[1] != 4:
        raise ValueError("groundtruth_boxes should have shape[1] == 4.")
    num_boxes = groundtruth_classes.shape[0]
    if num_boxes != groundtruth_boxes.shape[0]:
        raise ValueError(
            "Corresponding entries in groundtruth_classes, "
            "and groundtruth_boxes should have "
            "compatible shapes (i.e., agree on the 0th dimension)."
            "Classes shape: %d. Boxes shape: %d. Image ID: %s"
            % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id)
        )
    has_is_crowd = groundtruth_is_crowd is not None
    if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
        raise ValueError("groundtruth_is_crowd is expected to be of rank 1.")

    annotations = []
    for idx in range(num_boxes):
        class_id = groundtruth_classes[idx]
        # Drop annotations whose class is not in the valid id set.
        if class_id not in category_id_set:
            continue
        ymin = groundtruth_boxes[idx, 0]
        xmin = groundtruth_boxes[idx, 1]
        ymax = groundtruth_boxes[idx, 2]
        xmax = groundtruth_boxes[idx, 3]
        annotation = {
            "id": next_annotation_id + idx,
            "image_id": image_id,
            "category_id": int(class_id),
            "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[idx, :])),
            "area": float((ymax - ymin) * (xmax - xmin)),
            "iscrowd": groundtruth_is_crowd[idx] if has_is_crowd else 0,
        }
        if groundtruth_masks is not None:
            annotation["segmentation"] = _RleCompress(groundtruth_masks[idx])
        annotations.append(annotation)
    return annotations
+
+
def ExportSingleImageDetectionBoxesToCoco(
    image_id: Union[int, str],
    category_id_set: Set[int],
    detection_boxes: np.array,
    detection_scores: np.array,
    detection_classes: np.array,
) -> list:
    """Export detections of a single image to COCO format.

    This function converts detections represented as numpy arrays to dictionaries
    that can be ingested by the COCO evaluation API. Note that the image_ids
    provided here must match the ones given to
    ExportSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in
    correspondence - that is: boxes[i, :], and classes[i]
    are associated with the same groundtruth annotation.

    Args:
        image_id: unique image identifier either of type integer or string.
        category_id_set: A set of valid class ids. Detections with classes not in
            category_id_set are dropped.
        detection_boxes: float numpy array of shape [num_detections, 4] containing
            detection boxes in [ymin, xmin, ymax, xmax] order.
        detection_scores: float numpy array of shape [num_detections] containing
            scores for the detection boxes.
        detection_classes: integer numpy array of shape [num_detections] containing
            the classes for detection boxes.

    Returns:
        A list of detection annotations for a single image in the COCO format.

    Raises:
        ValueError: if (1) detection_boxes, detection_scores and detection_classes
            do not have the right lengths or (2) if each of the elements inside these
            lists do not have the correct shapes or (3) if image_ids are not integers.
    """
    if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
        # Bugfix: the two adjacent literals previously concatenated without a
        # space ("...detection_scoresexpected...").
        raise ValueError("All entries in detection_classes and detection_scores " "expected to be of rank 1.")
    if len(detection_boxes.shape) != 2:
        raise ValueError("All entries in detection_boxes expected to be of " "rank 2.")
    if detection_boxes.shape[1] != 4:
        raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.")
    num_boxes = detection_classes.shape[0]
    if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]:
        raise ValueError(
            "Corresponding entries in detection_classes, "
            "detection_scores and detection_boxes should have "
            "compatible shapes (i.e., agree on the 0th dimension). "
            "Classes shape: %d. Boxes shape: %d. "
            "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0])
        )
    detections_list = []
    for i in range(num_boxes):
        # Detections outside the valid category set are silently dropped.
        if detection_classes[i] in category_id_set:
            detections_list.append(
                {
                    "image_id": image_id,
                    "category_id": int(detection_classes[i]),
                    "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])),
                    "score": float(detection_scores[i]),
                }
            )
    return detections_list
+
+
def ExportSingleImageDetectionMasksToCoco(
    image_id: Union[str, int],
    category_id_set: Set[int],
    detection_masks: np.array,
    detection_scores: np.array,
    detection_classes: np.array,
) -> list:
    """Export detection masks of a single image to COCO format.

    This function converts detections represented as numpy arrays to dictionaries
    that can be ingested by the COCO evaluation API. We assume that
    detection_masks, detection_scores, and detection_classes are in correspondence
    - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i]
    are associated with the same annotation.

    Args:
        image_id: unique image identifier either of type integer or string.
        category_id_set: A set of valid class ids. Detections with classes not in
            category_id_set are dropped.
        detection_masks: uint8 numpy array of shape [num_detections, image_height,
            image_width] containing detection_masks.
        detection_scores: float numpy array of shape [num_detections] containing
            scores for detection masks.
        detection_classes: integer numpy array of shape [num_detections] containing
            the classes for detection masks.

    Returns:
        A list of detection mask annotations for a single image in the COCO format.

    Raises:
        ValueError: if (1) detection_masks, detection_scores and detection_classes
            do not have the right lengths or (2) if each of the elements inside these
            lists do not have the correct shapes or (3) if image_ids are not integers.
    """
    if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
        # Bugfix: the two adjacent literals previously concatenated without a
        # space ("...detection_scoresexpected...").
        raise ValueError("All entries in detection_classes and detection_scores " "expected to be of rank 1.")
    num_boxes = detection_classes.shape[0]
    if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
        raise ValueError(
            "Corresponding entries in detection_classes, "
            "detection_scores and detection_masks should have "
            "compatible lengths and shapes "
            "Classes length: %d. Masks length: %d. "
            "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0])
        )
    detections_list = []
    for i in range(num_boxes):
        # Detections outside the valid category set are silently dropped.
        if detection_classes[i] in category_id_set:
            detections_list.append(
                {
                    "image_id": image_id,
                    "category_id": int(detection_classes[i]),
                    "segmentation": _RleCompress(detection_masks[i]),
                    "score": float(detection_scores[i]),
                }
            )
    return detections_list
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..32e55adb3fd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py
@@ -0,0 +1,655 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import cv2
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
# Map from the interpolation names accepted by the transforms below to the
# corresponding OpenCV interpolation flags.
interpolation_map = {
    "nearest": cv2.INTER_NEAREST,
    "bilinear": cv2.INTER_LINEAR,
    "bicubic": cv2.INTER_CUBIC,
}
+
# COCO category id -> human-readable class name. Ids follow the original
# 90-id COCO space, so some ids (12, 26, 29, 30, 45, ...) are intentionally
# absent.
category_map = {
    1: "person",
    2: "bicycle",
    3: "car",
    4: "motorcycle",
    5: "airplane",
    6: "bus",
    7: "train",
    8: "truck",
    9: "boat",
    10: "traffic light",
    11: "fire hydrant",
    13: "stop sign",
    14: "parking meter",
    15: "bench",
    16: "bird",
    17: "cat",
    18: "dog",
    19: "horse",
    20: "sheep",
    21: "cow",
    22: "elephant",
    23: "bear",
    24: "zebra",
    25: "giraffe",
    27: "backpack",
    28: "umbrella",
    31: "handbag",
    32: "tie",
    33: "suitcase",
    34: "frisbee",
    35: "skis",
    36: "snowboard",
    37: "sports ball",
    38: "kite",
    39: "baseball bat",
    40: "baseball glove",
    41: "skateboard",
    42: "surfboard",
    43: "tennis racket",
    44: "bottle",
    46: "wine glass",
    47: "cup",
    48: "fork",
    49: "knife",
    50: "spoon",
    51: "bowl",
    52: "banana",
    53: "apple",
    54: "sandwich",
    55: "orange",
    56: "broccoli",
    57: "carrot",
    58: "hot dog",
    59: "pizza",
    60: "donut",
    61: "cake",
    62: "chair",
    63: "couch",
    64: "potted plant",
    65: "bed",
    67: "dining table",
    70: "toilet",
    72: "tv",
    73: "laptop",
    74: "mouse",
    75: "remote",
    76: "keyboard",
    77: "cell phone",
    78: "microwave",
    79: "oven",
    80: "toaster",
    81: "sink",
    82: "refrigerator",
    84: "book",
    85: "clock",
    86: "vase",
    87: "scissors",
    88: "teddy bear",
    89: "hair drier",
    90: "toothbrush",
}
+
class ComposeTransform(object):
    """Apply a list of transforms to a sample, in order.

    Args:
        transform_list (list of Transform objects): transforms applied one
            after another; each receives the previous one's output.

    Returns:
        sample (tuple): the sample after every transform has run.
    """

    def __init__(self, transform_list):
        """Store the ordered list of transforms."""
        self.transform_list = transform_list

    def __call__(self, sample):
        """Feed the sample through each transform and return the result."""
        result = sample
        for transform in self.transform_list:
            result = transform(result)
        return result
+
+
class ResizeTFTransform(object):
    """Resize the image in an (image, label) sample to a fixed size.

    Args:
        size (list or int): target size; an int or 1-element list yields a
            square target, a 2-element list is taken as (size[0], size[1]).
        interpolation (str, default='bilinear'): desired interpolation type,
            one of 'bilinear', 'nearest', 'bicubic'.

    Returns:
        tuple of (resized image, unchanged label)
    """

    def __init__(self, size, interpolation="bilinear"):
        """Normalize `size` to a 2-tuple and validate the interpolation name."""
        if isinstance(size, int):
            self.size = (size, size)
        elif isinstance(size, list):
            if len(size) == 1:
                self.size = (size[0], size[0])
            elif len(size) == 2:
                self.size = (size[0], size[1])
        self.interpolation = interpolation

        if interpolation not in ("bilinear", "nearest", "bicubic"):
            raise ValueError("Unsupported interpolation type!")

    def __call__(self, sample):
        """Resize the image half of the sample, leaving the label untouched."""
        img, label = sample
        if isinstance(img, tf.Tensor):
            resized = tf.image.resize(img, self.size, method=self.interpolation)
        else:
            resized = cv2.resize(img, self.size, interpolation=interpolation_map[self.interpolation])
        return (resized, label)
+
+
class BaseMetric(object):
    """Abstract base class wrapping a concrete metric implementation."""

    def __init__(self, metric, single_output=False, hvd=None):
        """Set up the wrapper.

        Args:
            metric: the metric class; instantiated lazily on __call__.
            single_output: whether the output is single or not. Defaults to False.
            hvd: the Horovod class for distributed training. Defaults to None.
        """
        self._metric_cls = metric
        self._single_output = single_output
        self._hvd = hvd

    def __call__(self, *args, **kwargs):
        """Instantiate the wrapped metric class with the given arguments.

        Returns:
            The wrapper itself, so calls can be chained.
        """
        self._metric = self._metric_cls(*args, **kwargs)
        return self

    @abstractmethod
    def update(self, preds, labels=None, sample_weight=None):
        """Record a batch of predictions and references.

        Args:
            preds: the prediction result.
            labels: the reference. Defaults to None.
            sample_weight: the sampling weight. Defaults to None.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @abstractmethod
    def reset(self):
        """Clear all recorded predictions and labels.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @abstractmethod
    def result(self):
        """Compute the metric over everything recorded so far.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @property
    def metric(self):
        """The wrapped metric instance created by __call__."""
        return self._metric

    @property
    def hvd(self):
        """The Horovod handle used for distributed evaluation, if any."""
        return self._hvd

    @hvd.setter
    def hvd(self, hvd):
        """Replace the Horovod handle.

        Args:
            hvd: the Horovod class for distributed training.
        """
        self._hvd = hvd
+
+
class COCOmAPv2(BaseMetric):
    """Compute mean average precision of the detection task."""

    def __init__(
        self,
        anno_path=None,
        iou_thrs="0.5:0.05:0.95",
        map_points=101,
        map_key="DetectionBoxes_Precision/mAP",
        output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2},
    ):
        """Initialize the metric.

        Args:
            anno_path: The path of annotation file.
            iou_thrs: Minimal value for intersection over union that allows to make decision
                that prediction bounding box is true positive. You can specify one float value
                between 0 to 1 or string "0.5:0.05:0.95" for standard COCO thresholds.
            map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
                11-point interpolated AP, 0 for area under PR curve.
            map_key: The key that mapping to pycocotools COCOeval.
                Defaults to 'DetectionBoxes_Precision/mAP'.
            output_index_mapping: The output index mapping.
                Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}.
        """
        self.output_index_mapping = output_index_mapping

        if anno_path:
            import os
            import yaml

            assert os.path.exists(anno_path), "Annotation path does not exists!"
            with open(anno_path, "r") as f:
                label_map = yaml.safe_load(f.read())
            # NOTE(review): this branch copies label_map as-is while the
            # default branch inverts category_map to label -> id; it assumes
            # the annotation file already maps label names to ids -- confirm.
            self.category_map_reverse = {k: v for k, v in label_map.items()}
        else:
            # label: index
            self.category_map_reverse = {v: k for k, v in category_map.items()}
        self.image_ids = []
        self.ground_truth_list = []
        self.detection_list = []
        self.annotation_id = 1
        self.category_map = category_map
        self.category_id_set = set(self.category_map)  # index
        self.iou_thrs = iou_thrs
        self.map_points = map_points
        self.map_key = map_key

    def update(self, predicts, labels, sample_weight=None):
        """Add the predictions and labels.

        Args:
            predicts: The predictions.
            labels: The labels corresponding to the predictions.
            sample_weight: The sample weight. Defaults to None.
        """
        from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco

        detections = []
        # When a valid 'num_detections' output exists, use it to strip the
        # zero-padded tail of each prediction; otherwise keep every row.
        if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1:
            for item in zip(*predicts):
                detection = {}
                num = int(item[self.output_index_mapping["num_detections"]])
                detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num]
                detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num]
                detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num]
                detections.append(detection)
        else:
            for item in zip(*predicts):
                detection = {}
                detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])
                detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])
                detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])
                detections.append(detection)

        bboxes, str_labels, int_labels, image_ids = labels
        labels = []
        if len(int_labels[0]) == 0:
            for str_label in str_labels:
                # Bugfix: the original used `type(x) == "str"`, which compares
                # a type object against a string literal and is always False,
                # so genuine `str` labels always hit `.decode` and raised
                # AttributeError. `isinstance` keeps str values as-is and
                # decodes bytes.
                str_label = [x if isinstance(x, str) else x.decode("utf-8") for x in str_label]
                labels.append([self.category_map_reverse[x] for x in str_label])
        elif len(str_labels[0]) == 0:
            for int_label in int_labels:
                labels.append([x for x in int_label])

        for idx, image_id in enumerate(image_ids):
            # Same isinstance fix as above: decode only real bytes ids.
            image_id = image_id if isinstance(image_id, str) else image_id.decode("utf-8")
            if image_id in self.image_ids:
                continue
            self.image_ids.append(image_id)

            ground_truth = {}
            ground_truth["boxes"] = np.asarray(bboxes[idx])
            ground_truth["classes"] = np.asarray(labels[idx])

            self.ground_truth_list.extend(
                ExportSingleImageGroundtruthToCoco(
                    image_id=image_id,
                    next_annotation_id=self.annotation_id,
                    category_id_set=self.category_id_set,
                    groundtruth_boxes=ground_truth["boxes"],
                    groundtruth_classes=ground_truth["classes"],
                )
            )
            self.annotation_id += ground_truth["boxes"].shape[0]

            self.detection_list.extend(
                ExportSingleImageDetectionBoxesToCoco(
                    image_id=image_id,
                    category_id_set=self.category_id_set,
                    detection_boxes=detections[idx]["boxes"],
                    detection_scores=detections[idx]["scores"],
                    detection_classes=detections[idx]["classes"],
                )
            )

    def reset(self):
        """Reset the prediction and labels."""
        self.image_ids = []
        self.ground_truth_list = []
        self.detection_list = []
        self.annotation_id = 1

    def result(self):
        """Compute mean average precision.

        Returns:
            The mean average precision score, or 0 when nothing was recorded.
        """
        from coco_tools import COCOEvalWrapper, COCOWrapper

        if len(self.ground_truth_list) == 0:
            logger.warning("Sample num during evaluation is 0.")
            return 0
        else:
            groundtruth_dict = {
                "annotations": self.ground_truth_list,
                "images": [{"id": image_id} for image_id in self.image_ids],
                "categories": [{"id": k, "name": v} for k, v in self.category_map.items()],
            }
            coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict)
            coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list)
            box_evaluator = COCOEvalWrapper(
                coco_wrapped_groundtruth,
                coco_wrapped_detections,
                agnostic_mode=False,
                iou_thrs=self.iou_thrs,
                map_points=self.map_points,
            )
            box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
                include_metrics_per_category=False, all_metrics_per_category=False
            )
            box_metrics.update(box_per_category_ap)
            box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())}

            return box_metrics[self.map_key]
+
+
class ParseDecodeCoco: # pragma: no cover
    """Parse a serialized COCO-style tf.Example into (image, label) form.

    Produces (image_tensor, (bbox, str_label, int_label, image_id)), the
    sample layout the transforms and metrics in this file consume.
    """

    def __call__(self, sample):
        """Parse the sample data.

        Args:
            sample: a serialized tf.Example proto to be parsed.

        Returns:
            Tuple of (image_tensor, (bbox, str_label, int_label, image_id)).
        """
        # Dense features in Example proto.
        feature_map = {
            "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""),
            "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string),
            "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64),
            "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""),
        }
        sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32)
        # Sparse features in Example proto.
        feature_map.update(
            {
                k: sparse_float32
                for k in [
                    "image/object/bbox/xmin",
                    "image/object/bbox/ymin",
                    "image/object/bbox/xmax",
                    "image/object/bbox/ymax",
                ]
            }
        )

        features = tf.io.parse_single_example(sample, feature_map)

        # Each coordinate becomes a [1, num_boxes] tensor.
        xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0)
        ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0)
        xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0)
        ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0)

        # Stack coordinates in [ymin, xmin, ymax, xmax] order.
        bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
        # Force the variable number of bounding boxes into the shape
        # [1, num_boxes, coords].
        bbox = tf.expand_dims(bbox, 0)
        bbox = tf.transpose(bbox, [0, 2, 1])

        encoded_image = features["image/encoded"]
        image_tensor = tf.image.decode_image(encoded_image, channels=3)
        image_tensor.set_shape([None, None, 3])

        str_label = features["image/object/class/text"].values
        int_label = features["image/object/class/label"].values
        image_id = features["image/source_id"]

        return image_tensor, (bbox[0], str_label, int_label, image_id)
+
+
class COCORecordDataset(object):
    """Tensorflow COCO dataset in tf record format.

    Root is a full path to tfrecord file, which contains the file name.
    Please use Resize transform when batch_size > 1

    Args: root (str): Root directory of dataset.
        num_cores (int, default=28):The number of input Datasets to interleave from in parallel.
        transform (transform object, default=None): transform to process input data.
        filter (Filter objects, default=None): filter out examples according
            to specific conditions.
    """

    def __new__(cls, root, num_cores=28, transform=None, filter=None):
        """Build a tf.data pipeline over the tfrecord file(s) at `root`.

        Bugfix: `filter` previously defaulted to the `filter` builtin (the
        parameter name shadowed it), so by default the dataset was filtered
        with a non-predicate. It now defaults to None, as the class docstring
        documents.
        """
        # Peek at the first record to verify it carries the class text/label
        # features produced by the TF models COCO tfrecord tooling.
        record_iterator = tf.compat.v1.python_io.tf_record_iterator(root)
        example = tf.train.SequenceExample()
        for element in record_iterator:
            example.ParseFromString(element)
            break
        feature = example.context.feature
        if (
            len(feature["image/object/class/text"].bytes_list.value) == 0
            and len(feature["image/object/class/label"].int64_list.value) == 0
        ):
            raise ValueError(
                "Tfrecord format is incorrect, please refer\
                'https://github.com/tensorflow/models/blob/master/research/\
                object_detection/dataset_tools/create_coco_tf_record.py' to\
                create correct tfrecord"
            )
        # pylint: disable=no-name-in-module
        from tensorflow.python.data.experimental import parallel_interleave

        tfrecord_paths = [root]
        ds = tf.data.TFRecordDataset.list_files(tfrecord_paths)
        ds = ds.apply(
            parallel_interleave(
                tf.data.TFRecordDataset,
                cycle_length=num_cores,
                block_length=5,
                sloppy=True,
                buffer_output_elements=10000,
                prefetch_input_elements=10000,
            )
        )
        # Always prepend the tf.Example parser so each sample comes out as
        # (image_tensor, (bbox, str_label, int_label, image_id)).
        if transform is not None:
            transform.transform_list.insert(0, ParseDecodeCoco())
        else:
            transform = ParseDecodeCoco()
        ds = ds.map(transform, num_parallel_calls=None)
        if filter is not None:
            ds = ds.filter(filter)
        ds = ds.prefetch(buffer_size=1000)
        return ds
+
+
+class TFDataLoader(object):
+    """Tensorflow dataloader class.
+
+    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+    method to do session run, this dataloader is designed to satisfy the usage of feed dict
+    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+    Args:
+        dataset: obj. wrapper of needed data.
+        batch_size: int. batch size
+    """
+
+    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+        """Initialize `TFDataDataLoader` class."""
+        self.dataset = dataset
+        self.last_batch = last_batch
+        self.batch_size = batch_size
+        # NOTE(review): the batched dataset is assigned only to the *local*
+        # name, so this statement has no effect on self.dataset; batching is
+        # (re)applied inside _generate_dataloader. Confirm before removing.
+        dataset = dataset.batch(batch_size)
+
+    def batch(self, batch_size, last_batch="rollover"):
+        """Dataset return data per batch."""
+        # "rollover" keeps the final partial batch; any other value drops it.
+        drop_last = False if last_batch == "rollover" else True
+        self.batch_size = batch_size
+        self.dataset = self.dataset.batch(batch_size, drop_last)
+
+    def __iter__(self):
+        """Iterate dataloader."""
+        return self._generate_dataloader(
+            self.dataset,
+            batch_size=self.batch_size,
+            last_batch=self.last_batch,
+        )
+
+    def _generate_dataloader(
+        self,
+        dataset,
+        batch_size=1,
+        last_batch="rollover",
+        collate_fn=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=None,
+        pin_memory=None,
+        distributed=False,
+    ):
+        """Yield data.
+
+        Eager mode: iterate the dataset element-by-element and collate
+        batch_size samples at a time with default_collate. Graph mode: batch
+        via tf.data and pull tensors through a one-shot iterator in a private
+        session. collate_fn/sampler/batch_sampler/num_workers/pin_memory/
+        distributed are accepted for interface compatibility and unused here.
+        """
+        drop_last = False if last_batch == "rollover" else True
+
+        def check_dynamic_shape(element_spec):
+            # True if any tensor in the element spec has an unknown dimension.
+            if isinstance(element_spec, collections.abc.Sequence):
+                return any([check_dynamic_shape(ele) for ele in element_spec])
+            elif isinstance(element_spec, tf.TensorSpec):
+                return True if element_spec.shape.num_elements() is None else False
+            else:
+                raise ValueError("unrecognized element spec...")
+
+        def squeeze_output(output):
+            # Drop the leading batch-of-1 axis added by dataset.batch(1).
+            if isinstance(output, collections.abc.Sequence):
+                return [squeeze_output(ele) for ele in output]
+            elif isinstance(output, np.ndarray):
+                return np.squeeze(output, axis=0)
+            else:
+                raise ValueError("not supported output format....")
+
+        if tf.executing_eagerly():
+            index = 0
+            outputs = []
+            for iter_tensors in dataset:
+                samples = []
+                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+                if isinstance(iter_inputs, tf.Tensor):
+                    samples.append(iter_inputs.numpy())
+                else:
+                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+                if isinstance(iter_labels, tf.Tensor):
+                    samples.append(iter_labels.numpy())
+                else:
+                    samples.append([np.array(l) for l in iter_labels])
+                index += 1
+                outputs.append(samples)
+                if index == batch_size:
+                    outputs = default_collate(outputs)
+                    yield outputs
+                    outputs = []
+                    index = 0
+            # Flush the final partial batch. NOTE(review): this yields even
+            # when last_batch != "rollover" — drop_last is unused on the eager
+            # path; confirm whether that is intentional.
+            if len(outputs) > 0:
+                outputs = default_collate(outputs)
+                yield outputs
+        else:
+            # With dynamic shapes tf.data cannot stack samples, so batch one
+            # element at a time and collate on the host instead.
+            try_single_batch = check_dynamic_shape(dataset.element_spec)
+            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+            iter_tensors = ds_iterator.get_next()
+            data_config = tf.compat.v1.ConfigProto()
+            data_config.use_per_session_threads = 1
+            data_config.intra_op_parallelism_threads = 1
+            data_config.inter_op_parallelism_threads = 16
+            # Private session used only to drive the input pipeline.
+            data_sess = tf.compat.v1.Session(config=data_config)
+            # pylint: disable=no-name-in-module
+            from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+            while True:
+                if not try_single_batch:
+                    try:
+                        outputs = data_sess.run(iter_tensors)
+                        yield outputs
+                    except OutOfRangeError:
+                        data_sess.close()
+                        return
+                else:
+                    try:
+                        outputs = []
+                        for i in range(0, batch_size):
+                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+                        outputs = default_collate(outputs)
+                        yield outputs
+                    except OutOfRangeError:
+                        # End of data mid-batch: emit what was collected, if any.
+                        if len(outputs) == 0:
+                            data_sess.close()
+                            return
+                        else:
+                            outputs = default_collate(outputs)
+                            yield outputs
+                            data_sess.close()
+                            return
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py
new file mode 100644
index 00000000000..0ca37671fd6
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py
@@ -0,0 +1,128 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+from __future__ import division
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from argparse import ArgumentParser
+from data_process import(
+ COCOmAPv2,
+ COCORecordDataset,
+ ComposeTransform,
+ ResizeTFTransform,
+ TFDataLoader,
+)
+
+arg_parser = ArgumentParser(description='Parse args')
+
+# Path to the frozen graph (pb) or checkpoint directory to load.
+arg_parser.add_argument('-g',
+                        "--input-graph",
+                        help='Specify the input graph.',
+                        dest='input_graph')
+arg_parser.add_argument('--config', type=str, default='')
+arg_parser.add_argument('--dataset_location', type=str, default='')
+arg_parser.add_argument('--output_model', type=str, default='')
+# 'performance' reports latency/throughput; any other value reports accuracy.
+arg_parser.add_argument('--mode', type=str, default='performance')
+arg_parser.add_argument('--batch_size', type=int, default=10)
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--tune', action='store_true', default=False)
+arg_parser.add_argument('--benchmark', dest='benchmark',
+                        action='store_true', help='run benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model):
+    """Custom evaluate function to estimate the accuracy of the model.
+
+    Args:
+        model (tf.Graph): The input model graph.
+
+    Returns:
+        accuracy (float): evaluation result, the larger is better.
+    """
+    from neural_compressor.tensorflow import Model
+    model = Model(model)
+    # Fixed tensor names of the COCO object-detection frozen graph.
+    model.input_tensor_names = ["image_tensor:0"]
+    model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \
+                                 "detection_scores:0", "detection_classes:0"]
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+        model.output_tensor[0]
+    warmup = 5  # first iterations excluded from the latency average
+    iteration = -1  # -1 means iterate the whole dataset
+    if args.benchmark and args.mode == 'performance':
+        iteration = args.iters
+    metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3})
+
+    def eval_func(dataloader):
+        # Run inference, accumulate the mAP metric, and return the mean
+        # per-image latency (warmup iterations excluded).
+        latency_list = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # dataloader should keep the order and len of inputs same with input_tensor
+            inputs = np.array([inputs])
+            feed_dict = dict(zip(input_tensor, inputs))
+
+            start = time.time()
+            predictions = model.sess.run(output_tensor, feed_dict)
+            end = time.time()
+
+            metric.update(predictions, labels)
+            latency_list.append(end-start)
+            if idx + 1 == iteration:
+                break
+        latency = np.array(latency_list[warmup:]).mean() / args.batch_size
+        return latency
+
+    # Evaluation pipeline: decode COCO tfrecords and resize to 600x600.
+    eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+        transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)]))
+    eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size)
+    latency = eval_func(eval_dataloader)
+    if args.benchmark and args.mode == 'performance':
+        print("Batch size = {}".format(args.batch_size))
+        print("Latency: {:.3f} ms".format(latency * 1000))
+        print("Throughput: {:.3f} images/sec".format(1. / latency))
+    acc = metric.result()
+    return acc
+
+def main(_):
+    """Entry point: quantize (--tune) and/or benchmark (--benchmark) per CLI args."""
+    # Calibration pipeline: decode COCO tfrecords and resize to 600x600.
+    calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+        transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)]))
+    calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=args.batch_size)
+
+    if args.tune:
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+        # Static post-training quantization with per-channel weight granularity.
+        quant_config = StaticQuantConfig(weight_granularity="per_channel")
+        model = Model(args.input_graph)
+        model.input_tensor_names = ['image_tensor']
+        model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+        q_model = quantize_model(model, quant_config, calib_dataloader)
+        q_model.save(args.output_model)
+
+    if args.benchmark:
+        # Performance mode prints its own stats inside evaluate().
+        if args.mode == 'performance':
+            evaluate(args.input_graph)
+        else:
+            accuracy = evaluate(args.input_graph)
+            print('Batch size = %d' % args.batch_size)
+            print("Accuracy: %.5f" % accuracy)
+
+if __name__ == "__main__":
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..6c2115f58ff
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+# init params
+# Parses --input_model/--mode/--dataset_location/--batch_size/--iters
+# into like-named globals. batch_size and iters have defaults.
+function init_params {
+  batch_size=32
+  iters=100
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          # Quote "$var" everywhere so values containing spaces survive
+          # word splitting (previously only dataset_location was quoted).
+          input_model=$(echo "$var" |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo "$var" |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo "$var" |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo "$var" |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo "$var" |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location "${dataset_location}" \
+ --batch_size ${batch_size} \
+ --iters ${iters} \
+ --benchmark
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..559d695f768
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+
+ run_tuning
+
+}
+
+# init params
+# init params
+# Extract --input_model/--output_model/--dataset_location values from the
+# command line into like-named globals.
+function init_params {
+
+  for arg in "$@"
+  do
+    case $arg in
+      --input_model=*)
+          input_model=$(echo "$arg" |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo "$arg" |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo "$arg" |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input-graph "${input_model}" \
+ --output_model "${output_model}" \
+ --dataset_location "${dataset_location}" \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md
new file mode 100644
index 00000000000..9ec8ae2ad78
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md
@@ -0,0 +1,142 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Python 3.6 or a higher version is recommended.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Installation Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+pip install -r requirements.txt
+cd mask_rcnn_inception_v2/quantization/ptq
+```
+
+### Install Protocol Buffer Compiler
+
+`Protocol Buffer Compiler` in version higher than 3.0.0 is necessary ingredient for automatic COCO dataset preparation. To install please follow
+[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Model
+
+```shell
+wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
+tar -xvzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
+```
+
+## 3. Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection/
+. prepare_dataset.sh
+cd mask_rcnn_inception_v2/quantization/ptq
+```
+
+This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to
+tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script.
+
+### Manual dataset download
+Download CoCo Dataset from [Official Website](https://cocodataset.org/#download).
+
+
+# Run
+
+Now we support both pb and ckpt formats.
+
+## 1. Quantization
+### For PB format
+
+ ```shell
+ bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+ ```
+
+### For ckpt format
+
+ ```shell
+ bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/ --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+ ```
+
+## 2. Benchmark
+ ```shell
+ # run performance benchmark
+ bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+
+ # run accuracy benchmark
+ bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy
+ ```
+
+Details of enabling Intel® Neural Compressor on mask_rcnn_inception_v2 for Tensorflow.
+=========================
+
+This is a tutorial of how to enable mask_rcnn_inception_v2 model with Intel® Neural Compressor.
+## User Code Analysis
+User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself.
+
+For mask_rcnn_inception_v2, we apply this approach because our philosophy is to enable the model with minimal changes. Hence we need to make two changes to the original code: implement the *q_dataloader*, and make the necessary changes to the *eval_func*.
+
+### Code update
+
+After prepare step is done, we just need update main.py like below.
+```python
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ model = Model(args.input_graph)
+ model.input_tensor_names = ['image_tensor']
+ model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ q_model.save(args.output_model)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ else:
+ accuracy = evaluate(args.input_graph)
+ print('Batch size = %d' % args.batch_size)
+ print("Accuracy: %.5f" % accuracy)
+```
+
+The quantize_model() function returns the best quantized model found within the timeout constraint.
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Wrappers for third party pycocotools to be used within object_detection.
+
+Note that nothing in this file is tensorflow related and thus cannot
+be called directly as a slim metric, for example.
+
+TODO(jonathanhuang): wrap as a slim metric in metrics.py
+
+
+Usage example: given a set of images with ids in the list image_ids
+and corresponding lists of numpy arrays encoding groundtruth (boxes and classes)
+and detections (boxes, scores and classes), where elements of each list
+correspond to detections/annotations of a single image,
+then evaluation (in multi-class mode) can be invoked as follows:
+
+ groundtruth_dict = coco_tools.ExportGroundtruthToCOCO(
+ image_ids, groundtruth_boxes_list, groundtruth_classes_list,
+ max_num_classes, output_path=None)
+ detections_list = coco_tools.ExportDetectionsToCOCO(
+ image_ids, detection_boxes_list, detection_scores_list,
+ detection_classes_list, output_path=None)
+ groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+ detections = groundtruth.LoadAnnotations(detections_list)
+ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+ agnostic_mode=False)
+ metrics = evaluator.ComputeMetrics()
+"""
+
+import copy
+import time
+from collections import OrderedDict
+from typing import Any, Dict, List, Set, Union
+
+import numpy as np
+from pycocotools import coco, cocoeval, mask
+
+from neural_compressor.utils import logger
+
+
+class COCOWrapper(coco.COCO):
+    """Wrapper for the pycocotools COCO class.
+
+    Attributes:
+        dataset: a dictionary holding bounding box annotations in the COCO format.
+        detection_type: type of detections being wrapped. Can be one of ['bbox',
+            'segmentation']
+    """
+
+    def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"):
+        """Construct a COCOWrapper.
+
+        See http://mscoco.org/dataset/#format for a description of the format.
+        By default, the coco.COCO class constructor reads from a JSON file.
+        This function duplicates the same behavior but loads from a dictionary,
+        allowing us to perform evaluation without writing to external storage.
+
+        Args:
+            dataset: a dictionary holding bounding box annotations in the COCO format.
+            detection_type: type of detections being wrapped. Can be one of ['bbox',
+                'segmentation']
+
+        Raises:
+            ValueError: if detection_type is unsupported.
+        """
+        supported_detection_types = ["bbox", "segmentation"]
+        if detection_type not in supported_detection_types:
+            raise ValueError(
+                "Unsupported detection type: {}. "
+                "Supported values are: {}".format(detection_type, supported_detection_types)
+            )
+        self._detection_type = detection_type
+        coco.COCO.__init__(self)
+        self.dataset = dataset
+        # Build the img/ann/cat lookup indices from the in-memory dict.
+        self.createIndex()
+
+    def LoadAnnotations(self, annotations: list) -> coco.COCO:
+        """Load annotations dictionary into COCO datastructure.
+
+        See http://mscoco.org/dataset/#format for a description of the annotations
+        format. As above, this function replicates the default behavior of the API
+        but does not require writing to external storage.
+
+        Args:
+            annotations: python list holding object detection results where each
+                detection is encoded as a dict with required keys ['image_id',
+                'category_id', 'score'] and one of ['bbox', 'segmentation'] based on
+                `detection_type`.
+
+        Returns:
+            a coco.COCO datastructure holding object detection annotations results
+
+        Raises:
+            ValueError: if (1) annotations is not a list or annotations do not
+                correspond to the images contained in self.
+        """
+        results = coco.COCO()
+        results.dataset["images"] = [img for img in self.dataset["images"]]
+
+        logger.info("Load and prepare annotation results.")
+        tic = time.time()
+
+        if not isinstance(annotations, list):
+            raise ValueError("annotations is not a list of objects")
+        annotation_img_ids = [ann["image_id"] for ann in annotations]
+        # Every annotated image id must belong to this dataset's image set.
+        if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())):
+            raise ValueError("Results do not correspond to current coco set")
+        results.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+        # NOTE: the ann dicts in the caller's list are mutated in place below
+        # (area/id/iscrowd, plus bbox when detection_type is 'segmentation').
+        if self._detection_type == "bbox":
+            for idx, ann in enumerate(annotations):
+                bb = ann["bbox"]
+                ann["area"] = bb[2] * bb[3]
+                ann["id"] = idx + 1  # COCO annotation ids are 1-based
+                ann["iscrowd"] = 0
+        elif self._detection_type == "segmentation":
+            for idx, ann in enumerate(annotations):
+                ann["area"] = mask.area(ann["segmentation"])
+                ann["bbox"] = mask.toBbox(ann["segmentation"])
+                ann["id"] = idx + 1
+                ann["iscrowd"] = 0
+        logger.info("DONE (t=%0.2fs)", (time.time() - tic))
+
+        results.dataset["annotations"] = annotations
+        results.createIndex()
+        return results
+
+
+class COCOEvalWrapper(cocoeval.COCOeval):
+ """Wrapper for the pycocotools COCOeval class.
+
+ To evaluate, create two objects (groundtruth_dict and detections_list)
+ using the conventions listed at http://mscoco.org/dataset/#format.
+ Then call evaluation as follows:
+
+ groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+ detections = groundtruth.LoadAnnotations(detections_list)
+ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+ agnostic_mode=False)
+ metrics = evaluator.ComputeMetrics()
+ """
+
+    def __init__(
+        self,
+        groundtruth: coco.COCO = None,
+        detections: coco.COCO = None,
+        agnostic_mode=False,
+        iou_type: str = "bbox",
+        iou_thrs: Union[str, float] = None,
+        map_points=None,
+    ):
+        """Construct a COCOEvalWrapper.
+
+        Note that for the area-based metrics to be meaningful, detection and
+        groundtruth boxes must be in image coordinates measured in pixels.
+
+        Args:
+            groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding
+                groundtruth annotations
+            detections: a coco.COCO (or coco_tools.COCOWrapper) object holding
+                detections
+            agnostic_mode: boolean (default: False). If True, evaluation ignores
+                class labels, treating all detections as proposals.
+            iou_thrs: Minimal value for intersection over union that allows to
+                make decision that prediction bounding box is true positive.
+                You can specify one float value between 0 to 1 or
+                string "0.5:0.05:0.95" for standard COCO thresholds.
+            iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`.
+            map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
+                11-point interpolated AP, 0 for area under PR curve.
+        """
+        cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type)
+        if agnostic_mode:
+            self.params.useCats = 0
+        # Standard COCO sweep: IoU 0.5..0.95 in steps of 0.05.
+        if iou_thrs == "0.5:0.05:0.95":
+            self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+        elif isinstance(iou_thrs, float):
+            self.params.iouThrs = [iou_thrs]
+
+        # Recall thresholds select the mAP style; recThrs == [-1] signals
+        # "area under the PR curve" to accumulate().
+        if map_points == 101:
+            self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+        if map_points == 11:
+            self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True)
+        if map_points == 0:
+            self.params.recThrs = [-1]
+
+ def GetCategory(self, category_id: int) -> dict:
+ """Fetch dictionary holding category information given category id.
+
+ Args:
+ category_id: integer id
+
+ Returns:
+ dictionary holding 'id', 'name'.
+ """
+ return self.cocoGt.cats[category_id]
+
+ def GetAgnosticMode(self) -> bool:
+ """Return whether COCO Eval is configured to evaluate in agnostic mode."""
+ return self.params.useCats == 0
+
+ def GetCategoryIdList(self) -> List[int]:
+ """Return the list of IDs of all valid categories."""
+ return self.params.catIds
+
+    def accumulate(self, p: cocoeval.Params = None):
+        """Accumulate evaluation results per image and store it to self.eval.
+
+        Fills self.eval with precision/recall/score arrays indexed by
+        [iouThr, recThr, catId, areaRng, maxDet]. Supports the mAP styles
+        selected in __init__ via params.recThrs: interpolated AP at the given
+        recall thresholds, or recThrs == [-1] for area under the PR curve.
+
+        Args:
+            p: input params for evaluation
+        """
+        print("Accumulating evaluation results...")
+        tic = time.time()
+        if not self.evalImgs:
+            print("Please run evaluate() first")
+        # allows input customized parameters
+        if p is None:
+            p = self.params
+        p.catIds = p.catIds if p.useCats == 1 else [-1]
+        T = len(p.iouThrs)
+        R = len(p.recThrs)
+        K = len(p.catIds) if p.useCats else 1
+        A = len(p.areaRng)
+        M = len(p.maxDets)
+        precision = -np.ones((T, R, K, A, M))  # -1 for the precision of absent categories
+        recall = -np.ones((T, K, A, M))
+        scores = -np.ones((T, R, K, A, M))
+
+        # create dictionary for future indexing
+        _pe = self._paramsEval
+        print("-pe", _pe)
+        catIds = _pe.catIds if _pe.useCats else [-1]
+        setK = set(catIds)
+        setA = set(map(tuple, _pe.areaRng))
+        setM = set(_pe.maxDets)
+        setI = set(_pe.imgIds)
+        # get inds to evaluate
+        k_list = [n for n, k in enumerate(p.catIds) if k in setK]
+        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
+        a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
+        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
+        I0 = len(_pe.imgIds)
+        A0 = len(_pe.areaRng)
+        # retrieve E at each category, area range, and max number of detections
+        for k, k0 in enumerate(k_list):
+            # evalImgs is flattened as [category][areaRng][image].
+            Nk = k0 * A0 * I0
+            for a, a0 in enumerate(a_list):
+                Na = a0 * I0
+                for m, maxDet in enumerate(m_list):
+                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
+                    E = [e for e in E if e is not None]
+                    if len(E) == 0:
+                        continue
+                    dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
+
+                    # different sorting method generates slightly different results.
+                    # mergesort is used to be consistent as Matlab implementation.
+                    inds = np.argsort(-dtScores, kind="mergesort")
+                    dtScoresSorted = dtScores[inds]
+
+                    dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+                    dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+                    gtIg = np.concatenate([e["gtIgnore"] for e in E])
+                    npig = np.count_nonzero(gtIg == 0)
+                    if npig == 0:
+                        continue
+                    # True/false positives among non-ignored detections.
+                    tps = np.logical_and(dtm, np.logical_not(dtIg))
+                    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
+
+                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32)
+                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32)
+                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+                        tp = np.array(tp)
+                        fp = np.array(fp)
+                        nd = len(tp)
+                        rc = tp / npig
+                        pr = tp / (fp + tp + np.spacing(1))
+
+                        # calculate precision
+                        if R == 1:
+                            # recThrs == [-1]: integrate area under the PR curve.
+                            rc = np.concatenate(([0.0], rc, [1.0]))
+                            pr = np.concatenate(([0.0], pr, [0.0]))
+
+                            # compute the precision envelope
+                            for i in range(pr.size - 1, 0, -1):
+                                pr[i - 1] = np.maximum(pr[i - 1], pr[i])
+
+                            # to calculate area under PR curve, look for points
+                            # where X axis (recall) changes value
+                            change_point = np.where(rc[1:] != rc[:-1])[0]
+                            # and sum (\Delta recall) * recall
+                            res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1])
+                            precision[t, :, k, a, m] = np.array([res])
+                        else:
+                            q = np.zeros((R,))
+
+                            # numpy is slow without cython optimization for accessing elements
+                            # use python array gets significant speed improvement
+                            pr = pr.tolist()
+                            q = q.tolist()
+
+                            for i in range(nd - 1, 0, -1):
+                                if pr[i] > pr[i - 1]:
+                                    pr[i - 1] = pr[i]
+
+                            inds = np.searchsorted(rc, p.recThrs, side="left")
+                            try:
+                                for ri, pi in enumerate(inds):
+                                    q[ri] = pr[pi]
+                            # NOTE(review): bare except silently truncates the
+                            # fill when pi runs past the end of pr — presumably
+                            # intentional (matches upstream pycocotools); confirm.
+                            except:
+                                pass
+                            precision[t, :, k, a, m] = np.array(q)
+
+                        # calculate recall
+                        if nd:
+                            recall[t, k, a, m] = rc[-1]
+                        else:
+                            recall[t, k, a, m] = 0
+
+                        # calculate score
+                        ss = np.zeros((R,))
+                        inds = np.searchsorted(rc, p.recThrs, side="left")
+                        try:
+                            for ri, pi in enumerate(inds):
+                                ss[ri] = dtScoresSorted[pi]
+                        # NOTE(review): same bare-except pattern as above.
+                        except:
+                            pass
+                        scores[t, :, k, a, m] = np.array(ss)
+        # exit(0)
+        self.eval = {
+            "params": p,
+            "counts": [T, R, K, A, M],
+            "precision": precision,
+            "recall": recall,
+            "scores": scores,
+        }
+        toc = time.time()
+        print("DONE (t={:0.2f}s).".format(toc - tic))
+
+ def ComputeMetrics(
+ self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False
+ ): # pragma: no cover
+ """Compute detection metrics.
+
+ Args:
+ include_metrics_per_category: Whether include metrics per category.
+ all_metrics_per_category: Whether include all the summery metrics for
+ each category in per_category_ap. Be careful with setting it to true if
+ you have more than handful of categories, because it will pollute
+ your mldash.
+
+ Returns:
+ A tuple of (summary_metrics, per_category_ap), in which
+ (1) summary_metrics is a dictionary holding:
+ 'Precision/mAP': mean average precision over classes averaged over IOU
+ thresholds ranging from .5 to .95 with .05 increments;
+ 'Precision/mAP@.50IOU': mean average precision at 50% IOU;
+ 'Precision/mAP@.75IOU': mean average precision at 75% IOU;
+ 'Precision/mAP (small)': mean average precision for small objects
+ (area < 32^2 pixels);
+ 'Precision/mAP (medium)': mean average precision for medium sized
+ objects (32^2 pixels < area < 96^2 pixels);
+ 'Precision/mAP (large)': mean average precision for large objects
+ (96^2 pixels < area < 10000^2 pixels);
+ 'Recall/AR@1': average recall with 1 detection;
+ 'Recall/AR@10': average recall with 10 detections;
+ 'Recall/AR@100': average recall with 100 detections;
+ 'Recall/AR@100 (small)': average recall for small objects with 100
+ detections;
+ 'Recall/AR@100 (medium)': average recall for medium objects with 100
+ detections;
+ 'Recall/AR@100 (large)': average recall for large objects with 100
+ detections;
+ and (2) per_category_ap is a dictionary holding category specific results with
+ keys of the form: 'Precision mAP ByCategory/category'
+ (without the supercategory part if no supercategories exist).
+
+ For backward compatibility 'PerformanceByCategory' is included in the
+ output regardless of all_metrics_per_category. If evaluating class-agnostic
+ mode, per_category_ap is an empty dictionary.
+
+ Raises:
+ ValueError: If category_stats does not exist.
+ """
+ self.evaluate()
+ self.accumulate()
+ self.summarize()
+
+ summary_metrics = OrderedDict(
+ [
+ ("Precision/mAP", self.stats[0]),
+ ("Precision/mAP@.50IOU", self.stats[1]),
+ ("Precision/mAP@.75IOU", self.stats[2]),
+ ("Precision/mAP (small)", self.stats[3]),
+ ("Precision/mAP (medium)", self.stats[4]),
+ ("Precision/mAP (large)", self.stats[5]),
+ ("Recall/AR@1", self.stats[6]),
+ ("Recall/AR@10", self.stats[7]),
+ ("Recall/AR@100", self.stats[8]),
+ ("Recall/AR@100 (small)", self.stats[9]),
+ ("Recall/AR@100 (medium)", self.stats[10]),
+ ("Recall/AR@100 (large)", self.stats[11]),
+ ]
+ )
+ if not include_metrics_per_category:
+ return summary_metrics, {}
+ if not hasattr(self, "category_stats"):
+ raise ValueError("Category stats do not exist")
+ per_category_ap = OrderedDict([])
+ if self.GetAgnosticMode():
+ return summary_metrics, per_category_ap
+ for category_index, category_id in enumerate(self.GetCategoryIdList()):
+ category = self.GetCategory(category_id)["name"]
+ # Kept for backward compatilbility
+ # pylint: disable=no-member
+ per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index]
+ if all_metrics_per_category:
+ per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index]
+ per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][
+ category_index
+ ]
+ per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][
+ category_index
+ ]
+ per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][
+ category_index
+ ]
+ per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][
+ category_index
+ ]
+ per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][
+ category_index
+ ]
+ per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = self.category_stats[6][category_index]
+ per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index]
+ per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index]
+ per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][
+ category_index
+ ]
+ per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][
+ category_index
+ ]
+ per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][
+ category_index
+ ]
+
+ return summary_metrics, per_category_ap
+
+
+def _ConvertBoxToCOCOFormat(box):
+ """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format.
+
+ This is a utility function for converting from our internal
+ [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
+ i.e., [xmin, ymin, width, height].
+
+ Args:
+ box: a numpy array in format of [ymin, xmin, ymax, xmax]
+
+ Returns:
+ A list of floats, in COCO format, representing [xmin, ymin, width, height]
+ """
+ return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])]
+
+
+def _RleCompress(masks):
+ """Compresses mask using Run-length encoding provided by pycocotools.
+
+ Args:
+ masks: uint8 numpy array of shape [mask_height, mask_width] with values in
+ {0, 1}.
+
+ Returns:
+ A pycocotools Run-length encoding of the mask.
+ """
+ return mask.encode(np.asfortranarray(masks))
+
+
+def ExportSingleImageGroundtruthToCoco(
+ image_id: Union[int, str],
+ next_annotation_id: int,
+ category_id_set: Set[str],
+ groundtruth_boxes: np.array,
+ groundtruth_classes: np.array,
+ groundtruth_masks: Union[np.array, None] = None,
+ groundtruth_is_crowd: Union[np.array, None] = None,
+) -> list:
+ """Export groundtruth of a single image to COCO format.
+
+ This function converts groundtruth detection annotations represented as numpy
+ arrays to dictionaries that can be ingested by the COCO evaluation API. Note
+ that the image_ids provided here must match the ones given to
+ ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in
+ correspondence - that is: groundtruth_boxes[i, :], and
+ groundtruth_classes[i] are associated with the same groundtruth annotation.
+
+ In the exported result, "area" fields are always set to the area of the
+ groundtruth bounding box.
+
+ Args:
+ image_id: a unique image identifier either of type integer or string.
+ next_annotation_id: integer specifying the first id to use for the
+ groundtruth annotations. All annotations are assigned a continuous integer
+ id starting from this value.
+ category_id_set: A set of valid class ids. Groundtruth with classes not in
+ category_id_set are dropped.
+ groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
+ groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
+ groundtruth_masks: optional uint8 numpy array of shape [num_detections,
+ image_height, image_width] containing detection_masks.
+ groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes]
+ indicating whether groundtruth boxes are crowd.
+
+ Returns:
+ A list of groundtruth annotations for a single image in the COCO format.
+
+ Raises:
+ ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
+ right lengths or (2) if each of the elements inside these lists do not
+ have the correct shapes or (3) if image_ids are not integers
+ """
+ if len(groundtruth_classes.shape) != 1:
+ raise ValueError("groundtruth_classes is " "expected to be of rank 1.")
+ if len(groundtruth_boxes.shape) != 2:
+ raise ValueError("groundtruth_boxes is expected to be of " "rank 2.")
+ if groundtruth_boxes.shape[1] != 4:
+ raise ValueError("groundtruth_boxes should have " "shape[1] == 4.")
+ num_boxes = groundtruth_classes.shape[0]
+ if num_boxes != groundtruth_boxes.shape[0]:
+ raise ValueError(
+ "Corresponding entries in groundtruth_classes, "
+ "and groundtruth_boxes should have "
+ "compatible shapes (i.e., agree on the 0th dimension)."
+ "Classes shape: %d. Boxes shape: %d. Image ID: %s"
+ % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id)
+ )
+ has_is_crowd = groundtruth_is_crowd is not None
+ if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
+ raise ValueError("groundtruth_is_crowd is expected to be of rank 1.")
+ groundtruth_list = []
+ for i in range(num_boxes):
+ if groundtruth_classes[i] in category_id_set:
+ iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
+ export_dict = {
+ "id": next_annotation_id + i,
+ "image_id": image_id,
+ "category_id": int(groundtruth_classes[i]),
+ "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
+ "area": float(
+ (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0])
+ * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])
+ ),
+ "iscrowd": iscrowd,
+ }
+ if groundtruth_masks is not None:
+ export_dict["segmentation"] = _RleCompress(groundtruth_masks[i])
+ groundtruth_list.append(export_dict)
+ return groundtruth_list
+
+
+def ExportSingleImageDetectionBoxesToCoco(
+ image_id: Union[int, str],
+ category_id_set: Set[int],
+ detection_boxes: np.array,
+ detection_scores: np.array,
+ detection_classes: np.array,
+) -> list:
+ """Export detections of a single image to COCO format.
+
+ This function converts detections represented as numpy arrays to dictionaries
+ that can be ingested by the COCO evaluation API. Note that the image_ids
+ provided here must match the ones given to the
+ ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in
+ correspondence - that is: boxes[i, :], and classes[i]
+ are associated with the same groundtruth annotation.
+
+ Args:
+ image_id: unique image identifier either of type integer or string.
+ category_id_set: A set of valid class ids. Detections with classes not in
+ category_id_set are dropped.
+ detection_boxes: float numpy array of shape [num_detections, 4] containing
+ detection boxes.
+ detection_scores: float numpy array of shape [num_detections] containing
+ scored for the detection boxes.
+ detection_classes: integer numpy array of shape [num_detections] containing
+ the classes for detection boxes.
+
+ Returns:
+ A list of detection annotations for a single image in the COCO format.
+
+ Raises:
+ ValueError: if (1) detection_boxes, detection_scores and detection_classes
+ do not have the right lengths or (2) if each of the elements inside these
+ lists do not have the correct shapes or (3) if image_ids are not integers.
+ """
+ if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
+ raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.")
+ if len(detection_boxes.shape) != 2:
+ raise ValueError("All entries in detection_boxes expected to be of " "rank 2.")
+ if detection_boxes.shape[1] != 4:
+ raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.")
+ num_boxes = detection_classes.shape[0]
+ if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]:
+ raise ValueError(
+ "Corresponding entries in detection_classes, "
+ "detection_scores and detection_boxes should have "
+ "compatible shapes (i.e., agree on the 0th dimension). "
+ "Classes shape: %d. Boxes shape: %d. "
+ "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0])
+ )
+ detections_list = []
+ for i in range(num_boxes):
+ if detection_classes[i] in category_id_set:
+ detections_list.append(
+ {
+ "image_id": image_id,
+ "category_id": int(detection_classes[i]),
+ "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])),
+ "score": float(detection_scores[i]),
+ }
+ )
+ return detections_list
+
+
+def ExportSingleImageDetectionMasksToCoco(
+ image_id: Union[str, int],
+ category_id_set: Set[int],
+ detection_masks: np.array,
+ detection_scores: np.array,
+ detection_classes: np.array,
+) -> list:
+ """Export detection masks of a single image to COCO format.
+
+ This function converts detections represented as numpy arrays to dictionaries
+ that can be ingested by the COCO evaluation API. We assume that
+ detection_masks, detection_scores, and detection_classes are in correspondence
+ - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i]
+ are associated with the same annotation.
+
+ Args:
+ image_id: unique image identifier either of type integer or string.
+ category_id_set: A set of valid class ids. Detections with classes not in
+ category_id_set are dropped.
+ detection_masks: uint8 numpy array of shape [num_detections, image_height,
+ image_width] containing detection_masks.
+ detection_scores: float numpy array of shape [num_detections] containing
+ scores for detection masks.
+ detection_classes: integer numpy array of shape [num_detections] containing
+ the classes for detection masks.
+
+ Returns:
+ A list of detection mask annotations for a single image in the COCO format.
+
+ Raises:
+ ValueError: if (1) detection_masks, detection_scores and detection_classes
+ do not have the right lengths or (2) if each of the elements inside these
+ lists do not have the correct shapes or (3) if image_ids are not integers.
+ """
+ if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
+ raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.")
+ num_boxes = detection_classes.shape[0]
+ if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
+ raise ValueError(
+ "Corresponding entries in detection_classes, "
+ "detection_scores and detection_masks should have "
+ "compatible lengths and shapes "
+ "Classes length: %d. Masks length: %d. "
+ "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0])
+ )
+ detections_list = []
+ for i in range(num_boxes):
+ if detection_classes[i] in category_id_set:
+ detections_list.append(
+ {
+ "image_id": image_id,
+ "category_id": int(detection_classes[i]),
+ "segmentation": _RleCompress(detection_masks[i]),
+ "score": float(detection_scores[i]),
+ }
+ )
+ return detections_list
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..8d0a074ee82
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py
@@ -0,0 +1,767 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import cv2
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+# Map resize-interpolation names to the corresponding OpenCV flags.
+interpolation_map = {
+    "nearest": cv2.INTER_NEAREST,
+    "bilinear": cv2.INTER_LINEAR,
+    "bicubic": cv2.INTER_CUBIC,
+}
+
+# COCO detection label map: category id -> human-readable category name.
+# The gaps in the id sequence (12, 26, 29, 30, ...) are categories that are
+# absent from the released COCO detection annotations, so they are omitted.
+category_map = {
+    1: "person",
+    2: "bicycle",
+    3: "car",
+    4: "motorcycle",
+    5: "airplane",
+    6: "bus",
+    7: "train",
+    8: "truck",
+    9: "boat",
+    10: "traffic light",
+    11: "fire hydrant",
+    13: "stop sign",
+    14: "parking meter",
+    15: "bench",
+    16: "bird",
+    17: "cat",
+    18: "dog",
+    19: "horse",
+    20: "sheep",
+    21: "cow",
+    22: "elephant",
+    23: "bear",
+    24: "zebra",
+    25: "giraffe",
+    27: "backpack",
+    28: "umbrella",
+    31: "handbag",
+    32: "tie",
+    33: "suitcase",
+    34: "frisbee",
+    35: "skis",
+    36: "snowboard",
+    37: "sports ball",
+    38: "kite",
+    39: "baseball bat",
+    40: "baseball glove",
+    41: "skateboard",
+    42: "surfboard",
+    43: "tennis racket",
+    44: "bottle",
+    46: "wine glass",
+    47: "cup",
+    48: "fork",
+    49: "knife",
+    50: "spoon",
+    51: "bowl",
+    52: "banana",
+    53: "apple",
+    54: "sandwich",
+    55: "orange",
+    56: "broccoli",
+    57: "carrot",
+    58: "hot dog",
+    59: "pizza",
+    60: "donut",
+    61: "cake",
+    62: "chair",
+    63: "couch",
+    64: "potted plant",
+    65: "bed",
+    67: "dining table",
+    70: "toilet",
+    72: "tv",
+    73: "laptop",
+    74: "mouse",
+    75: "remote",
+    76: "keyboard",
+    77: "cell phone",
+    78: "microwave",
+    79: "oven",
+    80: "toaster",
+    81: "sink",
+    82: "refrigerator",
+    84: "book",
+    85: "clock",
+    86: "vase",
+    87: "scissors",
+    88: "teddy bear",
+    89: "hair drier",
+    90: "toothbrush",
+}
+
+class ComposeTransform(object):
+ """Composes several transforms together.
+
+ Args:
+ transform_list (list of Transform objects): list of transforms to compose
+
+ Returns:
+ sample (tuple): tuple of processed image and label
+ """
+
+ def __init__(self, transform_list):
+ """Initialize `ComposeTransform` class."""
+ self.transform_list = transform_list
+
+ def __call__(self, sample):
+ """Call transforms in transform_list."""
+ for transform in self.transform_list:
+ sample = transform(sample)
+ return sample
+
+
+class ResizeWithRatio():
+ """Resize image with aspect ratio and pad it to max shape(optional).
+
+ If the image is padded, the label will be processed at the same time.
+ The input image should be np.array.
+
+ Args:
+ min_dim (int, default=800):
+ Resizes the image such that its smaller dimension == min_dim
+ max_dim (int, default=1365):
+ Ensures that the image longest side doesn't exceed this value
+ padding (bool, default=False):
+ If true, pads image with zeros so its size is max_dim x max_dim
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0):
+ """Initialize `ResizeWithRatio` class."""
+ self.min_dim = min_dim
+ self.max_dim = max_dim
+ self.padding = padding
+ self.constant_value = constant_value
+
+ def __call__(self, sample):
+ """Resize the image with ratio in sample."""
+ image, label = sample
+ height, width = image.shape[:2]
+ scale = 1
+ if self.min_dim:
+ scale = max(1, self.min_dim / min(height, width))
+ if self.max_dim:
+ image_max = max(height, width)
+ if round(image_max * scale) > self.max_dim:
+ scale = self.max_dim / image_max
+ if scale != 1:
+ image = cv2.resize(image, (round(height * scale), round(width * scale)))
+
+ bbox, str_label, int_label, image_id = label
+
+ if self.padding:
+ h, w = image.shape[:2]
+ pad_param = [
+ [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2],
+ [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2],
+ [0, 0],
+ ]
+ if not isinstance(bbox, np.ndarray):
+ bbox = np.array(bbox)
+ resized_box = bbox * [height, width, height, width] * scale
+ moved_box = resized_box + [
+ (self.max_dim - h) // 2,
+ (self.max_dim - w) // 2,
+ (self.max_dim - h) // 2,
+ (self.max_dim - w) // 2,
+ ]
+ bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim]
+ image = np.pad(image, pad_param, mode="constant", constant_values=self.constant_value)
+ return image, (bbox, str_label, int_label, image_id)
+
+
+class TensorflowResizeWithRatio():
+ """Resize image with aspect ratio and pad it to max shape(optional).
+
+ If the image is padded, the label will be processed at the same time.
+ The input image should be np.array or tf.Tensor.
+
+ Args:
+ min_dim (int, default=800):
+ Resizes the image such that its smaller dimension == min_dim
+ max_dim (int, default=1365):
+ Ensures that the image longest side doesn't exceed this value
+ padding (bool, default=False):
+ If true, pads image with zeros so its size is max_dim x max_dim
+
+ Returns:
+ tuple of processed image and label
+ """
+
+ def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0):
+ """Initialize `TensorflowResizeWithRatio` class."""
+ self.min_dim = min_dim
+ self.max_dim = max_dim
+ self.padding = padding
+ self.constant_value = constant_value
+
+ def __call__(self, sample):
+ """Resize the image with ratio in sample."""
+ image, label = sample
+ if isinstance(image, tf.Tensor):
+ shape = tf.shape(input=image)
+ height = tf.cast(shape[0], dtype=tf.float32)
+ width = tf.cast(shape[1], dtype=tf.float32)
+ scale = 1
+ if self.min_dim:
+ scale = tf.maximum(1.0, tf.cast(self.min_dim / tf.math.minimum(height, width), dtype=tf.float32))
+ if self.max_dim:
+ image_max = tf.cast(tf.maximum(height, width), dtype=tf.float32)
+ scale = tf.cond(
+ pred=tf.greater(tf.math.round(image_max * scale), self.max_dim),
+ true_fn=lambda: self.max_dim / image_max,
+ false_fn=lambda: scale,
+ )
+ image = tf.image.resize(image, (tf.math.round(height * scale), tf.math.round(width * scale)))
+ bbox, str_label, int_label, image_id = label
+
+ if self.padding:
+ shape = tf.shape(input=image)
+ h = tf.cast(shape[0], dtype=tf.float32)
+ w = tf.cast(shape[1], dtype=tf.float32)
+ pad_param = [
+ [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2],
+ [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2],
+ [0, 0],
+ ]
+ resized_box = bbox * [height, width, height, width] * scale
+ moved_box = resized_box + [
+ (self.max_dim - h) // 2,
+ (self.max_dim - w) // 2,
+ (self.max_dim - h) // 2,
+ (self.max_dim - w) // 2,
+ ]
+ bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim]
+ image = tf.pad(image, pad_param, constant_values=self.constant_value)
+ else:
+ transform = ResizeWithRatio(self.min_dim, self.max_dim, self.padding)
+ image, (bbox, str_label, int_label, image_id) = transform(sample)
+ return image, (bbox, str_label, int_label, image_id)
+
+
+class BaseMetric(object):
+ """The base class of Metric."""
+
+ def __init__(self, metric, single_output=False, hvd=None):
+ """Initialize the basic metric.
+
+ Args:
+ metric: The metric class.
+ single_output: Whether the output is single or not, defaults to False.
+ hvd: The Horovod class for distributed training, defaults to None.
+ """
+ self._metric_cls = metric
+ self._single_output = single_output
+ self._hvd = hvd
+
+ def __call__(self, *args, **kwargs):
+ """Evaluate the model predictions, and the reference.
+
+ Returns:
+ The class itself.
+ """
+ self._metric = self._metric_cls(*args, **kwargs)
+ return self
+
+ @abstractmethod
+ def update(self, preds, labels=None, sample_weight=None):
+ """Update the state that need to be evaluated.
+
+ Args:
+ preds: The prediction result.
+ labels: The reference. Defaults to None.
+ sample_weight: The sampling weight. Defaults to None.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def reset(self):
+ """Clear the predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def result(self):
+ """Evaluate the difference between predictions and labels.
+
+ Raises:
+ NotImplementedError: The method should be implemented by subclass.
+ """
+ raise NotImplementedError
+
+ @property
+ def metric(self):
+ """Return its metric class.
+
+ Returns:
+ The metric class.
+ """
+ return self._metric
+
+ @property
+ def hvd(self):
+ """Return its hvd class.
+
+ Returns:
+ The hvd class.
+ """
+ return self._hvd
+
+ @hvd.setter
+ def hvd(self, hvd):
+ """Set its hvd.
+
+ Args:
+ hvd: The Horovod class for distributed training.
+ """
+ self._hvd = hvd
+
+
+class LabelBalanceCOCORecordFilter(object):
+ """The label balance filter for COCO Record."""
+
+ def __init__(self, size=1):
+ """Initialize the attribute of class."""
+ self.size = size
+
+ def __call__(self, image, label):
+ """Execute the filter.
+
+ Args:
+ image: Not used.
+ label: label of a sample.
+ """
+ return tf.math.equal(len(label[0]), self.size)
+
+
+class COCOmAPv2(BaseMetric):
+ """Compute mean average precision of the detection task."""
+
+ def __init__(
+ self,
+ anno_path=None,
+ iou_thrs="0.5:0.05:0.95",
+ map_points=101,
+ map_key="DetectionBoxes_Precision/mAP",
+ output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2},
+ ):
+ """Initialize the metric.
+
+ Args:
+ anno_path: The path of annotation file.
+ iou_thrs: Minimal value for intersection over union that allows to make decision
+ that prediction bounding box is true positive. You can specify one float value
+ between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds.
+ map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
+ 11-point interpolated AP, 0 for area under PR curve.
+ map_key: The key that mapping to pycocotools COCOeval.
+ Defaults to 'DetectionBoxes_Precision/mAP'.
+ output_index_mapping: The output index mapping.
+ Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}.
+ """
+ self.output_index_mapping = output_index_mapping
+
+ if anno_path:
+ import os
+ import yaml
+
+ assert os.path.exists(anno_path), "Annotation path does not exists!"
+ with open(anno_path, "r") as f:
+ label_map = yaml.safe_load(f.read())
+ self.category_map_reverse = {k: v for k, v in label_map.items()}
+ else:
+ # label: index
+ self.category_map_reverse = {v: k for k, v in category_map.items()}
+ self.image_ids = []
+ self.ground_truth_list = []
+ self.detection_list = []
+ self.annotation_id = 1
+ self.category_map = category_map
+ self.category_id_set = set([cat for cat in self.category_map]) # index
+ self.iou_thrs = iou_thrs
+ self.map_points = map_points
+ self.map_key = map_key
+
+ def update(self, predicts, labels, sample_weight=None):
+ """Add the predictions and labels.
+
+ Args:
+ predicts: The predictions.
+ labels: The labels corresponding to the predictions.
+ sample_weight: The sample weight. Defaults to None.
+ """
+ from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco
+
+ detections = []
+ if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1:
+ for item in zip(*predicts):
+ detection = {}
+ num = int(item[self.output_index_mapping["num_detections"]])
+ detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num]
+ detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num]
+ detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num]
+ detections.append(detection)
+ else:
+ for item in zip(*predicts):
+ detection = {}
+ detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])
+ detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])
+ detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])
+ detections.append(detection)
+
+ bboxes, str_labels, int_labels, image_ids = labels
+ labels = []
+ if len(int_labels[0]) == 0:
+ for str_label in str_labels:
+ str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label]
+ labels.append([self.category_map_reverse[x] for x in str_label])
+ elif len(str_labels[0]) == 0:
+ for int_label in int_labels:
+ labels.append([x for x in int_label])
+
+ for idx, image_id in enumerate(image_ids):
+ image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8")
+ if image_id in self.image_ids:
+ continue
+ self.image_ids.append(image_id)
+
+ ground_truth = {}
+ ground_truth["boxes"] = np.asarray(bboxes[idx])
+ ground_truth["classes"] = np.asarray(labels[idx])
+
+ self.ground_truth_list.extend(
+ ExportSingleImageGroundtruthToCoco(
+ image_id=image_id,
+ next_annotation_id=self.annotation_id,
+ category_id_set=self.category_id_set,
+ groundtruth_boxes=ground_truth["boxes"],
+ groundtruth_classes=ground_truth["classes"],
+ )
+ )
+ self.annotation_id += ground_truth["boxes"].shape[0]
+
+ self.detection_list.extend(
+ ExportSingleImageDetectionBoxesToCoco(
+ image_id=image_id,
+ category_id_set=self.category_id_set,
+ detection_boxes=detections[idx]["boxes"],
+ detection_scores=detections[idx]["scores"],
+ detection_classes=detections[idx]["classes"],
+ )
+ )
+
+ def reset(self):
+ """Reset the prediction and labels."""
+ self.image_ids = []
+ self.ground_truth_list = []
+ self.detection_list = []
+ self.annotation_id = 1
+
+ def result(self):
+ """Compute mean average precision.
+
+ Returns:
+ The mean average precision score.
+ """
+ from coco_tools import COCOEvalWrapper, COCOWrapper
+
+ if len(self.ground_truth_list) == 0:
+ logger.warning("Sample num during evaluation is 0.")
+ return 0
+ else:
+ groundtruth_dict = {
+ "annotations": self.ground_truth_list,
+ "images": [{"id": image_id} for image_id in self.image_ids],
+ "categories": [{"id": k, "name": v} for k, v in self.category_map.items()],
+ }
+ coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict)
+ coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list)
+ box_evaluator = COCOEvalWrapper(
+ coco_wrapped_groundtruth,
+ coco_wrapped_detections,
+ agnostic_mode=False,
+ iou_thrs=self.iou_thrs,
+ map_points=self.map_points,
+ )
+ box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
+ include_metrics_per_category=False, all_metrics_per_category=False
+ )
+ box_metrics.update(box_per_category_ap)
+ box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())}
+
+ return box_metrics[self.map_key]
+
+
+class ParseDecodeCoco:  # pragma: no cover
+    """Helper function for TensorflowModelZooBertDataset.
+
+    Parse the features from sample.
+    """
+
+    def __call__(self, sample):
+        """Parse one serialized tf.Example into (image, (bbox, str_label, int_label, image_id)).
+
+        Args:
+            sample: Data to be parsed.
+        """
+        # Dense features in Example proto.
+        feature_map = {
+            "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""),
+            "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string),
+            "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64),
+            "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""),
+        }
+        sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32)
+        # Sparse features in Example proto.
+        feature_map.update(
+            {
+                k: sparse_float32
+                for k in [
+                    "image/object/bbox/xmin",
+                    "image/object/bbox/ymin",
+                    "image/object/bbox/xmax",
+                    "image/object/bbox/ymax",
+                ]
+            }
+        )
+
+        features = tf.io.parse_single_example(sample, feature_map)
+
+        # Stack the four sparse coordinate vectors so each row becomes
+        # [ymin, xmin, ymax, xmax] after the transpose below.
+        xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0)
+        ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0)
+        xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0)
+        ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0)
+
+        bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
+        # Force the variable number of bounding boxes into the shape
+        # [1, num_boxes, coords].
+        bbox = tf.expand_dims(bbox, 0)
+        bbox = tf.transpose(bbox, [0, 2, 1])
+
+        encoded_image = features["image/encoded"]
+        image_tensor = tf.image.decode_image(encoded_image, channels=3)
+        image_tensor.set_shape([None, None, 3])
+
+        str_label = features["image/object/class/text"].values
+        int_label = features["image/object/class/label"].values
+        image_id = features["image/source_id"]
+
+        return image_tensor, (bbox[0], str_label, int_label, image_id)
+
+
+class COCORecordDataset(object):
+    """Tensorflow COCO dataset in tf record format.
+
+    Root is a full path to tfrecord file, which contains the file name.
+    Please use Resize transform when batch_size > 1
+
+    Args: root (str): Root directory of dataset.
+        num_cores (int, default=28):The number of input Datasets to interleave from in parallel.
+        transform (transform object, default=None): transform to process input data.
+        filter (Filter objects, default=None): filter out examples according
+            to specific conditions.
+    """
+
+    def __new__(cls, root, num_cores=28, transform=None, filter=filter):
+        """Build and return a tf.data pipeline for the record file (not a class instance).
+
+        NOTE(review): the default `filter=filter` binds the Python builtin
+        `filter` (which is truthy), while the docstring says the default is
+        None — confirm callers always pass an explicit filter object or None.
+        """
+        # Peek at the first record to validate that this is a COCO-style tfrecord.
+        record_iterator = tf.compat.v1.python_io.tf_record_iterator(root)
+        example = tf.train.SequenceExample()
+        for element in record_iterator:
+            example.ParseFromString(element)
+            break
+        feature = example.context.feature
+        if (
+            len(feature["image/object/class/text"].bytes_list.value) == 0
+            and len(feature["image/object/class/label"].int64_list.value) == 0
+        ):
+            raise ValueError(
+                "Tfrecord format is incorrect, please refer\
+                'https://github.com/tensorflow/models/blob/master/research/\
+                object_detection/dataset_tools/create_coco_tf_record.py' to\
+                create correct tfrecord"
+            )
+        # pylint: disable=no-name-in-module
+        from tensorflow.python.data.experimental import parallel_interleave
+
+        tfrecord_paths = [root]
+        ds = tf.data.TFRecordDataset.list_files(tfrecord_paths)
+        # Interleave reads across `num_cores` parallel readers for throughput.
+        ds = ds.apply(
+            parallel_interleave(
+                tf.data.TFRecordDataset,
+                cycle_length=num_cores,
+                block_length=5,
+                sloppy=True,
+                buffer_output_elements=10000,
+                prefetch_input_elements=10000,
+            )
+        )
+        # Records are always decoded first; user transforms run afterwards.
+        if transform is not None:
+            transform.transform_list.insert(0, ParseDecodeCoco())
+        else:
+            transform = ParseDecodeCoco()
+        ds = ds.map(transform, num_parallel_calls=None)
+        if filter is not None:
+            ds = ds.filter(filter)
+        ds = ds.prefetch(buffer_size=1000)
+        return ds
+
+
+class TFDataLoader(object):
+    """Tensorflow dataloader class.
+
+    In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+    method to do session run, this dataloader is designed to satisfy the usage of feed dict
+    in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+    Args:
+        dataset: obj. wrapper of needed data.
+        batch_size: int. batch size
+        last_batch: str. "rollover" keeps the final partial batch, anything else drops it.
+    """
+
+    def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+        """Initialize `TFDataLoader` class."""
+        self.dataset = dataset
+        self.last_batch = last_batch
+        self.batch_size = batch_size
+        # NOTE(review): this rebinds only the *local* name `dataset`; the batched
+        # dataset is discarded and self.dataset stays unbatched (batching happens
+        # again inside _generate_dataloader). Confirm whether this line is intended.
+        dataset = dataset.batch(batch_size)
+
+    def batch(self, batch_size, last_batch="rollover"):
+        """Re-batch the wrapped dataset and remember the new batch size."""
+        drop_last = False if last_batch == "rollover" else True
+        self.batch_size = batch_size
+        self.dataset = self.dataset.batch(batch_size, drop_last)
+
+    def __iter__(self):
+        """Yield batches from the wrapped dataset."""
+        return self._generate_dataloader(
+            self.dataset,
+            batch_size=self.batch_size,
+            last_batch=self.last_batch,
+        )
+
+    def _generate_dataloader(
+        self,
+        dataset,
+        batch_size=1,
+        last_batch="rollover",
+        collate_fn=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=None,
+        pin_memory=None,
+        distributed=False,
+    ):
+        """Yield data batches, eagerly when possible, otherwise via a tf1 session.
+
+        Only `dataset`, `batch_size` and `last_batch` are used here; the
+        remaining parameters exist for interface compatibility and are ignored.
+        """
+        drop_last = False if last_batch == "rollover" else True
+
+        def check_dynamic_shape(element_spec):
+            # True when any tensor in the element spec has an unknown dimension.
+            if isinstance(element_spec, collections.abc.Sequence):
+                return any([check_dynamic_shape(ele) for ele in element_spec])
+            elif isinstance(element_spec, tf.TensorSpec):
+                return True if element_spec.shape.num_elements() is None else False
+            else:
+                raise ValueError("unrecognized element spec...")
+
+        def squeeze_output(output):
+            # Drop the leading batch-of-1 axis added when fetching singly.
+            if isinstance(output, collections.abc.Sequence):
+                return [squeeze_output(ele) for ele in output]
+            elif isinstance(output, np.ndarray):
+                return np.squeeze(output, axis=0)
+            else:
+                raise ValueError("not supported output format....")
+
+        if tf.executing_eagerly():
+            # Eager path: collect `batch_size` samples manually and collate them,
+            # which also works for elements with dynamic shapes.
+            index = 0
+            outputs = []
+            for iter_tensors in dataset:
+                samples = []
+                iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+                if isinstance(iter_inputs, tf.Tensor):
+                    samples.append(iter_inputs.numpy())
+                else:
+                    samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+                if isinstance(iter_labels, tf.Tensor):
+                    samples.append(iter_labels.numpy())
+                else:
+                    samples.append([np.array(l) for l in iter_labels])
+                index += 1
+                outputs.append(samples)
+                if index == batch_size:
+                    outputs = default_collate(outputs)
+                    yield outputs
+                    outputs = []
+                    index = 0
+            if len(outputs) > 0:
+                # Final partial batch (kept regardless of drop_last on this path).
+                outputs = default_collate(outputs)
+                yield outputs
+        else:
+            # Graph path: with dynamic shapes, fetch one element per session run
+            # and collate manually; otherwise let tf.data batch directly.
+            try_single_batch = check_dynamic_shape(dataset.element_spec)
+            dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+            ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+            iter_tensors = ds_iterator.get_next()
+            data_config = tf.compat.v1.ConfigProto()
+            data_config.use_per_session_threads = 1
+            data_config.intra_op_parallelism_threads = 1
+            data_config.inter_op_parallelism_threads = 16
+            data_sess = tf.compat.v1.Session(config=data_config)
+            # pylint: disable=no-name-in-module
+            from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+            while True:
+                if not try_single_batch:
+                    try:
+                        outputs = data_sess.run(iter_tensors)
+                        yield outputs
+                    except OutOfRangeError:
+                        data_sess.close()
+                        return
+                else:
+                    try:
+                        outputs = []
+                        for i in range(0, batch_size):
+                            outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+                        outputs = default_collate(outputs)
+                        yield outputs
+                    except OutOfRangeError:
+                        if len(outputs) == 0:
+                            data_sess.close()
+                            return
+                        else:
+                            # Emit the final partial batch before closing.
+                            outputs = default_collate(outputs)
+                            yield outputs
+                            data_sess.close()
+                            return
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py
new file mode 100644
index 00000000000..632d66ac25a
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py
@@ -0,0 +1,133 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+from __future__ import division
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from argparse import ArgumentParser
+from data_process import (
+ COCOmAPv2,
+ COCORecordDataset,
+ ComposeTransform,
+ TFDataLoader,
+ LabelBalanceCOCORecordFilter,
+ TensorflowResizeWithRatio,
+)
+
+arg_parser = ArgumentParser(description='Parse args')
+
+arg_parser.add_argument('-g',
+ "--input-graph",
+ help='Specify the input graph.',
+ dest='input_graph')
+arg_parser.add_argument('--config', type=str, default='')
+arg_parser.add_argument('--dataset_location', type=str, default='')
+arg_parser.add_argument('--output_model', type=str, default='')
+arg_parser.add_argument('--mode', type=str, default='accuracy')
+arg_parser.add_argument('--batch_size', type=int, default=10)
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--tune', action='store_true', default=False)
+arg_parser.add_argument('--benchmark', dest='benchmark',
+ action='store_true', help='run benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model):
+ """Custom evaluate function to estimate the accuracy of the model.
+
+ Args:
+ model (tf.Graph or string or INC.model.TensorflowCheckpointModel): The input model.
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ from neural_compressor.tensorflow import Model
+ if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph):
+ model = Model(model)
+ model.input_tensor_names = ["image_tensor:0"]
+ model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \
+ "detection_scores:0", "detection_classes:0"]
+ input_tensor = model.input_tensor
+ output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+ model.output_tensor[0]
+ warmup = 5
+ iteration = -1
+ if args.benchmark and args.mode == 'performance':
+ iteration = args.iters
+ metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3})
+
+ def eval_func(dataloader):
+ latency_list = []
+ for idx, (inputs, labels) in enumerate(dataloader):
+ # dataloader should keep the order and len of inputs same with input_tensor
+ inputs = np.array([inputs])
+ feed_dict = dict(zip(input_tensor, inputs))
+
+ start = time.time()
+ predictions = model.sess.run(output_tensor, feed_dict)
+ end = time.time()
+
+ metric.update(predictions, labels)
+ latency_list.append(end-start)
+ if idx + 1 == iteration:
+ break
+ latency = np.array(latency_list[warmup:]).mean() / args.batch_size
+ return latency
+
+ use_padding = True if args.mode == 'performance' else False
+ eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+ transform=ComposeTransform(transform_list=[TensorflowResizeWithRatio(
+ min_dim=800, max_dim=1356, padding=use_padding)]))
+ batch_size = 1 if args.mode == 'accuracy' else args.batch_size
+ eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=batch_size)
+
+ latency = eval_func(eval_dataloader)
+ if args.benchmark and args.mode == 'performance':
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+def main(_):
+ calib_dataset = COCORecordDataset(root=args.dataset_location, filter=LabelBalanceCOCORecordFilter(size=1))
+ calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=1)
+
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ model = Model(args.input_graph)
+ model.input_tensor_names = ['image_tensor']
+ model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ q_model.save(args.output_model)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ else:
+ accuracy = evaluate(args.input_graph)
+ print('Batch size = %d' % args.batch_size)
+ print("Accuracy: %.5f" % accuracy)
+
+if __name__ == "__main__":
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..6c2115f58ff
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+
+# run_tuning
+function run_benchmark {
+
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location "${dataset_location}" \
+ --batch_size ${batch_size} \
+ --iters ${iters} \
+ --benchmark
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..559d695f768
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo "$var" |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input-graph "${input_model}" \
+ --output_model "${output_model}" \
+ --dataset_location "${dataset_location}" \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh b/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh
new file mode 100644
index 00000000000..fea0ff1c373
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+# set -x
+
+DATA_DIR="${PWD}/data"
+DATA_NAME="val2017"
+DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip'
+PACKAGES_LIST='val2017.zip annotations_trainval2017.zip'
+VAL_IMAGE_DIR=$DATA_DIR/val2017
+TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json
+VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json
+TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json
+OUTPUT_DIR=$DATA_DIR
+
+help()
+{
+ cat <<- EOF
+
+ Desc: Prepare dataset for Tensorflow COCO object detection.
+
+ -h --help help info
+
+ --dataset_location set dataset location, default is ./data
+
+EOF
+ exit 0
+}
+
+function main {
+ init_params "$@"
+ download_dataset
+ convert_to_tf_record
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --dataset_location=*)
+ DATA_DIR=$(echo "$var" |cut -f2 -d=)
+ ;;
+ -h|--help) help
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+# removes files that will not be used anymore
+function remove_zipped_packages {
+ for package in $PACKAGES_LIST; do
+ rm "$package"
+ done
+}
+
+function download_tf_models_repo {
+ if [ ! -d models ]; then
+ git clone https://github.com/tensorflow/models.git
+ fi
+ cd models || exit
+ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40
+ cd ..
+}
+
+function divide_tf_records_by_dataset {
+ if [ ! -d "${DATA_DIR}/tf_test2017" ]; then
+ mkdir "${DATA_DIR}/tf_test2017"
+ fi
+ if [ ! -d "${DATA_DIR}/tf_train2017" ]; then
+ mkdir "${DATA_DIR}/tf_train2017"
+ fi
+ if [ ! -d "${DATA_DIR}/tf_val2017" ]; then
+ mkdir "${DATA_DIR}/tf_val2017"
+ fi
+ mv ${DATA_DIR}/coco_testdev.record* ${DATA_DIR}/tf_test2017
+ mv ${DATA_DIR}/coco_train.record* ${DATA_DIR}/tf_train2017
+ mv ${DATA_DIR}/coco_val.record* ${DATA_DIR}/tf_val2017
+}
+
+function convert {
+ cd models/research
+ protoc object_detection/protos/*.proto --python_out=.
+ export PYTHONPATH=$PYTHONPATH:$(pwd)
+ export PYTHONPATH=$PYTHONPATH:$(pwd)/slim
+ python ./object_detection/dataset_tools/create_coco_tf_record.py --logtostderr \
+ --train_image_dir=empty_dir \
+ --val_image_dir="${VAL_IMAGE_DIR}" \
+ --test_image_dir=empty_dir \
+ --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
+ --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
+ --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
+ --output_dir="${OUTPUT_DIR}"
+}
+
+function convert_to_tf_record {
+ download_tf_models_repo
+ convert
+ divide_tf_records_by_dataset
+}
+
+# download_dataset
+function download_dataset {
+ if [ ! -d "${DATA_DIR}" ]; then
+ mkdir "${DATA_DIR}"
+ fi
+
+ cd "${DATA_DIR}" || exit
+  if [ ! -d "${VAL_IMAGE_DIR}" ]; then
+
+ for dataset_dowload_link in $DATA_URL_LIST; do
+ wget "$dataset_dowload_link"
+ done
+ for package in $PACKAGES_LIST; do
+ unzip -o "$package"
+ done
+ remove_zipped_packages
+ if [ ! -d empty_dir ]; then
+ mkdir empty_dir
+ fi
+
+ cd annotations || exit
+ echo "{ \"images\": {}, \"categories\": {}}" > empty.json
+ cd ..
+ else
+      echo "Dataset ${DATA_NAME} already exists!"
+ fi
+
+ cd ../
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/requirements.txt b/examples/3.x_api/tensorflow/object_detection/requirements.txt
new file mode 100644
index 00000000000..865df0f3a6b
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/requirements.txt
@@ -0,0 +1,8 @@
+Cython
+contextlib2
+pillow>=8.2.0
+lxml>=4.6.2
+matplotlib
+numpy>=1.17.4
+pycocotools
+protobuf
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md
new file mode 100644
index 00000000000..1b52ecf8b17
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md
@@ -0,0 +1,160 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Recommend python 3.6 or higher version.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Installation Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+pip install -r requirements.txt
+cd ssd_mobilenet_v1/quantization/ptq
+```
+
+### Install Protocol Buffer Compiler
+
+`Protocol Buffer Compiler` in version higher than 3.0.0 is necessary ingredient for automatic COCO dataset preparation. To install please follow
+[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Model
+
+### Automated approach
+Run the `prepare_model.py` script located in `examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq`.
+
+```
+python prepare_model.py --model_name=ssd_mobilenet_v1 --model_path=./
+
+Prepare pre-trained model for COCO object detection
+
+optional arguments:
+ -h, --help show this help message and exit
+ --model_name {ssd_resnet50_v1,ssd_mobilenet_v1}
+ model to download, default is ssd_resnet50_v1
+ --model_path MODEL_PATH
+ directory to put models, default is ./model
+```
+
+### Manual approach
+
+```shell
+wget http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz
+tar -xvzf ssd_mobilenet_v1_coco_2018_01_28.tar.gz
+```
+
+## 3. Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+. prepare_dataset.sh
+cd ssd_mobilenet_v1/quantization/ptq
+```
+
+This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to
+tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script.
+
+### Manual dataset download
+Download CoCo Dataset from [Official Website](https://cocodataset.org/#download).
+
+
+# Run Command
+
+Now we support both pb and ckpt formats.
+
+## 1. Quantization
+### For PB format
+
+ ```shell
+ bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+ ```
+
+### For ckpt format
+
+ ```shell
+ bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/ --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+ ```
+
+## 2. Benchmark
+ ```shell
+ # run performance benchmark
+ bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+
+ # run accuracy benchmark
+ bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy
+ ```
+
+Details of enabling Intel® Neural Compressor on ssd_mobilenet_v1 for Tensorflow.
+=========================
+
+This is a tutorial of how to enable ssd_mobilenet_v1 model with Intel® Neural Compressor.
+## User Code Analysis
+User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself.
+
+For ssd_mobilenet_v1, we apply this approach because our philosophy is to enable the model with minimal changes. Hence we need to make two changes to the original code: the first is to implement the *q_dataloader*, and the second is to make the necessary changes to the *eval_func*.
+
+### Code update
+
+After prepare step is done, we just need update main.py like below.
+```python
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ model = Model(args.input_graph)
+ model.input_tensor_names = ['image_tensor']
+ model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ q_model.save(args.output_model)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ else:
+ accuracy = evaluate(args.input_graph)
+ print('Batch size = %d' % args.batch_size)
+ print("Accuracy: %.5f" % accuracy)
+```
+
+The `quantize_model` function will return the best quantized model found within the timeout constraint.
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Wrappers for third party pycocotools to be used within object_detection.
+
+Note that nothing in this file is tensorflow related and thus cannot
+be called directly as a slim metric, for example.
+
+TODO(jonathanhuang): wrap as a slim metric in metrics.py
+
+
+Usage example: given a set of images with ids in the list image_ids
+and corresponding lists of numpy arrays encoding groundtruth (boxes and classes)
+and detections (boxes, scores and classes), where elements of each list
+correspond to detections/annotations of a single image,
+then evaluation (in multi-class mode) can be invoked as follows:
+
+ groundtruth_dict = coco_tools.ExportGroundtruthToCOCO(
+ image_ids, groundtruth_boxes_list, groundtruth_classes_list,
+ max_num_classes, output_path=None)
+ detections_list = coco_tools.ExportDetectionsToCOCO(
+ image_ids, detection_boxes_list, detection_scores_list,
+ detection_classes_list, output_path=None)
+ groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+ detections = groundtruth.LoadAnnotations(detections_list)
+ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+ agnostic_mode=False)
+ metrics = evaluator.ComputeMetrics()
+"""
+
+import copy
+import time
+from collections import OrderedDict
+from typing import Any, Dict, List, Set, Union
+
+import numpy as np
+from pycocotools import coco, cocoeval, mask
+
+from neural_compressor.utils import logger
+
+
+class COCOWrapper(coco.COCO):
+ """Wrapper for the pycocotools COCO class.
+
+ Attributes:
+ dataset: a dictionary holding bounding box annotations in the COCO format.
+ detection_type: type of detections being wrapped. Can be one of ['bbox',
+ 'segmentation']
+ """
+
+ def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"):
+ """Construct a COCOWrapper.
+
+ See http://mscoco.org/dataset/#format for a description of the format.
+ By default, the coco.COCO class constructor reads from a JSON file.
+ This function duplicates the same behavior but loads from a dictionary,
+ allowing us to perform evaluation without writing to external storage.
+
+ Args:
+ dataset: a dictionary holding bounding box annotations in the COCO format.
+ detection_type: type of detections being wrapped. Can be one of ['bbox',
+ 'segmentation']
+
+ Raises:
+ ValueError: if detection_type is unsupported.
+ """
+ supported_detection_types = ["bbox", "segmentation"]
+ if detection_type not in supported_detection_types:
+ raise ValueError(
+ "Unsupported detection type: {}. "
+ "Supported values are: {}".format(detection_type, supported_detection_types)
+ )
+ self._detection_type = detection_type
+ coco.COCO.__init__(self)
+ self.dataset = dataset
+ self.createIndex()
+
+ def LoadAnnotations(self, annotations: list) -> coco.COCO:
+ """Load annotations dictionary into COCO datastructure.
+
+ See http://mscoco.org/dataset/#format for a description of the annotations
+ format. As above, this function replicates the default behavior of the API
+ but does not require writing to external storage.
+
+ Args:
+ annotations: python list holding object detection results where each
+ detection is encoded as a dict with required keys ['image_id',
+ 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on
+ `detection_type`.
+
+ Returns:
+ a coco.COCO datastructure holding object detection annotations results
+
+ Raises:
+ ValueError: if (1) annotations is not a list or annotations do not
+ correspond to the images contained in self.
+ """
+ results = coco.COCO()
+ results.dataset["images"] = [img for img in self.dataset["images"]]
+
+ logger.info("Load and prepare annotation results.")
+ tic = time.time()
+
+ if not isinstance(annotations, list):
+ raise ValueError("annotations is not a list of objects")
+ annotation_img_ids = [ann["image_id"] for ann in annotations]
+ if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())):
+ raise ValueError("Results do not correspond to current coco set")
+ results.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ if self._detection_type == "bbox":
+ for idx, ann in enumerate(annotations):
+ bb = ann["bbox"]
+ ann["area"] = bb[2] * bb[3]
+ ann["id"] = idx + 1
+ ann["iscrowd"] = 0
+ elif self._detection_type == "segmentation":
+ for idx, ann in enumerate(annotations):
+ ann["area"] = mask.area(ann["segmentation"])
+ ann["bbox"] = mask.toBbox(ann["segmentation"])
+ ann["id"] = idx + 1
+ ann["iscrowd"] = 0
+ logger.info("DONE (t=%0.2fs)", (time.time() - tic))
+
+ results.dataset["annotations"] = annotations
+ results.createIndex()
+ return results
+
+
+class COCOEvalWrapper(cocoeval.COCOeval):
+ """Wrapper for the pycocotools COCOeval class.
+
+ To evaluate, create two objects (groundtruth_dict and detections_list)
+ using the conventions listed at http://mscoco.org/dataset/#format.
+ Then call evaluation as follows:
+
+ groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+ detections = groundtruth.LoadAnnotations(detections_list)
+ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+ agnostic_mode=False)
+ metrics = evaluator.ComputeMetrics()
+ """
+
+ def __init__(
+ self,
+ groundtruth: coco.COCO = None,
+ detections: coco.COCO = None,
+ agnostic_mode=False,
+ iou_type: str = "bbox",
+ iou_thrs: Union[str, float] = None,
+ map_points=None,
+ ):
+ """Construct a COCOEvalWrapper.
+
+ Note that for the area-based metrics to be meaningful, detection and
+ groundtruth boxes must be in image coordinates measured in pixels.
+
+ Args:
+ groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding
+ groundtruth annotations
+ detections: a coco.COCO (or coco_tools.COCOWrapper) object holding
+ detections
+ agnostic_mode: boolean (default: False). If True, evaluation ignores
+ class labels, treating all detections as proposals.
+ iou_thrs: Minimal value for intersection over union that allows to
+ make decision that prediction bounding box is true positive.
+ You can specify one float value between 0 to 1 or
+ string "05:0.05:0.95" for standard COCO thresholds.
+ iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`.
+ map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
+ 11-point interpolated AP, 0 for area under PR curve.
+ """
+ cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type)
+ if agnostic_mode:
+ self.params.useCats = 0
+ if iou_thrs == "0.5:0.05:0.95":
+ self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+ elif isinstance(iou_thrs, float):
+ self.params.iouThrs = [iou_thrs]
+
+ if map_points == 101:
+ self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ if map_points == 11:
+ self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True)
+ if map_points == 0:
+ self.params.recThrs = [-1]
+
+ def GetCategory(self, category_id: int) -> dict:
+ """Fetch dictionary holding category information given category id.
+
+ Args:
+ category_id: integer id
+
+ Returns:
+ dictionary holding 'id', 'name'.
+ """
+ return self.cocoGt.cats[category_id]
+
+ def GetAgnosticMode(self) -> bool:
+ """Return whether COCO Eval is configured to evaluate in agnostic mode."""
+ return self.params.useCats == 0
+
+ def GetCategoryIdList(self) -> List[int]:
+ """Return the list of IDs of all valid categories."""
+ return self.params.catIds
+
+ def accumulate(self, p: cocoeval.Params = None):
+ """Accumulate evaluation results per image and store it to self.eval.
+
+ Args:
+ p: input params for evaluation
+ """
+ print("Accumulating evaluation results...")
+ tic = time.time()
+ if not self.evalImgs:
+ print("Please run evaluate() first")
+ # allows input customized parameters
+ if p is None:
+ p = self.params
+ p.catIds = p.catIds if p.useCats == 1 else [-1]
+ T = len(p.iouThrs)
+ R = len(p.recThrs)
+ K = len(p.catIds) if p.useCats else 1
+ A = len(p.areaRng)
+ M = len(p.maxDets)
+ precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories
+ recall = -np.ones((T, K, A, M))
+ scores = -np.ones((T, R, K, A, M))
+
+ # create dictionary for future indexing
+ _pe = self._paramsEval
+ print("-pe", _pe)
+ catIds = _pe.catIds if _pe.useCats else [-1]
+ setK = set(catIds)
+ setA = set(map(tuple, _pe.areaRng))
+ setM = set(_pe.maxDets)
+ setI = set(_pe.imgIds)
+ # get inds to evaluate
+ k_list = [n for n, k in enumerate(p.catIds) if k in setK]
+ m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
+ a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
+ i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
+ I0 = len(_pe.imgIds)
+ A0 = len(_pe.areaRng)
+ # retrieve E at each category, area range, and max number of detections
+ for k, k0 in enumerate(k_list):
+ Nk = k0 * A0 * I0
+ for a, a0 in enumerate(a_list):
+ Na = a0 * I0
+ for m, maxDet in enumerate(m_list):
+ E = [self.evalImgs[Nk + Na + i] for i in i_list]
+ E = [e for e in E if e is not None]
+ if len(E) == 0:
+ continue
+ dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
+
+ # different sorting method generates slightly different results.
+ # mergesort is used to be consistent as Matlab implementation.
+ inds = np.argsort(-dtScores, kind="mergesort")
+ dtScoresSorted = dtScores[inds]
+
+ dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ gtIg = np.concatenate([e["gtIgnore"] for e in E])
+ npig = np.count_nonzero(gtIg == 0)
+ if npig == 0:
+ continue
+ tps = np.logical_and(dtm, np.logical_not(dtIg))
+ fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
+
+ tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32)
+ fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32)
+ for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+ tp = np.array(tp)
+ fp = np.array(fp)
+ nd = len(tp)
+ rc = tp / npig
+ pr = tp / (fp + tp + np.spacing(1))
+
+ # calculate precision
+ if R == 1:
+ rc = np.concatenate(([0.0], rc, [1.0]))
+ pr = np.concatenate(([0.0], pr, [0.0]))
+
+ # compute the precision envelope
+ for i in range(pr.size - 1, 0, -1):
+ pr[i - 1] = np.maximum(pr[i - 1], pr[i])
+
+ # to calculate area under PR curve, look for points
+ # where X axis (recall) changes value
+ change_point = np.where(rc[1:] != rc[:-1])[0]
+ # and sum (\Delta recall) * recall
+ res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1])
+ precision[t, :, k, a, m] = np.array([res])
+ else:
+ q = np.zeros((R,))
+
+ # numpy is slow without cython optimization for accessing elements
+ # use python array gets significant speed improvement
+ pr = pr.tolist()
+ q = q.tolist()
+
+ for i in range(nd - 1, 0, -1):
+ if pr[i] > pr[i - 1]:
+ pr[i - 1] = pr[i]
+
+ inds = np.searchsorted(rc, p.recThrs, side="left")
+ try:
+ for ri, pi in enumerate(inds):
+ q[ri] = pr[pi]
+ except:
+ pass
+ precision[t, :, k, a, m] = np.array(q)
+
+ # calculate recall
+ if nd:
+ recall[t, k, a, m] = rc[-1]
+ else:
+ recall[t, k, a, m] = 0
+
+ # calculate score
+ ss = np.zeros((R,))
+ inds = np.searchsorted(rc, p.recThrs, side="left")
+ try:
+ for ri, pi in enumerate(inds):
+ ss[ri] = dtScoresSorted[pi]
+ except:
+ pass
+ scores[t, :, k, a, m] = np.array(ss)
+ # exit(0)
+ self.eval = {
+ "params": p,
+ "counts": [T, R, K, A, M],
+ "precision": precision,
+ "recall": recall,
+ "scores": scores,
+ }
+ toc = time.time()
+ print("DONE (t={:0.2f}s).".format(toc - tic))
+
+ def ComputeMetrics(
+ self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False
+ ): # pragma: no cover
+ """Compute detection metrics.
+
+ Args:
+ include_metrics_per_category: Whether include metrics per category.
+ all_metrics_per_category: Whether include all the summery metrics for
+ each category in per_category_ap. Be careful with setting it to true if
+ you have more than handful of categories, because it will pollute
+ your mldash.
+
+ Returns:
+ A tuple of (summary_metrics, per_category_ap), in which
+ (1) summary_metrics is a dictionary holding:
+ 'Precision/mAP': mean average precision over classes averaged over IOU
+ thresholds ranging from .5 to .95 with .05 increments;
+ 'Precision/mAP@.50IOU': mean average precision at 50% IOU;
+ 'Precision/mAP@.75IOU': mean average precision at 75% IOU;
+ 'Precision/mAP (small)': mean average precision for small objects
+ (area < 32^2 pixels);
+ 'Precision/mAP (medium)': mean average precision for medium sized
+ objects (32^2 pixels < area < 96^2 pixels);
+ 'Precision/mAP (large)': mean average precision for large objects
+ (96^2 pixels < area < 10000^2 pixels);
+ 'Recall/AR@1': average recall with 1 detection;
+ 'Recall/AR@10': average recall with 10 detections;
+ 'Recall/AR@100': average recall with 100 detections;
+ 'Recall/AR@100 (small)': average recall for small objects with 100
+ detections;
+ 'Recall/AR@100 (medium)': average recall for medium objects with 100
+ detections;
+ 'Recall/AR@100 (large)': average recall for large objects with 100
+ detections;
+ and (2) per_category_ap is a dictionary holding category specific results with
+ keys of the form: 'Precision mAP ByCategory/category'
+ (without the supercategory part if no supercategories exist).
+
+ For backward compatibility 'PerformanceByCategory' is included in the
+ output regardless of all_metrics_per_category. If evaluating class-agnostic
+ mode, per_category_ap is an empty dictionary.
+
+ Raises:
+ ValueError: If category_stats does not exist.
+ """
+ self.evaluate()
+ self.accumulate()
+ self.summarize()
+
+ summary_metrics = OrderedDict(
+ [
+ ("Precision/mAP", self.stats[0]),
+ ("Precision/mAP@.50IOU", self.stats[1]),
+ ("Precision/mAP@.75IOU", self.stats[2]),
+ ("Precision/mAP (small)", self.stats[3]),
+ ("Precision/mAP (medium)", self.stats[4]),
+ ("Precision/mAP (large)", self.stats[5]),
+ ("Recall/AR@1", self.stats[6]),
+ ("Recall/AR@10", self.stats[7]),
+ ("Recall/AR@100", self.stats[8]),
+ ("Recall/AR@100 (small)", self.stats[9]),
+ ("Recall/AR@100 (medium)", self.stats[10]),
+ ("Recall/AR@100 (large)", self.stats[11]),
+ ]
+ )
+ if not include_metrics_per_category:
+ return summary_metrics, {}
+ if not hasattr(self, "category_stats"):
+ raise ValueError("Category stats do not exist")
+ per_category_ap = OrderedDict([])
+ if self.GetAgnosticMode():
+ return summary_metrics, per_category_ap
+ for category_index, category_id in enumerate(self.GetCategoryIdList()):
+ category = self.GetCategory(category_id)["name"]
+ # Kept for backward compatilbility
+ # pylint: disable=no-member
+ per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index]
+ if all_metrics_per_category:
+ per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index]
+ per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][
+ category_index
+ ]
+ per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][
+ category_index
+ ]
+ per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][
+ category_index
+ ]
+ per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][
+ category_index
+ ]
+ per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][
+ category_index
+ ]
+ per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = self.category_stats[6][category_index]
+ per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index]
+ per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index]
+ per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][
+ category_index
+ ]
+ per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][
+ category_index
+ ]
+ per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][
+ category_index
+ ]
+
+ return summary_metrics, per_category_ap
+
+
+def _ConvertBoxToCOCOFormat(box):
+    """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format.
+
+    This is a utility function for converting from our internal
+    [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
+    i.e., [xmin, ymin, width, height].
+
+    Args:
+        box: a numpy array in format of [ymin, xmin, ymax, xmax]
+
+    Returns:
+        A list of floats, in COCO format, representing [xmin, ymin, width, height]
+    """
+    # Swap the corner order (y-first -> x-first) and convert the max corner
+    # into extents: width = xmax - xmin, height = ymax - ymin.
+    return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])]
+
+
+def _RleCompress(masks):
+    """Compresses mask using Run-length encoding provided by pycocotools.
+
+    Args:
+        masks: uint8 numpy array of shape [mask_height, mask_width] with values in
+            {0, 1}.
+
+    Returns:
+        A pycocotools Run-length encoding of the mask.
+    """
+    # NOTE(review): `mask` is presumably the pycocotools mask module imported
+    # at the top of this file (outside this view) — confirm. encode() requires
+    # a Fortran-contiguous array, hence np.asfortranarray.
+    return mask.encode(np.asfortranarray(masks))
+
+
+def ExportSingleImageGroundtruthToCoco(
+    image_id: Union[int, str],
+    next_annotation_id: int,
+    category_id_set: Set[str],
+    groundtruth_boxes: np.array,
+    groundtruth_classes: np.array,
+    groundtruth_masks: Union[np.array, None] = None,
+    groundtruth_is_crowd: Union[np.array, None] = None,
+) -> list:
+    """Export groundtruth of a single image to COCO format.
+
+    This function converts groundtruth detection annotations represented as numpy
+    arrays to dictionaries that can be ingested by the COCO evaluation API. Note
+    that the image_ids provided here must match the ones given to
+    ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in
+    correspondence - that is: groundtruth_boxes[i, :], and
+    groundtruth_classes[i] are associated with the same groundtruth annotation.
+
+    In the exported result, "area" fields are always set to the area of the
+    groundtruth bounding box.
+
+    Args:
+        image_id: a unique image identifier either of type integer or string.
+        next_annotation_id: integer specifying the first id to use for the
+            groundtruth annotations. All annotations are assigned a continuous integer
+            id starting from this value.
+        category_id_set: A set of valid class ids. Groundtruth with classes not in
+            category_id_set are dropped.
+        groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
+        groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
+        groundtruth_masks: optional uint8 numpy array of shape [num_detections,
+            image_height, image_width] containing detection_masks.
+        groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes]
+            indicating whether groundtruth boxes are crowd.
+
+    Returns:
+        A list of groundtruth annotations for a single image in the COCO format.
+
+    Raises:
+        ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
+            right lengths or (2) if each of the elements inside these lists do not
+            have the correct shapes or (3) if image_ids are not integers
+    """
+    # Validate ranks/shapes before export so errors surface with a clear message
+    # instead of failing deep inside pycocotools.
+    if len(groundtruth_classes.shape) != 1:
+        raise ValueError("groundtruth_classes is " "expected to be of rank 1.")
+    if len(groundtruth_boxes.shape) != 2:
+        raise ValueError("groundtruth_boxes is expected to be of " "rank 2.")
+    if groundtruth_boxes.shape[1] != 4:
+        raise ValueError("groundtruth_boxes should have " "shape[1] == 4.")
+    num_boxes = groundtruth_classes.shape[0]
+    if num_boxes != groundtruth_boxes.shape[0]:
+        raise ValueError(
+            "Corresponding entries in groundtruth_classes, "
+            "and groundtruth_boxes should have "
+            "compatible shapes (i.e., agree on the 0th dimension)."
+            "Classes shape: %d. Boxes shape: %d. Image ID: %s"
+            % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id)
+        )
+    has_is_crowd = groundtruth_is_crowd is not None
+    if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
+        raise ValueError("groundtruth_is_crowd is expected to be of rank 1.")
+    groundtruth_list = []
+    for i in range(num_boxes):
+        # Boxes whose class id is not in category_id_set are silently dropped.
+        if groundtruth_classes[i] in category_id_set:
+            iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
+            export_dict = {
+                # Annotation ids are consecutive starting at next_annotation_id;
+                # dropped boxes still consume an id (i is the loop index).
+                "id": next_annotation_id + i,
+                "image_id": image_id,
+                "category_id": int(groundtruth_classes[i]),
+                "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
+                # Area of the axis-aligned box: (ymax - ymin) * (xmax - xmin).
+                "area": float(
+                    (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0])
+                    * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])
+                ),
+                "iscrowd": iscrowd,
+            }
+            if groundtruth_masks is not None:
+                export_dict["segmentation"] = _RleCompress(groundtruth_masks[i])
+            groundtruth_list.append(export_dict)
+    return groundtruth_list
+
+
+def ExportSingleImageDetectionBoxesToCoco(
+    image_id: Union[int, str],
+    category_id_set: Set[int],
+    detection_boxes: np.array,
+    detection_scores: np.array,
+    detection_classes: np.array,
+) -> list:
+    """Export detections of a single image to COCO format.
+
+    This function converts detections represented as numpy arrays to dictionaries
+    that can be ingested by the COCO evaluation API. Note that the image_ids
+    provided here must match the ones given to the
+    ExportSingleImageGroundtruthToCoco. We assume that boxes, and classes are in
+    correspondence - that is: boxes[i, :], and classes[i]
+    are associated with the same groundtruth annotation.
+
+    Args:
+        image_id: unique image identifier either of type integer or string.
+        category_id_set: A set of valid class ids. Detections with classes not in
+            category_id_set are dropped.
+        detection_boxes: float numpy array of shape [num_detections, 4] containing
+            detection boxes.
+        detection_scores: float numpy array of shape [num_detections] containing
+            scores for the detection boxes.
+        detection_classes: integer numpy array of shape [num_detections] containing
+            the classes for detection boxes.
+
+    Returns:
+        A list of detection annotations for a single image in the COCO format.
+
+    Raises:
+        ValueError: if (1) detection_boxes, detection_scores and detection_classes
+            do not have the right lengths or (2) if each of the elements inside these
+            lists do not have the correct shapes or (3) if image_ids are not integers.
+    """
+    # Validate ranks/shapes up front for a clearer error than pycocotools gives.
+    if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
+        raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.")
+    if len(detection_boxes.shape) != 2:
+        raise ValueError("All entries in detection_boxes expected to be of " "rank 2.")
+    if detection_boxes.shape[1] != 4:
+        raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.")
+    num_boxes = detection_classes.shape[0]
+    if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]:
+        raise ValueError(
+            "Corresponding entries in detection_classes, "
+            "detection_scores and detection_boxes should have "
+            "compatible shapes (i.e., agree on the 0th dimension). "
+            "Classes shape: %d. Boxes shape: %d. "
+            "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0])
+        )
+    detections_list = []
+    for i in range(num_boxes):
+        # Detections with class ids outside category_id_set are silently dropped.
+        if detection_classes[i] in category_id_set:
+            detections_list.append(
+                {
+                    "image_id": image_id,
+                    "category_id": int(detection_classes[i]),
+                    "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])),
+                    "score": float(detection_scores[i]),
+                }
+            )
+    return detections_list
+
+
+def ExportSingleImageDetectionMasksToCoco(
+    image_id: Union[str, int],
+    category_id_set: Set[int],
+    detection_masks: np.array,
+    detection_scores: np.array,
+    detection_classes: np.array,
+) -> list:
+    """Export detection masks of a single image to COCO format.
+
+    This function converts detections represented as numpy arrays to dictionaries
+    that can be ingested by the COCO evaluation API. We assume that
+    detection_masks, detection_scores, and detection_classes are in correspondence
+    - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i]
+    are associated with the same annotation.
+
+    Args:
+        image_id: unique image identifier either of type integer or string.
+        category_id_set: A set of valid class ids. Detections with classes not in
+            category_id_set are dropped.
+        detection_masks: uint8 numpy array of shape [num_detections, image_height,
+            image_width] containing detection_masks.
+        detection_scores: float numpy array of shape [num_detections] containing
+            scores for detection masks.
+        detection_classes: integer numpy array of shape [num_detections] containing
+            the classes for detection masks.
+
+    Returns:
+        A list of detection mask annotations for a single image in the COCO format.
+
+    Raises:
+        ValueError: if (1) detection_masks, detection_scores and detection_classes
+            do not have the right lengths or (2) if each of the elements inside these
+            lists do not have the correct shapes or (3) if image_ids are not integers.
+    """
+    if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
+        raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.")
+    num_boxes = detection_classes.shape[0]
+    # len() (not .shape[0]) on detection_masks also accepts a list of 2-D masks.
+    if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
+        raise ValueError(
+            "Corresponding entries in detection_classes, "
+            "detection_scores and detection_masks should have "
+            "compatible lengths and shapes "
+            "Classes length: %d. Masks length: %d. "
+            "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0])
+        )
+    detections_list = []
+    for i in range(num_boxes):
+        # Detections with class ids outside category_id_set are silently dropped.
+        if detection_classes[i] in category_id_set:
+            detections_list.append(
+                {
+                    "image_id": image_id,
+                    "category_id": int(detection_classes[i]),
+                    # Masks are stored as pycocotools RLE, the COCO segmentation format.
+                    "segmentation": _RleCompress(detection_masks[i]),
+                    "score": float(detection_scores[i]),
+                }
+            )
+    return detections_list
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..32e55adb3fd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py
@@ -0,0 +1,655 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import cv2
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+# Maps the interpolation names accepted by ResizeTFTransform to the
+# corresponding OpenCV flags passed to cv2.resize.
+interpolation_map = {
+    "nearest": cv2.INTER_NEAREST,
+    "bilinear": cv2.INTER_LINEAR,
+    "bicubic": cv2.INTER_CUBIC,
+}
+
+# COCO detection label map: category id -> category name, used by COCOmAPv2.
+# Ids follow the original COCO numbering, so some ids (12, 26, 29, 30, 45,
+# 66, 68, 69, 71, 83) are intentionally absent from this mapping.
+category_map = {
+    1: "person",
+    2: "bicycle",
+    3: "car",
+    4: "motorcycle",
+    5: "airplane",
+    6: "bus",
+    7: "train",
+    8: "truck",
+    9: "boat",
+    10: "traffic light",
+    11: "fire hydrant",
+    13: "stop sign",
+    14: "parking meter",
+    15: "bench",
+    16: "bird",
+    17: "cat",
+    18: "dog",
+    19: "horse",
+    20: "sheep",
+    21: "cow",
+    22: "elephant",
+    23: "bear",
+    24: "zebra",
+    25: "giraffe",
+    27: "backpack",
+    28: "umbrella",
+    31: "handbag",
+    32: "tie",
+    33: "suitcase",
+    34: "frisbee",
+    35: "skis",
+    36: "snowboard",
+    37: "sports ball",
+    38: "kite",
+    39: "baseball bat",
+    40: "baseball glove",
+    41: "skateboard",
+    42: "surfboard",
+    43: "tennis racket",
+    44: "bottle",
+    46: "wine glass",
+    47: "cup",
+    48: "fork",
+    49: "knife",
+    50: "spoon",
+    51: "bowl",
+    52: "banana",
+    53: "apple",
+    54: "sandwich",
+    55: "orange",
+    56: "broccoli",
+    57: "carrot",
+    58: "hot dog",
+    59: "pizza",
+    60: "donut",
+    61: "cake",
+    62: "chair",
+    63: "couch",
+    64: "potted plant",
+    65: "bed",
+    67: "dining table",
+    70: "toilet",
+    72: "tv",
+    73: "laptop",
+    74: "mouse",
+    75: "remote",
+    76: "keyboard",
+    77: "cell phone",
+    78: "microwave",
+    79: "oven",
+    80: "toaster",
+    81: "sink",
+    82: "refrigerator",
+    84: "book",
+    85: "clock",
+    86: "vase",
+    87: "scissors",
+    88: "teddy bear",
+    89: "hair drier",
+    90: "toothbrush",
+}
+
+class ComposeTransform(object):
+    """Composes several transforms together.
+
+    Each transform must be a callable taking and returning a (image, label)
+    sample tuple; transforms are applied in list order.
+
+    Args:
+        transform_list (list of Transform objects): list of transforms to compose
+
+    Returns:
+        sample (tuple): tuple of processed image and label
+    """
+
+    def __init__(self, transform_list):
+        """Initialize `ComposeTransform` class."""
+        self.transform_list = transform_list
+
+    def __call__(self, sample):
+        """Call transforms in transform_list.
+
+        Args:
+            sample: the (image, label) tuple to be transformed.
+
+        Returns:
+            The sample after every transform has been applied in order.
+        """
+        for transform in self.transform_list:
+            sample = transform(sample)
+        return sample
+
+
+class ResizeTFTransform(object):
+    """Resize the input image to the given size.
+
+    Args:
+        size (list or int): Size of the result
+        interpolation (str, default='bilinear'):Desired interpolation type,
+            support 'bilinear', 'nearest', 'bicubic'
+
+    Returns:
+        tuple of processed image and label
+    """
+
+    def __init__(self, size, interpolation="bilinear"):
+        """Initialize `ResizeTFTransform` class."""
+        if isinstance(size, int):
+            self.size = size, size
+        elif isinstance(size, list):
+            # NOTE(review): if `size` is a list with more than two elements (or
+            # any non-int, non-list value), self.size is never assigned and
+            # __call__ will fail with AttributeError — confirm callers only
+            # pass an int or a 1/2-element list.
+            if len(size) == 1:
+                self.size = size[0], size[0]
+            elif len(size) == 2:
+                self.size = size[0], size[1]
+        self.interpolation = interpolation
+
+        if self.interpolation not in ["bilinear", "nearest", "bicubic"]:
+            raise ValueError("Unsupported interpolation type!")
+
+    def __call__(self, sample):
+        """Resize the input image in sample to the given size."""
+        image, label = sample
+        if isinstance(image, tf.Tensor):
+            image = tf.image.resize(image, self.size, method=self.interpolation)
+        else:
+            # NOTE(review): cv2.resize interprets its size argument as
+            # (width, height) while tf.image.resize uses (height, width);
+            # self.size is passed to both unchanged — for non-square sizes
+            # confirm this is intended.
+            image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation])
+        return (image, label)
+
+
+class BaseMetric(object):
+    """The base class of Metric."""
+
+    def __init__(self, metric, single_output=False, hvd=None):
+        """Initialize the basic metric.
+
+        Args:
+            metric: The metric class.
+            single_output: Whether the output is single or not, defaults to False.
+            hvd: The Horovod class for distributed training, defaults to None.
+        """
+        self._metric_cls = metric
+        self._single_output = single_output
+        self._hvd = hvd
+
+    def __call__(self, *args, **kwargs):
+        """Evaluate the model predictions, and the reference.
+
+        Instantiates the wrapped metric class with the given arguments and
+        stores it as self._metric (read via the `metric` property).
+
+        Returns:
+            The class itself.
+        """
+        self._metric = self._metric_cls(*args, **kwargs)
+        return self
+
+    @abstractmethod
+    def update(self, preds, labels=None, sample_weight=None):
+        """Update the state that need to be evaluated.
+
+        Args:
+            preds: The prediction result.
+            labels: The reference. Defaults to None.
+            sample_weight: The sampling weight. Defaults to None.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self):
+        """Clear the predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def result(self):
+        """Evaluate the difference between predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @property
+    def metric(self):
+        """Return its metric class.
+
+        Note: self._metric only exists after __call__ has been invoked;
+        accessing this property earlier raises AttributeError.
+
+        Returns:
+            The metric class.
+        """
+        return self._metric
+
+    @property
+    def hvd(self):
+        """Return its hvd class.
+
+        Returns:
+            The hvd class.
+        """
+        return self._hvd
+
+    @hvd.setter
+    def hvd(self, hvd):
+        """Set its hvd.
+
+        Args:
+            hvd: The Horovod class for distributed training.
+        """
+        self._hvd = hvd
+
+
+class COCOmAPv2(BaseMetric):
+ """Compute mean average precision of the detection task."""
+
+ def __init__(
+ self,
+ anno_path=None,
+ iou_thrs="0.5:0.05:0.95",
+ map_points=101,
+ map_key="DetectionBoxes_Precision/mAP",
+ output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2},
+ ):
+ """Initialize the metric.
+
+ Args:
+ anno_path: The path of annotation file.
+ iou_thrs: Minimal value for intersection over union that allows to make decision
+ that prediction bounding box is true positive. You can specify one float value
+ between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds.
+ map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for
+ 11-point interpolated AP, 0 for area under PR curve.
+ map_key: The key that mapping to pycocotools COCOeval.
+ Defaults to 'DetectionBoxes_Precision/mAP'.
+ output_index_mapping: The output index mapping.
+ Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}.
+ """
+ self.output_index_mapping = output_index_mapping
+
+ if anno_path:
+ import os
+ import yaml
+
+ assert os.path.exists(anno_path), "Annotation path does not exists!"
+ with open(anno_path, "r") as f:
+ label_map = yaml.safe_load(f.read())
+ self.category_map_reverse = {k: v for k, v in label_map.items()}
+ else:
+ # label: index
+ self.category_map_reverse = {v: k for k, v in category_map.items()}
+ self.image_ids = []
+ self.ground_truth_list = []
+ self.detection_list = []
+ self.annotation_id = 1
+ self.category_map = category_map
+ self.category_id_set = set([cat for cat in self.category_map]) # index
+ self.iou_thrs = iou_thrs
+ self.map_points = map_points
+ self.map_key = map_key
+
+ def update(self, predicts, labels, sample_weight=None):
+ """Add the predictions and labels.
+
+ Args:
+ predicts: The predictions.
+ labels: The labels corresponding to the predictions.
+ sample_weight: The sample weight. Defaults to None.
+ """
+ from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco
+
+ detections = []
+ if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1:
+ for item in zip(*predicts):
+ detection = {}
+ num = int(item[self.output_index_mapping["num_detections"]])
+ detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num]
+ detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num]
+ detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num]
+ detections.append(detection)
+ else:
+ for item in zip(*predicts):
+ detection = {}
+ detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])
+ detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])
+ detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])
+ detections.append(detection)
+
+ bboxes, str_labels, int_labels, image_ids = labels
+ labels = []
+ if len(int_labels[0]) == 0:
+ for str_label in str_labels:
+ str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label]
+ labels.append([self.category_map_reverse[x] for x in str_label])
+ elif len(str_labels[0]) == 0:
+ for int_label in int_labels:
+ labels.append([x for x in int_label])
+
+ for idx, image_id in enumerate(image_ids):
+ image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8")
+ if image_id in self.image_ids:
+ continue
+ self.image_ids.append(image_id)
+
+ ground_truth = {}
+ ground_truth["boxes"] = np.asarray(bboxes[idx])
+ ground_truth["classes"] = np.asarray(labels[idx])
+
+ self.ground_truth_list.extend(
+ ExportSingleImageGroundtruthToCoco(
+ image_id=image_id,
+ next_annotation_id=self.annotation_id,
+ category_id_set=self.category_id_set,
+ groundtruth_boxes=ground_truth["boxes"],
+ groundtruth_classes=ground_truth["classes"],
+ )
+ )
+ self.annotation_id += ground_truth["boxes"].shape[0]
+
+ self.detection_list.extend(
+ ExportSingleImageDetectionBoxesToCoco(
+ image_id=image_id,
+ category_id_set=self.category_id_set,
+ detection_boxes=detections[idx]["boxes"],
+ detection_scores=detections[idx]["scores"],
+ detection_classes=detections[idx]["classes"],
+ )
+ )
+
+ def reset(self):
+ """Reset the prediction and labels."""
+ self.image_ids = []
+ self.ground_truth_list = []
+ self.detection_list = []
+ self.annotation_id = 1
+
+ def result(self):
+ """Compute mean average precision.
+
+ Returns:
+ The mean average precision score.
+ """
+ from coco_tools import COCOEvalWrapper, COCOWrapper
+
+ if len(self.ground_truth_list) == 0:
+ logger.warning("Sample num during evaluation is 0.")
+ return 0
+ else:
+ groundtruth_dict = {
+ "annotations": self.ground_truth_list,
+ "images": [{"id": image_id} for image_id in self.image_ids],
+ "categories": [{"id": k, "name": v} for k, v in self.category_map.items()],
+ }
+ coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict)
+ coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list)
+ box_evaluator = COCOEvalWrapper(
+ coco_wrapped_groundtruth,
+ coco_wrapped_detections,
+ agnostic_mode=False,
+ iou_thrs=self.iou_thrs,
+ map_points=self.map_points,
+ )
+ box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
+ include_metrics_per_category=False, all_metrics_per_category=False
+ )
+ box_metrics.update(box_per_category_ap)
+ box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())}
+
+ return box_metrics[self.map_key]
+
+
+class ParseDecodeCoco:  # pragma: no cover
+    """Helper function for TensorflowModelZooBertDataset.
+
+    Parse the features from sample.
+    """
+
+    def __call__(self, sample):
+        """Parse the sample data.
+
+        Args:
+            sample: Data to be parsed (a serialized tf.Example proto).
+
+        Returns:
+            A tuple of (image_tensor, (bbox, str_label, int_label, image_id)),
+            where bbox has shape [num_boxes, 4] in [ymin, xmin, ymax, xmax] order.
+        """
+        # Dense features in Example proto.
+        feature_map = {
+            "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""),
+            "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string),
+            "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64),
+            "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""),
+        }
+        sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32)
+        # Sparse features in Example proto.
+        feature_map.update(
+            {
+                k: sparse_float32
+                for k in [
+                    "image/object/bbox/xmin",
+                    "image/object/bbox/ymin",
+                    "image/object/bbox/xmax",
+                    "image/object/bbox/ymax",
+                ]
+            }
+        )
+
+        features = tf.io.parse_single_example(sample, feature_map)
+
+        xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0)
+        ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0)
+        xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0)
+        ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0)
+
+        # Stack coordinates as [ymin, xmin, ymax, xmax] rows.
+        bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
+        # Force the variable number of bounding boxes into the shape
+        # [1, num_boxes, coords].
+        bbox = tf.expand_dims(bbox, 0)
+        bbox = tf.transpose(bbox, [0, 2, 1])
+
+        encoded_image = features["image/encoded"]
+        image_tensor = tf.image.decode_image(encoded_image, channels=3)
+        image_tensor.set_shape([None, None, 3])
+
+        str_label = features["image/object/class/text"].values
+        int_label = features["image/object/class/label"].values
+        image_id = features["image/source_id"]
+
+        return image_tensor, (bbox[0], str_label, int_label, image_id)
+
+
+class COCORecordDataset(object):
+ """Tensorflow COCO dataset in tf record format.
+
+ Root is a full path to tfrecord file, which contains the file name.
+ Please use Resize transform when batch_size > 1
+
+ Args: root (str): Root directory of dataset.
+ num_cores (int, default=28):The number of input Datasets to interleave from in parallel.
+ transform (transform object, default=None): transform to process input data.
+ filter (Filter objects, default=None): filter out examples according
+ to specific conditions.
+ """
+
+ def __new__(cls, root, num_cores=28, transform=None, filter=filter):
+ """Build a new object."""
+ record_iterator = tf.compat.v1.python_io.tf_record_iterator(root)
+ example = tf.train.SequenceExample()
+ for element in record_iterator:
+ example.ParseFromString(element)
+ break
+ feature = example.context.feature
+ if (
+ len(feature["image/object/class/text"].bytes_list.value) == 0
+ and len(feature["image/object/class/label"].int64_list.value) == 0
+ ):
+ raise ValueError(
+ "Tfrecord format is incorrect, please refer\
+ 'https://github.com/tensorflow/models/blob/master/research/\
+ object_detection/dataset_tools/create_coco_tf_record.py' to\
+ create correct tfrecord"
+ )
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.data.experimental import parallel_interleave
+
+ tfrecord_paths = [root]
+ ds = tf.data.TFRecordDataset.list_files(tfrecord_paths)
+ ds = ds.apply(
+ parallel_interleave(
+ tf.data.TFRecordDataset,
+ cycle_length=num_cores,
+ block_length=5,
+ sloppy=True,
+ buffer_output_elements=10000,
+ prefetch_input_elements=10000,
+ )
+ )
+ if transform is not None:
+ transform.transform_list.insert(0, ParseDecodeCoco())
+ else:
+ transform = ParseDecodeCoco()
+ ds = ds.map(transform, num_parallel_calls=None)
+ if filter is not None:
+ ds = ds.filter(filter)
+ ds = ds.prefetch(buffer_size=1000)
+ return ds
+
+
+class TFDataLoader(object):
+ """Tensorflow dataloader class.
+
+ In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict
+ method to do session run, this dataloader is designed to satisfy the usage of feed dict
+ in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
+
+ Args:
+ dataset: obj. wrapper of needed data.
+ batch_size: int. batch size
+ """
+
+ def __init__(self, dataset, batch_size=1, last_batch="rollover"):
+ """Initialize `TFDataDataLoader` class."""
+ self.dataset = dataset
+ self.last_batch = last_batch
+ self.batch_size = batch_size
+ dataset = dataset.batch(batch_size)
+
+ def batch(self, batch_size, last_batch="rollover"):
+ """Dataset return data per batch."""
+ drop_last = False if last_batch == "rollover" else True
+ self.batch_size = batch_size
+ self.dataset = self.dataset.batch(batch_size, drop_last)
+
+ def __iter__(self):
+ """Iterate dataloader."""
+ return self._generate_dataloader(
+ self.dataset,
+ batch_size=self.batch_size,
+ last_batch=self.last_batch,
+ )
+
+ def _generate_dataloader(
+ self,
+ dataset,
+ batch_size=1,
+ last_batch="rollover",
+ collate_fn=None,
+ sampler=None,
+ batch_sampler=None,
+ num_workers=None,
+ pin_memory=None,
+ distributed=False,
+ ):
+ """Yield data."""
+ drop_last = False if last_batch == "rollover" else True
+
+ def check_dynamic_shape(element_spec):
+ if isinstance(element_spec, collections.abc.Sequence):
+ return any([check_dynamic_shape(ele) for ele in element_spec])
+ elif isinstance(element_spec, tf.TensorSpec):
+ return True if element_spec.shape.num_elements() is None else False
+ else:
+ raise ValueError("unrecognized element spec...")
+
+ def squeeze_output(output):
+ if isinstance(output, collections.abc.Sequence):
+ return [squeeze_output(ele) for ele in output]
+ elif isinstance(output, np.ndarray):
+ return np.squeeze(output, axis=0)
+ else:
+ raise ValueError("not supported output format....")
+
+ if tf.executing_eagerly():
+ index = 0
+ outputs = []
+ for iter_tensors in dataset:
+ samples = []
+ iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1]
+ if isinstance(iter_inputs, tf.Tensor):
+ samples.append(iter_inputs.numpy())
+ else:
+ samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs))
+ if isinstance(iter_labels, tf.Tensor):
+ samples.append(iter_labels.numpy())
+ else:
+ samples.append([np.array(l) for l in iter_labels])
+ index += 1
+ outputs.append(samples)
+ if index == batch_size:
+ outputs = default_collate(outputs)
+ yield outputs
+ outputs = []
+ index = 0
+ if len(outputs) > 0:
+ outputs = default_collate(outputs)
+ yield outputs
+ else:
+ try_single_batch = check_dynamic_shape(dataset.element_spec)
+ dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last)
+ ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+ iter_tensors = ds_iterator.get_next()
+ data_config = tf.compat.v1.ConfigProto()
+ data_config.use_per_session_threads = 1
+ data_config.intra_op_parallelism_threads = 1
+ data_config.inter_op_parallelism_threads = 16
+ data_sess = tf.compat.v1.Session(config=data_config)
+ # pylint: disable=no-name-in-module
+ from tensorflow.python.framework.errors_impl import OutOfRangeError
+
+ while True:
+ if not try_single_batch:
+ try:
+ outputs = data_sess.run(iter_tensors)
+ yield outputs
+ except OutOfRangeError:
+ data_sess.close()
+ return
+ else:
+ try:
+ outputs = []
+ for i in range(0, batch_size):
+ outputs.append(squeeze_output(data_sess.run(iter_tensors)))
+ outputs = default_collate(outputs)
+ yield outputs
+ except OutOfRangeError:
+ if len(outputs) == 0:
+ data_sess.close()
+ return
+ else:
+ outputs = default_collate(outputs)
+ yield outputs
+ data_sess.close()
+ return
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py
new file mode 100644
index 00000000000..dbced65f2d7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py
@@ -0,0 +1,129 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+from __future__ import division
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from argparse import ArgumentParser
+from data_process import (
+ COCOmAPv2,
+ COCORecordDataset,
+ ComposeTransform,
+ ResizeTFTransform,
+ TFDataLoader,
+)
+
+arg_parser = ArgumentParser(description='Parse args')
+
+arg_parser.add_argument('-g',
+ "--input-graph",
+ help='Specify the input graph.',
+ dest='input_graph')
+arg_parser.add_argument('--config', type=str, default='')
+arg_parser.add_argument('--dataset_location', type=str, default='')
+arg_parser.add_argument('--output_model', type=str, default='')
+arg_parser.add_argument('--mode', type=str, default='performance')
+arg_parser.add_argument('--batch_size', type=int, default=10)
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--tune', action='store_true', default=False)
+arg_parser.add_argument('--benchmark', dest='benchmark',
+ action='store_true', help='run benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model):
+ """Custom evaluate function to estimate the accuracy of the model.
+
+ Args:
+ model (tf.Graph or string or INC.model.TensorflowCheckpointModel): The input model.
+
+ Returns:
+ accuracy (float): evaluation result, the larger is better.
+ """
+ from neural_compressor.tensorflow import Model
+ if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph):
+ model = Model(model)
+ model.input_tensor_names = ["image_tensor:0"]
+ model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \
+ "detection_scores:0", "detection_classes:0"]
+ input_tensor = model.input_tensor
+ output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+ model.output_tensor[0]
+ warmup = 5
+ iteration = -1
+ if args.benchmark and args.mode == 'performance':
+ iteration = args.iters
+ metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3})
+
+ def eval_func(dataloader):
+ latency_list = []
+ for idx, (inputs, labels) in enumerate(dataloader):
+ # dataloader should keep the order and len of inputs same with input_tensor
+ inputs = np.array([inputs])
+ feed_dict = dict(zip(input_tensor, inputs))
+
+ start = time.time()
+ predictions = model.sess.run(output_tensor, feed_dict)
+ end = time.time()
+
+ metric.update(predictions, labels)
+ latency_list.append(end-start)
+ if idx + 1 == iteration:
+ break
+ latency = np.array(latency_list[warmup:]).mean() / args.batch_size
+ return latency
+
+ eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+ transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)]))
+ eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size)
+ latency = eval_func(eval_dataloader)
+ if args.benchmark and args.mode == 'performance':
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency * 1000))
+ print("Throughput: {:.3f} images/sec".format(1. / latency))
+ acc = metric.result()
+ return acc
+
+def main(_):
+ calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+ transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)]))
+ calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=args.batch_size)
+
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ model = Model(args.input_graph)
+ model.input_tensor_names = ['image_tensor']
+ model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ q_model.save(args.output_model)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_graph)
+ else:
+ accuracy = evaluate(args.input_graph)
+ print('Batch size = %d' % args.batch_size)
+ print("Accuracy: %.5f" % accuracy)
+
+if __name__ == "__main__":
+ tf.compat.v1.app.run()
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py
new file mode 100644
index 00000000000..51882cf0bfe
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py
@@ -0,0 +1,99 @@
+import os
+import argparse
+import enum
+import tarfile
+import abc
+
+
+class SupportedModels(enum.Enum):
+ """
+ Enumeration containing supported models
+ """
+ ssd_resnet50_v1 = 'ssd_resnet50_v1'
+ ssd_mobilnet_v1 = 'ssd_mobilenet_v1'
+
+
+class Model(abc.ABC):
+ """
+ Base model class used to obtain the model (and perform any necessary operations to make it usable)
+ """
+
+ @abc.abstractmethod
+ def get_pretrained_model(self, destination):
+ """
+ Base method for obtaining a ready to use model
+ Args:
+ destination: path to where the file should be stored
+ """
+ pass
+
+
+class SsdMobilenetV1(Model):
+ """ Concrete implementation of the Model base class for ssd_mobilenet_v1"""
+
+ def get_pretrained_model(self, destination):
+ """
+ Obtains a ready to use ssd_mobilenet_v1 model file.
+ Args:
+ destination: path to where the file should be stored
+ """
+ url = 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz'
+ os.system("curl -o ssd_mobilenet_v1_coco_2018_01_28.tar.gz {0}".format(url))
+ with tarfile.open("ssd_mobilenet_v1_coco_2018_01_28.tar.gz") as tar:
+ if not os.path.exists(destination):
+ os.makedirs(destination)
+ tar.extractall(destination)
+
+
+class SsdResnet50(Model):
+ """ Concrete implementation of the Model base class for ssd_resnet_50"""
+
+ def get_pretrained_model(self, destination):
+ """
+ Obtains a ready to use ssd_resnet_50 model file.
+ Args:
+ destination: path to where the file should be stored
+ """
+ url = "http://download.tensorflow.org/models/object_detection/" \
+ "ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz"
+ os.system("curl -o ssd_resnet50_v1.tar.gz {0}".format(url))
+ with tarfile.open("ssd_resnet50_v1.tar.gz") as tar:
+ if not os.path.exists(destination):
+ os.makedirs(destination)
+ tar.extractall(destination)
+
+
+def get_model(model: SupportedModels) -> Model:
+ """
+ Factory method that returns the requested model object
+ Args:
+ model: model from SupportedModels enumeration
+
+ Returns: Concrete object inheriting the Model base class
+
+ """
+ if model == SupportedModels.ssd_resnet50_v1:
+ return SsdResnet50()
+ if model == SupportedModels.ssd_mobilnet_v1:
+ return SsdMobilenetV1()
+ else:
+ raise AttributeError("The model {0} is not supported. Supported models: {1}"
+ .format(model, SupportedModels.__members__.keys()))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Prepare pre-trained model for COCO object detection')
+ parser.add_argument('--model_name', type=str, default='ssd_resnet50_v1',
+ help='model to download, default is ssd_resnet50_v1',
+ choices=["ssd_resnet50_v1", "ssd_mobilenet_v1"])
+ parser.add_argument('--model_path', type=str, default='./model', help='directory to put models, default is ./model')
+
+ args = parser.parse_args()
+ model_name = args.model_name
+ model_path = args.model_path
+ try:
+ model = get_model(SupportedModels(model_name))
+ model.get_pretrained_model(model_path)
+ except AttributeError:
+ print("The model {0} is not supported. Supported models: {1}"
+ .format(model_name, SupportedModels.__members__.keys()))
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..8ee728de373
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=32
+ iters=100
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+
+ python main.py \
+ --input-graph ${input_model} \
+ --mode ${mode} \
+ --dataset_location "${dataset_location}" \
+ --batch_size ${batch_size} \
+ --benchmark \
+ --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..559d695f768
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --input_model=*)
+ input_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo "$var" |cut -f2 -d=)
+ ;;
+ --dataset_location=*)
+ dataset_location=$(echo "$var" |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input-graph "${input_model}" \
+ --output_model "${output_model}" \
+ --dataset_location "${dataset_location}" \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md
new file mode 100644
index 00000000000..845e383cd59
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md
@@ -0,0 +1,96 @@
+This document describes the step-by-step instructions to reproduce the Yolo-v5 tuning result with Neural Compressor. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Recommend python 3.10 or higher version.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Tensorflow
+```shell
+pip install tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Installation Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare model
+
+Users can choose to automatically or manually download the model.
+### Automatic download
+
+Run the `prepare_model.sh` script.
+```shell
+. prepare_model.sh
+```
+
+This script will export the yolov5 model to `./yolov5/yolov5s.pb`.
+
+### Manual download
+
+To get a TensorFlow pretrained model, you need to export it from a PyTorch model. Clone the [Ultralytics yolov5 repository](https://github.com/ultralytics/yolov5.git).
+Generate the pretrained PyTorch model and then export to a Tensorflow supported format with the following commands:
+```shell
+python yolov5/models/tf.py --weights yolov5/yolov5s.pt
+python yolov5/export.py --weights yolov5/yolov5s.pt --include pb
+```
+
+The yolov5 model will be exported to `./yolov5/yolov5s.pb`.
+
+## 3. Prepare Dataset
+
+Users can choose to automatically or manually download the dataset.
+### Automatic download
+
+Run the `prepare_dataset.sh` script.
+```shell
+. prepare_dataset.sh
+```
+The validation set of coco2017 will be downloaded into a `./coco` folder.
+
+# Run
+
+## 1. Quantization
+```shell
+bash run_quant.sh --input_model=./yolov5/yolov5s.pb --output_model=yolov5s_int8.pb --dataset_location=/path/to/dataset
+```
+
+## 2. Benchmark
+```shell
+# run performance benchmark
+bash run_benchmark.sh --input_model=yolov5s_int8.pb --dataset_location=/path/to/dataset --mode=performance
+
+# run accuracy benchmark
+bash run_benchmark.sh --input_model=yolov5s_int8.pb --dataset_location=/path/to/dataset --mode=accuracy
+```
+
+Finally, the program will generate the quantized Yolo-v5 model with relative 1% loss.
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
new file mode 100644
index 00000000000..50d750344bc
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
@@ -0,0 +1,304 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+import argparse
+import os
+import sys
+import torch
+import numpy as np
+
+from pathlib import Path
+from tqdm import tqdm
+
+
+from yolov5.models.common import DetectMultiBackend
+from yolov5.utils.callbacks import Callbacks
+from yolov5.utils.dataloaders import create_dataloader
+from yolov5.utils.general import (
+ LOGGER,
+ TQDM_BAR_FORMAT,
+ Profile,
+ check_dataset,
+ check_img_size,
+ check_requirements,
+ check_yaml,
+ coco80_to_coco91_class,
+ colorstr,
+ increment_path,
+ non_max_suppression,
+ print_args,
+ scale_boxes,
+ xywh2xyxy,
+)
+from yolov5.utils.metrics import ap_per_class, box_iou
+from yolov5.utils.plots import output_to_target, plot_images, plot_val_study
+from yolov5.utils.torch_utils import select_device, smart_inference_mode
+
+from neural_compressor.tensorflow.utils import BaseModel, CpuInfo
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--dataset_location', type=str, default='/datasets/mnist', help='dataset path')
+parser.add_argument('--input_model', type=str, default='yolov5s.pb', help='input model path(s)')
+parser.add_argument('--output_model', type=str, default='yolov5s_int8.pb', help='output model path(s)')
+parser.add_argument('--batch_size', type=int, default=128, help='batch size')
+parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='inference size (pixels)')
+parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+parser.add_argument('--verbose', nargs='?', const=True, default=False, help='verbose output')
+parser.add_argument('--project', default='evaluate/val-cls', help='save to project/name')
+parser.add_argument('--name', default='exp', help='save to project/name')
+parser.add_argument('--tune', action="store_true", help='whether to apply quantization')
+parser.add_argument('--benchmark', action="store_true", help='whether to run benchmark')
+parser.add_argument('--mode', type=str, default='performance', help='run performance or accuracy benchmark')
+parser.add_argument('--iteration', type=int, default=100, help='iteration for calibration or evaluation')
+args = parser.parse_args()
+
+def process_batch(detections, labels, iouv):
+ """
+ Return correct prediction matrix.
+
+ Arguments:
+ detections (array[N, 6]), x1, y1, x2, y2, conf, class
+ labels (array[M, 5]), class, x1, y1, x2, y2
+ Returns:
+ correct (array[N, 10]), for 10 IoU levels
+ """
+ correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+ iou = box_iou(labels[:, 1:], detections[:, :4])
+ correct_class = labels[:, 0:1] == detections[:, 5]
+ for i in range(len(iouv)):
+ x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
+ if x[0].shape[0]:
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
+ if x[0].shape[0] > 1:
+ matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+ # matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+ correct[matches[:, 1].astype(int), i] = True
+ return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+
+@smart_inference_mode()
+def evaluate(
+ model, # model.pt path(s)
+ source=args.dataset_location,
+ imgsz=640, # inference size (pixels)
+ conf_thres=0.001, # confidence threshold
+ iou_thres=0.6, # NMS IoU threshold
+ max_det=300, # maximum detections per image
+ task="val", # train, val, test, speed or study
+ device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
+ workers=8, # max dataloader workers (per RANK in DDP mode)
+ single_cls=False, # treat as single-class dataset
+ verbose=False, # verbose output
+ project=args.project, # save to project/name
+ name="exp", # save to project/name
+ exist_ok=False, # existing project/name ok, do not increment
+ save_dir=Path(""),
+ callbacks=Callbacks(),
+ compute_loss=None,
+):
+ if isinstance(model, BaseModel):
+ model.save("./yolov5s_eval.pb")
+ model = "./yolov5s_eval.pb"
+ device = select_device(device)
+
+ save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
+ (save_dir / "labels").mkdir(parents=True, exist_ok=True) # make dir
+
+ # Load model
+ model = DetectMultiBackend(model, device=device)
+ stride, pt = model.stride, model.pt
+ imgsz = check_img_size(imgsz, s=stride) # check image size
+ device = model.device
+ batch_size = 1 # export.py models default to batch-size 1
+ LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
+
+ # Data
+ #data = check_dataset(yaml_path) # check
+
+ # Configure
+ model.eval()
+ nc = 1 if single_cls else 80 # number of classes
+ iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
+ niou = iouv.numel()
+
+ # Dataloader
+ model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
+ pad, rect = (0.5, pt) # square inference for benchmarks
+
+ dataloader = create_dataloader(
+ source,
+ imgsz,
+ batch_size,
+ stride,
+ single_cls,
+ pad=pad,
+ rect=rect,
+ workers=workers,
+ prefix=colorstr(f"{task}: "),
+ )[0]
+
+ seen = 0
+ names = model.names if hasattr(model, "names") else model.module.names # get class names
+ if isinstance(names, (list, tuple)): # old format
+ names = dict(enumerate(names))
+ s = ("%22s" + "%11s" * 6) % ("Class", "Images", "Instances", "P", "R", "mAP50", "mAP50-95")
+ p, r, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+ dt = Profile(), Profile(), Profile() # profiling times
+ loss = torch.zeros(3, device=device)
+ stats, ap, ap_class = [], [], []
+ callbacks.run("on_val_start")
+ pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar
+ iters = -1 if args.mode == "accuracy" else args.iteration
+ for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
+ if batch_i == iters:
+ break
+
+ callbacks.run("on_val_batch_start")
+ with dt[0]:
+ im = im.float() # uint8 to fp16/32
+ im /= 255 # 0 - 255 to 0.0 - 1.0
+ _, _, height, width = im.shape # batch size, channels, height, width
+
+ # Inference
+ with dt[1]:
+ preds, train_out = model(im) if compute_loss else (model(im, augment=False), None)
+
+ # Batch size 1 inference drops the batch dim
+ if isinstance(preds, list):
+ preds = preds[0]
+
+ if preds.dim() == 2:
+ preds=preds.unsqueeze(0)
+
+ # Loss
+ if compute_loss:
+ loss += compute_loss(train_out, targets)[1] # box, obj, cls
+
+ # NMS
+ targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
+ lb = [] # for autolabelling
+ with dt[2]:
+ preds = non_max_suppression(
+ preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det
+ )
+
+ if args.benchmark:
+ # Metrics
+ for si, pred in enumerate(preds):
+ labels = targets[targets[:, 0] == si, 1:]
+ nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions
+ path, shape = Path(paths[si]), shapes[si][0]
+ correct = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init
+ seen += 1
+
+ if npr == 0:
+ if nl:
+ stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0]))
+ continue
+
+ # Predictions
+ if single_cls:
+ pred[:, 5] = 0
+ predn = pred.clone()
+ scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred
+
+ # Evaluate
+ if nl:
+ tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
+ scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels
+ labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
+ correct = process_batch(predn, labelsn, iouv)
+ stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls)
+
+ callbacks.run("on_val_image_end", pred, predn, path, names, im[si])
+
+
+ callbacks.run("on_val_batch_end", batch_i, im, targets, paths, shapes, preds)
+
+ if args.tune:
+ return 1
+
+ # Compute metrics
+ stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy
+ if len(stats) and stats[0].any():
+ _, _, p, r, _, ap, ap_class = ap_per_class(*stats, plot=False, save_dir=save_dir, names=names)
+ ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
+ mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
+ nt = np.bincount(stats[3].astype(int), minlength=nc) # number of targets per class
+ if nt.sum() == 0:
+ LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels")
+
+ pf = "%22s" + "%11i" * 2 + "%11.4g" * 4 # print format
+
+ # Print results per class
+ if (verbose or (nc < 50)) and nc > 1 and len(stats):
+ for i, c in enumerate(ap_class):
+ LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
+
+ # Print speeds
+ t = tuple(x.t / seen * 1e3 for x in dt) # images per second
+ latency = t[2]
+ if args.benchmark and args.mode == "performance":
+ print("Batch size = {}".format(args.batch_size))
+ print("Latency: {:.3f} ms".format(latency))
+ print("Throughput: {:.3f} images/sec".format(1000/latency))
+
+ # Return results
+ model.float() # for training
+ maps = np.zeros(nc) + map
+ for i, c in enumerate(ap_class):
+ maps[c] = ap[i]
+ return map50
+
+
+def main():
+ if args.tune:
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+ excluded_conv_names = [
+ "functional_16_1/tf_conv_1/sequential_1/conv2d_1/convolution",
+ "functional_16_1/tf_conv_1_2/sequential_1_1/conv2d_1_1/convolution",
+ "functional_16_1/tfc3_1/tf_conv_2_1/conv2d_2_1/convolution",
+ "functional_16_1/tfc3_1/sequential_2_1/tf_bottleneck_1/tf_conv_5_1/conv2d_5_1/convolution",
+ "functional_16_1/tfc3_1/tf_conv_3_1/conv2d_3_1/convolution",
+ "functional_16_1/tfc3_1/tf_conv_4_1/conv2d_4_1/convolution"
+ ]
+ quant_config = StaticQuantConfig(weight_granularity="per_channel")
+ local_dtype = "bf16" if CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1" else "fp32"
+ local_config = StaticQuantConfig(weight_dtype=local_dtype, act_dtype=local_dtype)
+ for conv_name in excluded_conv_names:
+ quant_config.set_local(conv_name, local_config)
+
+ q_model = quantize_model(args.input_model, quant_config, calib_func=evaluate)
+ q_model.save(args.output_model)
+
+ if args.benchmark:
+ if args.mode == 'performance':
+ evaluate(args.input_model)
+ elif args.mode == 'accuracy':
+ map50 = evaluate(args.input_model)
+ print("Batch size = %d" % args.batch_size)
+ LOGGER.info("Accuracy: %.4g" % map50)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh
new file mode 100644
index 00000000000..acdf8a2b5e8
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# set -x
+
+wget https://github.com/ultralytics/assets/releases/download/v0.0.0/coco2017labels.zip
+unzip -o coco2017labels.zip
+rm coco2017labels.zip
+
+cd coco
+mkdir images
+cd images
+wget http://images.cocodataset.org/zips/val2017.zip
+unzip -o val2017.zip
+rm val2017.zip
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh
new file mode 100644
index 00000000000..3446739939d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh
@@ -0,0 +1,3 @@
+INSTALLATION_PATH=$(python3 -c "import sys; import yolov5; p=sys.modules['yolov5'].__file__; print(p.replace('/__init__.py', ''))")
+python $INSTALLATION_PATH/models/tf.py --weights yolov5/yolov5s.pt
+python $INSTALLATION_PATH/export.py --weights yolov5/yolov5s.pt --include pb
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2c40b972bcd
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt
@@ -0,0 +1 @@
+yolov5
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..df8009e115b
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ batch_size=128
+ iters=100
+ for var in "$@"
+ do
+ case $var in
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ --iters=*)
+ iters=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+ if [[ ${mode} == "accuracy" ]]; then
+ python main.py \
+ --input_model ${input_model} \
+ --dataset_location ${dataset_location} \
+ --mode ${mode} \
+ --batch_size ${batch_size} \
+ --benchmark
+ elif [[ ${mode} == "performance" ]]; then
+ incbench --num_c 4 main.py \
+ --input_model ${input_model} \
+ --dataset_location ${dataset_location} \
+ --mode ${mode} \
+ --batch_size ${batch_size} \
+ --iteration ${iters} \
+ --benchmark
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..8d1f6807138
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ for var in "$@"
+ do
+ case $var in
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+
+# run tuning
+function run_benchmark {
+ python main.py \
+ --input_model ${input_model} \
+ --output_model ${output_model} \
+ --dataset_location ${dataset_location} \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md
new file mode 100644
index 00000000000..7bff08a2f84
--- /dev/null
+++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md
@@ -0,0 +1,98 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Wide & Deep tuning zoo result.
+This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation.
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+### Install Additional Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq
+pip install -r requirements.txt
+```
+
+## 2. Download Frozen PB
+```shell
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_fp32_pretrained_model.pb
+```
+
+## 3. Prepare Dataset
+Download training dataset: (8 million samples)
+```bash
+$ wget https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/train.csv
+```
+Download evaluation dataset (2 million samples)
+```bash
+$ wget https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/eval.csv
+```
+
+## 4. Process Dataset
+Process calib dataset
+```bash
+python preprocess_csv_tfrecords.py \
+ --inputcsv-datafile train.csv \
+ --calibrationcsv-datafile eval.csv \
+ --outputfile-name processed_data
+```
+Process eval dataset
+```bash
+python preprocess_csv_tfrecords.py \
+ --inputcsv-datafile eval.csv \
+ --calibrationcsv-datafile train.csv \
+ --outputfile-name processed_data
+```
+Two .tfrecords files are generated and will be used later on:
+1) train_processed_data.tfrecords
+2) eval_processed_data.tfrecords
+
+
+# Run Command
+
+## Quantization
+ ```shell
+ bash run_quant.sh --dataset_location=/path/to/datasets --input_model=/path/to/wide_deep_fp32_pretrained_model.pb --output_model=./wnd_int8_opt.pb
+ ```
+
+## Benchmark
+ ```
+ bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=accuracy --batch_size=500
+ bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=performance --batch_size=500
+ ```
+
+# Other
+This example takes the reference from https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds.
+The pretrained model was trained with preprocessed data from dataset Criteo.
diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py
new file mode 100644
index 00000000000..a89efd25537
--- /dev/null
+++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py
@@ -0,0 +1,347 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+import os
+import numpy as np
+import argparse
+import collections
+import time
+import math
+import json
+import datetime
+
+import tensorflow as tf
+
+from tensorflow.python.framework import ops
+from tensorflow.core.framework import graph_pb2
+from google.protobuf import text_format
+from argparse import ArgumentParser
+from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
+from tensorflow.compat.v1 import graph_util
+
+
+def load_graph(model_file):
+ """This is a function to load TF graph from pb file
+
+ Args:
+ model_file (string): TF pb file local path
+
+ Returns:
+ graph: TF graph object
+ """
+ graph = tf.Graph()
+ #graph_def = tf.compat.v1.GraphDef()
+ graph_def = graph_pb2.GraphDef()
+
+ file_ext = os.path.splitext(model_file)[1]
+
+ with open(model_file, "rb") as f:
+ if file_ext == '.pbtxt':
+ text_format.Merge(f.read(), graph_def)
+ else:
+ graph_def.ParseFromString(f.read())
+
+ with graph.as_default():
+ tf.import_graph_def(graph_def, name='')
+
+ return graph
+
+
+numeric_feature_names = ["numeric_1"]
+string_feature_names = ["string_1"]
+
+def get_feature_name(compute_accuracy):
+
+ if compute_accuracy:
+ full_features_names = numeric_feature_names + string_feature_names + ["label"]
+ feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature(
+ [], tf.int64, default_value=0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature([], tf.int64, default_value=0, allow_missing=True)]
+ else:
+ full_features_names = numeric_feature_names + string_feature_names
+ feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature(
+ [], tf.int64, default_value=0, allow_missing=True)]
+ return full_features_names, feature_datatypes
+
+def input_fn(data_file, num_epochs, shuffle, batch_size, compute_accuracy=True):
+ """Generate an input function for the Estimator."""
+ full_features_names, feature_datatypes = get_feature_name(compute_accuracy)
+ def _parse_function(proto):
+ f = collections.OrderedDict(
+ zip(full_features_names, feature_datatypes))
+ parsed_features = tf.io.parse_example(proto, f)
+ parsed_feature_vals_num = [tf.reshape(
+ parsed_features["numeric_1"], shape=[-1, 13])]
+ parsed_feature_vals_str = [tf.reshape(
+ parsed_features["string_1"], shape=[-1, 2]) for i in string_feature_names]
+ parsed_feature_vals = parsed_feature_vals_num + parsed_feature_vals_str
+ if compute_accuracy:
+ parsed_feature_vals_label = [tf.reshape(parsed_features[i], shape=[-1]) for i in ["label"]]
+ parsed_feature_vals = parsed_feature_vals + parsed_feature_vals_label
+ return parsed_feature_vals
+
+ # Extract lines from input files using the Dataset API.
+ dataset = tf.data.TFRecordDataset([data_file])
+ if shuffle:
+ dataset = dataset.shuffle(buffer_size=20000)
+ dataset = dataset.batch(batch_size)
+ dataset = dataset.map(_parse_function, num_parallel_calls=28)
+ dataset = dataset.prefetch(batch_size*10)
+ return dataset
+
+def evaluation_func(model, measurer=None):
+ return evaluate_opt_graph.eval_inference(model)
+
+class eval_classifier_optimized_graph:
+ """Evaluate image classifier with optimized TensorFlow graph"""
+
+ def __init__(self):
+ arg_parser = ArgumentParser(description='Parse args')
+ arg_parser.add_argument('-i', '--input_graph', type=str,
+ help='Specify the input of the model',
+ dest='input_graph',
+ required=True)
+ arg_parser.add_argument('-o', '--output_graph', type=str,
+ help='Specify the output of the model',
+ dest='output_graph')
+ arg_parser.add_argument('--calibration_data_location', type=str,
+ help='full path of calibration data file',
+ dest='calib_data')
+ arg_parser.add_argument('--evaluation_data_location', type=str,
+ help='full path of validation data file',
+ dest='eval_data',
+ required=True)
+ arg_parser.add_argument('--batch_size', type=int,
+ help='batch size for inference.Default is 512',
+ default=512,
+ dest='batch_size')
+ arg_parser.add_argument('--num_intra_threads', type=int,
+ help='number of threads for an operator',
+ required=False,
+ default=0,
+ dest='num_intra_threads')
+ arg_parser.add_argument('--num_inter_threads', type=int,
+ help='number of threads across operators',
+ required=False,
+ default=0,
+ dest='num_inter_threads')
+ arg_parser.add_argument('--kmp_blocktime', type=str,
+ help='KMP_BLOCKTIME value',
+ required=False,
+ default=None,
+ dest='kmp_blocktime')
+ arg_parser.add_argument('-r', "--accuracy",
+ help='For accuracy measurement only.',
+ dest='accuracy', action='store_true')
+ arg_parser.add_argument("--config", default=None,
+ help="tuning config")
+ arg_parser.add_argument('--performance',
+ dest='performance',
+ action='store_true',
+ help='run performance')
+ arg_parser.add_argument('--tune',
+ dest='tune',
+ action='store_true',
+ help='use neural_compressor to tune.')
+ arg_parser.add_argument("--warmup-steps",
+ type=int, default=50,
+ help="number of warmup steps")
+ arg_parser.add_argument("--steps",
+ type=int, default=2000,
+ help="number of iterations")
+
+ arg_parser.add_argument('--env',
+ dest='env',
+ help='specific Tensorflow env',
+ default='mkl')
+
+
+ self.args = arg_parser.parse_args()
+
+ def auto_tune(self):
+ """This is neural_compressor tuning part to generate a quantized pb
+ Returns:
+ graph: it will return a quantized pb
+ """
+ from neural_compressor.common import set_random_seed
+ from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+ set_random_seed(9527)
+ infer_graph = load_graph(self.args.input_graph)
+ model = Model(infer_graph)
+ model.input_tensor_names = ["new_numeric_placeholder", "new_categorical_placeholder"]
+ model.output_tensor_names = ["import/head/predictions/probabilities"]
+
+ if self.args.calib_data:
+ quant_config = StaticQuantConfig()
+ calib_dataloader=Dataloader(self.args.calib_data, self.args.batch_size)
+ q_model = quantize_model(model, quant_config, calib_dataloader)
+ return q_model
+ print("Please provide calibration dataset!")
+
+ def eval_inference(self, infer_graph):
+ print("Run inference")
+ if isinstance(infer_graph, tf.compat.v1.GraphDef):
+ graph = tf.Graph()
+ with graph.as_default():
+ tf.import_graph_def(infer_graph, name='')
+ infer_graph = graph
+
+ data_config = tf.compat.v1.ConfigProto()
+ data_config.intra_op_parallelism_threads = self.args.num_intra_threads
+ data_config.inter_op_parallelism_threads = self.args.num_inter_threads
+ data_config.use_per_session_threads = 1
+
+ infer_config = tf.compat.v1.ConfigProto()
+ if self.args.env == 'mkl':
+ print("Set inter and intra for mkl: ")
+ print("intra_op_parallelism_threads = ", self.args.num_intra_threads)
+ print("inter_op_parallelism_threads = ", self.args.num_inter_threads)
+ infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
+ infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
+ infer_config.use_per_session_threads = 1
+
+ total_test_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(self.args.eval_data))
+ total_batches = math.ceil(float(total_test_samples)/self.args.batch_size)
+ placeholder_list = ['new_numeric_placeholder','new_categorical_placeholder']
+ input_tensor = [infer_graph.get_tensor_by_name(name + ":0") for name in placeholder_list]
+ output_name = "import/head/predictions/probabilities"
+ output_tensor = infer_graph.get_tensor_by_name(output_name + ":0" )
+ correctly_predicted = 0
+ evaluate_duration = 0.0
+
+ features_list = []
+ data_graph = tf.Graph()
+ with data_graph.as_default():
+ res_dataset = input_fn(self.args.eval_data, 1, False, self.args.batch_size)
+ iterator = tf.compat.v1.data.make_one_shot_iterator(res_dataset)
+ next_element = iterator.get_next()
+ with tf.compat.v1.Session(config=data_config, graph=data_graph) as data_sess:
+ for i in range(int(total_batches)):
+ batch = data_sess.run(next_element)
+ features=batch[0:3]
+ features_list.append(features)
+
+ if self.args.performance:
+ iteration = 0
+ warm_up_iteration = self.args.warmup_steps
+ total_run = self.args.steps
+
+ if total_run > total_batches:
+ total_run = total_batches
+
+ with tf.compat.v1.Session(config=infer_config, graph=infer_graph) as infer_sess:
+ i = 0
+ for i in range(int(total_run)):
+ start_time = time.time()
+ logistic = infer_sess.run(output_tensor, dict(zip(input_tensor, features_list[iteration][0:2])))
+ time_consume = time.time() - start_time
+
+ if iteration > warm_up_iteration:
+ evaluate_duration += time_consume
+
+ iteration += 1
+ if iteration > total_batches:
+ iteration = 0
+ test_batches = total_run - warm_up_iteration
+ else:
+ with tf.compat.v1.Session(config=infer_config, graph=infer_graph) as infer_sess:
+ i = 0
+ for i in range(int(total_batches)):
+ start_time = time.time()
+ logistic = infer_sess.run(output_tensor, dict(zip(input_tensor, features_list[i][0:2])))
+ time_consume = time.time() - start_time
+ evaluate_duration += time_consume
+
+ predicted_labels = np.argmax(logistic,1)
+ correctly_predicted=correctly_predicted+np.sum(features_list[i][2] == predicted_labels)
+
+ i=i+1
+
+ accuracy = float(correctly_predicted) / float(total_test_samples)
+ test_batches = total_batches
+
+ no_of_test_samples = test_batches * self.args.batch_size
+ latency = 1000 * float(evaluate_duration) / float(test_batches)
+ throughput = no_of_test_samples / evaluate_duration
+
+ print('--------------------------------------------------')
+ print('Total test records: %d' % no_of_test_samples)
+ print('Number of batches: %d' % test_batches)
+ print('Batch size = %d' % self.args.batch_size)
+ print('Latency: %.3f ms' % latency)
+ print('Throughput: %.3f records/sec' % throughput)
+ print('--------------------------------------------------')
+
+ return accuracy
+
+ def run(self):
+ """ This is neural_compressor function include tuning and benchmark option """
+
+ if self.args.tune:
+ q_model = evaluate_opt_graph.auto_tune()
+ q_model.save(self.args.output_graph)
+ else:
+ if self.args.accuracy:
+ infer_graph = load_graph(self.args.input_graph)
+ acc = evaluation_func(infer_graph)
+ print("Accuracy: %.5f" % acc)
+ if self.args.performance:
+ infer_graph = load_graph(self.args.input_graph)
+ evaluation_func(infer_graph)
+
+
+class Dataloader(object):
+ def __init__(self, data_location, batch_size):
+ """dataloader generator
+
+ Args:
+ data_location (str): tf recorder local path
+ batch_size (int): dataloader batch size
+ """
+ self.batch_size = batch_size
+ self.data_file = data_location
+ self.total_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(data_location))
+ self.n = math.ceil(float(self.total_samples) / batch_size)
+ print("batch size is " + str(self.batch_size) + "," + str(self.n) + " iteration")
+
+ def __iter__(self):
+ data_graph = tf.Graph()
+ with data_graph.as_default():
+ self.dataset = input_fn(self.data_file, 1, False, self.batch_size)
+ self.dataset_iterator = tf.compat.v1.data.make_one_shot_iterator(self.dataset)
+ next_element = self.dataset_iterator.get_next()
+
+ with tf.compat.v1.Session(graph=data_graph) as sess:
+ for i in range(self.n):
+ batch = sess.run(next_element)
+ yield (batch[0:2], batch[2])
+
+ def __len__(self):
+ return self.n
+
+
+if __name__ == "__main__":
+ evaluate_opt_graph = eval_classifier_optimized_graph()
+ evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py
new file mode 100644
index 00000000000..e1a82cd674c
--- /dev/null
+++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py
@@ -0,0 +1,155 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import pandas
+import argparse
+import numpy as np
+import tensorflow as tf
+if tf.version.VERSION < '2.0':
+ tf.enable_eager_execution()
+parser = argparse.ArgumentParser()
+parser.add_argument('--inputcsv-datafile', type=str,
+ help='full path of data file e.g. eval.csv',
+ dest='evaldatafile_path',
+ required=True)
+parser.add_argument('--calibrationcsv-datafile', type=str,
+ help='full path of data file of calibration/train dataset to get normalization ranges',
+ dest='traindatafile_path',
+ default='NULL',
+ required=False)
+
+parser.add_argument('--outputfile-name', type=str,
+ help='output tfrecord file name e.g. processed_eval.[tfrecords]',
+ dest='outputfile_path',
+ default="processed_data.tfrecords",
+ required=False)
+
+args = parser.parse_args()
+
+eval_csv_file = args.evaldatafile_path
+train_csv_file = args.traindatafile_path
+output_file = args.outputfile_path
+
+if not os.path.isfile(eval_csv_file):
+ print("Please input a valid csv file")
+ sys.exit(1)
+
+filename, file_ext = os.path.splitext(output_file)
+in_filename, _ = os.path.splitext(os.path.basename(eval_csv_file))
+
+if file_ext != ".tfrecords":
+ output_file = output_file + ".tfrecords"
+
+output_file = "{}_{}".format(in_filename,output_file)
+csv = pandas.read_csv(eval_csv_file, header=None)
+if len(csv.columns)==39:
+ dataset_type = 'test'
+else:
+ dataset_type = 'eval'
+fill_na_dict = {}
+if dataset_type=='test':
+ for i in range(0,13):
+ fill_na_dict[i]=0.0
+ for i in range(13,39):
+ fill_na_dict[i]=""
+else:
+ for i in range(1,14):
+ fill_na_dict[i]=0.0
+ for i in range(14,40):
+ fill_na_dict[i]=""
+csv=csv.fillna(value=fill_na_dict).values
+numeric_feature_names = ["numeric_1"]
+string_feature_names = ["string_1"]
+LABEL_COLUMN =["clicked"]
+CATEGORICAL_COLUMNS1 = ["C"+str(i)+"_embedding" for i in range(1, 27)]
+NUMERIC_COLUMNS1 = ["I"+str(i) for i in range(1, 14)]
+if dataset_type=='eval':
+ DATA_COLUMNS = LABEL_COLUMN + NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1
+else:
+ DATA_COLUMNS = NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1
+CATEGORICAL_COLUMNS2 = ["C"+str(i)+"_embedding" for i in range(1, 27)]
+NUMERIC_COLUMNS2 = ["I"+str(i) for i in range(1, 14)]
+
+CATEGORICAL_COLUMNS1.sort()
+NUMERIC_COLUMNS1.sort()
+no_of_rows = 0
+with open(eval_csv_file, 'r') as f:
+ if not os.path.isfile(train_csv_file):
+ nums=[line.strip('\n\r').split(',') for line in f.readlines()]
+ else:
+ f1 = open(train_csv_file, 'r')
+ nums=[line.strip('\n\r').split(',') for line in f.readlines(
+ )]+[line.strip('\n\t').split(',') for line in f1.readlines()]
+ numpy_arr = np.array(nums)
+ numpy_arr[numpy_arr=='']='0'
+ min_list,max_list,range_list = [],[],[]
+ for i in range(len(DATA_COLUMNS)):
+ if DATA_COLUMNS[i] in NUMERIC_COLUMNS1:
+ col_min = numpy_arr[:,i].astype(np.float32).min()
+ col_max = numpy_arr[:,i].astype(np.float32).max()
+ min_list.append(col_min)
+ max_list.append(col_max)
+ range_list.append(col_max-col_min)
+ if os.path.isfile(train_csv_file):
+ f1.close()
+ print('min list',min_list)
+ print('max list',max_list)
+ print('range list',range_list)
+
+
+with tf.compat.v1.python_io.TFRecordWriter(output_file) as writer:
+ print('*****Processing data******')
+ for row in csv:
+ no_of_rows = no_of_rows+1
+ if dataset_type == 'eval':
+ unnormalized_vals = np.array(row[1:14])
+ else:
+ unnormalized_vals = np.array(row[0:13])
+ normalized_vals = (unnormalized_vals-min_list)/range_list
+ if dataset_type == 'eval':
+ new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[14:40]))
+ else:
+ new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[13:39]))
+ new_categorical_list = []
+ for i in CATEGORICAL_COLUMNS1:
+ if pandas.isnull(new_categorical_dict[i]):
+ new_categorical_list.append("")
+ else:
+ new_categorical_list.append(new_categorical_dict[i])
+ hash_values = tf.compat.v1.string_to_hash_bucket_fast(
+ new_categorical_list, 1000).numpy()
+ new_numerical_dict = dict(zip(NUMERIC_COLUMNS2, normalized_vals))
+ example = tf.train.Example()
+ for i in NUMERIC_COLUMNS1:
+ example.features.feature[numeric_feature_names[0]].float_list.value.extend([new_numerical_dict[i]])
+ for i in range(0, 26):
+ example.features.feature[string_feature_names[0]].int64_list.value.extend([i])
+ example.features.feature[string_feature_names[0]].int64_list.value.extend([hash_values[i]])
+ if dataset_type == 'eval':
+ example.features.feature["label"].int64_list.value.append(row[0])
+ writer.write(example.SerializeToString())
+
+print('Total number of rows ', no_of_rows)
+print('Generated output file name :'+output_file)
diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..e2f0ef81736
--- /dev/null
+++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt
@@ -0,0 +1,9 @@
+intel-tensorflow>=2.12.0
+Cython
+contextlib2
+pillow>=8.2.0
+lxml>=4.6.2
+matplotlib
+numpy>=1.17.4
+pycocotools
+protobuf
diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..72ab01f2a19
--- /dev/null
+++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ define_mode
+ run_benchmark
+
+}
+
+# init params
+function init_params {
+ iters=1000
+ for var in "$@"
+ do
+ case $var in
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --mode=*)
+ mode=$(echo $var |cut -f2 -d=)
+ ;;
+ --batch_size=*)
+ batch_size=$(echo $var |cut -f2 -d=)
+ ;;
+ esac
+ done
+
+}
+
+function define_mode {
+ if [[ ${mode} == "accuracy" ]]; then
+ mode_cmd=" --accuracy"
+ elif [[ ${mode} == "performance" ]]; then
+ mode_cmd=" --performance"
+ else
+ echo "Error: No such mode: ${mode}"
+ exit 1
+ fi
+}
+
+# run_tuning
+function run_benchmark {
+ #numactl -N 0 -m 0 \
+ python main.py \
+ --input_graph ${input_model} \
+ --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \
+ --batch_size ${batch_size} \
+ --num_inter_threads 4 \
+ ${mode_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..a8068917a27
--- /dev/null
+++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -x
+
+function main {
+
+ init_params "$@"
+ run_tuning
+
+}
+
+# init params
+function init_params {
+
+ for var in "$@"
+ do
+ case $var in
+ --dataset_location=*)
+ dataset_location=$(echo $var |cut -f2 -d=)
+ ;;
+ --input_model=*)
+ input_model=$(echo $var |cut -f2 -d=)
+ ;;
+ --output_model=*)
+ output_model=$(echo $var |cut -f2 -d=)
+ ;;
+ *)
+ echo "Error: No such parameter: ${var}"
+ exit 1
+ ;;
+ esac
+ done
+
+}
+
+
+# run_tuning
+function run_tuning {
+ python main.py \
+ --input_graph ${input_model} \
+ --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \
+ --calibration_data_location ${dataset_location}/train_processed_data.tfrecords \
+ --accuracy \
+ --batch_size 1000 \
+ --output_graph ${output_model} \
+ --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md
new file mode 100644
index 00000000000..4307ec85480
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md
@@ -0,0 +1,76 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of 3dunet-mlperf.
+This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### Install requirements
+```shell
+pip install -r requirements.txt
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation.
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Pre-trained model
+ Download the pre-trained model from the
+ [3DUnetCNN](https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/3dunet_dynamic_ndhwc.pb).
+ In this example, we use the model
+ trained on the fold 1 BraTS 2019 data.
+ The validation files have been copied from [here](https://github.com/mlcommons/inference/tree/r0.7/vision/medical_imaging/3d-unet/folds)
+
+## 3. Prepare dataset
+
+### Download BraTS 2019 dataset
+ Please download [Brats 2019](https://www.med.upenn.edu/cbica/brats2019/data.html)
+ separately and unzip the dataset. The directory that contains the dataset files will be
+ passed to the launch script when running the benchmarking script.
+
+### Prepare Calibration set
+ The calibration set is the forty images listed in brats_cal_images_list.txt. They are randomly selected from Fold 0, Fold 2, Fold 3, and Fold 4 of BraTS 2019 Training Dataset.
+
+
+# Run command
+Please set the following environment variables before running quantization or benchmark commands:
+
+* `export nnUNet_preprocessed=/build/preprocessed_data`
+* `export nnUNet_raw_data_base=/build/raw_data`
+* `export RESULTS_FOLDER=/build/result`
+
+## Quantization
+
+`bash run_quant.sh --input_model=3dunet_dynamic_ndhwc.pb --dataset_location=/build --output_model=3dunet_dynamic_ndhwc_int8.pb`
+
+## Benchmark
+
+`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --iters=500 --mode=benchmark`
+
+`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=1 --mode=accuracy`
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py
new file mode 100644
index 00000000000..0a18c579d8b
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py
@@ -0,0 +1,19 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt
new file mode 100644
index 00000000000..69276e67b6a
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt
@@ -0,0 +1,40 @@
+HGG__BraTS19_2013_18_1
+HGG__BraTS19_2013_20_1
+HGG__BraTS19_CBICA_AAP_1
+HGG__BraTS19_CBICA_ABN_1
+HGG__BraTS19_CBICA_ABO_1
+HGG__BraTS19_CBICA_ALU_1
+HGG__BraTS19_CBICA_ANZ_1
+HGG__BraTS19_CBICA_APY_1
+HGG__BraTS19_CBICA_AQJ_1
+HGG__BraTS19_CBICA_AQZ_1
+HGG__BraTS19_CBICA_ASN_1
+HGG__BraTS19_CBICA_ASY_1
+HGG__BraTS19_CBICA_AUW_1
+HGG__BraTS19_CBICA_AXJ_1
+HGG__BraTS19_CBICA_AXM_1
+HGG__BraTS19_CBICA_AYG_1
+HGG__BraTS19_CBICA_AYU_1
+HGG__BraTS19_CBICA_AZD_1
+HGG__BraTS19_CBICA_BAX_1
+HGG__BraTS19_CBICA_BGR_1
+HGG__BraTS19_CBICA_BHV_1
+HGG__BraTS19_TCIA01_235_1
+HGG__BraTS19_TCIA02_394_1
+HGG__BraTS19_TCIA02_473_1
+HGG__BraTS19_TCIA02_606_1
+HGG__BraTS19_TCIA03_419_1
+HGG__BraTS19_TCIA04_192_1
+HGG__BraTS19_TCIA04_479_1
+HGG__BraTS19_TCIA06_372_1
+HGG__BraTS19_TCIA08_278_1
+LGG__BraTS19_2013_28_1
+LGG__BraTS19_TCIA09_462_1
+LGG__BraTS19_TCIA10_130_1
+LGG__BraTS19_TCIA10_202_1
+LGG__BraTS19_TCIA10_346_1
+LGG__BraTS19_TCIA10_387_1
+LGG__BraTS19_TCIA10_628_1
+LGG__BraTS19_TCIA12_470_1
+LGG__BraTS19_TCIA13_621_1
+LGG__BraTS19_TCIA13_653_1
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py
new file mode 100644
index 00000000000..bc8ce8edc07
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py
@@ -0,0 +1,219 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+from argparse import ArgumentParser
+import os
+import pickle
+import sys
+import math
+import array
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.framework import dtypes
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
+from nnunet.evaluation.region_based_evaluation import evaluate_regions, get_brats_regions
+
+from nnUNet.setup import setup
+from nnUNet.postprocess import postprocess_output
+
+INPUTS = 'input'
+OUTPUTS = 'Identity'
+
+if __name__ == "__main__":
+ """Evaluate 3d_unet with optimized TensorFlow graph"""
+ def get_args():
+ arg_parser = ArgumentParser(description='Parse args')
+
+ arg_parser.add_argument('-m', "--mode",
+ help="One of three options: 'benchmark'/'accuracy'/'tune'.")
+ arg_parser.add_argument('-n', "--iters",
+ help='The number of iteration. shall > warmup num(10)',
+ type=int, default=100)
+ arg_parser.add_argument('-e', "--num-inter-threads",
+ help='The number of inter-thread.',
+ dest='num_inter_threads', type=int, default=0)
+ arg_parser.add_argument('-a', "--num-intra-threads",
+ help='The number of intra-thread.',
+ dest='num_intra_threads', type=int, default=0)
+ arg_parser.add_argument('-i', "--input-model",
+ help='Specify the input graph.',
+ dest='input_model')
+ arg_parser.add_argument('-o', "--output-model",
+ help='Specify the output graph.',
+ dest='output_model')
+ arg_parser.add_argument('-c', "--calib-preprocess",
+ help='Specify calibration preprocess dir.',
+ dest='calib_preprocess')
+ arg_parser.add_argument('-d', "--data-location",
+ help='Specify the location of the data.',
+ dest="data_location", default=None)
+ arg_parser.add_argument("--batch-size", dest="batch_size", type=int, default=1)
+ arg_parser.add_argument("--bfloat16", type=int, default=0)
+
+ args = arg_parser.parse_args()
+ print(args)
+ return args
+
+    def eval_func(graph):
+        """Run inference with `graph` and report accuracy or performance.
+
+        Relies on the enclosing scope: the INPUTS/OUTPUTS node-name globals,
+        get_args(), and the np/tf/pickle/os/time imports at the top of the
+        file.
+
+        Depending on args.mode:
+          * 'performance': times the first args.iters single-sample session
+            runs and prints batch size, latency and throughput.
+          * any other non-empty mode (e.g. 'accuracy'): runs every
+            preprocessed sample, post-processes the outputs, evaluates the
+            BraTS regions and returns the mean of the whole/core/enhancing
+            scores read from summary.csv.
+
+        NOTE(review): the tf Session is created but never closed, and the
+        `loaded_files`/`batch_size` locals are unused -- cleanup candidates.
+        """
+        print("Run inference for accuracy")
+        # Re-parse the command line; get_args() is defined in this same scope.
+        args = get_args()
+        #setup(args.data_location, args.input_model)
+
+        # Fold the graph into an inference-only GraphDef keyed on the
+        # INPUTS/OUTPUTS node names.
+        output_graph = optimize_for_inference(graph.as_graph_def(), [INPUTS], [OUTPUTS],
+                        dtypes.float32.as_datatype_enum, False)
+        tf.import_graph_def(output_graph, name="")
+
+        input_tensor = graph.get_tensor_by_name('input:0')
+        output_tensor = graph.get_tensor_by_name('Identity:0')
+
+        config = tf.compat.v1.ConfigProto()
+        config.intra_op_parallelism_threads=args.num_intra_threads
+        config.inter_op_parallelism_threads=args.num_inter_threads
+        if args.bfloat16:
+            # Enable the oneDNN auto mixed precision (bfloat16) graph rewrite.
+            config.graph_options.rewrite_options.auto_mixed_precision_mkl = rewriter_config_pb2.RewriterConfig.ON
+
+        sess = tf.compat.v1.Session(graph=graph, config=config)
+        if args.mode:
+            print("Inference with real data")
+            preprocessed_data_dir = os.path.join(args.data_location, "preprocessed_data")
+            # preprocessed_files.pkl lists the sample base names written by setup.
+            with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "rb") as f:
+                preprocessed_files = pickle.load(f)
+
+            # Element [1] of each sample pickle is the nnU-Net properties dict.
+            dictionaries = []
+            for preprocessed_file in preprocessed_files:
+                with open(os.path.join(preprocessed_data_dir, preprocessed_file + ".pkl"), "rb") as f:
+                    dct = pickle.load(f)[1]
+                    dictionaries.append(dct)
+
+            count = len(preprocessed_files)
+            predictions = [None] * count
+            validation_indices = list(range(0,count))
+            print("Found {:d} preprocessed files".format(count))
+            loaded_files = {}
+            batch_size = args.batch_size
+
+            # Get the number of steps based on batch size
+            steps = count#math.ceil(count/batch_size)
+            warmup = 10
+            # The first `warmup` timings are discarded when computing latency.
+            assert args.iters >= warmup, 'iteration must be larger than warmup'
+            time_list=[]
+            for i in range(steps):
+                print("Iteration {} ...".format(i))
+                test_data_index = validation_indices[i]#validation_indices[i * batch_size:(i + 1) * batch_size]
+                file_name = preprocessed_files[test_data_index]
+                # Element [0] of the sample pickle is the preprocessed volume.
+                with open(os.path.join(preprocessed_data_dir, "{:}.pkl".format(file_name)), "rb") as f:
+                    data = pickle.load(f)[0]
+                if args.mode == 'performance' and i < args.iters:
+                    time_start = time.time()
+                    # np.newaxis adds a leading batch dimension of 1.
+                    predictions[i] = sess.run(output_tensor, feed_dict={input_tensor: data[np.newaxis, ...]})[0].astype(np.float32)
+                    duration = time.time() - time_start
+                    time_list.append(duration)
+                else:
+                    predictions[i] = sess.run(output_tensor, feed_dict={input_tensor: data[np.newaxis, ...]})[0].astype(np.float32)
+            if args.mode == 'performance':
+                # Mean per-item latency over the post-warmup iterations.
+                latency = np.array(time_list[warmup: ]).mean() / args.batch_size
+                print('Batch size = {}'.format(args.batch_size))
+                print('Latency: {:.3f} ms'.format(latency * 1000))
+                print('Throughput: {:.3f} items/sec'.format(1./ latency))
+            else:
+                output_folder = os.path.join(args.data_location, "postprocessed_data")
+                output_files = preprocessed_files
+                # Post Process
+                postprocess_output(predictions, dictionaries, validation_indices, output_folder, output_files)
+
+                ground_truths = os.path.join(args.data_location, \
+                    "raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr")
+                # Run evaluation
+                print("Running evaluation...")
+                evaluate_regions(output_folder, ground_truths, get_brats_regions())
+                # Load evaluation summary
+                print("Loading evaluation summary...")
+                accuracy=0.0
+                # summary.csv's "mean" row holds whole/core/enhancing scores.
+                with open(os.path.join(output_folder, "summary.csv")) as f:
+                    for line in f:
+                        words = line.split(",")
+                        if words[0] == "mean":
+                            whole = float(words[1])
+                            core = float(words[2])
+                            enhancing = float(words[3])
+                            mean = (whole + core + enhancing) / 3
+                            accuracy=mean
+                            print("Batch size =", args.batch_size)
+                            print("Accuracy is {:.5f}".format(mean))
+                            break
+                print("Done!")
+                return accuracy
+
+    def load_graph(file_name):
+        """Deserialize a frozen GraphDef from `file_name` into a new tf.Graph.
+
+        Returns the graph with all nodes imported under an empty name scope,
+        so tensor names match the original export (e.g. 'input:0').
+        """
+        tf.compat.v1.logging.info('Loading graph from: ' + file_name)
+        # GFile handles both local paths and TF-supported filesystems.
+        with tf.io.gfile.GFile(file_name, "rb") as f:
+            graph_def = tf.compat.v1.GraphDef()
+            graph_def.ParseFromString(f.read())
+        with tf.Graph().as_default() as graph:
+            tf.import_graph_def(graph_def, name='')
+        return graph
+
+    class CalibrationDL():
+        """Map-style dataset over the calibration cases listed in
+        ./brats_cal_images_list.txt (resolved against the current working
+        directory).  Samples are loaded lazily from the pickles under
+        args.calib_preprocess and cached in memory.
+        """
+        def __init__(self):
+            path = os.path.abspath(os.path.expanduser(
+                './brats_cal_images_list.txt'))
+            with open(path, 'r') as f:
+                self.preprocess_files = [line.rstrip() for line in f]
+
+            # sample_id -> loaded volume cache
+            self.loaded_files = {}
+            self.batch_size = 1
+
+        def __getitem__(self, sample_id):
+            """Return (volume, 0) for the given index; element [0] of the
+            pickle is the preprocessed image volume."""
+            file_name = self.preprocess_files[sample_id]
+            print("Loading file {:}".format(file_name))
+            # `args` is the module-level namespace parsed after this class.
+            with open(os.path.join(args.calib_preprocess, "{:}.pkl".format(file_name)), "rb") as f:
+                self.loaded_files[sample_id] = pickle.load(f)[0]
+            # note that calibration phase does not care label, here we return 0 for label free case.
+            return self.loaded_files[sample_id], 0
+
+        def __len__(self):
+            self.count = len(self.preprocess_files)
+            return self.count
+
+
+    # --- Script entry: parse args, load the frozen graph, then tune or eval.
+    args = get_args()
+    print(args)
+    graph = load_graph(args.input_model)
+    if args.mode == 'tune':
+        # NOTE(review): these imports are local to the tune branch, so
+        # eval/benchmark runs do not require neural_compressor.
+        from neural_compressor.common import set_random_seed
+        from neural_compressor.tensorflow.utils import BaseDataLoader
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+        set_random_seed(9527)
+        quant_config = StaticQuantConfig()
+        calib_dataloader=BaseDataLoader(dataset=CalibrationDL())
+        q_model = quantize_model(graph, quant_config, calib_dataloader)
+        # Best-effort save: a failure is reported but does not abort.
+        try:
+            q_model.save(args.output_model)
+        except Exception as e:
+            print("Failed to save model due to {}".format(str(e)))
+    else:
+        # Any other mode ('accuracy', 'performance', ...) runs inference.
+        eval_func(graph)
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py
new file mode 100644
index 00000000000..d26521276d6
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file is copied from nnUnet/nnunet/dataset_conversion/Task043_BraTS_2019.py, except that
+# the validation/test set part is removed and downloaded_data_dir is now configurable.
+
+import argparse
+import numpy as np
+from collections import OrderedDict
+import os
+import sys
+
+from batchgenerators.utilities.file_and_folder_operations import *
+from nnunet.paths import nnUNet_raw_data
+import SimpleITK as sitk
+import shutil
+
+def copy_BraTS_segmentation_and_convert_labels(in_file, out_file):
+    """Copy a BraTS segmentation to `out_file` with labels remapped for nnU-Net.
+
+    Mapping: 4 -> 3, 2 -> 1, 1 -> 2 (background 0 unchanged).  Raises
+    RuntimeError if any other label value is present.
+    """
+    # use this for segmentation only!!!
+    # nnUNet wants the labels to be continuous. BraTS is 0, 1, 2, 4 -> we make that into 0, 1, 2, 3
+    img = sitk.ReadImage(in_file)
+    img_npy = sitk.GetArrayFromImage(img)
+
+    uniques = np.unique(img_npy)
+    for u in uniques:
+        if u not in [0, 1, 2, 4]:
+            raise RuntimeError('unexpected label')
+
+    seg_new = np.zeros_like(img_npy)
+    seg_new[img_npy == 4] = 3
+    seg_new[img_npy == 2] = 1
+    seg_new[img_npy == 1] = 2
+    img_corr = sitk.GetImageFromArray(seg_new)
+    # Preserve spacing/origin/direction metadata from the source image.
+    img_corr.CopyInformation(img)
+    sitk.WriteImage(img_corr, out_file)
+
+def task_setup(downloaded_data_dir):
+    """Convert downloaded BraTS 2019 training data into nnU-Net raw format.
+
+    REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
+
+    Copies the four modalities of every HGG/LGG case into
+    nnUNet_raw_data/Task043_BraTS2019, converts segmentation labels via
+    copy_BraTS_segmentation_and_convert_labels, and writes dataset.json.
+    The conversion is skipped entirely when the target directory already
+    exists.
+    """
+
+    task_name = "Task043_BraTS2019"
+    print(task_name)
+    print(downloaded_data_dir)
+    print(nnUNet_raw_data)
+
+    target_base = join(nnUNet_raw_data, task_name)
+    # Only populate the task directory once; an existing directory is
+    # treated as a completed conversion.
+    if not os.path.isdir(target_base):
+        target_imagesTr = join(target_base, "imagesTr")
+        target_imagesVal = join(target_base, "imagesVal")
+        target_imagesTs = join(target_base, "imagesTs")
+        target_labelsTr = join(target_base, "labelsTr")
+
+        maybe_mkdir_p(target_imagesTr)
+        maybe_mkdir_p(target_imagesVal)
+        maybe_mkdir_p(target_imagesTs)
+        maybe_mkdir_p(target_labelsTr)
+
+        patient_names = []
+        for tpe in ["HGG", "LGG"]:
+            cur = join(downloaded_data_dir, tpe)
+            for p in subdirs(cur, join=False):
+                patdir = join(cur, p)
+                # Case id is "<grade>__<patient>" so HGG/LGG ids stay unique.
+                patient_name = tpe + "__" + p
+                patient_names.append(patient_name)
+                t1 = join(patdir, p + "_t1.nii.gz")
+                t1c = join(patdir, p + "_t1ce.nii.gz")
+                t2 = join(patdir, p + "_t2.nii.gz")
+                flair = join(patdir, p + "_flair.nii.gz")
+                seg = join(patdir, p + "_seg.nii.gz")
+
+                # All four modalities and the segmentation must be present.
+                assert all([
+                    isfile(t1),
+                    isfile(t1c),
+                    isfile(t2),
+                    isfile(flair),
+                    isfile(seg)
+                ]), "%s" % patient_name
+
+                # Modalities are numbered _0000.._0003 to match the
+                # "modality" section of dataset.json below.
+                shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
+                shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
+                shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
+                shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
+
+                copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
+
+        # dataset.json describes the task for the nnU-Net tooling.
+        json_dict = OrderedDict()
+        json_dict['name'] = "BraTS2019"
+        json_dict['description'] = "nothing"
+        json_dict['tensorImageSize'] = "4D"
+        json_dict['reference'] = "see BraTS2019"
+        json_dict['licence'] = "see BraTS2019 license"
+        json_dict['release'] = "0.0"
+        json_dict['modality'] = {
+            "0": "T1",
+            "1": "T1ce",
+            "2": "T2",
+            "3": "FLAIR"
+        }
+        # Converted label semantics (after the 4->3, 2->1, 1->2 remap).
+        json_dict['labels'] = {
+            "0": "background",
+            "1": "edema",
+            "2": "non-enhancing",
+            "3": "enhancing",
+        }
+        json_dict['numTraining'] = len(patient_names)
+        json_dict['numTest'] = 0
+        json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
+                                 patient_names]
+        json_dict['test'] = []
+
+        save_json(json_dict, join(target_base, "dataset.json"))
+    print("DONE")
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py
new file mode 100644
index 00000000000..0a18c579d8b
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py
@@ -0,0 +1,19 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt
new file mode 100644
index 00000000000..57eeeb651c5
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt
@@ -0,0 +1,67 @@
+HGG__BraTS19_2013_22_1
+HGG__BraTS19_2013_23_1
+HGG__BraTS19_2013_3_1
+HGG__BraTS19_2013_5_1
+HGG__BraTS19_2013_7_1
+HGG__BraTS19_CBICA_AAB_1
+HGG__BraTS19_CBICA_AAL_1
+HGG__BraTS19_CBICA_ABN_1
+HGG__BraTS19_CBICA_ALU_1
+HGG__BraTS19_CBICA_AME_1
+HGG__BraTS19_CBICA_ANG_1
+HGG__BraTS19_CBICA_AOC_1
+HGG__BraTS19_CBICA_AOD_1
+HGG__BraTS19_CBICA_APZ_1
+HGG__BraTS19_CBICA_AQD_1
+HGG__BraTS19_CBICA_AQJ_1
+HGG__BraTS19_CBICA_AQN_1
+HGG__BraTS19_CBICA_ASA_1
+HGG__BraTS19_CBICA_ASK_1
+HGG__BraTS19_CBICA_ASO_1
+HGG__BraTS19_CBICA_AWH_1
+HGG__BraTS19_CBICA_AWV_1
+HGG__BraTS19_CBICA_AYA_1
+HGG__BraTS19_CBICA_AYC_1
+HGG__BraTS19_CBICA_AYI_1
+HGG__BraTS19_CBICA_BFB_1
+HGG__BraTS19_CBICA_BGN_1
+HGG__BraTS19_CBICA_BGR_1
+HGG__BraTS19_CBICA_BJY_1
+HGG__BraTS19_TCIA01_231_1
+HGG__BraTS19_TCIA01_378_1
+HGG__BraTS19_TCIA01_390_1
+HGG__BraTS19_TCIA01_412_1
+HGG__BraTS19_TCIA02_135_1
+HGG__BraTS19_TCIA02_179_1
+HGG__BraTS19_TCIA02_208_1
+HGG__BraTS19_TCIA02_274_1
+HGG__BraTS19_TCIA02_314_1
+HGG__BraTS19_TCIA02_430_1
+HGG__BraTS19_TCIA02_608_1
+HGG__BraTS19_TCIA03_121_1
+HGG__BraTS19_TCIA03_138_1
+HGG__BraTS19_TCIA03_375_1
+HGG__BraTS19_TCIA03_498_1
+HGG__BraTS19_TCIA06_184_1
+HGG__BraTS19_TCIA06_372_1
+HGG__BraTS19_TCIA08_113_1
+HGG__BraTS19_TCIA08_162_1
+HGG__BraTS19_TCIA08_218_1
+HGG__BraTS19_TCIA08_469_1
+LGG__BraTS19_2013_6_1
+LGG__BraTS19_TCIA09_141_1
+LGG__BraTS19_TCIA09_255_1
+LGG__BraTS19_TCIA09_402_1
+LGG__BraTS19_TCIA09_451_1
+LGG__BraTS19_TCIA09_462_1
+LGG__BraTS19_TCIA09_620_1
+LGG__BraTS19_TCIA10_266_1
+LGG__BraTS19_TCIA10_413_1
+LGG__BraTS19_TCIA10_628_1
+LGG__BraTS19_TCIA10_629_1
+LGG__BraTS19_TCIA10_640_1
+LGG__BraTS19_TCIA12_298_1
+LGG__BraTS19_TCIA12_470_1
+LGG__BraTS19_TCIA13_621_1
+LGG__BraTS19_TCIA13_624_1
+LGG__BraTS19_TCIA13_654_1
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt
new file mode 100644
index 00000000000..d24f39b67c4
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt
@@ -0,0 +1,67 @@
+HGG__BraTS19_2013_13_1
+HGG__BraTS19_2013_19_1
+HGG__BraTS19_2013_27_1
+HGG__BraTS19_CBICA_AAG_1
+HGG__BraTS19_CBICA_ALN_1
+HGG__BraTS19_CBICA_ANV_1
+HGG__BraTS19_CBICA_AOH_1
+HGG__BraTS19_CBICA_APK_1
+HGG__BraTS19_CBICA_APR_1
+HGG__BraTS19_CBICA_AQG_1
+HGG__BraTS19_CBICA_AQP_1
+HGG__BraTS19_CBICA_ARZ_1
+HGG__BraTS19_CBICA_ASF_1
+HGG__BraTS19_CBICA_ASG_1
+HGG__BraTS19_CBICA_ATP_1
+HGG__BraTS19_CBICA_ATX_1
+HGG__BraTS19_CBICA_AUA_1
+HGG__BraTS19_CBICA_AVJ_1
+HGG__BraTS19_CBICA_AVV_1
+HGG__BraTS19_CBICA_AWG_1
+HGG__BraTS19_CBICA_AXL_1
+HGG__BraTS19_CBICA_AXQ_1
+HGG__BraTS19_CBICA_BAN_1
+HGG__BraTS19_CBICA_BBG_1
+HGG__BraTS19_CBICA_BGE_1
+HGG__BraTS19_CBICA_BHQ_1
+HGG__BraTS19_CBICA_BIC_1
+HGG__BraTS19_CBICA_BNR_1
+HGG__BraTS19_TCIA01_131_1
+HGG__BraTS19_TCIA01_147_1
+HGG__BraTS19_TCIA01_180_1
+HGG__BraTS19_TCIA01_190_1
+HGG__BraTS19_TCIA01_221_1
+HGG__BraTS19_TCIA01_335_1
+HGG__BraTS19_TCIA01_411_1
+HGG__BraTS19_TCIA02_151_1
+HGG__BraTS19_TCIA02_321_1
+HGG__BraTS19_TCIA02_331_1
+HGG__BraTS19_TCIA02_368_1
+HGG__BraTS19_TCIA02_471_1
+HGG__BraTS19_TCIA03_257_1
+HGG__BraTS19_TCIA03_474_1
+HGG__BraTS19_TCIA04_111_1
+HGG__BraTS19_TCIA04_328_1
+HGG__BraTS19_TCIA04_343_1
+HGG__BraTS19_TCIA05_277_1
+HGG__BraTS19_TCIA05_478_1
+HGG__BraTS19_TCIA06_165_1
+HGG__BraTS19_TCIA08_105_1
+HGG__BraTS19_TCIA08_280_1
+HGG__BraTS19_TMC_15477_1
+HGG__BraTS19_TMC_21360_1
+HGG__BraTS19_TMC_30014_1
+LGG__BraTS19_TCIA09_428_1
+LGG__BraTS19_TCIA10_175_1
+LGG__BraTS19_TCIA10_276_1
+LGG__BraTS19_TCIA10_393_1
+LGG__BraTS19_TCIA10_408_1
+LGG__BraTS19_TCIA10_410_1
+LGG__BraTS19_TCIA10_449_1
+LGG__BraTS19_TCIA10_490_1
+LGG__BraTS19_TCIA10_625_1
+LGG__BraTS19_TCIA10_637_1
+LGG__BraTS19_TCIA12_249_1
+LGG__BraTS19_TCIA12_466_1
+LGG__BraTS19_TCIA13_615_1
+LGG__BraTS19_TCIA13_630_1
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt
new file mode 100644
index 00000000000..c468e57417d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt
@@ -0,0 +1,67 @@
+HGG__BraTS19_2013_11_1
+HGG__BraTS19_2013_21_1
+HGG__BraTS19_2013_2_1
+HGG__BraTS19_2013_4_1
+HGG__BraTS19_CBICA_ABB_1
+HGG__BraTS19_CBICA_ABE_1
+HGG__BraTS19_CBICA_ABM_1
+HGG__BraTS19_CBICA_ANZ_1
+HGG__BraTS19_CBICA_AOP_1
+HGG__BraTS19_CBICA_APY_1
+HGG__BraTS19_CBICA_AQA_1
+HGG__BraTS19_CBICA_AQO_1
+HGG__BraTS19_CBICA_AQU_1
+HGG__BraTS19_CBICA_ARW_1
+HGG__BraTS19_CBICA_ASV_1
+HGG__BraTS19_CBICA_AUN_1
+HGG__BraTS19_CBICA_AUW_1
+HGG__BraTS19_CBICA_AUX_1
+HGG__BraTS19_CBICA_AVB_1
+HGG__BraTS19_CBICA_AVF_1
+HGG__BraTS19_CBICA_AWX_1
+HGG__BraTS19_CBICA_AXO_1
+HGG__BraTS19_CBICA_AYW_1
+HGG__BraTS19_CBICA_BAX_1
+HGG__BraTS19_CBICA_BEM_1
+HGG__BraTS19_CBICA_BHK_1
+HGG__BraTS19_CBICA_BHM_1
+HGG__BraTS19_CBICA_BLJ_1
+HGG__BraTS19_TCIA01_150_1
+HGG__BraTS19_TCIA01_203_1
+HGG__BraTS19_TCIA01_235_1
+HGG__BraTS19_TCIA01_401_1
+HGG__BraTS19_TCIA01_448_1
+HGG__BraTS19_TCIA01_499_1
+HGG__BraTS19_TCIA02_168_1
+HGG__BraTS19_TCIA02_222_1
+HGG__BraTS19_TCIA02_226_1
+HGG__BraTS19_TCIA02_283_1
+HGG__BraTS19_TCIA02_290_1
+HGG__BraTS19_TCIA02_309_1
+HGG__BraTS19_TCIA02_394_1
+HGG__BraTS19_TCIA02_455_1
+HGG__BraTS19_TCIA02_606_1
+HGG__BraTS19_TCIA03_133_1
+HGG__BraTS19_TCIA04_192_1
+HGG__BraTS19_TCIA04_361_1
+HGG__BraTS19_TCIA06_332_1
+HGG__BraTS19_TCIA08_167_1
+HGG__BraTS19_TCIA08_205_1
+HGG__BraTS19_TCIA08_234_1
+HGG__BraTS19_TCIA08_242_1
+HGG__BraTS19_TCIA08_278_1
+HGG__BraTS19_TCIA08_436_1
+HGG__BraTS19_TMC_12866_1
+LGG__BraTS19_2013_15_1
+LGG__BraTS19_2013_1_1
+LGG__BraTS19_TCIA09_312_1
+LGG__BraTS19_TCIA10_109_1
+LGG__BraTS19_TCIA10_130_1
+LGG__BraTS19_TCIA10_152_1
+LGG__BraTS19_TCIA10_241_1
+LGG__BraTS19_TCIA10_282_1
+LGG__BraTS19_TCIA10_325_1
+LGG__BraTS19_TCIA10_639_1
+LGG__BraTS19_TCIA13_618_1
+LGG__BraTS19_TCIA13_633_1
+LGG__BraTS19_TMC_09043_1
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt
new file mode 100644
index 00000000000..171a51a02a8
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt
@@ -0,0 +1,67 @@
+HGG__BraTS19_2013_12_1
+HGG__BraTS19_2013_14_1
+HGG__BraTS19_2013_18_1
+HGG__BraTS19_2013_20_1
+HGG__BraTS19_2013_26_1
+HGG__BraTS19_CBICA_ABO_1
+HGG__BraTS19_CBICA_ALX_1
+HGG__BraTS19_CBICA_ANP_1
+HGG__BraTS19_CBICA_AOS_1
+HGG__BraTS19_CBICA_AOZ_1
+HGG__BraTS19_CBICA_AQT_1
+HGG__BraTS19_CBICA_ARF_1
+HGG__BraTS19_CBICA_ASE_1
+HGG__BraTS19_CBICA_ASW_1
+HGG__BraTS19_CBICA_ATN_1
+HGG__BraTS19_CBICA_ATV_1
+HGG__BraTS19_CBICA_AUQ_1
+HGG__BraTS19_CBICA_AVG_1
+HGG__BraTS19_CBICA_AVT_1
+HGG__BraTS19_CBICA_AWI_1
+HGG__BraTS19_CBICA_AXW_1
+HGG__BraTS19_CBICA_AYG_1
+HGG__BraTS19_CBICA_AYU_1
+HGG__BraTS19_CBICA_BAP_1
+HGG__BraTS19_CBICA_BCL_1
+HGG__BraTS19_CBICA_BDK_1
+HGG__BraTS19_CBICA_BGG_1
+HGG__BraTS19_CBICA_BGT_1
+HGG__BraTS19_CBICA_BGW_1
+HGG__BraTS19_CBICA_BGX_1
+HGG__BraTS19_TCIA01_186_1
+HGG__BraTS19_TCIA01_429_1
+HGG__BraTS19_TCIA01_460_1
+HGG__BraTS19_TCIA02_171_1
+HGG__BraTS19_TCIA02_370_1
+HGG__BraTS19_TCIA02_374_1
+HGG__BraTS19_TCIA02_377_1
+HGG__BraTS19_TCIA02_473_1
+HGG__BraTS19_TCIA02_491_1
+HGG__BraTS19_TCIA02_607_1
+HGG__BraTS19_TCIA03_296_1
+HGG__BraTS19_TCIA03_338_1
+HGG__BraTS19_TCIA03_419_1
+HGG__BraTS19_TCIA04_437_1
+HGG__BraTS19_TCIA04_479_1
+HGG__BraTS19_TCIA06_247_1
+HGG__BraTS19_TCIA06_603_1
+HGG__BraTS19_TMC_11964_1
+LGG__BraTS19_2013_28_1
+LGG__BraTS19_2013_29_1
+LGG__BraTS19_2013_9_1
+LGG__BraTS19_TCIA09_177_1
+LGG__BraTS19_TCIA09_254_1
+LGG__BraTS19_TCIA10_103_1
+LGG__BraTS19_TCIA10_299_1
+LGG__BraTS19_TCIA10_310_1
+LGG__BraTS19_TCIA10_330_1
+LGG__BraTS19_TCIA10_346_1
+LGG__BraTS19_TCIA10_351_1
+LGG__BraTS19_TCIA10_420_1
+LGG__BraTS19_TCIA10_442_1
+LGG__BraTS19_TCIA10_632_1
+LGG__BraTS19_TCIA10_644_1
+LGG__BraTS19_TCIA12_480_1
+LGG__BraTS19_TCIA13_623_1
+LGG__BraTS19_TCIA13_642_1
+LGG__BraTS19_TCIA13_645_1
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt
new file mode 100644
index 00000000000..0fc2a8bc9cc
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt
@@ -0,0 +1,67 @@
+HGG__BraTS19_2013_10_1
+HGG__BraTS19_2013_17_1
+HGG__BraTS19_2013_25_1
+HGG__BraTS19_CBICA_AAP_1
+HGG__BraTS19_CBICA_ABY_1
+HGG__BraTS19_CBICA_AMH_1
+HGG__BraTS19_CBICA_ANI_1
+HGG__BraTS19_CBICA_AOO_1
+HGG__BraTS19_CBICA_AQQ_1
+HGG__BraTS19_CBICA_AQR_1
+HGG__BraTS19_CBICA_AQV_1
+HGG__BraTS19_CBICA_AQY_1
+HGG__BraTS19_CBICA_AQZ_1
+HGG__BraTS19_CBICA_ASH_1
+HGG__BraTS19_CBICA_ASN_1
+HGG__BraTS19_CBICA_ASR_1
+HGG__BraTS19_CBICA_ASU_1
+HGG__BraTS19_CBICA_ASY_1
+HGG__BraTS19_CBICA_ATB_1
+HGG__BraTS19_CBICA_ATD_1
+HGG__BraTS19_CBICA_ATF_1
+HGG__BraTS19_CBICA_AUR_1
+HGG__BraTS19_CBICA_AXJ_1
+HGG__BraTS19_CBICA_AXM_1
+HGG__BraTS19_CBICA_AXN_1
+HGG__BraTS19_CBICA_AZD_1
+HGG__BraTS19_CBICA_AZH_1
+HGG__BraTS19_CBICA_BCF_1
+HGG__BraTS19_CBICA_BFP_1
+HGG__BraTS19_CBICA_BGO_1
+HGG__BraTS19_CBICA_BHB_1
+HGG__BraTS19_CBICA_BHV_1
+HGG__BraTS19_CBICA_BHZ_1
+HGG__BraTS19_CBICA_BKV_1
+HGG__BraTS19_TCIA01_201_1
+HGG__BraTS19_TCIA01_425_1
+HGG__BraTS19_TCIA02_117_1
+HGG__BraTS19_TCIA02_118_1
+HGG__BraTS19_TCIA02_198_1
+HGG__BraTS19_TCIA02_300_1
+HGG__BraTS19_TCIA02_322_1
+HGG__BraTS19_TCIA02_605_1
+HGG__BraTS19_TCIA03_199_1
+HGG__BraTS19_TCIA03_265_1
+HGG__BraTS19_TCIA04_149_1
+HGG__BraTS19_TCIA05_396_1
+HGG__BraTS19_TCIA05_444_1
+HGG__BraTS19_TCIA06_211_1
+HGG__BraTS19_TCIA06_409_1
+HGG__BraTS19_TCIA08_319_1
+HGG__BraTS19_TCIA08_406_1
+HGG__BraTS19_TMC_06290_1
+HGG__BraTS19_TMC_06643_1
+HGG__BraTS19_TMC_27374_1
+LGG__BraTS19_2013_0_1
+LGG__BraTS19_2013_16_1
+LGG__BraTS19_2013_24_1
+LGG__BraTS19_2013_8_1
+LGG__BraTS19_TCIA09_493_1
+LGG__BraTS19_TCIA10_202_1
+LGG__BraTS19_TCIA10_261_1
+LGG__BraTS19_TCIA10_307_1
+LGG__BraTS19_TCIA10_387_1
+LGG__BraTS19_TCIA12_101_1
+LGG__BraTS19_TCIA13_634_1
+LGG__BraTS19_TCIA13_650_1
+LGG__BraTS19_TCIA13_653_1
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py
new file mode 100644
index 00000000000..e5590bdb338
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py
@@ -0,0 +1,72 @@
+# coding=utf-8
+# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from multiprocessing import Pool
+import os
+from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax
+
+def load_predictions(predictions, dictionaries, validation_indices):
+    """Reshape flat predictions and crop the padding back off each case.
+
+    Args:
+        predictions: flat float32 network outputs, one per sample, laid out
+            as (4, 224, 224, 160) per case.
+        dictionaries: per-case nnU-Net property dicts; "size_after_cropping"
+            gives the pre-padding shape.
+        validation_indices: maps list position i to the sample index.
+
+    Returns:
+        List of (4, D, H, W) arrays cropped to each case's original size.
+
+    NOTE(review): if a dimension of raw_shape ever equals the padded size,
+    the corresponding pad_after becomes 0 and the negative-index slice below
+    would produce an empty axis -- assumes raw dims are strictly smaller
+    than 224/224/160; confirm against the preprocessing pad.
+    """
+    assert len(predictions) == len(dictionaries),"Number of predictions does not match number of samples in validation set!"
+    padded_shape = [224,224,160]
+    results = [None for i in range(len(predictions))]
+    for i in range(len(predictions)):
+        qsl_idx = validation_indices[i]
+        prediction = predictions[qsl_idx]
+        assert qsl_idx >= 0 and qsl_idx < len(predictions), "Invalid qsl_idx!"
+        raw_shape = list(dictionaries[qsl_idx]["size_after_cropping"])
+        # Remove the padded part
+        pad_before = [(p - r) // 2 for p, r in zip(padded_shape, raw_shape)]
+        # Negative values used directly as end indices in the slice below.
+        pad_after = [-(p - r - b) for p, r, b in zip(padded_shape, raw_shape, pad_before)]
+        result_shape = (4,) + tuple(padded_shape)
+        result = np.reshape(prediction, result_shape).astype(np.float32)
+        results[qsl_idx] = result[:, pad_before[0]:pad_after[0], pad_before[1]:pad_after[1], pad_before[2]:pad_after[2]]
+    assert all([i is not None for i in results]), "Missing some results!"
+    return results
+
+def postprocess_output(predictions, dictionaries, validation_indices, output_folder, output_files):
+ processed_predictions = load_predictions(predictions, dictionaries, validation_indices)
+ print("Running postprocessing with multiple threads...")
+ force_separate_z=None
+ interp_order=3
+ interp_order_z=0
+ num_threads_nifti_save = 12
+ all_in_gpu = "None"
+ print("Saving predictions...")
+ pool = Pool(num_threads_nifti_save)
+ results = []
+ for i, output_filename in enumerate(output_files):
+ print(i, "/", len(output_files))
+ output_filename = os.path.join(output_folder, output_filename + ".nii.gz")
+ softmax_mean = processed_predictions[i]
+ dct = dictionaries[i]
+ bytes_per_voxel = 4
+ if all_in_gpu:
+ bytes_per_voxel = 2 # if all_in_gpu then the return value is half (float16)
+ if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85): # * 0.85 just to be save
+ print(
+ "This output is too large for python process-process communication. Saving output temporarily to disk")
+ np.save(output_filename[:-7] + ".npy", softmax_mean)
+ softmax_mean = output_filename[:-7] + ".npy"
+
+ results.append(pool.starmap_async(save_segmentation_nifti_from_softmax,
+ ((softmax_mean, output_filename, dct, interp_order, None, None, None,
+ None, None, force_separate_z, interp_order_z),)
+ ))
+
+ _ = [i.get() for i in results]
+ pool.close()
+ pool.join()
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py
new file mode 100644
index 00000000000..048eb0e91cb
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py
@@ -0,0 +1,109 @@
+# coding=utf-8
+# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file has been copied from
+# https://github.com/mlcommons/inference/blob/r0.7/vision/medical_imaging/3d-unet/preprocess.py
+
+import argparse
+import numpy
+import os
+import pickle
+import sys
+import torch
+
+from batchgenerators.augmentations.utils import pad_nd_image
+from batchgenerators.utilities.file_and_folder_operations import subfiles
+from nnunet.training.model_restore import load_model_and_checkpoint_files
+from nnunet.inference.predict import preprocess_multithreaded
+
+def preprocess_MLPerf(model, checkpoint_name, folds, fp16, list_of_lists, output_filenames, preprocessing_folder, num_threads_preprocessing):
+ assert len(list_of_lists) == len(output_filenames)
+ print("loading parameters for folds", folds)
+ trainer, params = load_model_and_checkpoint_files(model, folds, fp16, checkpoint_name=checkpoint_name)
+
+ print("starting preprocessing generator")
+ preprocessing = preprocess_multithreaded(trainer, list_of_lists, output_filenames, num_threads_preprocessing, None)
+ print("Preprocessing images...")
+ all_output_files = []
+
+ for preprocessed in preprocessing:
+ output_filename, (d, dct) = preprocessed
+
+ all_output_files.append(output_filename)
+ if isinstance(d, str):
+ data = np.load(d)
+ os.remove(d)
+ d = data
+
+ # Pad to the desired full volume
+ d = pad_nd_image(d, trainer.patch_size, "constant", None, False, None)
+
+ with open(os.path.join(preprocessing_folder, output_filename+ ".pkl"), "wb") as f:
+ pickle.dump([d, dct], f)
+ f.close()
+
+ return all_output_files
+
+
+def preprocess_setup(preprocessed_data_dir):
+    """Preprocess the fold-1 validation images into `preprocessed_data_dir`.
+
+    Reads the case list from folds/fold1_validation.txt (next to this file),
+    locates the trained model under build/result, runs preprocess_MLPerf,
+    and writes preprocessed_files.pkl listing the produced case names.
+
+    NOTE(review): model and raw-data paths are hard-coded relative to the
+    current working directory ('build/...'); the caller must run from the
+    example root.
+    """
+    print("Preparing for preprocessing data...")
+
+    # Validation set is fold 1
+    fold = 1
+    import sys
+    import os
+    CURRENT_DIR = os.path.split(os.path.abspath(__file__))[0]
+    #validation_fold_file = '/workspace/intelai_models/inference/nnUNet/folds/fold1_validation.txt'
+    validation_fold_file = os.path.join(CURRENT_DIR, 'folds/fold1_validation.txt')
+    # Make sure the model exists
+    model_dir = 'build/result/nnUNet/3d_fullres/Task043_BraTS2019/nnUNetTrainerV2__nnUNetPlansv2.mlperf.1'
+    model_path = os.path.join(model_dir, "plans.pkl")
+    assert os.path.isfile(model_path), "Cannot find the model file {:}!".format(model_path)
+    checkpoint_name = "model_final_checkpoint"
+
+    # Other settings
+    fp16 = False
+    num_threads_preprocessing = 12
+    raw_data_dir = 'build/raw_data/nnUNet_raw_data/Task043_BraTS2019/imagesTr'
+
+    # Open list containing validation images from specific fold (e.g. 1)
+    validation_files = []
+    with open(validation_fold_file) as f:
+        for line in f:
+            validation_files.append(line.rstrip())
+
+    # Create output and preprocessed directory
+    if not os.path.isdir(preprocessed_data_dir):
+        os.makedirs(preprocessed_data_dir)
+
+    # Create list of images locations (i.e. 4 images per case => 4 modalities)
+    # Matches "<case>_000X.nii.gz": prefix is the case name and the suffix
+    # adds exactly 12 characters ("_000X.nii.gz").
+    all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True)
+    list_of_lists = [[os.path.join(raw_data_dir, i) for i in all_files if i[:len(j)].startswith(j) and
+                      len(i) == (len(j) + 12)] for j in validation_files]
+
+    # Preprocess images, returns filenames list
+    # This runs in multiprocess
+    print("Actually preprocessing data...")
+
+    preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists,
+                                           validation_files, preprocessed_data_dir, num_threads_preprocessing)
+
+    print("Saving metadata of the preprocessed data...")
+    with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "wb") as f:
+        pickle.dump(preprocessed_files, f)
+
+    print("Preprocessed data saved to {:}".format(preprocessed_data_dir))
+    print("Done!")
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py
new file mode 100644
index 00000000000..bf4d5981497
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py
@@ -0,0 +1,81 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: EPL-2.0
+#
+
+import os, shutil
+import argparse
+import sys
+import zipfile
+#import sys
+#print(sys.path)
+#sys.path.append('/home/sys_dltest/lpot/lz/frameworks.ai.models.intel-models/models/image_segmentation/tensorflow/3d_unet_mlperf')
+from nnUNet.Task043_BraTS_2019 import task_setup
+from nnUNet.preprocess import preprocess_setup
+
+# Directory layout used by setup/preprocessing, relative to the CWD.
+BUILD_DIR = 'build'
+RAW_DATA_DIR = BUILD_DIR + '/raw_data'
+PREPROCESSED_DATA_DIR = BUILD_DIR + '/preprocessed_data'
+POSTPROCESSED_DATA_DIR = BUILD_DIR + '/postprocessed_data'
+MODEL_DIR = BUILD_DIR + '/model'
+RESULT_DIR = BUILD_DIR + '/result'
+# Expected download artifact names (see download_model below).
+TF_MODEL = '224_224_160.pb'
+OTHER_FILES = 'fold_1.zip'
+
+def create_directories():
+ print("Creating directories")
+ if not os.path.isdir(BUILD_DIR):
+ os.makedirs(BUILD_DIR)
+ if not os.path.isdir(RAW_DATA_DIR):
+ os.makedirs(RAW_DATA_DIR)
+ if not os.path.isdir(PREPROCESSED_DATA_DIR):
+ os.makedirs(PREPROCESSED_DATA_DIR)
+ if not os.path.isdir(POSTPROCESSED_DATA_DIR):
+ os.makedirs(POSTPROCESSED_DATA_DIR)
+ if not os.path.isdir(RESULT_DIR):
+ os.makedirs(RESULT_DIR)
+ if not os.path.isdir(MODEL_DIR):
+ os.makedirs(MODEL_DIR)
+
+def download_model(input_graph):
+ pwd = os.getcwd()
+ os.chdir(os.path.join(pwd, MODEL_DIR))
+ if input_graph == 'NONE':
+ print("Downloading TF model from Zenodo")
+ if not os.path.isfile(TF_MODEL):
+ os.system('wget -O 224_224_160.pb https://zenodo.org/record/3928991/files/224_224_160.pb?download=1;')
+ os.chdir(os.path.join(pwd, RESULT_DIR))
+ if not os.path.isfile(OTHER_FILES):
+ os.system('wget -O fold_1.zip https://zenodo.org/record/3904106/files/fold_1.zip?download=1;')
+ zip_file = "fold_1.zip"
+ #legacy bitmap issue https://bugzilla.redhat.com/show_bug.cgi?id=1802689
+ if (not os.path.isfile(OTHER_FILES)):
+ os.system('curl -O --output fold_1.zip https://zenodo.org/record/3904106/files/fold_1.zip')
+ try:
+ with zipfile.ZipFile(zip_file) as z:
+ z.extractall()
+ print("Extracted all")
+ except:
+ print("Could not extract fold_1.zip")
+ os.chdir(pwd)
+
+def setup(downloaded_data_dir, input_graph='NONE'):
+    """End-to-end preparation: directories, model download, BraTS task
+    conversion, and preprocessing into PREPROCESSED_DATA_DIR.
+
+    Args:
+        downloaded_data_dir: root of the downloaded BraTS 2019 training data.
+        input_graph: path of a user-supplied frozen graph, or 'NONE' to
+            download the reference model from Zenodo.
+    """
+    create_directories()
+    download_model(input_graph)
+    task_setup(downloaded_data_dir)
+    preprocess_setup(PREPROCESSED_DATA_DIR)
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..d5069f8038f
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+nnunet
+tensorflow
\ No newline at end of file
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..36f8d8502f0
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+set -x
+
+function main {
+
+    init_params "$@"
+    # nnU-Net-style environment layout rooted at the dataset location
+    export BUILD_DIR=${dataset_location}
+    export nnUNet_preprocessed=${BUILD_DIR}/preprocessed_data
+    export nnUNet_raw_data_base=${BUILD_DIR}/raw_data
+    export RESULTS_FOLDER=${BUILD_DIR}/result
+    run_benchmark
+
+}
+
+# init params
+function init_params {
+    iters=100
+    batch_size=1
+    # Default bfloat16 off.  The flag was read in run_benchmark but never set
+    # anywhere, so the bfloat16 branch was previously unreachable.
+    bfloat16=false
+    for var in "$@"
+    do
+        case $var in
+            --mode=*)
+                mode=$(echo $var |cut -f2 -d=)
+            ;;
+            --dataset_location=*)
+                dataset_location=$(echo $var |cut -f2 -d=)
+            ;;
+            --input_model=*)
+                input_model=$(echo $var |cut -f2 -d=)
+            ;;
+            --batch_size=*)
+                batch_size=$(echo $var |cut -f2 -d=)
+            ;;
+            --iters=*)
+                iters=$(echo $var |cut -f2 -d=)
+            ;;
+            --bfloat16=*)
+                bfloat16=$(echo $var |cut -f2 -d=)
+            ;;
+            *)
+                echo "Error: No such parameter: ${var}"
+                exit 1
+            ;;
+        esac
+    done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+    if [[ ${bfloat16} == "true" ]]; then
+        extra_cmd="--bfloat16"
+    else
+        extra_cmd=""
+    fi
+
+    python main.py \
+        --input-model=${input_model} \
+        --data-location=${dataset_location} \
+        --calib-preprocess=${BUILD_DIR}/calib_preprocess \
+        --batch-size=${batch_size} \
+        --mode=${mode} \
+        --iters=${iters} \
+        ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..79256545613
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -x
+
+function main {
+
+    init_params "$@"
+    # Data layout env vars rooted at the dataset location.
+    # NOTE(review): presumably consumed by the nnunet package — confirm.
+    export BUILD_DIR=${dataset_location}
+    export nnUNet_preprocessed=${BUILD_DIR}/preprocessed_data
+    export nnUNet_raw_data_base=${BUILD_DIR}/raw_data
+    export RESULTS_FOLDER=${BUILD_DIR}/result
+    run_tuning
+
+}
+
+# init params: parse --dataset_location / --input_model / --output_model
+function init_params {
+    for var in "$@"
+    do
+        case $var in
+            --dataset_location=*)
+                dataset_location=$(echo $var |cut -f2 -d=)
+            ;;
+            --input_model=*)
+                input_model=$(echo $var |cut -f2 -d=)
+            ;;
+            --output_model=*)
+                output_model=$(echo $var |cut -f2 -d=)
+            ;;
+            *)
+                echo "Error: No such parameter: ${var}"
+                exit 1
+            ;;
+        esac
+    done
+
+}
+
+# run_tuning: quantize the input model via main.py in tune mode
+function run_tuning {
+    python main.py \
+        --input-model=${input_model} \
+        --output-model=${output_model} \
+        --data-location=${dataset_location} \
+        --calib-preprocess=${BUILD_DIR}/calib_preprocess \
+        --mode=tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md
new file mode 100644
index 00000000000..6fa291d0b36
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md
@@ -0,0 +1,117 @@
+Step-by-Step
+============
+
+This document is used to list steps of reproducing TensorFlow style transfer Intel® Neural Compressor tuning zoo result.
+This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+## Prerequisite
+
+### 1. Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Supported Tensorflow [Version](../../../../../../README.md#supported-frameworks).
+
+### Install Additional Dependency packages
+```shell
+cd examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+### 2. Prepare Pretrained model
+
+#### Automated approach
+Run the `prepare_model.py` script located in `./examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq`.
+
+```
+usage: prepare_model.py [-h] [--model_path MODEL_PATH]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --model_path MODEL_PATH directory to put models, default is ./model
+```
+
+#### Manual approach
+
+```shell
+wget https://storage.googleapis.com/download.magenta.tensorflow.org/models/arbitrary_style_transfer.tar.gz
+tar -xvzf arbitrary_style_transfer.tar.gz
+```
+
+### 3. Prepare Dataset
+There are two folders named style_images and content_images in the current folder. Please use these two folders to generate stylized images for test. You can also prepare your own style_images or content_images.
+
+
+# Run Command
+ ```shell
+ python main.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt
+ ```
+
+
+## Quantization Config
+
+## Quantization
+ ```shell
+ bash run_quant.sh --dataset_location=style_images/,content_images/ --input_model=./model/model.ckpt --output_model=saved_model
+ ```
+## Benchmark
+ ```shell
+ bash run_benchmark.sh --dataset_location=style_images/,content_images/ --input_model=saved_model.pb --batch_size=1
+ ```
+
+Details of enabling Intel® Neural Compressor on style transfer for Tensorflow.
+=========================
+
+This is a tutorial of how to enable style_transfer model with Intel® Neural Compressor.
+## User Code Analysis
+1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file.
+
+2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself.
+
+For style_transfer, we applied the latter one because we don't have a metric for the style transfer model. The first approach would require implementing the q_dataloader and a fake *eval_func*. As neural_compressor already implements a style_transfer dataset, only the eval_func needs to be prepared after loading the graph.
+
+### Evaluation Part Adaption
+As style transfer doesn't have a metric to measure the accuracy, we only implement a fake eval_func
+```python
+def eval_func(model):
+ return 1.
+```
+
+Here we set the input tensor and output tensor names into the *inputs* and *outputs* fields. In this case we only calibrate and quantize the model without tuning the accuracy
+
+### Code update
+
+After the prepare step is done, we just need to add 2 lines to get the quantized model.
+```python
+from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+quant_config = StaticQuantConfig()
+q_model = quantize_model(graph, quant_config, calib_dataloader)
+q_model.save(FLAGS.output_model)
+```
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg
new file mode 100644
index 00000000000..5f6c5a6beb5
Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg differ
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg
new file mode 100644
index 00000000000..248d9fd31f9
Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg differ
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py
new file mode 100644
index 00000000000..d49c262bbec
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py
@@ -0,0 +1,362 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import glob
+import collections
+
+import numpy as np
+import tensorflow as tf
+
+from abc import abstractmethod
+from neural_compressor.common import logger
+from neural_compressor.tensorflow.utils.data import default_collate
+
+
+class StyleTransferDataset(object):
+    """Dataset used for style transfer task on tensorflow/inteltensorflow/tensorflow_itex backend.
+
+    This Dataset is to construct a dataset from two specific image holders representing
+    content image folder and style image folder.  Each item is one (content, style)
+    pair from the full cross product of the two folders.
+    """
+
+    def __init__(
+        self,
+        content_folder,
+        style_folder,
+        crop_ratio=0.1,
+        resize_shape=(256, 256),
+        image_format="jpg",
+        transform=None,
+        filter=None,
+    ):
+        """Initialize `StyleTransferDataset` class.
+
+        Args:
+            content_folder (str): Root directory of content images.
+            style_folder (str): Root directory of style images.
+            crop_ratio (float, default=0.1): Cropped ratio to each side.
+            resize_shape (tuple, default=(256, 256)): Target size of image.
+            image_format (str, default='jpg'): Target image format.
+            transform (transform object, default=None): Transform to process input data.
+                NOTE(review): stored but never applied in __getitem__ — confirm intent.
+            filter (Filter objects, default=None): Filter out examples according to
+                specific conditions.  NOTE(review): accepted but unused.
+        """
+        self.transform = transform
+        self.content_folder = content_folder
+        self.style_folder = style_folder
+        self.resize_shape = resize_shape
+        self.crop_ratio = crop_ratio
+        # Glob both folders once; items are the full content x style cross product.
+        self.content_images = glob.glob(os.path.join(content_folder, "*" + image_format))
+        self.style_images = glob.glob(os.path.join(style_folder, "*" + image_format))
+        self.image_list = []
+        for content in self.content_images:
+            for style in self.style_images:
+                self.image_list.append((content, style))
+
+    def __len__(self):
+        """Return the length of dataset."""
+        return len(self.image_list)
+
+    def __getitem__(self, index):
+        """Return ((content_image, style_image), 0) for the given index.
+
+        Both images are resized to `resize_shape` and scaled to [0, 1] floats.
+        """
+        from PIL import Image
+
+        content_image, style_image = self.image_list[index]
+        content_image = Image.open(content_image)
+        style_image = Image.open(style_image)
+        width, height = style_image.size
+        crop_ratio = self.crop_ratio
+        # NOTE(review): crop_box is computed but never used — images are resized
+        # without cropping.  Confirm whether a .crop(crop_box) before resize was
+        # intended (main.py's load_img does crop).
+        crop_box = (crop_ratio * height, crop_ratio * width, (1 - crop_ratio) * height, (1 - crop_ratio) * width)
+        content_image = np.asarray(content_image.resize(self.resize_shape))
+        style_image = np.asarray(style_image.resize(self.resize_shape))
+        # Normalize 0-255 pixel values to [0, 1].
+        if content_image.max() > 1.0:
+            content_image = content_image / 255.0
+        if style_image.max() > 1.0:
+            style_image = style_image / 255.0
+
+        return (content_image, style_image), 0
+
+
+class ComposeTransform(object):
+    """Composes several transforms together.
+
+    Args:
+        transform_list (list of Transform objects): list of transforms to compose
+
+    Returns:
+        sample (tuple): tuple of processed image and label
+    """
+
+    def __init__(self, transform_list):
+        """Initialize `ComposeTransform` class."""
+        self.transform_list = transform_list
+
+    def __call__(self, sample):
+        """Apply each transform in order, feeding each output into the next."""
+        for transform in self.transform_list:
+            sample = transform(sample)
+        return sample
+
+class ParseDecodeVocTransform():
+    """Parse features in Example proto.
+
+    Returns:
+        tuple of parsed image and labels
+    """
+
+    def __call__(self, sample):
+        """Parse a serialized Example proto into (image, segmentation_label) tensors."""
+
+        # Currently only supports jpeg and png.
+        # Need to use this logic because the shape is not known for
+        # tf.image.decode_image and we rely on this info to
+        # extend label if necessary.
+        def _decode_image(content, channels):
+            """Decode the image with content."""
+            return tf.cond(
+                tf.image.is_jpeg(content),
+                lambda: tf.image.decode_jpeg(content, channels),
+                lambda: tf.image.decode_png(content, channels),
+            )
+
+        # Feature spec for a VOC-style segmentation Example.
+        features = {
+            "image/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""),
+            "image/filename": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""),
+            "image/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="jpeg"),
+            "image/height": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0),
+            "image/width": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0),
+            "image/segmentation/class/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""),
+            "image/segmentation/class/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="png"),
+        }
+
+        parsed_features = tf.compat.v1.parse_single_example(sample, features)
+
+        # RGB image (3 channels); segmentation mask is single channel.
+        image = _decode_image(parsed_features["image/encoded"], channels=3)
+
+        label = None
+        label = _decode_image(parsed_features["image/segmentation/class/encoded"], channels=1)
+
+        sample = {
+            "image": image,
+        }
+
+        label.set_shape([None, None, 1])
+
+        sample["labels_class"] = label
+
+        return sample["image"], sample["labels_class"]
+
+
+class BaseMetric(object):
+    """The base class of Metric."""
+
+    def __init__(self, metric, single_output=False, hvd=None):
+        """Initialize the basic metric.
+
+        Args:
+            metric: The metric class.
+            single_output: Whether the output is single or not, defaults to False.
+            hvd: The Horovod class for distributed training, defaults to None.
+        """
+        self._metric_cls = metric
+        self._single_output = single_output
+        self._hvd = hvd
+
+    def __call__(self, *args, **kwargs):
+        """Evaluate the model predictions, and the reference.
+
+        Instantiates the wrapped metric class with the caller's arguments and
+        stores the instance on self._metric.
+
+        Returns:
+            The class itself.
+        """
+        self._metric = self._metric_cls(*args, **kwargs)
+        return self
+
+    @abstractmethod
+    def update(self, preds, labels=None, sample_weight=None):
+        """Update the state that need to be evaluated.
+
+        Args:
+            preds: The prediction result.
+            labels: The reference. Defaults to None.
+            sample_weight: The sampling weight. Defaults to None.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset(self):
+        """Clear the predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def result(self):
+        """Evaluate the difference between predictions and labels.
+
+        Raises:
+            NotImplementedError: The method should be implemented by subclass.
+        """
+        raise NotImplementedError
+
+    @property
+    def metric(self):
+        """Return its metric class.
+
+        Returns:
+            The metric class.
+        """
+        return self._metric_cls
+
+    @property
+    def hvd(self):
+        """Return its hvd class.
+
+        Returns:
+            The hvd class.
+        """
+        return self._hvd
+
+    @hvd.setter
+    def hvd(self, hvd):
+        """Set its hvd.
+
+        Args:
+            hvd: The Horovod class for distributed training.
+        """
+        self._hvd = hvd
+
+
+class TopKMetric(BaseMetric):
+    """Compute Top-k Accuracy classification score for Tensorflow model.
+
+    This metric computes the number of times where the correct label is among
+    the top k labels predicted.
+
+    Attributes:
+        k (int): The number of most likely outcomes considered to find the correct label.
+        num_correct: The number of predictions that were correct classified.
+        num_sample: The total number of predictions.
+    """
+
+    def __init__(self, k=1):
+        """Initialize the k, number of samples and correct predictions.
+
+        Args:
+            k: The number of most likely outcomes considered to find the correct label.
+        """
+        self.k = k
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def update(self, preds, labels, sample_weight=None):
+        """Add the predictions and labels.
+
+        Args:
+            preds: The predictions.
+            labels: The labels corresponding to the predictions.
+            sample_weight: The sample weight.
+                NOTE(review): accepted but unused.
+        """
+        preds, labels = TopKMetric._topk_shape_validate(preds, labels)
+
+        labels = labels.reshape([len(labels)])
+        # NOTE(review): a fresh TF graph and session are built on every update
+        # call — correct but costly for large evaluations; confirm acceptable.
+        with tf.Graph().as_default() as acc_graph:
+            topk = tf.nn.in_top_k(
+                predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k
+            )
+            fp32_topk = tf.cast(topk, tf.float32)
+            correct_tensor = tf.reduce_sum(input_tensor=fp32_topk)
+
+            with tf.compat.v1.Session() as acc_sess:
+                correct = acc_sess.run(correct_tensor)
+
+        self.num_sample += len(labels)
+        self.num_correct += correct
+
+    def reset(self):
+        """Reset the number of samples and correct predictions."""
+        self.num_correct = 0
+        self.num_sample = 0
+
+    def result(self):
+        """Compute the top-k score.
+
+        Returns:
+            The top-k score.
+        """
+        if self.num_sample == 0:
+            logger.warning("Sample num during evaluation is 0.")
+            return 0
+        elif getattr(self, "_hvd", None) is not None:  # pragma: no cover
+            # Distributed case: aggregate counters from all Horovod workers.
+            allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct))
+            allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample))
+            return allgather_num_correct / allgather_num_sample
+        return self.num_correct / self.num_sample
+
+    @staticmethod
+    def _topk_shape_validate(preds, labels):
+        """Normalize preds to (N, class_num) and labels to (N, 1) sparse form."""
+        # preds shape can be Nxclass_num or class_num(N=1 by default)
+        # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax
+        if isinstance(preds, int):
+            preds = [preds]
+            preds = np.array(preds)
+        elif isinstance(preds, np.ndarray):
+            preds = np.array(preds)
+        elif isinstance(preds, list):
+            preds = np.array(preds)
+            preds = preds.reshape((-1, preds.shape[-1]))
+
+        # consider labels just int value 1x1
+        if isinstance(labels, int):
+            labels = [labels]
+            labels = np.array(labels)
+        elif isinstance(labels, tuple):
+            labels = np.array([labels])
+            labels = labels.reshape((labels.shape[-1], -1))
+        elif isinstance(labels, list):
+            if isinstance(labels[0], int):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[0], 1))
+            elif isinstance(labels[0], tuple):
+                labels = np.array(labels)
+                labels = labels.reshape((labels.shape[-1], -1))
+            else:
+                labels = np.array(labels)
+        # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot)
+        # only support 2 dimension one-shot labels
+        # or 1 dimension one-hot class_num will confuse with N
+
+        if len(preds.shape) == 1:
+            N = 1
+            class_num = preds.shape[0]
+            preds = preds.reshape([-1, class_num])
+        elif len(preds.shape) >= 2:
+            N = preds.shape[0]
+            preds = preds.reshape([N, -1])
+            class_num = preds.shape[1]
+
+        label_N = labels.shape[0]
+        assert label_N == N, "labels batch size should same with preds"
+        labels = labels.reshape([N, -1])
+        # one-hot labels will have 2 dimension not equal 1
+        if labels.shape[1] != 1:
+            labels = labels.argsort()[..., -1:]
+        return preds, labels
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py
new file mode 100644
index 00000000000..440b0cee4af
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py
@@ -0,0 +1,207 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+import os
+import io
+import skimage.io
+import glob
+import numpy as np
+import tensorflow.compat.v1 as tf
+from PIL import Image
+import time
+
+from neural_compressor.tensorflow.utils import BaseDataLoader, DummyDatasetV2
+from data_process import (
+ StyleTransferDataset,
+ ComposeTransform,
+ ParseDecodeVocTransform,
+)
+
+flags = tf.flags
+flags.DEFINE_string('style_images_paths', None, 'Paths to the style images'
+ 'for evaluation.')
+flags.DEFINE_string('content_images_paths', None, 'Paths to the content images'
+ 'for evaluation.')
+flags.DEFINE_string('output_dir', './result', 'Output stylized image directory.')
+
+flags.DEFINE_string('output_model', None, 'Output model directory.')
+
+flags.DEFINE_string('input_model', None, 'Output directory.')
+
+flags.DEFINE_integer('batch_size', 1, 'batch_size')
+
+flags.DEFINE_bool('tune', False, 'if use tune')
+
+FLAGS = flags.FLAGS
+
+def load_img(path, resize_shape=(256, 256), crop_ratio=0.1):
+    """Load an image, crop crop_ratio per side, resize, and scale to [0, 1].
+
+    Returns:
+        float32 array with a leading batch axis of 1.
+    """
+    img = Image.open(path)
+    width, height = img.size
+    # NOTE(review): PIL's crop expects (left, upper, right, lower) in (x, y)
+    # order, but this box mixes height into the x slots — only correct for
+    # square images; confirm for non-square inputs.
+    crop_box = (crop_ratio*height, crop_ratio*width, (1-crop_ratio)*height, (1-crop_ratio)*width)
+    img = np.asarray(img.crop(crop_box).resize(resize_shape))
+    # Normalize 0-255 pixel values to [0, 1].
+    if img.max() > 1.0:
+        img = img / 255.
+    img = img.astype(np.float32)[np.newaxis, ...]
+    return img
+
+def save_image(image, output_file, save_format='jpeg'):
+ image = np.uint8(image * 255.0)
+ buf = io.BytesIO()
+ skimage.io.imsave(buf, np.squeeze(image, 0), format=save_format)
+ buf.seek(0)
+ f = tf.gfile.GFile(output_file, 'w')
+ f.write(buf.getvalue())
+ f.close()
+
+def image_style_transfer(sess, content_img_path, style_img_path):
+    """Run one stylization pass and write stylized_image.jpg to FLAGS.output_dir.
+
+    Args:
+        sess: Session whose graph holds the transformer model.
+        content_img_path: Path of the content image.
+        style_img_path: Path of the style image.
+    """
+    # NOTE(review): the 'import/import/' prefix implies the graph was imported
+    # twice before this is called — confirm against the caller.
+    stylized_images = sess.graph.get_tensor_by_name('import/import/transformer/expand/conv3/conv/Sigmoid:0')
+    style_img_np = load_img(style_img_path, crop_ratio=0)
+    content_img_np = load_img(content_img_path, crop_ratio=0)
+    stylized_image_res = sess.run(
+        stylized_images,
+        feed_dict={
+            'import/import/style_input:0': style_img_np,
+            'import/import/content_input:0': content_img_np})
+    # saves stylized image.
+    save_image(stylized_image_res, os.path.join(FLAGS.output_dir, 'stylized_image.jpg'))
+
+def main(args=None):
+    """Load the model (ckpt or frozen pb), optionally quantize, then benchmark.
+
+    Behavior is driven entirely by FLAGS: input_model/output_model paths, the
+    style/content image folders, batch_size and the tune switch.
+    """
+    tf.logging.set_verbosity(tf.logging.INFO)
+    if not tf.gfile.Exists(FLAGS.output_dir):
+        tf.gfile.MkDir(FLAGS.output_dir)
+
+    with tf.Session() as sess:
+        if FLAGS.input_model.rsplit('.', 1)[-1] == 'ckpt':
+            # Placeholders exist to be wired in as the graph's new inputs below;
+            # the Python variables themselves are unused.
+            style_img_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3], name='style_input')
+            content_img_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3], name='content_input')
+            # import meta_graph
+            meta_data_path = FLAGS.input_model + '.meta'
+            saver = tf.train.import_meta_graph(meta_data_path, clear_devices=True)
+
+            sess.run(tf.global_variables_initializer())
+            saver.restore(sess, FLAGS.input_model)
+            graph_def = sess.graph.as_graph_def()
+
+            # Rewire inputs: the checkpoint's internal preprocessing outputs are
+            # replaced by the two placeholders created above.
+            replace_style = 'style_image_processing/ResizeBilinear_2'
+            replace_content = 'batch_processing/batch'
+            for node in graph_def.node:
+                for idx, input_name in enumerate(node.input):
+                    # replace style input and content input nodes to placeholder
+                    if replace_content == input_name:
+                        node.input[idx] = 'content_input'
+                    if replace_style == input_name:
+                        node.input[idx] = 'style_input'
+
+            if FLAGS.tune:
+                from neural_compressor.tensorflow.quantization.utils.utility import _parse_ckpt_bn_input
+                _parse_ckpt_bn_input(graph_def)
+            output_name = 'transformer/expand/conv3/conv/Sigmoid'
+            frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, [output_name])
+        # use frozen pb instead
+        elif FLAGS.input_model.rsplit('.', 1)[-1] == 'pb':
+            with open(FLAGS.input_model, 'rb') as f:
+                frozen_graph = tf.GraphDef()
+                frozen_graph.ParseFromString(f.read())
+        else:
+            print("not supported model format")
+            exit(-1)
+
+    if FLAGS.tune:
+        # Quantize the frozen graph with INC static PTQ and reuse the quantized
+        # graph_def for the validation run below.
+        with tf.Graph().as_default() as graph:
+            tf.import_graph_def(frozen_graph, name='')
+            from neural_compressor.common import set_random_seed
+            from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+            set_random_seed(9527)
+            dataset = StyleTransferDataset(
+                content_folder=FLAGS.content_images_paths.strip(),
+                style_folder=FLAGS.style_images_paths.strip(),
+                transform=ComposeTransform(transform_list= [
+                        ParseDecodeVocTransform(),
+                    ]
+                )
+            )
+            calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size)
+
+            quant_config = StaticQuantConfig()
+            q_model = quantize_model(graph, quant_config, calib_dataloader)
+            q_model.save(FLAGS.output_model)
+            frozen_graph= q_model.graph_def
+
+    # validate the quantized model here
+    with tf.Graph().as_default(), tf.Session() as sess:
+        if FLAGS.tune:
+            # create dataloader using default style_transfer dataset
+            # generate stylized images
+            dataset = StyleTransferDataset(
+                content_folder=FLAGS.content_images_paths.strip(),
+                style_folder=FLAGS.style_images_paths.strip(),
+                crop_ratio=0.2,
+                resize_shape=(256, 256)
+            )
+        else:
+            # Benchmark-only path: synthetic inputs, no real images required.
+            dataset = DummyDatasetV2(input_shape=[(256, 256, 3), (256, 256, 3)], label_shape=(1, ))
+
+        dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size)
+        tf.import_graph_def(frozen_graph, name='')
+        style_transfer(sess, dataloader)
+
+def add_import_to_name(sess, name, try_cnt=2):
+ for i in range(0, try_cnt):
+ try:
+ sess.graph.get_tensor_by_name(name)
+ return name
+ except:
+ name = 'import/' + name
+
+ raise ValueError('can not find tensor by name')
+
+# validate and save the files
+# validate and save the files
+def style_transfer(sess, dataloader):
+    """Time stylization over up to 20 batches and print latency/throughput.
+
+    Args:
+        sess: Session with the (possibly quantized) graph already imported.
+        dataloader: Yields ((content_batch, style_batch), label) pairs.
+    """
+    time_list = []
+    output_name = add_import_to_name(sess, 'transformer/expand/conv3/conv/Sigmoid:0', 3)
+    style_name = add_import_to_name(sess, 'style_input:0', 3)
+    content_name = add_import_to_name(sess, 'content_input:0', 3)
+
+    stylized_images = sess.graph.get_tensor_by_name(output_name)
+
+    for idx, ((content_img_np, style_img_np), _) in enumerate(dataloader):
+        start_time = time.time()
+        stylized_image_res = sess.run(
+            stylized_images,
+            feed_dict={
+                style_name: style_img_np,
+                content_name: content_img_np})
+        duration = time.time() - start_time
+        time_list.append(duration)
+        # Cap the measurement at 20 batches.
+        if idx + 1 == 20:
+            break
+    # First batch is treated as warm-up and excluded from the stats.
+    # NOTE(review): assumes at least 2 timed batches; a single-batch dataloader
+    # would divide by an empty sum — confirm callers guarantee this.
+    warm_up = 1
+    throughput = (len(time_list) - warm_up)/ np.array(time_list[warm_up:]).sum()
+    print('Batch size = {}'.format(FLAGS.batch_size))
+    print('Latency: {:.3f} ms'.format(np.array(time_list[warm_up:]).mean() * 1000))
+    print('Throughput: {:.3f} images/sec'.format(throughput))
+
+
+def run_tuning():
+    """Entry point: force TF1 graph-mode behavior, then dispatch to main()."""
+    tf.disable_v2_behavior()
+    tf.app.run(main)
+
+if __name__ == '__main__':
+ run_tuning()
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py
new file mode 100644
index 00000000000..74182ad5f37
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py
@@ -0,0 +1,33 @@
+import os
+import argparse
+import enum
+import tarfile
+import abc
+
+def get_pretrained_model(destination):
+ """
+ Obtains a ready to use style_transfer model file.
+ Args:
+ destination: path to where the file should be stored
+ """
+ url = "https://storage.googleapis.com/download.magenta.tensorflow.org/models/ \
+ arbitrary_style_transfer.tar.gz"
+
+ os.system("curl -o arbitrary_style_transfer.tar.gz {0}".format(url))
+ with tarfile.open("arbitrary_style_transfer.tar.gz") as tar:
+ if not os.path.exists(destination):
+ os.makedirs(destination)
+ tar.extractall(destination)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Prepare pre-trained model for style transfer model')
+ parser.add_argument('--model_path', type=str, default='./model', help='directory to put models, default is ./model')
+
+ args = parser.parse_args()
+ model_path = args.model_path
+ try:
+ get_pretrained_model(model_path)
+ except AttributeError:
+ print("The model fetched failed.")
+
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..1e5d462dcd4
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+scikit-image
+Pillow>=8.2.0
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..41fee820958
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -x
+
+function main {
+
+    init_params "$@"
+    run_benchmark
+
+}
+
+# init params
+function init_params {
+    iters=100
+    for var in "$@"
+    do
+        case $var in
+            --topology=*)
+                topology=$(echo $var |cut -f2 -d=)
+            ;;
+            --dataset_location=*)
+                dataset_location=$(echo $var |cut -f2 -d=)
+            ;;
+            --input_model=*)
+                input_model=$(echo $var |cut -f2 -d=)
+            ;;
+            --mode=*)
+                mode=$(echo $var |cut -f2 -d=)
+            ;;
+            --batch_size=*)
+                batch_size=$(echo $var |cut -f2 -d=)
+            ;;
+            --iters=*)
+                iters=$(echo ${var} |cut -f2 -d=)
+            ;;
+            *)
+                echo "Error: No such parameter: ${var}"
+                exit 1
+            ;;
+        esac
+    done
+
+}
+
+
+# run_benchmark: style/content folders arrive comma-joined in dataset_location
+function run_benchmark {
+    style_images=$(echo ${dataset_location} | awk -F ',' '{print $1}')
+    content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}')
+    echo "$style_images, $content_images"
+
+    # Benchmark-only run: no model is produced, so no --output_model.  The
+    # original passed --output_model "${output_model}" with a variable that
+    # init_params never sets.
+    python main.py \
+        --input_model "${input_model}" \
+        --style_images_paths "${style_images}" \
+        --content_images_paths "${content_images}" \
+        --batch_size "${batch_size}" \
+        --tune=False
+
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..4fdfdd2e8a5
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# set -x
+
+function main {
+
+    init_params "$@"
+
+    run_tuning
+
+}
+
+# init params
+function init_params {
+
+    for var in "$@"
+    do
+        case $var in
+            --topology=*)
+                topology=$(echo $var |cut -f2 -d=)
+            ;;
+            --dataset_location=*)
+                dataset_location=$(echo "$var" |cut -f2 -d=)
+            ;;
+            --input_model=*)
+                input_model=$(echo "$var" |cut -f2 -d=)
+            ;;
+            --output_model=*)
+                output_model=$(echo "$var" |cut -f2 -d=)
+            ;;
+        esac
+    done
+
+}
+
+# run_tuning: quantize via main.py with --tune=True
+function run_tuning {
+    style_images=$(echo ${dataset_location} | awk -F ',' '{print $1}')
+    content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}')
+    echo "$style_images, $content_images"
+
+    # The original also passed --config "./conf.yaml"; main.py defines no
+    # 'config' flag, so tf.app.run would abort with UnrecognizedFlagError.
+    python main.py \
+        --input_model "${input_model}" \
+        --style_images_paths "${style_images}" \
+        --content_images_paths "${content_images}" \
+        --tune=True \
+        --output_model "${output_model}"
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg
new file mode 100644
index 00000000000..5af5a0eff59
Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg differ
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg
new file mode 100644
index 00000000000..bb0c46ea1de
Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg differ
diff --git a/examples/helloworld/fp8_example/README.md b/examples/helloworld/fp8_example/README.md
new file mode 100644
index 00000000000..b758768ef0f
--- /dev/null
+++ b/examples/helloworld/fp8_example/README.md
@@ -0,0 +1,96 @@
+### Usage demo
+
+#### Two steps to get a quantized model
+
+```diff
+import torch
++ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration
+import habana_frameworks.torch.core as htcore
+
+class M(torch.nn.Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self.fc1 = torch.nn.Linear(10, 5)
+ self.fc2 = torch.nn.Linear(5, 10)
+
+ def forward(self, inp):
+ x1 = self.fc1(inp)
+ x2 = self.fc2(x1)
+ return x2
+
+model = M().eval()
+
++ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file
+
++ if config.measure:
++ model = prepare(model, config)
+
++ if config.quantize:
++ htcore.hpu_initialize()
++ model = convert(model, config)
+
+# user code run
+with torch.no_grad():
+ model.to("hpu")
+ output = model(torch.randn(1, 10).to("hpu"))
+ print(output)
+
++ if config.measure:
++ finalize_calibration(model)
+```
+
+
+The complete script and configuration files are available in [sample_two_steps.py](./sample_two_steps.py), [maxabs_measure.json](./maxabs_measure.json) and [maxabs_quant.json](./maxabs_quant.json).
+
+First, measure the tensor quantization statistic:
+```shell
+python sample_two_steps.py --quant_config=maxabs_measure.json
+```
+
+Then quantize the model based on previous measurements:
+```shell
+python sample_two_steps.py --quant_config=maxabs_quant.json
+```
+
+#### One step to get a quantized model
+
+```diff
+import torch
++ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration
+import habana_frameworks.torch.core as htcore
+
+class M(torch.nn.Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self.fc1 = torch.nn.Linear(10, 5)
+ self.fc2 = torch.nn.Linear(5, 10)
+
+ def forward(self, inp):
+ x1 = self.fc1(inp)
+ x2 = self.fc2(x1)
+ return x2
+
+model = M().to("hpu")
+
++ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file
++ model = prepare(model, config)
+
+# user code run to do calibration
+with torch.no_grad():
+ output = model(torch.randn(1, 10).to("hpu"))
+ print(output)
+
++ finalize_calibration(model)
++ model = convert(model)
+
+# user code to run benchmark for quantized model
+with torch.no_grad():
+ output = model(torch.randn(1, 10).to("hpu"))
+ print(output)
+```
+
+The complete script is available in [sample_one_step.py](./sample_one_step.py).
+
+```shell
+python sample_one_step.py --quant_config=quant_config.json
+```
diff --git a/examples/helloworld/fp8_example/maxabs_measure.json b/examples/helloworld/fp8_example/maxabs_measure.json
new file mode 100644
index 00000000000..8d55f33e57a
--- /dev/null
+++ b/examples/helloworld/fp8_example/maxabs_measure.json
@@ -0,0 +1,7 @@
+{
+ "mode": "MEASURE",
+ "observer": "maxabs",
+ "allowlist": {"types": [], "names": []},
+ "blocklist": {"types": [], "names": []},
+ "dump_stats_path": "./hqt_output/measure"
+}
diff --git a/examples/helloworld/fp8_example/maxabs_quant.json b/examples/helloworld/fp8_example/maxabs_quant.json
new file mode 100644
index 00000000000..d1f76f8f630
--- /dev/null
+++ b/examples/helloworld/fp8_example/maxabs_quant.json
@@ -0,0 +1,8 @@
+{
+ "mode": "QUANTIZE",
+ "observer": "maxabs",
+ "scale_method": "maxabs_hw",
+ "allowlist": {"types": [], "names": []},
+ "blocklist": {"types": [], "names": []},
+ "dump_stats_path": "./hqt_output/measure"
+}
diff --git a/examples/helloworld/fp8_example/quant_config.json b/examples/helloworld/fp8_example/quant_config.json
new file mode 100644
index 00000000000..c139d13bbea
--- /dev/null
+++ b/examples/helloworld/fp8_example/quant_config.json
@@ -0,0 +1,8 @@
+{
+ "mode": "AUTO",
+ "observer": "maxabs",
+ "scale_method": "maxabs_hw",
+ "allowlist": {"types": [], "names": []},
+ "blocklist": {"types": [], "names": []},
+ "dump_stats_path": "./hqt_output/measure"
+}
diff --git a/examples/helloworld/fp8_example/sample_one_step.py b/examples/helloworld/fp8_example/sample_one_step.py
new file mode 100644
index 00000000000..54a4090a833
--- /dev/null
+++ b/examples/helloworld/fp8_example/sample_one_step.py
@@ -0,0 +1,56 @@
+import argparse
+import torch
+import habana_frameworks.torch.core as htcore
+htcore.hpu_set_env()
+
+from neural_compressor.torch.quantization import FP8Config, convert, finalize_calibration, prepare
+
+torch.manual_seed(1)
+
+
+# 1. python sample_one_step.py --quant_config=quant_config.json
+
+
+class M(torch.nn.Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self.fc1 = torch.nn.Linear(10, 5)
+ self.fc2 = torch.nn.Linear(5, 10)
+
+ def forward(self, inp):
+ x1 = self.fc1(inp)
+ x2 = self.fc2(x1)
+ return x2
+
+
+def eval_func(model):
+ # user's eval func
+ input = torch.randn(1, 10)
+ model(input.to("hpu"))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Habana FP8 sample code.", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+ )
+ parser.add_argument("--quant_config", type=str, help="json file of quantization config")
+ args = parser.parse_args()
+
+ model = M().eval().to("hpu")
+ htcore.hpu_initialize()
+
+ config = FP8Config.from_json_file(args.quant_config)
+ model = prepare(model, config)
+
+ # for calibration
+ with torch.no_grad():
+ # model.to("hpu")
+ output = model(torch.randn(1, 10).to("hpu"))
+
+ model = convert(model)
+ print(model)
+
+ # for benchmark
+ with torch.no_grad():
+ output = model(torch.randn(1, 10).to("hpu"))
+ print(output)
diff --git a/examples/helloworld/fp8_example/sample_two_steps.py b/examples/helloworld/fp8_example/sample_two_steps.py
new file mode 100644
index 00000000000..9e17748b9b0
--- /dev/null
+++ b/examples/helloworld/fp8_example/sample_two_steps.py
@@ -0,0 +1,50 @@
+import argparse
+import torch
+import habana_frameworks.torch.core as htcore
+htcore.hpu_set_env()
+
+from neural_compressor.torch.quantization import FP8Config, convert, finalize_calibration, prepare
+
+torch.manual_seed(1)
+
+# 1. python sample_two_steps.py --quant_config=maxabs_measure.json
+# 2. python sample_two_steps.py --quant_config=maxabs_quant.json
+
+
+class M(torch.nn.Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self.fc1 = torch.nn.Linear(10, 5)
+ self.fc2 = torch.nn.Linear(5, 10)
+
+ def forward(self, inp):
+ x1 = self.fc1(inp)
+ x2 = self.fc2(x1)
+ return x2
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Habana FP8 sample code.", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+ )
+ parser.add_argument("--quant_config", type=str, help="json file of quantization config")
+ args = parser.parse_args()
+
+ model = M().eval()
+ config = FP8Config.from_json_file(args.quant_config)
+
+ if config.measure:
+ model = prepare(model, config)
+
+ if config.quantize:
+ htcore.hpu_initialize()
+ model = convert(model, config)
+ print(model)
+
+ with torch.no_grad():
+ model.to("hpu")
+ output = model(torch.randn(1, 10).to("hpu"))
+ print(output)
+
+ if config.measure:
+ finalize_calibration(model)
diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md b/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md
index 9fbd442ce8e..409e8e2a7f4 100644
--- a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md
+++ b/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md
@@ -32,28 +32,6 @@ tar -xvzf caffe_ilsvrc12.tar.gz val.txt
# Run
-## Diagnosis
-
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-
-### Quantization diagnosis
-
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1. Quantization
Quantize model with QLinearOps:
diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py b/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py
index 1fc6e1b3a4b..90c796e312b 100644
--- a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py
+++ b/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py
@@ -286,9 +286,6 @@ def eval(onnx_model):
return eval_func(onnx_model, dataloader, top1)
if args.benchmark:
- if args.diagnose and args.mode != "performance":
- print("[ WARNING ] Diagnosis works only with performance benchmark.")
-
if args.mode == 'performance':
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
@@ -297,7 +294,6 @@ def eval(onnx_model):
iteration=1000,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
)
fit(model, conf, b_dataloader=dataloader)
elif args.mode == 'accuracy':
@@ -308,7 +304,6 @@ def eval(onnx_model):
from neural_compressor import quantization, PostTrainingQuantConfig
config = PostTrainingQuantConfig(
quant_format=args.quant_format,
- diagnosis=args.diagnose,
)
q_model = quantization.fit(model, config, calib_dataloader=dataloader,
diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md b/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md
index 47a4f568238..5ec8f534b46 100644
--- a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md
+++ b/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md
@@ -31,29 +31,6 @@ tar -xvzf caffe_ilsvrc12.tar.gz val.txt
```
# Run
-
-## Diagnosis
-
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-
-### Quantization diagnosis
-
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1. Quantization
Quantize model with QLinearOps:
diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py b/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py
index 91843d01471..aacad83a241 100644
--- a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py
+++ b/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py
@@ -267,8 +267,6 @@ def eval(onnx_model):
return eval_func(onnx_model, dataloader, top1)
if args.benchmark:
- if args.diagnose and args.mode != "performance":
- print("[ WARNING ] Diagnosis works only with performance benchmark.")
if args.mode == 'performance':
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
@@ -277,7 +275,6 @@ def eval(onnx_model):
iteration=1000,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
)
fit(model, conf, b_dataloader=dataloader)
elif args.mode == 'accuracy':
@@ -288,7 +285,6 @@ def eval(onnx_model):
from neural_compressor import quantization, PostTrainingQuantConfig
config = PostTrainingQuantConfig(
quant_format=args.quant_format,
- diagnosis=args.diagnose,
)
q_model = quantization.fit(model, config, calib_dataloader=dataloader,
diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md b/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md
index 1113e97d6d6..f889c658614 100644
--- a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md
+++ b/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md
@@ -32,28 +32,6 @@ tar -xvzf caffe_ilsvrc12.tar.gz val.txt
# Run
-## Diagnosis
-
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-
-### Quantization diagnosis
-
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1. Quantization
Quantize model with QLinearOps:
diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py b/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py
index b42815a1d3d..802e9b18fef 100644
--- a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py
+++ b/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py
@@ -295,9 +295,6 @@ def eval_func(model, dataloader, metric):
def eval(onnx_model):
return eval_func(onnx_model, dataloader, top1)
- if args.benchmark and args.diagnose and args.mode != "performance":
- print("[ WARNING ] Diagnosis works only with performance benchmark.")
-
if args.benchmark:
if args.mode == 'performance':
from neural_compressor.benchmark import fit
@@ -307,7 +304,6 @@ def eval(onnx_model):
iteration=1000,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
device=args.device,
backend=backend,
)
@@ -320,7 +316,6 @@ def eval(onnx_model):
from neural_compressor import quantization, PostTrainingQuantConfig
config = PostTrainingQuantConfig(
quant_format=args.quant_format,
- diagnosis=args.diagnose,
device=args.device,
backend=backend
)
diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md
index 6e0078f99f3..aaed30c3075 100644
--- a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md
+++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md
@@ -32,19 +32,6 @@ python prepare_model.py --input_model='MRPC.zip' --output_model='bert.onnx'
# Run
-## Diagnosis
-
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-
-### Benchmark diagnosis
-
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1. Quantization
Dynamic quantization:
diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md b/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md
index 3f6d640d947..4fbb4357574 100644
--- a/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md
+++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md
@@ -31,17 +31,6 @@ python prepare_model.py --input_model='MRPC.zip' --output_model='bert.onnx'
# Run
-## Diagnosis
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-
-### Benchmark diagnosis
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1. Quantization
Static quantization with QOperator format:
diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py
index 95a49ce37ab..c7cf936270d 100644
--- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py
+++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py
@@ -216,8 +216,6 @@ def _process_dataset(self):
self.label = []
self.onnx_inputs = []
for inputs in self.dataset:
- # import pdb;
- # pdb.set_trace()
onnx_inputs = []
has_labels = all(inputs.get(k) is not None for k in self.label_names)
if has_labels:
@@ -237,8 +235,6 @@ def _process_dataset(self):
}
"""
for key in self.onnx_input_names:
- # import pdb;
- # pdb.set_trace()
if key in inputs:
# onnx_inputs[key] = np.array([inputs[key]])
onnx_inputs.append(np.array(inputs[key]))
diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py
index b3de22ac766..5540f4c002d 100644
--- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py
+++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py
@@ -216,8 +216,6 @@ def _process_dataset(self):
self.label = []
self.onnx_inputs = []
for inputs in self.dataset:
- # import pdb;
- # pdb.set_trace()
onnx_inputs = []
has_labels = all(inputs.get(k) is not None for k in self.label_names)
if has_labels:
@@ -237,8 +235,6 @@ def _process_dataset(self):
}
"""
for key in self.onnx_input_names:
- # import pdb;
- # pdb.set_trace()
if key in inputs:
# onnx_inputs[key] = np.array([inputs[key]])
onnx_inputs.append(np.array(inputs[key]))
diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md
index d3c849b517a..47746bb4394 100644
--- a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md
+++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md
@@ -27,28 +27,6 @@ Download [MS COCO 2017 dataset](https://cocodataset.org/#download).
# Run
-## Diagnosis
-
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-
-### Quantization diagnosis
-
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1. Quantization
Static quantization with QOperator format:
diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py
index 0e9e0235661..aaffb794e0a 100644
--- a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py
+++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py
@@ -140,8 +140,6 @@ def eval_func(model):
return metric.result()
if args.benchmark:
- if args.diagnose and args.mode != "performance":
- print("[ WARNING ] Diagnosis works only with performance benchmark.")
if args.mode == 'performance':
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
@@ -149,7 +147,6 @@ def eval_func(model):
iteration=100,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
device=args.device,
backend=backend,
)
@@ -169,7 +166,6 @@ def eval_func(model):
accuracy_criterion=accuracy_criterion,
quant_format=args.quant_format,
calibration_sampling_size=[50],
- diagnosis=args.diagnose,
device=args.device,
backend=backend,
)
diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py
index 940722075ab..e4d5db6b8ee 100644
--- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py
+++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py
@@ -59,7 +59,6 @@ def _compute_padding(self, input, dim):
return additional_padding, total_padding
def forward(self, input):
- #import pdb; pdb.set_trace()
if self.padding == "VALID":
return F.conv2d(
input,
@@ -180,7 +179,6 @@ def decode_boxes(rel_codes, boxes, weights):
dh = dh / wh
pred_ctr_x = dx * widths + ctr_x
- #import pdb; pdb.set_trace()
pred_ctr_y = dy * heights + ctr_y
pred_w = torch.exp(dw) * widths
pred_h = torch.exp(dh) * heights
@@ -194,5 +192,4 @@ def decode_boxes(rel_codes, boxes, weights):
],
dim=2,
)
- #import pdb; pdb.set_trace()
return pred_boxes
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md
index 9f368bc8bd6..90c307d5021 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md
@@ -76,24 +76,6 @@ config = PostTrainingQuantConfig(
)
```
-## Diagnosis
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-### Quantization diagnosis
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1 Quantization
```shell
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
index c375fa277ec..8c6229989de 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
@@ -249,7 +249,6 @@ def run(self):
conf = PostTrainingQuantConfig(
calibration_sampling_size=[20, 50],
op_name_dict=op_name_dict,
- diagnosis=args.diagnose,
)
from neural_compressor import Metric
top1 = Metric(name="topk", k=1)
@@ -283,7 +282,6 @@ def eval(model):
iteration=100,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
)
fit(args.input_graph, conf, b_dataloader=dataloader)
elif args.mode == 'accuracy':
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md
index 520705edf8f..30e494a47f5 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md
@@ -72,24 +72,6 @@ config = PostTrainingQuantConfig(
)
```
-## Diagnosis
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-### Quantization diagnosis
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1 Quantization
```shell
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
index 11f4dcd6e96..cb944f90711 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
@@ -120,7 +120,6 @@ def run(self):
conf = PostTrainingQuantConfig(
outputs=['softmax_tensor'],
calibration_sampling_size=[50, 100],
- diagnosis=args.diagnose,
)
from neural_compressor import Metric
top1 = Metric(name="topk", k=1)
@@ -155,7 +154,6 @@ def eval(model):
iteration=100,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
)
fit(args.input_graph, conf, b_dataloader=dataloader)
elif args.mode == 'accuracy':
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md
index 547a2f6d7c8..b62baeb61cf 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md
@@ -105,24 +105,6 @@ config = PostTrainingQuantConfig(
)
```
-## Diagnosis
-Neural Compressor offers quantization and benchmark diagnosis. Adding `diagnosis` parameter to Quantization/Benchmark config will provide additional details useful in diagnostics.
-### Quantization diagnosis
-```
-config = PostTrainingQuantConfig(
- diagnosis=True,
- ...
-)
-```
-
-### Benchmark diagnosis
-```
-config = BenchmarkConfig(
- diagnosis=True,
- ...
-)
-```
-
## 1 Quantization
```shell
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
index 73da3fd60a6..95b17af0290 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
@@ -114,7 +114,6 @@ def run(self):
eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args)
conf = PostTrainingQuantConfig(
calibration_sampling_size=[50, 100],
- diagnosis=args.diagnose,
)
from neural_compressor import Metric
top1 = Metric(name="topk", k=1)
@@ -148,7 +147,6 @@ def eval(model):
iteration=100,
cores_per_instance=4,
num_of_instance=1,
- diagnosis=args.diagnose,
)
fit(args.input_graph, conf, b_dataloader=dataloader)
elif args.mode == 'accuracy':
diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt
index 74b67c4c7b3..186124490e2 100644
--- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt
+++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt
@@ -1,4 +1,4 @@
-tensorflow==2.12
+tensorflow==2.12.1
transformers
datasets
numpy
\ No newline at end of file
diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py
index 78c91f446bb..b88cd9f7a09 100644
--- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py
+++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py
@@ -188,7 +188,6 @@ def eval_func(model):
from neural_compressor.config import AccuracyCriterion
from neural_compressor import quantization
-os.environ["TF_USE_LEGACY_KERAS"]="False"
recipes = {}
if args.sq:
recipes = {"smooth_quant": True, "smooth_quant_args": {'alpha': args.alpha}}
diff --git a/neural_coder/README.md b/neural_coder/README.md
deleted file mode 100644
index b7c6d0fb63b..00000000000
--- a/neural_coder/README.md
+++ /dev/null
@@ -1,52 +0,0 @@
-Neural Coder
-===========================
-## What do we offer?
-
-Neural Coder is a novel component under Intel® Neural Compressor to further simplify the deployment of deep learning models via one-click automated code changes for device switch (e.g., CUDA to CPU) and optimization enabling. Subsequently, Neural Coder can also perform automated benchmark on all applicable optimization sets acquired from the automated enabling, and evaluate for the best out-of-box performance.
-
-Neural Coder leverages static program analysis techniques and heuristic optimization rules to simplify the usage of various Deep Learning optimization APIs for increasing computation efficiency of AI models and improving user experience for general AI customers. We demonstrate great improvement of developer productivity and aim to facilitate enhanced Deep Learning acceleration adoption via this toolkit.
-
-Neural Coder helps you code Deep Learning optimizations automatically into your scripts. For example, to apply
-- Automatic Mixed Precision (torch.cpu.amp.autocast)
-- JIT Script computation graph transformation (torch.jit.script)
-- Channels Last memory format transformation (torch.channels_last)
-
-simultaneously on below PyTorch evaluation code, we generate the optimized code in one-click by detecting the correct position to insert the correct API code lines:
-```diff
- import torch
- import torchvision.models as models
- my_model = models.resnet50(pretrained=True)
-+ import torch
-+ with torch.no_grad():
-+ my_model = my_model.to(memory_format=torch.channels_last)
-+ import torch
-+ with torch.no_grad():
-+ my_model.eval()
-+ my_model = torch.jit.script(my_model)
-+ my_model = torch.jit.freeze(my_model)
- my_model.eval()
- batch_size = 112
- input = torch.rand(batch_size, 3, 224, 224)
- with torch.no_grad():
-+ import torch
-+ with torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
- my_model(input)
-```
-
-## Getting Started!
-
-There are currently 3 ways to use Neural Coder for automatic quantization enabling and benchmark.
-
-### Jupyter Lab Extension
-We offer Neural Coder as an extension plugin in Jupyter Lab. This enables users to utilize Neural Coder while writing their Deep Learning models in Jupyter Lab coding platform. Users can simply search for ```jupyter-lab-neural-compressor``` in the Extension Manager in JupyterLab and install Neural Coder with one click. For more details, please refer to this [guide](extensions/neural_compressor_ext_lab/README.md).
-
-[AWS Amazon SageMaker](https://aws.amazon.com/sagemaker/) users can also use Neural Coder as an extension following this [guide](docs/AWSSageMakerSupport.md).
-
-### Python Launcher
-Neural Coder can be used as a Python Launcher. Users can run the Python Deep Learning model code as it is with automatic enabling of optimizations by simply adding an inline prefix ```-m neural_coder``` to the Python command line. For more details, please refer to this [guide](docs/PythonLauncher.md).
-
-### Python API
-There are 3 user-facing APIs for Neural Coder: enable, bench and superbench. For more details, please refer to this [guide](docs/PythonAPI.md). We have provided a [list](docs/SupportMatrix.md) of supported Deep Learning optimization features. Specifically for quantization, we provide an auto-quantization API that helps automatically enable quantization on Deep Learning models and automatically evaluates for the best performance on the model with no manual coding needed. Supported features include Post-Training Static Quantization, Post-Training Dynamic Quantization, and Mixed Precision. For more details, please refer to this [guide](docs/Quantization.md).
-
-## Contact
-Please contact us at [inc.maintainers@intel.com](mailto:inc.maintainers@intel.com) for any Neural Coder related question.
diff --git a/neural_coder/__init__.py b/neural_coder/__init__.py
deleted file mode 100644
index 7bf18cd4956..00000000000
--- a/neural_coder/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .interface import enable
-from .interface import bench
-from .interface import superbench
-
-# from .interface import superreport
-from .interface import auto_quant
diff --git a/neural_coder/__main__.py b/neural_coder/__main__.py
deleted file mode 100644
index 0551b72dc21..00000000000
--- a/neural_coder/__main__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .launcher import Launcher
-
-args = Launcher.parse_args()
-Launcher.execute(args)
diff --git a/neural_coder/backends/.yaml b/neural_coder/backends/.yaml
deleted file mode 100644
index 1e3b1fa1501..00000000000
--- a/neural_coder/backends/.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- -
- -
- -
- content:
- - |-
- [+] YOUR CODE LINE 1
- [+] YOUR CODE LINE 2
- [+] YOUR CODE LINE 3
- - |-
- [+] YOUR CODE LINE 1
- [+] YOUR CODE LINE 2
- [+] YOUR CODE LINE 3
- - |-
- [+] YOUR CODE LINE 1
- [+] YOUR CODE LINE 2
- [+] YOUR CODE LINE 3
- order:
- - below:
- -
- -
- above:
- -
- -
- - below:
- -
- -
- above:
- -
- -
- - below:
- -
- -
- above:
- -
- -
diff --git a/neural_coder/backends/__init__.py b/neural_coder/backends/__init__.py
deleted file mode 100644
index e833188cc78..00000000000
--- a/neural_coder/backends/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/neural_coder/backends/intel_extension_for_transformers.yaml b/neural_coder/backends/intel_extension_for_transformers.yaml
deleted file mode 100644
index a1accbbfb4b..00000000000
--- a/neural_coder/backends/intel_extension_for_transformers.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Note: For intel_extension_for_transformers support
-# we default apply "PostTrainingDynamic" and "eval_f1"
-# support for customization is pending further evaluation
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] metric = metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01)
- [+] objective = objectives.performance
- [+] q_config = QuantizationConfig(approach="PostTrainingDynamic", metrics=[metric], objectives=[objective])
- [+] MODEL_NAME = trainer.quantize(quant_config=q_config)
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/keras_inc.yaml b/neural_coder/backends/keras_inc.yaml
deleted file mode 100644
index 0731a4bbbc9..00000000000
--- a/neural_coder/backends/keras_inc.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] from neural_compressor.quantization import fit
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] config = PostTrainingQuantConfig(backend='itex', quant_level=1)
- [+] quantized_model = fit(MODEL_NAME, conf=config, calib_dataloader=DATALOADER_NAME, eval_func=eval_func)
- [+] quantized_model.save("./quantized_model")
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_bf16.yaml b/neural_coder/backends/nano_bf16.yaml
deleted file mode 100644
index afecf6908f1..00000000000
--- a/neural_coder/backends/nano_bf16.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_bf16_channels_last.yaml b/neural_coder/backends/nano_bf16_channels_last.yaml
deleted file mode 100644
index d0f3987ef29..00000000000
--- a/neural_coder/backends/nano_bf16_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_bf16_ipex.yaml b/neural_coder/backends/nano_bf16_ipex.yaml
deleted file mode 100644
index 6e8db6f8719..00000000000
--- a/neural_coder/backends/nano_bf16_ipex.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", use_ipex=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_bf16_ipex_channels_last.yaml b/neural_coder/backends/nano_bf16_ipex_channels_last.yaml
deleted file mode 100644
index 3b99ef12f35..00000000000
--- a/neural_coder/backends/nano_bf16_ipex_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="bf16", use_ipex=True, channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_fp32_channels_last.yaml b/neural_coder/backends/nano_fp32_channels_last.yaml
deleted file mode 100644
index 9516ba4d50d..00000000000
--- a/neural_coder/backends/nano_fp32_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_fp32_ipex.yaml b/neural_coder/backends/nano_fp32_ipex.yaml
deleted file mode 100644
index c6fc4329b38..00000000000
--- a/neural_coder/backends/nano_fp32_ipex.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, use_ipex=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_fp32_ipex_channels_last.yaml b/neural_coder/backends/nano_fp32_ipex_channels_last.yaml
deleted file mode 100644
index aea74db737a..00000000000
--- a/neural_coder/backends/nano_fp32_ipex_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, use_ipex=True, channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_gpu_to_cpu.yaml b/neural_coder/backends/nano_gpu_to_cpu.yaml
deleted file mode 100644
index 426f72ce980..00000000000
--- a/neural_coder/backends/nano_gpu_to_cpu.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_above_model_definition_line", "insert_above_input_definition_line"]
- content:
- - |-
- [+] from bigdl.nano.pytorch import patch_torch
- [+] patch_torch()
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_int8.yaml b/neural_coder/backends/nano_int8.yaml
deleted file mode 100644
index cb846256435..00000000000
--- a/neural_coder/backends/nano_int8.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, precision="int8", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_bf16.yaml b/neural_coder/backends/nano_jit_bf16.yaml
deleted file mode 100644
index 122d93ca717..00000000000
--- a/neural_coder/backends/nano_jit_bf16.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_bf16_channels_last.yaml b/neural_coder/backends/nano_jit_bf16_channels_last.yaml
deleted file mode 100644
index cf7e1437c4a..00000000000
--- a/neural_coder/backends/nano_jit_bf16_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_bf16_ipex.yaml b/neural_coder/backends/nano_jit_bf16_ipex.yaml
deleted file mode 100644
index 1a237c6edf8..00000000000
--- a/neural_coder/backends/nano_jit_bf16_ipex.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", use_ipex=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml b/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml
deleted file mode 100644
index 603db6942f8..00000000000
--- a/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="jit", precision="bf16", use_ipex=True, channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_fp32.yaml b/neural_coder/backends/nano_jit_fp32.yaml
deleted file mode 100644
index 71e7d4ede95..00000000000
--- a/neural_coder/backends/nano_jit_fp32.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_fp32_channels_last.yaml b/neural_coder/backends/nano_jit_fp32_channels_last.yaml
deleted file mode 100644
index c30a1767175..00000000000
--- a/neural_coder/backends/nano_jit_fp32_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_fp32_ipex.yaml b/neural_coder/backends/nano_jit_fp32_ipex.yaml
deleted file mode 100644
index f673b076a20..00000000000
--- a/neural_coder/backends/nano_jit_fp32_ipex.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", use_ipex=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml b/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml
deleted file mode 100644
index 54514000ac5..00000000000
--- a/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="jit", use_ipex=True, channels_last=True, input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_onnxruntime_fp32.yaml b/neural_coder/backends/nano_onnxruntime_fp32.yaml
deleted file mode 100644
index c29e3410bc4..00000000000
--- a/neural_coder/backends/nano_onnxruntime_fp32.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="onnxruntime", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml b/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml
deleted file mode 100644
index bb5b35557da..00000000000
--- a/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="onnxruntime", precision="int8", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_openvino_fp32.yaml b/neural_coder/backends/nano_openvino_fp32.yaml
deleted file mode 100644
index 6b9324cec39..00000000000
--- a/neural_coder/backends/nano_openvino_fp32.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.trace(MODEL_NAME, accelerator="openvino", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/nano_openvino_int8.yaml b/neural_coder/backends/nano_openvino_int8.yaml
deleted file mode 100644
index 9f1b70814e7..00000000000
--- a/neural_coder/backends/nano_openvino_int8.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- - "indent_inference_line"
- content:
- - |-
- [+] from bigdl.nano.pytorch import InferenceOptimizer
- [+] MODEL_NAME = InferenceOptimizer.quantize(MODEL_NAME, accelerator="openvino", precision="int8", input_sample=INPUT_NAME)
- [+] with InferenceOptimizer.get_context(MODEL_NAME):
- - 1
- order:
- - below:
- above:
diff --git a/neural_coder/backends/onnx_inc_dynamic_quant.yaml b/neural_coder/backends/onnx_inc_dynamic_quant.yaml
deleted file mode 100644
index 98a98d941ca..00000000000
--- a/neural_coder/backends/onnx_inc_dynamic_quant.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] from neural_compressor.quantization import fit
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] config = PostTrainingQuantConfig(approach='dynamic', quant_level=1)
- [+] MODEL_NAME = fit(MODEL_NAME, conf=config, calib_dataloader=DATALOADER_NAME, eval_func=EVAL_FUNCTION_NAME)
- [+] MODEL_NAME.save("./quantized_model.onnx")
- order:
- - below:
- above:
diff --git a/neural_coder/backends/onnx_inc_static_quant_qdq.yaml b/neural_coder/backends/onnx_inc_static_quant_qdq.yaml
deleted file mode 100644
index 6df51eefb54..00000000000
--- a/neural_coder/backends/onnx_inc_static_quant_qdq.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] from neural_compressor.quantization import fit
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] config = PostTrainingQuantConfig(quant_format='QDQ', quant_level=1)
- [+] MODEL_NAME = fit(MODEL_NAME, conf=config, calib_dataloader=DATALOADER_NAME, eval_func=EVAL_FUNCTION_NAME)
- [+] MODEL_NAME.save("./quantized_model.onnx")
- order:
- - below:
- above:
diff --git a/neural_coder/backends/onnx_inc_static_quant_qlinear.yaml b/neural_coder/backends/onnx_inc_static_quant_qlinear.yaml
deleted file mode 100644
index 0c7f5e500f7..00000000000
--- a/neural_coder/backends/onnx_inc_static_quant_qlinear.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] from neural_compressor.quantization import fit
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] config = PostTrainingQuantConfig(quant_format='default', quant_level=1)
- [+] MODEL_NAME = fit(MODEL_NAME, conf=config, calib_dataloader=DATALOADER_NAME, eval_func=EVAL_FUNCTION_NAME)
- [+] MODEL_NAME.save("./quantized_model.onnx")
- order:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_aliblade.yaml b/neural_coder/backends/pytorch_aliblade.yaml
deleted file mode 100644
index ba29ac86548..00000000000
--- a/neural_coder/backends/pytorch_aliblade.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- content:
- - |-
- [+] import torch_blade
- [+] with torch.no_grad():
- [+] MODEL_NAME = torch_blade.optimize(MODEL_NAME, allow_tracing=True, model_inputs=tuple(INPUT_NAME))
- order:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_benchmark.yaml b/neural_coder/backends/pytorch_benchmark.yaml
deleted file mode 100644
index 1a153637d57..00000000000
--- a/neural_coder/backends/pytorch_benchmark.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_above_inference_line
- - insert_below_inference_line
- - indent_inference_line
- content:
- - |-
- [+] if not ACCURACY_MODE:
- [+] try:
- [+] time
- [+] time_nc = time.time
- [+] except:
- [+] from time import time as time_nc
- [+] count_iter_ = 0
- [+] total_time_ = 0
- [+] num_iter_ = NUM_BENCHMARK_ITERATION
- [+] num_warmup_iter_ = 10
- [+] list_batch_time_ = []
- [+] for i_ in range(num_iter_):
- [+] count_iter_ = count_iter_ + 1
- [+] if count_iter_ > num_warmup_iter_:
- [+] t1_ = time_nc()
- [+] try:
- [+] torch
- [+] no_grad = torch.no_grad
- [+] except:
- [+] from torch import no_grad
- [+] with no_grad():
- - |-
- [+] if count_iter_ > num_warmup_iter_:
- [+] t2_ = time_nc()
- [+] batch_time_ = t2_ - t1_
- [+] list_batch_time_.append(batch_time_)
- [+] total_time_ = total_time_ + batch_time_
- [+] print("Neural_Coder_Bench_IPS: ", round((num_iter_ - num_warmup_iter_) / total_time_, 3))
- [+] print("Neural_Coder_Bench_MSPI: ", round(total_time_ / (num_iter_ - num_warmup_iter_) * 1000, 3))
- [+] list_batch_time_.sort()
- [+] p50_latency_ = list_batch_time_[int(len(list_batch_time_) * 0.50) - 1] * 1000
- [+] p90_latency_ = list_batch_time_[int(len(list_batch_time_) * 0.90) - 1] * 1000
- [+] p99_latency_ = list_batch_time_[int(len(list_batch_time_) * 0.99) - 1] * 1000
- [+] print("Neural_Coder_Bench_P50: ", round(p50_latency_, 3))
- [+] print("Neural_Coder_Bench_P90: ", round(p90_latency_, 3))
- [+] print("Neural_Coder_Bench_P99: ", round(p99_latency_, 3))
- [+] quit()
- [+] else:
- [+] INFERENCE_LINE
- - 3
- order:
- - below:
- above:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_channels_last.yaml b/neural_coder/backends/pytorch_channels_last.yaml
deleted file mode 100644
index 75343032b02..00000000000
--- a/neural_coder/backends/pytorch_channels_last.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] import torch
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = MODEL_NAME.to(memory_format=torch.channels_last)
- order:
- - below:
- - pytorch_inc_static_quant_fx
- - pytorch_inc_static_quant_ipex
- - pytorch_inc_dynamic_quant
- above:
- - pytorch_ipex_fp32
- - pytorch_ipex_bf16
- - pytorch_ipex_int8_static_quant
- - pytorch_ipex_int8_dynamic_quant
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
diff --git a/neural_coder/backends/pytorch_inc_bf16.yaml b/neural_coder/backends/pytorch_inc_bf16.yaml
deleted file mode 100644
index bdbf3b07272..00000000000
--- a/neural_coder/backends/pytorch_inc_bf16.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] import torch
- [+] torch.backends.quantized.engine = 'onednn'
- [+] from neural_compressor.config import MixedPrecisionConfig
- [+] from neural_compressor import mix_precision
- [+] config = MixedPrecisionConfig()
- [+] MODEL_NAME = mix_precision.fit(model=MODEL_NAME, config=config)
- [+] try:
- [+] with torch.no_grad():
- [+] MODEL_NAME = torch.jit.script(MODEL_NAME)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- [+] except:
- [+] pass
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_dynamic_quant.yaml b/neural_coder/backends/pytorch_inc_dynamic_quant.yaml
deleted file mode 100644
index edb27b4d188..00000000000
--- a/neural_coder/backends/pytorch_inc_dynamic_quant.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] def eval_func(model):
- [+] EVAL_FUNC_LINES
- [+] try:
- [+] torch.backends.quantized.engine = 'onednn'
- [+] except:
- [+] from torch.backends.quantized import engine; engine = 'onednn'
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from neural_compressor.quantization import fit
- [+] conf = PostTrainingQuantConfig(approach="dynamic", quant_level=1)
- [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, eval_func=eval_func)
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- [+] try:
- [+] with torch.no_grad():
- [+] MODEL_NAME = torch.jit.script(MODEL_NAME)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- [+] except:
- [+] pass
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_dynamic_quant_fp8.yaml b/neural_coder/backends/pytorch_inc_dynamic_quant_fp8.yaml
deleted file mode 100644
index f32b91389bb..00000000000
--- a/neural_coder/backends/pytorch_inc_dynamic_quant_fp8.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] def eval_func(model):
- [+] EVAL_FUNC_LINES
- [+] try:
- [+] torch.backends.quantized.engine = 'onednn'
- [+] except:
- [+] from torch.backends.quantized import engine; engine = 'onednn'
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from neural_compressor.quantization import fit
- [+] conf = PostTrainingQuantConfig(approach="dynamic", precision = FP8_DATA_FORMAT, quant_level=1)
- [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME, eval_func=eval_func)
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- [+] try:
- [+] with torch.no_grad():
- [+] MODEL_NAME = torch.jit.script(MODEL_NAME)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- [+] except:
- [+] pass
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_huggingface_optimum_dynamic.yaml b/neural_coder/backends/pytorch_inc_huggingface_optimum_dynamic.yaml
deleted file mode 100644
index 890ff34d0a2..00000000000
--- a/neural_coder/backends/pytorch_inc_huggingface_optimum_dynamic.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] def eval_func(model):
- [+] EVAL_FUNC_LINES
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from optimum.intel.neural_compressor import INCQuantizer
- [+] quantization_config = PostTrainingQuantConfig(approach="dynamic", quant_level=1)
- [+] quantizer = INCQuantizer.from_pretrained(MODEL_NAME)
- [+] quantizer.quantize(quantization_config=quantization_config, save_directory="quantized_model", save_onnx_model=False)
- [+] MODEL_NAME = quantizer._quantized_model
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_huggingface_optimum_static.yaml b/neural_coder/backends/pytorch_inc_huggingface_optimum_static.yaml
deleted file mode 100644
index 08df89f24fd..00000000000
--- a/neural_coder/backends/pytorch_inc_huggingface_optimum_static.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] def eval_func(model):
- [+] EVAL_FUNC_LINES
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from optimum.intel.neural_compressor import INCQuantizer
- [+] quantization_config = PostTrainingQuantConfig(approach="static", quant_level=1)
- [+] quantizer = INCQuantizer.from_pretrained(MODEL_NAME)
- [+] quantizer.quantize(quantization_config=quantization_config, calibration_dataset=eval_dataset, save_directory="quantized_model", save_onnx_model=False)
- [+] MODEL_NAME = quantizer._quantized_model
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_static_quant_fx.yaml b/neural_coder/backends/pytorch_inc_static_quant_fx.yaml
deleted file mode 100644
index f77801ebc51..00000000000
--- a/neural_coder/backends/pytorch_inc_static_quant_fx.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] def eval_func(model):
- [+] EVAL_FUNC_LINES
- [+] try:
- [+] torch.backends.quantized.engine = 'onednn'
- [+] except:
- [+] from torch.backends.quantized import engine; engine = 'onednn'
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from neural_compressor.quantization import fit
- [+] conf = PostTrainingQuantConfig(quant_level=1)
- [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME, eval_func=eval_func)
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- [+] try:
- [+] with torch.no_grad():
- [+] MODEL_NAME = torch.jit.script(MODEL_NAME)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- [+] except:
- [+] pass
-
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml b/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml
deleted file mode 100644
index 419c5256588..00000000000
--- a/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] def eval_func(model):
- [+] EVAL_FUNC_LINES
- [+] try:
- [+] torch.backends.quantized.engine = 'onednn'
- [+] except:
- [+] from torch.backends.quantized import engine; engine = 'onednn'
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from neural_compressor.quantization import fit
- [+] conf = PostTrainingQuantConfig(precision=FP8_DATA_FORMAT, quant_level=1)
- [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME, eval_func=eval_func)
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- [+] try:
- [+] with torch.no_grad():
- [+] MODEL_NAME = torch.jit.script(MODEL_NAME)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- [+] except:
- [+] pass
-
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_static_quant_ipex.yaml b/neural_coder/backends/pytorch_inc_static_quant_ipex.yaml
deleted file mode 100644
index 9cde95fa197..00000000000
--- a/neural_coder/backends/pytorch_inc_static_quant_ipex.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from neural_compressor.quantization import fit
- [+] conf = PostTrainingQuantConfig(backend='ipex', quant_level=1)
- [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME)
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
deleted file mode 100644
index f4835516cdf..00000000000
--- a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
- content:
- - |-
- [+] from neural_compressor.config import PostTrainingQuantConfig
- [+] from neural_compressor.quantization import fit
- [+] MODEL_NAME = MODEL_NAME.to("xpu")
- [+] conf = PostTrainingQuantConfig(backend='ipex', quant_level=1, device="xpu")
- [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME)
- [+] MODEL_NAME.save("./quantized_model")
- [+] MODEL_NAME.eval()
- order:
- - below:
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - pytorch_channels_last
diff --git a/neural_coder/backends/pytorch_ipex_bf16.yaml b/neural_coder/backends/pytorch_ipex_bf16.yaml
deleted file mode 100644
index d27f7c6c58b..00000000000
--- a/neural_coder/backends/pytorch_ipex_bf16.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] import torch
- [+] import intel_extension_for_pytorch as ipex
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = ipex.optimize(MODEL_NAME, dtype=torch.bfloat16)
- - |-
- [+] import torch
- [+] with torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
- - 1
- order:
- - below:
- - pytorch_channels_last
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_ipex_fp32.yaml b/neural_coder/backends/pytorch_ipex_fp32.yaml
deleted file mode 100644
index 0256429948d..00000000000
--- a/neural_coder/backends/pytorch_ipex_fp32.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] import torch
- [+] import intel_extension_for_pytorch as ipex
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = ipex.optimize(MODEL_NAME, dtype=torch.float32)
- order:
- - below:
- - pytorch_channels_last
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
diff --git a/neural_coder/backends/pytorch_ipex_int8_dynamic_quant.yaml b/neural_coder/backends/pytorch_ipex_int8_dynamic_quant.yaml
deleted file mode 100644
index 06d35d57ef8..00000000000
--- a/neural_coder/backends/pytorch_ipex_int8_dynamic_quant.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- content:
- - |-
- [+] if "quantize" not in str(type(MODEL_NAME)) and "jit" not in str(type(MODEL_NAME)):
- [+] import torch
- [+] import intel_extension_for_pytorch as ipex
- [+] qconfig = ipex.quantization.default_dynamic_qconfig
- [+] MODEL_NAME = ipex.quantization.prepare(MODEL_NAME, qconfig, example_inputs=INPUT_NAME, inplace=False)
- [+] with torch.no_grad():
- [+] for i in range(10):
- [+] INFERENCE_LINE
- [+] MODEL_NAME = ipex.quantization.convert(MODEL_NAME)
- [+] with torch.no_grad():
- [+] INFERENCE_LINE
- [+] MODEL_NAME.eval()
- order:
- - below:
- - pytorch_channels_last
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
diff --git a/neural_coder/backends/pytorch_ipex_int8_static_quant.yaml b/neural_coder/backends/pytorch_ipex_int8_static_quant.yaml
deleted file mode 100644
index 9de931ee9fe..00000000000
--- a/neural_coder/backends/pytorch_ipex_int8_static_quant.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- content:
- - |-
- [+] if "quantize" not in str(type(MODEL_NAME)) and "jit" not in str(type(MODEL_NAME)):
- [+] import torch
- [+] import intel_extension_for_pytorch as ipex
- [+] qconfig = ipex.quantization.default_static_qconfig
- [+] MODEL_NAME = ipex.quantization.prepare(MODEL_NAME, qconfig, example_inputs=INPUT_NAME, inplace=False)
- [+] with torch.no_grad():
- [+] for i in range(10):
- [+] INFERENCE_LINE
- [+] MODEL_NAME = ipex.quantization.convert(MODEL_NAME)
- [+] with torch.no_grad():
- [+] INFERENCE_LINE
- [+] MODEL_NAME.eval()
- order:
- - below:
- - pytorch_channels_last
- above:
- - pytorch_jit_script
- - pytorch_jit_script_ofi
- - pytorch_jit_trace
- - pytorch_jit_trace_ofi
diff --git a/neural_coder/backends/pytorch_jit_script.yaml b/neural_coder/backends/pytorch_jit_script.yaml
deleted file mode 100644
index 014cc5177f1..00000000000
--- a/neural_coder/backends/pytorch_jit_script.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] if "jit" not in str(type(MODEL_NAME)):
- [+] import torch
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = torch.jit.script(MODEL_NAME)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- order:
- - below:
- - pytorch_inc_static_quant_fx
- - pytorch_inc_static_quant_ipex
- - pytorch_inc_dynamic_quant
- - pytorch_ipex_fp32
- - pytorch_ipex_bf16
- - pytorch_ipex_int8_static_quant
- - pytorch_ipex_int8_dynamic_quant
- - pytorch_channels_last
- above:
diff --git a/neural_coder/backends/pytorch_jit_script_ofi.yaml b/neural_coder/backends/pytorch_jit_script_ofi.yaml
deleted file mode 100644
index 2e9c5868d86..00000000000
--- a/neural_coder/backends/pytorch_jit_script_ofi.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- content:
- - |-
- [+] if "jit" not in str(type(MODEL_NAME)):
- [+] import torch
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = torch.jit.optimize_for_inference(torch.jit.script(MODEL_NAME))
- order:
- - below:
- - pytorch_inc_static_quant_fx
- - pytorch_inc_static_quant_ipex
- - pytorch_inc_dynamic_quant
- - pytorch_ipex_fp32
- - pytorch_ipex_bf16
- - pytorch_ipex_int8_static_quant
- - pytorch_ipex_int8_dynamic_quant
- - pytorch_channels_last
- above:
diff --git a/neural_coder/backends/pytorch_jit_trace.yaml b/neural_coder/backends/pytorch_jit_trace.yaml
deleted file mode 100644
index e52fb7f62ab..00000000000
--- a/neural_coder/backends/pytorch_jit_trace.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- content:
- - |-
- [+] if "jit" not in str(type(MODEL_NAME)):
- [+] import torch
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = torch.jit.trace(MODEL_NAME, INPUT_NAME, strict=False, check_trace=False)
- [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME)
- order:
- - below:
- - pytorch_inc_static_quant_fx
- - pytorch_inc_static_quant_ipex
- - pytorch_inc_dynamic_quant
- - pytorch_ipex_fp32
- - pytorch_ipex_bf16
- - pytorch_ipex_int8_static_quant
- - pytorch_ipex_int8_dynamic_quant
- - pytorch_channels_last
- above:
diff --git a/neural_coder/backends/pytorch_jit_trace_ofi.yaml b/neural_coder/backends/pytorch_jit_trace_ofi.yaml
deleted file mode 100644
index 3b52bdbd5b1..00000000000
--- a/neural_coder/backends/pytorch_jit_trace_ofi.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - ["insert_below_model_definition_line", "insert_below_input_definition_line"]
- content:
- - |-
- [+] if "jit" not in str(type(MODEL_NAME)):
- [+] import torch
- [+] with torch.no_grad():
- [+] MODEL_NAME.eval()
- [+] MODEL_NAME = torch.jit.optimize_for_inference(torch.jit.trace(MODEL_NAME, INPUT_NAME, strict=False, check_trace=False))
- order:
- - below:
- - pytorch_inc_static_quant_fx
- - pytorch_inc_static_quant_ipex
- - pytorch_inc_dynamic_quant
- - pytorch_ipex_fp32
- - pytorch_ipex_bf16
- - pytorch_ipex_int8_static_quant
- - pytorch_ipex_int8_dynamic_quant
- - pytorch_channels_last
- above:
diff --git a/neural_coder/backends/pytorch_mixed_precision_cpu.yaml b/neural_coder/backends/pytorch_mixed_precision_cpu.yaml
deleted file mode 100644
index ad4337f5e57..00000000000
--- a/neural_coder/backends/pytorch_mixed_precision_cpu.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] import torch
- [+] with torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
- - 1
- order:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_mixed_precision_cuda.yaml b/neural_coder/backends/pytorch_mixed_precision_cuda.yaml
deleted file mode 100644
index 60c597cedd8..00000000000
--- a/neural_coder/backends/pytorch_mixed_precision_cuda.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] import torch
- [+] with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
- - 1
- order:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_mixed_precision_intel_gpu.yaml b/neural_coder/backends/pytorch_mixed_precision_intel_gpu.yaml
deleted file mode 100644
index 3b5c86ae977..00000000000
--- a/neural_coder/backends/pytorch_mixed_precision_intel_gpu.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] import torch
- [+] with torch.xpu.amp.autocast(dtype=torch.half):
- - 1
- order:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_torchdynamo_jit_script.yaml b/neural_coder/backends/pytorch_torchdynamo_jit_script.yaml
deleted file mode 100644
index 48281956ca7..00000000000
--- a/neural_coder/backends/pytorch_torchdynamo_jit_script.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] from typing import List
- [+] import torch
- [+] import torchdynamo
- [+] torchdynamo.config.raise_on_backend_error = False
- [+] def dynamo_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
- [+] return torch.jit.script(gm)
- - |-
- [+] import torchdynamo
- [+] with torchdynamo.optimize(dynamo_backend):
- - 1
- order:
- - below:
- above:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_torchdynamo_jit_script_ofi.yaml b/neural_coder/backends/pytorch_torchdynamo_jit_script_ofi.yaml
deleted file mode 100644
index dcee3603d33..00000000000
--- a/neural_coder/backends/pytorch_torchdynamo_jit_script_ofi.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] from typing import List
- [+] import torch
- [+] import torchdynamo
- [+] torchdynamo.config.raise_on_backend_error = False
- [+] def dynamo_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
- [+] return torch.jit.optimize_for_inference(torch.jit.script(gm))
- - |-
- [+] import torchdynamo
- [+] with torchdynamo.optimize(dynamo_backend):
- - 1
- order:
- - below:
- above:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_torchdynamo_jit_trace.yaml b/neural_coder/backends/pytorch_torchdynamo_jit_trace.yaml
deleted file mode 100644
index d3673153986..00000000000
--- a/neural_coder/backends/pytorch_torchdynamo_jit_trace.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] from typing import List
- [+] import torch
- [+] import torchdynamo
- [+] torchdynamo.config.raise_on_backend_error = False
- [+] def dynamo_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
- [+] return torch.jit.trace(gm, example_inputs)
- - |-
- [+] import torchdynamo
- [+] with torchdynamo.optimize(dynamo_backend):
- - 1
- order:
- - below:
- above:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/pytorch_torchdynamo_jit_trace_ofi.yaml b/neural_coder/backends/pytorch_torchdynamo_jit_trace_ofi.yaml
deleted file mode 100644
index 9f9d1eafce5..00000000000
--- a/neural_coder/backends/pytorch_torchdynamo_jit_trace_ofi.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- - insert_below_model_definition_line
- - insert_above_inference_line
- - indent_inference_line
- content:
- - |-
- [+] from typing import List
- [+] import torch
- [+] import torchdynamo
- [+] torchdynamo.config.raise_on_backend_error = False
- [+] def dynamo_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
- [+] return torch.jit.optimize_for_inference(torch.jit.trace(gm, example_inputs))
- - |-
- [+] import torchdynamo
- [+] with torchdynamo.optimize(dynamo_backend):
- - 1
- order:
- - below:
- above:
- - below:
- above:
- - below:
- above:
diff --git a/neural_coder/backends/template.yaml b/neural_coder/backends/template.yaml
deleted file mode 100644
index 1e3b1fa1501..00000000000
--- a/neural_coder/backends/template.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transformation:
- location:
- -
- -
- -
- content:
- - |-
- [+] YOUR CODE LINE 1
- [+] YOUR CODE LINE 2
- [+] YOUR CODE LINE 3
- - |-
- [+] YOUR CODE LINE 1
- [+] YOUR CODE LINE 2
- [+] YOUR CODE LINE 3
- - |-
- [+] YOUR CODE LINE 1
- [+] YOUR CODE LINE 2
- [+] YOUR CODE LINE 3
- order:
- - below:
- -
- -
- above:
- -
- -
- - below:
- -
- -
- above:
- -
- -
- - below:
- -
- -
- above:
- -
- -
diff --git a/neural_coder/coders/__init__.py b/neural_coder/coders/__init__.py
deleted file mode 100644
index e833188cc78..00000000000
--- a/neural_coder/coders/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/neural_coder/coders/autoinc/__init__.py b/neural_coder/coders/autoinc/__init__.py
deleted file mode 100644
index e833188cc78..00000000000
--- a/neural_coder/coders/autoinc/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/neural_coder/coders/autoinc/autoinc_harness.py b/neural_coder/coders/autoinc/autoinc_harness.py
deleted file mode 100644
index e21b9bfdf8e..00000000000
--- a/neural_coder/coders/autoinc/autoinc_harness.py
+++ /dev/null
@@ -1,476 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import os
-import re
-import sys
-
-import yaml
-
-from ... import globals
-from ...utils.line_operation import (
- get_line_indent_level,
- get_line_left_hand_side,
- get_line_wo_comment,
- is_eval_func_model_name,
- single_line_comment_or_empty_line_detection,
-)
-
-logging.basicConfig(
- level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000"
-)
-logger = logging.getLogger(__name__)
-
-
-class AutoInc_Harness(object):
- def __init__(self, backend):
- self.backend = backend
-
- def print_info(self):
- for i in globals.list_model_def_instance:
- logger.debug(f"i.print_info(): {i.print_info()}")
-
- # collect file transformation info and register in globals
- # (i.e. which file to add which lines at which location)
- def register_transformation(self):
- backend_file = open(os.path.dirname(__file__) + "/../../backends/" + self.backend + ".yaml")
- backend_dict = yaml.load(backend_file, Loader=yaml.BaseLoader)
- logger.debug(f"backend_dict: {backend_dict}")
- bk_trans_location = backend_dict["transformation"]["location"] # string
- bk_trans_content = backend_dict["transformation"]["content"] # string
- bk_trans_order = backend_dict["transformation"]["order"] # list
-
- # modular design
- if globals.use_modular:
- content = globals.modular_item
- bk_trans_content = ["[+] " + content.replace("\n", "\n[+] ")[:-5]]
-
- list_code = []
- history = set()
- for i in globals.list_code_path:
- list_code.append(open(i, "r").read())
- for loc in bk_trans_location:
- # PART 1 - "model_definition_line"
- if "insert_below_model_definition_line" in loc:
- for ins in globals.list_model_def_instance:
- model_name = ins.model_name
- if model_name in history and globals.code_domain == "torchvision":
- continue
- else:
- history.add(model_name)
- file_path = ins.file_path
- model_def_line_idx = ins.model_def_line_idx
-
- file_path_idx = globals.list_code_path.index(file_path)
- lines = list_code[file_path_idx].split("\n")
- line_idx = 0
-
- # to check if this model has an inference line is in the file
- # if not, skip this model
- to_transform = False
- for i in range(len(lines)):
- line = lines[i]
- if model_name + "(" in line or (
- model_name + "." in line and line.find(model_name) < line.find(".") and "(" in line
- ):
- to_transform = True
- if not to_transform and globals.code_domain == "onnx":
- pass
- elif not to_transform:
- continue
- ### information
-
- # search DataLoader definition in this file
- dataloader_name = ""
- for i in range(len(lines)):
- line = lines[i]
- if not single_line_comment_or_empty_line_detection(line):
- if ("DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader")) or (
- "dataloader" in line and "=" in line and line.find("=") > line.find("dataloader")
- ):
- dataloader_def_line_indent_level = get_line_indent_level(line)
- dataloader_name = get_line_left_hand_side(line)
- dataloader_def_line_idx = i
-
- # search inference line in this file, and also input_name
- inference_line = ""
- input_name = ""
- for i in range(len(lines)):
- line = lines[i]
- is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line)
- if not single_line_comment_or_empty_line_detection(line):
- if is_eval_func and "[coder-enabled]" not in line:
- inference_line = line
- input_name = line[line.find("(") + 1 : line.find(")")].replace("*", "")
- # get "c" in "a = b(**c)"
-
- # search input definition in this file (if any)
- if input_name != "":
- for i in range(len(lines)):
- line = lines[i]
- if not single_line_comment_or_empty_line_detection(line):
- if input_name in line and "=" in line and line.find("=") > line.find(input_name):
- input_def_line_indent_level = get_line_indent_level(line)
- input_def_line_idx = i
-
- # search model definition line and its end line index
- # (only has 1 model definition line, because it's in loop of globals.list_model_def_instance)
- for i in range(len(lines)):
- line = lines[i]
- if line_idx == model_def_line_idx and "[coder-enabled]" not in line:
- model_def_line_indent_level = get_line_indent_level(line)
- if ")" in line and line.count(")") == line.count("("): # e.g. model = Net(xxx)
- model_definition_end_line_idx = line_idx + 1
- else: # e.g. model = Net(xxx, \n xxx, \n xxx)
- do_search = True
- i_search = 1
- while do_search:
- following_line = lines[line_idx + i_search]
- if ")" in following_line and following_line.count(")") > following_line.count("("):
- do_search = False
- i_search += 1
- model_definition_end_line_idx = line_idx + i_search
- line_idx += 1
-
- ### check
-
- bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)]
- if file_path_idx == 0 and (globals.code_domain in ["transformers_trainer", "torchvision", "onnx"]):
- pass
- elif (
- ("INPUT_NAME" in bk_trans_content_this and input_name == "")
- or ("DATALOADER_NAME" in bk_trans_content_this and dataloader_name == "")
- or ("INFERENCE_LINE" in bk_trans_content_this and inference_line == "")
- ):
- logger.info(
- f"Skipped due to not having enough information required by "
- "the transformation content specified in the config file "
- "(e.g. INPUT_NAME, DATALOADER_NAME, INFERENCE_LINE). "
- f"File path: {file_path}"
- )
- continue
-
- ### location
-
- # search for features to put below them
- """
- Example (psuedo-code):
- model = Net()
- # jit script begin mark
- model = torch.jit.script(model)
- # jit script end mark (feature name + model name to handle multi-model situation)
- model = ipex.optimize(model, "fp32") # "ipex fp32" must be put below "jit script"
- """
- put_below_idx = 0
- for i in range(len(lines)):
- for item in bk_trans_order[0]["below"]:
- line = lines[i]
- if item in line and model_name in line:
- put_below_idx = max(put_below_idx, i + 1)
-
- # search for features to put above them
- put_above_idx = sys.maxsize
- for i in range(len(lines)):
- for item in bk_trans_order[0]["above"]:
- line = lines[i]
- if item in line and model_name in line:
- put_above_idx = min(put_above_idx, i)
-
- # location assignment (below model def / dataloader def / input def)
- torchvision_indent = -1
- if file_path_idx == 0 and globals.code_domain == "transformers_trainer":
- for i in range(len(lines)):
- line = lines[i]
- if re.findall("trainer = .*Trainer", line):
- if "(" in line and line.count(")") == line.count("("):
- trans_insert_location = i + 1
- else:
- do_search = True
- i_search = 1
- while do_search:
- following_line = lines[i + i_search]
- if ")" in following_line and following_line.count(")") > following_line.count(
- "("
- ):
- do_search = False
- i_search += 1
- trans_insert_location = i + i_search
- trans_insert_location = min(max(trans_insert_location, put_below_idx), put_above_idx)
- elif file_path_idx == 0 and globals.code_domain == "torchvision":
- trans_insert_location = 1
- for i in range(len(lines)):
- line = lines[i]
- if (
- "val_loader" in line
- and "aux_val_loader" not in line
- and (
- "torch.utils.data.DataLoader" in line
- or "utils.data.DataLoader" in line
- or "DataLoader" in line
- )
- ):
- torchvision_indent = get_line_indent_level(line)
- if "(" in line and line.count(")") == line.count("("):
- trans_insert_location = i + 1
- else:
- do_search = True
- i_search = 1
- while do_search:
- following_line = lines[i + i_search]
- if ")" in following_line and following_line.count(")") > following_line.count(
- "("
- ):
- do_search = False
- i_search += 1
- trans_insert_location = i + i_search
- trans_insert_location = min(max(trans_insert_location, put_below_idx), put_above_idx)
- else:
- if "insert_below_model_definition_line" in loc:
- trans_insert_location = min(
- max(model_definition_end_line_idx, put_below_idx), put_above_idx
- )
- if "insert_below_dataloader_definition_line" in loc:
- try:
- dataloader_def_line_idx
- except:
- logger.warning(
- f"Skipped due to not having dataloader definition required by "
- "the transformation content specified in the config file. "
- f"File path: {file_path}"
- )
- continue
- trans_insert_location = max(
- trans_insert_location,
- min(max(dataloader_def_line_idx + 1, put_below_idx), put_above_idx),
- )
- if "insert_below_input_definition_line" in loc:
- try:
- input_def_line_idx
- except:
- logger.warning(
- f"Skipped due to not having input definition required by "
- "the transformation content specified in the config file. "
- f"File path: {file_path}"
- )
- continue
- trans_insert_location = max(
- trans_insert_location, min(max(input_def_line_idx + 1, put_below_idx), put_above_idx)
- )
-
- insert_indent_level = (
- get_line_indent_level(lines[trans_insert_location - 1])
- if torchvision_indent == -1
- else torchvision_indent
- )
- ### content
- # lines to insert
- lines_to_insert = bk_trans_content_this
- if globals.code_domain == "transformers_trainer":
- lines_to_insert = lines_to_insert.replace(
- "EVAL_FUNC_LINES", globals.list_eval_func_lines[0]
- ).replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0])
- elif globals.code_domain == "transformers_no_trainer":
- pass
- elif globals.code_domain == "torchvision":
- lines_to_insert = lines_to_insert.replace(
- "EVAL_FUNC_LINES", globals.list_eval_func_lines[0]
- ).replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0])
- elif globals.code_domain == "onnx":
- lines_to_insert = lines_to_insert.replace(
- "EVAL_FUNCTION_NAME", globals.list_eval_func_name[0]
- ).replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0])
- else:
- lines_to_insert = lines_to_insert.replace("DATALOADER_NAME", dataloader_name).replace(
- "def eval_func", "# def eval_func"
- )
-
- optimum_quant_config_line = (
- 'IncQuantizationConfig.from_pretrained("' + globals.optimum_quant_config + '")'
- )
-
- # replace [+] indication with empty
- lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level)
- # add begin indicator
- lines_to_insert = (
- " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " for "
- + model_name
- + " [Beginning Line]\n"
- + lines_to_insert
- )
- # replace INDICATIONS with real stuff
- lines_to_insert = (
- lines_to_insert.replace("MODEL_NAME", model_name)
- .replace("INPUT_NAME", input_name)
- .replace("EVAL_FUNC_LINES", "# return 1")
- .replace("OPTIMUM_QUANT_CONFIG", optimum_quant_config_line)
- .replace("\n", " # [coder-enabled]\n")
- )
- # add end indicator
- lines_to_insert += (
- " # [coder-enabled]\n"
- + " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " for "
- + model_name
- + " [Ending Line] # [coder-enabled]"
- )
-
- ### register
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append([lines_to_insert.count("\n") + 1])
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1)
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- # PART 2 - "inference line"
- if (
- "indent_inference_line" in loc
- or "insert_above_inference_line" in loc
- or "insert_below_inference_line" in loc
- ):
- for file_path in globals.list_code_path:
- code = open(file_path, "r").read()
- lines = code.split("\n")
- line_idx = 0
- for i in range(len(lines)):
- line = lines[i]
- for model_name in globals.list_model_name:
- is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line)
- if is_eval_func and "[coder-enabled]" not in line:
- if eval_func_type == "non-forward":
- pass # do something
-
- inference_line_indent_level = get_line_indent_level(line)
-
- if "indent_inference_line" in loc:
- bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)]
- add_indent_level = int(bk_trans_content_this)
-
- trans_indent_location = []
- # indent can have multiple location, so is a list of numbers
- trans_indent_level = []
-
- if ")" in line: # e.g. model = Net(xxx)
- trans_indent_location.append(line_idx)
- trans_indent_level.append(add_indent_level)
- else: # e.g. model = Net(xxx, \n xxx, \n xxx)
- trans_indent_location.append(line_idx)
- trans_indent_level.append(add_indent_level)
- do_search = True
- i_search = 1
- while do_search:
- trans_indent_location.append(line_idx + i_search)
- trans_indent_level.append(add_indent_level)
- following_line = lines[line_idx + i_search]
- if ")" in following_line:
- do_search = False
- i_search += 1
-
- ### register
-
- if file_path not in globals.list_trans_indent_modified_file:
- globals.list_trans_indent_modified_file.append(file_path)
- globals.list_trans_indent_location_idxs.append(trans_indent_location)
- globals.list_trans_indent_level.append(trans_indent_level)
- else:
- idx = globals.list_trans_indent_modified_file.index(file_path)
- for i in trans_indent_location:
- globals.list_trans_indent_location_idxs[idx].append(i)
- for i in trans_indent_level:
- globals.list_trans_indent_level[idx].append(i)
-
- if "insert_above_inference_line" in loc:
- idx_offset = 0
- elif "insert_below_inference_line" in loc:
- idx_offset = 1
-
- if "insert_above_inference_line" in loc or "insert_below_inference_line" in loc:
- bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)]
-
- trans_insert_location = line_idx + idx_offset
-
- insert_indent_level = inference_line_indent_level
-
- ### content
-
- # lines to insert
- lines_to_insert = bk_trans_content_this
- # replace [+] indication with empty
- lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level)
- # add begin indicator
- lines_to_insert = (
- " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " [Beginning Line] \n"
- + lines_to_insert
- )
- # replace INDICATIONS with real stuff
- # (for now, inference_line related transformations )
- # (have nothing to do with input, dataloader etc, )
- # (so no need to put replaces here.)
- lines_to_insert = lines_to_insert.replace("\n", " # [coder-enabled]\n")
- # add end indicator
- lines_to_insert += (
- " # [coder-enabled]\n"
- + " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " [Ending Line] # [coder-enabled]"
- )
-
- # customized argument
- if self.backend == "pytorch_benchmark":
- lines_to_insert = lines_to_insert.replace(
- "NUM_BENCHMARK_ITERATION", globals.num_benchmark_iteration
- )
- lines_to_insert = lines_to_insert.replace("ACCURACY_MODE", str(False))
- lines_to_insert = lines_to_insert.replace("EVAL_FUNC_LINES", line.strip())
-
- ### register
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append(
- [lines_to_insert.count("\n") + 1]
- )
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(
- lines_to_insert.count("\n") + 1
- )
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- line_idx += 1
-
- # PART 3 - for customized location
-
- logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}")
- logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}")
- logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}")
- logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}")
diff --git a/neural_coder/coders/autoinc/calib_dataloader.py b/neural_coder/coders/autoinc/calib_dataloader.py
deleted file mode 100644
index 8d287837676..00000000000
--- a/neural_coder/coders/autoinc/calib_dataloader.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from ... import globals
-
-
-class Calib_Dataloader(object):
- def __init__(self):
- pass
-
- def register_transformation(self):
- if globals.code_domain == "transformers_trainer":
- globals.list_calib_dataloader_name.append("trainer.get_eval_dataloader()")
- elif globals.code_domain == "transformers_no_trainer":
- pass
- elif globals.code_domain == "torchvision":
- globals.list_calib_dataloader_name.append("val_loader")
- elif globals.code_domain == "onnx":
- codes = open(globals.list_code_path[0], "r").read().split("\n")
- for line in codes:
- line = line.strip()
- if "loader" in line and "=" in line:
- end = 0
- for i in range(len(line)):
- if line[i] == "=":
- end = i
- if line[end - 1] == " ":
- globals.list_calib_dataloader_name.append(line[: end - 1])
- else:
- globals.list_calib_dataloader_name.append(line[:end])
- else: # random model
- pass
diff --git a/neural_coder/coders/autoinc/domain.py b/neural_coder/coders/autoinc/domain.py
deleted file mode 100644
index ed4437d7455..00000000000
--- a/neural_coder/coders/autoinc/domain.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import re
-
-
-def determine_domain(path) -> str:
- codes = open(path, "r").read()
- if ("import torchvision.models" in codes or "from torchvision.models" in codes) and "val_loader" in codes:
- return "torchvision"
- elif re.search(r"from (.*)transformers import", codes) and re.search(r"(.*)Model(.*)", codes):
- if "Trainer" in codes or "trainer" in codes:
- return "transformers_trainer"
- else:
- return "transformers_no_trainer"
- elif "onnx.load(" in codes:
- return "onnx"
- elif "keras.Sequential" in codes:
- return "keras_script"
- elif "from tensorflow import" in codes or "import tensorflow" in codes:
- return "tensorflow_keras_model"
- else:
- return "random model"
diff --git a/neural_coder/coders/autoinc/eval_func.py b/neural_coder/coders/autoinc/eval_func.py
deleted file mode 100644
index 9ea65d36315..00000000000
--- a/neural_coder/coders/autoinc/eval_func.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ... import globals
-from ...utils.line_operation import get_line_indent_level
-
-
-class Eval_Func(object):
- def __init__(self):
- pass
-
- def register_transformation(self):
- if globals.code_domain == "transformers_trainer":
- lines = [
- "trainer.model = model",
- "metrics = trainer.evaluate() # check if all tasks do not have parameters in evaluate()",
- "keys = [",
- ' "eval_accuracy",',
- ' "eval_bleu",',
- ' "eval_matthews_correlation",',
- ' "eval_pearsonr",',
- ' "eval_precision",',
- ' "eval_recall",',
- ' "eval_rouge",',
- ' "eval_sacrebleu",',
- ' "eval_spearmanr",',
- ' "eval_mcc",',
- ' "eval_acc",',
- ' "eval_acc_and_f1",',
- ' "eval_corr",',
- ' "eval_mnli/acc",',
- ' "eval_mnli-mm/acc",',
- ' "eval_exact_match",',
- ' "eval_f1",',
- "] # METRIC_TAGS in transformers",
- "for key in keys:",
- " if key in metrics.keys():",
- " return metrics[key]",
- 'assert False, "No metric returned, Please check inference metric!"',
- ]
- for index, line in enumerate(lines):
- if index != 0:
- lines[index] = "[+] " + " " * 4 + line
- lines = "\n".join(lines)
- globals.list_eval_func_lines.append(lines)
- elif globals.code_domain == "transformers_no_trainer":
- pass
- elif globals.code_domain == "torchvision":
- # search for 'validate()'
- codes = open(globals.list_code_path[0], "r").read().split("\n")
- lines = []
- for index, line in enumerate(codes):
- if "def validate(" in line:
- start = index
- start_indent = get_line_indent_level(codes[start])
- for i in range(start + 1, len(codes)):
- if codes[i] == "":
- continue
- line_indent = get_line_indent_level(codes[i])
- if line_indent > start_indent:
- change_indent = line_indent - 4
- lines.append(" " * change_indent + codes[i].lstrip())
- # no 'print'
- else:
- break
- break
- else:
- pass
- for index, line in enumerate(lines):
- if "return" in line:
- indent = get_line_indent_level(line)
- line_list = line.split()
- line_list[1] = "float(" + line_list[1] + ")"
- lines[index] = " " * indent + " ".join(line_list)
- for index, line in enumerate(lines):
- if index != 0:
- lines[index] = "[+] " + " " * 8 + line
- lines = "\n".join(lines)
- globals.list_eval_func_lines.append(lines)
- elif globals.code_domain == "onnx":
- # look for sess = onnxruntime.InferenceSession(MODEL_NAME.SerializeToString(), None)
- codes = open(globals.list_code_path[0], "r").read().split("\n")
- start = 0
- for idx, line in enumerate(codes):
- if "onnxruntime.InferenceSession(" in line:
- start = idx
- break
- line_indent = get_line_indent_level(codes[start])
- target = None
- for i in range(start, -1, -1):
- if "def" in codes[i] and (line_indent - get_line_indent_level(codes[i])) == 4:
- target = codes[i].split(" ")[1]
- break
- func_name = None
- for i in range(len(target)):
- if target[i] == "(":
- globals.list_eval_func_name.append(target[:i])
- break
- else: # random model
- pass
diff --git a/neural_coder/coders/pytorch/__init__.py b/neural_coder/coders/pytorch/__init__.py
deleted file mode 100644
index e833188cc78..00000000000
--- a/neural_coder/coders/pytorch/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/neural_coder/coders/pytorch/batch_size.py b/neural_coder/coders/pytorch/batch_size.py
deleted file mode 100644
index ec61e19d70f..00000000000
--- a/neural_coder/coders/pytorch/batch_size.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ... import globals
-
-
-class BatchSizeCoder(object):
- def __init__(self, file) -> None:
- self.file = file
- self.result = []
-
- def transform(self):
- lines = self.file.split("\n")
- for line in lines:
- if self.not_modify(line):
- new_line = self.modify(line)
- self.result.append(new_line)
- else:
- self.result.append(line)
- for index, line in enumerate(self.result):
- if index != len(self.result) - 1:
- self.result[index] += "\n"
- return "".join(self.result)
-
- def not_modify(self, s):
- if "batch_size" in s and "=" in s:
- return True
- return False
-
- def modify(self, s):
- idx = s.find("batch_size")
- s_right = s[idx:]
- if " = " in s_right:
- index = s.find(" = ")
- s_left = s[:index]
- if "batch_size" in s_left:
- if "," in s_left:
- index1 = s_left.find(",")
- index2 = s_left.find("batch_size")
- if index1 > index2:
- slice1 = s_left[:index1]
- else:
- s_left1 = s_left[:index2]
- s_right = s_left[index2:]
- index3 = s_left1.rfind(",")
- if "," in s_right:
- index4 = s_right.find(",") + len(s_left1)
- slice1 = s_left[index3 + 2 : index4]
- else:
- slice1 = s_left[index3 + 2 : index]
- s1 = slice1 + " = " + globals.target_batch_size
- s = s[:] + "\n" + s1
- else:
- s_right = s[index + 3 :]
- s_right = s_right.replace(s_right, globals.target_batch_size)
- s = s_left + " = " + s_right
- elif "batch_size=" in s:
- idx = s.find("batch_size=")
- s_right = s[idx:]
- idx2 = s_right.find("batch_size")
- if "," in s_right:
- index2 = s_right.find(",")
- old = s_right[idx2:index2]
- s = s.replace(old, "batch_size=" + globals.target_batch_size)
- elif ")" in s_right:
- index2 = s_right.find(")")
- old = s_right[idx2:index2]
- s = s.replace(old, "batch_size=" + globals.target_batch_size)
- else:
- old = s_right[idx2:]
- s = s.replace(old, "batch_size=" + globals.target_batch_size)
- return s
diff --git a/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py
deleted file mode 100644
index 7c0d71ce534..00000000000
--- a/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from ...utils.line_operation import get_line_indent_level
-
-
-class TrainerToNLPTrainer(object):
- def __init__(self, file) -> None:
- self.file = file
- self.result = []
-
- def transform(self):
- lines = self.file.split("\n")
-
- for line in lines:
- if self.is_modify(line):
- new_line = self.modify(line)
- self.result.append(new_line)
- else:
- self.result.append(line)
- for index, line in enumerate(self.result):
- if index != len(self.result) - 1:
- self.result[index] += "\n"
- return "".join(self.result)
-
- def is_modify(self, s):
- if "trainer = Trainer(" in s:
- return True
- else:
- return False
-
- def modify(self, s):
- old = "Trainer"
- s = s.replace(old, "NLPTrainer")
- return s
diff --git a/neural_coder/coders/pytorch/cuda_to_cpu.py b/neural_coder/coders/pytorch/cuda_to_cpu.py
deleted file mode 100644
index 2d6c9c8a13e..00000000000
--- a/neural_coder/coders/pytorch/cuda_to_cpu.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from ...utils.line_operation import get_line_indent_level
-
-
-class CudaToCpu(object):
- def __init__(self, file) -> None:
- self.file = file
- self.result = []
-
- def transform(self):
- lines = self.file.split("\n")
- # determine if jump the whole file (in cases where: args.device, args.cuda etc)
- to_jump = False
- for line in lines:
- if self.is_jump_file(line):
- to_jump = True
- break
-
- if to_jump: # this file do not need transformation
- for line in lines:
- self.result.append(line)
- else: # this file might need transformation
- for line in lines:
- if self.is_delete(line):
- indent_level = get_line_indent_level(line)
- new_line = " " * indent_level + "pass"
- self.result.append(new_line)
- elif self.is_modify(line):
- new_line = self.change_to_cpu(line)
- self.result.append(new_line)
- else:
- self.result.append(line)
- for index, line in enumerate(self.result):
- if index != len(self.result) - 1:
- self.result[index] += "\n"
- return "".join(self.result)
-
- def is_jump_file(self, s):
- if (
- "args.device" in s
- or "args.cpu" in s
- or "args.gpu" in s
- or "args.cuda" in s
- or "torch.cuda.is_available()" in s
- ):
- return True
- else:
- return False
-
- def is_delete(self, s):
- if "cuda." in s and "=" not in s and "if" not in s:
- return True
- else:
- return False
-
- def is_modify(self, s):
- if "'cuda'" in s or '"cuda"' in s or "'cuda:0'" in s or '"cuda:0"' in s or "cuda()" in s:
- return True
- else:
- return False
-
- def change_to_cpu(self, s):
- if "'cuda'" in s or "'cuda:0'" in s:
- old = "'cuda'" if "'cuda'" in s else "'cuda:0'"
- s = s.replace(old, "'cpu'")
- elif '"cuda"' in s or '"cuda:0"' in s:
- old = '"cuda"' if '"cuda"' in s else '"cuda:0"'
- s = s.replace(old, '"cpu"')
- elif "cuda()" in s:
- old = "cuda"
- s = s.replace(old, "cpu")
- return s
diff --git a/neural_coder/coders/pytorch/dummy_dataloader.py b/neural_coder/coders/pytorch/dummy_dataloader.py
deleted file mode 100644
index 64e745cdacf..00000000000
--- a/neural_coder/coders/pytorch/dummy_dataloader.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from ... import globals
-from ...utils.line_operation import get_line_indent_level, get_line_left_hand_side, is_eval_func_model_name
-
-logging.basicConfig(
- level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000"
-)
-logger = logging.getLogger(__name__)
-
-
-class DummyDataLoader(object):
- def __init__(self, list_model_def_instance):
- self.list_model_def_instance = list_model_def_instance
-
- def print_info(self):
- for i in self.list_model_def_instance:
- logger.debug(f"i.print_info(): {i.print_info()}")
-
- # collect file transformation info and register (store) in globals
- # (i.e. which file to add which lines at which location)
- def register_transformation(self):
- list_code = []
- for i in globals.list_code_path:
- list_code.append(open(i, "r").read())
-
- for ins in self.list_model_def_instance:
- model_name = ins.model_name
- file_path = ins.file_path
- model_def_line_idx = ins.model_def_line_idx
- function_def_line_idx = ins.function_def_line_idx
- class_name = ins.class_name
-
- # transformation
- file_path_idx = globals.list_code_path.index(file_path)
- lines = list_code[file_path_idx].split("\n")
- line_idx = 0
-
- # search DataLoader
- dataloader_name = ""
- for i in range(len(lines)): # each item is a str of this code line
- line = lines[i]
- if "DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader"):
- dataloader_name = get_line_left_hand_side(line)
- dataloader_def_line_idx = i
-
- if dataloader_name != "":
- return
- else:
- input_dimension_str = "3, 224, 224)"
- for i in range(len(lines)):
- line = lines[i]
- if ("input" in line and "=" in line and line.find("=") > line.find("input")) or (
- "image" in line and "=" in line and line.find("=") > line.find("image")
- ):
- input_dimension_str = line[line.find(",") + 2 :]
-
- for i in range(len(lines)):
- line = lines[i]
- if line_idx == model_def_line_idx:
- indent_level = get_line_indent_level(line)
- lines_to_insert = ""
- lines_to_insert += " " * indent_level + "import torch" + "\n"
- lines_to_insert += " " * indent_level + "from torch.utils.data import Dataset" + "\n"
- lines_to_insert += " " * indent_level + "class DummyDataset(Dataset):" + "\n"
- lines_to_insert += (
- " " * indent_level + " def __init__(self, *shapes, num_samples: int = 10000):" + "\n"
- )
- lines_to_insert += " " * indent_level + " super().__init__()" + "\n"
- lines_to_insert += " " * indent_level + " self.shapes = shapes" + "\n"
- lines_to_insert += " " * indent_level + " self.num_samples = num_samples" + "\n"
- lines_to_insert += " " * indent_level + " def __len__(self):" + "\n"
- lines_to_insert += " " * indent_level + " return self.num_samples" + "\n"
- lines_to_insert += " " * indent_level + " def __getitem__(self, idx: int):" + "\n"
- lines_to_insert += " " * indent_level + " sample = []" + "\n"
- lines_to_insert += " " * indent_level + " for shape in self.shapes:" + "\n"
- lines_to_insert += " " * indent_level + " spl = torch.rand(*shape)" + "\n"
- lines_to_insert += " " * indent_level + " sample.append(spl)" + "\n"
- lines_to_insert += " " * indent_level + " return sample" + "\n"
- lines_to_insert += " " * indent_level + "from torch.utils.data import DataLoader" + "\n"
- lines_to_insert += (
- " " * indent_level
- + "dummy_dataset = DummyDataset(("
- + input_dimension_str
- + ", (1, ))"
- + "\n"
- )
- lines_to_insert += (
- " " * indent_level + "dummy_dataloader = DataLoader(dummy_dataset, batch_size=1)"
- )
-
- trans_insert_location = 0
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append([lines_to_insert.count("\n") + 1])
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1)
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- line_idx += 1
-
- logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}")
- logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}")
- logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}")
- logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}")
diff --git a/neural_coder/coders/pytorch/harness.py b/neural_coder/coders/pytorch/harness.py
deleted file mode 100644
index 3921383791e..00000000000
--- a/neural_coder/coders/pytorch/harness.py
+++ /dev/null
@@ -1,421 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import os
-import sys
-
-import yaml
-
-from ... import globals
-from ...utils.line_operation import (
- get_line_indent_level,
- get_line_left_hand_side,
- get_line_wo_comment,
- is_eval_func_model_name,
- single_line_comment_or_empty_line_detection,
-)
-
-logging.basicConfig(
- level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000"
-)
-logger = logging.getLogger(__name__)
-
-
-class Harness(object):
- def __init__(self, backend):
- self.backend = backend
-
- def print_info(self):
- for i in globals.list_model_def_instance:
- logger.debug(f"i.print_info(): {i.print_info()}")
-
- # collect file transformation info and register in globals
- # (i.e. which file to add which lines at which location)
- def register_transformation(self):
- backend_file = open(os.path.dirname(__file__) + "/../../backends/" + self.backend + ".yaml")
- backend_dict = yaml.load(backend_file, Loader=yaml.BaseLoader)
- logger.debug(f"backend_dict: {backend_dict}")
-
- bk_trans_location = backend_dict["transformation"]["location"] # string
- bk_trans_content = backend_dict["transformation"]["content"] # string
- bk_trans_order = backend_dict["transformation"]["order"] # list
-
- list_code = []
- for i in globals.list_code_path:
- list_code.append(open(i, "r").read())
-
- for loc in bk_trans_location:
- # PART 1 - "model_definition_line"
- if "insert_below_model_definition_line" in loc:
- for ins in globals.list_model_def_instance:
- model_name = ins.model_name
- file_path = ins.file_path
- model_def_line_idx = ins.model_def_line_idx
-
- file_path_idx = globals.list_code_path.index(file_path)
- lines = list_code[file_path_idx].split("\n")
- line_idx = 0
-
- # to check if this model has an inference line is in the file
- # if not, skip this model
- to_transform = False
- for i in range(len(lines)):
- line = lines[i]
- if model_name + "(" in line or (
- model_name + "." in line and line.find(model_name) < line.find(".") and "(" in line
- ):
- to_transform = True
- if not to_transform:
- continue
-
- ### information
-
- # search DataLoader definition in this file
- dataloader_name = ""
- for i in range(len(lines)):
- line = lines[i]
- if not single_line_comment_or_empty_line_detection(line):
- if ("DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader")) or (
- "dataloader" in line and "=" in line and line.find("=") > line.find("dataloader")
- ):
- dataloader_def_line_indent_level = get_line_indent_level(line)
- dataloader_name = get_line_left_hand_side(line)
- dataloader_def_line_idx = i
-
- # search inference line in this file, and also input_name
- inference_line = ""
- input_name = ""
- for i in range(len(lines)):
- line = lines[i]
- is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line)
- if not single_line_comment_or_empty_line_detection(line):
- if is_eval_func and "[coder-enabled]" not in line:
- inference_line = line
- input_name = line[line.find("(") + 1 : line.find(")")].replace("*", "")
- # get "c" in "a = b(**c)"
-
- # search input definition in this file (if any)
- if input_name != "":
- for i in range(len(lines)):
- line = lines[i]
- if not single_line_comment_or_empty_line_detection(line):
- if input_name in line and "=" in line and line.find("=") > line.find(input_name):
- input_def_line_indent_level = get_line_indent_level(line)
- input_def_line_idx = i
-
- # search trainer definition in this file (for transformers trainer only)
- trainer_def_line_idx = -1
- for i in range(len(lines)):
- line = lines[i]
- if not single_line_comment_or_empty_line_detection(line):
- if "trainer = Trainer(" in line:
- trainer_def_line_indent_level = get_line_indent_level(line)
- trainer_def_line_idx = i
-
- # search model definition line and its end line index
- # (only has 1 model definition line, because it's in loop of globals.list_model_def_instance)
- for i in range(len(lines)):
- line = lines[i]
- if line_idx == model_def_line_idx and "[coder-enabled]" not in line:
- model_def_line_indent_level = get_line_indent_level(line)
- if ")" in line and line.count(")") == line.count("("): # e.g. model = Net(xxx)
- model_definition_end_line_idx = line_idx + 1
- else: # e.g. model = Net(xxx, \n xxx, \n xxx)
- do_search = True
- i_search = 1
- while do_search:
- following_line = lines[line_idx + i_search]
- if ")" in following_line and following_line.count(")") > following_line.count("("):
- do_search = False
- i_search += 1
- model_definition_end_line_idx = line_idx + i_search
- line_idx += 1
-
- ### check
-
- bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)]
-
- if (
- ("INPUT_NAME" in bk_trans_content_this and input_name == "")
- or ("DATALOADER_NAME" in bk_trans_content_this and dataloader_name == "")
- or ("INFERENCE_LINE" in bk_trans_content_this and inference_line == "")
- ):
- logger.info(
- f"Skipped due to not having enough information required by "
- "the transformation content specified in the config file "
- "(e.g. INPUT_NAME, DATALOADER_NAME, INFERENCE_LINE). "
- f"File path: {file_path}"
- )
- continue
-
- ### location
-
- # search for features to put below them
- """
- Example (psuedo-code):
- model = Net()
- # jit script begin mark
- model = torch.jit.script(model)
- # jit script end mark (feature name + model name to handle multi-model situation)
- model = ipex.optimize(model, "fp32") # "ipex fp32" must be put below "jit script"
- """
- put_below_idx = 0
- for i in range(len(lines)):
- for item in bk_trans_order[0]["below"]:
- line = lines[i]
- if item in line and model_name in line:
- put_below_idx = max(put_below_idx, i + 1)
-
- # search for features to put above them
- put_above_idx = sys.maxsize
- for i in range(len(lines)):
- for item in bk_trans_order[0]["above"]:
- line = lines[i]
- if item in line and model_name in line:
- put_above_idx = min(put_above_idx, i)
-
- # location assignment (below model def / dataloader def / input def)
- if "insert_below_model_definition_line" in loc:
- trans_insert_location = min(max(model_definition_end_line_idx, put_below_idx), put_above_idx)
- if trainer_def_line_idx > 0:
- trans_insert_location = trainer_def_line_idx - 1
- # for transformers trainer to put right above trainer def
- if "insert_below_dataloader_definition_line" in loc:
- try:
- dataloader_def_line_idx
- except:
- logger.warning(
- f"Skipped due to not having dataloader definition required by "
- "the transformation content specified in the config file. "
- f"File path: {file_path}"
- )
- continue
- trans_insert_location = max(
- trans_insert_location, min(max(dataloader_def_line_idx + 1, put_below_idx), put_above_idx)
- )
- if "insert_below_input_definition_line" in loc:
- try:
- input_def_line_idx
- except:
- logger.warning(
- f"Skipped due to not having input definition required by "
- "the transformation content specified in the config file. "
- f"File path: {file_path}"
- )
- continue
- trans_insert_location = max(
- trans_insert_location, min(max(input_def_line_idx + 1, put_below_idx), put_above_idx)
- )
-
- insert_indent_level = get_line_indent_level(lines[trans_insert_location - 1])
- if trainer_def_line_idx > 0: # for transformers trainer to put right above trainer def
- insert_indent_level = get_line_indent_level(lines[trans_insert_location])
- ### content
-
- # lines to insert
- lines_to_insert = bk_trans_content_this
- # replace [+] indication with empty
- lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level)
- # add begin indicator
- lines_to_insert = (
- " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " for "
- + model_name
- + " [Beginning Line]\n"
- + lines_to_insert
- )
- # replace INDICATIONS with real stuff
- lines_to_insert = (
- lines_to_insert.replace("MODEL_NAME", model_name)
- .replace("INPUT_NAME", input_name)
- .replace("DATALOADER_NAME", dataloader_name)
- .replace("INFERENCE_LINE", inference_line.strip())
- .replace("\n", " # [coder-enabled]\n")
- )
- # add end indicator
- lines_to_insert += (
- " # [coder-enabled]\n"
- + " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " for "
- + model_name
- + " [Ending Line] # [coder-enabled]"
- )
-
- ### register
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append([lines_to_insert.count("\n") + 1])
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1)
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- # PART 2 - "inference line"
- if (
- "indent_inference_line" in loc
- or "insert_above_inference_line" in loc
- or "insert_below_inference_line" in loc
- ):
- for file_path in globals.list_code_path:
- code = open(file_path, "r").read()
- lines = code.split("\n")
- line_idx = 0
- for i in range(len(lines)):
- line = lines[i]
- for model_name in globals.list_model_name:
- is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line)
- if is_eval_func and "[coder-enabled]" not in line:
- if eval_func_type == "non-forward":
- pass # do something
- inference_line = line
- inference_line_indent_level = get_line_indent_level(line)
-
- if "indent_inference_line" in loc:
- bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)]
- add_indent_level = int(bk_trans_content_this)
-
- trans_indent_location = []
- # indent can have multiple location, so is a list of numbers
- trans_indent_level = []
-
- if ")" in line: # e.g. model = Net(xxx)
- trans_indent_location.append(line_idx)
- trans_indent_level.append(add_indent_level)
- else: # e.g. model = Net(xxx, \n xxx, \n xxx)
- trans_indent_location.append(line_idx)
- trans_indent_level.append(add_indent_level)
- do_search = True
- i_search = 1
- while do_search:
- trans_indent_location.append(line_idx + i_search)
- trans_indent_level.append(add_indent_level)
- following_line = lines[line_idx + i_search]
- if ")" in following_line:
- do_search = False
- i_search += 1
-
- ### register
-
- if file_path not in globals.list_trans_indent_modified_file:
- globals.list_trans_indent_modified_file.append(file_path)
- globals.list_trans_indent_location_idxs.append(trans_indent_location)
- globals.list_trans_indent_level.append(trans_indent_level)
- else:
- idx = globals.list_trans_indent_modified_file.index(file_path)
- for i in trans_indent_location:
- globals.list_trans_indent_location_idxs[idx].append(i)
- for i in trans_indent_level:
- globals.list_trans_indent_level[idx].append(i)
-
- if "insert_above_inference_line" in loc:
- idx_offset = 0
- elif "insert_below_inference_line" in loc:
- if ")" in line: # e.g. model = Net(xxx)
- idx_offset = 1
- else: # e.g. model = Net(xxx, \n xxx, \n xxx)
- do_search = True
- i_search = 1
- while do_search:
- following_line = lines[line_idx + i_search]
- if ")" in following_line:
- do_search = False
- i_search += 1
- inference_line = (
- inference_line
- + "\n"
- + " " * (get_line_indent_level(line) + 4)
- + following_line
- )
- idx_offset = i_search
-
- if "insert_above_inference_line" in loc or "insert_below_inference_line" in loc:
- bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)]
-
- trans_insert_location = line_idx + idx_offset
-
- insert_indent_level = inference_line_indent_level
-
- ### content
-
- # lines to insert
- lines_to_insert = bk_trans_content_this
- # replace [+] indication with empty
- lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level)
- # add begin indicator
- lines_to_insert = (
- " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " [Beginning Line] \n"
- + lines_to_insert
- )
- # replace INDICATIONS with real stuff
- # (for now, inference_line related transformations )
- # (have nothing to do with input, dataloader etc, )
- # (so no need to put replaces here.)
- lines_to_insert = lines_to_insert.replace("\n", " # [coder-enabled]\n")
- # add end indicator
- lines_to_insert += (
- " # [coder-enabled]\n"
- + " " * insert_indent_level
- + "# [NeuralCoder] "
- + self.backend
- + " [Ending Line] # [coder-enabled]"
- )
-
- # customized argument
- if self.backend == "pytorch_benchmark":
- lines_to_insert = lines_to_insert.replace(
- "NUM_BENCHMARK_ITERATION", globals.num_benchmark_iteration
- )
- lines_to_insert = lines_to_insert.replace("ACCURACY_MODE", str(False))
- lines_to_insert = lines_to_insert.replace(
- "INFERENCE_LINE", inference_line.strip()
- )
-
- ### register
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append(
- [lines_to_insert.count("\n") + 1]
- )
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(
- lines_to_insert.count("\n") + 1
- )
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- break # already transformed this line, so skip any further model_name search
- line_idx += 1
-
- # PART 3 - for customized location
-
- logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}")
- logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}")
- logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}")
- logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}")
diff --git a/neural_coder/coders/pytorch/lightning.py b/neural_coder/coders/pytorch/lightning.py
deleted file mode 100644
index 383432e2c3f..00000000000
--- a/neural_coder/coders/pytorch/lightning.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class Lightning(object):
- def __init__(self, file) -> None:
- self.file = file
- self.result = []
-
- def transform(self):
- lines = self.file.split("\n")
- for line in lines:
- if self.not_add_accelerator(line) or self.not_add_precision(line):
- new_line = self.add(line)
- if self.not_modify(new_line):
- new_line = self.modify(new_line)
- self.result.append(new_line)
- elif self.not_modify(line):
- new_line = self.modify(line)
- self.result.append(new_line)
- if not self.not_add_accelerator(line) and not self.not_add_precision(line) and not self.not_modify(line):
- if line == "" and self.result[-1] == "":
- continue
- self.result.append(line)
-
- for index, line in enumerate(self.result):
- if index != len(self.result) - 1:
- self.result[index] += "\n"
- return "".join(self.result)
-
- def not_add_precision(self, s):
- if "Trainer" in s:
- if "precision" not in s:
- return True
- else:
- return False
- return False
-
- def not_add_accelerator(self, s):
- if "Trainer" in s:
- if "accelerator" not in s:
- return True
- else:
- return False
- return False
-
- def add(self, s):
- if "Trainer" in s:
- if "precision" not in s:
- s_index = s.find(")")
- s = s[:s_index] + ', precision="bf16"' + s[s_index:]
- if "accelerator" not in s:
- s_index = s.find(")")
- s = s[:s_index] + ', accelerator="cpu"' + s[s_index:]
- return s
-
- def not_modify(self, s):
- if "bf16" in s and "cpu" in s:
- return False
- return True
-
- def modify(self, s):
- if "16" in s:
- old = "16"
- s = s.replace(old, '"bf16"')
- if "32" in s:
- old = "32"
- s = s.replace(old, '"bf16"')
- if '"gpu"' in s:
- old = '"gpu"'
- s = s.replace(old, '"cpu"')
- if '"tpu"' in s:
- old = '"tpu"'
- s = s.replace(old, '"cpu"')
- return s
diff --git a/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py b/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py
deleted file mode 100644
index 82f324e26b3..00000000000
--- a/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from ... import globals
-from ...utils.line_operation import (
- get_line_indent_level,
- get_line_left_hand_side,
- is_eval_func_model_name,
- single_line_comment_or_empty_line_detection,
-)
-
-logging.basicConfig(
- level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000"
-)
-logger = logging.getLogger(__name__)
-
-
-class ReclaimInferenceTransformersTrainer(object):
- def __init__(self, list_model_def_instance):
- self.list_model_def_instance = list_model_def_instance
-
- def print_info(self):
- for i in self.list_model_def_instance:
- logger.debug(f"i.print_info(): {i.print_info()}")
-
- # collect file transformation info and register (store) in globals
- # (i.e. which file to add which lines at which location)
- def register_transformation(self):
- file_path = globals.list_code_path[0]
- lines = open(file_path, "r").read().split("\n")
- line_idx = 0
-
- for i in range(len(lines)):
- line = lines[i]
-
- if "# Evaluation" in line:
- indent_level = get_line_indent_level(line)
- trans_insert_location = i
- lines_to_insert = ""
- lines_to_insert += " " * indent_level + "eval_dataloader = trainer.get_eval_dataloader()" + "\n"
- lines_to_insert += " " * indent_level + "import torch" + "\n"
- lines_to_insert += " " * indent_level + "for step, inputs in enumerate(eval_dataloader):" + "\n"
- lines_to_insert += " " * indent_level + " with torch.no_grad():" + "\n"
- lines_to_insert += " " * indent_level + " model(**inputs)"
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append([lines_to_insert.count("\n") + 1])
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1)
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- line_idx += 1
-
- logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}")
- logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}")
- logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}")
- logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}")
diff --git a/neural_coder/coders/pytorch/reclaim_inputs.py b/neural_coder/coders/pytorch/reclaim_inputs.py
deleted file mode 100644
index 16397012e37..00000000000
--- a/neural_coder/coders/pytorch/reclaim_inputs.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from ... import globals
-from ...utils.line_operation import (
- get_line_indent_level,
- get_line_left_hand_side,
- is_eval_func_model_name,
- single_line_comment_or_empty_line_detection,
-)
-
-logging.basicConfig(
- level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000"
-)
-logger = logging.getLogger(__name__)
-
-
-class ReclaimInputs(object):
- def __init__(self, list_model_def_instance):
- self.list_model_def_instance = list_model_def_instance
-
- def print_info(self):
- for i in self.list_model_def_instance:
- logger.debug(f"i.print_info(): {i.print_info()}")
-
- # collect file transformation info and register (store) in globals
- # (i.e. which file to add which lines at which location)
- def register_transformation(self):
- list_code = []
- for i in globals.list_code_path:
- list_code.append(open(i, "r").read())
-
- for ins in self.list_model_def_instance:
- model_name = ins.model_name
- file_path = ins.file_path
- model_def_line_idx = ins.model_def_line_idx
- function_def_line_idx = ins.function_def_line_idx
- class_name = ins.class_name
-
- # transformation
- file_path_idx = globals.list_code_path.index(file_path)
- lines = list_code[file_path_idx].split("\n")
- line_idx = 0
-
- # search inference line in this file, and also input_name
- inference_line = ""
- input_name = ""
- for i in range(len(lines)):
- line = lines[i]
- is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line)
- if is_eval_func and "[coder-enabled]" not in line:
- inference_line = line
- input_name = line[line.find("(") + 1 : line.find(")")].replace("*", "") # get "c" in "a = b(**c)"
-
- # if there is already a "input = xxx", then quit this function
- if input_name != "":
- for i in range(len(lines)):
- line = lines[i]
- if not single_line_comment_or_empty_line_detection(line):
- if input_name in line and "=" in line and line.find(input_name) < line.find("="):
- return
-
- # add the created lines for inputs
- if inference_line != "" and input_name != "":
- for i in range(len(lines)):
- line = lines[i]
- is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line)
- if is_eval_func and "[coder-enabled]" not in line:
- indent_level = get_line_indent_level(line)
- trans_insert_location = i
- lines_to_insert = ""
- lines_to_insert += " " * indent_level + "try:" + "\n"
- lines_to_insert += " " * indent_level + " " + input_name + " = " + input_name + "\n"
- lines_to_insert += " " * indent_level + "except:" + "\n"
- lines_to_insert += " " * indent_level + " pass"
-
- if file_path not in globals.list_trans_insert_modified_file:
- globals.list_trans_insert_modified_file.append(file_path)
- globals.list_trans_insert_location_idxs.append([trans_insert_location])
- globals.list_trans_insert_number_insert_lines.append([lines_to_insert.count("\n") + 1])
- globals.list_trans_insert_lines_to_insert.append([lines_to_insert])
- else:
- idx = globals.list_trans_insert_modified_file.index(file_path)
- globals.list_trans_insert_location_idxs[idx].append(trans_insert_location)
- globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1)
- globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert)
-
- line_idx += 1
-
- logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}")
- logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}")
- logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}")
- logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}")
diff --git a/neural_coder/coders/tensorflow/__init__.py b/neural_coder/coders/tensorflow/__init__.py
deleted file mode 100644
index e833188cc78..00000000000
--- a/neural_coder/coders/tensorflow/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/neural_coder/coders/tensorflow/amp.py b/neural_coder/coders/tensorflow/amp.py
deleted file mode 100644
index 70302d78d4a..00000000000
--- a/neural_coder/coders/tensorflow/amp.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ...utils.line_operation import get_line_left_hand_side
-
-
-class TensorFlowKerasAMP(object):
- def __init__(self, file) -> None:
- self.file = file
- self.result = []
- self.keras_edited_flag = False
-
- def transform(self):
- # import pdb
- # pdb.set_trace()
- lines = self.file.split("\n")
- for line in lines:
- if self.is_modify(line):
- if ".ConfigProto()" in line: # TF AMP
- config_name = get_line_left_hand_side(line)
- new_line_1 = "from tensorflow.core.protobuf import rewriter_config_pb2"
- new_line_2 = (
- config_name
- + ".graph_options.rewrite_options.auto_mixed_precision_mkl = "
- + "rewriter_config_pb2.RewriterConfig.ON"
- )
- self.result.append(line)
- self.result.append(new_line_1)
- self.result.append(new_line_2)
- elif "keras" in line and "import" in line: # Keras AMP
- if not self.keras_edited_flag:
- new_line_1 = "from tensorflow.keras.mixed_precision import experimental as mixed_precision"
- new_line_2 = "policy = mixed_precision.Policy('mixed_bfloat16')"
- new_line_3 = "mixed_precision.set_policy(policy)"
- self.result.append(line)
- self.result.append(new_line_1)
- self.result.append(new_line_2)
- self.result.append(new_line_3)
- self.keras_edited_flag = True
- else:
- self.result.append(line)
- else:
- self.result.append(line)
- for index, line in enumerate(self.result):
- if index != len(self.result) - 1:
- self.result[index] += "\n"
- return "".join(self.result)
-
- def is_modify(self, s):
- if ".ConfigProto()" in s or ("keras" in s and "import" in s):
- return True
- else:
- return False
diff --git a/neural_coder/coders/tensorflow/inc.py b/neural_coder/coders/tensorflow/inc.py
deleted file mode 100644
index 837dff143fb..00000000000
--- a/neural_coder/coders/tensorflow/inc.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ...utils.line_operation import get_line_indent_level, get_line_left_hand_side
-
-
-class TensorFlowKerasINC(object):
- def __init__(self, file) -> None:
- self.file = file
- self.result = []
-
- def transform(self):
- # import pdb
- # pdb.set_trace()
- lines = self.file.split("\n")
- for line in lines:
- if self.is_modify(line):
- model_name = "model"
- indent_level = get_line_indent_level(line)
- self.result.append(line)
- self.result.append(" " * indent_level + "from neural_compressor.quantization import fit")
- self.result.append(" " * indent_level + "from neural_compressor.config import PostTrainingQuantConfig")
- self.result.append(" " * indent_level + "from neural_compressor import common")
- self.result.append(" " * indent_level + "config = PostTrainingQuantConfig(quant_level=1)")
- self.result.append(" " * indent_level + model_name + " = fit(" + model_name + ", conf=config)")
- self.result.append(" " * indent_level + model_name + '.save("./quantized_model")')
- else:
- self.result.append(line)
- for index, line in enumerate(self.result):
- if index != len(self.result) - 1:
- self.result[index] += "\n"
- return "".join(self.result)
-
- def is_modify(self, s):
- if "model = tf." in s or "model = load_model(" in s:
- if "self.model" not in s:
- return True
- else:
- return False
diff --git a/neural_coder/coders/transform.py b/neural_coder/coders/transform.py
deleted file mode 100644
index c553cbbb87b..00000000000
--- a/neural_coder/coders/transform.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from .. import globals
-
-logging.basicConfig(
- level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000"
-)
-logger = logging.getLogger(__name__)
-
-
-def execute_insert_transformation(list_transformed_code):
- """Insert code lines into file."""
- for index, file_path in enumerate(globals.list_trans_insert_modified_file):
- trans_location_idxs = globals.list_trans_insert_location_idxs[index]
- trans_number_insert_lines = globals.list_trans_insert_number_insert_lines[index]
- trans_lines_to_insert = globals.list_trans_insert_lines_to_insert[index]
-
- # sort trans_location_idxs and sort the other lists accordingly
- trans_number_insert_lines = [i for _, i in sorted(zip(trans_location_idxs, trans_number_insert_lines))]
- trans_lines_to_insert = [i for _, i in sorted(zip(trans_location_idxs, trans_lines_to_insert))]
- trans_location_idxs = sorted(trans_location_idxs)
-
- file_path_idx = globals.list_code_path.index(file_path)
- lines_transformed = list_transformed_code[file_path_idx].split("\n")
-
- # math
- t = [0]
- u = 0
- for n in trans_number_insert_lines:
- u = u + n
- t.append(u)
- t = t[:-1]
-
- logger.debug(f"t: {t}")
- trans_location_idxs = [sum(i) for i in zip(trans_location_idxs, t)]
- logger.debug(f"trans_location_idxs after adjustment: {trans_location_idxs}")
-
- for idx in trans_location_idxs: # actual transformation (insertion)
- additions = trans_lines_to_insert[trans_location_idxs.index(idx)].split("\n")
- additions = additions[::-1] # reverse
- for i in range(len(additions)):
- lines_transformed.insert(idx, additions[i])
-
- # transfer lines_transformed to code format ("\n" save write)
- code_transformed = "".join([i + "\n" for i in lines_transformed])[0:-1]
-
- list_transformed_code[file_path_idx] = code_transformed
-
- return list_transformed_code
-
-
-def execute_indent_transformation(list_transformed_code):
- """Indent code lines with spaces at the beginning."""
- for index, file_path in enumerate(globals.list_trans_indent_modified_file):
- trans_location_idxs = globals.list_trans_indent_location_idxs[index]
- trans_indent_level = globals.list_trans_indent_level[index]
-
- file_path_idx = globals.list_code_path.index(file_path)
- lines_transformed = list_transformed_code[file_path_idx].split("\n")
-
- for idx in trans_location_idxs: # actual transformation (indent)
- this_indent_level = trans_indent_level[trans_location_idxs.index(idx)]
- lines_transformed[idx] = " " * 4 * this_indent_level + lines_transformed[idx]
-
- # transfer lines_transformed to code format ("\n" save write)
- code_transformed = "".join([i + "\n" for i in lines_transformed])[0:-1]
-
- list_transformed_code[file_path_idx] = code_transformed
-
- return list_transformed_code
diff --git a/neural_coder/docs/AWSSageMakerSupport.md b/neural_coder/docs/AWSSageMakerSupport.md
deleted file mode 100644
index eb8926c12ee..00000000000
--- a/neural_coder/docs/AWSSageMakerSupport.md
+++ /dev/null
@@ -1,32 +0,0 @@
-AWS Amazon SageMaker Support
-=====
-
-[AWS Amazon SageMaker](https://aws.amazon.com/sagemaker/) users can easily enjoy the productivity boost brought by Neural Coder by one-click installing [Neural Coder Jupyter Lab extension](https://www.npmjs.com/package/jupyter-lab-neural-compressor) in either **SageMaker Studio** or **SageMaker Notebook instance**.
-
-## Start Jupyter Lab 3
-[Neural Coder Jupyter extension](https://www.npmjs.com/package/jupyter-lab-neural-compressor) requires Jupyter Lab 3. Using Jupyter Lab 1 will cause installation error. To start Jupyter Lab 3, please check the following:
-
-#### For SageMaker Studio
-
-
-#### For SageMaker Notebook instance
-
-
-## Installation Guide
-For both cases, the installation process is exactly the same, which is by **searching ```neural-compressor``` in the Extension Manager**.
-
-1. Search and Install
-
-
-
-2. Rebuild
-
-
-
-3. Save and Reload
-
-
-
-4. Done!
-
-
diff --git a/neural_coder/docs/BigDLNanoSupport.md b/neural_coder/docs/BigDLNanoSupport.md
deleted file mode 100644
index 0ea0e4dc849..00000000000
--- a/neural_coder/docs/BigDLNanoSupport.md
+++ /dev/null
@@ -1,37 +0,0 @@
-BigDL Nano Support
-===========================
-
-Neural Coder collaborates with [BigDL-Nano](https://bigdl.readthedocs.io/en/latest/doc/Nano/Overview/nano.html), a Python library that automatically applies modern CPU optimizations, to further democratize ease-of-use BigDL-Nano APIs as a **no-code** solution for PyTorch Deep Learning programmers.
-
-## Example
-For instance, to perform BF16 + Channels Last optimizations with BigDL-Nano API using Neural Coder on the [example code](../examples/nano/resnet18.py) and run this code with the enabled optimizations, users can simply execute this command:
-```
-python -m neural_coder -o nano_bf16_channels_last ../examples/nano/resnet18.py
-```
-The alias for each optimization set is documented in the below Support Matrix. Note that you need to ```pip install bigdl``` first following [BigDL-Nano documentation](https://github.com/intel-analytics/BigDL#installing).
-
-## Support Matrix
-
-| Optimization Set | API Alias |
-| ------------- | ------------- |
-| BF16 + Channels Last | `nano_bf16_channels_last` |
-| BF16 + IPEX + Channels Last | `nano_bf16_ipex_channels_last` |
-| BF16 + IPEX | `nano_bf16_ipex` |
-| BF16 | `nano_bf16` |
-| Channels Last | `nano_fp32_channels_last` |
-| IPEX + Channels Last | `nano_fp32_ipex_channels_last` |
-| IPEX | `nano_fp32_ipex` |
-| Convert CUDA to CPU | `nano_gpu_to_cpu` |
-| INT8 | `nano_int8` |
-| JIT + BF16 + Channels Last | `nano_jit_bf16_channels_last` |
-| JIT + BF16 + IPEX + Channels Last | `nano_jit_bf16_ipex_channels_last` |
-| JIT + BF16 + IPEX | `nano_jit_bf16_ipex` |
-| JIT + BF16 | `nano_jit_bf16` |
-| JIT + Channels Last | `nano_jit_fp32_channels_last` |
-| JIT + IPEX + Channels Last | `nano_jit_fp32_ipex_channels_last` |
-| JIT + IPEX | `nano_jit_fp32_ipex` |
-| JIT | `nano_jit_fp32` |
-| ONNX Runtime | `nano_onnxruntime_fp32` |
-| ONNX Runtime + INT8 | `nano_onnxruntime_int8_qlinear` |
-| OpenVINO | `nano_openvino_fp32` |
-| OpenVINO + INT8 | `nano_openvino_int8` |
diff --git a/neural_coder/docs/IntelCPU_PerformanceSetting.md b/neural_coder/docs/IntelCPU_PerformanceSetting.md
deleted file mode 100644
index a112fd3516f..00000000000
--- a/neural_coder/docs/IntelCPU_PerformanceSetting.md
+++ /dev/null
@@ -1,51 +0,0 @@
-## Intel CPU Platforms: Best Performance Setting
-### Install MKL, OpenMP and JEMALLOC
-The simplest way for installation is through ```conda install```:
-```bash
-conda install -y mkl mkl-include jemalloc
-```
-
-### Install NUMA Controller
-```bash
-apt-get update && apt-get install bc numactl
-```
-
-### Environment Variables
-Check if your ```CONDA_PREFIX``` has a value by:
-```bash
-echo ${CONDA_PREFIX}
-```
-If it is empty, it means that you are not in a traditional CONDA environment, you need to find the location of the ```.so``` files by:
-```bash
-find / -name "libjemalloc.so"
-find / -name "libiomp5.so"
-```
-It will show the path these files were installed into. For example:
-```bash
-/home/name/lib/libjemalloc.so
-/home/name/lib/libiomp5.so
-```
-And then you should ```export``` this path as ```CONDA_PREFIX```:
-```bash
-export CONDA_PREFIX="/home/name"
-```
-Finally:
-```bash
-export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libjemalloc.so
-export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
-export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export KMP_AFFINITY="granularity=fine,compact,1,0"
-export KMP_BLOCKTIME=1
-export DNNL_PRIMITIVE_CACHE_CAPACITY=1024
-```
-
-### Frequency Governors
-Check the frequency governor state on your machine:
-```bash
-cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
-```
-If it shows ```powersave``` instead of ```performance```, execute:
-```bash
-echo "performance" | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
-cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
-```
diff --git a/neural_coder/docs/PythonAPI.md b/neural_coder/docs/PythonAPI.md
deleted file mode 100644
index dfc7567fe43..00000000000
--- a/neural_coder/docs/PythonAPI.md
+++ /dev/null
@@ -1,58 +0,0 @@
-Neural Coder as Python API
-===========================
-
-Neural Coder can be used as Python APIs. We currently provide 3 main user-facing APIs for Neural Coder: enable, bench and superbench.
-
-#### Enable
-Users can use ```enable()``` to enable specific features into DL scripts:
-```
-from neural_coder import enable
-enable(
- code="neural_coder/examples/vision/resnet50.py",
- features=[
- "pytorch_jit_script",
- "pytorch_channels_last",
- ],
-)
-```
-To run benchmark directly on the optimization together with the enabling:
-```
-from neural_coder import enable
-enable(
- code="neural_coder/examples/vision/resnet50.py",
- features=[
- "pytorch_jit_script",
- "pytorch_channels_last"
- ],
- run_bench=True,
-)
-```
-
-#### Bench
-To run benchmark on your code with an existing patch:
-```
-from neural_coder import bench
-bench(
- code="neural_coder/examples/vision/resnet50.py",
- patch_path="${your_patch_path}",
-)
-```
-
-#### SuperBench
-To sweep on optimization sets with a fixed benchmark configuration:
-```
-from neural_coder import superbench
-superbench(code="neural_coder/examples/vision/resnet50.py")
-```
-To sweep on benchmark configurations for a fixed optimization set:
-```
-from neural_coder import superbench
-superbench(
- code="neural_coder/examples/vision/resnet50.py",
- sweep_objective="bench_config",
- bench_feature=[
- "pytorch_jit_script",
- "pytorch_channels_last",
- ],
-)
-```
diff --git a/neural_coder/docs/PythonLauncher.md b/neural_coder/docs/PythonLauncher.md
deleted file mode 100644
index 74ad230e1f9..00000000000
--- a/neural_coder/docs/PythonLauncher.md
+++ /dev/null
@@ -1,40 +0,0 @@
-Python Launcher
-===========================
-
-Neural Coder can be used as a Python **Launcher**. Users can run the Python model code as it is with automatic enabling of Deep Learning optimizations by using Neural Coder's inline Python **Launcher** design.
-
-## Quick-Start
-
-Example: Let's say you are running an NLP model using ```run_glue.py``` from HuggingFace transformers [examples](https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py).
-
-Pre-requisites:
-```bash
-pip install transformers==4.21.0 torch datasets
-```
-
-Generally we run this code with a Python command line like this:
-```bash
-python run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result
-```
-
-With Neural Coder's **Launcher**, users can easily enjoy Deep Learning optimizations (e.g. default - INT8 dynamic quantization by Intel® Neural Compressor for PyTorch models) by simply adding an inline prefix
-```bash
--m neural_coder
-```
-to the Python command line, and everything else remains the same:
-```bash
-python -m neural_coder run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result
-```
-
-This will run ```run_glue.py``` with the Deep Learning optimization automatically enabled, while everything else (e.g. your input arguments for the code itself) remains the same as the original code. You can also check out the optimized code ```run_glue_optimized.py``` auto-generated by the **Launcher** under the same folder if you want to learn the code enabling.
-
-Note: Any modification on the optimized code ```run_glue_optimized.py``` will be overwritten every time you run Neural Coder **Launcher** on ```run_glue.py```, so please make any modification on the original code ```run_glue.py``` instead of the optimized one. The optimized code is only saved for your reference.
-
-## Launcher Arguments (Optional)
-
-Users can specify which Deep Learning optimization they want to conduct using ```--opt``` argument. The list of supported Deep Learning optimization features can be found [here](SupportMatrix.md).
-
-Note that if specifically optimizing with INT8 quantization by Intel® Neural Compressor, to choose a quantization approach (strategy), ```--approach``` argument can be specified with either ```static```, ```static_ipex``` or ```dynamic```. For example, to run INT8 static quantization by Intel® Neural Compressor:
-```bash
-python -m neural_coder --approach static run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result
-```
diff --git a/neural_coder/docs/Quantization.md b/neural_coder/docs/Quantization.md
deleted file mode 100644
index 555834c74a8..00000000000
--- a/neural_coder/docs/Quantization.md
+++ /dev/null
@@ -1,39 +0,0 @@
-Neural Coder for Quantization
-===========================
-This feature helps automatically enable quantization on Deep Learning models and automatically evaluates for the best performance on the model. It is a code-free solution that can help users enable quantization algorithms on a model with no manual coding needed. Supported features include Post-Training Static Quantization, Post-Training Dynamic Quantization, and Mixed Precision.
-
-
-## Features Supported
-- Post-Training Static Quantization for [Stock PyTorch](https://pytorch.org/tutorials/prototype/fx_graph_mode_ptq_static.html) (with FX backend)
-- Post-Training Static Quantization for [IPEX](https://github.com/intel/intel-extension-for-pytorch/blob/v1.12.0/docs/tutorials/features/int8.md)
-- Post-Training Dynamic Quantization for [Stock PyTorch](https://pytorch.org/tutorials/recipes/recipes/dynamic_quantization.html)
-- Mixed Precision for [Stock PyTorch](https://pytorch.org/tutorials/recipes/recipes/amp_recipe.html)
-
-## Models Supported
-- HuggingFace [Transformers](https://github.com/huggingface/transformers) models
-- [torchvision](https://pytorch.org/vision/stable/index.html) models
-- Broad models (under development)
-
-## Usage
-- PyPI distribution with a one-line API call
-- [JupyterLab extension](../extensions/neural_compressor_ext_lab/README.md)
-
-## Example
-### PyPI distribution:
-HuggingFace [Transformers](https://github.com/huggingface/transformers) models: [text-classification/run_glue.py](https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py)
-```
-from neural_coder import auto_quant
-auto_quant(
- code="https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
- args="--model_name_or_path albert-base-v2 --task_name sst2 --do_eval --output_dir result",
-)
-```
-
-[torchvision](https://pytorch.org/vision/stable/index.html) models: [imagenet/main.py](https://github.com/pytorch/examples/blob/main/imagenet/main.py)
-```
-from neural_coder import auto_quant
-auto_quant(
- code="https://github.com/pytorch/examples/blob/main/imagenet/main.py",
- args="-a alexnet --pretrained -e /path/to/imagenet/",
-)
-```
diff --git a/neural_coder/docs/SupportMatrix.md b/neural_coder/docs/SupportMatrix.md
deleted file mode 100644
index be2a7fea308..00000000000
--- a/neural_coder/docs/SupportMatrix.md
+++ /dev/null
@@ -1,26 +0,0 @@
-Supported Optimization Features
-===========================
-
-| Category | Optimization | API Alias |
-| ------------- | ------------- | ------------- |
-| PyTorch | [Mixed Precision](https://pytorch.org/docs/stable/amp.html) | `pytorch_amp` |
-| PyTorch | [Channels Last](https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html) | `pytorch_channels_last` |
-| PyTorch | [JIT (Just-In-Time) Script/Trace](https://pytorch.org/docs/stable/jit.html) & [optimize_for_inference](https://pytorch.org/docs/stable/generated/torch.jit.optimize_for_inference.html) | `pytorch_jit_script`, `pytorch_jit_trace`, `pytorch_jit_script_ofi`, `pytorch_jit_trace_ofi` |
-| PyTorch | JIT with [TorchDynamo](https://github.com/pytorch/torchdynamo) | `pytorch_torchdynamo_jit_script`, `pytorch_torchdynamo_jit_trace`, `pytorch_torchdynamo_jit_script_ofi`, `pytorch_torchdynamo_jit_trace_ofi` |
-| PyTorch | [Intel Neural Compressor (INC) Mixed Precision](https://github.com/intel/neural-compressor/blob/master/docs/source/mixed_precision.md) | `pytorch_inc_bf16` |
-| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex`, `pytorch_inc_static_quant_ipex_xpu` |
-| PyTorch | [INC INT8 Dynamic Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_dynamic_quant` |
-| PyTorch | [Intel Extension for PyTorch (FP32, BF16, INT8 Static/Dynamic Quantization)](https://github.com/intel/intel-extension-for-pytorch) | `pytorch_ipex_fp32`, `pytorch_ipex_bf16`, `pytorch_ipex_int8_static_quant`, `pytorch_ipex_int8_dynamic_quant` |
-| PyTorch | [Alibaba Blade-DISC](https://github.com/alibaba/BladeDISC) | `pytorch_aliblade` |
-| PyTorch Lightning | [Mixed Precision](https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html) | `pytorch_lightning_bf16_cpu` |
-| TensorFlow | [Mixed Precision](https://www.intel.com/content/www/us/en/developer/articles/guide/getting-started-with-automixedprecisionmkl.html) | `tensorflow_amp` |
-| Keras | [Mixed Precision](https://www.tensorflow.org/guide/mixed_precision) | `keras_amp` |
-| TensorFlow/Keras Model | [INC Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `tensorflow_inc` |
-| Keras Script | [INC Quantization](https://github.com/intel/neural-compressor/tree/master/examples/keras/mnist) | `keras_inc` |
-| ONNX Runtime | [INC Static Quantization (QLinear)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `onnx_inc_static_quant_qlinear` |
-| ONNX Runtime | [INC Static Quantization (QDQ)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `onnx_inc_static_quant_qdq` |
-| ONNX Runtime | [INC Dynamic Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `onnx_inc_dynamic_quant` |
-| [HuggingFace Optimum-Intel](https://huggingface.co/docs/optimum/intel/index) | INC Quantization | `pytorch_inc_huggingface_optimum_static`, `pytorch_inc_huggingface_optimum_dynamic` |
-| [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers/) | INC Quantization | `intel_extension_for_transformers` |
-| [BigDL Nano](https://bigdl.readthedocs.io/en/latest/doc/PythonAPI/Nano/pytorch.html#bigdl-nano-pytorch-inferenceoptimizer) | [Optimization List](./BigDLNanoSupport.md) | `nano_` + [specific alias](./BigDLNanoSupport.md) |
-| Auto-Detect | [INC Quantization](https://github.com/intel/neural-compressor) | `inc_auto` |
diff --git a/neural_coder/docs/cloud_autobench/CloudAutobench.MD b/neural_coder/docs/cloud_autobench/CloudAutobench.MD
deleted file mode 100644
index d906a80292f..00000000000
--- a/neural_coder/docs/cloud_autobench/CloudAutobench.MD
+++ /dev/null
@@ -1,95 +0,0 @@
-# Cloud Auto-bench
-This is a user guide for the automated bash script for creating a cloud instance, configuring the environment, running the benchmark code, and terminating the instance. The script supports AWS and Ali Yun for now and will support more cloud vendors in the future.
-
-## Prerequisite
-## AWS
-#### Install AWS CLI
-Install the latest AWS CLI according to https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html.
-Example: install AWS CLI on Linux x86 (64-bit) by the following commands.
-```
-curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
-unzip awscliv2.zip
-./aws/install -i ~/.Awscli2 -b ~/.Awscli2/bin
-export PATH=/.Awscli2/bin:$PATH
-```
-Confirm the installation with the following command.
-```
-export PATH=/.Awscli2/bin:$PATH
-```
-
-#### Configure IAM role
-Configure IAM role to access AWS according to https://us-east-2.console.aws.amazon.com/iamv2/home#/users.
-
-#### Configure AWS clients
-Configure AWS clients via "aws configure" which fetch from step 2.
-```
-aws --configure
-AWS Access Key ID [None]: x
-AWS Secret Access Key [None]: x
-Default region name [None]: us-east-2
-Default output format [None]: text
-```
-
-#### Create a key pair
-Create a key pair x.pem via https://us-east-2.console.aws.amazon.com/ec2/v2/home?region=us-east-2#KeyPairs:, add x.pem to the current folder, and modify its permission by
-```
-chmod 400 x.pem
-```
-
-## Ali Yun
-#### Install Alibaba Cloud CLI
-Download the installation package for Linux from https://www.alibabacloud.com/help/en/alibaba-cloud-cli/latest/linux
-
-Decompress the downloaded file to obtain the executable file named aliyun
-```
-tar xzvf (aliyun-cli-linux-latest-amd64.tgz)
-```
-In the "()" is the file name of the installation file you have downloaded
-
-Set environment variables
-add this line into the ~/.bash_profile file
-```
-export PATH=/home/your_directory/.Awscli2/bin:$PATH
-```
-
-Your directory is the directory of bin in the ALi Yun installation directory)
-source environment variables
-
-#### Configure Ali Yun clients
-run
-```
-aliyun configure
-```
-
-Input the key ID, key secret, region ID and default language of your Ali Yun account
-```
-Access Key Id [************************]:
-Access Key Secret [******************************]:
-Default Region Id [**-****]:
-Default Output Format [json]: json (Only support json)
-Default Language [zh|en] en
-```
-
-### Create a key pair of Ali Yun according to page https://ecs.console.aliyun.com/#/keyPair/
-add x.pem to the current folder, and modify its permission by
-```
-chmod 400 x.pem
-```
-
-## Launch the cloud benchmark script
-### Modify the permission of bench.sh
-```
-chmod 755 bench.sh
-```
-
-### Input your task config information in the config.conf file
-You need to input the information of your task in the config.conf file
-You can choose the cloud vendor(AWS or Ali Yun), Instance number, type and other information with the comments as a reference
-
-### Launch the script
-```
-./bench.sh
-```
-
-### Output
-Example Cloud auto-bench report: ```superbench_report_aws_icx.pdf```
diff --git a/neural_coder/docs/cloud_autobench/bench.sh b/neural_coder/docs/cloud_autobench/bench.sh
deleted file mode 100644
index b28b2de9ed2..00000000000
--- a/neural_coder/docs/cloud_autobench/bench.sh
+++ /dev/null
@@ -1,497 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-
-# Read the user input
-
-check_input()
-{
-if [ $whether_launch_new -eq "1" ]
-then
- if [ $vendor -eq "1" ]
- then
- if [ -z $security_id_aws ]
- then
- echo "[ERROR] There is no security group ID, you must specify security ID in config file when creating a new instance"
- fi
-
- if [ -z $subnet_id_aws ]
- then
- echo "[ERROR] There is no subnet ID, you must specify subnet ID in config file when creating a new instance"
- fi
- elif [ $vendor -eq "2" ]
- then
- if [ -z $security_id_ali ]
- then
- echo "[ERROR] There is no security group ID, you must specify security ID in config file when creating a new instance"
- fi
-
- if [ -z $region_id_ali ]
- then
- echo "[ERROR] There is no region ID, you must specify region ID in config file when creating a new instance"
- fi
- else
- echo "[ERROR] There is no this vendor"
- fi
-else
- if [ -z $instance_id ]
- then
- echo "[ERROR] There is no instance ID, you must specify instance ID in config file when using an existed instance"
- fi
-fi
-
-}
-
-create_AWS_instance()
-{
-if [ $os -eq "1" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ami-02f3416038bdb17fb"
- else
- ami_ID="ami-0ff596d41505819fd"
- fi
-elif [ $os -eq "2" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ami-02d1e544b84bf7502"
- else
- ami_ID="ami-03e57de632660544c"
- fi
-elif [ $os -eq "3" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ami-092b43193629811af"
- else
- ami_ID="ami-0082f8c86a7132597"
- fi
-elif [ $os -eq "4" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ami-0f7cb53c916a75006"
- else
- ami_ID="ami-075a486be6269029f"
- fi
-else
- echo "[ERROR] The operating system is invalid"
- exit 0
-fi
-
-echo "[INFO] Starting creating AMS instance ..."
-
-instance_id=$(aws ec2 run-instances --image-id $ami_ID --count $count --instance-type $i_type --key-name $key_name --security-group-ids $security_id_aws --subnet-id $subnet_id_aws --block-device-mappings 'DeviceName=/dev/sda1, Ebs={VolumeSize=30}' --query "Instances[0].InstanceId")
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] Create AWS Instance failed"
- exit 0
-else
- echo "[INFO] Create AWS instance success"
- echo "[INFO] Your Instance Id is $instance_id"
- echo "[INFO] Waiting for instance to initialize ..."
- echo "[INFO] 15s left ..."
- sleep 5s
- echo "[INFO] 10s left ..."
- sleep 5s
- echo "[INFO] 5s left ..."
- sleep 5s
-fi
-}
-
-create_Ali_Yun_instance()
-{
-if [ $os -eq "1" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- else
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- fi
-elif [ $os -eq "2" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- else
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- fi
-elif [ $os -eq "3" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- else
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- fi
-elif [ $os -eq "4" ]
-then
- if [ $arch -eq "1" ]
- then
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- else
- ami_ID="ubuntu_20_04_x64_20G_alibase_20220524.vhd"
- fi
-else
- echo "[ERROR] The operating system is invalid"
- exit 0
-fi
-
-i_type="ecs.$i_type_family.$i_type_size"
-
-echo "[INFO] Starting creating Ali Yun instance ..."
-
-instance_id=$(aliyun ecs RunInstances --RegionId $region_id_ali --InstanceType $i_type --InstanceChargeType PostPaid --ImageId $ami_ID --KeyPairName $key_name --SecurityGroupId $security_id_ali --VSwitchId vsw-m5ethlhigvonp2kuyzhjw --InternetMaxBandwidthIn 1 --InternetMaxBandwidthOut 1 |grep "i-")
-result=$?
-
-instance_id="${instance_id:4:22}"
-if [ $result -ne '0' ]
-then
- echo "[ERROR] Create Ali Yun Instance failed"
- exit 0
-else
- echo "[INFO] Create Ali Yun instance successfully"
- echo "[INFO] The Ali Yun instance id is: $instance_id"
- echo "[INFO] Waiting for instance to initialize ..."
- echo "[INFO] 35s left ..."
- sleep 5s
- echo "[INFO] 30s left ..."
- sleep 5s
- echo "[INFO] 25s left ..."
- sleep 5s
- echo "[INFO] 20s left ..."
- sleep 5s
- echo "[INFO] 15s left ..."
- sleep 5s
- echo "[INFO] 10s left ..."
- sleep 5s
- echo "[INFO] 5s left ..."
- sleep 5s
-fi
-}
-
-connect_AWS()
-{
-dns_name=$(aws ec2 describe-instances --instance-ids $instance_id --query "Reservations[0].Instances[0].PublicDnsName")
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] Can not find this instance, please check"
- exit 0
-fi
-
-host_name=$dns_name
-
-if [ $os -eq "1" ]
-then
- host_name="ubuntu@$dns_name"
-else
- host_name="ec2-user@$dns_name"
-fi
-
-key_name="$key_name.pem"
-echo "[INFO] Your instance host name is: $host_name"
-echo "[INFO] Connecting to AWS Instance ..."
-ssh -i $key_name $host_name -o "StrictHostKeyChecking no" "uname -a ; exit"
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] SSH connection failed"
- echo "[INFO] Start terminating the Instance"
- aws ec2 terminate-instances --instance-ids $instance_id
- result=$?
- if [ $result -ne '0' ]
- then
- echo "[ERROR] Instance termination failed"
- else
- echo "[INFO] Instance termination success"
- fi
- exit 0
-else
- echo "[INFO] Connect to AWS Instance success"
-fi
-
-echo "[INFO] Start to transferring benchmark files"
-scp -i $key_name -r ./code/ $host_name:/tmp
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] SSH connection failed"
- exit 0
-else
- echo "[INFO] File transferring success"
-fi
-
-if [ $whether_launch_new -eq "1" ]
-then
- ssh -i $key_name $host_name "cd /tmp/code; chmod +x ./config.sh; ./config.sh; exit"
- echo "[INFO] Install dependencies finished"
-else
- echo "[INFO] Configured environment"
-fi
-
-echo "[INFO] Start launching the task ..."
-
-ssh -i $key_name $host_name "cd /tmp/code; chmod +x ./launch.sh; ./launch.sh; exit"
-
-echo "[INFO] Benchmark Execution finished"
-}
-
-connect_Ali_Yun()
-{
-public_ip=$(aliyun ecs DescribeInstances --output cols=InstanceId,PublicIpAddress.IpAddress rows=Instances.Instance[] |grep $instance_id)
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] Can not find this instance, please check"
- exit 0
-fi
-
-public_ip="${public_ip:25}"
-length=${#public_ip}
-public_ip="${public_ip:1:$length-2}"
-host_name="root@$public_ip"
-key_name="$key_name.pem"
-echo "[INFO] Your instance host name is: $host_name"
-
-echo "[INFO] Start to connecting Ali Yun instance"
-ssh -i $key_name $host_name -o "StrictHostKeyChecking no" "uname -a ; exit"
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] SSH connection failed"
- echo "[INFO] Start to delete instance $instance_id"
- sleep 60s
- aliyun ecs DeleteInstance --InstanceId $instance_id --Force true
- result=$?
- if [ $result -ne '0' ]
- then
- echo "[ERROR] Instance termination failed"
- exit 0
- else
- echo "[INFO] Instance termination success"
- fi
- exit 0
-else
- echo "[INFO] Connect to Ali Yun Instance success"
-fi
-
-echo "[INFO] Start to transferring benchmark files"
-scp -i $key_name -r ./code/ $host_name:/tmp
-result=$?
-if [ $result -ne '0' ]
-then
- echo "[ERROR] SSH connection failed"
- exit 0
-else
- echo "[INFO] File transferring success"
-fi
-
-if [ $whether_launch_new -eq "1" ]
-then
- ssh -i $key_name $host_name "cd /tmp/code; chmod +x ./config.sh; ./config.sh; exit"
- echo "[INFO] Install dependencies finished"
-else
- echo "[INFO] Configured environment"
-fi
-
-echo "[INFO] Start launching the task ..."
-
-ssh -i $key_name $host_name "cd /tmp/code; chmod +x ./launch.sh; ./launch.sh; exit"
-
-echo "[INFO] Benchmark Execution finished"
-}
-
-close_AWS()
-{
-
-if [ $whether_retain -eq "1" ]
-then
- echo "[INFO] Start stopping the Instance"
-
- aws ec2 stop-instances --instance-ids $instance_id
- result=$?
- if [ $result -ne '0' ]
- then
- echo "[ERROR] Instance stop failed"
- exit 0
- else
- echo "[INFO] Instance stop success"
- echo "[INFO] The instance id is $instance_id, Please record this $instance_id for next use"
- fi
-else
- echo "[INFO] Start terminating the Instance"
-
- aws ec2 terminate-instances --instance-ids $instance_id
- result=$?
- if [ $result -ne '0' ]
- then
- echo "[ERROR] Instance termination failed"
- exit 0
- else
- echo "[INFO] Instance termination success"
- fi
-fi
-}
-
-close_Ali_Yun()
-{
-
-if [ $whether_retain -eq "1" ]
-then
- echo "[INFO] Start stopping the Instance"
-
- aliyun ecs StopInstance --InstanceId $instance_id
- result=$?
- if [ $result -ne '0' ]
- then
- echo "[ERROR] Instance stop failed"
- exit 0
- else
- echo "[INFO] Instance stop success"
- echo "[INFO] The instance id is $instance_id, Please record this $instance_id for next use"
- fi
-elif [ $whether_retain -eq "2" ]
-then
- echo "[INFO] Start terminating the Instance"
-
- aliyun ecs DeleteInstance --InstanceId $instance_id --Force true
- result=$?
- if [ $result -ne '0' ]
- then
- echo "[ERROR] Instance termination failed"
- exit 0
- else
- echo "[INFO] Instance termination success"
- fi
-fi
-}
-
-
-main()
-{
-vendor=$(sed '/^cloud_vendor=/!d; s/.*=//' config.conf)
-os=$(sed '/^OS=/!d; s/.*=//' config.conf)
-arch=$(sed '/^arch=/!d; s/.*=//' config.conf)
-count=$(sed '/^count=/!d; s/.*=//' config.conf)
-i_type_family=$(sed '/^i_type_family=/!d; s/.*=//' config.conf)
-i_type_size=$(sed '/^i_type_size=/!d; s/.*=//' config.conf)
-key_name=$(sed '/^key_name=/!d; s/.*=//' config.conf)
-instance_id=$(sed '/^instance_id=/!d; s/.*=//' config.conf)
-security_id_aws=$(sed '/^security_id_aws=/!d; s/.*=//' config.conf)
-subnet_id_aws=$(sed '/^subnet_id_aws=/!d; s/.*=//' config.conf)
-security_id_ali=$(sed '/^security_id_ali=/!d; s/.*=//' config.conf)
-region_id_ali=$(sed '/^region_id_ali=/!d; s/.*=//' config.conf)
-
-whether_retain=$(sed '/^whether_retain=/!d; s/.*=//' config.conf)
-whether_launch_new=$(sed '/^whether_launch_new=/!d; s/.*=//' config.conf)
-
-i_type="$i_type_family.$i_type_size"
-
-check_input
-
-if [ ! -f "$key_name.pem" ]; then
- echo "[ERROR] Can not find the key pair file $key_name.pem, please put the $key_name.pem file in this folder"
- exit 0
-else
- chmod 400 ./"$key_name.pem"
-fi
-
-if [ ! -f "./code/benchmark.py" ]; then
- echo "[ERROR] Can not find the benchmark file, please put the benchmark file in code folder"
- exit 0
-fi
-
-
-if [ $whether_launch_new -eq "1" ]
-then
- echo "[INFO] Your instance info:"
- echo "[INFO] Instance key name: $key_name"
- echo "[INFO] Instance count: $count"
- echo "[INFO] Instance_type: $i_type"
-else
- echo "[INFO] The existed instance you choose: $instance_id"
-fi
-
-if [ $whether_launch_new -eq "1" ]
-then
- if [ $vendor -eq "1" ]
- then
- create_AWS_instance
- elif [ $vendor -eq "2" ]
- then
- create_Ali_Yun_instance
- else
- echo "Tencent Cloud"
- fi
-else
- if [ $vendor -eq "1" ]
- then
- aws ec2 start-instances --instance-ids $instance_id
- echo "[INFO] Waiting for instance to Start ..."
- echo "[INFO] 15s left ..."
- sleep 5s
- echo "[INFO] 10s left ..."
- sleep 5s
- echo "[INFO] 5s left ..."
- sleep 5s
- elif [ $vendor -eq "2" ]
- then
- aliyun ecs StartInstance --InstanceId $instance_id
- echo "[INFO] Waiting for instance to Start ..."
- echo "[INFO] 45s left ..."
- sleep 15s
- echo "[INFO] 30s left ..."
- sleep 15s
- echo "[INFO] 15s left ..."
- sleep 15s
- else
- echo "Tencent Cloud"
- fi
-fi
-
-if [ $vendor -eq "1" ]
-then
- connect_AWS
-elif [ $vendor -eq "2" ]
-then
- connect_Ali_Yun
-else
- echo "Tencent Cloud"
-fi
-
-if [ $vendor -eq "1" ]
-then
- close_AWS
-elif [ $vendor -eq "2" ]
-then
- close_Ali_Yun
-else
- echo "Tencent Cloud"
-fi
-
-exit 0
-
-}
-
-main
-
-
-
-
diff --git a/neural_coder/docs/cloud_autobench/code/config.sh b/neural_coder/docs/cloud_autobench/code/config.sh
deleted file mode 100644
index 10e9449a12f..00000000000
--- a/neural_coder/docs/cloud_autobench/code/config.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -x
-
-echo "[INFO] Start installing software packages and dependencies"
-
-# install machine packages
-sudo apt-get -y update
-sudo apt-get install -y wget
-sudo apt-get install -y git
-sudo apt-get install -y build-essential
-sudo apt-get install -y htop aha html2text numactl bc
-sudo apt-get install -y ffmpeg libsm6 libxext6
-sudo apt-get install -y automake libtool
-sudo apt-get install -y python3 pip
-
-# install conda
-wget https://repo.continuum.io/archive/Anaconda3-5.0.0-Linux-x86_64.sh -O anaconda3.sh
-chmod +x anaconda3.sh
-sudo ./anaconda3.sh -b -p /home/anaconda3
-export PATH=/home/anaconda3/bin:$PATH
-conda create -yn test python=3.9
-source activate test
-
-# install pip modules
-pip install numpy
-pip install pyyaml
-pip install typing_extensions
-pip install psutil
-pip install neural_compressor intel_extension_for_pytorch
-
-# install torch
-pip3 install torch torchvision torchaudio
-pip3 install torchdynamo
-pip3 install transformers
diff --git a/neural_coder/docs/cloud_autobench/code/launch.sh b/neural_coder/docs/cloud_autobench/code/launch.sh
deleted file mode 100644
index 1521301e1c3..00000000000
--- a/neural_coder/docs/cloud_autobench/code/launch.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -x
-
-export PATH=/home/anaconda3/bin:$PATH
-source activate test
-echo "[INFO] Start running auto benchmark..."
-python -c "from neural_coder import superreport; superreport(code='resnet50.py')"
-# Note: you need to uncomment superreport in neural_coder/interface.py and neural_coder/__init__.py to use this API.
diff --git a/neural_coder/docs/cloud_autobench/code/resnet50.py b/neural_coder/docs/cloud_autobench/code/resnet50.py
deleted file mode 100644
index 13caa61334a..00000000000
--- a/neural_coder/docs/cloud_autobench/code/resnet50.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torchvision.models as models
-
-model = models.resnet50(pretrained=True)
-model.eval()
-batch_size = 1
-input = torch.rand(batch_size, 3, 224, 224)
-with torch.no_grad():
- model(input)
diff --git a/neural_coder/docs/cloud_autobench/config.conf b/neural_coder/docs/cloud_autobench/config.conf
deleted file mode 100644
index d598178debb..00000000000
--- a/neural_coder/docs/cloud_autobench/config.conf
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This is an example of providing Cloud configs for cloud benchmark
-
-# Choose Cloud Vendor
-# 1 AWS 2 Ali Yun 3 Tencent Cloud
-cloud_vendor=1
-
-# Choose Instance Operating System
-# 1 Ubuntu 2 Amazon Linux 3 Red Hat 4 SUSE Linux
-OS=1
-
-# Choose Instance CPU architecture
-# 1 x86 2 ARM
-arch=1
-
-# Number of Instances need to be launched
-count=1
-
-# Family of instance type
-i_type_family=c6i
-
-# Size of instance type
-i_type_size=2xlarge
-
-# Private key name
-key_name=huangyu
-
-# Instance id (Required if use an existing instance)
-instance_id=i-0ecdba84ca1ea5d68
-
-# Security Group id AWS (Required)
-security_id_aws=sg-00139ed15a90e83e0
-
-# Subnet id AWS (Required)
-subnet_id_aws=subnet-010cad6a296e44f8b
-
-# Security Group id Ali Yun (Required)
-security_id_ali=sg-m5e61j9zh9hivx3k9iwp
-
-# RegionId Ali Yun (Required)
-region_id_ali=cn-qingdao
-
-# Whether retain the instance after this task
-# 1 retain 2 Not retain
-whether_retain=2
-
-# Create a new instance or use an existing instance
-# 1 Creat new 2 Use an existing one
-whether_launch_new=2
diff --git a/neural_coder/docs/cloud_autobench/superbench_report_aws_icx.pdf b/neural_coder/docs/cloud_autobench/superbench_report_aws_icx.pdf
deleted file mode 100644
index cdb6d01608c..00000000000
Binary files a/neural_coder/docs/cloud_autobench/superbench_report_aws_icx.pdf and /dev/null differ
diff --git a/neural_coder/docs/release_notes/v0.4.md b/neural_coder/docs/release_notes/v0.4.md
deleted file mode 100644
index 933c02900a3..00000000000
--- a/neural_coder/docs/release_notes/v0.4.md
+++ /dev/null
@@ -1,25 +0,0 @@
-v0.4
-=====
-
-## Highlights
-- **Visual Studio Code extension**: We are delighted to announce the release of Neural Coder's [Visual Studio Code extension](https://marketplace.visualstudio.com/items?itemName=IntelNeuralCompressor.neural-coder-ext-vscode). VS Code programmers can enjoy one-click automatic enabling of Deep Learning optimization API and accelerate their Deep Learning models without manual coding.
-
-- **HuggingFace Transformers**:
- - We supported **all** HuggingFace Transformers [examples](https://github.com/huggingface/transformers/tree/main/examples/pytorch) that calls ```Trainer``` class, and validated over **500** models from HuggingFace Transformers [model hub](https://huggingface.co/models). The models are able to be accelerated automatically with Neural Coder with minimum loss of prediction accuracy.
- - We enabled the support of [HuggingFace Optimum-Intel](https://huggingface.co/docs/optimum/intel/index). User scripts of HuggingFace Transformers models will by default be optimized with Optimum-Intel API to enjoy performance speed-up brought by INT8 quantization.
- - We enabled the support of [Intel® Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), an innovative toolkit to accelerate Transformer-based models on Intel platforms. For more details, please refer to the updated [support matrix](../SupportMatrix.md).
-
-- **Support of BigDL Nano**: We are delighted to announce the collaboration between Neural Coder and [BigDL Nano](https://bigdl.readthedocs.io/en/latest/doc/Nano/index.html). Users can now one-click enable BigDL Nano optimizations for PyTorch in Neural Coder. For detailed support matrix for BigDL Nano features, please refer to this [guide](../BigDLNanoSupport.md).
-
-- **Amazon AWS SageMaker**: We provided a user [tutorial](../AWSSageMakerSupport.md) for installing Neural Coder's JupyterLab extension in AWS SageMaker platform. Users are able to one-click install the extension in Amazon AWS SageMaker with Jupyter 3 and enjoy Neural Coder's functionalities.
-
-- **Python Launcher**: We added the implementation of [Python Launcher](../PythonLauncher.md) usage for Neural Coder, which will be one of the recommended user interfaces in the future as a replacement of Python API. Users can run the Python model code as it is with automatic enabling of Deep Learning optimizations by using Neural Coder's inline Python Launcher design: ```-m neural_coder```.
-
-- **Device Detection**: We enabled the capability of detecting running device and its ISA automatically and adjusting applied optimization features accordingly. For instance, when running Neural Coder on Intel GPU instead of Intel CPU, the PyTorch Mixed Precision optimization feature will adapt ```xpu``` instead of ```cpu```, and ```torch.half``` instead of ```torch.bfloat16```.
-
-## Others
-- **INT8 Accuracy Evaluation**: We enabled accuracy evaluation for INT8 quantizations in Neural Coder. Users are able to view the accuracy delta for each quantization optimization in Neural Coder's auto-benchmark output log. The calculation is ```acc_delta = (int8_acc - fp32_acc)/(fp32_acc)```.
-
-- **Auto-quantize TensorFlow/Keras scripts**: We enabled the support of auto-quantizing TensorFlow/Keras script-based models with Intel® Neural Compressor. The default quantization scheme will be applied. For more details, please refer to the updated [support matrix](../SupportMatrix.md).
-
-- **Auto-quantize ONNX Runtime scripts**: We enabled the support of auto-quantizing ONNX Runtime script-based models with Intel® Neural Compressor. We support [dynamic quantization](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt#dynamic-quantization), static quantization ([QDQ](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt#tensor-oriented-qdq-format)), and static quantization ([QLinearOps](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt#operator-oriented-with-qlinearops)). For more details, please refer to the updated [support matrix](../SupportMatrix.md).
diff --git a/neural_coder/examples/keras/mnist.py b/neural_coder/examples/keras/mnist.py
deleted file mode 100644
index 22ba9d66876..00000000000
--- a/neural_coder/examples/keras/mnist.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import tensorflow as tf
-import numpy as np
-from tensorflow import keras
-from tensorflow.keras import layers
-import time
-from torch.utils.data import DataLoader
-
-num_classes = 10
-
-def build_dataset():
- # Load the data and split it between train and test sets
- (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
-
- # Scale images to the [0, 1] range
- x_train = x_train.astype("float32") / 255
- x_test = x_test.astype("float32") / 255
- # Make sure images have shape (28, 28, 1)
- x_train = np.expand_dims(x_train, -1)
- x_test = np.expand_dims(x_test, -1)
-
- # convert class vectors to binary class matrices
- y_train = keras.utils.to_categorical(y_train, num_classes)
- y_test = keras.utils.to_categorical(y_test, num_classes)
- return x_train, y_train, x_test, y_test
-
-class Dataset():
- def __init__(self, ):
- _, _ , self.inputs, self.labels = build_dataset()
-
- def __getitem__(self, idx):
- return self.inputs[idx], self.labels[idx]
-
- def __len__(self):
- assert len(self.inputs) == len(self.labels), 'inputs should have equal len with labels'
- return len(self.inputs)
-
-def build_model(x_train, y_train, x_test, y_test):
- if os.path.exists('fp32_model'):
- model = keras.models.load_model('fp32_model')
- return model
- # Model / data parameters
- input_shape = (28, 28, 1)
- model = keras.Sequential(
- [
- keras.Input(shape=input_shape),
- layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
- layers.MaxPooling2D(pool_size=(2, 2)),
- layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
- layers.MaxPooling2D(pool_size=(2, 2)),
- layers.Flatten(),
- layers.Dropout(0.5),
- layers.Dense(num_classes, activation="softmax"),
- ]
- )
-
- batch_size = 128
- epochs = 1
-
- model.compile(loss="categorical_crossentropy", optimizer="adam",
- metrics=["accuracy"], run_eagerly=True)
- model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)
- model.summary()
- if not os.path.exists('fp32_model'):
- model.save('fp32_model')
- return model
-
-def eval_func(model):
- x_train, y_train, x_test, y_test = build_dataset()
- model.compile(metrics=["accuracy"], run_eagerly=False)
- score = model.evaluate(x_test, y_test)
- return score[1]
-
-def main():
- x_train, y_train, x_test, y_test = build_dataset()
- model = build_model(x_train, y_train, x_test, y_test)
- calib_dataloader = DataLoader(Dataset(), batch_size=10)
-
-if __name__ == '__main__':
- main()
diff --git a/neural_coder/examples/nano/resnet18.py b/neural_coder/examples/nano/resnet18.py
deleted file mode 100644
index 8d189bf75b2..00000000000
--- a/neural_coder/examples/nano/resnet18.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import torch
-from torchvision.models import resnet18
-
-if __name__ == "__main__":
-
- model_ft = resnet18(pretrained=True)
-
- x = torch.rand(2, 3, 224, 224)
- y_hat = model_ft(x)
- predictions = y_hat.argmax(dim=1)
- print(predictions)
diff --git a/neural_coder/examples/nlp/distilbert.py b/neural_coder/examples/nlp/distilbert.py
deleted file mode 100644
index eab5513a51a..00000000000
--- a/neural_coder/examples/nlp/distilbert.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from transformers import (
- AutoModelForSequenceClassification,
- AutoTokenizer
-)
-
-finetuned_model = "distilbert-base-uncased-finetuned-sst-2-english"
-
-
-class MyDataLoader(object):
- def __init__(self):
- self.tokenizer = AutoTokenizer.from_pretrained(finetuned_model)
- self.sequence = "Shanghai is a beautiful city!"
- self.encoded_input = self.tokenizer(
- self.sequence,
- return_tensors='pt'
- )
- self.label = 1 # negative sentence: 0; positive sentence: 1
- self.batch_size = 1
-
- def __iter__(self):
- yield self.encoded_input, self.label
-
-
-my_nlp_model = AutoModelForSequenceClassification.from_pretrained(
- finetuned_model,
-)
-
-my_nlp_dataloader = MyDataLoader()
-
-output = my_nlp_model(**my_nlp_dataloader.encoded_input)
diff --git a/neural_coder/examples/nlp/run_glue.py b/neural_coder/examples/nlp/run_glue.py
deleted file mode 100644
index f3a8ccfd75c..00000000000
--- a/neural_coder/examples/nlp/run_glue.py
+++ /dev/null
@@ -1,617 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# code source
-# https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py
-
-import logging
-import os
-import random
-import sys
-from dataclasses import dataclass, field
-from typing import Optional
-
-import datasets
-import numpy as np
-from datasets import load_dataset, load_metric
-
-import transformers
-from transformers import (
- AutoConfig,
- AutoModelForSequenceClassification,
- AutoTokenizer,
- DataCollatorWithPadding,
- EvalPrediction,
- HfArgumentParser,
- PretrainedConfig,
- Trainer,
- TrainingArguments,
- default_data_collator,
- set_seed,
-)
-from transformers.trainer_utils import get_last_checkpoint
-from transformers.utils import check_min_version, send_example_telemetry
-from transformers.utils.versions import require_version
-
-
-# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.21.0")
-
-require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
-
-task_to_keys = {
- "cola": ("sentence", None),
- "mnli": ("premise", "hypothesis"),
- "mrpc": ("sentence1", "sentence2"),
- "qnli": ("question", "sentence"),
- "qqp": ("question1", "question2"),
- "rte": ("sentence1", "sentence2"),
- "sst2": ("sentence", None),
- "stsb": ("sentence1", "sentence2"),
- "wnli": ("sentence1", "sentence2"),
-}
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class DataTrainingArguments:
- """
- Arguments pertaining to what data we are going to input our model for training and eval.
-
- Using `HfArgumentParser` we can turn this class
- into argparse arguments to be able to specify them on
- the command line.
- """
-
- task_name: Optional[str] = field(
- default=None,
- metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())},
- )
- dataset_name: Optional[str] = field(
- default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
- )
- dataset_config_name: Optional[str] = field(
- default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
- )
- max_seq_length: int = field(
- default=128,
- metadata={
- "help": (
- "The maximum total input sequence length after tokenization. Sequences longer "
- "than this will be truncated, sequences shorter will be padded."
- )
- },
- )
- overwrite_cache: bool = field(
- default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
- )
- pad_to_max_length: bool = field(
- default=True,
- metadata={
- "help": (
- "Whether to pad all samples to `max_seq_length`. "
- "If False, will pad the samples dynamically when batching to the maximum length in the batch."
- )
- },
- )
- max_train_samples: Optional[int] = field(
- default=None,
- metadata={
- "help": (
- "For debugging purposes or quicker training, truncate the number of training examples to this "
- "value if set."
- )
- },
- )
- max_eval_samples: Optional[int] = field(
- default=None,
- metadata={
- "help": (
- "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
- "value if set."
- )
- },
- )
- max_predict_samples: Optional[int] = field(
- default=None,
- metadata={
- "help": (
- "For debugging purposes or quicker training, truncate the number of prediction examples to this "
- "value if set."
- )
- },
- )
- train_file: Optional[str] = field(
- default=None, metadata={"help": "A csv or a json file containing the training data."}
- )
- validation_file: Optional[str] = field(
- default=None, metadata={"help": "A csv or a json file containing the validation data."}
- )
- test_file: Optional[str] = field(default=None, metadata={"help": "A csv or a json file containing the test data."})
-
- def __post_init__(self):
- if self.task_name is not None:
- self.task_name = self.task_name.lower()
- if self.task_name not in task_to_keys.keys():
- raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys()))
- elif self.dataset_name is not None:
- pass
- elif self.train_file is None or self.validation_file is None:
- raise ValueError("Need either a GLUE task, a training/validation file or a dataset name.")
- else:
- train_extension = self.train_file.split(".")[-1]
- assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file."
- validation_extension = self.validation_file.split(".")[-1]
- assert (
- validation_extension == train_extension
- ), "`validation_file` should have the same extension (csv or json) as `train_file`."
-
-
-@dataclass
-class ModelArguments:
- """
- Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
- """
-
- model_name_or_path: str = field(
- metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
- )
- config_name: Optional[str] = field(
- default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
- )
- tokenizer_name: Optional[str] = field(
- default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
- )
- cache_dir: Optional[str] = field(
- default=None,
- metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
- )
- use_fast_tokenizer: bool = field(
- default=True,
- metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
- )
- model_revision: str = field(
- default="main",
- metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
- )
- use_auth_token: bool = field(
- default=False,
- metadata={
- "help": (
- "Will use the token generated when running `transformers-cli login` (necessary to use this script "
- "with private models)."
- )
- },
- )
- ignore_mismatched_sizes: bool = field(
- default=False,
- metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
- )
-
-
-def main():
- # See all possible arguments in src/transformers/training_args.py
- # or by passing the --help flag to this script.
- # We now keep distinct sets of args, for a cleaner separation of concerns.
-
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
- if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
- # If we pass only one argument to the script and it's the path to a json file,
- # let's parse it to get our arguments.
- model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
- else:
- model_args, data_args, training_args = parser.parse_args_into_dataclasses()
-
- # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
- # information sent is the one passed as arguments along with your Python/PyTorch versions.
- send_example_telemetry("run_glue", model_args, data_args)
-
- # Setup logging
- logging.basicConfig(
- format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
- datefmt="%m/%d/%Y %H:%M:%S",
- handlers=[logging.StreamHandler(sys.stdout)],
- )
-
- log_level = training_args.get_process_log_level()
- logger.setLevel(log_level)
- datasets.utils.logging.set_verbosity(log_level)
- transformers.utils.logging.set_verbosity(log_level)
- transformers.utils.logging.enable_default_handler()
- transformers.utils.logging.enable_explicit_format()
-
- # Log on each process the small summary:
- logger.warning(
- f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
- )
- logger.info(f"Training/evaluation parameters {training_args}")
-
- # Detecting last checkpoint.
- last_checkpoint = None
- if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
- last_checkpoint = get_last_checkpoint(training_args.output_dir)
- if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
- raise ValueError(
- f"Output directory ({training_args.output_dir}) already exists and is not empty. "
- "Use --overwrite_output_dir to overcome."
- )
- elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
- logger.info(
- f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
- "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
- )
-
- # Set seed before initializing model.
- set_seed(training_args.seed)
-
- # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
- # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
- #
- # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the
- # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named
- # label if at least two columns are provided.
- #
- # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this
- # single column. You can easily tweak this behavior (see below)
- #
- # In distributed training, the load_dataset function guarantee that only one local process can concurrently
- # download the dataset.
- if data_args.task_name is not None:
- # Downloading and loading a dataset from the hub.
- raw_datasets = load_dataset(
- "glue",
- data_args.task_name,
- cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
- )
- elif data_args.dataset_name is not None:
- # Downloading and loading a dataset from the hub.
- raw_datasets = load_dataset(
- data_args.dataset_name,
- data_args.dataset_config_name,
- cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
- )
- else:
- # Loading a dataset from your local files.
- # CSV/JSON training and evaluation files are needed.
- data_files = {"train": data_args.train_file, "validation": data_args.validation_file}
-
- # Get the test dataset: you can provide your own CSV/JSON test file (see below)
- # when you use `do_predict` without specifying a GLUE benchmark task.
- if training_args.do_predict:
- if data_args.test_file is not None:
- train_extension = data_args.train_file.split(".")[-1]
- test_extension = data_args.test_file.split(".")[-1]
- assert (
- test_extension == train_extension
- ), "`test_file` should have the same extension (csv or json) as `train_file`."
- data_files["test"] = data_args.test_file
- else:
- raise ValueError("Need either a GLUE task or a test file for `do_predict`.")
-
- for key in data_files.keys():
- logger.info(f"load a local file for {key}: {data_files[key]}")
-
- if data_args.train_file.endswith(".csv"):
- # Loading a dataset from local csv files
- raw_datasets = load_dataset(
- "csv",
- data_files=data_files,
- cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
- )
- else:
- # Loading a dataset from local json files
- raw_datasets = load_dataset(
- "json",
- data_files=data_files,
- cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
- )
- # See more about loading any type of standard or custom dataset at
- # https://huggingface.co/docs/datasets/loading_datasets.html.
-
- # Labels
- if data_args.task_name is not None:
- is_regression = data_args.task_name == "stsb"
- if not is_regression:
- label_list = raw_datasets["train"].features["label"].names
- num_labels = len(label_list)
- else:
- num_labels = 1
- else:
- # Trying to have good defaults here, don't hesitate to tweak to your needs.
- is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"]
- if is_regression:
- num_labels = 1
- else:
- # A useful fast method:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
- label_list = raw_datasets["train"].unique("label")
- label_list.sort() # Let's sort it for determinism
- num_labels = len(label_list)
-
- # Load pretrained model and tokenizer
- #
- # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
- # download model & vocab.
- config = AutoConfig.from_pretrained(
- model_args.config_name if model_args.config_name else model_args.model_name_or_path,
- num_labels=num_labels,
- finetuning_task=data_args.task_name,
- cache_dir=model_args.cache_dir,
- revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
- )
- tokenizer = AutoTokenizer.from_pretrained(
- model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
- cache_dir=model_args.cache_dir,
- use_fast=model_args.use_fast_tokenizer,
- revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
- )
- model = AutoModelForSequenceClassification.from_pretrained(
- model_args.model_name_or_path,
- from_tf=bool(".ckpt" in model_args.model_name_or_path),
- config=config,
- cache_dir=model_args.cache_dir,
- revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
- )
-
- # Preprocessing the raw_datasets
- if data_args.task_name is not None:
- sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
- else:
- # Again, we try to have some nice defaults but don't hesitate to tweak to your use case.
- non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"]
- if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names:
- sentence1_key, sentence2_key = "sentence1", "sentence2"
- else:
- if len(non_label_column_names) >= 2:
- sentence1_key, sentence2_key = non_label_column_names[:2]
- else:
- sentence1_key, sentence2_key = non_label_column_names[0], None
-
- # Padding strategy
- if data_args.pad_to_max_length:
- padding = "max_length"
- else:
- # We will pad later, dynamically at batch creation, to the max sequence length in each batch
- padding = False
-
- # Some models have set the order of the labels to use, so let's make sure we do use it.
- label_to_id = None
- if (
- model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id
- and data_args.task_name is not None
- and not is_regression
- ):
- # Some have all caps in their config, some don't.
- label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
- label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
- else:
- logger.warning(
- "Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
- "\nIgnoring the model labels as a result.",
- )
- elif data_args.task_name is None and not is_regression:
- label_to_id = {v: i for i, v in enumerate(label_list)}
-
- if label_to_id is not None:
- model.config.label2id = label_to_id
- model.config.id2label = {id: label for label, id in config.label2id.items()}
- elif data_args.task_name is not None and not is_regression:
- model.config.label2id = {l: i for i, l in enumerate(label_list)}
- model.config.id2label = {id: label for label, id in config.label2id.items()}
-
- if data_args.max_seq_length > tokenizer.model_max_length:
- logger.warning(
- f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
- f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
- )
- max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
-
- def preprocess_function(examples):
- # Tokenize the texts
- args = (
- (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key])
- )
- result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True)
-
- # Map labels to IDs (not necessary for GLUE tasks)
- if label_to_id is not None and "label" in examples:
- result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]]
- return result
-
- with training_args.main_process_first(desc="dataset map pre-processing"):
- raw_datasets = raw_datasets.map(
- preprocess_function,
- batched=True,
- load_from_cache_file=not data_args.overwrite_cache,
- desc="Running tokenizer on dataset",
- )
- if training_args.do_train:
- if "train" not in raw_datasets:
- raise ValueError("--do_train requires a train dataset")
- train_dataset = raw_datasets["train"]
- if data_args.max_train_samples is not None:
- max_train_samples = min(len(train_dataset), data_args.max_train_samples)
- train_dataset = train_dataset.select(range(max_train_samples))
-
- if training_args.do_eval:
- if "validation" not in raw_datasets and "validation_matched" not in raw_datasets:
- raise ValueError("--do_eval requires a validation dataset")
- eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"]
- if data_args.max_eval_samples is not None:
- max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
- eval_dataset = eval_dataset.select(range(max_eval_samples))
-
- if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None:
- if "test" not in raw_datasets and "test_matched" not in raw_datasets:
- raise ValueError("--do_predict requires a test dataset")
- predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"]
- if data_args.max_predict_samples is not None:
- max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples)
- predict_dataset = predict_dataset.select(range(max_predict_samples))
-
- # Log a few random samples from the training set:
- if training_args.do_train:
- for index in random.sample(range(len(train_dataset)), 3):
- logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
-
- # Get the metric function
- if data_args.task_name is not None:
- metric = load_metric("glue", data_args.task_name)
- else:
- metric = load_metric("accuracy")
-
- # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
- # predictions and label_ids field) and has to return a dictionary string to float.
- def compute_metrics(p: EvalPrediction):
- preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
- preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1)
- if data_args.task_name is not None:
- result = metric.compute(predictions=preds, references=p.label_ids)
- if len(result) > 1:
- result["combined_score"] = np.mean(list(result.values())).item()
- return result
- elif is_regression:
- return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
- else:
- return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}
-
- # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to Trainer, so we change it if
- # we already did the padding.
- if data_args.pad_to_max_length:
- data_collator = default_data_collator
- elif training_args.fp16:
- data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)
- else:
- data_collator = None
-
- # Initialize our Trainer
- trainer = Trainer(
- model=model,
- args=training_args,
- train_dataset=train_dataset if training_args.do_train else None,
- eval_dataset=eval_dataset if training_args.do_eval else None,
- compute_metrics=compute_metrics,
- tokenizer=tokenizer,
- data_collator=data_collator,
- )
-
- # Training
- if training_args.do_train:
- checkpoint = None
- if training_args.resume_from_checkpoint is not None:
- checkpoint = training_args.resume_from_checkpoint
- elif last_checkpoint is not None:
- checkpoint = last_checkpoint
- train_result = trainer.train(resume_from_checkpoint=checkpoint)
- metrics = train_result.metrics
- max_train_samples = (
- data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
- )
- metrics["train_samples"] = min(max_train_samples, len(train_dataset))
-
- trainer.save_model() # Saves the tokenizer too for easy upload
-
- trainer.log_metrics("train", metrics)
- trainer.save_metrics("train", metrics)
- trainer.save_state()
-
- # Evaluation
- if training_args.do_eval:
- logger.info("*** Evaluate ***")
-
- # Loop to handle MNLI double evaluation (matched, mis-matched)
- tasks = [data_args.task_name]
- eval_datasets = [eval_dataset]
- if data_args.task_name == "mnli":
- tasks.append("mnli-mm")
- eval_datasets.append(raw_datasets["validation_mismatched"])
- combined = {}
-
- for eval_dataset, task in zip(eval_datasets, tasks):
- metrics = trainer.evaluate(eval_dataset=eval_dataset)
-
- max_eval_samples = (
- data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
- )
- metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
-
- if task == "mnli-mm":
- metrics = {k + "_mm": v for k, v in metrics.items()}
- if task is not None and "mnli" in task:
- combined.update(metrics)
-
- trainer.log_metrics("eval", metrics)
- trainer.save_metrics("eval", combined if task is not None and "mnli" in task else metrics)
-
- if training_args.do_predict:
- logger.info("*** Predict ***")
-
- # Loop to handle MNLI double evaluation (matched, mis-matched)
- tasks = [data_args.task_name]
- predict_datasets = [predict_dataset]
- if data_args.task_name == "mnli":
- tasks.append("mnli-mm")
- predict_datasets.append(raw_datasets["test_mismatched"])
-
- for predict_dataset, task in zip(predict_datasets, tasks):
- # Removing the `label` columns because it contains -1 and Trainer won't like that.
- predict_dataset = predict_dataset.remove_columns("label")
- predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions
- predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1)
-
- output_predict_file = os.path.join(training_args.output_dir, f"predict_results_{task}.txt")
- if trainer.is_world_process_zero():
- with open(output_predict_file, "w") as writer:
- logger.info(f"***** Predict results {task} *****")
- writer.write("index\tprediction\n")
- for index, item in enumerate(predictions):
- if is_regression:
- writer.write(f"{index}\t{item:3.3f}\n")
- else:
- item = label_list[item]
- writer.write(f"{index}\t{item}\n")
-
- kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"}
- if data_args.task_name is not None:
- kwargs["language"] = "en"
- kwargs["dataset_tags"] = "glue"
- kwargs["dataset_args"] = data_args.task_name
- kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}"
-
- if training_args.push_to_hub:
- trainer.push_to_hub(**kwargs)
- else:
- trainer.create_model_card(**kwargs)
-
-
-def _mp_fn(index):
- # For xla_spawn (TPUs)
- main()
-
-
-if __name__ == "__main__":
- main()
diff --git a/neural_coder/examples/onnx/onnx_model.py b/neural_coder/examples/onnx/onnx_model.py
deleted file mode 100644
index 8e2c7b2d928..00000000000
--- a/neural_coder/examples/onnx/onnx_model.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import logging
-import argparse
-
-import onnx
-import yaml
-
-from pycocotools.coco import COCO
-from pycocotools.mask import iou, encode
-import numpy as np
-from torchvision import transforms
-from PIL import Image
-from onnx import numpy_helper
-import os
-import onnxruntime
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
- datefmt = '%m/%d/%Y %H:%M:%S',
- level = logging.WARN)
-logger.info("Evaluating ONNXRuntime full precision accuracy and performance:")
-parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-parser.add_argument(
- '--model_path',
- type=str,
- help="Pre-trained model on onnx file"
-)
-parser.add_argument(
- '--label_path',
- type=str,
- help="Annotation file path"
-)
-parser.add_argument(
- '--data_path',
- type=str,
- help="Path to val2017 of COCO"
-)
-parser.add_argument(
- '--benchmark',
- action='store_true', \
- default=False
-)
-parser.add_argument(
- '--tune',
- action='store_true', \
- default=False,
- help="whether quantize the model"
-)
-parser.add_argument(
- '--config',
- type=str,
- help="config yaml path"
-)
-parser.add_argument(
- '--output_model',
- type=str,
- help="output model path"
-)
-parser.add_argument(
- '--mode',
- type=str,
- help="benchmark mode of performance or accuracy"
-)
-args = parser.parse_args()
-
-# key = COCO id, value = Pascal VOC id
-COCO_TO_VOC = {
- 1: 15, # person
- 2: 2, # bicycle
- 3: 7, # car
- 4: 14, # motorbike
- 5: 1, # airplane
- 6: 6, # bus
- 7: 19, # train
- 9: 4, # boat
- 16: 3, # bird
- 17: 8, # cat
- 18: 12, # dog
- 19: 13, # horse
- 20: 17, # sheep
- 21: 10, # cow
- 44: 5, # bottle
- 62: 9, # chair
- 63: 18, # couch/sofa
- 64: 16, # potted plant
- 67: 11, # dining table
- 72: 20, # tv
-}
-VOC_CAT_IDS = list(COCO_TO_VOC.keys())
-cocoGt = COCO(str(args.label_path))
-
-preprocess = transforms.Compose([
- transforms.ToTensor(),
- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-])
-
-class Dataset:
- def __init__(self):
- imgIds = self.getImgIdsUnion(cocoGt, VOC_CAT_IDS)
- self.data = []
- for imgId in imgIds:
- img_path = os.path.join(args.data_path, cocoGt.imgs[imgId]['file_name'])
- if os.path.exists(img_path):
- input_tensor = self.load_image(img_path)
-
- _, height, width = input_tensor.shape
- output_tensor = np.zeros((21, height, width), dtype=np.uint8)
-
- annIds = cocoGt.getAnnIds(imgId, VOC_CAT_IDS)
- for ann in cocoGt.loadAnns(annIds):
- mask = cocoGt.annToMask(ann)
- output_tensor[COCO_TO_VOC[ann['category_id']]] |= mask
-
- # Set everything not labeled to be background
- output_tensor[0] = 1 - np.max(output_tensor, axis=0)
- self.data.append((input_tensor, output_tensor))
-
- def __len__(self):
- return len(self.data)
-
- def __getitem__(self, index):
- return self.data[index]
-
- def getImgIdsUnion(self, gt, catIds):
- """
- Returns all the images that have *any* of the categories in `catIds`,
- unlike the built-in `gt.getImgIds` which returns all the images containing
- *all* of the categories in `catIds`.
- """
- imgIds = set()
- for catId in catIds:
- imgIds |= set(gt.catToImgs[catId])
- return list(imgIds)
-
- def load_image(self, img_path):
- input_image = Image.open(img_path).convert('RGB')
- input_tensor = preprocess(input_image)
- input_tensor = input_tensor.detach().cpu().numpy()
- return input_tensor
-
-def iou(model_tensor, target_tensor):
- # Don't include the background when summing
- model_tensor = model_tensor[:, 1:, :, :]
- target_tensor = target_tensor[:, 1:, :, :]
-
- intersection = np.sum(np.logical_and(model_tensor, target_tensor))
- union = np.sum(np.logical_or(model_tensor, target_tensor))
-
- if union == 0:
- # Can only happen if nothing was there and nothing was predicted,
- # which is a perfect score
- return 1
- else:
- return intersection / union
-
-def evaluate(model, dataloader):
- totalIoU = 0
- sess = onnxruntime.InferenceSession(model.SerializeToString(), None)
- idx = 1
- for input_tensor, target_tensor in dataloader:
- input_tensor = input_tensor[np.newaxis, ...]
- target_tensor = target_tensor[np.newaxis, ...]
- model_tensor = sess.run(["out"], {"input": input_tensor})[0]
-
- batch_size, nclasses, height, width = model_tensor.shape
- raw_labels = np.argmax(model_tensor, axis=1).astype(np.uint8)
-
- output_tensor = np.zeros((nclasses, batch_size, height, width), dtype=np.uint8)
- for c in range(nclasses):
- output_tensor[c][raw_labels==c] = 1
-
- output_tensor = np.transpose(output_tensor, [1, 0, 2, 3])
- totalIoU += iou(output_tensor, target_tensor)
- idx += 1
- return totalIoU / idx
-
-if __name__ == "__main__":
- from neural_compressor.experimental import common
- ds = Dataset()
- dataloader = common.DataLoader(ds)
- model = onnx.load(args.model_path)
- def eval(model):
- return evaluate(model, ds)
-
- if args.benchmark and args.mode == "accuracy":
- results = eval(model)
- print("Batch size = 1")
- print("Accuracy: %.5f" % results)
-
- if args.benchmark and args.mode == "performance":
- from neural_compressor.experimental import Benchmark, common
- evaluator = Benchmark(args.config)
- evaluator.model = common.Model(model)
- evaluator.b_dataloader = common.DataLoader(ds)
- evaluator(args.mode)
diff --git a/neural_coder/examples/vision/alexnet.py b/neural_coder/examples/vision/alexnet.py
deleted file mode 100644
index 10ac6d8b080..00000000000
--- a/neural_coder/examples/vision/alexnet.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import torch
-import torchvision.models as models
-model = models.alexnet(pretrained=True)
-model.eval()
-batch_size = 1
-input = torch.rand(batch_size, 3, 224, 224)
-with torch.no_grad():
- model(input)
diff --git a/neural_coder/examples/vision/main.py b/neural_coder/examples/vision/main.py
deleted file mode 100644
index 3b9bfd65298..00000000000
--- a/neural_coder/examples/vision/main.py
+++ /dev/null
@@ -1,504 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# code source
-# https://github.com/pytorch/examples/blob/main/imagenet/main.py
-
-import argparse
-import os
-import random
-import shutil
-import time
-import warnings
-from enum import Enum
-
-import torch
-import torch.nn as nn
-import torch.nn.parallel
-import torch.backends.cudnn as cudnn
-import torch.distributed as dist
-import torch.optim
-from torch.optim.lr_scheduler import StepLR
-import torch.multiprocessing as mp
-import torch.utils.data
-import torch.utils.data.distributed
-import torchvision.transforms as transforms
-import torchvision.datasets as datasets
-import torchvision.models as models
-from torch.utils.data import Subset
-
-model_names = models.list_models(module=models)
-
-parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-parser.add_argument('data', metavar='DIR', nargs='?', default='imagenet',
- help='path to dataset (default: imagenet)')
-parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
- choices=model_names,
- help='model architecture: ' +
- ' | '.join(model_names) +
- ' (default: resnet18)')
-parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
- help='number of data loading workers (default: 4)')
-parser.add_argument('--epochs', default=90, type=int, metavar='N',
- help='number of total epochs to run')
-parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
- help='manual epoch number (useful on restarts)')
-parser.add_argument('-b', '--batch-size', default=256, type=int,
- metavar='N',
- help='mini-batch size (default: 256), this is the total '
- 'batch size of all GPUs on the current node when '
- 'using Data Parallel or Distributed Data Parallel')
-parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
- metavar='LR', help='initial learning rate', dest='lr')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
- help='momentum')
-parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
- metavar='W', help='weight decay (default: 1e-4)',
- dest='weight_decay')
-parser.add_argument('-p', '--print-freq', default=10, type=int,
- metavar='N', help='print frequency (default: 10)')
-parser.add_argument('--resume', default='', type=str, metavar='PATH',
- help='path to latest checkpoint (default: none)')
-parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
- help='evaluate model on validation set')
-parser.add_argument('--pretrained', dest='pretrained', action='store_true',
- help='use pre-trained model')
-parser.add_argument('--world-size', default=-1, type=int,
- help='number of nodes for distributed training')
-parser.add_argument('--rank', default=-1, type=int,
- help='node rank for distributed training')
-parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
- help='url used to set up distributed training')
-parser.add_argument('--dist-backend', default='nccl', type=str,
- help='distributed backend')
-parser.add_argument('--seed', default=None, type=int,
- help='seed for initializing training. ')
-parser.add_argument('--gpu', default=None, type=int,
- help='GPU id to use.')
-parser.add_argument('--multiprocessing-distributed', action='store_true',
- help='Use multi-processing distributed training to launch '
- 'N processes per node, which has N GPUs. This is the '
- 'fastest way to use PyTorch for either single node or '
- 'multi node data parallel training')
-parser.add_argument('--dummy', action='store_true', help="use fake data to benchmark")
-
-best_acc1 = 0
-
-
-def main():
- args = parser.parse_args()
-
- if args.seed is not None:
- random.seed(args.seed)
- torch.manual_seed(args.seed)
- cudnn.deterministic = True
- warnings.warn('You have chosen to seed training. '
- 'This will turn on the CUDNN deterministic setting, '
- 'which can slow down your training considerably! '
- 'You may see unexpected behavior when restarting '
- 'from checkpoints.')
-
- if args.gpu is not None:
- warnings.warn('You have chosen a specific GPU. This will completely '
- 'disable data parallelism.')
-
- if args.dist_url == "env://" and args.world_size == -1:
- args.world_size = int(os.environ["WORLD_SIZE"])
-
- args.distributed = args.world_size > 1 or args.multiprocessing_distributed
-
- ngpus_per_node = torch.cuda.device_count()
- if args.multiprocessing_distributed:
- # Since we have ngpus_per_node processes per node, the total world_size
- # needs to be adjusted accordingly
- args.world_size = ngpus_per_node * args.world_size
- # Use torch.multiprocessing.spawn to launch distributed processes: the
- # main_worker process function
- mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
- else:
- # Simply call main_worker function
- main_worker(args.gpu, ngpus_per_node, args)
-
-
-def main_worker(gpu, ngpus_per_node, args):
- global best_acc1
- args.gpu = gpu
-
- if args.gpu is not None:
- print("Use GPU: {} for training".format(args.gpu))
-
- if args.distributed:
- if args.dist_url == "env://" and args.rank == -1:
- args.rank = int(os.environ["RANK"])
- if args.multiprocessing_distributed:
- # For multiprocessing distributed training, rank needs to be the
- # global rank among all the processes
- args.rank = args.rank * ngpus_per_node + gpu
- dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
- world_size=args.world_size, rank=args.rank)
- # create model
- if args.pretrained:
- print("=> using pre-trained model '{}'".format(args.arch))
- model = models.__dict__[args.arch](pretrained=True)
- else:
- print("=> creating model '{}'".format(args.arch))
- model = models.__dict__[args.arch]()
-
- if not torch.cuda.is_available():
- print('using CPU, this will be slow')
- elif args.distributed:
- # For multiprocessing distributed, DistributedDataParallel constructor
- # should always set the single device scope, otherwise,
- # DistributedDataParallel will use all available devices.
- if args.gpu is not None:
- torch.cuda.set_device(args.gpu)
- model.cuda(args.gpu)
- # When using a single GPU per process and per
- # DistributedDataParallel, we need to divide the batch size
- # ourselves based on the total number of GPUs of the current node.
- args.batch_size = int(args.batch_size / ngpus_per_node)
- args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
- model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
- else:
- model.cuda()
- # DistributedDataParallel will divide and allocate batch_size to all
- # available GPUs if device_ids are not set
- model = torch.nn.parallel.DistributedDataParallel(model)
- elif args.gpu is not None:
- torch.cuda.set_device(args.gpu)
- model = model.cuda(args.gpu)
- else:
- # DataParallel will divide and allocate batch_size to all available GPUs
- if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
- model.features = torch.nn.DataParallel(model.features)
- model.cuda()
- else:
- model = torch.nn.DataParallel(model).cuda()
-
- # define loss function (criterion), optimizer, and learning rate scheduler
- criterion = nn.CrossEntropyLoss().cuda(args.gpu)
-
- optimizer = torch.optim.SGD(model.parameters(), args.lr,
- momentum=args.momentum,
- weight_decay=args.weight_decay)
-
- """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
- scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
-
- # optionally resume from a checkpoint
- if args.resume:
- if os.path.isfile(args.resume):
- print("=> loading checkpoint '{}'".format(args.resume))
- if args.gpu is None:
- checkpoint = torch.load(args.resume)
- else:
- # Map model to be loaded to specified single gpu.
- loc = 'cuda:{}'.format(args.gpu)
- checkpoint = torch.load(args.resume, map_location=loc)
- args.start_epoch = checkpoint['epoch']
- best_acc1 = checkpoint['best_acc1']
- if args.gpu is not None:
- # best_acc1 may be from a checkpoint from a different GPU
- best_acc1 = best_acc1.to(args.gpu)
- model.load_state_dict(checkpoint['state_dict'])
- optimizer.load_state_dict(checkpoint['optimizer'])
- scheduler.load_state_dict(checkpoint['scheduler'])
- print("=> loaded checkpoint '{}' (epoch {})"
- .format(args.resume, checkpoint['epoch']))
- else:
- print("=> no checkpoint found at '{}'".format(args.resume))
-
- cudnn.benchmark = True
-
- # Data loading code
- if args.dummy:
- print("=> Dummy data is used!")
- train_dataset = datasets.FakeData(1281167, (3, 224, 224), 1000, transforms.ToTensor())
- val_dataset = datasets.FakeData(50000, (3, 224, 224), 1000, transforms.ToTensor())
- else:
- traindir = os.path.join(args.data, 'train')
- valdir = os.path.join(args.data, 'val')
- normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
- std=[0.229, 0.224, 0.225])
-
- train_dataset = datasets.ImageFolder(
- traindir,
- transforms.Compose([
- transforms.RandomResizedCrop(224),
- transforms.RandomHorizontalFlip(),
- transforms.ToTensor(),
- normalize,
- ]))
-
- val_dataset = datasets.ImageFolder(
- valdir,
- transforms.Compose([
- transforms.Resize(256),
- transforms.CenterCrop(224),
- transforms.ToTensor(),
- normalize,
- ]))
-
- if args.distributed:
- train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
- val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=True)
- else:
- train_sampler = None
- val_sampler = None
-
- train_loader = torch.utils.data.DataLoader(
- train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
- num_workers=args.workers, pin_memory=True, sampler=train_sampler)
-
- val_loader = torch.utils.data.DataLoader(
- val_dataset, batch_size=args.batch_size, shuffle=False,
- num_workers=args.workers, pin_memory=True, sampler=val_sampler)
-
- if args.evaluate:
- validate(val_loader, model, criterion, args)
- return
-
- for epoch in range(args.start_epoch, args.epochs):
- if args.distributed:
- train_sampler.set_epoch(epoch)
-
- # train for one epoch
- train(train_loader, model, criterion, optimizer, epoch, args)
-
- # evaluate on validation set
- acc1 = validate(val_loader, model, criterion, args)
-
- scheduler.step()
-
-
- # remember best acc@1 and save checkpoint
- is_best = acc1 > best_acc1
- best_acc1 = max(acc1, best_acc1)
-
- if not args.multiprocessing_distributed or (args.multiprocessing_distributed
- and args.rank % ngpus_per_node == 0):
- save_checkpoint({
- 'epoch': epoch + 1,
- 'arch': args.arch,
- 'state_dict': model.state_dict(),
- 'best_acc1': best_acc1,
- 'optimizer' : optimizer.state_dict(),
- 'scheduler' : scheduler.state_dict()
- }, is_best)
-
-
-def train(train_loader, model, criterion, optimizer, epoch, args):
- batch_time = AverageMeter('Time', ':6.3f')
- data_time = AverageMeter('Data', ':6.3f')
- losses = AverageMeter('Loss', ':.4e')
- top1 = AverageMeter('Acc@1', ':6.2f')
- top5 = AverageMeter('Acc@5', ':6.2f')
- progress = ProgressMeter(
- len(train_loader),
- [batch_time, data_time, losses, top1, top5],
- prefix="Epoch: [{}]".format(epoch))
-
- # switch to train mode
- model.train()
-
- end = time.time()
- for i, (images, target) in enumerate(train_loader):
- # measure data loading time
- data_time.update(time.time() - end)
-
- if args.gpu is not None:
- images = images.cuda(args.gpu, non_blocking=True)
- if torch.cuda.is_available():
- target = target.cuda(args.gpu, non_blocking=True)
-
- # compute output
- output = model(images)
- loss = criterion(output, target)
-
- # measure accuracy and record loss
- acc1, acc5 = accuracy(output, target, topk=(1, 5))
- losses.update(loss.item(), images.size(0))
- top1.update(acc1[0], images.size(0))
- top5.update(acc5[0], images.size(0))
-
- # compute gradient and do SGD step
- optimizer.zero_grad()
- loss.backward()
- optimizer.step()
-
- # measure elapsed time
- batch_time.update(time.time() - end)
- end = time.time()
-
- if i % args.print_freq == 0:
- progress.display(i + 1)
-
-
-def validate(val_loader, model, criterion, args):
-
- def run_validate(loader, base_progress=0):
- with torch.no_grad():
- end = time.time()
- for i, (images, target) in enumerate(loader):
- i = base_progress + i
- if args.gpu is not None:
- images = images.cuda(args.gpu, non_blocking=True)
- if torch.cuda.is_available():
- target = target.cuda(args.gpu, non_blocking=True)
-
- # compute output
- output = model(images)
- loss = criterion(output, target)
-
- # measure accuracy and record loss
- acc1, acc5 = accuracy(output, target, topk=(1, 5))
- losses.update(loss.item(), images.size(0))
- top1.update(acc1[0], images.size(0))
- top5.update(acc5[0], images.size(0))
-
- # measure elapsed time
- batch_time.update(time.time() - end)
- end = time.time()
-
- if i % args.print_freq == 0:
- progress.display(i + 1)
-
- batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
- losses = AverageMeter('Loss', ':.4e', Summary.NONE)
- top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
- top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
- progress = ProgressMeter(
- len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset))),
- [batch_time, losses, top1, top5],
- prefix='Test: ')
-
- # switch to evaluate mode
- model.eval()
-
- run_validate(val_loader)
- if args.distributed:
- top1.all_reduce()
- top5.all_reduce()
-
- if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)):
- aux_val_dataset = Subset(val_loader.dataset,
- range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset)))
- aux_val_loader = torch.utils.data.DataLoader(
- aux_val_dataset, batch_size=args.batch_size, shuffle=False,
- num_workers=args.workers, pin_memory=True)
- run_validate(aux_val_loader, len(val_loader))
-
- progress.display_summary()
-
- return top1.avg
-
-
-def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
- torch.save(state, filename)
- if is_best:
- shutil.copyfile(filename, 'model_best.pth.tar')
-
-class Summary(Enum):
- NONE = 0
- AVERAGE = 1
- SUM = 2
- COUNT = 3
-
-class AverageMeter(object):
- """Computes and stores the average and current value"""
- def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
- self.name = name
- self.fmt = fmt
- self.summary_type = summary_type
- self.reset()
-
- def reset(self):
- self.val = 0
- self.avg = 0
- self.sum = 0
- self.count = 0
-
- def update(self, val, n=1):
- self.val = val
- self.sum += val * n
- self.count += n
- self.avg = self.sum / self.count
-
- def all_reduce(self):
- total = torch.FloatTensor([self.sum, self.count])
- dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
- self.sum, self.count = total.tolist()
- self.avg = self.sum / self.count
-
- def __str__(self):
- fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
- return fmtstr.format(**self.__dict__)
-
- def summary(self):
- fmtstr = ''
- if self.summary_type is Summary.NONE:
- fmtstr = ''
- elif self.summary_type is Summary.AVERAGE:
- fmtstr = '{name} {avg:.3f}'
- elif self.summary_type is Summary.SUM:
- fmtstr = '{name} {sum:.3f}'
- elif self.summary_type is Summary.COUNT:
- fmtstr = '{name} {count:.3f}'
- else:
- raise ValueError('invalid summary type %r' % self.summary_type)
-
- return fmtstr.format(**self.__dict__)
-
-
-class ProgressMeter(object):
- def __init__(self, num_batches, meters, prefix=""):
- self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
- self.meters = meters
- self.prefix = prefix
-
- def display(self, batch):
- entries = [self.prefix + self.batch_fmtstr.format(batch)]
- entries += [str(meter) for meter in self.meters]
- print('\t'.join(entries))
-
- def display_summary(self):
- entries = [" *"]
- entries += [meter.summary() for meter in self.meters]
- print(' '.join(entries))
-
- def _get_batch_fmtstr(self, num_batches):
- num_digits = len(str(num_batches // 1))
- fmt = '{:' + str(num_digits) + 'd}'
- return '[' + fmt + '/' + fmt.format(num_batches) + ']'
-
-def accuracy(output, target, topk=(1,)):
- """Computes the accuracy over the k top predictions for the specified values of k"""
- with torch.no_grad():
- maxk = max(topk)
- batch_size = target.size(0)
-
- _, pred = output.topk(maxk, 1, True, True)
- pred = pred.t()
- correct = pred.eq(target.view(1, -1).expand_as(pred))
-
- res = []
- for k in topk:
- correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
- res.append(correct_k.mul_(100.0 / batch_size))
- return res
-
-
-if __name__ == '__main__':
- main()
diff --git a/neural_coder/examples/vision/resnet18.py b/neural_coder/examples/vision/resnet18.py
deleted file mode 100644
index a7fadf9c70b..00000000000
--- a/neural_coder/examples/vision/resnet18.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import torch
-import torchvision.models as models
-model = models.resnet18(pretrained=True)
-model.eval()
-batch_size = 1
-input = torch.rand(batch_size, 3, 224, 224)
-with torch.no_grad():
- model(input)
diff --git a/neural_coder/examples/vision/resnet50.py b/neural_coder/examples/vision/resnet50.py
deleted file mode 100644
index c7091e2e7bc..00000000000
--- a/neural_coder/examples/vision/resnet50.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import torch
-import torchvision.models as models
-model = models.resnet50(pretrained=True)
-model.eval()
-batch_size = 1
-input = torch.rand(batch_size, 3, 224, 224)
-with torch.no_grad():
- model(input)
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/.eslintignore b/neural_coder/extensions/neural_compressor_ext_lab/.eslintignore
deleted file mode 100644
index fffa32fdf63..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/.eslintignore
+++ /dev/null
@@ -1,8 +0,0 @@
-node_modules
-dist
-coverage
-**/*.d.ts
-tests
-
-**/__tests__
-ui-tests
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/.eslintrc.js b/neural_coder/extensions/neural_compressor_ext_lab/.eslintrc.js
deleted file mode 100644
index c64b3721828..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/.eslintrc.js
+++ /dev/null
@@ -1,44 +0,0 @@
-module.exports = {
- extends: [
- 'eslint:recommended',
- 'plugin:@typescript-eslint/eslint-recommended',
- 'plugin:@typescript-eslint/recommended',
- 'plugin:prettier/recommended'
- ],
- parser: '@typescript-eslint/parser',
- parserOptions: {
- project: 'tsconfig.json',
- sourceType: 'module'
- },
- plugins: ['@typescript-eslint'],
- rules: {
- '@typescript-eslint/naming-convention': [
- 'error',
- {
- selector: 'interface',
- format: ['PascalCase'],
- custom: {
- regex: '^I[A-Z]',
- match: true
- }
- }
- ],
- '@typescript-eslint/no-unused-vars': ['warn', { args: 'none' }],
- '@typescript-eslint/no-explicit-any': 'off',
- '@typescript-eslint/no-namespace': 'off',
- '@typescript-eslint/no-use-before-define': 'off',
- '@typescript-eslint/quotes': [
- 'error',
- 'single',
- { avoidEscape: true, allowTemplateLiterals: false }
- ],
- node: {
- fs: 'empty',
- net:'empty',
- tls:'empty',
-},
- curly: ['error', 'all'],
- eqeqeq: 'error',
- 'prefer-arrow-callback': 'error'
- }
-};
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/.prettierignore b/neural_coder/extensions/neural_compressor_ext_lab/.prettierignore
deleted file mode 100644
index 0de58a6f50b..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/.prettierignore
+++ /dev/null
@@ -1,5 +0,0 @@
-node_modules
-**/node_modules
-**/lib
-**/package.json
-neural_compressor_ext_lab
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/.prettierrc b/neural_coder/extensions/neural_compressor_ext_lab/.prettierrc
deleted file mode 100644
index d0824a69c14..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/.prettierrc
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "singleQuote": true,
- "trailingComma": "none",
- "arrowParens": "avoid",
- "endOfLine": "auto"
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/.stylelintrc b/neural_coder/extensions/neural_compressor_ext_lab/.stylelintrc
deleted file mode 100644
index 0e1ff30327c..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/.stylelintrc
+++ /dev/null
@@ -1,12 +0,0 @@
-{
- "extends": [
- "stylelint-config-recommended",
- "stylelint-config-standard",
- "stylelint-prettier/recommended"
- ],
- "rules": {
- "property-no-vendor-prefix": null,
- "selector-no-vendor-prefix": null,
- "value-no-vendor-prefix": null
- }
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/CHANGELOG.md b/neural_coder/extensions/neural_compressor_ext_lab/CHANGELOG.md
deleted file mode 100644
index 2d352af421a..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/CHANGELOG.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Changelog
-
-
-
-
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/DEVELOP.md b/neural_coder/extensions/neural_compressor_ext_lab/DEVELOP.md
deleted file mode 100644
index 7141373e783..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/DEVELOP.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# neural_compressor_ext_lab
-
-A JupyterLab extension.
-
-## Requirements
-
-- JupyterLab >= 3.0
-
-## Install
-
-To install the extension, execute:
-
-```bash
-pip install neural_compressor_ext_lab
-```
-
-## Uninstall
-
-To remove the extension, execute:
-
-```bash
-pip uninstall neural_compressor_ext_lab
-```
-
-## Contributing
-
-### Development install
-
-Note: You will need NodeJS to build the extension package.
-
-The `jlpm` command is JupyterLab's pinned version of
-[yarn](https://yarnpkg.com/) that is installed with JupyterLab. You may use
-`yarn` or `npm` in lieu of `jlpm` below.
-
-```bash
-# Clone the repo to your local environment
-# Change directory to the neural_compressor_ext_lab directory
-# Install package in development mode
-pip install -e .
-# Link your development version of the extension with JupyterLab
-jupyter labextension develop . --overwrite
-# Rebuild extension Typescript source after making changes
-jlpm build
-```
-
-You can watch the source directory and run JupyterLab at the same time in different terminals to watch for changes in the extension's source and automatically rebuild the extension.
-
-```bash
-# Watch the source directory in one terminal, automatically rebuilding when needed
-jlpm watch
-# Run JupyterLab in another terminal
-jupyter lab
-```
-
-With the watch command running, every saved change will immediately be built locally and available in your running JupyterLab. Refresh JupyterLab to load the change in your browser (you may need to wait several seconds for the extension to be rebuilt).
-
-By default, the `jlpm build` command generates the source maps for this extension to make it easier to debug using the browser dev tools. To also generate source maps for the JupyterLab core extensions, you can run the following command:
-
-```bash
-jupyter lab build --minimize=False
-```
-
-### Development uninstall
-
-```bash
-pip uninstall neural_compressor_ext_lab
-```
-
-In development mode, you will also need to remove the symlink created by `jupyter labextension develop`
-command. To find its location, you can run `jupyter labextension list` to figure out where the `labextensions`
-folder is located. Then you can remove the symlink named `neural_compressor_ext_lab` within that folder.
-
-### Packaging the extension
-
-See [RELEASE](RELEASE.md)
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/LICENSE b/neural_coder/extensions/neural_compressor_ext_lab/LICENSE
deleted file mode 100644
index d3f814da892..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-BSD 3-Clause License
-
-Copyright (c) 2022, neural_coder[C[C[C[C[C
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holder nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/MANIFEST.in b/neural_coder/extensions/neural_compressor_ext_lab/MANIFEST.in
deleted file mode 100644
index 99115494fae..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/MANIFEST.in
+++ /dev/null
@@ -1,24 +0,0 @@
-include LICENSE
-include *.md
-include pyproject.toml
-
-include package.json
-include install.json
-include ts*.json
-include yarn.lock
-
-graft neural_compressor_ext_lab/labextension
-
-# Javascript files
-graft src
-graft style
-prune **/node_modules
-prune lib
-prune binder
-
-# Patterns to exclude from any directory
-global-exclude *~
-global-exclude *.pyc
-global-exclude *.pyo
-global-exclude .git
-global-exclude .ipynb_checkpoints
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/README.md b/neural_coder/extensions/neural_compressor_ext_lab/README.md
deleted file mode 100644
index edd9e1e53a6..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-Intel® Neural Compressor as JupyterLab Extension
-===========================
-A JupyterLab Extension library supporting Neural Coder, a novel feature powered by Intel® Neural Compressor providing automatic quantization to further simplify computing performance optimizations of Deep Learning models.
-
-## Installation
-**By Extension Manager in JupyterLab (Recommended)**
-
-Search for ```jupyter-lab-neural-compressor``` in the Extension Manager in JupyterLab.
-
-**By Linux Terminal**
-```bash
-npm i jupyter-lab-neural-compressor
-jupyter labextension install jupyter-lab-neural-compressor
-```
-
-## Getting Started!
-
-As shown in the drop-down list, the supported features include "INT8 (Static Quantization)", "INT8 (Dynamic Quantization)", "BF16", and "Auto Enable & Benchmark". Each of the first three options enables a specific quantization feature into your Deep Learning scripts. The last option automatically enables all quantization features on a Deep Learning script and automatically evaluates the best performance on the model. It is a code-free solution that can help users enable quantization algorithms on a Deep Learning model with no manual coding needed.
-
-
-
-### Auto-enable a feature
-Click the run button on the left side of the drop-down list to start. After finishing, you can see the code changes for the specific optimization enabling as shown in the figure below:
-
-
-
-### Or let us help you auto-select the best feature
-The last option automatically enables each quantization feature on your Deep Learning script and automatically evaluates for the best performance among all features on your Deep Learning model. Since it will automatically run the Python script for benchmark, it requires you to enter additional parameters needed to run your Python script. If there is no additional parameter needed, you can just leave it blank:
-
-
-
-In the new cell box appeared below your Code cell boxes, you can see the execution progress, and at the end you can see which one turns out to be the best optimization and how much performance gain can it bring to your Deep Learning model:
-
-
-
-When it is finished, you can also see that the code changes for the best optimization are automatically enabled into your script:
-
-
-
-## Pre-requisites
-```bash
-apt-get update && apt-get install bc numactl
-conda install mkl mkl-include jemalloc
-pip3 install neural-compressor opencv-python-headless
-```
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/RELEASE.md b/neural_coder/extensions/neural_compressor_ext_lab/RELEASE.md
deleted file mode 100644
index bd51b356f28..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/RELEASE.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Making a new release of neural_compressor_ext_lab
-
-The extension can be published to `PyPI` and `npm` manually or using the [Jupyter Releaser](https://github.com/jupyter-server/jupyter_releaser).
-
-## Manual release
-
-### Python package
-
-This extension can be distributed as Python
-packages. All of the Python
-packaging instructions in the `pyproject.toml` file to wrap your extension in a
-Python package. Before generating a package, we first need to install `build`.
-
-```bash
-pip install build twine
-```
-
-To create a Python source package (`.tar.gz`) and the binary package (`.whl`) in the `dist/` directory, do:
-
-```bash
-python -m build
-```
-
-> `python setup.py sdist bdist_wheel` is deprecated and will not work for this package.
-
-Then to upload the package to PyPI, do:
-
-```bash
-twine upload dist/*
-```
-
-### NPM package
-
-To publish the frontend part of the extension as a NPM package, do:
-
-```bash
-npm login
-npm publish --access public
-```
-
-## Automated releases with the Jupyter Releaser
-
-The extension repository should already be compatible with the Jupyter Releaser.
-
-Check out the [workflow documentation](https://github.com/jupyter-server/jupyter_releaser#typical-workflow) for more information.
-
-Here is a summary of the steps to cut a new release:
-
-- Fork the [`jupyter-releaser` repo](https://github.com/jupyter-server/jupyter_releaser)
-- Add `ADMIN_GITHUB_TOKEN`, `PYPI_TOKEN` and `NPM_TOKEN` to the Github Secrets in the fork
-- Go to the Actions panel
-- Run the "Draft Changelog" workflow
-- Merge the Changelog PR
-- Run the "Draft Release" workflow
-- Run the "Publish Release" workflow
-
-## Publishing to `conda-forge`
-
-If the package is not on conda forge yet, check the documentation to learn how to add it: https://conda-forge.org/docs/maintainer/adding_pkgs.html
-
-Otherwise a bot should pick up the new version publish to PyPI, and open a new PR on the feedstock repository automatically.
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/binder/environment.yml b/neural_coder/extensions/neural_compressor_ext_lab/binder/environment.yml
deleted file mode 100644
index 23bed128c24..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/binder/environment.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-# a mybinder.org-ready environment for demoing neural_compressor_ext_lab
-# this environment may also be used locally on Linux/MacOS/Windows, e.g.
-#
-# conda env update --file binder/environment.yml
-# conda activate neural-compressor-ext-lab-demo
-#
-name: neural-compressor-ext-lab-demo
-
-channels:
- - conda-forge
-
-dependencies:
- # runtime dependencies
- - python >=3.8,<3.9.0a0
- - jupyterlab >=3,<4.0.0a0
- # labextension build dependencies
- - nodejs >=14,<15
- - pip
- - wheel
- # additional packages for demos
- # - ipywidgets
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/binder/postBuild b/neural_coder/extensions/neural_compressor_ext_lab/binder/postBuild
deleted file mode 100644
index 95eabd91874..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/binder/postBuild
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python3
-""" perform a development install of neural_compressor_ext_lab
-
- On Binder, this will run _after_ the environment has been fully created from
- the environment.yml in this directory.
-
- This script should also run locally on Linux/MacOS/Windows:
-
- python3 binder/postBuild
-"""
-import subprocess
-import sys
-from pathlib import Path
-
-
-ROOT = Path.cwd()
-
-def _(*args, **kwargs):
- """ Run a command, echoing the args
-
- fails hard if something goes wrong
- """
- print("\n\t", " ".join(args), "\n")
- return_code = subprocess.call(args, **kwargs)
- if return_code != 0:
- print("\nERROR", return_code, " ".join(args))
- sys.exit(return_code)
-
-# verify the environment is self-consistent before even starting
-_(sys.executable, "-m", "pip", "check")
-
-# install the labextension
-_(sys.executable, "-m", "pip", "install", "-e", ".")
-_(sys.executable, "-m", "jupyter", "labextension", "develop", "--overwrite", ".")
-
-# verify the environment the extension didn't break anything
-_(sys.executable, "-m", "pip", "check")
-
-# list the extensions
-_("jupyter", "server", "extension", "list")
-
-# initially list installed extensions to determine if there are any surprises
-_("jupyter", "labextension", "list")
-
-
-print("JupyterLab with neural_compressor_ext_lab is ready to run with:\n")
-print("\tjupyter lab\n")
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/install.json b/neural_coder/extensions/neural_compressor_ext_lab/install.json
deleted file mode 100644
index 24f9a58e094..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/install.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
- "packageManager": "python",
- "packageName": "neural_compressor_ext_lab",
- "uninstallInstructions": "Use your Python package manager (pip, conda, etc.) to uninstall the package neural_compressor_ext_lab"
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/constants.d.ts b/neural_coder/extensions/neural_compressor_ext_lab/lib/constants.d.ts
deleted file mode 100644
index 071e86ae2c1..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/constants.d.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-export declare namespace Constants {
- const SHORT_PLUGIN_NAME = "neural_compressor_ext_lab";
- const WORK_PATH = "neural_coder_workspace/";
- const ICON_FORMAT_ALL_SVG = " ";
- const ICON_RUN = " ";
- const SVG = " ";
- const LONG_PLUGIN_NAME: string;
- const SETTINGS_SECTION: string;
- const COMMAND_SECTION_NAME = "Jupyterlab Code Optimizer";
- const PLUGIN_VERSION = "0.1.0";
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/constants.js b/neural_coder/extensions/neural_compressor_ext_lab/lib/constants.js
deleted file mode 100644
index 13acd1a7ad3..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/constants.js
+++ /dev/null
@@ -1,12 +0,0 @@
-export var Constants;
-(function (Constants) {
- Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';
- Constants.WORK_PATH = "neural_coder_workspace/";
- Constants.ICON_FORMAT_ALL_SVG = ' ';
- Constants.ICON_RUN = ' ';
- Constants.SVG = ' ';
- Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;
- Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;
- Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';
- Constants.PLUGIN_VERSION = '0.1.0';
-})(Constants || (Constants = {}));
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/deepcoder.d.ts b/neural_coder/extensions/neural_compressor_ext_lab/lib/deepcoder.d.ts
deleted file mode 100644
index dcf8ddfba2a..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/deepcoder.d.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-import { Cell, CodeCell } from '@jupyterlab/cells';
-import { ToolbarButton } from '@jupyterlab/apputils';
-import { Widget } from '@lumino/widgets';
-import { INotebookTracker, NotebookPanel, Notebook } from '@jupyterlab/notebook';
-declare class JupyterlabCodeOptimizer {
- protected working: boolean;
- protected panel: NotebookPanel;
- private tmp_path;
- log_path: string;
- tmp_log_path: string;
- rand: number;
- markdown: Cell | undefined;
- cells: CodeCell[];
- constructor(panel: NotebookPanel);
- optimizeCode(code: string[], formatter: string, name: string, next: string, options: string | undefined, notebook: boolean, panel: NotebookPanel, cell: CodeCell, run?: ToolbarButton | undefined): Promise;
-}
-export declare class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {
- protected notebookname: string;
- protected notebookTracker: INotebookTracker;
- constructor(notebookTracker: INotebookTracker, panel: NotebookPanel);
- optimizeAction(config: any, formatter?: string): Promise;
- optimizeAllCodeCells(config?: string, formatter?: string, notebook?: Notebook, run?: ToolbarButton): Promise;
- private getCodeCells;
- private optimizeCells;
- applicable(formatter: string, currentWidget: Widget): boolean | null;
-}
-export {};
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/deepcoder.js b/neural_coder/extensions/neural_compressor_ext_lab/lib/deepcoder.js
deleted file mode 100644
index 744d7a7a8e3..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/deepcoder.js
+++ /dev/null
@@ -1,295 +0,0 @@
-import { NotebookActions } from '@jupyterlab/notebook';
-import NotebookUtilities from "./utils";
-import { Constants } from './constants';
-class JupyterlabCodeOptimizer {
- constructor(panel) {
- this.working = false;
- this.panel = panel;
- this.tmp_path = "tmp.py";
- this.rand = NotebookUtilities.GetRandomNum(0, 200);
- this.log_path = Constants.WORK_PATH + "NeuralCoder" + this.rand + ".log";
- this.tmp_log_path = Constants.WORK_PATH + "NeuralCoder_tmp" + ".log";
- this.cells = [];
- }
- async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {
- let codes = [];
- code.forEach(function (value) {
- value = value.replace(/('\\n')/g, '^^^');
- value = value.replace(/\\n"/g, '###');
- value = value.replace(/\\n'/g, '###');
- value = value.replace(/"\\n/g, '@@');
- value = value.replace(/'\\n/g, '@@');
- value = value.replace(/\n/g, '\\n');
- value = value.replace(/"/g, '+++');
- value = value.replace(/,/g, '$');
- codes.push(value);
- });
- let gen_code = `code = "${codes}"\ncodes = code.split(',')\nwith open( '${this.tmp_path}', 'w+' ) as f:\n for i in range(0,len(codes)):\n f.write('# this is the beginning of a single code snippet\\n')\n code_list = codes[i].replace('$',',').replace('+++','\"').split('\\n')\n for line in code_list:\n if('split(^^^)' in line):\n line=line.replace('split(^^^)', 'split(\\'\\\\n\\')')\n if('###' in line):\n line=line.replace('###', '\\\\n\"')\n if('@@' in line):\n line=line.replace('@@', '\"\\\\n')\n f.write(line+'\\n')`;
- const expr = { code_list: `code_list` };
- NotebookUtilities.sendKernelRequestFromNotebook(panel, gen_code, expr, false);
- if (options === 'normal') {
- let runcode = `from neural_coder import enable\nenable(code="${this.tmp_path}",features=["${formatter}"], overwrite=True)`;
- let expr = { sum: ` ` };
- NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let run_code1 = `with open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let expr1 = { optimizedCode: "optimized_code" };
- let result2 = NotebookUtilities.sendKernelRequestFromNotebook(panel, run_code1, expr1, false);
- result2.then(value => {
- var _a, _b, _c, _d;
- let optimizedTexts = Object.values(value.optimizedCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- }
- });
- }
- else {
- if (formatter === '') {
- if (this.markdown) {
- this.markdown.model.value.text += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \n";
- }
- // cell.outputArea.node.innerText += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\n"
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n")`;
- let expr1 = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}",features=[], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr1 = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode2 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr2 = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- });
- }
- else {
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}", features=["${formatter}"], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (next !== '') {
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode2 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr2 = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- }
- let runcode3 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr3 = { path: "" };
- let res_tmp = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- res_tmp.then(value => {
- if (formatter === 'pytorch_inc_bf16') {
- let read_log = `import re\nwith open("${this.tmp_log_path}", 'r') as f:\n logs = f.readlines()\n fps_list=[]\n for log_line in logs[-4:]:\n pat = re.compile(r\'\\d+\\.?\\d+')\n fps = re.findall(pat,log_line)[-1]\n fps_list.append(float(fps))\nmaxi = max(fps_list)\nindex = fps_list.index(maxi)\nboost = round(maxi/fps_list[0],1)\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\nbest_feature = features[index]\nbest_name = feature_name[index]\nfeature_l = []\nfeature_l.append(best_feature)\nfrom neural_coder import enable\nenable(code="${this.tmp_path}",features=feature_l, overwrite=True)\nwith open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let read_expr = { boost: "boost", best_feature: "best_feature", best_name: "best_name", optimizeCode: "optimized_code", feature_l: "fps_list", maxi: "maxi", index: "index" };
- let read_result = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);
- read_result.then(value => {
- var _a, _b, _c, _d;
- console.log("resres", value);
- let boost = Object.values(value.boost.data)[0];
- let best_name = Object.values(value.best_name.data)[0];
- let optimizedTexts = Object.values(value.optimizeCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \n`;
- }
- // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\n`
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- }
- // if(this.markdown){
- // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX \n`
- // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`
- // }
- let command = "lscpu | grep 'Model name'";
- let get_hardware = `import subprocess\nsubp = subprocess.Popen("${command}",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8")\nsubp.wait(2)\nhardware = subp.communicate()[0].replace("Model name:","").strip()`;
- let expr_hardware = { hardware: "hardware" };
- let hard_res = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware, false);
- hard_res.then(value => {
- let hard = Object.values(value.hardware.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\n`
- });
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log\n`
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- });
- }
- });
- });
- }
- }
- }
-}
-export class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {
- constructor(notebookTracker, panel) {
- super(panel);
- this.notebookTracker = notebookTracker;
- this.notebookname = '';
- }
- async optimizeAction(config, formatter) {
- return this.optimizeCells(true, config, formatter);
- }
- async optimizeAllCodeCells(config, formatter, notebook, run) {
- return this.optimizeCells(false, config, formatter, notebook, run);
- }
- getCodeCells(ifmarkdown = true, notebook) {
- if (!this.notebookTracker.currentWidget) {
- return [];
- }
- const codeCells = [];
- notebook = notebook || this.notebookTracker.currentWidget.content;
- this.notebookname = notebook.title.label;
- let count = 0;
- notebook.widgets.forEach((cell) => {
- if (cell.model.type === 'code') {
- count += 1;
- codeCells.push(cell);
- }
- });
- if (ifmarkdown) {
- NotebookActions.insertBelow(notebook);
- this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;
- NotebookActions.changeCellType(notebook, 'markdown');
- const activeCell = notebook.activeCell;
- if (activeCell) {
- this.markdown = activeCell;
- }
- }
- this.cells = codeCells;
- return codeCells;
- }
- async optimizeCells(selectedOnly, config, formatter, notebook, run) {
- if (this.working) {
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- console.log("arrive here 333");
- this.working = true;
- const optimize_type = formatter !== undefined ? formatter : 'pytorch_mixed_precision_cpu';
- if (optimize_type === 'auto-quant') {
- selectedOnly = true;
- }
- else {
- selectedOnly = false;
- }
- const selectedCells = this.getCodeCells(selectedOnly, notebook);
- let cell = selectedCells[selectedCells.length - 1];
- if (selectedCells.length === 0) {
- this.working = false;
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- const currentTexts = selectedCells.map(cell => cell.model.value.text);
- if (optimize_type === 'auto-quant') {
- console.log("arrive here 444-111");
- if (this.markdown) {
- this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}" \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \n`;
- }
- // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}"\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\n`
- let runcode = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Auto-Quant Started ......\\n")`;
- let expr = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- let runcode2 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\n")`;
- let expr2 = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- let runcode3 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Benchmark Mode: Throughput\\n")`;
- let expr3 = { path: "" };
- NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- // cell.outputArea.node.setAttribute("class","pad")
- await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);
- }
- else {
- console.log("arrive here 444-222");
- await this.optimizeCode(currentTexts, optimize_type, "", "", "normal", true, this.panel, cell, run);
- }
- this.working = false;
- console.log("arrive here 555");
- return new Promise((resolve, reject) => {
- resolve("success!");
- });
- }
- applicable(formatter, currentWidget) {
- const currentNotebookWidget = this.notebookTracker.currentWidget;
- return currentNotebookWidget && currentWidget === currentNotebookWidget;
- }
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/index.d.ts b/neural_coder/extensions/neural_compressor_ext_lab/lib/index.d.ts
deleted file mode 100644
index f256eada9ba..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/index.d.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-import { JupyterFrontEndPlugin } from '@jupyterlab/application';
-/**
- * Initialization data for the neural_compressor_ext_lab extension.
- */
-declare const plugin: JupyterFrontEndPlugin;
-export default plugin;
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/index.js b/neural_coder/extensions/neural_compressor_ext_lab/lib/index.js
deleted file mode 100644
index 190b66e0840..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/index.js
+++ /dev/null
@@ -1,105 +0,0 @@
-import { INotebookTracker } from '@jupyterlab/notebook';
-import { ToolbarButton, showDialog, Dialog } from '@jupyterlab/apputils';
-import { ISettingRegistry } from '@jupyterlab/settingregistry';
-import { IMainMenu } from '@jupyterlab/mainmenu';
-import { LabIcon } from '@jupyterlab/ui-components';
-import { Widget } from '@lumino/widgets';
-import { JupyterlabNotebookCodeOptimizer } from './deepcoder';
-import { Constants } from './constants';
-class neural_compressor_ext_lab {
- constructor(app, tracker, notebookpanel) {
- this.app = app;
- this.tracker = tracker;
- this.notebookpanel = notebookpanel;
- this.setupWidgetExtension();
- this.config = '';
- }
- createNew(nb) {
- this.notebookpanel = nb;
- this.notebookCodeOptimizer = new JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);
- const svg = document.createElement("svg");
- svg.innerHTML = Constants.ICON_FORMAT_ALL_SVG;
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = Constants.ICON_RUN;
- const div = document.createElement("div");
- div.setAttribute("class", "wrapper");
- const span = document.createElement("span");
- span.setAttribute("class", "f1ozlkqi");
- span.innerHTML = Constants.SVG;
- const selector = document.createElement("select");
- selector.setAttribute("class", "aselector");
- selector.id = "NeuralCoder";
- const option1 = document.createElement("option");
- option1.value = "pytorch_inc_static_quant_fx";
- option1.innerText = "INC Enable INT8 (Static)";
- option1.selected = true;
- const option2 = document.createElement("option");
- option2.value = "pytorch_inc_dynamic_quant";
- option2.innerText = "INC Enable INT8 (Dynamic)";
- const option3 = document.createElement("option");
- option3.value = "pytorch_inc_bf16";
- option3.innerText = "INC Enable BF16";
- const option4 = document.createElement("option");
- option4.value = "auto-quant";
- option4.innerText = "INC Auto Enable & Benchmark";
- selector.options.add(option1);
- selector.options.add(option2);
- selector.options.add(option3);
- selector.options.add(option4);
- div.appendChild(selector);
- div.appendChild(span);
- const selector_widget = new Widget();
- selector_widget.node.appendChild(div);
- selector_widget.addClass("aselector");
- let notebookCodeOptimizer = this.notebookCodeOptimizer;
- let config = this.config;
- const dia_input = document.createElement("input");
- const dia_widget = new Widget();
- dia_widget.node.appendChild(dia_input);
- dia_widget.addClass("dialog");
- const run_button = new ToolbarButton({
- tooltip: 'NeuralCoder',
- icon: new LabIcon({
- name: "run",
- svgstr: Constants.ICON_RUN
- }),
- onClick: async function () {
- var _a, _b, _c, _d;
- console.log("arrive here 111");
- (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);
- if (selector.options[selector.selectedIndex].value === 'auto-quant') {
- await showDialog({
- title: 'Please input execute parameters:',
- body: dia_widget,
- buttons: [Dialog.okButton({ label: 'Confirm' })]
- }).then(result => {
- if (result.button.accept) {
- config = dia_input.value;
- }
- });
- }
- console.log("arrive here 222");
- await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);
- }
- });
- nb.toolbar.insertItem(11, "nc", run_button);
- nb.toolbar.insertItem(12, "selector", selector_widget);
- }
- setupWidgetExtension() {
- this.app.docRegistry.addWidgetExtension('Notebook', this);
- }
-}
-/**
- * Initialization data for the neural_compressor_ext_lab extension.
- */
-const plugin = {
- id: 'neural_compressor_ext_lab:plugin',
- autoStart: true,
- requires: [INotebookTracker, IMainMenu],
- optional: [ISettingRegistry],
- activate: (app, tracker, notebookpanel) => {
- new neural_compressor_ext_lab(app, tracker, notebookpanel);
- console.log('JupyterLab extension neural_compressor_ext_lab is activated!');
- }
-};
-export default plugin;
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/test.d.ts b/neural_coder/extensions/neural_compressor_ext_lab/lib/test.d.ts
deleted file mode 100644
index 2c28e08c632..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/test.d.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-declare let str: string;
-declare let str1: number;
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/test.js b/neural_coder/extensions/neural_compressor_ext_lab/lib/test.js
deleted file mode 100644
index c679ea06c74..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/test.js
+++ /dev/null
@@ -1,4 +0,0 @@
-"use strict";
-let str = '# this is the beginning of a single code snippet\nimport glob\nimport torch\nimport os\nimport sys\nfrom tqdm import tqdm\nfrom dalle_pytorch import VQGanVAE, DALLE, DiscreteVAE\nfrom dalle_pytorch.tokenizer import tokenizer\nfrom einops import repeat\nfrom dalle_nc import DALLE, DiscreteVAE\nfrom torch.utils.data import DataLoader\nfrom torch.utils.data import Dataset\n\n# model\nvae = DiscreteVAE(\n image_size = 8,\n num_layers = 3,\n num_tokens = 8192,\n codebook_dim = 1024,\n hidden_dim = 64,\n num_resnet_blocks = 1,\n temperature = 0.9\n)\n\ndalle = DALLE(\n dim = 1024,\n vae = vae, # automatically infer (1) image sequence length and (2) number of image tokens\n num_text_tokens = 100000, # vocab size for text\n text_seq_len = 256, # text sequence length\n depth = 12, # should aim to be 64\n heads = 16, # attention heads\n dim_head = 64, # attention head dimension\n attn_dropout = 0.1, # attention dropout\n ff_dropout = 0.1 # feedforward dropout\n)\n# [NeuralCoder] pytorch_inc_dynamic_quant for dalle [Beginning Line]\nif "GraphModule" not in str(type(dalle)):\n from neural_compressor.quantization import fit\n from neural_compressor.config import PostTrainingQuantConfig\n config = PostTrainingQuantConfig(approach="dynamic")\n dalle = fit(dalle, conf=config)\n dalle = dalle.model\n dalle.eval()\n# [NeuralCoder] pytorch_inc_dynamic_quant for dalle [Ending Line]\n\ndalle.eval()\n\n# real data for DALLE image generation\nfiles = glob.glob(\'/home2/longxin/neural_compressor_ext_lab/real_text.txt\')\n\n# create dataloader\ninput_list = []\nwith torch.no_grad():\n count = 0\n for file in files:\n texts = open(file, \'r\').read().split(\'\\n\')\n for text in texts:\n print(text)\n\n num_images = 1\n\n top_k = 0.9\n\n image_size = vae.image_size\n\n texts = text.split(\'|\')\n\n for j, text in tqdm(enumerate(texts)):\n text_tokens = tokenizer.tokenize([text], 256).to(\'cpu\')\n\n text_tokens = repeat(text_tokens, \'() n -> b n\', b=num_images)\n\n for 
text_chunk in tqdm(text_tokens):\n d = {}\n d["text"] = text_chunk\n d["filter_thres"] = top_k\n input_list.append(d)\n\nclass MyDataset(Dataset):\n def __init__(self):\n self.samples = input_list\n\n def __getitem__(self, idx):\n return self.samples[idx], 1\n\n def __len__(self):\n return len(self.samples)\ndataset = MyDataset()\ndataloader = DataLoader(dataset)\n\n# inference\nwith torch.no_grad():\n for step, (inputs, labels) in enumerate(dataloader):\n print("running inference ...")\n output = dalle(**inputs)\n\n';
-let str1 = str.split('# this is the beginning of a single code snippet\\n').length;
-console.log(__filename);
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/utils.d.ts b/neural_coder/extensions/neural_compressor_ext_lab/lib/utils.d.ts
deleted file mode 100644
index 8b7a2173767..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/utils.d.ts
+++ /dev/null
@@ -1,120 +0,0 @@
-import { NotebookPanel } from '@jupyterlab/notebook';
-import { Kernel } from '@jupyterlab/services';
-import { CommandRegistry } from '@phosphor/commands';
-export default class NotebookUtilities {
- /**
- * generate random number
- * @Min
- * @Max
- */
- static GetRandomNum(Min: number, Max: number): number;
- /**
- * Builds an HTML container by sanitizing a list of strings and converting
- * them in valid HTML
- * @param msg A list of string with HTML formatting
- * @returns a HTMLDivElement composed of a list of spans with formatted text
- */
- private static buildDialogBody;
- /**
- * Opens a pop-up dialog in JupyterLab to display a simple message.
- * @param title The title for the message popup
- * @param msg The message as an array of strings
- * @param buttonLabel The label to use for the button. Default is 'OK'
- * @param buttonClassName The classname to give to the 'ok' button
- * @returns Promise - A promise once the message is closed.
- */
- static showMessage(title: string, msg: string[], buttonLabel?: string, buttonClassName?: string): Promise;
- /**
- * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.
- * @param title The title for the message popup
- * @param msg The message
- * @param acceptLabel The label to use for the accept button. Default is 'YES'
- * @param rejectLabel The label to use for the reject button. Default is 'NO'
- * @param yesButtonClassName The classname to give to the accept button.
- * @param noButtonClassName The classname to give to the cancel button.
- * @returns Promise - A promise once the message is closed.
- */
- static showYesNoDialog(title: string, msg: string[], acceptLabel?: string, rejectLabel?: string, yesButtonClassName?: string, noButtonClassName?: string): Promise;
- /**
- * Opens a pop-up dialog in JupyterLab with various information and button
- * triggering reloading the page.
- * @param title The title for the message popup
- * @param msg The message
- * @param buttonLabel The label to use for the button. Default is 'Refresh'
- * @param buttonClassName The classname to give to the 'refresh' button.
- * @returns Promise - A promise once the message is closed.
- */
- static showRefreshDialog(title: string, msg: string[], buttonLabel?: string, buttonClassName?: string): Promise;
- /**
- * @description Creates a new JupyterLab notebook for use by the application
- * @param command The command registry
- * @returns Promise - A promise containing the notebook panel object that was created (if successful).
- */
- static createNewNotebook(command: CommandRegistry): Promise;
- /**
- * Safely saves the Jupyter notebook document contents to disk
- * @param notebookPanel The notebook panel containing the notebook to save
- */
- static saveNotebook(notebookPanel: NotebookPanel): Promise;
- /**
- * Convert the notebook contents to JSON
- * @param notebookPanel The notebook panel containing the notebook to serialize
- */
- static notebookToJSON(notebookPanel: NotebookPanel): any;
- /**
- * @description Gets the value of a key from specified notebook's metadata.
- * @param notebookPanel The notebook to get meta data from.
- * @param key The key of the value.
- * @returns any -The value of the metadata. Returns null if the key doesn't exist.
- */
- static getMetaData(notebookPanel: NotebookPanel, key: string): any;
- /**
- * @description Sets the key value pair in the notebook's metadata.
- * If the key doesn't exists it will add one.
- * @param notebookPanel The notebook to set meta data in.
- * @param key The key of the value to create.
- * @param value The value to set.
- * @param save Default is false. Whether the notebook should be saved after the meta data is set.
- * Note: This function will not wait for the save to complete, it only sends a save request.
- * @returns The old value for the key, or undefined if it did not exist.
- */
- static setMetaData(notebookPanel: NotebookPanel, key: string, value: any, save?: boolean): any;
- /**
- * @description This function runs code directly in the notebook's kernel and then evaluates the
- * result and returns it as a promise.
- * @param kernel The kernel to run the code in.
- * @param runCode The code to run in the kernel.
- * @param userExpressions The expressions used to capture the desired info from the executed code.
- * @param runSilent Default is false. If true, kernel will execute as quietly as possible.
- * store_history will be set to false, and no broadcast on IOPUB channel will be made.
- * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history
- * and the counter which is shown in the cells will be incremented to reflect code was run.
- * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using
- * an input_request message.
- * @param stopOnError Default is false. If True, does not abort the execution queue, if an exception is encountered.
- * This allows the queued execution of multiple execute_requests, even if they generate exceptions.
- * @returns Promise - A promise containing the execution results of the code as an object with
- * keys based on the user_expressions.
- * @example
- * //The code
- * const code = "a=123\nb=456\nsum=a+b";
- * //The user expressions
- * const expr = {sum: "sum",prod: "a*b",args:"[a,b,sum]"};
- * //Async function call (returns a promise)
- * sendKernelRequest(notebookPanel, code, expr,false);
- * //Result when promise resolves:
- * {
- * sum:{status:"ok",data:{"text/plain":"579"},metadata:{}},
- * prod:{status:"ok",data:{"text/plain":"56088"},metadata:{}},
- * args:{status:"ok",data:{"text/plain":"[123, 456, 579]"}}
- * }
- * @see For more information on JupyterLab messages:
- * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results
- */
- static sendKernelRequest(kernel: Kernel.IKernelConnection | null | undefined, runCode: string, userExpressions: any, runSilent?: boolean, storeHistory?: boolean, allowStdIn?: boolean, stopOnError?: boolean): Promise;
- /**
- * Same as method sendKernelRequest but passing
- * a NotebookPanel instead of a Kernel
- */
- static sendKernelRequestFromNotebook(notebookPanel: NotebookPanel, runCode: string, userExpressions: any, runSilent?: boolean, storeHistory?: boolean, allowStdIn?: boolean, stopOnError?: boolean): Promise;
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/lib/utils.js b/neural_coder/extensions/neural_compressor_ext_lab/lib/utils.js
deleted file mode 100644
index 5ac72df1071..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/lib/utils.js
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright 2019-2020 The Kale Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import { Dialog, showDialog } from '@jupyterlab/apputils';
-// @ts-ignore
-import SanitizedHTML from 'react-sanitized-html';
-import * as React from 'react';
-export default class NotebookUtilities {
- /**
- * generate random number
- * @Min
- * @Max
- */
- static GetRandomNum(Min, Max) {
- let Range;
- Range = Max - Min;
- var Rand = Math.random();
- return (Min + Math.round(Rand * Range));
- }
- /**
- * Builds an HTML container by sanitizing a list of strings and converting
- * them in valid HTML
- * @param msg A list of string with HTML formatting
- * @returns a HTMLDivElement composed of a list of spans with formatted text
- */
- static buildDialogBody(msg) {
- return (React.createElement("div", null, msg.map((s, i) => {
- return (React.createElement(React.Fragment, { key: `msg-${i}` },
- React.createElement(SanitizedHTML, { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),
- React.createElement("br", null)));
- })));
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a simple message.
- * @param title The title for the message popup
- * @param msg The message as an array of strings
- * @param buttonLabel The label to use for the button. Default is 'OK'
- * @param buttonClassName The classname to give to the 'ok' button
- * @returns Promise - A promise once the message is closed.
- */
- static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {
- const buttons = [
- Dialog.okButton({ label: buttonLabel, className: buttonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- await showDialog({ title, buttons, body: messageBody });
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.
- * @param title The title for the message popup
- * @param msg The message
- * @param acceptLabel The label to use for the accept button. Default is 'YES'
- * @param rejectLabel The label to use for the reject button. Default is 'NO'
- * @param yesButtonClassName The classname to give to the accept button.
- * @param noButtonClassName The classname to give to the cancel button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {
- const buttons = [
- Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),
- Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- const result = await showDialog({ title, buttons, body: messageBody });
- return result.button.label === acceptLabel;
- }
- /**
- * Opens a pop-up dialog in JupyterLab with various information and button
- * triggering reloading the page.
- * @param title The title for the message popup
- * @param msg The message
- * @param buttonLabel The label to use for the button. Default is 'Refresh'
- * @param buttonClassName The classname to give to the 'refresh' button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {
- await this.showMessage(title, msg, buttonLabel, buttonClassName);
- location.reload();
- }
- /**
- * @description Creates a new JupyterLab notebook for use by the application
- * @param command The command registry
- * @returns Promise - A promise containing the notebook panel object that was created (if successful).
- */
- static async createNewNotebook(command) {
- const notebook = await command.execute('notebook:create-new', {
- activate: true,
- path: '',
- preferredLanguage: '',
- });
- await notebook.session.ready;
- return notebook;
- }
- /**
- * Safely saves the Jupyter notebook document contents to disk
- * @param notebookPanel The notebook panel containing the notebook to save
- */
- static async saveNotebook(notebookPanel) {
- if (notebookPanel) {
- await notebookPanel.context.ready;
- notebookPanel.context.save();
- return true;
- }
- return false;
- }
- /**
- * Convert the notebook contents to JSON
- * @param notebookPanel The notebook panel containing the notebook to serialize
- */
- static notebookToJSON(notebookPanel) {
- if (notebookPanel.content.model) {
- return notebookPanel.content.model.toJSON();
- }
- return null;
- }
- /**
- * @description Gets the value of a key from specified notebook's metadata.
- * @param notebookPanel The notebook to get meta data from.
- * @param key The key of the value.
- * @returns any -The value of the metadata. Returns null if the key doesn't exist.
- */
- static getMetaData(notebookPanel, key) {
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {
- return notebookPanel.model.metadata.get(key);
- }
- return null;
- }
- /**
- * @description Sets the key value pair in the notebook's metadata.
- * If the key doesn't exists it will add one.
- * @param notebookPanel The notebook to set meta data in.
- * @param key The key of the value to create.
- * @param value The value to set.
- * @param save Default is false. Whether the notebook should be saved after the meta data is set.
- * Note: This function will not wait for the save to complete, it only sends a save request.
- * @returns The old value for the key, or undefined if it did not exist.
- */
- static setMetaData(notebookPanel, key, value, save = false) {
- var _a;
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? void 0 : _a.metadata.set(key, value);
- if (save) {
- this.saveNotebook(notebookPanel);
- }
- return oldVal;
- }
- // /**
- // * Get a new Kernel, not tied to a Notebook
- // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services
- // */
- // public static async createNewKernel() {
- // // Get info about the available kernels and start a new one.
- // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {
- // // console.log('Default spec:', kernelSpecs.default);
- // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));
- // // use the default name
- // return { name: kernelSpecs.default };
- // });
- // return await Kernel.startNew(options).then(_kernel => {
- // return _kernel;
- // });
- // }
- // // TODO: We can use this context manager to execute commands inside a new kernel
- // // and be sure that it will be disposed of at the end.
- // // Another approach could be to create a kale_rpc Kernel, as a singleton,
- // // created at startup. The only (possible) drawback is that we can not name
- // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would
- // // not recognize the kernel. A solution could be to have a kernel spec dedicated to kale rpc calls.
- // public static async executeWithNewKernel(action: Function, args: any[] = []) {
- // // create brand new kernel
- // const _k = await this.createNewKernel();
- // // execute action inside kernel
- // const res = await action(_k, ...args);
- // // close kernel
- // _k.shutdown();
- // // return result
- // return res;
- // }
- /**
- * @description This function runs code directly in the notebook's kernel and then evaluates the
- * result and returns it as a promise.
- * @param kernel The kernel to run the code in.
- * @param runCode The code to run in the kernel.
- * @param userExpressions The expressions used to capture the desired info from the executed code.
- * @param runSilent Default is false. If true, kernel will execute as quietly as possible.
- * store_history will be set to false, and no broadcast on IOPUB channel will be made.
- * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history
- * and the counter which is shown in the cells will be incremented to reflect code was run.
- * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using
- * an input_request message.
- * @param stopOnError Default is false. If True, does not abort the execution queue, if an exception is encountered.
- * This allows the queued execution of multiple execute_requests, even if they generate exceptions.
- * @returns Promise - A promise containing the execution results of the code as an object with
- * keys based on the user_expressions.
- * @example
- * //The code
- * const code = "a=123\nb=456\nsum=a+b";
- * //The user expressions
- * const expr = {sum: "sum",prod: "a*b",args:"[a,b,sum]"};
- * //Async function call (returns a promise)
- * sendKernelRequest(notebookPanel, code, expr,false);
- * //Result when promise resolves:
- * {
- * sum:{status:"ok",data:{"text/plain":"579"},metadata:{}},
- * prod:{status:"ok",data:{"text/plain":"56088"},metadata:{}},
- * args:{status:"ok",data:{"text/plain":"[123, 456, 579]"}}
- * }
- * @see For more information on JupyterLab messages:
- * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results
- */
- static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- if (!kernel) {
- throw new Error('Kernel is null or undefined.');
- }
- // Wait for kernel to be ready before sending request
- // await kernel.status;
- const message = await kernel.requestExecute({
- allow_stdin: allowStdIn,
- code: runCode,
- silent: runSilent,
- stop_on_error: stopOnError,
- store_history: storeHistory,
- user_expressions: userExpressions,
- }).done;
- const content = message.content;
- if (content.status !== 'ok') {
- // If response is not 'ok', throw contents as error, log code
- const msg = `Code caused an error:\n${runCode}`;
- console.error(msg);
- if (content.traceback) {
- content.traceback.forEach((line) => console.log(line.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));
- }
- throw content;
- }
- // Return user_expressions of the content
- return content.user_expressions;
- }
- /**
- * Same as method sendKernelRequest but passing
- * a NotebookPanel instead of a Kernel
- */
- static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- var _a, _b, _c, _d;
- if (!notebookPanel) {
- throw new Error('Notebook is null or undefined.');
- }
- // Wait for notebook panel to be ready
- await notebookPanel.activate;
- await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);
- console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);
- return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);
- }
-}
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/__init__.py b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/__init__.py
deleted file mode 100644
index 74c4e9f68fc..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/__init__.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import json
-from pathlib import Path
-
-from ._version import __version__
-
-
-HERE = Path(__file__).parent.resolve()
-
-
-with (HERE / "labextension" / "package.json").open() as fid:
- data = json.load(fid)
-
-
-def _jupyter_labextension_paths():
- return [{
- "src": "labextension",
- "dest": data["name"]
- }]
-
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/_version.py b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/_version.py
deleted file mode 100644
index 30cfd6b1021..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/_version.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import json
-from pathlib import Path
-
-__all__ = ["__version__"]
-
-def _fetchVersion():
- HERE = Path(__file__).parent.resolve()
-
- for settings in HERE.rglob("package.json"):
- try:
- with settings.open() as f:
- version = json.load(f)["version"]
- return (
- version.replace("-alpha.", "a")
- .replace("-beta.", "b")
- .replace("-rc.", "rc")
- )
- except FileNotFoundError:
- pass
-
- raise FileNotFoundError(f"Could not find package.json under dir {HERE!s}")
-
-__version__ = _fetchVersion()
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/build_log.json b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/build_log.json
deleted file mode 100644
index 30f46e6eba0..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/build_log.json
+++ /dev/null
@@ -1,651 +0,0 @@
-[
- {
- "bail": false,
- "module": {
- "rules": [
- {
- "test": {},
- "use": [
- "style-loader",
- "css-loader"
- ]
- },
- {
- "test": {},
- "use": "raw-loader"
- },
- {
- "test": {},
- "use": "raw-loader"
- },
- {
- "test": {},
- "use": "file-loader"
- },
- {
- "test": {},
- "use": "file-loader"
- },
- {
- "test": {},
- "use": "url-loader?limit=10000&mimetype=application/font-woff"
- },
- {
- "test": {},
- "use": "url-loader?limit=10000&mimetype=application/font-woff"
- },
- {
- "test": {},
- "use": "url-loader?limit=10000&mimetype=application/octet-stream"
- },
- {
- "test": {},
- "use": "file-loader"
- },
- {
- "test": {},
- "issuer": {},
- "use": {
- "loader": "svg-url-loader",
- "options": {
- "encoding": "none",
- "limit": 10000
- }
- }
- },
- {
- "test": {},
- "issuer": {},
- "use": {
- "loader": "raw-loader"
- }
- },
- {
- "test": {},
- "type": "javascript/auto"
- },
- {
- "test": {},
- "resolve": {
- "fullySpecified": false
- }
- },
- {
- "test": {},
- "resolve": {
- "fullySpecified": false
- }
- },
- {
- "test": {},
- "use": "file-loader"
- }
- ]
- },
- "resolve": {
- "alias": {},
- "fallback": {
- "url": false,
- "buffer": false,
- "crypto": false,
- "path": "/home/demo/longxin/neural_compressor_ext_lab/node_modules/path-browserify/index.js",
- "process": "/home/demo/longxin/neural_compressor_ext_lab/node_modules/process/browser.js"
- }
- },
- "watchOptions": {
- "poll": 500,
- "aggregateTimeout": 1000
- },
- "output": {
- "hashFunction": "sha256",
- "filename": "[name].[contenthash].js",
- "path": "/home/demo/longxin/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static",
- "publicPath": "auto"
- },
- "plugins": [
- {
- "definitions": {
- "process": "process/browser"
- }
- },
- {
- "_options": {
- "name": "neural_compressor_ext_lab",
- "library": {
- "type": "var",
- "name": [
- "_JUPYTERLAB",
- "neural_compressor_ext_lab"
- ]
- },
- "filename": "remoteEntry.[contenthash].js",
- "exposes": {
- "./index": "/home/demo/longxin/neural_compressor_ext_lab/lib/index.js",
- "./extension": "/home/demo/longxin/neural_compressor_ext_lab/lib/index.js",
- "./style": "/home/demo/longxin/neural_compressor_ext_lab/style/index.js"
- },
- "shared": {
- "@jupyterlab/application": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/application-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/apputils-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/cell-toolbar-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/celltags-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/codemirror-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/completer-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/console-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/coreutils": {
- "requiredVersion": "^5.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/csvviewer-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/debugger-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/docmanager-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/docprovider-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/documentsearch-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/extensionmanager-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/filebrowser-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/fileeditor-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/help-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/htmlviewer-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/hub-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/imageviewer-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/inspector-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/javascript-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/json-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/launcher-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/logconsole-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/mainmenu-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/markdownviewer-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/mathjax2-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/notebook-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/pdf-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/rendermime-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/running-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/settingeditor-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/shortcuts-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/statusbar-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/terminal-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/theme-dark-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/theme-light-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/toc-extension": {
- "requiredVersion": "^5.4.7",
- "import": false
- },
- "@jupyterlab/tooltip-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/translation-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/ui-components-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/vdom-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/vega5-extension": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/apputils": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/attachments": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/cell-toolbar": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/cells": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/celltags": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/codeeditor": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/codemirror": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/completer": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/console": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/csvviewer": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/debugger": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/docmanager": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/docprovider": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/docregistry": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/documentsearch": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/extensionmanager": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/filebrowser": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/fileeditor": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/htmlviewer": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/imageviewer": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/inspector": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/launcher": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/logconsole": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/mainmenu": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/markdownviewer": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/mathjax2": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/metapackage": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/nbconvert-css": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/nbformat": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/notebook": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/observables": {
- "requiredVersion": "^4.4.7",
- "import": false
- },
- "@jupyterlab/outputarea": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/property-inspector": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/rendermime": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/rendermime-interfaces": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/running": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@jupyterlab/services": {
- "requiredVersion": "^6.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/settingeditor": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/settingregistry": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/shared-models": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/statedb": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/statusbar": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/terminal": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/toc": {
- "requiredVersion": "^5.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/tooltip": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/translation": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/ui-components": {
- "requiredVersion": "^3.4.7",
- "import": false,
- "singleton": true
- },
- "@jupyterlab/vdom": {
- "requiredVersion": "^3.4.7",
- "import": false
- },
- "@lumino/algorithm": {
- "requiredVersion": "^1.9.0",
- "import": false,
- "singleton": true
- },
- "@lumino/application": {
- "requiredVersion": "^1.27.0",
- "import": false,
- "singleton": true
- },
- "@lumino/commands": {
- "requiredVersion": "^1.19.0",
- "import": false,
- "singleton": true
- },
- "@lumino/coreutils": {
- "requiredVersion": "^1.11.0",
- "import": false,
- "singleton": true
- },
- "@lumino/disposable": {
- "requiredVersion": "^1.10.0",
- "import": false,
- "singleton": true
- },
- "@lumino/domutils": {
- "requiredVersion": "^1.8.0",
- "import": false,
- "singleton": true
- },
- "@lumino/dragdrop": {
- "requiredVersion": "^1.13.0",
- "import": false,
- "singleton": true
- },
- "@lumino/messaging": {
- "requiredVersion": "^1.10.0",
- "import": false,
- "singleton": true
- },
- "@lumino/properties": {
- "requiredVersion": "^1.8.0",
- "import": false,
- "singleton": true
- },
- "@lumino/signaling": {
- "requiredVersion": "^1.10.0",
- "import": false,
- "singleton": true
- },
- "@lumino/virtualdom": {
- "requiredVersion": "^1.14.0",
- "import": false,
- "singleton": true
- },
- "@lumino/widgets": {
- "requiredVersion": "^1.33.0",
- "import": false,
- "singleton": true
- },
- "react": {
- "requiredVersion": "^17.0.1",
- "import": false,
- "singleton": true
- },
- "react-dom": {
- "requiredVersion": "^17.0.1",
- "import": false,
- "singleton": true
- },
- "yjs": {
- "requiredVersion": "^13.5.17",
- "import": false,
- "singleton": true
- },
- "@phosphor/commands": {},
- "@types/lerna__child-process": {},
- "@types/shelljs": {},
- "ajv": {},
- "ajv-keywords": {},
- "assert": {},
- "browserify-fs": {},
- "brython": {},
- "buffer": {},
- "cacheable-request": {},
- "child_process": {},
- "clone-response": {},
- "constants": {},
- "css-loader": {},
- "duplicate-package-checker-webpack-plugin": {},
- "enhanced-resolve": {},
- "es-abstract": {},
- "es-to-primitive": {},
- "fs": {},
- "got": {},
- "has": {},
- "icss-utils": {},
- "react-sanitized-html": {},
- "sanitize-html": {},
- "shelljs": {},
- "stream": {},
- "util": {},
- "neural_compressor_ext_lab": {
- "version": "0.1.0",
- "singleton": true,
- "import": "/home/demo/longxin/neural_compressor_ext_lab/lib/index.js"
- }
- }
- }
- },
- {}
- ],
- "mode": "development",
- "devtool": "source-map",
- "entry": {}
- }
-]
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/package.json b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/package.json
deleted file mode 100644
index b5d8d15ca60..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/package.json
+++ /dev/null
@@ -1,164 +0,0 @@
-{
- "name": "jupyter-lab-neural-compressor",
- "version": "1.0.2",
- "description": "Intel® Neural Compressor auto-quantization plugin.",
- "keywords": [
- "jupyter",
- "jupyterlab",
- "jupyterlab-extension"
- ],
- "homepage": "https://github.com/intel/neural-compressor/tree/master/neural_coder/extensions/neural_compressor_ext_lab",
- "bugs": {
- "url": "https://github.com/intel/neural-compressor/tree/master/neural_coder/extensions/neural_compressor_ext_lab"
- },
- "license": "BSD-3-Clause",
- "author": {
- "name": "neural_compressor_ext_lab",
- "email": "inc.maintainers@intel.com"
- },
- "files": [
- "lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}",
- "style/**/*.{css,js,eot,gif,html,jpg,json,png,svg,woff2,ttf}"
- ],
- "main": "lib/index.js",
- "types": "lib/index.d.ts",
- "style": "style/index.css",
- "repository": {
- "type": "git",
- "url": "https://github.com/intel/neural-compressor.git"
- },
- "scripts": {
- "preinstall": "npx npm-force-resolutions",
- "build": "jlpm build:lib && jlpm build:labextension:dev",
- "build:prod": "jlpm clean && jlpm build:lib && jlpm build:labextension",
- "build:labextension": "jupyter labextension build .",
- "build:labextension:dev": "jupyter labextension build --development True .",
- "build:lib": "tsc",
- "clean": "jlpm clean:lib",
- "clean:lib": "rimraf lib tsconfig.tsbuildinfo",
- "clean:lintcache": "rimraf .eslintcache .stylelintcache",
- "clean:labextension": "rimraf neural_compressor_ext_lab/labextension",
- "clean:all": "jlpm clean:lib && jlpm clean:labextension && jlpm clean:lintcache",
- "eslint": "jlpm eslint:check --fix",
- "eslint:check": "eslint . --cache --ext .ts,.tsx",
- "install:extension": "jlpm build",
- "lint": "jlpm stylelint && jlpm prettier && jlpm eslint",
- "lint:check": "jlpm stylelint:check && jlpm prettier:check && jlpm eslint:check",
- "prettier": "jlpm prettier:base --write --list-different",
- "prettier:base": "prettier \"**/*{.ts,.tsx,.js,.jsx,.css,.json,.md}\"",
- "prettier:check": "jlpm prettier:base --check",
- "stylelint": "jlpm stylelint:check --fix",
- "stylelint:check": "stylelint --cache \"style/**/*.css\"",
- "watch": "run-p watch:src watch:labextension",
- "watch:src": "tsc -w",
- "watch:labextension": "jupyter labextension watch ."
- },
- "dependencies": {
- "@jupyterlab/application": "^3.4.7",
- "@jupyterlab/apputils": "^3.4.7",
- "@jupyterlab/cells": "^3.4.7",
- "@jupyterlab/coreutils": "^5.4.7",
- "@jupyterlab/docregistry": "^3.4.7",
- "@jupyterlab/fileeditor": "^3.4.6",
- "@jupyterlab/mainmenu": "^3.4.6",
- "@jupyterlab/notebook": "^3.4.7",
- "@jupyterlab/services": "^6.4.7",
- "@jupyterlab/settingregistry": "^3.4.7",
- "@jupyterlab/statedb": "^3.4.7",
- "@lumino/coreutils": "^1.12.1",
- "@lumino/messaging": "^1.10.2",
- "@phosphor/commands": "^1.7.2",
- "@types/lerna__child-process": "^5.1.0",
- "@types/shelljs": "^0.8.11",
- "ajv": "^8.11.0",
- "ajv-keywords": "^5.1.0",
- "assert": "^2.0.0",
- "browserify-fs": "^1.0.0",
- "brython": "^3.10.6",
- "buffer": "^6.0.3",
- "cacheable-request": "^10.1.2",
- "child_process": "^1.0.2",
- "clone-response": "^2.0.0",
- "constants": "^0.0.2",
- "css-loader": "^6.7.1",
- "duplicate-package-checker-webpack-plugin": "^3.0.0",
- "enhanced-resolve": "^5.10.0",
- "es-abstract": "^1.20.2",
- "es-to-primitive": "^1.2.1",
- "fs": "^0.0.1-security",
- "has": "^1.0.3",
- "icss-utils": "^5.1.0",
- "loader-utils": "^2.0.3",
- "react": "^17.0.2",
- "react-sanitized-html": "^2.0.0",
- "sanitize-html": "^2.7.2",
- "shelljs": "^0.8.5",
- "stream": "^0.0.2",
- "util": "^0.12.4"
- },
- "devDependencies": {
- "@jupyterlab/builder": "^3.1.0",
- "@types/fs-extra": "^9.0.13",
- "@types/node": "^18.7.15",
- "@typescript-eslint/eslint-plugin": "^4.8.1",
- "@typescript-eslint/parser": "^4.8.1",
- "bl": "^1.2.3",
- "eslint": "^7.14.0",
- "eslint-config-prettier": "^6.15.0",
- "eslint-plugin-prettier": "^3.1.4",
- "got": "^12.1.0",
- "npm-run-all": "^4.1.5",
- "prettier": "^2.1.1",
- "rimraf": "^3.0.2",
- "semver": "^5.7.2",
- "stylelint": "^15.6.0",
- "stylelint-config-prettier": "^9.0.3",
- "stylelint-config-recommended": "^12.0.0",
- "stylelint-config-standard": "~33.0.0",
- "stylelint-prettier": "^2.0.0",
- "typescript": "~4.1.3"
- },
- "sideEffects": [
- "style/*.css",
- "style/index.js"
- ],
- "styleModule": "style/index.js",
- "publishConfig": {
- "access": "public"
- },
- "jupyterlab": {
- "extension": true,
- "outputDir": "neural_compressor_ext_lab/labextension",
- "_build": {
- "load": "static/remoteEntry.34f9ad20791fd484f052.js",
- "extension": "./extension",
- "style": "./style"
- }
- },
- "browser": {
- "child_process": false,
- "lerna__child_process": false,
- "fs": false,
- "path": false,
- "os": false
- },
- "jupyter-releaser": {
- "hooks": {
- "before-build-npm": [
- "python -m pip install jupyterlab~=3.1",
- "jlpm"
- ],
- "before-build-python": [
- "jlpm clean:all"
- ]
- }
- },
- "resolutions": {
- "got": "^12.1.0",
- "semver": "^5.7.2",
- "bl": "^1.2.3",
- "loader-utils": "^2.0.3",
- "json5": "^2.2.2",
- "jsonwebtoken": "^9.0.0"
- }
-}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.0c0187df9df8bc38b9c5.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.0c0187df9df8bc38b9c5.js
deleted file mode 100644
index 644b1b8bff1..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.0c0187df9df8bc38b9c5.js
+++ /dev/null
@@ -1,767 +0,0 @@
-"use strict";
-(self["webpackChunkneural_compressor_ext_lab"] = self["webpackChunkneural_compressor_ext_lab"] || []).push([["lib_index_js"],{
-
-/***/ "./lib/constants.js":
-/*!**************************!*\
- !*** ./lib/constants.js ***!
- \**************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "Constants": () => (/* binding */ Constants)
-/* harmony export */ });
-var Constants;
-(function (Constants) {
- Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';
- Constants.WORK_PATH = "neural_coder_workspace/";
- Constants.ICON_FORMAT_ALL_SVG = ' ';
- Constants.ICON_RUN = ' ';
- Constants.SVG = ' ';
- Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;
- Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;
- Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';
- Constants.PLUGIN_VERSION = '0.1.0';
-})(Constants || (Constants = {}));
-
-
-/***/ }),
-
-/***/ "./lib/deepcoder.js":
-/*!**************************!*\
- !*** ./lib/deepcoder.js ***!
- \**************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "JupyterlabNotebookCodeOptimizer": () => (/* binding */ JupyterlabNotebookCodeOptimizer)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/notebook */ "webpack/sharing/consume/default/@jupyterlab/notebook");
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _utils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./utils */ "./lib/utils.js");
-/* harmony import */ var _constants__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./constants */ "./lib/constants.js");
-
-
-
-class JupyterlabCodeOptimizer {
- constructor(panel) {
- this.working = false;
- this.panel = panel;
- this.tmp_path = "tmp.py";
- this.rand = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].GetRandomNum(0, 200);
- this.log_path = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.WORK_PATH + "NeuralCoder" + this.rand + ".log";
- this.tmp_log_path = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.WORK_PATH + "NeuralCoder_tmp" + ".log";
- this.cells = [];
- }
- async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {
- let codes = [];
- code.forEach(function (value) {
- value = value.replace(/('\\n')/g, '^^^');
- value = value.replace(/\\n"/g, '###');
- value = value.replace(/\\n'/g, '###');
- value = value.replace(/"\\n/g, '@@');
- value = value.replace(/'\\n/g, '@@');
- value = value.replace(/\n/g, '\\n');
- value = value.replace(/"/g, '+++');
- value = value.replace(/,/g, '$');
- codes.push(value);
- });
- let gen_code = `code = "${codes}"\ncodes = code.split(',')\nwith open( '${this.tmp_path}', 'w+' ) as f:\n for i in range(0,len(codes)):\n f.write('# this is the beginning of a single code snippet\\n')\n code_list = codes[i].replace('$',',').replace('+++','\"').split('\\n')\n for line in code_list:\n if('split(^^^)' in line):\n line=line.replace('split(^^^)', 'split(\\'\\\\n\\')')\n if('###' in line):\n line=line.replace('###', '\\\\n\"')\n if('@@' in line):\n line=line.replace('@@', '\"\\\\n')\n f.write(line+'\\n')`;
- const expr = { code_list: `code_list` };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, gen_code, expr, false);
- if (options === 'normal') {
- let runcode = `from neural_coder import enable\nenable(code="${this.tmp_path}",features=["${formatter}"], overwrite=True)`;
- let expr = { sum: ` ` };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let run_code1 = `with open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let expr1 = { optimizedCode: "optimized_code" };
- let result2 = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, run_code1, expr1, false);
- result2.then(value => {
- var _a, _b, _c, _d;
- let optimizedTexts = Object.values(value.optimizedCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- }
- });
- }
- else {
- if (formatter === '') {
- if (this.markdown) {
- this.markdown.model.value.text += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \n";
- }
- // cell.outputArea.node.innerText += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\n"
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n")`;
- let expr1 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}",features=[], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr1 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode2 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- });
- }
- else {
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}", features=["${formatter}"], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (next !== '') {
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode2 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- }
- let runcode3 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr3 = { path: "" };
- let res_tmp = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- res_tmp.then(value => {
- if (formatter === 'pytorch_inc_bf16') {
- let read_log = `import re\nwith open("${this.tmp_log_path}", 'r') as f:\n logs = f.readlines()\n fps_list=[]\n for log_line in logs[-4:]:\n pat = re.compile(r\'\\d+\\.?\\d+')\n fps = re.findall(pat,log_line)[-1]\n fps_list.append(float(fps))\nmaxi = max(fps_list)\nindex = fps_list.index(maxi)\nboost = round(maxi/fps_list[0],1)\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\nbest_feature = features[index]\nbest_name = feature_name[index]\nfeature_l = []\nfeature_l.append(best_feature)\nfrom neural_coder import enable\nenable(code="${this.tmp_path}",features=feature_l, overwrite=True)\nwith open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let read_expr = { boost: "boost", best_feature: "best_feature", best_name: "best_name", optimizeCode: "optimized_code", feature_l: "fps_list", maxi: "maxi", index: "index" };
- let read_result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);
- read_result.then(value => {
- var _a, _b, _c, _d;
- console.log("resres", value);
- let boost = Object.values(value.boost.data)[0];
- let best_name = Object.values(value.best_name.data)[0];
- let optimizedTexts = Object.values(value.optimizeCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \n`;
- }
- // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\n`
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- }
- // if(this.markdown){
- // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX \n`
- // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`
- // }
- let command = "lscpu | grep 'Model name'";
- let get_hardware = `import subprocess\nsubp = subprocess.Popen("${command}",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8")\nsubp.wait(2)\nhardware = subp.communicate()[0].replace("Model name:","").strip()`;
- let expr_hardware = { hardware: "hardware" };
- let hard_res = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware, false);
- hard_res.then(value => {
- let hard = Object.values(value.hardware.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\n`
- });
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log\n`
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- });
- }
- });
- });
- }
- }
- }
-}
-class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {
- constructor(notebookTracker, panel) {
- super(panel);
- this.notebookTracker = notebookTracker;
- this.notebookname = '';
- }
- async optimizeAction(config, formatter) {
- return this.optimizeCells(true, config, formatter);
- }
- async optimizeAllCodeCells(config, formatter, notebook, run) {
- return this.optimizeCells(false, config, formatter, notebook, run);
- }
- getCodeCells(ifmarkdown = true, notebook) {
- if (!this.notebookTracker.currentWidget) {
- return [];
- }
- const codeCells = [];
- notebook = notebook || this.notebookTracker.currentWidget.content;
- this.notebookname = notebook.title.label;
- let count = 0;
- notebook.widgets.forEach((cell) => {
- if (cell.model.type === 'code') {
- count += 1;
- codeCells.push(cell);
- }
- });
- if (ifmarkdown) {
- _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.NotebookActions.insertBelow(notebook);
- this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;
- _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.NotebookActions.changeCellType(notebook, 'markdown');
- const activeCell = notebook.activeCell;
- if (activeCell) {
- this.markdown = activeCell;
- }
- }
- this.cells = codeCells;
- return codeCells;
- }
- async optimizeCells(selectedOnly, config, formatter, notebook, run) {
- if (this.working) {
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- console.log("arrive here 333");
- this.working = true;
- const optimize_type = formatter !== undefined ? formatter : 'pytorch_mixed_precision_cpu';
- if (optimize_type === 'auto-quant') {
- selectedOnly = true;
- }
- else {
- selectedOnly = false;
- }
- const selectedCells = this.getCodeCells(selectedOnly, notebook);
- let cell = selectedCells[selectedCells.length - 1];
- if (selectedCells.length === 0) {
- this.working = false;
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- const currentTexts = selectedCells.map(cell => cell.model.value.text);
- if (optimize_type === 'auto-quant') {
- console.log("arrive here 444-111");
- if (this.markdown) {
- this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}" \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \n`;
- }
- // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}"\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\n`
- let runcode = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Auto-Quant Started ......\\n")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- let runcode2 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\n")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- let runcode3 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Benchmark Mode: Throughput\\n")`;
- let expr3 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- // cell.outputArea.node.setAttribute("class","pad")
- await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);
- }
- else {
- console.log("arrive here 444-222");
- await this.optimizeCode(currentTexts, optimize_type, "", "", "normal", true, this.panel, cell, run);
- }
- this.working = false;
- console.log("arrive here 555");
- return new Promise((resolve, reject) => {
- resolve("success!");
- });
- }
- applicable(formatter, currentWidget) {
- const currentNotebookWidget = this.notebookTracker.currentWidget;
- return currentNotebookWidget && currentWidget === currentNotebookWidget;
- }
-}
-
-
-/***/ }),
-
-/***/ "./lib/index.js":
-/*!**********************!*\
- !*** ./lib/index.js ***!
- \**********************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (__WEBPACK_DEFAULT_EXPORT__)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/notebook */ "webpack/sharing/consume/default/@jupyterlab/notebook");
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! @jupyterlab/apputils */ "webpack/sharing/consume/default/@jupyterlab/apputils");
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var _jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! @jupyterlab/settingregistry */ "webpack/sharing/consume/default/@jupyterlab/settingregistry");
-/* harmony import */ var _jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__);
-/* harmony import */ var _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! @jupyterlab/mainmenu */ "webpack/sharing/consume/default/@jupyterlab/mainmenu");
-/* harmony import */ var _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__);
-/* harmony import */ var _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! @jupyterlab/ui-components */ "webpack/sharing/consume/default/@jupyterlab/ui-components");
-/* harmony import */ var _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__);
-/* harmony import */ var _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! @lumino/widgets */ "webpack/sharing/consume/default/@lumino/widgets");
-/* harmony import */ var _lumino_widgets__WEBPACK_IMPORTED_MODULE_5___default = /*#__PURE__*/__webpack_require__.n(_lumino_widgets__WEBPACK_IMPORTED_MODULE_5__);
-/* harmony import */ var _deepcoder__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./deepcoder */ "./lib/deepcoder.js");
-/* harmony import */ var _constants__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./constants */ "./lib/constants.js");
-
-
-
-
-
-
-
-
-class neural_compressor_ext_lab {
- constructor(app, tracker, notebookpanel) {
- this.app = app;
- this.tracker = tracker;
- this.notebookpanel = notebookpanel;
- this.setupWidgetExtension();
- this.config = '';
- }
- createNew(nb) {
- this.notebookpanel = nb;
- this.notebookCodeOptimizer = new _deepcoder__WEBPACK_IMPORTED_MODULE_6__.JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);
- const svg = document.createElement("svg");
- svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_FORMAT_ALL_SVG;
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_RUN;
- const div = document.createElement("div");
- div.setAttribute("class", "wrapper");
- const span = document.createElement("span");
- span.setAttribute("class", "f1ozlkqi");
- span.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.SVG;
- const selector = document.createElement("select");
- selector.setAttribute("class", "aselector");
- selector.id = "NeuralCoder";
- const option1 = document.createElement("option");
- option1.value = "pytorch_inc_static_quant_fx";
- option1.innerText = "INC Enable INT8 (Static)";
- option1.selected = true;
- const option2 = document.createElement("option");
- option2.value = "pytorch_inc_dynamic_quant";
- option2.innerText = "INC Enable INT8 (Dynamic)";
- const option3 = document.createElement("option");
- option3.value = "pytorch_inc_bf16";
- option3.innerText = "INC Enable BF16";
- const option4 = document.createElement("option");
- option4.value = "auto-quant";
- option4.innerText = "INC Auto Enable & Benchmark";
- selector.options.add(option1);
- selector.options.add(option2);
- selector.options.add(option3);
- selector.options.add(option4);
- div.appendChild(selector);
- div.appendChild(span);
- const selector_widget = new _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__.Widget();
- selector_widget.node.appendChild(div);
- selector_widget.addClass("aselector");
- let notebookCodeOptimizer = this.notebookCodeOptimizer;
- let config = this.config;
- const dia_input = document.createElement("input");
- const dia_widget = new _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__.Widget();
- dia_widget.node.appendChild(dia_input);
- dia_widget.addClass("dialog");
- const run_button = new _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.ToolbarButton({
- tooltip: 'NeuralCoder',
- icon: new _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__.LabIcon({
- name: "run",
- svgstr: _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_RUN
- }),
- onClick: async function () {
- var _a, _b, _c, _d;
- console.log("arrive here 111");
- (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);
- if (selector.options[selector.selectedIndex].value === 'auto-quant') {
- await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.showDialog)({
- title: 'Please input execute parameters:',
- body: dia_widget,
- buttons: [_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.Dialog.okButton({ label: 'Confirm' })]
- }).then(result => {
- if (result.button.accept) {
- config = dia_input.value;
- }
- });
- }
- console.log("arrive here 222");
- await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);
- }
- });
- nb.toolbar.insertItem(11, "nc", run_button);
- nb.toolbar.insertItem(12, "selector", selector_widget);
- }
- setupWidgetExtension() {
- this.app.docRegistry.addWidgetExtension('Notebook', this);
- }
-}
-/**
- * Initialization data for the neural_compressor_ext_lab extension.
- */
-const plugin = {
- id: 'neural_compressor_ext_lab:plugin',
- autoStart: true,
- requires: [_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.INotebookTracker, _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__.IMainMenu],
- optional: [_jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__.ISettingRegistry],
- activate: (app, tracker, notebookpanel) => {
- new neural_compressor_ext_lab(app, tracker, notebookpanel);
- console.log('JupyterLab extension neural_compressor_ext_lab is activated!');
- }
-};
-/* harmony default export */ const __WEBPACK_DEFAULT_EXPORT__ = (plugin);
-
-
-/***/ }),
-
-/***/ "./lib/utils.js":
-/*!**********************!*\
- !*** ./lib/utils.js ***!
- \**********************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (/* binding */ NotebookUtilities)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/apputils */ "webpack/sharing/consume/default/@jupyterlab/apputils");
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var react_sanitized_html__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! react-sanitized-html */ "webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html");
-/* harmony import */ var react_sanitized_html__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(react_sanitized_html__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var react__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! react */ "webpack/sharing/consume/default/react");
-/* harmony import */ var react__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(react__WEBPACK_IMPORTED_MODULE_2__);
-/*
- * Copyright 2019-2020 The Kale Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// @ts-ignore
-
-
-class NotebookUtilities {
- /**
- * generate random number
- * @Min
- * @Max
- */
- static GetRandomNum(Min, Max) {
- let Range;
- Range = Max - Min;
- var Rand = Math.random();
- return (Min + Math.round(Rand * Range));
- }
- /**
- * Builds an HTML container by sanitizing a list of strings and converting
- * them in valid HTML
- * @param msg A list of string with HTML formatting
- * @returns a HTMLDivElement composed of a list of spans with formatted text
- */
- static buildDialogBody(msg) {
- return (react__WEBPACK_IMPORTED_MODULE_2__.createElement("div", null, msg.map((s, i) => {
- return (react__WEBPACK_IMPORTED_MODULE_2__.createElement(react__WEBPACK_IMPORTED_MODULE_2__.Fragment, { key: `msg-${i}` },
- react__WEBPACK_IMPORTED_MODULE_2__.createElement((react_sanitized_html__WEBPACK_IMPORTED_MODULE_1___default()), { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),
- react__WEBPACK_IMPORTED_MODULE_2__.createElement("br", null)));
- })));
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a simple message.
- * @param title The title for the message popup
- * @param msg The message as an array of strings
- * @param buttonLabel The label to use for the button. Default is 'OK'
- * @param buttonClassName The classname to give to the 'ok' button
- * @returns Promise - A promise once the message is closed.
- */
- static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {
- const buttons = [
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.okButton({ label: buttonLabel, className: buttonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.showDialog)({ title, buttons, body: messageBody });
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.
- * @param title The title for the message popup
- * @param msg The message
- * @param acceptLabel The label to use for the accept button. Default is 'YES'
- * @param rejectLabel The label to use for the reject button. Default is 'NO'
- * @param yesButtonClassName The classname to give to the accept button.
- * @param noButtonClassName The classname to give to the cancel button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {
- const buttons = [
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- const result = await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.showDialog)({ title, buttons, body: messageBody });
- return result.button.label === acceptLabel;
- }
- /**
- * Opens a pop-up dialog in JupyterLab with various information and button
- * triggering reloading the page.
- * @param title The title for the message popup
- * @param msg The message
- * @param buttonLabel The label to use for the button. Default is 'Refresh'
- * @param buttonClassName The classname to give to the 'refresh' button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {
- await this.showMessage(title, msg, buttonLabel, buttonClassName);
- location.reload();
- }
- /**
- * @description Creates a new JupyterLab notebook for use by the application
- * @param command The command registry
- * @returns Promise - A promise containing the notebook panel object that was created (if successful).
- */
- static async createNewNotebook(command) {
- const notebook = await command.execute('notebook:create-new', {
- activate: true,
- path: '',
- preferredLanguage: '',
- });
- await notebook.session.ready;
- return notebook;
- }
- /**
- * Safely saves the Jupyter notebook document contents to disk
- * @param notebookPanel The notebook panel containing the notebook to save
- */
- static async saveNotebook(notebookPanel) {
- if (notebookPanel) {
- await notebookPanel.context.ready;
- notebookPanel.context.save();
- return true;
- }
- return false;
- }
- /**
- * Convert the notebook contents to JSON
- * @param notebookPanel The notebook panel containing the notebook to serialize
- */
- static notebookToJSON(notebookPanel) {
- if (notebookPanel.content.model) {
- return notebookPanel.content.model.toJSON();
- }
- return null;
- }
- /**
- * @description Gets the value of a key from specified notebook's metadata.
- * @param notebookPanel The notebook to get meta data from.
- * @param key The key of the value.
- * @returns any -The value of the metadata. Returns null if the key doesn't exist.
- */
- static getMetaData(notebookPanel, key) {
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {
- return notebookPanel.model.metadata.get(key);
- }
- return null;
- }
- /**
- * @description Sets the key value pair in the notebook's metadata.
- * If the key doesn't exists it will add one.
- * @param notebookPanel The notebook to set meta data in.
- * @param key The key of the value to create.
- * @param value The value to set.
- * @param save Default is false. Whether the notebook should be saved after the meta data is set.
- * Note: This function will not wait for the save to complete, it only sends a save request.
- * @returns The old value for the key, or undefined if it did not exist.
- */
- static setMetaData(notebookPanel, key, value, save = false) {
- var _a;
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? void 0 : _a.metadata.set(key, value);
- if (save) {
- this.saveNotebook(notebookPanel);
- }
- return oldVal;
- }
- // /**
- // * Get a new Kernel, not tied to a Notebook
- // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services
- // */
- // public static async createNewKernel() {
- // // Get info about the available kernels and start a new one.
- // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {
- // // console.log('Default spec:', kernelSpecs.default);
- // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));
- // // use the default name
- // return { name: kernelSpecs.default };
- // });
- // return await Kernel.startNew(options).then(_kernel => {
- // return _kernel;
- // });
- // }
- // // TODO: We can use this context manager to execute commands inside a new kernel
- // // and be sure that it will be disposed of at the end.
- // // Another approach could be to create a kale_rpc Kernel, as a singleton,
- // // created at startup. The only (possible) drawback is that we can not name
- // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would
- // // not recognize the kernel. A solution could be to have a kernel spec dedicated to kale rpc calls.
- // public static async executeWithNewKernel(action: Function, args: any[] = []) {
- // // create brand new kernel
- // const _k = await this.createNewKernel();
- // // execute action inside kernel
- // const res = await action(_k, ...args);
- // // close kernel
- // _k.shutdown();
- // // return result
- // return res;
- // }
- /**
- * @description This function runs code directly in the notebook's kernel and then evaluates the
- * result and returns it as a promise.
- * @param kernel The kernel to run the code in.
- * @param runCode The code to run in the kernel.
- * @param userExpressions The expressions used to capture the desired info from the executed code.
- * @param runSilent Default is false. If true, kernel will execute as quietly as possible.
- * store_history will be set to false, and no broadcast on IOPUB channel will be made.
- * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history
- * and the counter which is shown in the cells will be incremented to reflect code was run.
- * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using
- * an input_request message.
- * @param stopOnError Default is false. If True, does not abort the execution queue, if an exception is encountered.
- * This allows the queued execution of multiple execute_requests, even if they generate exceptions.
- * @returns Promise - A promise containing the execution results of the code as an object with
- * keys based on the user_expressions.
- * @example
- * //The code
- * const code = "a=123\nb=456\nsum=a+b";
- * //The user expressions
- * const expr = {sum: "sum",prod: "a*b",args:"[a,b,sum]"};
- * //Async function call (returns a promise)
- * sendKernelRequest(notebookPanel, code, expr,false);
- * //Result when promise resolves:
- * {
- * sum:{status:"ok",data:{"text/plain":"579"},metadata:{}},
- * prod:{status:"ok",data:{"text/plain":"56088"},metadata:{}},
- * args:{status:"ok",data:{"text/plain":"[123, 456, 579]"}}
- * }
- * @see For more information on JupyterLab messages:
- * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results
- */
- static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- if (!kernel) {
- throw new Error('Kernel is null or undefined.');
- }
- // Wait for kernel to be ready before sending request
- // await kernel.status;
- const message = await kernel.requestExecute({
- allow_stdin: allowStdIn,
- code: runCode,
- silent: runSilent,
- stop_on_error: stopOnError,
- store_history: storeHistory,
- user_expressions: userExpressions,
- }).done;
- const content = message.content;
- if (content.status !== 'ok') {
- // If response is not 'ok', throw contents as error, log code
- const msg = `Code caused an error:\n${runCode}`;
- console.error(msg);
- if (content.traceback) {
- content.traceback.forEach((line) => console.log(line.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));
- }
- throw content;
- }
- // Return user_expressions of the content
- return content.user_expressions;
- }
- /**
- * Same as method sendKernelRequest but passing
- * a NotebookPanel instead of a Kernel
- */
- static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- var _a, _b, _c, _d;
- if (!notebookPanel) {
- throw new Error('Notebook is null or undefined.');
- }
- // Wait for notebook panel to be ready
- await notebookPanel.activate;
- await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);
- console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);
- return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);
- }
-}
-
-
-/***/ })
-
-}]);
-//# sourceMappingURL=lib_index_js.0c0187df9df8bc38b9c5.js.map
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.0c0187df9df8bc38b9c5.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.0c0187df9df8bc38b9c5.js.map
deleted file mode 100644
index a0dd93ddd68..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.0c0187df9df8bc38b9c5.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"lib_index_js.0c0187df9df8bc38b9c5.js","mappings":";;;;;;;;;;;;;AAAO;AACP;AACA;AACA;AACA,6IAA6I,gCAAgC,gBAAgB,sBAAsB,qVAAqV,mBAAmB,gVAAgV,mBAAmB;AAC95B;AACA;AACA,yCAAyC,4BAA4B;AACrE,oCAAoC,2BAA2B;AAC/D;AACA;AACA,CAAC,8BAA8B;;;;;;;;;;;;;;;;;;;ACXwB;AACf;AACA;AACxC;AACA;AACA;AACA;AACA;AACA,oBAAoB,2DAA8B;AAClD,wBAAwB,2DAAmB;AAC3C,4BAA4B,2DAAmB;AAC/C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT,kCAAkC,MAAM,0CAA0C,cAAc;AAChG,uBAAuB;AACvB,QAAQ,4EAA+C;AACvD;AACA,2EAA2E,cAAc,eAAe,UAAU;AAClH,yBAAyB;AACzB,YAAY,4EAA+C;AAC3D,0CAA0C,cAAc;AACxD,0BAA0B;AAC1B,0BAA0B,4EAA+C;AACzE;AACA;AACA;AACA;AACA;AACA,gCAAgC,0BAA0B;AAC1D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,wCAAwC,0DAAkB;AAC1D;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA;AACA,6CAA6C,cAAc;AAC3D,8BAA8B;AAC9B,gBAAgB,4EAA+C;AAC/D,wGAAwG,cAAc,uCAAuC,QAAQ;AACrK,6BAA6B;AAC7B,6BAA6B,4EAA+C;AAC5E;AACA;AACA;AACA;AACA,wIAAwI,KAAK;AAC7I;AACA,uIAAuI,KAAK;AAC5I,6GAA6G,KAAK;AAClH,gDAAgD,cAAc,8BAA8B,KAAK;AACjG,iCAAiC;AACjC,oBAAoB,4EAA+C;AACnE;AACA,8GAA8G,MAAM;AACpH;AACA,6GAA6G,MAAM;AACnH,iDAAiD,cAAc,mFAAmF,MAAM;AACxJ,kCAAkC;AAClC,oBAAoB,4EAA+C;AACnE,iDAAiD,kBAAkB,kCAAkC,KAAK;AAC1G,kCAAkC;AAClC,oBAAoB,4EAA+C;AACnE,iBAAiB;AACjB;AACA;AACA,wGAAwG,cAAc,gBAAgB,UAAU,4BAA4B,QAAQ;AACpL,6BAA6B;AAC7B,6BAA6B,4EAA+C;AAC5E;AACA;AACA;AACA;AACA,kHAAkH,MAAM,KAAK,KAAK;AAClI;AACA,iHAAiH,MAAM,KAAK,KAAK;AACjI,uFAAuF,MAAM,KAAK,KAAK;AACvG,gDAAgD,cAAc,kCAAkC,KAAK;AACrG,iCAAiC;AACjC,oBAAoB,4EAA+C;AACnE;AACA;AACA,kHAAkH,MAAM;AACxH;AACA,iHAAiH,MAAM;AACvH,qDAAqD,cAAc,mFAAmF,MAAM;AAC5J,sCAAsC;AACtC,wBAAwB,4EAA+C;AACvE;AACA,iDAAiD,kBAAkB,kCAAkC,KAAK;AAC1G,kCAAkC;AAClC,kCAAkC,4EAA+C;AACjF;AACA;AACA,oEAAoE,kBAAkB,qpBAAqpB,cAAc,oDAAoD,cAAc;AAC3zB,8CAA8C;AAC9C,8CAA8C,4EAA+C;AAC7F;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yHAAyH,YAAY;AACrI,8GAA8G,MAAM;AACpH;AACA,uHAAuH,UAAU;AACjI,6GAA6G,MAAM;AACnH;AACA,gDAAgD,0BAA0B;AAC1E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC
A;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yJAAyJ,UAAU;AACnK;AACA;AACA,kGAAkG,QAAQ;AAC1G,sDAAsD;AACtD,+CAA+C,4EAA+C;AAC9F;AACA;AACA;AACA,0GAA0G,OAAO;AACjH,wJAAwJ,UAAU;AAClK;AACA,yGAAyG,KAAK;AAC9G,iCAAiC;AACjC,oJAAoJ,UAAU;AAC9J;AACA,oDAAoD,0DAAkB;AACtE;AACA,6BAA6B;AAC7B;AACA,qBAAqB;AACrB,iBAAiB;AACjB;AACA;AACA;AACA;AACO;AACP;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA,YAAY,6EAA2B;AACvC;AACA,YAAY,gFAA8B;AAC1C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA,mHAAmH,kBAAkB;AACrI;AACA;AACA;AACA,kHAAkH,kBAAkB;AACpI;AACA,wCAAwC,cAAc;AACtD,yBAAyB;AACzB,YAAY,4EAA+C;AAC3D,yCAAyC,cAAc,gGAAgG,kBAAkB;AACzK,0BAA0B;AAC1B,YAAY,4EAA+C;AAC3D,yCAAyC,cAAc;AACvD,0BAA0B;AAC1B,YAAY,4EAA+C;AAC3D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACtSwD;AACiB;AACV;AACd;AACG;AACX;AACqB;AACtB;AACxC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yCAAyC,uEAA+B;AACxE;AACA,wBAAwB,qEAA6B;AACrD;AACA,4BAA4B,0DAAkB;AAC9C;AACA;AACA;AACA;AACA,yBAAyB,qDAAa;AACtC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oCAAoC,mDAAM;AAC1C;AACA;AACA;AACA;AACA;AACA,+BAA+B,mDAAM;AACrC;AACA;AACA,+BAA+B,+DAAa;AAC5C;AACA,sBAAsB,8DAAO;AAC7B;AACA,wBAAwB,0DAAkB;AAC1C,aAAa;AACb;AACA;AACA;AACA;AACA;AACA,0BAA0B,gEAAU;AACpC;AACA;AACA,kCAAkC,iEAAe,GAAG,kBAAkB;AACtE,qBAAqB;AACrB;AACA;AACA;AACA,qBAAqB;AACrB;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,kEAAgB,EAAE,2DAAS;AAC1C,eAAe,yEAAgB;AAC/B;AACA;AACA;AACA;AACA;AACA,iEAAe,MAAM,EAAC;;;;;;;;;;;;;;;;;;;;;ACxGtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC0D;AAC1D;AACiD;AAClB;AAChB;AACf;AACA;AACA;AACA;AACA;AACA;AACA;AAC
A;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,gBAAgB,gDAAmB;AACnC,oBAAoB,gDAAmB,CAAC,2CAAc,IAAI,YAAY,EAAE,GAAG;AAC3E,gBAAgB,gDAAmB,CAAC,6DAAa,IAAI,qBAAqB,aAAa,gEAAgE;AACvJ,gBAAgB,gDAAmB;AACnC,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,YAAY,iEAAe,GAAG,gDAAgD;AAC9E;AACA;AACA,cAAc,gEAAU,GAAG,mCAAmC;AAC9D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,YAAY,iEAAe,GAAG,mDAAmD;AACjF,YAAY,qEAAmB,GAAG,kDAAkD;AACpF;AACA;AACA,6BAA6B,gEAAU,GAAG,mCAAmC;AAC7E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oBAAoB;AACpB,UAAU;AACV;AACA;AACA,UAAU;AACV;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,qBAAqB;AACrB;AACA;AACA;AACA;AACA,aAAa,kBAAkB,mBAAmB,aAAa;AAC/D,cAAc,kBAAkB,qBAAqB,aAAa;AAClE,cAAc,kBAAkB;AAChC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA,kDAAkD,QAAQ;AAC1D;AACA;AACA,kGAAkG,YAAY,IAAI,IAAI,MAAM,IAAI;AAChI;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA","sources":["webpack://neural_compressor_ext_lab/./lib/constants.js","webpack://neural_compressor_ext_lab/./lib/deepcoder.js","webpack://neural_compressor_ext_lab/./lib/index.js","webpack://neural_compressor_ext_lab/./lib/utils.js"],"sourcesContent":["export var Constants;\n(function (Constants) {\n Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';\n Constants.WORK_PATH = 
\"neural_coder_workspace/\";\n Constants.ICON_FORMAT_ALL_SVG = ' ';\n Constants.ICON_RUN = ' ';\n Constants.SVG = ' ';\n Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;\n Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;\n Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';\n Constants.PLUGIN_VERSION = '0.1.0';\n})(Constants || (Constants = {}));\n","import { NotebookActions } from '@jupyterlab/notebook';\nimport NotebookUtilities from \"./utils\";\nimport { Constants } from './constants';\nclass JupyterlabCodeOptimizer {\n constructor(panel) {\n this.working = false;\n this.panel = panel;\n this.tmp_path = \"tmp.py\";\n this.rand = NotebookUtilities.GetRandomNum(0, 200);\n this.log_path = Constants.WORK_PATH + \"NeuralCoder\" + this.rand + \".log\";\n this.tmp_log_path = Constants.WORK_PATH + \"NeuralCoder_tmp\" + \".log\";\n this.cells = [];\n }\n async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {\n let codes = [];\n code.forEach(function (value) {\n value = value.replace(/('\\\\n')/g, '^^^');\n value = value.replace(/\\\\n\"/g, '###');\n value = value.replace(/\\\\n'/g, '###');\n value = value.replace(/\"\\\\n/g, '@@');\n value = value.replace(/'\\\\n/g, '@@');\n value = value.replace(/\\n/g, '\\\\n');\n value = value.replace(/\"/g, '+++');\n value = value.replace(/,/g, '$');\n codes.push(value);\n });\n let gen_code = `code = \"${codes}\"\\ncodes = code.split(',')\\nwith open( '${this.tmp_path}', 'w+' ) as f:\\n for i in range(0,len(codes)):\\n f.write('# this is the beginning of a single code snippet\\\\n')\\n code_list = codes[i].replace('$',',').replace('+++','\\\"').split('\\\\n')\\n for line in code_list:\\n if('split(^^^)' in line):\\n line=line.replace('split(^^^)', 'split(\\\\'\\\\\\\\n\\\\')')\\n if('###' in line):\\n line=line.replace('###', '\\\\\\\\n\\\"')\\n if('@@' in line):\\n line=line.replace('@@', '\\\"\\\\\\\\n')\\n f.write(line+'\\\\n')`;\n const expr = { 
code_list: `code_list` };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, gen_code, expr, false);\n if (options === 'normal') {\n let runcode = `from neural_coder import enable\\nenable(code=\"${this.tmp_path}\",features=[\"${formatter}\"], overwrite=True)`;\n let expr = { sum: ` ` };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let run_code1 = `with open(\"${this.tmp_path}\", 'r') as f:\\n optimized_code = f.read()\\n`;\n let expr1 = { optimizedCode: \"optimized_code\" };\n let result2 = NotebookUtilities.sendKernelRequestFromNotebook(panel, run_code1, expr1, false);\n result2.then(value => {\n var _a, _b, _c, _d;\n let optimizedTexts = Object.values(value.optimizedCode.data)[0];\n let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\\\n').slice(1);\n optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);\n for (let i = 0; i < optimizeCodes.length; ++i) {\n const cell = this.cells[i];\n const currentTexts = this.cells.map(cell => cell.model.value.text);\n const currentText = currentTexts[i];\n let optimizedtext = optimizeCodes[i];\n optimizedtext = optimizedtext.replace(/\\\\'\\\\\\\\n\\\\'/g, \"^^^\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n\"/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n'/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\"\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/'\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/\\\\n/g, '\\n');\n optimizedtext = optimizedtext.replace(/\\\\'/g, \"'\");\n optimizedtext = optimizedtext.replace(/\\^\\^\\^/g, \"'\\\\n'\");\n optimizedtext = optimizedtext.replace(/\\+\\+\\+/g, \"\\\\n\\\"\");\n optimizedtext = optimizedtext.replace(/\\@\\@\\@/g, \"\\\"\\\\n\");\n if (cell.model.value.text === currentText) {\n cell.model.value.text = optimizedtext;\n }\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = 
Constants.ICON_RUN;\n (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);\n }\n });\n }\n else {\n if (formatter === '') {\n if (this.markdown) {\n this.markdown.model.value.text += \"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \\n\";\n }\n // cell.outputArea.node.innerText += \"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n\"\n let runcode1 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\\\n\")`;\n let expr1 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);\n let runcode = `from neural_coder import enable\\nperfomance, mode, path = enable(code=\"${this.tmp_path}\",features=[], run_bench=True, args=\"${options}\")\\nwith open(path + '/bench.log', 'r') as f:\\n logs = f.readlines()\\nlog_line = logs[4]\\nlog = log_line.split(\"[\")[1].split(\"]\")[0]`;\n let expr = { path: \"path\", log: \"log\" };\n let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let fps;\n result.then(value => {\n fps = Object.values(value.log.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`\n let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\\\n`;\n let runcode = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr = { path: \"\" };\n 
NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n`\n let runcode1 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\\\n\")`;\n let expr1 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);\n let runcode2 = `with open(\"${this.tmp_log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n });\n }\n else {\n let runcode = `from neural_coder import enable\\nperfomance, mode, path = enable(code=\"${this.tmp_path}\", features=[\"${formatter}\"], run_bench=True, args=\"${options}\")\\nwith open(path + '/bench.log', 'r') as f:\\n logs = f.readlines()\\nlog_line = logs[4]\\nlog = log_line.split(\"[\")[1].split(\"]\")[0]`;\n let expr = { path: \"path\", log: \"log\" };\n let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let fps;\n result.then(value => {\n fps = Object.values(value.log.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\\n`\n let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\\\n`;\n let runcode = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n if (next !== '') {\n if (this.markdown) {\n 
this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n`\n let runcode2 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\\\n\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n }\n let runcode3 = `with open(\"${this.tmp_log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr3 = { path: \"\" };\n let res_tmp = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);\n res_tmp.then(value => {\n if (formatter === 'pytorch_inc_bf16') {\n let read_log = `import re\\nwith open(\"${this.tmp_log_path}\", 'r') as f:\\n logs = f.readlines()\\n fps_list=[]\\n for log_line in logs[-4:]:\\n pat = re.compile(r\\'\\\\d+\\\\.?\\\\d+')\\n fps = re.findall(pat,log_line)[-1]\\n fps_list.append(float(fps))\\nmaxi = max(fps_list)\\nindex = fps_list.index(maxi)\\nboost = round(maxi/fps_list[0],1)\\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\\nbest_feature = features[index]\\nbest_name = feature_name[index]\\nfeature_l = []\\nfeature_l.append(best_feature)\\nfrom neural_coder import enable\\nenable(code=\"${this.tmp_path}\",features=feature_l, overwrite=True)\\nwith open(\"${this.tmp_path}\", 'r') as f:\\n optimized_code = f.read()\\n`;\n let read_expr = { boost: \"boost\", best_feature: \"best_feature\", best_name: \"best_name\", optimizeCode: \"optimized_code\", feature_l: \"fps_list\", maxi: \"maxi\", index: \"index\" };\n let read_result = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);\n read_result.then(value => {\n var _a, _b, _c, _d;\n 
console.log(\"resres\", value);\n let boost = Object.values(value.boost.data)[0];\n let best_name = Object.values(value.best_name.data)[0];\n let optimizedTexts = Object.values(value.optimizeCode.data)[0];\n let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\\\n').slice(1);\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \\n`;\n }\n // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\\n`\n optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);\n for (let i = 0; i < optimizeCodes.length; ++i) {\n const cell = this.cells[i];\n const currentTexts = this.cells.map(cell => cell.model.value.text);\n const currentText = currentTexts[i];\n let optimizedtext = optimizeCodes[i];\n optimizedtext = optimizedtext.replace(/\\\\'\\\\\\\\n\\\\'/g, \"^^^\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n\"/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n'/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\"\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/'\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/\\\\n/g, '\\n');\n optimizedtext = optimizedtext.replace(/\\\\'/g, \"'\");\n optimizedtext = optimizedtext.replace(/\\^\\^\\^/g, \"'\\\\n'\");\n optimizedtext = optimizedtext.replace(/\\+\\+\\+/g, \"\\\\n\\\"\");\n optimizedtext = optimizedtext.replace(/\\@\\@\\@/g, \"\\\"\\\\n\");\n if (cell.model.value.text === currentText) {\n cell.model.value.text = optimizedtext;\n }\n }\n // if(this.markdown){\n // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX 
\\n`\n // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log \\n`\n // }\n let command = \"lscpu | grep 'Model name'\";\n let get_hardware = `import subprocess\\nsubp = subprocess.Popen(\"${command}\",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding=\"utf-8\")\\nsubp.wait(2)\\nhardware = subp.communicate()[0].replace(\"Model name:\",\"\").strip()`;\n let expr_hardware = { hardware: \"hardware\" };\n let hard_res = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware, false);\n hard_res.then(value => {\n let hard = Object.values(value.hardware.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\\n`\n });\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log\\n`\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = Constants.ICON_RUN;\n (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? 
void 0 : _d.replaceWith(run_svg);\n });\n }\n });\n });\n }\n }\n }\n}\nexport class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {\n constructor(notebookTracker, panel) {\n super(panel);\n this.notebookTracker = notebookTracker;\n this.notebookname = '';\n }\n async optimizeAction(config, formatter) {\n return this.optimizeCells(true, config, formatter);\n }\n async optimizeAllCodeCells(config, formatter, notebook, run) {\n return this.optimizeCells(false, config, formatter, notebook, run);\n }\n getCodeCells(ifmarkdown = true, notebook) {\n if (!this.notebookTracker.currentWidget) {\n return [];\n }\n const codeCells = [];\n notebook = notebook || this.notebookTracker.currentWidget.content;\n this.notebookname = notebook.title.label;\n let count = 0;\n notebook.widgets.forEach((cell) => {\n if (cell.model.type === 'code') {\n count += 1;\n codeCells.push(cell);\n }\n });\n if (ifmarkdown) {\n NotebookActions.insertBelow(notebook);\n this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;\n NotebookActions.changeCellType(notebook, 'markdown');\n const activeCell = notebook.activeCell;\n if (activeCell) {\n this.markdown = activeCell;\n }\n }\n this.cells = codeCells;\n return codeCells;\n }\n async optimizeCells(selectedOnly, config, formatter, notebook, run) {\n if (this.working) {\n return new Promise((resolve, reject) => {\n resolve(\"false!\");\n });\n }\n console.log(\"arrive here 333\");\n this.working = true;\n const optimize_type = formatter !== undefined ? 
formatter : 'pytorch_mixed_precision_cpu';\n if (optimize_type === 'auto-quant') {\n selectedOnly = true;\n }\n else {\n selectedOnly = false;\n }\n const selectedCells = this.getCodeCells(selectedOnly, notebook);\n let cell = selectedCells[selectedCells.length - 1];\n if (selectedCells.length === 0) {\n this.working = false;\n return new Promise((resolve, reject) => {\n resolve(\"false!\");\n });\n }\n const currentTexts = selectedCells.map(cell => cell.model.value.text);\n if (optimize_type === 'auto-quant') {\n console.log(\"arrive here 444-111\");\n if (this.markdown) {\n this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook \"${this.notebookname}\" \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \\n`;\n }\n // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook \"${this.notebookname}\"\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\\n`\n let runcode = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Auto-Quant Started ......\\\\n\")`;\n let expr = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n let runcode2 = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\\\n\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n let runcode3 = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Benchmark Mode: Throughput\\\\n\")`;\n let expr3 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);\n // 
cell.outputArea.node.setAttribute(\"class\",\"pad\")\n await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);\n }\n else {\n console.log(\"arrive here 444-222\");\n await this.optimizeCode(currentTexts, optimize_type, \"\", \"\", \"normal\", true, this.panel, cell, run);\n }\n this.working = false;\n console.log(\"arrive here 555\");\n return new Promise((resolve, reject) => {\n resolve(\"success!\");\n });\n }\n applicable(formatter, currentWidget) {\n const currentNotebookWidget = this.notebookTracker.currentWidget;\n return currentNotebookWidget && currentWidget === currentNotebookWidget;\n }\n}\n","import { INotebookTracker } from '@jupyterlab/notebook';\nimport { ToolbarButton, showDialog, Dialog } from '@jupyterlab/apputils';\nimport { ISettingRegistry } from '@jupyterlab/settingregistry';\nimport { IMainMenu } from '@jupyterlab/mainmenu';\nimport { LabIcon } from '@jupyterlab/ui-components';\nimport { Widget } from '@lumino/widgets';\nimport { JupyterlabNotebookCodeOptimizer } from './deepcoder';\nimport { Constants } from './constants';\nclass neural_compressor_ext_lab {\n constructor(app, tracker, notebookpanel) {\n this.app = app;\n this.tracker = tracker;\n this.notebookpanel = notebookpanel;\n this.setupWidgetExtension();\n this.config = '';\n }\n createNew(nb) {\n this.notebookpanel = nb;\n this.notebookCodeOptimizer = new JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);\n const svg = document.createElement(\"svg\");\n 
svg.innerHTML = Constants.ICON_FORMAT_ALL_SVG;\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = Constants.ICON_RUN;\n const div = document.createElement(\"div\");\n div.setAttribute(\"class\", \"wrapper\");\n const span = document.createElement(\"span\");\n span.setAttribute(\"class\", \"f1ozlkqi\");\n span.innerHTML = Constants.SVG;\n const selector = document.createElement(\"select\");\n selector.setAttribute(\"class\", \"aselector\");\n selector.id = \"NeuralCoder\";\n const option1 = document.createElement(\"option\");\n option1.value = \"pytorch_inc_static_quant_fx\";\n option1.innerText = \"INC Enable INT8 (Static)\";\n option1.selected = true;\n const option2 = document.createElement(\"option\");\n option2.value = \"pytorch_inc_dynamic_quant\";\n option2.innerText = \"INC Enable INT8 (Dynamic)\";\n const option3 = document.createElement(\"option\");\n option3.value = \"pytorch_inc_bf16\";\n option3.innerText = \"INC Enable BF16\";\n const option4 = document.createElement(\"option\");\n option4.value = \"auto-quant\";\n option4.innerText = \"INC Auto Enable & Benchmark\";\n selector.options.add(option1);\n selector.options.add(option2);\n selector.options.add(option3);\n selector.options.add(option4);\n div.appendChild(selector);\n div.appendChild(span);\n const selector_widget = new Widget();\n selector_widget.node.appendChild(div);\n selector_widget.addClass(\"aselector\");\n let notebookCodeOptimizer = this.notebookCodeOptimizer;\n let config = this.config;\n const dia_input = document.createElement(\"input\");\n const dia_widget = new Widget();\n dia_widget.node.appendChild(dia_input);\n dia_widget.addClass(\"dialog\");\n const run_button = new ToolbarButton({\n tooltip: 'NeuralCoder',\n icon: new LabIcon({\n name: \"run\",\n svgstr: Constants.ICON_RUN\n }),\n onClick: async function () {\n var _a, _b, _c, _d;\n console.log(\"arrive here 111\");\n (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? 
void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);\n if (selector.options[selector.selectedIndex].value === 'auto-quant') {\n await showDialog({\n title: 'Please input execute parameters:',\n body: dia_widget,\n buttons: [Dialog.okButton({ label: 'Confirm' })]\n }).then(result => {\n if (result.button.accept) {\n config = dia_input.value;\n }\n });\n }\n console.log(\"arrive here 222\");\n await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);\n }\n });\n nb.toolbar.insertItem(11, \"nc\", run_button);\n nb.toolbar.insertItem(12, \"selector\", selector_widget);\n }\n setupWidgetExtension() {\n this.app.docRegistry.addWidgetExtension('Notebook', this);\n }\n}\n/**\n * Initialization data for the neural_compressor_ext_lab extension.\n */\nconst plugin = {\n id: 'neural_compressor_ext_lab:plugin',\n autoStart: true,\n requires: [INotebookTracker, IMainMenu],\n optional: [ISettingRegistry],\n activate: (app, tracker, notebookpanel) => {\n new neural_compressor_ext_lab(app, tracker, notebookpanel);\n console.log('JupyterLab extension neural_compressor_ext_lab is activated!');\n }\n};\nexport default plugin;\n","/*\n * Copyright 2019-2020 The Kale Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport { Dialog, showDialog } from 
'@jupyterlab/apputils';\n// @ts-ignore\nimport SanitizedHTML from 'react-sanitized-html';\nimport * as React from 'react';\nexport default class NotebookUtilities {\n /**\n * generate random number\n * @Min\n * @Max\n */\n static GetRandomNum(Min, Max) {\n let Range;\n Range = Max - Min;\n var Rand = Math.random();\n return (Min + Math.round(Rand * Range));\n }\n /**\n * Builds an HTML container by sanitizing a list of strings and converting\n * them in valid HTML\n * @param msg A list of string with HTML formatting\n * @returns a HTMLDivElement composed of a list of spans with formatted text\n */\n static buildDialogBody(msg) {\n return (React.createElement(\"div\", null, msg.map((s, i) => {\n return (React.createElement(React.Fragment, { key: `msg-${i}` },\n React.createElement(SanitizedHTML, { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),\n React.createElement(\"br\", null)));\n })));\n }\n /**\n * Opens a pop-up dialog in JupyterLab to display a simple message.\n * @param title The title for the message popup\n * @param msg The message as an array of strings\n * @param buttonLabel The label to use for the button. Default is 'OK'\n * @param buttonClassName The classname to give to the 'ok' button\n * @returns Promise - A promise once the message is closed.\n */\n static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {\n const buttons = [\n Dialog.okButton({ label: buttonLabel, className: buttonClassName }),\n ];\n const messageBody = this.buildDialogBody(msg);\n await showDialog({ title, buttons, body: messageBody });\n }\n /**\n * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.\n * @param title The title for the message popup\n * @param msg The message\n * @param acceptLabel The label to use for the accept button. Default is 'YES'\n * @param rejectLabel The label to use for the reject button. 
Default is 'NO'\n * @param yesButtonClassName The classname to give to the accept button.\n * @param noButtonClassName The classname to give to the cancel button.\n * @returns Promise - A promise once the message is closed.\n */\n static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {\n const buttons = [\n Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),\n Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),\n ];\n const messageBody = this.buildDialogBody(msg);\n const result = await showDialog({ title, buttons, body: messageBody });\n return result.button.label === acceptLabel;\n }\n /**\n * Opens a pop-up dialog in JupyterLab with various information and button\n * triggering reloading the page.\n * @param title The title for the message popup\n * @param msg The message\n * @param buttonLabel The label to use for the button. Default is 'Refresh'\n * @param buttonClassName The classname to give to the 'refresh' button.\n * @returns Promise - A promise once the message is closed.\n */\n static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {\n await this.showMessage(title, msg, buttonLabel, buttonClassName);\n location.reload();\n }\n /**\n * @description Creates a new JupyterLab notebook for use by the application\n * @param command The command registry\n * @returns Promise - A promise containing the notebook panel object that was created (if successful).\n */\n static async createNewNotebook(command) {\n const notebook = await command.execute('notebook:create-new', {\n activate: true,\n path: '',\n preferredLanguage: '',\n });\n await notebook.session.ready;\n return notebook;\n }\n /**\n * Safely saves the Jupyter notebook document contents to disk\n * @param notebookPanel The notebook panel containing the notebook to save\n */\n static async saveNotebook(notebookPanel) {\n if (notebookPanel) {\n await 
notebookPanel.context.ready;\n notebookPanel.context.save();\n return true;\n }\n return false;\n }\n /**\n * Convert the notebook contents to JSON\n * @param notebookPanel The notebook panel containing the notebook to serialize\n */\n static notebookToJSON(notebookPanel) {\n if (notebookPanel.content.model) {\n return notebookPanel.content.model.toJSON();\n }\n return null;\n }\n /**\n * @description Gets the value of a key from specified notebook's metadata.\n * @param notebookPanel The notebook to get meta data from.\n * @param key The key of the value.\n * @returns any -The value of the metadata. Returns null if the key doesn't exist.\n */\n static getMetaData(notebookPanel, key) {\n if (!notebookPanel) {\n throw new Error('The notebook is null or undefined. No meta data available.');\n }\n if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {\n return notebookPanel.model.metadata.get(key);\n }\n return null;\n }\n /**\n * @description Sets the key value pair in the notebook's metadata.\n * If the key doesn't exists it will add one.\n * @param notebookPanel The notebook to set meta data in.\n * @param key The key of the value to create.\n * @param value The value to set.\n * @param save Default is false. Whether the notebook should be saved after the meta data is set.\n * Note: This function will not wait for the save to complete, it only sends a save request.\n * @returns The old value for the key, or undefined if it did not exist.\n */\n static setMetaData(notebookPanel, key, value, save = false) {\n var _a;\n if (!notebookPanel) {\n throw new Error('The notebook is null or undefined. No meta data available.');\n }\n const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? 
void 0 : _a.metadata.set(key, value);\n if (save) {\n this.saveNotebook(notebookPanel);\n }\n return oldVal;\n }\n // /**\n // * Get a new Kernel, not tied to a Notebook\n // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services\n // */\n // public static async createNewKernel() {\n // // Get info about the available kernels and start a new one.\n // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {\n // // console.log('Default spec:', kernelSpecs.default);\n // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));\n // // use the default name\n // return { name: kernelSpecs.default };\n // });\n // return await Kernel.startNew(options).then(_kernel => {\n // return _kernel;\n // });\n // }\n // // TODO: We can use this context manager to execute commands inside a new kernel\n // // and be sure that it will be disposed of at the end.\n // // Another approach could be to create a kale_rpc Kernel, as a singleton,\n // // created at startup. The only (possible) drawback is that we can not name\n // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would\n // // not recognize the kernel. 
A solution could be to have a kernel spec dedicated to kale rpc calls.\n // public static async executeWithNewKernel(action: Function, args: any[] = []) {\n // // create brand new kernel\n // const _k = await this.createNewKernel();\n // // execute action inside kernel\n // const res = await action(_k, ...args);\n // // close kernel\n // _k.shutdown();\n // // return result\n // return res;\n // }\n /**\n * @description This function runs code directly in the notebook's kernel and then evaluates the\n * result and returns it as a promise.\n * @param kernel The kernel to run the code in.\n * @param runCode The code to run in the kernel.\n * @param userExpressions The expressions used to capture the desired info from the executed code.\n * @param runSilent Default is false. If true, kernel will execute as quietly as possible.\n * store_history will be set to false, and no broadcast on IOPUB channel will be made.\n * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history\n * and the counter which is shown in the cells will be incremented to reflect code was run.\n * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using\n * an input_request message.\n * @param stopOnError Default is false. 
If True, does not abort the execution queue, if an exception is encountered.\n * This allows the queued execution of multiple execute_requests, even if they generate exceptions.\n * @returns Promise - A promise containing the execution results of the code as an object with\n * keys based on the user_expressions.\n * @example\n * //The code\n * const code = \"a=123\\nb=456\\nsum=a+b\";\n * //The user expressions\n * const expr = {sum: \"sum\",prod: \"a*b\",args:\"[a,b,sum]\"};\n * //Async function call (returns a promise)\n * sendKernelRequest(notebookPanel, code, expr,false);\n * //Result when promise resolves:\n * {\n * sum:{status:\"ok\",data:{\"text/plain\":\"579\"},metadata:{}},\n * prod:{status:\"ok\",data:{\"text/plain\":\"56088\"},metadata:{}},\n * args:{status:\"ok\",data:{\"text/plain\":\"[123, 456, 579]\"}}\n * }\n * @see For more information on JupyterLab messages:\n * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results\n */\n static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {\n if (!kernel) {\n throw new Error('Kernel is null or undefined.');\n }\n // Wait for kernel to be ready before sending request\n // await kernel.status;\n const message = await kernel.requestExecute({\n allow_stdin: allowStdIn,\n code: runCode,\n silent: runSilent,\n stop_on_error: stopOnError,\n store_history: storeHistory,\n user_expressions: userExpressions,\n }).done;\n const content = message.content;\n if (content.status !== 'ok') {\n // If response is not 'ok', throw contents as error, log code\n const msg = `Code caused an error:\\n${runCode}`;\n console.error(msg);\n if (content.traceback) {\n content.traceback.forEach((line) => console.log(line.replace(/[\\u001b\\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));\n }\n throw content;\n }\n // Return user_expressions of the content\n return content.user_expressions;\n }\n 
/**\n * Same as method sendKernelRequest but passing\n * a NotebookPanel instead of a Kernel\n */\n static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {\n var _a, _b, _c, _d;\n if (!notebookPanel) {\n throw new Error('Notebook is null or undefined.');\n }\n // Wait for notebook panel to be ready\n await notebookPanel.activate;\n await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);\n console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);\n return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);\n }\n}\n"],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c3b18119886a0a82200.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c3b18119886a0a82200.js
deleted file mode 100644
index 246f4ba64ae..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c3b18119886a0a82200.js
+++ /dev/null
@@ -1,767 +0,0 @@
-"use strict";
-(self["webpackChunkneural_compressor_ext_lab"] = self["webpackChunkneural_compressor_ext_lab"] || []).push([["lib_index_js"],{
-
-/***/ "./lib/constants.js":
-/*!**************************!*\
- !*** ./lib/constants.js ***!
- \**************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "Constants": () => (/* binding */ Constants)
-/* harmony export */ });
-var Constants;
-(function (Constants) {
- Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';
- Constants.WORK_PATH = "neural_coder_workspace/";
- Constants.ICON_FORMAT_ALL_SVG = ' ';
- Constants.ICON_RUN = ' ';
- Constants.SVG = ' ';
- Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;
- Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;
- Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';
- Constants.PLUGIN_VERSION = '0.1.0';
-})(Constants || (Constants = {}));
-
-
-/***/ }),
-
-/***/ "./lib/deepcoder.js":
-/*!**************************!*\
- !*** ./lib/deepcoder.js ***!
- \**************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "JupyterlabNotebookCodeOptimizer": () => (/* binding */ JupyterlabNotebookCodeOptimizer)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/notebook */ "webpack/sharing/consume/default/@jupyterlab/notebook");
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _utils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./utils */ "./lib/utils.js");
-/* harmony import */ var _constants__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./constants */ "./lib/constants.js");
-
-
-
-class JupyterlabCodeOptimizer {
- constructor(panel) {
- this.working = false;
- this.panel = panel;
- this.tmp_path = "tmp.py";
- this.rand = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].GetRandomNum(0, 200);
- this.log_path = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.WORK_PATH + "NeuralCoder" + this.rand + ".log";
- this.tmp_log_path = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.WORK_PATH + "NeuralCoder_tmp" + ".log";
- this.cells = [];
- }
- async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {
- let codes = [];
- code.forEach(function (value) {
- value = value.replace(/('\\n')/g, '^^^');
- value = value.replace(/\\n"/g, '###');
- value = value.replace(/\\n'/g, '###');
- value = value.replace(/"\\n/g, '@@');
- value = value.replace(/'\\n/g, '@@');
- value = value.replace(/\n/g, '\\n');
- value = value.replace(/"/g, '+++');
- value = value.replace(/,/g, '$');
- codes.push(value);
- });
- let gen_code = `code = "${codes}"\ncodes = code.split(',')\nwith open( '${this.tmp_path}', 'w+' ) as f:\n for i in range(0,len(codes)):\n f.write('# this is the beginning of a single code snippet\\n')\n code_list = codes[i].replace('$',',').replace('+++','\"').split('\\n')\n for line in code_list:\n if('split(^^^)' in line):\n line=line.replace('split(^^^)', 'split(\\'\\\\n\\')')\n if('###' in line):\n line=line.replace('###', '\\\\n\"')\n if('@@' in line):\n line=line.replace('@@', '\"\\\\n')\n f.write(line+'\\n')`;
- const expr = { code_list: `code_list` };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, gen_code, expr, false);
- if (options === 'normal') {
- let runcode = `from neural_coder import enable\nenable(code="${this.tmp_path}",features=["${formatter}"], overwrite=True)`;
- let expr = { sum: ` ` };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let run_code1 = `with open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let expr1 = { optimizedCode: "optimized_code" };
- let result2 = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, run_code1, expr1, false);
- result2.then(value => {
- var _a, _b, _c, _d;
- let optimizedTexts = Object.values(value.optimizedCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- }
- });
- }
- else {
- if (formatter === '') {
- if (this.markdown) {
- this.markdown.model.value.text += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \n";
- }
- // cell.outputArea.node.innerText += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\n"
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n")`;
- let expr1 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}",features=[], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr1 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode2 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- });
- }
- else {
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}", features=["${formatter}"], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (next !== '') {
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode2 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- }
- let runcode3 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr3 = { path: "" };
- let res_tmp = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- res_tmp.then(value => {
- if (formatter === 'pytorch_inc_bf16') {
- let read_log = `import re\nwith open("${this.tmp_log_path}", 'r') as f:\n logs = f.readlines()\n fps_list=[]\n for log_line in logs[-4:]:\n pat = re.compile(r\'\\d+\\.?\\d+')\n fps = re.findall(pat,log_line)[-1]\n fps_list.append(float(fps))\nmaxi = max(fps_list)\nindex = fps_list.index(maxi)\nboost = round(maxi/fps_list[0],1)\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\nbest_feature = features[index]\nbest_name = feature_name[index]\nfeature_l = []\nfeature_l.append(best_feature)\nfrom neural_coder import enable\nenable(code="${this.tmp_path}",features=feature_l, overwrite=True)\nwith open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let read_expr = { boost: "boost", best_feature: "best_feature", best_name: "best_name", optimizeCode: "optimized_code", feature_l: "fps_list", maxi: "maxi", index: "index" };
- let read_result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);
- read_result.then(value => {
- var _a, _b, _c, _d;
- console.log("resres", value);
- let boost = Object.values(value.boost.data)[0];
- let best_name = Object.values(value.best_name.data)[0];
- let optimizedTexts = Object.values(value.optimizeCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \n`;
- }
- // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\n`
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- }
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`;
- }
- // let command = "lscpu | grep 'Model name'"
- // let get_hardware = `import subprocess\nsubp = subprocess.Popen("${command}",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8")\nsubp.wait(2)\nhardware = subp.communicate()[0].replace("Model name:","").strip()`
- // let expr_hardware = {hardware: "hardware"}
- // let hard_res = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware,false);
- // hard_res.then(value =>{
- // let hard = Object.values(value.hardware.data)[0] as string;
- // if(this.markdown){
- // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \n`
- // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`
- // }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\n`
- // })
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to lab_workspace\\NeuralCoder${this.rand}.log\n`
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- });
- }
- });
- });
- }
- }
- }
-}
-class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {
- constructor(notebookTracker, panel) {
- super(panel);
- this.notebookTracker = notebookTracker;
- this.notebookname = '';
- }
- async optimizeAction(config, formatter) {
- return this.optimizeCells(true, config, formatter);
- }
- async optimizeAllCodeCells(config, formatter, notebook, run) {
- return this.optimizeCells(false, config, formatter, notebook, run);
- }
- getCodeCells(ifmarkdown = true, notebook) {
- if (!this.notebookTracker.currentWidget) {
- return [];
- }
- const codeCells = [];
- notebook = notebook || this.notebookTracker.currentWidget.content;
- this.notebookname = notebook.title.label;
- let count = 0;
- notebook.widgets.forEach((cell) => {
- if (cell.model.type === 'code') {
- count += 1;
- codeCells.push(cell);
- }
- });
- if (ifmarkdown) {
- _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.NotebookActions.insertBelow(notebook);
- this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;
- _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.NotebookActions.changeCellType(notebook, 'markdown');
- const activeCell = notebook.activeCell;
- if (activeCell) {
- this.markdown = activeCell;
- }
- }
- this.cells = codeCells;
- return codeCells;
- }
- async optimizeCells(selectedOnly, config, formatter, notebook, run) {
- if (this.working) {
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- console.log("arrive here 333");
- this.working = true;
- const optimize_type = formatter !== undefined ? formatter : 'pytorch_mixed_precision_cpu';
- if (optimize_type === 'auto-quant') {
- selectedOnly = true;
- }
- else {
- selectedOnly = false;
- }
- const selectedCells = this.getCodeCells(selectedOnly, notebook);
- let cell = selectedCells[selectedCells.length - 1];
- if (selectedCells.length === 0) {
- this.working = false;
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- const currentTexts = selectedCells.map(cell => cell.model.value.text);
- if (optimize_type === 'auto-quant') {
- console.log("arrive here 444-111");
- if (this.markdown) {
- this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}" \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \n`;
- }
- // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}"\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\n`
- let runcode = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Auto-Quant Started ......\\n")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- let runcode2 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\n")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- let runcode3 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Benchmark Mode: Throughput\\n")`;
- let expr3 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- // cell.outputArea.node.setAttribute("class","pad")
- await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);
- }
- else {
- console.log("arrive here 444-222");
- await this.optimizeCode(currentTexts, optimize_type, "", "", "normal", true, this.panel, cell, run);
- }
- this.working = false;
- console.log("arrive here 555");
- return new Promise((resolve, reject) => {
- resolve("success!");
- });
- }
- applicable(formatter, currentWidget) {
- const currentNotebookWidget = this.notebookTracker.currentWidget;
- return currentNotebookWidget && currentWidget === currentNotebookWidget;
- }
-}
-
-
-/***/ }),
-
-/***/ "./lib/index.js":
-/*!**********************!*\
- !*** ./lib/index.js ***!
- \**********************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (__WEBPACK_DEFAULT_EXPORT__)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/notebook */ "webpack/sharing/consume/default/@jupyterlab/notebook");
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! @jupyterlab/apputils */ "webpack/sharing/consume/default/@jupyterlab/apputils");
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var _jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! @jupyterlab/settingregistry */ "webpack/sharing/consume/default/@jupyterlab/settingregistry");
-/* harmony import */ var _jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__);
-/* harmony import */ var _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! @jupyterlab/mainmenu */ "webpack/sharing/consume/default/@jupyterlab/mainmenu");
-/* harmony import */ var _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__);
-/* harmony import */ var _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! @jupyterlab/ui-components */ "webpack/sharing/consume/default/@jupyterlab/ui-components");
-/* harmony import */ var _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__);
-/* harmony import */ var _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! @lumino/widgets */ "webpack/sharing/consume/default/@lumino/widgets");
-/* harmony import */ var _lumino_widgets__WEBPACK_IMPORTED_MODULE_5___default = /*#__PURE__*/__webpack_require__.n(_lumino_widgets__WEBPACK_IMPORTED_MODULE_5__);
-/* harmony import */ var _deepcoder__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./deepcoder */ "./lib/deepcoder.js");
-/* harmony import */ var _constants__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./constants */ "./lib/constants.js");
-
-
-
-
-
-
-
-
-class neural_compressor_ext_lab {
- constructor(app, tracker, notebookpanel) {
- this.app = app;
- this.tracker = tracker;
- this.notebookpanel = notebookpanel;
- this.setupWidgetExtension();
- this.config = '';
- }
- createNew(nb) {
- this.notebookpanel = nb;
- this.notebookCodeOptimizer = new _deepcoder__WEBPACK_IMPORTED_MODULE_6__.JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);
- const svg = document.createElement("svg");
- svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_FORMAT_ALL_SVG;
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_RUN;
- const div = document.createElement("div");
- div.setAttribute("class", "wrapper");
- const span = document.createElement("span");
- span.setAttribute("class", "f1ozlkqi");
- span.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.SVG;
- const selector = document.createElement("select");
- selector.setAttribute("class", "aselector");
- selector.id = "NeuralCoder";
- const option1 = document.createElement("option");
- option1.value = "pytorch_inc_static_quant_fx";
- option1.innerText = "INC Enable INT8 (Static)";
- option1.selected = true;
- const option2 = document.createElement("option");
- option2.value = "pytorch_inc_dynamic_quant";
- option2.innerText = "INC Enable INT8 (Dynamic)";
- const option3 = document.createElement("option");
- option3.value = "pytorch_inc_bf16";
- option3.innerText = "INC Enable BF16";
- const option4 = document.createElement("option");
- option4.value = "auto-quant";
- option4.innerText = "INC Auto Enable & Benchmark";
- selector.options.add(option1);
- selector.options.add(option2);
- selector.options.add(option3);
- selector.options.add(option4);
- div.appendChild(selector);
- div.appendChild(span);
- const selector_widget = new _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__.Widget();
- selector_widget.node.appendChild(div);
- selector_widget.addClass("aselector");
- let notebookCodeOptimizer = this.notebookCodeOptimizer;
- let config = this.config;
- const dia_input = document.createElement("input");
- const dia_widget = new _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__.Widget();
- dia_widget.node.appendChild(dia_input);
- dia_widget.addClass("dialog");
- const run_button = new _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.ToolbarButton({
- tooltip: 'NeuralCoder',
- icon: new _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__.LabIcon({
- name: "run",
- svgstr: _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_RUN
- }),
- onClick: async function () {
- var _a, _b, _c, _d;
- console.log("arrive here 111");
- (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);
- if (selector.options[selector.selectedIndex].value === 'auto-quant') {
- await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.showDialog)({
- title: 'Please input execute parameters:',
- body: dia_widget,
- buttons: [_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.Dialog.okButton({ label: 'Confirm' })]
- }).then(result => {
- if (result.button.accept) {
- config = dia_input.value;
- }
- });
- }
- console.log("arrive here 222");
- await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);
- }
- });
- nb.toolbar.insertItem(11, "nc", run_button);
- nb.toolbar.insertItem(12, "selector", selector_widget);
- }
- setupWidgetExtension() {
- this.app.docRegistry.addWidgetExtension('Notebook', this);
- }
-}
-/**
- * Initialization data for the neural_compressor_ext_lab extension.
- */
-const plugin = {
- id: 'neural_compressor_ext_lab:plugin',
- autoStart: true,
- requires: [_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.INotebookTracker, _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__.IMainMenu],
- optional: [_jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__.ISettingRegistry],
- activate: (app, tracker, notebookpanel) => {
- new neural_compressor_ext_lab(app, tracker, notebookpanel);
- console.log('JupyterLab extension neural_compressor_ext_lab is activated!');
- }
-};
-/* harmony default export */ const __WEBPACK_DEFAULT_EXPORT__ = (plugin);
-
-
-/***/ }),
-
-/***/ "./lib/utils.js":
-/*!**********************!*\
- !*** ./lib/utils.js ***!
- \**********************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (/* binding */ NotebookUtilities)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/apputils */ "webpack/sharing/consume/default/@jupyterlab/apputils");
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var react_sanitized_html__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! react-sanitized-html */ "webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html");
-/* harmony import */ var react_sanitized_html__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(react_sanitized_html__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var react__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! react */ "webpack/sharing/consume/default/react");
-/* harmony import */ var react__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(react__WEBPACK_IMPORTED_MODULE_2__);
-/*
- * Copyright 2019-2020 The Kale Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// @ts-ignore
-
-
-class NotebookUtilities {
- /**
- * generate random number
- * @Min
- * @Max
- */
- static GetRandomNum(Min, Max) {
- let Range;
- Range = Max - Min;
- var Rand = Math.random();
- return (Min + Math.round(Rand * Range));
- }
- /**
- * Builds an HTML container by sanitizing a list of strings and converting
- * them in valid HTML
- * @param msg A list of string with HTML formatting
- * @returns a HTMLDivElement composed of a list of spans with formatted text
- */
- static buildDialogBody(msg) {
- return (react__WEBPACK_IMPORTED_MODULE_2__.createElement("div", null, msg.map((s, i) => {
- return (react__WEBPACK_IMPORTED_MODULE_2__.createElement(react__WEBPACK_IMPORTED_MODULE_2__.Fragment, { key: `msg-${i}` },
- react__WEBPACK_IMPORTED_MODULE_2__.createElement((react_sanitized_html__WEBPACK_IMPORTED_MODULE_1___default()), { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),
- react__WEBPACK_IMPORTED_MODULE_2__.createElement("br", null)));
- })));
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a simple message.
- * @param title The title for the message popup
- * @param msg The message as an array of strings
- * @param buttonLabel The label to use for the button. Default is 'OK'
- * @param buttonClassName The classname to give to the 'ok' button
- * @returns Promise - A promise once the message is closed.
- */
- static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {
- const buttons = [
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.okButton({ label: buttonLabel, className: buttonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.showDialog)({ title, buttons, body: messageBody });
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.
- * @param title The title for the message popup
- * @param msg The message
- * @param acceptLabel The label to use for the accept button. Default is 'YES'
- * @param rejectLabel The label to use for the reject button. Default is 'NO'
- * @param yesButtonClassName The classname to give to the accept button.
- * @param noButtonClassName The classname to give to the cancel button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {
- const buttons = [
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- const result = await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.showDialog)({ title, buttons, body: messageBody });
- return result.button.label === acceptLabel;
- }
- /**
- * Opens a pop-up dialog in JupyterLab with various information and button
- * triggering reloading the page.
- * @param title The title for the message popup
- * @param msg The message
- * @param buttonLabel The label to use for the button. Default is 'Refresh'
- * @param buttonClassName The classname to give to the 'refresh' button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {
- await this.showMessage(title, msg, buttonLabel, buttonClassName);
- location.reload();
- }
- /**
- * @description Creates a new JupyterLab notebook for use by the application
- * @param command The command registry
- * @returns Promise - A promise containing the notebook panel object that was created (if successful).
- */
- static async createNewNotebook(command) {
- const notebook = await command.execute('notebook:create-new', {
- activate: true,
- path: '',
- preferredLanguage: '',
- });
- await notebook.session.ready;
- return notebook;
- }
- /**
- * Safely saves the Jupyter notebook document contents to disk
- * @param notebookPanel The notebook panel containing the notebook to save
- */
- static async saveNotebook(notebookPanel) {
- if (notebookPanel) {
- await notebookPanel.context.ready;
- notebookPanel.context.save();
- return true;
- }
- return false;
- }
- /**
- * Convert the notebook contents to JSON
- * @param notebookPanel The notebook panel containing the notebook to serialize
- */
- static notebookToJSON(notebookPanel) {
- if (notebookPanel.content.model) {
- return notebookPanel.content.model.toJSON();
- }
- return null;
- }
- /**
- * @description Gets the value of a key from specified notebook's metadata.
- * @param notebookPanel The notebook to get meta data from.
- * @param key The key of the value.
- * @returns any -The value of the metadata. Returns null if the key doesn't exist.
- */
- static getMetaData(notebookPanel, key) {
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {
- return notebookPanel.model.metadata.get(key);
- }
- return null;
- }
- /**
- * @description Sets the key value pair in the notebook's metadata.
- * If the key doesn't exists it will add one.
- * @param notebookPanel The notebook to set meta data in.
- * @param key The key of the value to create.
- * @param value The value to set.
- * @param save Default is false. Whether the notebook should be saved after the meta data is set.
- * Note: This function will not wait for the save to complete, it only sends a save request.
- * @returns The old value for the key, or undefined if it did not exist.
- */
- static setMetaData(notebookPanel, key, value, save = false) {
- var _a;
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? void 0 : _a.metadata.set(key, value);
- if (save) {
- this.saveNotebook(notebookPanel);
- }
- return oldVal;
- }
- // /**
- // * Get a new Kernel, not tied to a Notebook
- // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services
- // */
- // public static async createNewKernel() {
- // // Get info about the available kernels and start a new one.
- // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {
- // // console.log('Default spec:', kernelSpecs.default);
- // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));
- // // use the default name
- // return { name: kernelSpecs.default };
- // });
- // return await Kernel.startNew(options).then(_kernel => {
- // return _kernel;
- // });
- // }
- // // TODO: We can use this context manager to execute commands inside a new kernel
- // // and be sure that it will be disposed of at the end.
- // // Another approach could be to create a kale_rpc Kernel, as a singleton,
- // // created at startup. The only (possible) drawback is that we can not name
- // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would
- // // not recognize the kernel. A solution could be to have a kernel spec dedicated to kale rpc calls.
- // public static async executeWithNewKernel(action: Function, args: any[] = []) {
- // // create brand new kernel
- // const _k = await this.createNewKernel();
- // // execute action inside kernel
- // const res = await action(_k, ...args);
- // // close kernel
- // _k.shutdown();
- // // return result
- // return res;
- // }
- /**
- * @description This function runs code directly in the notebook's kernel and then evaluates the
- * result and returns it as a promise.
- * @param kernel The kernel to run the code in.
- * @param runCode The code to run in the kernel.
- * @param userExpressions The expressions used to capture the desired info from the executed code.
- * @param runSilent Default is false. If true, kernel will execute as quietly as possible.
- * store_history will be set to false, and no broadcast on IOPUB channel will be made.
- * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history
- * and the counter which is shown in the cells will be incremented to reflect code was run.
- * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using
- * an input_request message.
- * @param stopOnError Default is false. If True, does not abort the execution queue, if an exception is encountered.
- * This allows the queued execution of multiple execute_requests, even if they generate exceptions.
- * @returns Promise - A promise containing the execution results of the code as an object with
- * keys based on the user_expressions.
- * @example
- * //The code
- * const code = "a=123\nb=456\nsum=a+b";
- * //The user expressions
- * const expr = {sum: "sum",prod: "a*b",args:"[a,b,sum]"};
- * //Async function call (returns a promise)
- * sendKernelRequest(notebookPanel, code, expr,false);
- * //Result when promise resolves:
- * {
- * sum:{status:"ok",data:{"text/plain":"579"},metadata:{}},
- * prod:{status:"ok",data:{"text/plain":"56088"},metadata:{}},
- * args:{status:"ok",data:{"text/plain":"[123, 456, 579]"}}
- * }
- * @see For more information on JupyterLab messages:
- * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results
- */
- static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- if (!kernel) {
- throw new Error('Kernel is null or undefined.');
- }
- // Wait for kernel to be ready before sending request
- // await kernel.status;
- const message = await kernel.requestExecute({
- allow_stdin: allowStdIn,
- code: runCode,
- silent: runSilent,
- stop_on_error: stopOnError,
- store_history: storeHistory,
- user_expressions: userExpressions,
- }).done;
- const content = message.content;
- if (content.status !== 'ok') {
- // If response is not 'ok', throw contents as error, log code
- const msg = `Code caused an error:\n${runCode}`;
- console.error(msg);
- if (content.traceback) {
- content.traceback.forEach((line) => console.log(line.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));
- }
- throw content;
- }
- // Return user_expressions of the content
- return content.user_expressions;
- }
- /**
- * Same as method sendKernelRequest but passing
- * a NotebookPanel instead of a Kernel
- */
- static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- var _a, _b, _c, _d;
- if (!notebookPanel) {
- throw new Error('Notebook is null or undefined.');
- }
- // Wait for notebook panel to be ready
- await notebookPanel.activate;
- await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);
- console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);
- return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);
- }
-}
-
-
-/***/ })
-
-}]);
-//# sourceMappingURL=lib_index_js.2c3b18119886a0a82200.js.map
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c3b18119886a0a82200.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c3b18119886a0a82200.js.map
deleted file mode 100644
index c4bb7ec7d6d..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c3b18119886a0a82200.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"lib_index_js.2c3b18119886a0a82200.js","mappings":";;;;;;;;;;;;;AAAO;AACP;AACA;AACA;AACA,6IAA6I,gCAAgC,gBAAgB,sBAAsB,qVAAqV,mBAAmB,gVAAgV,mBAAmB;AAC95B;AACA;AACA,yCAAyC,4BAA4B;AACrE,oCAAoC,2BAA2B;AAC/D;AACA;AACA,CAAC,8BAA8B;;;;;;;;;;;;;;;;;;;ACXwB;AACf;AACA;AACxC;AACA;AACA;AACA;AACA;AACA,oBAAoB,2DAA8B;AAClD,wBAAwB,2DAAmB;AAC3C,4BAA4B,2DAAmB;AAC/C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT,kCAAkC,MAAM,0CAA0C,cAAc;AAChG,uBAAuB;AACvB,QAAQ,4EAA+C;AACvD;AACA,2EAA2E,cAAc,eAAe,UAAU;AAClH,yBAAyB;AACzB,YAAY,4EAA+C;AAC3D,0CAA0C,cAAc;AACxD,0BAA0B;AAC1B,0BAA0B,4EAA+C;AACzE;AACA;AACA;AACA;AACA;AACA,gCAAgC,0BAA0B;AAC1D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,wCAAwC,0DAAkB;AAC1D;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA;AACA,6CAA6C,cAAc;AAC3D,8BAA8B;AAC9B,gBAAgB,4EAA+C;AAC/D,wGAAwG,cAAc,uCAAuC,QAAQ;AACrK,6BAA6B;AAC7B,6BAA6B,4EAA+C;AAC5E;AACA;AACA;AACA;AACA,wIAAwI,KAAK;AAC7I;AACA,uIAAuI,KAAK;AAC5I,6GAA6G,KAAK;AAClH,gDAAgD,cAAc,8BAA8B,KAAK;AACjG,iCAAiC;AACjC,oBAAoB,4EAA+C;AACnE;AACA,8GAA8G,MAAM;AACpH;AACA,6GAA6G,MAAM;AACnH,iDAAiD,cAAc,mFAAmF,MAAM;AACxJ,kCAAkC;AAClC,oBAAoB,4EAA+C;AACnE,iDAAiD,kBAAkB,kCAAkC,KAAK;AAC1G,kCAAkC;AAClC,oBAAoB,4EAA+C;AACnE,iBAAiB;AACjB;AACA;AACA,wGAAwG,cAAc,gBAAgB,UAAU,4BAA4B,QAAQ;AACpL,6BAA6B;AAC7B,6BAA6B,4EAA+C;AAC5E;AACA;AACA;AACA;AACA,kHAAkH,MAAM,KAAK,KAAK;AAClI;AACA,iHAAiH,MAAM,KAAK,KAAK;AACjI,uFAAuF,MAAM,KAAK,KAAK;AACvG,gDAAgD,cAAc,kCAAkC,KAAK;AACrG,iCAAiC;AACjC,oBAAoB,4EAA+C;AACnE;AACA;AACA,kHAAkH,MAAM;AACxH;AACA,iHAAiH,MAAM;AACvH,qDAAqD,cAAc,mFAAmF,MAAM;AAC5J,sCAAsC;AACtC,wBAAwB,4EAA+C;AACvE;AACA,iDAAiD,kBAAkB,kCAAkC,KAAK;AAC1G,kCAAkC;AAClC,kCAAkC,4EAA+C;AACjF;AACA;AACA,oEAAoE,kBAAkB,qpBAAqpB,cAAc,oDAAoD,cAAc;AAC3zB,8CAA8C;AAC9C,8CAA8C,4EAA+C;AAC7F;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yHAAyH,YAAY;AACrI,8GAA8G,MAAM;AACpH;AACA,uHAAuH,UAAU;AACjI,6GAA6G,MAAM;AACnH;AACA,gDAAgD,0BAA0B;AAC1E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC
A;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oJAAoJ,UAAU;AAC9J;AACA;AACA,qGAAqG,QAAQ;AAC7G,wDAAwD;AACxD;AACA;AACA;AACA;AACA,yGAAyG,OAAO;AAChH,uJAAuJ,UAAU;AACjK;AACA,qGAAqG,KAAK;AAC1G,qCAAqC;AACrC,2IAA2I,UAAU;AACrJ;AACA,oDAAoD,0DAAkB;AACtE;AACA,6BAA6B;AAC7B;AACA,qBAAqB;AACrB,iBAAiB;AACjB;AACA;AACA;AACA;AACO;AACP;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA,YAAY,6EAA2B;AACvC;AACA,YAAY,gFAA8B;AAC1C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA,mHAAmH,kBAAkB;AACrI;AACA;AACA;AACA,kHAAkH,kBAAkB;AACpI;AACA,wCAAwC,cAAc;AACtD,yBAAyB;AACzB,YAAY,4EAA+C;AAC3D,yCAAyC,cAAc,gGAAgG,kBAAkB;AACzK,0BAA0B;AAC1B,YAAY,4EAA+C;AAC3D,yCAAyC,cAAc;AACvD,0BAA0B;AAC1B,YAAY,4EAA+C;AAC3D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACtSwD;AACiB;AACV;AACd;AACG;AACX;AACqB;AACtB;AACxC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yCAAyC,uEAA+B;AACxE;AACA,wBAAwB,qEAA6B;AACrD;AACA,4BAA4B,0DAAkB;AAC9C;AACA;AACA;AACA;AACA,yBAAyB,qDAAa;AACtC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oCAAoC,mDAAM;AAC1C;AACA;AACA;AACA;AACA;AACA,+BAA+B,mDAAM;AACrC;AACA;AACA,+BAA+B,+DAAa;AAC5C;AACA,sBAAsB,8DAAO;AAC7B;AACA,wBAAwB,0DAAkB;AAC1C,aAAa;AACb;AACA;AACA;AACA;AACA;AACA,0BAA0B,gEAAU;AACpC;AACA;AACA,kCAAkC,iEAAe,GAAG,kBAAkB;AACtE,qBAAqB;AACrB;AACA;AACA;AACA,qBAAqB;AACrB;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,kEAAgB,EAAE,2DAAS;AAC1C,eAAe,yEAAgB;AAC/B;AACA;AACA;AACA;AACA;AACA,iEAAe,MAAM,EAAC;;;;;;;;;;;;;;;;;;;;;ACxGtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC0D;AAC1D;AACiD;AAClB;AAChB;AACf;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC
A;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,gBAAgB,gDAAmB;AACnC,oBAAoB,gDAAmB,CAAC,2CAAc,IAAI,YAAY,EAAE,GAAG;AAC3E,gBAAgB,gDAAmB,CAAC,6DAAa,IAAI,qBAAqB,aAAa,gEAAgE;AACvJ,gBAAgB,gDAAmB;AACnC,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,YAAY,iEAAe,GAAG,gDAAgD;AAC9E;AACA;AACA,cAAc,gEAAU,GAAG,mCAAmC;AAC9D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,YAAY,iEAAe,GAAG,mDAAmD;AACjF,YAAY,qEAAmB,GAAG,kDAAkD;AACpF;AACA;AACA,6BAA6B,gEAAU,GAAG,mCAAmC;AAC7E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oBAAoB;AACpB,UAAU;AACV;AACA;AACA,UAAU;AACV;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,qBAAqB;AACrB;AACA;AACA;AACA;AACA,aAAa,kBAAkB,mBAAmB,aAAa;AAC/D,cAAc,kBAAkB,qBAAqB,aAAa;AAClE,cAAc,kBAAkB;AAChC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA,kDAAkD,QAAQ;AAC1D;AACA;AACA,kGAAkG,YAAY,IAAI,IAAI,MAAM,IAAI;AAChI;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA","sources":["webpack://neural_compressor_ext_lab/./lib/constants.js","webpack://neural_compressor_ext_lab/./lib/deepcoder.js","webpack://neural_compressor_ext_lab/./lib/index.js","webpack://neural_compressor_ext_lab/./lib/utils.js"],"sourcesContent":["export var Constants;\n(function (Constants) {\n Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';\n Constants.WORK_PATH = 
\"neural_coder_workspace/\";\n Constants.ICON_FORMAT_ALL_SVG = ' ';\n Constants.ICON_RUN = ' ';\n Constants.SVG = ' ';\n Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;\n Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;\n Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';\n Constants.PLUGIN_VERSION = '0.1.0';\n})(Constants || (Constants = {}));\n","import { NotebookActions } from '@jupyterlab/notebook';\nimport NotebookUtilities from \"./utils\";\nimport { Constants } from './constants';\nclass JupyterlabCodeOptimizer {\n constructor(panel) {\n this.working = false;\n this.panel = panel;\n this.tmp_path = \"tmp.py\";\n this.rand = NotebookUtilities.GetRandomNum(0, 200);\n this.log_path = Constants.WORK_PATH + \"NeuralCoder\" + this.rand + \".log\";\n this.tmp_log_path = Constants.WORK_PATH + \"NeuralCoder_tmp\" + \".log\";\n this.cells = [];\n }\n async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {\n let codes = [];\n code.forEach(function (value) {\n value = value.replace(/('\\\\n')/g, '^^^');\n value = value.replace(/\\\\n\"/g, '###');\n value = value.replace(/\\\\n'/g, '###');\n value = value.replace(/\"\\\\n/g, '@@');\n value = value.replace(/'\\\\n/g, '@@');\n value = value.replace(/\\n/g, '\\\\n');\n value = value.replace(/\"/g, '+++');\n value = value.replace(/,/g, '$');\n codes.push(value);\n });\n let gen_code = `code = \"${codes}\"\\ncodes = code.split(',')\\nwith open( '${this.tmp_path}', 'w+' ) as f:\\n for i in range(0,len(codes)):\\n f.write('# this is the beginning of a single code snippet\\\\n')\\n code_list = codes[i].replace('$',',').replace('+++','\\\"').split('\\\\n')\\n for line in code_list:\\n if('split(^^^)' in line):\\n line=line.replace('split(^^^)', 'split(\\\\'\\\\\\\\n\\\\')')\\n if('###' in line):\\n line=line.replace('###', '\\\\\\\\n\\\"')\\n if('@@' in line):\\n line=line.replace('@@', '\\\"\\\\\\\\n')\\n f.write(line+'\\\\n')`;\n const expr = { 
code_list: `code_list` };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, gen_code, expr, false);\n if (options === 'normal') {\n let runcode = `from neural_coder import enable\\nenable(code=\"${this.tmp_path}\",features=[\"${formatter}\"], overwrite=True)`;\n let expr = { sum: ` ` };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let run_code1 = `with open(\"${this.tmp_path}\", 'r') as f:\\n optimized_code = f.read()\\n`;\n let expr1 = { optimizedCode: \"optimized_code\" };\n let result2 = NotebookUtilities.sendKernelRequestFromNotebook(panel, run_code1, expr1, false);\n result2.then(value => {\n var _a, _b, _c, _d;\n let optimizedTexts = Object.values(value.optimizedCode.data)[0];\n let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\\\n').slice(1);\n optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);\n for (let i = 0; i < optimizeCodes.length; ++i) {\n const cell = this.cells[i];\n const currentTexts = this.cells.map(cell => cell.model.value.text);\n const currentText = currentTexts[i];\n let optimizedtext = optimizeCodes[i];\n optimizedtext = optimizedtext.replace(/\\\\'\\\\\\\\n\\\\'/g, \"^^^\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n\"/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n'/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\"\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/'\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/\\\\n/g, '\\n');\n optimizedtext = optimizedtext.replace(/\\\\'/g, \"'\");\n optimizedtext = optimizedtext.replace(/\\^\\^\\^/g, \"'\\\\n'\");\n optimizedtext = optimizedtext.replace(/\\+\\+\\+/g, \"\\\\n\\\"\");\n optimizedtext = optimizedtext.replace(/\\@\\@\\@/g, \"\\\"\\\\n\");\n if (cell.model.value.text === currentText) {\n cell.model.value.text = optimizedtext;\n }\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = 
Constants.ICON_RUN;\n (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);\n }\n });\n }\n else {\n if (formatter === '') {\n if (this.markdown) {\n this.markdown.model.value.text += \"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \\n\";\n }\n // cell.outputArea.node.innerText += \"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n\"\n let runcode1 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\\\n\")`;\n let expr1 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);\n let runcode = `from neural_coder import enable\\nperfomance, mode, path = enable(code=\"${this.tmp_path}\",features=[], run_bench=True, args=\"${options}\")\\nwith open(path + '/bench.log', 'r') as f:\\n logs = f.readlines()\\nlog_line = logs[4]\\nlog = log_line.split(\"[\")[1].split(\"]\")[0]`;\n let expr = { path: \"path\", log: \"log\" };\n let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let fps;\n result.then(value => {\n fps = Object.values(value.log.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`\n let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\\\n`;\n let runcode = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr = { path: \"\" };\n 
NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n`\n let runcode1 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\\\n\")`;\n let expr1 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);\n let runcode2 = `with open(\"${this.tmp_log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n });\n }\n else {\n let runcode = `from neural_coder import enable\\nperfomance, mode, path = enable(code=\"${this.tmp_path}\", features=[\"${formatter}\"], run_bench=True, args=\"${options}\")\\nwith open(path + '/bench.log', 'r') as f:\\n logs = f.readlines()\\nlog_line = logs[4]\\nlog = log_line.split(\"[\")[1].split(\"]\")[0]`;\n let expr = { path: \"path\", log: \"log\" };\n let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let fps;\n result.then(value => {\n fps = Object.values(value.log.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\\n`\n let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\\\n`;\n let runcode = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n if (next !== '') {\n if (this.markdown) {\n 
this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n`\n let runcode2 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\\\n\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n }\n let runcode3 = `with open(\"${this.tmp_log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr3 = { path: \"\" };\n let res_tmp = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);\n res_tmp.then(value => {\n if (formatter === 'pytorch_inc_bf16') {\n let read_log = `import re\\nwith open(\"${this.tmp_log_path}\", 'r') as f:\\n logs = f.readlines()\\n fps_list=[]\\n for log_line in logs[-4:]:\\n pat = re.compile(r\\'\\\\d+\\\\.?\\\\d+')\\n fps = re.findall(pat,log_line)[-1]\\n fps_list.append(float(fps))\\nmaxi = max(fps_list)\\nindex = fps_list.index(maxi)\\nboost = round(maxi/fps_list[0],1)\\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\\nbest_feature = features[index]\\nbest_name = feature_name[index]\\nfeature_l = []\\nfeature_l.append(best_feature)\\nfrom neural_coder import enable\\nenable(code=\"${this.tmp_path}\",features=feature_l, overwrite=True)\\nwith open(\"${this.tmp_path}\", 'r') as f:\\n optimized_code = f.read()\\n`;\n let read_expr = { boost: \"boost\", best_feature: \"best_feature\", best_name: \"best_name\", optimizeCode: \"optimized_code\", feature_l: \"fps_list\", maxi: \"maxi\", index: \"index\" };\n let read_result = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);\n read_result.then(value => {\n var _a, _b, _c, _d;\n 
console.log(\"resres\", value);\n let boost = Object.values(value.boost.data)[0];\n let best_name = Object.values(value.best_name.data)[0];\n let optimizedTexts = Object.values(value.optimizeCode.data)[0];\n let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\\\n').slice(1);\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \\n`;\n }\n // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\\n`\n optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);\n for (let i = 0; i < optimizeCodes.length; ++i) {\n const cell = this.cells[i];\n const currentTexts = this.cells.map(cell => cell.model.value.text);\n const currentText = currentTexts[i];\n let optimizedtext = optimizeCodes[i];\n optimizedtext = optimizedtext.replace(/\\\\'\\\\\\\\n\\\\'/g, \"^^^\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n\"/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n'/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\"\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/'\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/\\\\n/g, '\\n');\n optimizedtext = optimizedtext.replace(/\\\\'/g, \"'\");\n optimizedtext = optimizedtext.replace(/\\^\\^\\^/g, \"'\\\\n'\");\n optimizedtext = optimizedtext.replace(/\\+\\+\\+/g, \"\\\\n\\\"\");\n optimizedtext = optimizedtext.replace(/\\@\\@\\@/g, \"\\\"\\\\n\");\n if (cell.model.value.text === currentText) {\n cell.model.value.text = optimizedtext;\n }\n }\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX \\n`;\n 
this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log \\n`;\n }\n // let command = \"lscpu | grep 'Model name'\"\n // let get_hardware = `import subprocess\\nsubp = subprocess.Popen(\"${command}\",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding=\"utf-8\")\\nsubp.wait(2)\\nhardware = subp.communicate()[0].replace(\"Model name:\",\"\").strip()`\n // let expr_hardware = {hardware: \"hardware\"}\n // let hard_res = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware,false);\n // hard_res.then(value =>{\n // let hard = Object.values(value.hardware.data)[0] as string;\n // if(this.markdown){\n // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \\n`\n // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log \\n`\n // }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\\n`\n // })\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to lab_workspace\\\\NeuralCoder${this.rand}.log\\n`\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = Constants.ICON_RUN;\n (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? 
void 0 : _d.replaceWith(run_svg);\n });\n }\n });\n });\n }\n }\n }\n}\nexport class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {\n constructor(notebookTracker, panel) {\n super(panel);\n this.notebookTracker = notebookTracker;\n this.notebookname = '';\n }\n async optimizeAction(config, formatter) {\n return this.optimizeCells(true, config, formatter);\n }\n async optimizeAllCodeCells(config, formatter, notebook, run) {\n return this.optimizeCells(false, config, formatter, notebook, run);\n }\n getCodeCells(ifmarkdown = true, notebook) {\n if (!this.notebookTracker.currentWidget) {\n return [];\n }\n const codeCells = [];\n notebook = notebook || this.notebookTracker.currentWidget.content;\n this.notebookname = notebook.title.label;\n let count = 0;\n notebook.widgets.forEach((cell) => {\n if (cell.model.type === 'code') {\n count += 1;\n codeCells.push(cell);\n }\n });\n if (ifmarkdown) {\n NotebookActions.insertBelow(notebook);\n this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;\n NotebookActions.changeCellType(notebook, 'markdown');\n const activeCell = notebook.activeCell;\n if (activeCell) {\n this.markdown = activeCell;\n }\n }\n this.cells = codeCells;\n return codeCells;\n }\n async optimizeCells(selectedOnly, config, formatter, notebook, run) {\n if (this.working) {\n return new Promise((resolve, reject) => {\n resolve(\"false!\");\n });\n }\n console.log(\"arrive here 333\");\n this.working = true;\n const optimize_type = formatter !== undefined ? 
formatter : 'pytorch_mixed_precision_cpu';\n if (optimize_type === 'auto-quant') {\n selectedOnly = true;\n }\n else {\n selectedOnly = false;\n }\n const selectedCells = this.getCodeCells(selectedOnly, notebook);\n let cell = selectedCells[selectedCells.length - 1];\n if (selectedCells.length === 0) {\n this.working = false;\n return new Promise((resolve, reject) => {\n resolve(\"false!\");\n });\n }\n const currentTexts = selectedCells.map(cell => cell.model.value.text);\n if (optimize_type === 'auto-quant') {\n console.log(\"arrive here 444-111\");\n if (this.markdown) {\n this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook \"${this.notebookname}\" \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \\n`;\n }\n // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook \"${this.notebookname}\"\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\\n`\n let runcode = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Auto-Quant Started ......\\\\n\")`;\n let expr = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n let runcode2 = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\\\n\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n let runcode3 = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Benchmark Mode: Throughput\\\\n\")`;\n let expr3 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);\n // 
cell.outputArea.node.setAttribute(\"class\",\"pad\")\n await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);\n }\n else {\n console.log(\"arrive here 444-222\");\n await this.optimizeCode(currentTexts, optimize_type, \"\", \"\", \"normal\", true, this.panel, cell, run);\n }\n this.working = false;\n console.log(\"arrive here 555\");\n return new Promise((resolve, reject) => {\n resolve(\"success!\");\n });\n }\n applicable(formatter, currentWidget) {\n const currentNotebookWidget = this.notebookTracker.currentWidget;\n return currentNotebookWidget && currentWidget === currentNotebookWidget;\n }\n}\n","import { INotebookTracker } from '@jupyterlab/notebook';\nimport { ToolbarButton, showDialog, Dialog } from '@jupyterlab/apputils';\nimport { ISettingRegistry } from '@jupyterlab/settingregistry';\nimport { IMainMenu } from '@jupyterlab/mainmenu';\nimport { LabIcon } from '@jupyterlab/ui-components';\nimport { Widget } from '@lumino/widgets';\nimport { JupyterlabNotebookCodeOptimizer } from './deepcoder';\nimport { Constants } from './constants';\nclass neural_compressor_ext_lab {\n constructor(app, tracker, notebookpanel) {\n this.app = app;\n this.tracker = tracker;\n this.notebookpanel = notebookpanel;\n this.setupWidgetExtension();\n this.config = '';\n }\n createNew(nb) {\n this.notebookpanel = nb;\n this.notebookCodeOptimizer = new JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);\n const svg = document.createElement(\"svg\");\n 
svg.innerHTML = Constants.ICON_FORMAT_ALL_SVG;\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = Constants.ICON_RUN;\n const div = document.createElement(\"div\");\n div.setAttribute(\"class\", \"wrapper\");\n const span = document.createElement(\"span\");\n span.setAttribute(\"class\", \"f1ozlkqi\");\n span.innerHTML = Constants.SVG;\n const selector = document.createElement(\"select\");\n selector.setAttribute(\"class\", \"aselector\");\n selector.id = \"NeuralCoder\";\n const option1 = document.createElement(\"option\");\n option1.value = \"pytorch_inc_static_quant_fx\";\n option1.innerText = \"INC Enable INT8 (Static)\";\n option1.selected = true;\n const option2 = document.createElement(\"option\");\n option2.value = \"pytorch_inc_dynamic_quant\";\n option2.innerText = \"INC Enable INT8 (Dynamic)\";\n const option3 = document.createElement(\"option\");\n option3.value = \"pytorch_inc_bf16\";\n option3.innerText = \"INC Enable BF16\";\n const option4 = document.createElement(\"option\");\n option4.value = \"auto-quant\";\n option4.innerText = \"INC Auto Enable & Benchmark\";\n selector.options.add(option1);\n selector.options.add(option2);\n selector.options.add(option3);\n selector.options.add(option4);\n div.appendChild(selector);\n div.appendChild(span);\n const selector_widget = new Widget();\n selector_widget.node.appendChild(div);\n selector_widget.addClass(\"aselector\");\n let notebookCodeOptimizer = this.notebookCodeOptimizer;\n let config = this.config;\n const dia_input = document.createElement(\"input\");\n const dia_widget = new Widget();\n dia_widget.node.appendChild(dia_input);\n dia_widget.addClass(\"dialog\");\n const run_button = new ToolbarButton({\n tooltip: 'NeuralCoder',\n icon: new LabIcon({\n name: \"run\",\n svgstr: Constants.ICON_RUN\n }),\n onClick: async function () {\n var _a, _b, _c, _d;\n console.log(\"arrive here 111\");\n (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? 
void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);\n if (selector.options[selector.selectedIndex].value === 'auto-quant') {\n await showDialog({\n title: 'Please input execute parameters:',\n body: dia_widget,\n buttons: [Dialog.okButton({ label: 'Confirm' })]\n }).then(result => {\n if (result.button.accept) {\n config = dia_input.value;\n }\n });\n }\n console.log(\"arrive here 222\");\n await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);\n }\n });\n nb.toolbar.insertItem(11, \"nc\", run_button);\n nb.toolbar.insertItem(12, \"selector\", selector_widget);\n }\n setupWidgetExtension() {\n this.app.docRegistry.addWidgetExtension('Notebook', this);\n }\n}\n/**\n * Initialization data for the neural_compressor_ext_lab extension.\n */\nconst plugin = {\n id: 'neural_compressor_ext_lab:plugin',\n autoStart: true,\n requires: [INotebookTracker, IMainMenu],\n optional: [ISettingRegistry],\n activate: (app, tracker, notebookpanel) => {\n new neural_compressor_ext_lab(app, tracker, notebookpanel);\n console.log('JupyterLab extension neural_compressor_ext_lab is activated!');\n }\n};\nexport default plugin;\n","/*\n * Copyright 2019-2020 The Kale Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport { Dialog, showDialog } from 
'@jupyterlab/apputils';\n// @ts-ignore\nimport SanitizedHTML from 'react-sanitized-html';\nimport * as React from 'react';\nexport default class NotebookUtilities {\n /**\n * generate random number\n * @Min\n * @Max\n */\n static GetRandomNum(Min, Max) {\n let Range;\n Range = Max - Min;\n var Rand = Math.random();\n return (Min + Math.round(Rand * Range));\n }\n /**\n * Builds an HTML container by sanitizing a list of strings and converting\n * them in valid HTML\n * @param msg A list of string with HTML formatting\n * @returns a HTMLDivElement composed of a list of spans with formatted text\n */\n static buildDialogBody(msg) {\n return (React.createElement(\"div\", null, msg.map((s, i) => {\n return (React.createElement(React.Fragment, { key: `msg-${i}` },\n React.createElement(SanitizedHTML, { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),\n React.createElement(\"br\", null)));\n })));\n }\n /**\n * Opens a pop-up dialog in JupyterLab to display a simple message.\n * @param title The title for the message popup\n * @param msg The message as an array of strings\n * @param buttonLabel The label to use for the button. Default is 'OK'\n * @param buttonClassName The classname to give to the 'ok' button\n * @returns Promise - A promise once the message is closed.\n */\n static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {\n const buttons = [\n Dialog.okButton({ label: buttonLabel, className: buttonClassName }),\n ];\n const messageBody = this.buildDialogBody(msg);\n await showDialog({ title, buttons, body: messageBody });\n }\n /**\n * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.\n * @param title The title for the message popup\n * @param msg The message\n * @param acceptLabel The label to use for the accept button. Default is 'YES'\n * @param rejectLabel The label to use for the reject button. 
Default is 'NO'\n * @param yesButtonClassName The classname to give to the accept button.\n * @param noButtonClassName The classname to give to the cancel button.\n * @returns Promise - A promise once the message is closed.\n */\n static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {\n const buttons = [\n Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),\n Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),\n ];\n const messageBody = this.buildDialogBody(msg);\n const result = await showDialog({ title, buttons, body: messageBody });\n return result.button.label === acceptLabel;\n }\n /**\n * Opens a pop-up dialog in JupyterLab with various information and button\n * triggering reloading the page.\n * @param title The title for the message popup\n * @param msg The message\n * @param buttonLabel The label to use for the button. Default is 'Refresh'\n * @param buttonClassName The classname to give to the 'refresh' button.\n * @returns Promise - A promise once the message is closed.\n */\n static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {\n await this.showMessage(title, msg, buttonLabel, buttonClassName);\n location.reload();\n }\n /**\n * @description Creates a new JupyterLab notebook for use by the application\n * @param command The command registry\n * @returns Promise - A promise containing the notebook panel object that was created (if successful).\n */\n static async createNewNotebook(command) {\n const notebook = await command.execute('notebook:create-new', {\n activate: true,\n path: '',\n preferredLanguage: '',\n });\n await notebook.session.ready;\n return notebook;\n }\n /**\n * Safely saves the Jupyter notebook document contents to disk\n * @param notebookPanel The notebook panel containing the notebook to save\n */\n static async saveNotebook(notebookPanel) {\n if (notebookPanel) {\n await 
notebookPanel.context.ready;\n notebookPanel.context.save();\n return true;\n }\n return false;\n }\n /**\n * Convert the notebook contents to JSON\n * @param notebookPanel The notebook panel containing the notebook to serialize\n */\n static notebookToJSON(notebookPanel) {\n if (notebookPanel.content.model) {\n return notebookPanel.content.model.toJSON();\n }\n return null;\n }\n /**\n * @description Gets the value of a key from specified notebook's metadata.\n * @param notebookPanel The notebook to get meta data from.\n * @param key The key of the value.\n * @returns any -The value of the metadata. Returns null if the key doesn't exist.\n */\n static getMetaData(notebookPanel, key) {\n if (!notebookPanel) {\n throw new Error('The notebook is null or undefined. No meta data available.');\n }\n if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {\n return notebookPanel.model.metadata.get(key);\n }\n return null;\n }\n /**\n * @description Sets the key value pair in the notebook's metadata.\n * If the key doesn't exists it will add one.\n * @param notebookPanel The notebook to set meta data in.\n * @param key The key of the value to create.\n * @param value The value to set.\n * @param save Default is false. Whether the notebook should be saved after the meta data is set.\n * Note: This function will not wait for the save to complete, it only sends a save request.\n * @returns The old value for the key, or undefined if it did not exist.\n */\n static setMetaData(notebookPanel, key, value, save = false) {\n var _a;\n if (!notebookPanel) {\n throw new Error('The notebook is null or undefined. No meta data available.');\n }\n const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? 
void 0 : _a.metadata.set(key, value);\n if (save) {\n this.saveNotebook(notebookPanel);\n }\n return oldVal;\n }\n // /**\n // * Get a new Kernel, not tied to a Notebook\n // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services\n // */\n // public static async createNewKernel() {\n // // Get info about the available kernels and start a new one.\n // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {\n // // console.log('Default spec:', kernelSpecs.default);\n // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));\n // // use the default name\n // return { name: kernelSpecs.default };\n // });\n // return await Kernel.startNew(options).then(_kernel => {\n // return _kernel;\n // });\n // }\n // // TODO: We can use this context manager to execute commands inside a new kernel\n // // and be sure that it will be disposed of at the end.\n // // Another approach could be to create a kale_rpc Kernel, as a singleton,\n // // created at startup. The only (possible) drawback is that we can not name\n // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would\n // // not recognize the kernel. 
A solution could be to have a kernel spec dedicated to kale rpc calls.\n // public static async executeWithNewKernel(action: Function, args: any[] = []) {\n // // create brand new kernel\n // const _k = await this.createNewKernel();\n // // execute action inside kernel\n // const res = await action(_k, ...args);\n // // close kernel\n // _k.shutdown();\n // // return result\n // return res;\n // }\n /**\n * @description This function runs code directly in the notebook's kernel and then evaluates the\n * result and returns it as a promise.\n * @param kernel The kernel to run the code in.\n * @param runCode The code to run in the kernel.\n * @param userExpressions The expressions used to capture the desired info from the executed code.\n * @param runSilent Default is false. If true, kernel will execute as quietly as possible.\n * store_history will be set to false, and no broadcast on IOPUB channel will be made.\n * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history\n * and the counter which is shown in the cells will be incremented to reflect code was run.\n * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using\n * an input_request message.\n * @param stopOnError Default is false. 
If True, does not abort the execution queue, if an exception is encountered.\n * This allows the queued execution of multiple execute_requests, even if they generate exceptions.\n * @returns Promise - A promise containing the execution results of the code as an object with\n * keys based on the user_expressions.\n * @example\n * //The code\n * const code = \"a=123\\nb=456\\nsum=a+b\";\n * //The user expressions\n * const expr = {sum: \"sum\",prod: \"a*b\",args:\"[a,b,sum]\"};\n * //Async function call (returns a promise)\n * sendKernelRequest(notebookPanel, code, expr,false);\n * //Result when promise resolves:\n * {\n * sum:{status:\"ok\",data:{\"text/plain\":\"579\"},metadata:{}},\n * prod:{status:\"ok\",data:{\"text/plain\":\"56088\"},metadata:{}},\n * args:{status:\"ok\",data:{\"text/plain\":\"[123, 456, 579]\"}}\n * }\n * @see For more information on JupyterLab messages:\n * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results\n */\n static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {\n if (!kernel) {\n throw new Error('Kernel is null or undefined.');\n }\n // Wait for kernel to be ready before sending request\n // await kernel.status;\n const message = await kernel.requestExecute({\n allow_stdin: allowStdIn,\n code: runCode,\n silent: runSilent,\n stop_on_error: stopOnError,\n store_history: storeHistory,\n user_expressions: userExpressions,\n }).done;\n const content = message.content;\n if (content.status !== 'ok') {\n // If response is not 'ok', throw contents as error, log code\n const msg = `Code caused an error:\\n${runCode}`;\n console.error(msg);\n if (content.traceback) {\n content.traceback.forEach((line) => console.log(line.replace(/[\\u001b\\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));\n }\n throw content;\n }\n // Return user_expressions of the content\n return content.user_expressions;\n }\n 
/**\n * Same as method sendKernelRequest but passing\n * a NotebookPanel instead of a Kernel\n */\n static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {\n var _a, _b, _c, _d;\n if (!notebookPanel) {\n throw new Error('Notebook is null or undefined.');\n }\n // Wait for notebook panel to be ready\n await notebookPanel.activate;\n await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);\n console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);\n return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);\n }\n}\n"],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c52105b83d9cf4290a9.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c52105b83d9cf4290a9.js
deleted file mode 100644
index 5291fab139f..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c52105b83d9cf4290a9.js
+++ /dev/null
@@ -1,767 +0,0 @@
-"use strict";
-(self["webpackChunkneural_compressor_ext_lab"] = self["webpackChunkneural_compressor_ext_lab"] || []).push([["lib_index_js"],{
-
-/***/ "./lib/constants.js":
-/*!**************************!*\
- !*** ./lib/constants.js ***!
- \**************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "Constants": () => (/* binding */ Constants)
-/* harmony export */ });
-var Constants;
-(function (Constants) {
- Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';
- Constants.WORK_PATH = "neural_coder_workspace/";
- Constants.ICON_FORMAT_ALL_SVG = ' ';
- Constants.ICON_RUN = ' ';
- Constants.SVG = ' ';
- Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;
- Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;
- Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';
- Constants.PLUGIN_VERSION = '0.1.0';
-})(Constants || (Constants = {}));
-
-
-/***/ }),
-
-/***/ "./lib/deepcoder.js":
-/*!**************************!*\
- !*** ./lib/deepcoder.js ***!
- \**************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "JupyterlabNotebookCodeOptimizer": () => (/* binding */ JupyterlabNotebookCodeOptimizer)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/notebook */ "webpack/sharing/consume/default/@jupyterlab/notebook");
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _utils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./utils */ "./lib/utils.js");
-/* harmony import */ var _constants__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./constants */ "./lib/constants.js");
-
-
-
-class JupyterlabCodeOptimizer {
- constructor(panel) {
- this.working = false;
- this.panel = panel;
- this.tmp_path = "tmp.py";
- this.rand = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].GetRandomNum(0, 200);
- this.log_path = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.WORK_PATH + "NeuralCoder" + this.rand + ".log";
- this.tmp_log_path = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.WORK_PATH + "NeuralCoder_tmp" + ".log";
- this.cells = [];
- }
- async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {
- let codes = [];
- code.forEach(function (value) {
- value = value.replace(/('\\n')/g, '^^^');
- value = value.replace(/\\n"/g, '###');
- value = value.replace(/\\n'/g, '###');
- value = value.replace(/"\\n/g, '@@');
- value = value.replace(/'\\n/g, '@@');
- value = value.replace(/\n/g, '\\n');
- value = value.replace(/"/g, '+++');
- value = value.replace(/,/g, '$');
- codes.push(value);
- });
- let gen_code = `code = "${codes}"\ncodes = code.split(',')\nwith open( '${this.tmp_path}', 'w+' ) as f:\n for i in range(0,len(codes)):\n f.write('# this is the beginning of a single code snippet\\n')\n code_list = codes[i].replace('$',',').replace('+++','\"').split('\\n')\n for line in code_list:\n if('split(^^^)' in line):\n line=line.replace('split(^^^)', 'split(\\'\\\\n\\')')\n if('###' in line):\n line=line.replace('###', '\\\\n\"')\n if('@@' in line):\n line=line.replace('@@', '\"\\\\n')\n f.write(line+'\\n')`;
- const expr = { code_list: `code_list` };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, gen_code, expr, false);
- if (options === 'normal') {
- let runcode = `from neural_coder import enable\nenable(code="${this.tmp_path}",features=["${formatter}"], overwrite=True)`;
- let expr = { sum: ` ` };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let run_code1 = `with open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let expr1 = { optimizedCode: "optimized_code" };
- let result2 = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, run_code1, expr1, false);
- result2.then(value => {
- var _a, _b, _c, _d;
- let optimizedTexts = Object.values(value.optimizedCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- }
- });
- }
- else {
- if (formatter === '') {
- if (this.markdown) {
- this.markdown.model.value.text += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \n";
- }
- // cell.outputArea.node.innerText += "[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\n"
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n")`;
- let expr1 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}",features=[], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode1 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr1 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode1, expr1, false);
- let runcode2 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- });
- }
- else {
- let runcode = `from neural_coder import enable\nperfomance, mode, path = enable(code="${this.tmp_path}", features=["${formatter}"], run_bench=True, args="${options}")\nwith open(path + '/bench.log', 'r') as f:\n logs = f.readlines()\nlog_line = logs[4]\nlog = log_line.split("[")[1].split("]")[0]`;
- let expr = { path: "path", log: "log" };
- let result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(panel, runcode, expr, false);
- let fps;
- result.then(value => {
- fps = Object.values(value.log.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\n`
- let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\n`;
- let runcode = `with open("${this.log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- if (next !== '') {
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \n`;
- }
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\n`
- let runcode2 = `with open("${this.log_path}", 'a' ) as f:\n f.write("[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- }
- let runcode3 = `with open("${this.tmp_log_path}", 'a' ) as f:\n f.write("${text}")`;
- let expr3 = { path: "" };
- let res_tmp = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- res_tmp.then(value => {
- if (formatter === 'pytorch_inc_bf16') {
- let read_log = `import re\nwith open("${this.tmp_log_path}", 'r') as f:\n logs = f.readlines()\n fps_list=[]\n for log_line in logs[-4:]:\n pat = re.compile(r\'\\d+\\.?\\d+')\n fps = re.findall(pat,log_line)[-1]\n fps_list.append(float(fps))\nmaxi = max(fps_list)\nindex = fps_list.index(maxi)\nboost = round(maxi/fps_list[0],1)\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\nbest_feature = features[index]\nbest_name = feature_name[index]\nfeature_l = []\nfeature_l.append(best_feature)\nfrom neural_coder import enable\nenable(code="${this.tmp_path}",features=feature_l, overwrite=True)\nwith open("${this.tmp_path}", 'r') as f:\n optimized_code = f.read()\n`;
- let read_expr = { boost: "boost", best_feature: "best_feature", best_name: "best_name", optimizeCode: "optimized_code", feature_l: "fps_list", maxi: "maxi", index: "index" };
- let read_result = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);
- read_result.then(value => {
- var _a, _b, _c, _d;
- console.log("resres", value);
- let boost = Object.values(value.boost.data)[0];
- let best_name = Object.values(value.best_name.data)[0];
- let optimizedTexts = Object.values(value.optimizeCode.data)[0];
- let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\n').slice(1);
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \n`;
- }
- // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\n`
- optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);
- for (let i = 0; i < optimizeCodes.length; ++i) {
- const cell = this.cells[i];
- const currentTexts = this.cells.map(cell => cell.model.value.text);
- const currentText = currentTexts[i];
- let optimizedtext = optimizeCodes[i];
- optimizedtext = optimizedtext.replace(/\\'\\\\n\\'/g, "^^^");
- optimizedtext = optimizedtext.replace(/\\\\n"/g, "+++");
- optimizedtext = optimizedtext.replace(/\\\\n'/g, "+++");
- optimizedtext = optimizedtext.replace(/"\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/'\\\\n/g, "@@@");
- optimizedtext = optimizedtext.replace(/\\n/g, '\n');
- optimizedtext = optimizedtext.replace(/\\'/g, "'");
- optimizedtext = optimizedtext.replace(/\^\^\^/g, "'\\n'");
- optimizedtext = optimizedtext.replace(/\+\+\+/g, "\\n\"");
- optimizedtext = optimizedtext.replace(/\@\@\@/g, "\"\\n");
- if (cell.model.value.text === currentText) {
- cell.model.value.text = optimizedtext;
- }
- }
- // if(this.markdown){
- // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX \n`
- // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`
- // }
- let command = "lscpu | grep 'Model name'";
- let get_hardware = `import subprocess\nsubp = subprocess.Popen("${command}",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding="utf-8")\nsubp.wait(2)\nhardware = subp.communicate()[0].replace("Model name:","").strip()`;
- let expr_hardware = { hardware: "hardware" };
- let hard_res = _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware, false);
- hard_res.then(value => {
- let hard = Object.values(value.hardware.data)[0];
- if (this.markdown) {
- this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log \n`;
- }
- cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\n`;
- });
- cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\NeuralCoder${this.rand}.log\n`;
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_2__.Constants.ICON_RUN;
- (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);
- });
- }
- });
- });
- }
- }
- }
-}
-class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {
- constructor(notebookTracker, panel) {
- super(panel);
- this.notebookTracker = notebookTracker;
- this.notebookname = '';
- }
- async optimizeAction(config, formatter) {
- return this.optimizeCells(true, config, formatter);
- }
- async optimizeAllCodeCells(config, formatter, notebook, run) {
- return this.optimizeCells(false, config, formatter, notebook, run);
- }
- getCodeCells(ifmarkdown = true, notebook) {
- if (!this.notebookTracker.currentWidget) {
- return [];
- }
- const codeCells = [];
- notebook = notebook || this.notebookTracker.currentWidget.content;
- this.notebookname = notebook.title.label;
- let count = 0;
- notebook.widgets.forEach((cell) => {
- if (cell.model.type === 'code') {
- count += 1;
- codeCells.push(cell);
- }
- });
- if (ifmarkdown) {
- _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.NotebookActions.insertBelow(notebook);
- this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;
- _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.NotebookActions.changeCellType(notebook, 'markdown');
- const activeCell = notebook.activeCell;
- if (activeCell) {
- this.markdown = activeCell;
- }
- }
- this.cells = codeCells;
- return codeCells;
- }
- async optimizeCells(selectedOnly, config, formatter, notebook, run) {
- if (this.working) {
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- console.log("arrive here 333");
- this.working = true;
- const optimize_type = formatter !== undefined ? formatter : 'pytorch_mixed_precision_cpu';
- if (optimize_type === 'auto-quant') {
- selectedOnly = true;
- }
- else {
- selectedOnly = false;
- }
- const selectedCells = this.getCodeCells(selectedOnly, notebook);
- let cell = selectedCells[selectedCells.length - 1];
- if (selectedCells.length === 0) {
- this.working = false;
- return new Promise((resolve, reject) => {
- resolve("false!");
- });
- }
- const currentTexts = selectedCells.map(cell => cell.model.value.text);
- if (optimize_type === 'auto-quant') {
- console.log("arrive here 444-111");
- if (this.markdown) {
- this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}" \n`;
- this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \n`;
- }
- // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook "${this.notebookname}"\n`
- // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\n`
- let runcode = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Auto-Quant Started ......\\n")`;
- let expr = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode, expr, false);
- let runcode2 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\n")`;
- let expr2 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);
- let runcode3 = `with open('${this.log_path}', 'a' ) as f:\n f.write("[NeuralCoder INFO] Benchmark Mode: Throughput\\n")`;
- let expr3 = { path: "" };
- _utils__WEBPACK_IMPORTED_MODULE_1__["default"].sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);
- // cell.outputArea.node.setAttribute("class","pad")
- await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);
- await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);
- }
- else {
- console.log("arrive here 444-222");
- await this.optimizeCode(currentTexts, optimize_type, "", "", "normal", true, this.panel, cell, run);
- }
- this.working = false;
- console.log("arrive here 555");
- return new Promise((resolve, reject) => {
- resolve("success!");
- });
- }
- applicable(formatter, currentWidget) {
- const currentNotebookWidget = this.notebookTracker.currentWidget;
- return currentNotebookWidget && currentWidget === currentNotebookWidget;
- }
-}
-
-
-/***/ }),
-
-/***/ "./lib/index.js":
-/*!**********************!*\
- !*** ./lib/index.js ***!
- \**********************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (__WEBPACK_DEFAULT_EXPORT__)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/notebook */ "webpack/sharing/consume/default/@jupyterlab/notebook");
-/* harmony import */ var _jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! @jupyterlab/apputils */ "webpack/sharing/consume/default/@jupyterlab/apputils");
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var _jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! @jupyterlab/settingregistry */ "webpack/sharing/consume/default/@jupyterlab/settingregistry");
-/* harmony import */ var _jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__);
-/* harmony import */ var _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! @jupyterlab/mainmenu */ "webpack/sharing/consume/default/@jupyterlab/mainmenu");
-/* harmony import */ var _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__);
-/* harmony import */ var _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! @jupyterlab/ui-components */ "webpack/sharing/consume/default/@jupyterlab/ui-components");
-/* harmony import */ var _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__);
-/* harmony import */ var _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! @lumino/widgets */ "webpack/sharing/consume/default/@lumino/widgets");
-/* harmony import */ var _lumino_widgets__WEBPACK_IMPORTED_MODULE_5___default = /*#__PURE__*/__webpack_require__.n(_lumino_widgets__WEBPACK_IMPORTED_MODULE_5__);
-/* harmony import */ var _deepcoder__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./deepcoder */ "./lib/deepcoder.js");
-/* harmony import */ var _constants__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./constants */ "./lib/constants.js");
-
-
-
-
-
-
-
-
-class neural_compressor_ext_lab {
- constructor(app, tracker, notebookpanel) {
- this.app = app;
- this.tracker = tracker;
- this.notebookpanel = notebookpanel;
- this.setupWidgetExtension();
- this.config = '';
- }
- createNew(nb) {
- this.notebookpanel = nb;
- this.notebookCodeOptimizer = new _deepcoder__WEBPACK_IMPORTED_MODULE_6__.JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);
- const svg = document.createElement("svg");
- svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_FORMAT_ALL_SVG;
- const run_svg = document.createElement("svg");
- run_svg.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_RUN;
- const div = document.createElement("div");
- div.setAttribute("class", "wrapper");
- const span = document.createElement("span");
- span.setAttribute("class", "f1ozlkqi");
- span.innerHTML = _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.SVG;
- const selector = document.createElement("select");
- selector.setAttribute("class", "aselector");
- selector.id = "NeuralCoder";
- const option1 = document.createElement("option");
- option1.value = "pytorch_inc_static_quant_fx";
- option1.innerText = "INC Enable INT8 (Static)";
- option1.selected = true;
- const option2 = document.createElement("option");
- option2.value = "pytorch_inc_dynamic_quant";
- option2.innerText = "INC Enable INT8 (Dynamic)";
- const option3 = document.createElement("option");
- option3.value = "pytorch_inc_bf16";
- option3.innerText = "INC Enable BF16";
- const option4 = document.createElement("option");
- option4.value = "auto-quant";
- option4.innerText = "INC Auto Enable & Benchmark";
- selector.options.add(option1);
- selector.options.add(option2);
- selector.options.add(option3);
- selector.options.add(option4);
- div.appendChild(selector);
- div.appendChild(span);
- const selector_widget = new _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__.Widget();
- selector_widget.node.appendChild(div);
- selector_widget.addClass("aselector");
- let notebookCodeOptimizer = this.notebookCodeOptimizer;
- let config = this.config;
- const dia_input = document.createElement("input");
- const dia_widget = new _lumino_widgets__WEBPACK_IMPORTED_MODULE_5__.Widget();
- dia_widget.node.appendChild(dia_input);
- dia_widget.addClass("dialog");
- const run_button = new _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.ToolbarButton({
- tooltip: 'NeuralCoder',
- icon: new _jupyterlab_ui_components__WEBPACK_IMPORTED_MODULE_4__.LabIcon({
- name: "run",
- svgstr: _constants__WEBPACK_IMPORTED_MODULE_7__.Constants.ICON_RUN
- }),
- onClick: async function () {
- var _a, _b, _c, _d;
- console.log("arrive here 111");
- (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);
- if (selector.options[selector.selectedIndex].value === 'auto-quant') {
- await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.showDialog)({
- title: 'Please input execute parameters:',
- body: dia_widget,
- buttons: [_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_1__.Dialog.okButton({ label: 'Confirm' })]
- }).then(result => {
- if (result.button.accept) {
- config = dia_input.value;
- }
- });
- }
- console.log("arrive here 222");
- await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);
- }
- });
- nb.toolbar.insertItem(11, "nc", run_button);
- nb.toolbar.insertItem(12, "selector", selector_widget);
- }
- setupWidgetExtension() {
- this.app.docRegistry.addWidgetExtension('Notebook', this);
- }
-}
-/**
- * Initialization data for the neural_compressor_ext_lab extension.
- */
-const plugin = {
- id: 'neural_compressor_ext_lab:plugin',
- autoStart: true,
- requires: [_jupyterlab_notebook__WEBPACK_IMPORTED_MODULE_0__.INotebookTracker, _jupyterlab_mainmenu__WEBPACK_IMPORTED_MODULE_3__.IMainMenu],
- optional: [_jupyterlab_settingregistry__WEBPACK_IMPORTED_MODULE_2__.ISettingRegistry],
- activate: (app, tracker, notebookpanel) => {
- new neural_compressor_ext_lab(app, tracker, notebookpanel);
- console.log('JupyterLab extension neural_compressor_ext_lab is activated!');
- }
-};
-/* harmony default export */ const __WEBPACK_DEFAULT_EXPORT__ = (plugin);
-
-
-/***/ }),
-
-/***/ "./lib/utils.js":
-/*!**********************!*\
- !*** ./lib/utils.js ***!
- \**********************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (/* binding */ NotebookUtilities)
-/* harmony export */ });
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! @jupyterlab/apputils */ "webpack/sharing/consume/default/@jupyterlab/apputils");
-/* harmony import */ var _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var react_sanitized_html__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! react-sanitized-html */ "webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html");
-/* harmony import */ var react_sanitized_html__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(react_sanitized_html__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var react__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! react */ "webpack/sharing/consume/default/react");
-/* harmony import */ var react__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(react__WEBPACK_IMPORTED_MODULE_2__);
-/*
- * Copyright 2019-2020 The Kale Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// @ts-ignore
-
-
-class NotebookUtilities {
- /**
- * generate random number
- * @Min
- * @Max
- */
- static GetRandomNum(Min, Max) {
- let Range;
- Range = Max - Min;
- var Rand = Math.random();
- return (Min + Math.round(Rand * Range));
- }
- /**
- * Builds an HTML container by sanitizing a list of strings and converting
- * them in valid HTML
- * @param msg A list of string with HTML formatting
- * @returns a HTMLDivElement composed of a list of spans with formatted text
- */
- static buildDialogBody(msg) {
- return (react__WEBPACK_IMPORTED_MODULE_2__.createElement("div", null, msg.map((s, i) => {
- return (react__WEBPACK_IMPORTED_MODULE_2__.createElement(react__WEBPACK_IMPORTED_MODULE_2__.Fragment, { key: `msg-${i}` },
- react__WEBPACK_IMPORTED_MODULE_2__.createElement((react_sanitized_html__WEBPACK_IMPORTED_MODULE_1___default()), { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),
- react__WEBPACK_IMPORTED_MODULE_2__.createElement("br", null)));
- })));
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a simple message.
- * @param title The title for the message popup
- * @param msg The message as an array of strings
- * @param buttonLabel The label to use for the button. Default is 'OK'
- * @param buttonClassName The classname to give to the 'ok' button
- * @returns Promise - A promise once the message is closed.
- */
- static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {
- const buttons = [
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.okButton({ label: buttonLabel, className: buttonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.showDialog)({ title, buttons, body: messageBody });
- }
- /**
- * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.
- * @param title The title for the message popup
- * @param msg The message
- * @param acceptLabel The label to use for the accept button. Default is 'YES'
- * @param rejectLabel The label to use for the reject button. Default is 'NO'
- * @param yesButtonClassName The classname to give to the accept button.
- * @param noButtonClassName The classname to give to the cancel button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {
- const buttons = [
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),
- _jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),
- ];
- const messageBody = this.buildDialogBody(msg);
- const result = await (0,_jupyterlab_apputils__WEBPACK_IMPORTED_MODULE_0__.showDialog)({ title, buttons, body: messageBody });
- return result.button.label === acceptLabel;
- }
- /**
- * Opens a pop-up dialog in JupyterLab with various information and button
- * triggering reloading the page.
- * @param title The title for the message popup
- * @param msg The message
- * @param buttonLabel The label to use for the button. Default is 'Refresh'
- * @param buttonClassName The classname to give to the 'refresh' button.
- * @returns Promise - A promise once the message is closed.
- */
- static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {
- await this.showMessage(title, msg, buttonLabel, buttonClassName);
- location.reload();
- }
- /**
- * @description Creates a new JupyterLab notebook for use by the application
- * @param command The command registry
- * @returns Promise - A promise containing the notebook panel object that was created (if successful).
- */
- static async createNewNotebook(command) {
- const notebook = await command.execute('notebook:create-new', {
- activate: true,
- path: '',
- preferredLanguage: '',
- });
- await notebook.session.ready;
- return notebook;
- }
- /**
- * Safely saves the Jupyter notebook document contents to disk
- * @param notebookPanel The notebook panel containing the notebook to save
- */
- static async saveNotebook(notebookPanel) {
- if (notebookPanel) {
- await notebookPanel.context.ready;
- notebookPanel.context.save();
- return true;
- }
- return false;
- }
- /**
- * Convert the notebook contents to JSON
- * @param notebookPanel The notebook panel containing the notebook to serialize
- */
- static notebookToJSON(notebookPanel) {
- if (notebookPanel.content.model) {
- return notebookPanel.content.model.toJSON();
- }
- return null;
- }
- /**
- * @description Gets the value of a key from specified notebook's metadata.
- * @param notebookPanel The notebook to get meta data from.
- * @param key The key of the value.
- * @returns any -The value of the metadata. Returns null if the key doesn't exist.
- */
- static getMetaData(notebookPanel, key) {
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {
- return notebookPanel.model.metadata.get(key);
- }
- return null;
- }
- /**
- * @description Sets the key value pair in the notebook's metadata.
- * If the key doesn't exists it will add one.
- * @param notebookPanel The notebook to set meta data in.
- * @param key The key of the value to create.
- * @param value The value to set.
- * @param save Default is false. Whether the notebook should be saved after the meta data is set.
- * Note: This function will not wait for the save to complete, it only sends a save request.
- * @returns The old value for the key, or undefined if it did not exist.
- */
- static setMetaData(notebookPanel, key, value, save = false) {
- var _a;
- if (!notebookPanel) {
- throw new Error('The notebook is null or undefined. No meta data available.');
- }
- const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? void 0 : _a.metadata.set(key, value);
- if (save) {
- this.saveNotebook(notebookPanel);
- }
- return oldVal;
- }
- // /**
- // * Get a new Kernel, not tied to a Notebook
- // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services
- // */
- // public static async createNewKernel() {
- // // Get info about the available kernels and start a new one.
- // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {
- // // console.log('Default spec:', kernelSpecs.default);
- // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));
- // // use the default name
- // return { name: kernelSpecs.default };
- // });
- // return await Kernel.startNew(options).then(_kernel => {
- // return _kernel;
- // });
- // }
- // // TODO: We can use this context manager to execute commands inside a new kernel
- // // and be sure that it will be disposed of at the end.
- // // Another approach could be to create a kale_rpc Kernel, as a singleton,
- // // created at startup. The only (possible) drawback is that we can not name
- // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would
- // // not recognize the kernel. A solution could be to have a kernel spec dedicated to kale rpc calls.
- // public static async executeWithNewKernel(action: Function, args: any[] = []) {
- // // create brand new kernel
- // const _k = await this.createNewKernel();
- // // execute action inside kernel
- // const res = await action(_k, ...args);
- // // close kernel
- // _k.shutdown();
- // // return result
- // return res;
- // }
- /**
- * @description This function runs code directly in the notebook's kernel and then evaluates the
- * result and returns it as a promise.
- * @param kernel The kernel to run the code in.
- * @param runCode The code to run in the kernel.
- * @param userExpressions The expressions used to capture the desired info from the executed code.
- * @param runSilent Default is false. If true, kernel will execute as quietly as possible.
- * store_history will be set to false, and no broadcast on IOPUB channel will be made.
- * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history
- * and the counter which is shown in the cells will be incremented to reflect code was run.
- * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using
- * an input_request message.
- * @param stopOnError Default is false. If True, does not abort the execution queue, if an exception is encountered.
- * This allows the queued execution of multiple execute_requests, even if they generate exceptions.
- * @returns Promise - A promise containing the execution results of the code as an object with
- * keys based on the user_expressions.
- * @example
- * //The code
- * const code = "a=123\nb=456\nsum=a+b";
- * //The user expressions
- * const expr = {sum: "sum",prod: "a*b",args:"[a,b,sum]"};
- * //Async function call (returns a promise)
- * sendKernelRequest(notebookPanel, code, expr,false);
- * //Result when promise resolves:
- * {
- * sum:{status:"ok",data:{"text/plain":"579"},metadata:{}},
- * prod:{status:"ok",data:{"text/plain":"56088"},metadata:{}},
- * args:{status:"ok",data:{"text/plain":"[123, 456, 579]"}}
- * }
- * @see For more information on JupyterLab messages:
- * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results
- */
- static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- if (!kernel) {
- throw new Error('Kernel is null or undefined.');
- }
- // Wait for kernel to be ready before sending request
- // await kernel.status;
- const message = await kernel.requestExecute({
- allow_stdin: allowStdIn,
- code: runCode,
- silent: runSilent,
- stop_on_error: stopOnError,
- store_history: storeHistory,
- user_expressions: userExpressions,
- }).done;
- const content = message.content;
- if (content.status !== 'ok') {
- // If response is not 'ok', throw contents as error, log code
- const msg = `Code caused an error:\n${runCode}`;
- console.error(msg);
- if (content.traceback) {
- content.traceback.forEach((line) => console.log(line.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));
- }
- throw content;
- }
- // Return user_expressions of the content
- return content.user_expressions;
- }
- /**
- * Same as method sendKernelRequest but passing
- * a NotebookPanel instead of a Kernel
- */
- static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {
- var _a, _b, _c, _d;
- if (!notebookPanel) {
- throw new Error('Notebook is null or undefined.');
- }
- // Wait for notebook panel to be ready
- await notebookPanel.activate;
- await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);
- console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);
- return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);
- }
-}
-
-
-/***/ })
-
-}]);
-//# sourceMappingURL=lib_index_js.2c52105b83d9cf4290a9.js.map
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c52105b83d9cf4290a9.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c52105b83d9cf4290a9.js.map
deleted file mode 100644
index 3515ea5e505..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/lib_index_js.2c52105b83d9cf4290a9.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"lib_index_js.2c52105b83d9cf4290a9.js","mappings":";;;;;;;;;;;;;AAAO;AACP;AACA;AACA;AACA,6IAA6I,gCAAgC,gBAAgB,sBAAsB,qVAAqV,mBAAmB,gVAAgV,mBAAmB;AAC95B;AACA;AACA,yCAAyC,4BAA4B;AACrE,oCAAoC,2BAA2B;AAC/D;AACA;AACA,CAAC,8BAA8B;;;;;;;;;;;;;;;;;;;ACXwB;AACf;AACA;AACxC;AACA;AACA;AACA;AACA;AACA,oBAAoB,2DAA8B;AAClD,wBAAwB,2DAAmB;AAC3C,4BAA4B,2DAAmB;AAC/C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT,kCAAkC,MAAM,0CAA0C,cAAc;AAChG,uBAAuB;AACvB,QAAQ,4EAA+C;AACvD;AACA,2EAA2E,cAAc,eAAe,UAAU;AAClH,yBAAyB;AACzB,YAAY,4EAA+C;AAC3D,0CAA0C,cAAc;AACxD,0BAA0B;AAC1B,0BAA0B,4EAA+C;AACzE;AACA;AACA;AACA;AACA;AACA,gCAAgC,0BAA0B;AAC1D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,wCAAwC,0DAAkB;AAC1D;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA;AACA,6CAA6C,cAAc;AAC3D,8BAA8B;AAC9B,gBAAgB,4EAA+C;AAC/D,wGAAwG,cAAc,uCAAuC,QAAQ;AACrK,6BAA6B;AAC7B,6BAA6B,4EAA+C;AAC5E;AACA;AACA;AACA;AACA,wIAAwI,KAAK;AAC7I;AACA,uIAAuI,KAAK;AAC5I,6GAA6G,KAAK;AAClH,gDAAgD,cAAc,8BAA8B,KAAK;AACjG,iCAAiC;AACjC,oBAAoB,4EAA+C;AACnE;AACA,8GAA8G,MAAM;AACpH;AACA,6GAA6G,MAAM;AACnH,iDAAiD,cAAc,mFAAmF,MAAM;AACxJ,kCAAkC;AAClC,oBAAoB,4EAA+C;AACnE,iDAAiD,kBAAkB,kCAAkC,KAAK;AAC1G,kCAAkC;AAClC,oBAAoB,4EAA+C;AACnE,iBAAiB;AACjB;AACA;AACA,wGAAwG,cAAc,gBAAgB,UAAU,4BAA4B,QAAQ;AACpL,6BAA6B;AAC7B,6BAA6B,4EAA+C;AAC5E;AACA;AACA;AACA;AACA,kHAAkH,MAAM,KAAK,KAAK;AAClI;AACA,iHAAiH,MAAM,KAAK,KAAK;AACjI,uFAAuF,MAAM,KAAK,KAAK;AACvG,gDAAgD,cAAc,kCAAkC,KAAK;AACrG,iCAAiC;AACjC,oBAAoB,4EAA+C;AACnE;AACA;AACA,kHAAkH,MAAM;AACxH;AACA,iHAAiH,MAAM;AACvH,qDAAqD,cAAc,mFAAmF,MAAM;AAC5J,sCAAsC;AACtC,wBAAwB,4EAA+C;AACvE;AACA,iDAAiD,kBAAkB,kCAAkC,KAAK;AAC1G,kCAAkC;AAClC,kCAAkC,4EAA+C;AACjF;AACA;AACA,oEAAoE,kBAAkB,qpBAAqpB,cAAc,oDAAoD,cAAc;AAC3zB,8CAA8C;AAC9C,8CAA8C,4EAA+C;AAC7F;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yHAAyH,YAAY;AACrI,8GAA8G,MAAM;AACpH;AACA,uHAAuH,UAAU;AACjI,6GAA6G,MAAM;AACnH;AACA,gDAAgD,0BAA0B;AAC1E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC
A;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yJAAyJ,UAAU;AACnK;AACA;AACA,kGAAkG,QAAQ;AAC1G,sDAAsD;AACtD,+CAA+C,4EAA+C;AAC9F;AACA;AACA;AACA,0GAA0G,OAAO;AACjH,wJAAwJ,UAAU;AAClK;AACA,sGAAsG,KAAK;AAC3G,iCAAiC;AACjC,gJAAgJ,UAAU;AAC1J;AACA,oDAAoD,0DAAkB;AACtE;AACA,6BAA6B;AAC7B;AACA,qBAAqB;AACrB,iBAAiB;AACjB;AACA;AACA;AACA;AACO;AACP;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA,YAAY,6EAA2B;AACvC;AACA,YAAY,gFAA8B;AAC1C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,aAAa;AACb;AACA;AACA;AACA;AACA;AACA;AACA,mHAAmH,kBAAkB;AACrI;AACA;AACA;AACA,kHAAkH,kBAAkB;AACpI;AACA,wCAAwC,cAAc;AACtD,yBAAyB;AACzB,YAAY,4EAA+C;AAC3D,yCAAyC,cAAc,gGAAgG,kBAAkB;AACzK,0BAA0B;AAC1B,YAAY,4EAA+C;AAC3D,yCAAyC,cAAc;AACvD,0BAA0B;AAC1B,YAAY,4EAA+C;AAC3D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACtSwD;AACiB;AACV;AACd;AACG;AACX;AACqB;AACtB;AACxC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,yCAAyC,uEAA+B;AACxE;AACA,wBAAwB,qEAA6B;AACrD;AACA,4BAA4B,0DAAkB;AAC9C;AACA;AACA;AACA;AACA,yBAAyB,qDAAa;AACtC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oCAAoC,mDAAM;AAC1C;AACA;AACA;AACA;AACA;AACA,+BAA+B,mDAAM;AACrC;AACA;AACA,+BAA+B,+DAAa;AAC5C;AACA,sBAAsB,8DAAO;AAC7B;AACA,wBAAwB,0DAAkB;AAC1C,aAAa;AACb;AACA;AACA;AACA;AACA;AACA,0BAA0B,gEAAU;AACpC;AACA;AACA,kCAAkC,iEAAe,GAAG,kBAAkB;AACtE,qBAAqB;AACrB;AACA;AACA;AACA,qBAAqB;AACrB;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,kEAAgB,EAAE,2DAAS;AAC1C,eAAe,yEAAgB;AAC/B;AACA;AACA;AACA;AACA;AACA,iEAAe,MAAM,EAAC;;;;;;;;;;;;;;;;;;;;;ACxGtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAC0D;AAC1D;AACiD;AAClB;AAChB;AACf;AACA;AACA;AACA;AACA;AACA;AACA;AAC
A;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,gBAAgB,gDAAmB;AACnC,oBAAoB,gDAAmB,CAAC,2CAAc,IAAI,YAAY,EAAE,GAAG;AAC3E,gBAAgB,gDAAmB,CAAC,6DAAa,IAAI,qBAAqB,aAAa,gEAAgE;AACvJ,gBAAgB,gDAAmB;AACnC,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,YAAY,iEAAe,GAAG,gDAAgD;AAC9E;AACA;AACA,cAAc,gEAAU,GAAG,mCAAmC;AAC9D;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,YAAY,iEAAe,GAAG,mDAAmD;AACjF,YAAY,qEAAmB,GAAG,kDAAkD;AACpF;AACA;AACA,6BAA6B,gEAAU,GAAG,mCAAmC;AAC7E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,oBAAoB;AACpB,UAAU;AACV;AACA;AACA,UAAU;AACV;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,qBAAqB;AACrB;AACA;AACA;AACA;AACA,aAAa,kBAAkB,mBAAmB,aAAa;AAC/D,cAAc,kBAAkB,qBAAqB,aAAa;AAClE,cAAc,kBAAkB;AAChC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS;AACT;AACA;AACA;AACA,kDAAkD,QAAQ;AAC1D;AACA;AACA,kGAAkG,YAAY,IAAI,IAAI,MAAM,IAAI;AAChI;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA","sources":["webpack://neural_compressor_ext_lab/./lib/constants.js","webpack://neural_compressor_ext_lab/./lib/deepcoder.js","webpack://neural_compressor_ext_lab/./lib/index.js","webpack://neural_compressor_ext_lab/./lib/utils.js"],"sourcesContent":["export var Constants;\n(function (Constants) {\n Constants.SHORT_PLUGIN_NAME = 'neural_compressor_ext_lab';\n Constants.WORK_PATH = 
\"neural_coder_workspace/\";\n Constants.ICON_FORMAT_ALL_SVG = ' ';\n Constants.ICON_RUN = ' ';\n Constants.SVG = ' ';\n Constants.LONG_PLUGIN_NAME = `@rya/${Constants.SHORT_PLUGIN_NAME}`;\n Constants.SETTINGS_SECTION = `${Constants.LONG_PLUGIN_NAME}:settings`;\n Constants.COMMAND_SECTION_NAME = 'Jupyterlab Code Optimizer';\n Constants.PLUGIN_VERSION = '0.1.0';\n})(Constants || (Constants = {}));\n","import { NotebookActions } from '@jupyterlab/notebook';\nimport NotebookUtilities from \"./utils\";\nimport { Constants } from './constants';\nclass JupyterlabCodeOptimizer {\n constructor(panel) {\n this.working = false;\n this.panel = panel;\n this.tmp_path = \"tmp.py\";\n this.rand = NotebookUtilities.GetRandomNum(0, 200);\n this.log_path = Constants.WORK_PATH + \"NeuralCoder\" + this.rand + \".log\";\n this.tmp_log_path = Constants.WORK_PATH + \"NeuralCoder_tmp\" + \".log\";\n this.cells = [];\n }\n async optimizeCode(code, formatter, name, next, options, notebook, panel, cell, run) {\n let codes = [];\n code.forEach(function (value) {\n value = value.replace(/('\\\\n')/g, '^^^');\n value = value.replace(/\\\\n\"/g, '###');\n value = value.replace(/\\\\n'/g, '###');\n value = value.replace(/\"\\\\n/g, '@@');\n value = value.replace(/'\\\\n/g, '@@');\n value = value.replace(/\\n/g, '\\\\n');\n value = value.replace(/\"/g, '+++');\n value = value.replace(/,/g, '$');\n codes.push(value);\n });\n let gen_code = `code = \"${codes}\"\\ncodes = code.split(',')\\nwith open( '${this.tmp_path}', 'w+' ) as f:\\n for i in range(0,len(codes)):\\n f.write('# this is the beginning of a single code snippet\\\\n')\\n code_list = codes[i].replace('$',',').replace('+++','\\\"').split('\\\\n')\\n for line in code_list:\\n if('split(^^^)' in line):\\n line=line.replace('split(^^^)', 'split(\\\\'\\\\\\\\n\\\\')')\\n if('###' in line):\\n line=line.replace('###', '\\\\\\\\n\\\"')\\n if('@@' in line):\\n line=line.replace('@@', '\\\"\\\\\\\\n')\\n f.write(line+'\\\\n')`;\n const expr = { 
code_list: `code_list` };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, gen_code, expr, false);\n if (options === 'normal') {\n let runcode = `from neural_coder import enable\\nenable(code=\"${this.tmp_path}\",features=[\"${formatter}\"], overwrite=True)`;\n let expr = { sum: ` ` };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let run_code1 = `with open(\"${this.tmp_path}\", 'r') as f:\\n optimized_code = f.read()\\n`;\n let expr1 = { optimizedCode: \"optimized_code\" };\n let result2 = NotebookUtilities.sendKernelRequestFromNotebook(panel, run_code1, expr1, false);\n result2.then(value => {\n var _a, _b, _c, _d;\n let optimizedTexts = Object.values(value.optimizedCode.data)[0];\n let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\\\n').slice(1);\n optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);\n for (let i = 0; i < optimizeCodes.length; ++i) {\n const cell = this.cells[i];\n const currentTexts = this.cells.map(cell => cell.model.value.text);\n const currentText = currentTexts[i];\n let optimizedtext = optimizeCodes[i];\n optimizedtext = optimizedtext.replace(/\\\\'\\\\\\\\n\\\\'/g, \"^^^\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n\"/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n'/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\"\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/'\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/\\\\n/g, '\\n');\n optimizedtext = optimizedtext.replace(/\\\\'/g, \"'\");\n optimizedtext = optimizedtext.replace(/\\^\\^\\^/g, \"'\\\\n'\");\n optimizedtext = optimizedtext.replace(/\\+\\+\\+/g, \"\\\\n\\\"\");\n optimizedtext = optimizedtext.replace(/\\@\\@\\@/g, \"\\\"\\\\n\");\n if (cell.model.value.text === currentText) {\n cell.model.value.text = optimizedtext;\n }\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = 
Constants.ICON_RUN;\n (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(run_svg);\n }\n });\n }\n else {\n if (formatter === '') {\n if (this.markdown) {\n this.markdown.model.value.text += \"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ...... \\n\";\n }\n // cell.outputArea.node.innerText += \"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\n\"\n let runcode1 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for The Original Model ......\\\\n\")`;\n let expr1 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);\n let runcode = `from neural_coder import enable\\nperfomance, mode, path = enable(code=\"${this.tmp_path}\",features=[], run_bench=True, args=\"${options}\")\\nwith open(path + '/bench.log', 'r') as f:\\n logs = f.readlines()\\nlog_line = logs[4]\\nlog = log_line.split(\"[\")[1].split(\"]\")[0]`;\n let expr = { path: \"path\", log: \"log\" };\n let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let fps;\n result.then(value => {\n fps = Object.values(value.log.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second) \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\n`\n let text = `[NeuralCoder INFO] Benchmark Result (Performance) of The Original Model is ${fps} (samples/second)\\\\n`;\n let runcode = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr = { path: \"\" };\n 
NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n`\n let runcode1 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\\\n\")`;\n let expr1 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode1, expr1, false);\n let runcode2 = `with open(\"${this.tmp_log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n });\n }\n else {\n let runcode = `from neural_coder import enable\\nperfomance, mode, path = enable(code=\"${this.tmp_path}\", features=[\"${formatter}\"], run_bench=True, args=\"${options}\")\\nwith open(path + '/bench.log', 'r') as f:\\n logs = f.readlines()\\nlog_line = logs[4]\\nlog = log_line.split(\"[\")[1].split(\"]\")[0]`;\n let expr = { path: \"path\", log: \"log\" };\n let result = NotebookUtilities.sendKernelRequestFromNotebook(panel, runcode, expr, false);\n let fps;\n result.then(value => {\n fps = Object.values(value.log.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second) \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (FPS)\\n`\n let text = `[NeuralCoder INFO] Benchmark Result (Performance) of ${name} is ${fps} (samples/second)\\\\n`;\n let runcode = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n if (next !== '') {\n if (this.markdown) {\n 
this.markdown.model.value.text += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ...... \\n`;\n }\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\n`\n let runcode2 = `with open(\"${this.log_path}\", 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Enabling and Benchmarking for ${next} ......\\\\n\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n }\n let runcode3 = `with open(\"${this.tmp_log_path}\", 'a' ) as f:\\n f.write(\"${text}\")`;\n let expr3 = { path: \"\" };\n let res_tmp = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);\n res_tmp.then(value => {\n if (formatter === 'pytorch_inc_bf16') {\n let read_log = `import re\\nwith open(\"${this.tmp_log_path}\", 'r') as f:\\n logs = f.readlines()\\n fps_list=[]\\n for log_line in logs[-4:]:\\n pat = re.compile(r\\'\\\\d+\\\\.?\\\\d+')\\n fps = re.findall(pat,log_line)[-1]\\n fps_list.append(float(fps))\\nmaxi = max(fps_list)\\nindex = fps_list.index(maxi)\\nboost = round(maxi/fps_list[0],1)\\nfeatures=['','pytorch_inc_static_quant_fx','pytorch_inc_dynamic_quant','pytorch_inc_bf16']\\nfeature_name=['Original Model','INC Enable INT8 (Static)','INC Enable INT8 (Dynamic)','INC Enable BF16']\\nbest_feature = features[index]\\nbest_name = feature_name[index]\\nfeature_l = []\\nfeature_l.append(best_feature)\\nfrom neural_coder import enable\\nenable(code=\"${this.tmp_path}\",features=feature_l, overwrite=True)\\nwith open(\"${this.tmp_path}\", 'r') as f:\\n optimized_code = f.read()\\n`;\n let read_expr = { boost: \"boost\", best_feature: \"best_feature\", best_name: \"best_name\", optimizeCode: \"optimized_code\", feature_l: \"fps_list\", maxi: \"maxi\", index: \"index\" };\n let read_result = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, read_log, read_expr, false);\n read_result.then(value => {\n var _a, _b, _c, _d;\n 
console.log(\"resres\", value);\n let boost = Object.values(value.boost.data)[0];\n let best_name = Object.values(value.best_name.data)[0];\n let optimizedTexts = Object.values(value.optimizeCode.data)[0];\n let optimizeCodes = optimizedTexts.split('# this is the beginning of a single code snippet\\\\n').slice(1);\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] The Best Intel Optimization: ${best_name} \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] You can get up to ${boost}X performance boost. \\n`;\n }\n // cell.outputArea.node.innerText +=`[NeuralCoder INFO] The Best Intel Optimization: ${best_name}\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] You can get up to ${boost}X performance boost.\\n`\n optimizeCodes[optimizeCodes.length - 1] = optimizeCodes[optimizeCodes.length - 1].slice(0, -3);\n for (let i = 0; i < optimizeCodes.length; ++i) {\n const cell = this.cells[i];\n const currentTexts = this.cells.map(cell => cell.model.value.text);\n const currentText = currentTexts[i];\n let optimizedtext = optimizeCodes[i];\n optimizedtext = optimizedtext.replace(/\\\\'\\\\\\\\n\\\\'/g, \"^^^\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n\"/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\\\\\\\\n'/g, \"+++\");\n optimizedtext = optimizedtext.replace(/\"\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/'\\\\\\\\n/g, \"@@@\");\n optimizedtext = optimizedtext.replace(/\\\\n/g, '\\n');\n optimizedtext = optimizedtext.replace(/\\\\'/g, \"'\");\n optimizedtext = optimizedtext.replace(/\\^\\^\\^/g, \"'\\\\n'\");\n optimizedtext = optimizedtext.replace(/\\+\\+\\+/g, \"\\\\n\\\"\");\n optimizedtext = optimizedtext.replace(/\\@\\@\\@/g, \"\\\"\\\\n\");\n if (cell.model.value.text === currentText) {\n cell.model.value.text = optimizedtext;\n }\n }\n // if(this.markdown){\n // this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: 4th Gen Intel Xeon Scalable processor with AMX 
\\n`\n // this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log \\n`\n // }\n let command = \"lscpu | grep 'Model name'\";\n let get_hardware = `import subprocess\\nsubp = subprocess.Popen(\"${command}\",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,encoding=\"utf-8\")\\nsubp.wait(2)\\nhardware = subp.communicate()[0].replace(\"Model name:\",\"\").strip()`;\n let expr_hardware = { hardware: \"hardware\" };\n let hard_res = NotebookUtilities.sendKernelRequestFromNotebook(this.panel, get_hardware, expr_hardware, false);\n hard_res.then(value => {\n let hard = Object.values(value.hardware.data)[0];\n if (this.markdown) {\n this.markdown.model.value.text += `[NeuralCoder INFO] HardWare: ${hard} \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log \\n`;\n }\n cell.outputArea.node.innerText += `[NeuralCoder INFO] HardWare: ${hard}\\n`;\n });\n cell.outputArea.node.innerText += `[NeuralCoder INFO] The log was saved to neural_coder_workspace\\\\NeuralCoder${this.rand}.log\\n`;\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = Constants.ICON_RUN;\n (_d = (_c = (_b = (_a = run === null || run === void 0 ? void 0 : run.node.firstChild) === null || _a === void 0 ? void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? 
void 0 : _d.replaceWith(run_svg);\n });\n }\n });\n });\n }\n }\n }\n}\nexport class JupyterlabNotebookCodeOptimizer extends JupyterlabCodeOptimizer {\n constructor(notebookTracker, panel) {\n super(panel);\n this.notebookTracker = notebookTracker;\n this.notebookname = '';\n }\n async optimizeAction(config, formatter) {\n return this.optimizeCells(true, config, formatter);\n }\n async optimizeAllCodeCells(config, formatter, notebook, run) {\n return this.optimizeCells(false, config, formatter, notebook, run);\n }\n getCodeCells(ifmarkdown = true, notebook) {\n if (!this.notebookTracker.currentWidget) {\n return [];\n }\n const codeCells = [];\n notebook = notebook || this.notebookTracker.currentWidget.content;\n this.notebookname = notebook.title.label;\n let count = 0;\n notebook.widgets.forEach((cell) => {\n if (cell.model.type === 'code') {\n count += 1;\n codeCells.push(cell);\n }\n });\n if (ifmarkdown) {\n NotebookActions.insertBelow(notebook);\n this.notebookTracker.currentWidget.content.activeCellIndex = count + 1;\n NotebookActions.changeCellType(notebook, 'markdown');\n const activeCell = notebook.activeCell;\n if (activeCell) {\n this.markdown = activeCell;\n }\n }\n this.cells = codeCells;\n return codeCells;\n }\n async optimizeCells(selectedOnly, config, formatter, notebook, run) {\n if (this.working) {\n return new Promise((resolve, reject) => {\n resolve(\"false!\");\n });\n }\n console.log(\"arrive here 333\");\n this.working = true;\n const optimize_type = formatter !== undefined ? 
formatter : 'pytorch_mixed_precision_cpu';\n if (optimize_type === 'auto-quant') {\n selectedOnly = true;\n }\n else {\n selectedOnly = false;\n }\n const selectedCells = this.getCodeCells(selectedOnly, notebook);\n let cell = selectedCells[selectedCells.length - 1];\n if (selectedCells.length === 0) {\n this.working = false;\n return new Promise((resolve, reject) => {\n resolve(\"false!\");\n });\n }\n const currentTexts = selectedCells.map(cell => cell.model.value.text);\n if (optimize_type === 'auto-quant') {\n console.log(\"arrive here 444-111\");\n if (this.markdown) {\n this.markdown.model.value.text = `[NeuralCoder INFO] Auto-Quant Started ...... \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook \"${this.notebookname}\" \\n`;\n this.markdown.model.value.text += `[NeuralCoder INFO] Benchmark Mode: Throughput \\n`;\n }\n // cell.outputArea.node.innerText = `[NeuralCoder INFO] Auto-Quant Started ......\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Code: User code from Jupyter Lab notebook \"${this.notebookname}\"\\n`\n // cell.outputArea.node.innerText += `[NeuralCoder INFO] Benchmark Mode: Throughput\\n`\n let runcode = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Auto-Quant Started ......\\\\n\")`;\n let expr = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode, expr, false);\n let runcode2 = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Code: User code from Jupyter Lab notebook '${this.notebookname}'\\\\n\")`;\n let expr2 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode2, expr2, false);\n let runcode3 = `with open('${this.log_path}', 'a' ) as f:\\n f.write(\"[NeuralCoder INFO] Benchmark Mode: Throughput\\\\n\")`;\n let expr3 = { path: \"\" };\n NotebookUtilities.sendKernelRequestFromNotebook(this.panel, runcode3, expr3, false);\n // 
cell.outputArea.node.setAttribute(\"class\",\"pad\")\n await this.optimizeCode(currentTexts, '', 'The Original Model', 'INC Enable INT8 (Static)', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_static_quant_fx', 'INC Enable INT8 (Static)', 'INC Enable INT8 (Dynamic)', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_dynamic_quant', 'INC Enable INT8 (Dynamic)', 'INC Enable BF16', config, true, this.panel, cell, run);\n await this.optimizeCode(currentTexts, 'pytorch_inc_bf16', 'INC Enable BF16', '', config, true, this.panel, cell, run);\n }\n else {\n console.log(\"arrive here 444-222\");\n await this.optimizeCode(currentTexts, optimize_type, \"\", \"\", \"normal\", true, this.panel, cell, run);\n }\n this.working = false;\n console.log(\"arrive here 555\");\n return new Promise((resolve, reject) => {\n resolve(\"success!\");\n });\n }\n applicable(formatter, currentWidget) {\n const currentNotebookWidget = this.notebookTracker.currentWidget;\n return currentNotebookWidget && currentWidget === currentNotebookWidget;\n }\n}\n","import { INotebookTracker } from '@jupyterlab/notebook';\nimport { ToolbarButton, showDialog, Dialog } from '@jupyterlab/apputils';\nimport { ISettingRegistry } from '@jupyterlab/settingregistry';\nimport { IMainMenu } from '@jupyterlab/mainmenu';\nimport { LabIcon } from '@jupyterlab/ui-components';\nimport { Widget } from '@lumino/widgets';\nimport { JupyterlabNotebookCodeOptimizer } from './deepcoder';\nimport { Constants } from './constants';\nclass neural_compressor_ext_lab {\n constructor(app, tracker, notebookpanel) {\n this.app = app;\n this.tracker = tracker;\n this.notebookpanel = notebookpanel;\n this.setupWidgetExtension();\n this.config = '';\n }\n createNew(nb) {\n this.notebookpanel = nb;\n this.notebookCodeOptimizer = new JupyterlabNotebookCodeOptimizer(this.tracker, this.notebookpanel);\n const svg = document.createElement(\"svg\");\n 
svg.innerHTML = Constants.ICON_FORMAT_ALL_SVG;\n const run_svg = document.createElement(\"svg\");\n run_svg.innerHTML = Constants.ICON_RUN;\n const div = document.createElement(\"div\");\n div.setAttribute(\"class\", \"wrapper\");\n const span = document.createElement(\"span\");\n span.setAttribute(\"class\", \"f1ozlkqi\");\n span.innerHTML = Constants.SVG;\n const selector = document.createElement(\"select\");\n selector.setAttribute(\"class\", \"aselector\");\n selector.id = \"NeuralCoder\";\n const option1 = document.createElement(\"option\");\n option1.value = \"pytorch_inc_static_quant_fx\";\n option1.innerText = \"INC Enable INT8 (Static)\";\n option1.selected = true;\n const option2 = document.createElement(\"option\");\n option2.value = \"pytorch_inc_dynamic_quant\";\n option2.innerText = \"INC Enable INT8 (Dynamic)\";\n const option3 = document.createElement(\"option\");\n option3.value = \"pytorch_inc_bf16\";\n option3.innerText = \"INC Enable BF16\";\n const option4 = document.createElement(\"option\");\n option4.value = \"auto-quant\";\n option4.innerText = \"INC Auto Enable & Benchmark\";\n selector.options.add(option1);\n selector.options.add(option2);\n selector.options.add(option3);\n selector.options.add(option4);\n div.appendChild(selector);\n div.appendChild(span);\n const selector_widget = new Widget();\n selector_widget.node.appendChild(div);\n selector_widget.addClass(\"aselector\");\n let notebookCodeOptimizer = this.notebookCodeOptimizer;\n let config = this.config;\n const dia_input = document.createElement(\"input\");\n const dia_widget = new Widget();\n dia_widget.node.appendChild(dia_input);\n dia_widget.addClass(\"dialog\");\n const run_button = new ToolbarButton({\n tooltip: 'NeuralCoder',\n icon: new LabIcon({\n name: \"run\",\n svgstr: Constants.ICON_RUN\n }),\n onClick: async function () {\n var _a, _b, _c, _d;\n console.log(\"arrive here 111\");\n (_d = (_c = (_b = (_a = run_button.node.firstChild) === null || _a === void 0 ? 
void 0 : _a.firstChild) === null || _b === void 0 ? void 0 : _b.firstChild) === null || _c === void 0 ? void 0 : _c.firstChild) === null || _d === void 0 ? void 0 : _d.replaceWith(svg);\n if (selector.options[selector.selectedIndex].value === 'auto-quant') {\n await showDialog({\n title: 'Please input execute parameters:',\n body: dia_widget,\n buttons: [Dialog.okButton({ label: 'Confirm' })]\n }).then(result => {\n if (result.button.accept) {\n config = dia_input.value;\n }\n });\n }\n console.log(\"arrive here 222\");\n await notebookCodeOptimizer.optimizeAllCodeCells(config, selector.options[selector.selectedIndex].value, undefined, run_button);\n }\n });\n nb.toolbar.insertItem(11, \"nc\", run_button);\n nb.toolbar.insertItem(12, \"selector\", selector_widget);\n }\n setupWidgetExtension() {\n this.app.docRegistry.addWidgetExtension('Notebook', this);\n }\n}\n/**\n * Initialization data for the neural_compressor_ext_lab extension.\n */\nconst plugin = {\n id: 'neural_compressor_ext_lab:plugin',\n autoStart: true,\n requires: [INotebookTracker, IMainMenu],\n optional: [ISettingRegistry],\n activate: (app, tracker, notebookpanel) => {\n new neural_compressor_ext_lab(app, tracker, notebookpanel);\n console.log('JupyterLab extension neural_compressor_ext_lab is activated!');\n }\n};\nexport default plugin;\n","/*\n * Copyright 2019-2020 The Kale Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport { Dialog, showDialog } from 
'@jupyterlab/apputils';\n// @ts-ignore\nimport SanitizedHTML from 'react-sanitized-html';\nimport * as React from 'react';\nexport default class NotebookUtilities {\n /**\n * generate random number\n * @Min\n * @Max\n */\n static GetRandomNum(Min, Max) {\n let Range;\n Range = Max - Min;\n var Rand = Math.random();\n return (Min + Math.round(Rand * Range));\n }\n /**\n * Builds an HTML container by sanitizing a list of strings and converting\n * them in valid HTML\n * @param msg A list of string with HTML formatting\n * @returns a HTMLDivElement composed of a list of spans with formatted text\n */\n static buildDialogBody(msg) {\n return (React.createElement(\"div\", null, msg.map((s, i) => {\n return (React.createElement(React.Fragment, { key: `msg-${i}` },\n React.createElement(SanitizedHTML, { allowedAttributes: { a: ['href'] }, allowedTags: ['b', 'i', 'em', 'strong', 'a', 'pre'], html: s }),\n React.createElement(\"br\", null)));\n })));\n }\n /**\n * Opens a pop-up dialog in JupyterLab to display a simple message.\n * @param title The title for the message popup\n * @param msg The message as an array of strings\n * @param buttonLabel The label to use for the button. Default is 'OK'\n * @param buttonClassName The classname to give to the 'ok' button\n * @returns Promise - A promise once the message is closed.\n */\n static async showMessage(title, msg, buttonLabel = 'Dismiss', buttonClassName = '') {\n const buttons = [\n Dialog.okButton({ label: buttonLabel, className: buttonClassName }),\n ];\n const messageBody = this.buildDialogBody(msg);\n await showDialog({ title, buttons, body: messageBody });\n }\n /**\n * Opens a pop-up dialog in JupyterLab to display a yes/no dialog.\n * @param title The title for the message popup\n * @param msg The message\n * @param acceptLabel The label to use for the accept button. Default is 'YES'\n * @param rejectLabel The label to use for the reject button. 
Default is 'NO'\n * @param yesButtonClassName The classname to give to the accept button.\n * @param noButtonClassName The classname to give to the cancel button.\n * @returns Promise - A promise once the message is closed.\n */\n static async showYesNoDialog(title, msg, acceptLabel = 'YES', rejectLabel = 'NO', yesButtonClassName = '', noButtonClassName = '') {\n const buttons = [\n Dialog.okButton({ label: acceptLabel, className: yesButtonClassName }),\n Dialog.cancelButton({ label: rejectLabel, className: noButtonClassName }),\n ];\n const messageBody = this.buildDialogBody(msg);\n const result = await showDialog({ title, buttons, body: messageBody });\n return result.button.label === acceptLabel;\n }\n /**\n * Opens a pop-up dialog in JupyterLab with various information and button\n * triggering reloading the page.\n * @param title The title for the message popup\n * @param msg The message\n * @param buttonLabel The label to use for the button. Default is 'Refresh'\n * @param buttonClassName The classname to give to the 'refresh' button.\n * @returns Promise - A promise once the message is closed.\n */\n static async showRefreshDialog(title, msg, buttonLabel = 'Refresh', buttonClassName = '') {\n await this.showMessage(title, msg, buttonLabel, buttonClassName);\n location.reload();\n }\n /**\n * @description Creates a new JupyterLab notebook for use by the application\n * @param command The command registry\n * @returns Promise - A promise containing the notebook panel object that was created (if successful).\n */\n static async createNewNotebook(command) {\n const notebook = await command.execute('notebook:create-new', {\n activate: true,\n path: '',\n preferredLanguage: '',\n });\n await notebook.session.ready;\n return notebook;\n }\n /**\n * Safely saves the Jupyter notebook document contents to disk\n * @param notebookPanel The notebook panel containing the notebook to save\n */\n static async saveNotebook(notebookPanel) {\n if (notebookPanel) {\n await 
notebookPanel.context.ready;\n notebookPanel.context.save();\n return true;\n }\n return false;\n }\n /**\n * Convert the notebook contents to JSON\n * @param notebookPanel The notebook panel containing the notebook to serialize\n */\n static notebookToJSON(notebookPanel) {\n if (notebookPanel.content.model) {\n return notebookPanel.content.model.toJSON();\n }\n return null;\n }\n /**\n * @description Gets the value of a key from specified notebook's metadata.\n * @param notebookPanel The notebook to get meta data from.\n * @param key The key of the value.\n * @returns any -The value of the metadata. Returns null if the key doesn't exist.\n */\n static getMetaData(notebookPanel, key) {\n if (!notebookPanel) {\n throw new Error('The notebook is null or undefined. No meta data available.');\n }\n if (notebookPanel.model && notebookPanel.model.metadata.has(key)) {\n return notebookPanel.model.metadata.get(key);\n }\n return null;\n }\n /**\n * @description Sets the key value pair in the notebook's metadata.\n * If the key doesn't exists it will add one.\n * @param notebookPanel The notebook to set meta data in.\n * @param key The key of the value to create.\n * @param value The value to set.\n * @param save Default is false. Whether the notebook should be saved after the meta data is set.\n * Note: This function will not wait for the save to complete, it only sends a save request.\n * @returns The old value for the key, or undefined if it did not exist.\n */\n static setMetaData(notebookPanel, key, value, save = false) {\n var _a;\n if (!notebookPanel) {\n throw new Error('The notebook is null or undefined. No meta data available.');\n }\n const oldVal = (_a = notebookPanel.model) === null || _a === void 0 ? 
void 0 : _a.metadata.set(key, value);\n if (save) {\n this.saveNotebook(notebookPanel);\n }\n return oldVal;\n }\n // /**\n // * Get a new Kernel, not tied to a Notebook\n // * Source code here: https://github.com/jupyterlab/jupyterlab/tree/473348d25bcb258ca2f0c127dd8fb5b193217135/packages/services\n // */\n // public static async createNewKernel() {\n // // Get info about the available kernels and start a new one.\n // let options: Kernel.IOptions = await Kernel.getSpecs().then(kernelSpecs => {\n // // console.log('Default spec:', kernelSpecs.default);\n // // console.log('Available specs', Object.keys(kernelSpecs.kernelspecs));\n // // use the default name\n // return { name: kernelSpecs.default };\n // });\n // return await Kernel.startNew(options).then(_kernel => {\n // return _kernel;\n // });\n // }\n // // TODO: We can use this context manager to execute commands inside a new kernel\n // // and be sure that it will be disposed of at the end.\n // // Another approach could be to create a kale_rpc Kernel, as a singleton,\n // // created at startup. The only (possible) drawback is that we can not name\n // // a kernel instance with a custom id/name, so when refreshing JupyterLab we would\n // // not recognize the kernel. 
A solution could be to have a kernel spec dedicated to kale rpc calls.\n // public static async executeWithNewKernel(action: Function, args: any[] = []) {\n // // create brand new kernel\n // const _k = await this.createNewKernel();\n // // execute action inside kernel\n // const res = await action(_k, ...args);\n // // close kernel\n // _k.shutdown();\n // // return result\n // return res;\n // }\n /**\n * @description This function runs code directly in the notebook's kernel and then evaluates the\n * result and returns it as a promise.\n * @param kernel The kernel to run the code in.\n * @param runCode The code to run in the kernel.\n * @param userExpressions The expressions used to capture the desired info from the executed code.\n * @param runSilent Default is false. If true, kernel will execute as quietly as possible.\n * store_history will be set to false, and no broadcast on IOPUB channel will be made.\n * @param storeHistory Default is false. If true, the code executed will be stored in the kernel's history\n * and the counter which is shown in the cells will be incremented to reflect code was run.\n * @param allowStdIn Default is false. If true, code running in kernel can prompt user for input using\n * an input_request message.\n * @param stopOnError Default is false. 
If True, does not abort the execution queue, if an exception is encountered.\n * This allows the queued execution of multiple execute_requests, even if they generate exceptions.\n * @returns Promise - A promise containing the execution results of the code as an object with\n * keys based on the user_expressions.\n * @example\n * //The code\n * const code = \"a=123\\nb=456\\nsum=a+b\";\n * //The user expressions\n * const expr = {sum: \"sum\",prod: \"a*b\",args:\"[a,b,sum]\"};\n * //Async function call (returns a promise)\n * sendKernelRequest(notebookPanel, code, expr,false);\n * //Result when promise resolves:\n * {\n * sum:{status:\"ok\",data:{\"text/plain\":\"579\"},metadata:{}},\n * prod:{status:\"ok\",data:{\"text/plain\":\"56088\"},metadata:{}},\n * args:{status:\"ok\",data:{\"text/plain\":\"[123, 456, 579]\"}}\n * }\n * @see For more information on JupyterLab messages:\n * https://jupyter-client.readthedocs.io/en/latest/messaging.html#execution-results\n */\n static async sendKernelRequest(kernel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {\n if (!kernel) {\n throw new Error('Kernel is null or undefined.');\n }\n // Wait for kernel to be ready before sending request\n // await kernel.status;\n const message = await kernel.requestExecute({\n allow_stdin: allowStdIn,\n code: runCode,\n silent: runSilent,\n stop_on_error: stopOnError,\n store_history: storeHistory,\n user_expressions: userExpressions,\n }).done;\n const content = message.content;\n if (content.status !== 'ok') {\n // If response is not 'ok', throw contents as error, log code\n const msg = `Code caused an error:\\n${runCode}`;\n console.error(msg);\n if (content.traceback) {\n content.traceback.forEach((line) => console.log(line.replace(/[\\u001b\\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '')));\n }\n throw content;\n }\n // Return user_expressions of the content\n return content.user_expressions;\n }\n 
/**\n * Same as method sendKernelRequest but passing\n * a NotebookPanel instead of a Kernel\n */\n static async sendKernelRequestFromNotebook(notebookPanel, runCode, userExpressions, runSilent = false, storeHistory = false, allowStdIn = false, stopOnError = false) {\n var _a, _b, _c, _d;\n if (!notebookPanel) {\n throw new Error('Notebook is null or undefined.');\n }\n // Wait for notebook panel to be ready\n await notebookPanel.activate;\n await ((_a = notebookPanel.sessionContext) === null || _a === void 0 ? void 0 : _a.ready);\n console.log('get kernel', (_b = notebookPanel.sessionContext.session) === null || _b === void 0 ? void 0 : _b.kernel);\n return this.sendKernelRequest((_d = (_c = notebookPanel.sessionContext) === null || _c === void 0 ? void 0 : _c.session) === null || _d === void 0 ? void 0 : _d.kernel, runCode, userExpressions, runSilent, storeHistory, allowStdIn, stopOnError);\n }\n}\n"],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.27309f1e43e64d128990.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.27309f1e43e64d128990.js.map
deleted file mode 100644
index 7bd49677533..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.27309f1e43e64d128990.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"remoteEntry.27309f1e43e64d128990.js","mappings":";;;;;;;;;;;AAAA;AACA;AACA;AACA,EAAE;AACF;AACA;AACA,EAAE;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI;AACJ;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA;AACA;AACA;AACA;AACA,CAAC;;;;;;UCpCD;UACA;;UAEA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;;UAEA;UACA;;UAEA;UACA;UACA;;UAEA;UACA;;UAEA;UACA;;;;;WC5BA;WACA;WACA;WACA;WACA;WACA,iCAAiC,WAAW;WAC5C;WACA;;;;;WCPA;WACA;WACA;WACA;WACA,yCAAyC,wCAAwC;WACjF;WACA;WACA;;;;;WCPA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;;;;;WCRA;WACA;WACA;WACA,8BAA8B,kgBAAkgB;WAChiB;;;;;WCJA;WACA;WACA;WACA;WACA,GAAG;WACH;WACA;WACA,CAAC;;;;;WCPD;;;;;WCAA;WACA;WACA;WACA;WACA,uBAAuB,4BAA4B;WACnD;WACA;WACA;WACA,iBAAiB,oBAAoB;WACrC;WACA,mGAAmG,YAAY;WAC/G;WACA;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,mEAAmE,iCAAiC;WACpG;WACA;WACA;WACA;;;;;WCzCA;WACA;WACA;WACA,uDAAuD,iBAAiB;WACxE;WACA,gDAAgD,aAAa;WAC7D;;;;;WCNA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,oJAAoJ;WACpJ;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,IAAI,aAAa;WACjB;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;;;;;WC7CA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;;;;;WCfA;WACA;WACA,WAAW,6BAA6B,iBAAiB,GAAG,qEAAqE;WACjI;WACA;WACA;WACA,qCAAqC,aAAa,EAAE,wDAAwD,2BAA2B,4BAA4B,2BAA2B,+CAA+C,mCAAmC;WAChR;WACA;WACA;WACA,qBAAqB,8BAA8B,SAAS,sDAAsD,gBAAgB,eAAe,KAAK,6DAA6D,SAAS,SAAS,QAAQ,eAAe,KAAK,eAAe,qGAAqG,WAAW,aAAa;WAC7Y;WACA;WACA;WACA,gBAAgB,8BAA8B,qBAAqB,YAAY,sBAAsB,SAAS,iDAAiD,6FAA6F,WAAW,uBAAuB,2BAA2B,wBAAwB,KAAK,oCAAoC,oBAAoB,wBAAwB,oBAAoB,SAAS,KAAK,yBAAyB,KAAK,gCAAgC,yBAAyB,QAAQ,eAAe,KAAK,eAAe,4DAA4D;WACtoB;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;W
ACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,CAAC;;WAED;WACA;WACA;WACA,CAAC;WACD;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM;WACN,KAAK,WAAW;WAChB,GAAG;WACH;WACA;;;;;WC3LA;;WAEA;WACA;WACA;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA,iCAAiC;;WAEjC;WACA;WACA;WACA,KAAK;WACL;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM;WACN;WACA;WACA;;WAEA;;WAEA;;WAEA;;WAEA;;WAEA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM,qBAAqB;WAC3B;WACA;WACA;WACA;WACA;WACA;;WAEA;;WAEA;WACA;WACA;;;;;WCrFA;;;;;UEAA;UACA;UACA;UACA","sources":["webpack://neural_compressor_ext_lab/webpack/container-entry","webpack://neural_compressor_ext_lab/webpack/bootstrap","webpack://neural_compressor_ext_lab/webpack/runtime/compat get default export","webpack://neural_compressor_ext_lab/webpack/runtime/define property getters","webpack://neural_compressor_ext_lab/webpack/runtime/ensure chunk","webpack://neural_compressor_ext_lab/webpack/runtime/get javascript chunk filename","webpack://neural_compressor_ext_lab/webpack/runtime/global","webpack://neural_compressor_ext_lab/webpack/runtime/hasOwnProperty shorthand","webpack://neural_compressor_ext_lab/webpack/runtime/load script","webpack://neural_compressor_ext_lab/webpack/runtime/make namespace 
object","webpack://neural_compressor_ext_lab/webpack/runtime/sharing","webpack://neural_compressor_ext_lab/webpack/runtime/publicPath","webpack://neural_compressor_ext_lab/webpack/runtime/consumes","webpack://neural_compressor_ext_lab/webpack/runtime/jsonp chunk loading","webpack://neural_compressor_ext_lab/webpack/runtime/nonce","webpack://neural_compressor_ext_lab/webpack/before-startup","webpack://neural_compressor_ext_lab/webpack/startup","webpack://neural_compressor_ext_lab/webpack/after-startup"],"sourcesContent":["var moduleMap = {\n\t\"./index\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ \"./lib/index.js\")))));\n\t},\n\t\"./extension\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ \"./lib/index.js\")))));\n\t},\n\t\"./style\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854\"), __webpack_require__.e(\"style_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./style/index.js */ \"./style/index.js\")))));\n\t}\n};\nvar get = (module, getScope) => {\n\t__webpack_require__.R = getScope;\n\tgetScope = (\n\t\t__webpack_require__.o(moduleMap, module)\n\t\t\t? 
moduleMap[module]()\n\t\t\t: Promise.resolve().then(() => {\n\t\t\t\tthrow new Error('Module \"' + module + '\" does not exist in container.');\n\t\t\t})\n\t);\n\t__webpack_require__.R = undefined;\n\treturn getScope;\n};\nvar init = (shareScope, initScope) => {\n\tif (!__webpack_require__.S) return;\n\tvar name = \"default\"\n\tvar oldScope = __webpack_require__.S[name];\n\tif(oldScope && oldScope !== shareScope) throw new Error(\"Container initialization failed as it has already been initialized with a different share scope\");\n\t__webpack_require__.S[name] = shareScope;\n\treturn __webpack_require__.I(name, initScope);\n};\n\n// This exports getters to disallow modifications\n__webpack_require__.d(exports, {\n\tget: () => (get),\n\tinit: () => (init)\n});","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\tid: moduleId,\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n// expose the modules object (__webpack_modules__)\n__webpack_require__.m = __webpack_modules__;\n\n// expose the module cache\n__webpack_require__.c = __webpack_module_cache__;\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = (exports, definition) 
=> {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.f = {};\n// This file contains only the entry chunk.\n// The chunk loading function for additional chunks\n__webpack_require__.e = (chunkId) => {\n\treturn Promise.all(Object.keys(__webpack_require__.f).reduce((promises, key) => {\n\t\t__webpack_require__.f[key](chunkId, promises);\n\t\treturn promises;\n\t}, []));\n};","// This function allow to reference async chunks\n__webpack_require__.u = (chunkId) => {\n\t// return url for filenames based on template\n\treturn \"\" + chunkId + \".\" + {\"webpack_sharing_consume_default_react\":\"19c51f7b56cfd16da3f9\",\"lib_index_js\":\"2c52105b83d9cf4290a9\",\"vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854\":\"e09faf9ec3a764e40dc7\",\"style_index_js\":\"8d733cc8b74fabbd10b8\",\"vendors-node_modules_react-sanitized-html_lib_index_js\":\"500104f7c13c01fe1646\",\"webpack_sharing_consume_default_sanitize-html_sanitize-html\":\"635249bb6dc3884c24a3\",\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\":\"825dbf94ec7371e0b28d\"}[chunkId] + \".js\";\n};","__webpack_require__.g = (function() {\n\tif (typeof globalThis === 'object') return globalThis;\n\ttry {\n\t\treturn this || new Function('return this')();\n\t} catch (e) {\n\t\tif (typeof window === 'object') return window;\n\t}\n})();","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","var inProgress = {};\nvar dataWebpackPrefix = \"neural_compressor_ext_lab:\";\n// loadScript function to load a script via script tag\n__webpack_require__.l = (url, done, key, chunkId) => {\n\tif(inProgress[url]) { inProgress[url].push(done); return; }\n\tvar script, needAttach;\n\tif(key !== undefined) {\n\t\tvar scripts = 
document.getElementsByTagName(\"script\");\n\t\tfor(var i = 0; i < scripts.length; i++) {\n\t\t\tvar s = scripts[i];\n\t\t\tif(s.getAttribute(\"src\") == url || s.getAttribute(\"data-webpack\") == dataWebpackPrefix + key) { script = s; break; }\n\t\t}\n\t}\n\tif(!script) {\n\t\tneedAttach = true;\n\t\tscript = document.createElement('script');\n\n\t\tscript.charset = 'utf-8';\n\t\tscript.timeout = 120;\n\t\tif (__webpack_require__.nc) {\n\t\t\tscript.setAttribute(\"nonce\", __webpack_require__.nc);\n\t\t}\n\t\tscript.setAttribute(\"data-webpack\", dataWebpackPrefix + key);\n\t\tscript.src = url;\n\t}\n\tinProgress[url] = [done];\n\tvar onScriptComplete = (prev, event) => {\n\t\t// avoid mem leaks in IE.\n\t\tscript.onerror = script.onload = null;\n\t\tclearTimeout(timeout);\n\t\tvar doneFns = inProgress[url];\n\t\tdelete inProgress[url];\n\t\tscript.parentNode && script.parentNode.removeChild(script);\n\t\tdoneFns && doneFns.forEach((fn) => (fn(event)));\n\t\tif(prev) return prev(event);\n\t}\n\t;\n\tvar timeout = setTimeout(onScriptComplete.bind(null, undefined, { type: 'timeout', target: script }), 120000);\n\tscript.onerror = onScriptComplete.bind(null, script.onerror);\n\tscript.onload = onScriptComplete.bind(null, script.onload);\n\tneedAttach && document.head.appendChild(script);\n};","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","__webpack_require__.S = {};\nvar initPromises = {};\nvar initTokens = {};\n__webpack_require__.I = (name, initScope) => {\n\tif(!initScope) initScope = [];\n\t// handling circular init calls\n\tvar initToken = initTokens[name];\n\tif(!initToken) initToken = initTokens[name] = {};\n\tif(initScope.indexOf(initToken) >= 0) return;\n\tinitScope.push(initToken);\n\t// only runs 
once\n\tif(initPromises[name]) return initPromises[name];\n\t// creates a new share scope if needed\n\tif(!__webpack_require__.o(__webpack_require__.S, name)) __webpack_require__.S[name] = {};\n\t// runs all init snippets from all modules reachable\n\tvar scope = __webpack_require__.S[name];\n\tvar warn = (msg) => (typeof console !== \"undefined\" && console.warn && console.warn(msg));\n\tvar uniqueName = \"neural_compressor_ext_lab\";\n\tvar register = (name, version, factory, eager) => {\n\t\tvar versions = scope[name] = scope[name] || {};\n\t\tvar activeVersion = versions[version];\n\t\tif(!activeVersion || (!activeVersion.loaded && (!eager != !activeVersion.eager ? eager : uniqueName > activeVersion.from))) versions[version] = { get: factory, from: uniqueName, eager: !!eager };\n\t};\n\tvar initExternal = (id) => {\n\t\tvar handleError = (err) => (warn(\"Initialization of sharing external failed: \" + err));\n\t\ttry {\n\t\t\tvar module = __webpack_require__(id);\n\t\t\tif(!module) return;\n\t\t\tvar initFn = (module) => (module && module.init && module.init(__webpack_require__.S[name], initScope))\n\t\t\tif(module.then) return promises.push(module.then(initFn, handleError));\n\t\t\tvar initResult = initFn(module);\n\t\t\tif(initResult && initResult.then) return promises.push(initResult['catch'](handleError));\n\t\t} catch(err) { handleError(err); }\n\t}\n\tvar promises = [];\n\tswitch(name) {\n\t\tcase \"default\": {\n\t\t\tregister(\"neural_compressor_ext_lab\", \"0.1.0\", () => (Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => (__webpack_require__(/*! 
./lib/index.js */ \"./lib/index.js\"))))));\n\t\t\tregister(\"react-sanitized-html\", \"2.0.0\", () => (Promise.all([__webpack_require__.e(\"vendors-node_modules_react-sanitized-html_lib_index_js\"), __webpack_require__.e(\"webpack_sharing_consume_default_sanitize-html_sanitize-html\"), __webpack_require__.e(\"webpack_sharing_consume_default_react\")]).then(() => (() => (__webpack_require__(/*! ./node_modules/react-sanitized-html/lib/index.js */ \"./node_modules/react-sanitized-html/lib/index.js\"))))));\n\t\t\tregister(\"sanitize-html\", \"1.27.5\", () => (__webpack_require__.e(\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\").then(() => (() => (__webpack_require__(/*! ./node_modules/sanitize-html/dist/sanitize-html.js */ \"./node_modules/sanitize-html/dist/sanitize-html.js\"))))));\n\t\t}\n\t\tbreak;\n\t}\n\tif(!promises.length) return initPromises[name] = 1;\n\treturn initPromises[name] = Promise.all(promises).then(() => (initPromises[name] = 1));\n};","var scriptUrl;\nif (__webpack_require__.g.importScripts) scriptUrl = __webpack_require__.g.location + \"\";\nvar document = __webpack_require__.g.document;\nif (!scriptUrl && document) {\n\tif (document.currentScript)\n\t\tscriptUrl = document.currentScript.src\n\tif (!scriptUrl) {\n\t\tvar scripts = document.getElementsByTagName(\"script\");\n\t\tif(scripts.length) scriptUrl = scripts[scripts.length - 1].src\n\t}\n}\n// When supporting browsers where an automatic publicPath is not supported you must specify an output.publicPath manually via configuration\n// or pass an empty string (\"\") and set the __webpack_public_path__ variable from your code to use your own logic.\nif (!scriptUrl) throw new Error(\"Automatic publicPath is not supported in this browser\");\nscriptUrl = scriptUrl.replace(/#.*$/, \"\").replace(/\\?.*$/, \"\").replace(/\\/[^\\/]+$/, \"/\");\n__webpack_require__.p = scriptUrl;","var parseVersion = (str) => {\n\t// see webpack/lib/util/semver.js for original code\n\tvar p=p=>{return 
p.split(\".\").map((p=>{return+p==p?+p:p}))},n=/^([^-+]+)?(?:-([^+]+))?(?:\\+(.+))?$/.exec(str),r=n[1]?p(n[1]):[];return n[2]&&(r.length++,r.push.apply(r,p(n[2]))),n[3]&&(r.push([]),r.push.apply(r,p(n[3]))),r;\n}\nvar versionLt = (a, b) => {\n\t// see webpack/lib/util/semver.js for original code\n\ta=parseVersion(a),b=parseVersion(b);for(var r=0;;){if(r>=a.length)return r=b.length)return\"u\"==n;var t=b[r],f=(typeof t)[0];if(n!=f)return\"o\"==n&&\"n\"==f||(\"s\"==f||\"u\"==n);if(\"o\"!=n&&\"u\"!=n&&e!=t)return e {\n\t// see webpack/lib/util/semver.js for original code\n\tvar r=range[0],n=\"\";if(1===range.length)return\"*\";if(r+.5){n+=0==r?\">=\":-1==r?\"<\":1==r?\"^\":2==r?\"~\":r>0?\"=\":\"!=\";for(var e=1,a=1;a0?\".\":\"\")+(e=2,t)}return n}var g=[];for(a=1;a {\n\t// see webpack/lib/util/semver.js for original code\n\tif(0 in range){version=parseVersion(version);var e=range[0],r=e<0;r&&(e=-e-1);for(var n=0,i=1,a=!0;;i++,n++){var f,s,g=i=version.length||\"o\"==(s=(typeof(f=version[n]))[0]))return!a||(\"u\"==g?i>e&&!r:\"\"==g!=r);if(\"u\"==s){if(!a||\"u\"!=g)return!1}else if(a)if(g==s)if(i<=e){if(f!=range[i])return!1}else{if(r?f>range[i]:f {\n\tvar scope = __webpack_require__.S[scopeName];\n\tif(!scope || !__webpack_require__.o(scope, key)) throw new Error(\"Shared module \" + key + \" doesn't exist in shared scope \" + scopeName);\n\treturn scope;\n};\nvar findVersion = (scope, key) => {\n\tvar versions = scope[key];\n\tvar key = Object.keys(versions).reduce((a, b) => {\n\t\treturn !a || versionLt(a, b) ? b : a;\n\t}, 0);\n\treturn key && versions[key]\n};\nvar findSingletonVersionKey = (scope, key) => {\n\tvar versions = scope[key];\n\treturn Object.keys(versions).reduce((a, b) => {\n\t\treturn !a || (!versions[a].loaded && versionLt(a, b)) ? 
b : a;\n\t}, 0);\n};\nvar getInvalidSingletonVersionMessage = (scope, key, version, requiredVersion) => {\n\treturn \"Unsatisfied version \" + version + \" from \" + (version && scope[key][version].from) + \" of shared singleton module \" + key + \" (required \" + rangeToString(requiredVersion) + \")\"\n};\nvar getSingleton = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\treturn get(scope[key][version]);\n};\nvar getSingletonVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\tif (!satisfy(requiredVersion, version)) typeof console !== \"undefined\" && console.warn && console.warn(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));\n\treturn get(scope[key][version]);\n};\nvar getStrictSingletonVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\tif (!satisfy(requiredVersion, version)) throw new Error(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));\n\treturn get(scope[key][version]);\n};\nvar findValidVersion = (scope, key, requiredVersion) => {\n\tvar versions = scope[key];\n\tvar key = Object.keys(versions).reduce((a, b) => {\n\t\tif (!satisfy(requiredVersion, b)) return a;\n\t\treturn !a || versionLt(a, b) ? 
b : a;\n\t}, 0);\n\treturn key && versions[key]\n};\nvar getInvalidVersionMessage = (scope, scopeName, key, requiredVersion) => {\n\tvar versions = scope[key];\n\treturn \"No satisfying version (\" + rangeToString(requiredVersion) + \") of shared module \" + key + \" found in shared scope \" + scopeName + \".\\n\" +\n\t\t\"Available versions: \" + Object.keys(versions).map((key) => {\n\t\treturn key + \" from \" + versions[key].from;\n\t}).join(\", \");\n};\nvar getValidVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar entry = findValidVersion(scope, key, requiredVersion);\n\tif(entry) return get(entry);\n\tthrow new Error(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));\n};\nvar warnInvalidVersion = (scope, scopeName, key, requiredVersion) => {\n\ttypeof console !== \"undefined\" && console.warn && console.warn(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));\n};\nvar get = (entry) => {\n\tentry.loaded = 1;\n\treturn entry.get()\n};\nvar init = (fn) => (function(scopeName, a, b, c) {\n\tvar promise = __webpack_require__.I(scopeName);\n\tif (promise && promise.then) return promise.then(fn.bind(fn, scopeName, __webpack_require__.S[scopeName], a, b, c));\n\treturn fn(scopeName, __webpack_require__.S[scopeName], a, b, c);\n});\n\nvar load = /*#__PURE__*/ init((scopeName, scope, key) => {\n\tensureExistence(scopeName, key);\n\treturn get(findVersion(scope, key));\n});\nvar loadFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {\n\treturn scope && __webpack_require__.o(scope, key) ? 
get(findVersion(scope, key)) : fallback();\n});\nvar loadVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));\n});\nvar loadSingleton = /*#__PURE__*/ init((scopeName, scope, key) => {\n\tensureExistence(scopeName, key);\n\treturn getSingleton(scope, scopeName, key);\n});\nvar loadSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getSingletonVersion(scope, scopeName, key, version);\n});\nvar loadStrictVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getValidVersion(scope, scopeName, key, version);\n});\nvar loadStrictSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getStrictSingletonVersion(scope, scopeName, key, version);\n});\nvar loadVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));\n});\nvar loadSingletonFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getSingleton(scope, scopeName, key);\n});\nvar loadSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getSingletonVersion(scope, scopeName, key, version);\n});\nvar loadStrictVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tvar entry = scope && __webpack_require__.o(scope, key) && 
findValidVersion(scope, key, version);\n\treturn entry ? get(entry) : fallback();\n});\nvar loadStrictSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getStrictSingletonVersion(scope, scopeName, key, version);\n});\nvar installedModules = {};\nvar moduleToHandlerMapping = {\n\t\"webpack/sharing/consume/default/react\": () => (loadSingletonVersionCheck(\"default\", \"react\", [1,17,0,1])),\n\t\"webpack/sharing/consume/default/@jupyterlab/notebook\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/notebook\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/apputils\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/apputils\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/settingregistry\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/settingregistry\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/mainmenu\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/mainmenu\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/ui-components\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/ui-components\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@lumino/widgets\": () => (loadSingletonVersionCheck(\"default\", \"@lumino/widgets\", [1,1,33,0])),\n\t\"webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html\": () => (loadStrictVersionCheckFallback(\"default\", \"react-sanitized-html\", [1,2,0,0], () => (Promise.all([__webpack_require__.e(\"vendors-node_modules_react-sanitized-html_lib_index_js\"), __webpack_require__.e(\"webpack_sharing_consume_default_sanitize-html_sanitize-html\")]).then(() => (() => (__webpack_require__(/*! 
react-sanitized-html */ \"./node_modules/react-sanitized-html/lib/index.js\"))))))),\n\t\"webpack/sharing/consume/default/sanitize-html/sanitize-html\": () => (loadStrictVersionCheckFallback(\"default\", \"sanitize-html\", [1,1,16,1], () => (__webpack_require__.e(\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\").then(() => (() => (__webpack_require__(/*! sanitize-html */ \"./node_modules/sanitize-html/dist/sanitize-html.js\")))))))\n};\n// no consumes in initial chunks\nvar chunkMapping = {\n\t\"webpack_sharing_consume_default_react\": [\n\t\t\"webpack/sharing/consume/default/react\"\n\t],\n\t\"lib_index_js\": [\n\t\t\"webpack/sharing/consume/default/@jupyterlab/notebook\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/apputils\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/settingregistry\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/mainmenu\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/ui-components\",\n\t\t\"webpack/sharing/consume/default/@lumino/widgets\",\n\t\t\"webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html\"\n\t],\n\t\"webpack_sharing_consume_default_sanitize-html_sanitize-html\": [\n\t\t\"webpack/sharing/consume/default/sanitize-html/sanitize-html\"\n\t]\n};\n__webpack_require__.f.consumes = (chunkId, promises) => {\n\tif(__webpack_require__.o(chunkMapping, chunkId)) {\n\t\tchunkMapping[chunkId].forEach((id) => {\n\t\t\tif(__webpack_require__.o(installedModules, id)) return promises.push(installedModules[id]);\n\t\t\tvar onFactory = (factory) => {\n\t\t\t\tinstalledModules[id] = 0;\n\t\t\t\t__webpack_require__.m[id] = (module) => {\n\t\t\t\t\tdelete __webpack_require__.c[id];\n\t\t\t\t\tmodule.exports = factory();\n\t\t\t\t}\n\t\t\t};\n\t\t\tvar onError = (error) => {\n\t\t\t\tdelete installedModules[id];\n\t\t\t\t__webpack_require__.m[id] = (module) => {\n\t\t\t\t\tdelete __webpack_require__.c[id];\n\t\t\t\t\tthrow error;\n\t\t\t\t}\n\t\t\t};\n\t\t\ttry {\n\t\t\t\tvar promise = 
moduleToHandlerMapping[id]();\n\t\t\t\tif(promise.then) {\n\t\t\t\t\tpromises.push(installedModules[id] = promise.then(onFactory)['catch'](onError));\n\t\t\t\t} else onFactory(promise);\n\t\t\t} catch(e) { onError(e); }\n\t\t});\n\t}\n}","__webpack_require__.b = document.baseURI || self.location.href;\n\n// object to store loaded and loading chunks\n// undefined = chunk not loaded, null = chunk preloaded/prefetched\n// [resolve, reject, Promise] = chunk loading, 0 = chunk loaded\nvar installedChunks = {\n\t\"neural_compressor_ext_lab\": 0\n};\n\n__webpack_require__.f.j = (chunkId, promises) => {\n\t\t// JSONP chunk loading for javascript\n\t\tvar installedChunkData = __webpack_require__.o(installedChunks, chunkId) ? installedChunks[chunkId] : undefined;\n\t\tif(installedChunkData !== 0) { // 0 means \"already installed\".\n\n\t\t\t// a Promise means \"currently loading\".\n\t\t\tif(installedChunkData) {\n\t\t\t\tpromises.push(installedChunkData[2]);\n\t\t\t} else {\n\t\t\t\tif(!/^webpack_sharing_consume_default_(react|sanitize\\-html_sanitize\\-html)$/.test(chunkId)) {\n\t\t\t\t\t// setup Promise in chunk cache\n\t\t\t\t\tvar promise = new Promise((resolve, reject) => (installedChunkData = installedChunks[chunkId] = [resolve, reject]));\n\t\t\t\t\tpromises.push(installedChunkData[2] = promise);\n\n\t\t\t\t\t// start chunk loading\n\t\t\t\t\tvar url = __webpack_require__.p + __webpack_require__.u(chunkId);\n\t\t\t\t\t// create error before stack unwound to get useful stacktrace later\n\t\t\t\t\tvar error = new Error();\n\t\t\t\t\tvar loadingEnded = (event) => {\n\t\t\t\t\t\tif(__webpack_require__.o(installedChunks, chunkId)) {\n\t\t\t\t\t\t\tinstalledChunkData = installedChunks[chunkId];\n\t\t\t\t\t\t\tif(installedChunkData !== 0) installedChunks[chunkId] = undefined;\n\t\t\t\t\t\t\tif(installedChunkData) {\n\t\t\t\t\t\t\t\tvar errorType = event && (event.type === 'load' ? 
'missing' : event.type);\n\t\t\t\t\t\t\t\tvar realSrc = event && event.target && event.target.src;\n\t\t\t\t\t\t\t\terror.message = 'Loading chunk ' + chunkId + ' failed.\\n(' + errorType + ': ' + realSrc + ')';\n\t\t\t\t\t\t\t\terror.name = 'ChunkLoadError';\n\t\t\t\t\t\t\t\terror.type = errorType;\n\t\t\t\t\t\t\t\terror.request = realSrc;\n\t\t\t\t\t\t\t\tinstalledChunkData[1](error);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t};\n\t\t\t\t\t__webpack_require__.l(url, loadingEnded, \"chunk-\" + chunkId, chunkId);\n\t\t\t\t} else installedChunks[chunkId] = 0;\n\t\t\t}\n\t\t}\n};\n\n// no prefetching\n\n// no preloaded\n\n// no HMR\n\n// no HMR manifest\n\n// no on chunks loaded\n\n// install a JSONP callback for chunk loading\nvar webpackJsonpCallback = (parentChunkLoadingFunction, data) => {\n\tvar [chunkIds, moreModules, runtime] = data;\n\t// add \"moreModules\" to the modules object,\n\t// then flag all \"chunkIds\" as loaded and fire callback\n\tvar moduleId, chunkId, i = 0;\n\tif(chunkIds.some((id) => (installedChunks[id] !== 0))) {\n\t\tfor(moduleId in moreModules) {\n\t\t\tif(__webpack_require__.o(moreModules, moduleId)) {\n\t\t\t\t__webpack_require__.m[moduleId] = moreModules[moduleId];\n\t\t\t}\n\t\t}\n\t\tif(runtime) var result = runtime(__webpack_require__);\n\t}\n\tif(parentChunkLoadingFunction) parentChunkLoadingFunction(data);\n\tfor(;i < chunkIds.length; i++) {\n\t\tchunkId = chunkIds[i];\n\t\tif(__webpack_require__.o(installedChunks, chunkId) && installedChunks[chunkId]) {\n\t\t\tinstalledChunks[chunkId][0]();\n\t\t}\n\t\tinstalledChunks[chunkId] = 0;\n\t}\n\n}\n\nvar chunkLoadingGlobal = self[\"webpackChunkneural_compressor_ext_lab\"] = self[\"webpackChunkneural_compressor_ext_lab\"] || [];\nchunkLoadingGlobal.forEach(webpackJsonpCallback.bind(null, 0));\nchunkLoadingGlobal.push = webpackJsonpCallback.bind(null, chunkLoadingGlobal.push.bind(chunkLoadingGlobal));","__webpack_require__.nc = undefined;","","// module cache are used so entry inlining 
is disabled\n// startup\n// Load entry module and return exports\nvar __webpack_exports__ = __webpack_require__(\"webpack/container/entry/neural_compressor_ext_lab\");\n",""],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.34f9ad20791fd484f052.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.34f9ad20791fd484f052.js
deleted file mode 100644
index a06a23ed0e0..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.34f9ad20791fd484f052.js
+++ /dev/null
@@ -1,572 +0,0 @@
-var _JUPYTERLAB;
-/******/ (() => { // webpackBootstrap
-/******/ "use strict";
-/******/ var __webpack_modules__ = ({
-
-/***/ "webpack/container/entry/neural_compressor_ext_lab":
-/*!***********************!*\
- !*** container entry ***!
- \***********************/
-/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
-
-var moduleMap = {
- "./index": () => {
- return Promise.all([__webpack_require__.e("webpack_sharing_consume_default_react"), __webpack_require__.e("lib_index_js")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ "./lib/index.js")))));
- },
- "./extension": () => {
- return Promise.all([__webpack_require__.e("webpack_sharing_consume_default_react"), __webpack_require__.e("lib_index_js")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ "./lib/index.js")))));
- },
- "./style": () => {
- return Promise.all([__webpack_require__.e("vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854"), __webpack_require__.e("style_index_js")]).then(() => (() => ((__webpack_require__(/*! ./style/index.js */ "./style/index.js")))));
- }
-};
-var get = (module, getScope) => {
- __webpack_require__.R = getScope;
- getScope = (
- __webpack_require__.o(moduleMap, module)
- ? moduleMap[module]()
- : Promise.resolve().then(() => {
- throw new Error('Module "' + module + '" does not exist in container.');
- })
- );
- __webpack_require__.R = undefined;
- return getScope;
-};
-var init = (shareScope, initScope) => {
- if (!__webpack_require__.S) return;
- var name = "default"
- var oldScope = __webpack_require__.S[name];
- if(oldScope && oldScope !== shareScope) throw new Error("Container initialization failed as it has already been initialized with a different share scope");
- __webpack_require__.S[name] = shareScope;
- return __webpack_require__.I(name, initScope);
-};
-
-// This exports getters to disallow modifications
-__webpack_require__.d(exports, {
- get: () => (get),
- init: () => (init)
-});
-
-/***/ })
-
-/******/ });
-/************************************************************************/
-/******/ // The module cache
-/******/ var __webpack_module_cache__ = {};
-/******/
-/******/ // The require function
-/******/ function __webpack_require__(moduleId) {
-/******/ // Check if module is in cache
-/******/ var cachedModule = __webpack_module_cache__[moduleId];
-/******/ if (cachedModule !== undefined) {
-/******/ return cachedModule.exports;
-/******/ }
-/******/ // Create a new module (and put it into the cache)
-/******/ var module = __webpack_module_cache__[moduleId] = {
-/******/ id: moduleId,
-/******/ // no module.loaded needed
-/******/ exports: {}
-/******/ };
-/******/
-/******/ // Execute the module function
-/******/ __webpack_modules__[moduleId](module, module.exports, __webpack_require__);
-/******/
-/******/ // Return the exports of the module
-/******/ return module.exports;
-/******/ }
-/******/
-/******/ // expose the modules object (__webpack_modules__)
-/******/ __webpack_require__.m = __webpack_modules__;
-/******/
-/******/ // expose the module cache
-/******/ __webpack_require__.c = __webpack_module_cache__;
-/******/
-/************************************************************************/
-/******/ /* webpack/runtime/compat get default export */
-/******/ (() => {
-/******/ // getDefaultExport function for compatibility with non-harmony modules
-/******/ __webpack_require__.n = (module) => {
-/******/ var getter = module && module.__esModule ?
-/******/ () => (module['default']) :
-/******/ () => (module);
-/******/ __webpack_require__.d(getter, { a: getter });
-/******/ return getter;
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/define property getters */
-/******/ (() => {
-/******/ // define getter functions for harmony exports
-/******/ __webpack_require__.d = (exports, definition) => {
-/******/ for(var key in definition) {
-/******/ if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {
-/******/ Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });
-/******/ }
-/******/ }
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/ensure chunk */
-/******/ (() => {
-/******/ __webpack_require__.f = {};
-/******/ // This file contains only the entry chunk.
-/******/ // The chunk loading function for additional chunks
-/******/ __webpack_require__.e = (chunkId) => {
-/******/ return Promise.all(Object.keys(__webpack_require__.f).reduce((promises, key) => {
-/******/ __webpack_require__.f[key](chunkId, promises);
-/******/ return promises;
-/******/ }, []));
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/get javascript chunk filename */
-/******/ (() => {
-/******/ // This function allow to reference async chunks
-/******/ __webpack_require__.u = (chunkId) => {
-/******/ // return url for filenames based on template
-/******/ return "" + chunkId + "." + {"webpack_sharing_consume_default_react":"19c51f7b56cfd16da3f9","lib_index_js":"0c0187df9df8bc38b9c5","vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854":"e09faf9ec3a764e40dc7","style_index_js":"8d733cc8b74fabbd10b8","vendors-node_modules_react-sanitized-html_lib_index_js":"500104f7c13c01fe1646","webpack_sharing_consume_default_sanitize-html_sanitize-html":"635249bb6dc3884c24a3","vendors-node_modules_sanitize-html_dist_sanitize-html_js":"825dbf94ec7371e0b28d"}[chunkId] + ".js";
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/global */
-/******/ (() => {
-/******/ __webpack_require__.g = (function() {
-/******/ if (typeof globalThis === 'object') return globalThis;
-/******/ try {
-/******/ return this || new Function('return this')();
-/******/ } catch (e) {
-/******/ if (typeof window === 'object') return window;
-/******/ }
-/******/ })();
-/******/ })();
-/******/
-/******/ /* webpack/runtime/hasOwnProperty shorthand */
-/******/ (() => {
-/******/ __webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))
-/******/ })();
-/******/
-/******/ /* webpack/runtime/load script */
-/******/ (() => {
-/******/ var inProgress = {};
-/******/ var dataWebpackPrefix = "neural_compressor_ext_lab:";
-/******/ // loadScript function to load a script via script tag
-/******/ __webpack_require__.l = (url, done, key, chunkId) => {
-/******/ if(inProgress[url]) { inProgress[url].push(done); return; }
-/******/ var script, needAttach;
-/******/ if(key !== undefined) {
-/******/ var scripts = document.getElementsByTagName("script");
-/******/ for(var i = 0; i < scripts.length; i++) {
-/******/ var s = scripts[i];
-/******/ if(s.getAttribute("src") == url || s.getAttribute("data-webpack") == dataWebpackPrefix + key) { script = s; break; }
-/******/ }
-/******/ }
-/******/ if(!script) {
-/******/ needAttach = true;
-/******/ script = document.createElement('script');
-/******/
-/******/ script.charset = 'utf-8';
-/******/ script.timeout = 120;
-/******/ if (__webpack_require__.nc) {
-/******/ script.setAttribute("nonce", __webpack_require__.nc);
-/******/ }
-/******/ script.setAttribute("data-webpack", dataWebpackPrefix + key);
-/******/ script.src = url;
-/******/ }
-/******/ inProgress[url] = [done];
-/******/ var onScriptComplete = (prev, event) => {
-/******/ // avoid mem leaks in IE.
-/******/ script.onerror = script.onload = null;
-/******/ clearTimeout(timeout);
-/******/ var doneFns = inProgress[url];
-/******/ delete inProgress[url];
-/******/ script.parentNode && script.parentNode.removeChild(script);
-/******/ doneFns && doneFns.forEach((fn) => (fn(event)));
-/******/ if(prev) return prev(event);
-/******/ }
-/******/ ;
-/******/ var timeout = setTimeout(onScriptComplete.bind(null, undefined, { type: 'timeout', target: script }), 120000);
-/******/ script.onerror = onScriptComplete.bind(null, script.onerror);
-/******/ script.onload = onScriptComplete.bind(null, script.onload);
-/******/ needAttach && document.head.appendChild(script);
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/make namespace object */
-/******/ (() => {
-/******/ // define __esModule on exports
-/******/ __webpack_require__.r = (exports) => {
-/******/ if(typeof Symbol !== 'undefined' && Symbol.toStringTag) {
-/******/ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
-/******/ }
-/******/ Object.defineProperty(exports, '__esModule', { value: true });
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/sharing */
-/******/ (() => {
-/******/ __webpack_require__.S = {};
-/******/ var initPromises = {};
-/******/ var initTokens = {};
-/******/ __webpack_require__.I = (name, initScope) => {
-/******/ if(!initScope) initScope = [];
-/******/ // handling circular init calls
-/******/ var initToken = initTokens[name];
-/******/ if(!initToken) initToken = initTokens[name] = {};
-/******/ if(initScope.indexOf(initToken) >= 0) return;
-/******/ initScope.push(initToken);
-/******/ // only runs once
-/******/ if(initPromises[name]) return initPromises[name];
-/******/ // creates a new share scope if needed
-/******/ if(!__webpack_require__.o(__webpack_require__.S, name)) __webpack_require__.S[name] = {};
-/******/ // runs all init snippets from all modules reachable
-/******/ var scope = __webpack_require__.S[name];
-/******/ var warn = (msg) => (typeof console !== "undefined" && console.warn && console.warn(msg));
-/******/ var uniqueName = "neural_compressor_ext_lab";
-/******/ var register = (name, version, factory, eager) => {
-/******/ var versions = scope[name] = scope[name] || {};
-/******/ var activeVersion = versions[version];
-/******/ if(!activeVersion || (!activeVersion.loaded && (!eager != !activeVersion.eager ? eager : uniqueName > activeVersion.from))) versions[version] = { get: factory, from: uniqueName, eager: !!eager };
-/******/ };
-/******/ var initExternal = (id) => {
-/******/ var handleError = (err) => (warn("Initialization of sharing external failed: " + err));
-/******/ try {
-/******/ var module = __webpack_require__(id);
-/******/ if(!module) return;
-/******/ var initFn = (module) => (module && module.init && module.init(__webpack_require__.S[name], initScope))
-/******/ if(module.then) return promises.push(module.then(initFn, handleError));
-/******/ var initResult = initFn(module);
-/******/ if(initResult && initResult.then) return promises.push(initResult['catch'](handleError));
-/******/ } catch(err) { handleError(err); }
-/******/ }
-/******/ var promises = [];
-/******/ switch(name) {
-/******/ case "default": {
-/******/ register("neural_compressor_ext_lab", "0.1.0", () => (Promise.all([__webpack_require__.e("webpack_sharing_consume_default_react"), __webpack_require__.e("lib_index_js")]).then(() => (() => (__webpack_require__(/*! ./lib/index.js */ "./lib/index.js"))))));
-/******/ register("react-sanitized-html", "2.0.0", () => (Promise.all([__webpack_require__.e("vendors-node_modules_react-sanitized-html_lib_index_js"), __webpack_require__.e("webpack_sharing_consume_default_sanitize-html_sanitize-html"), __webpack_require__.e("webpack_sharing_consume_default_react")]).then(() => (() => (__webpack_require__(/*! ./node_modules/react-sanitized-html/lib/index.js */ "./node_modules/react-sanitized-html/lib/index.js"))))));
-/******/ register("sanitize-html", "1.27.5", () => (__webpack_require__.e("vendors-node_modules_sanitize-html_dist_sanitize-html_js").then(() => (() => (__webpack_require__(/*! ./node_modules/sanitize-html/dist/sanitize-html.js */ "./node_modules/sanitize-html/dist/sanitize-html.js"))))));
-/******/ }
-/******/ break;
-/******/ }
-/******/ if(!promises.length) return initPromises[name] = 1;
-/******/ return initPromises[name] = Promise.all(promises).then(() => (initPromises[name] = 1));
-/******/ };
-/******/ })();
-/******/
-/******/ /* webpack/runtime/publicPath */
-/******/ (() => {
-/******/ var scriptUrl;
-/******/ if (__webpack_require__.g.importScripts) scriptUrl = __webpack_require__.g.location + "";
-/******/ var document = __webpack_require__.g.document;
-/******/ if (!scriptUrl && document) {
-/******/ if (document.currentScript)
-/******/ scriptUrl = document.currentScript.src
-/******/ if (!scriptUrl) {
-/******/ var scripts = document.getElementsByTagName("script");
-/******/ if(scripts.length) scriptUrl = scripts[scripts.length - 1].src
-/******/ }
-/******/ }
-/******/ // When supporting browsers where an automatic publicPath is not supported you must specify an output.publicPath manually via configuration
-/******/ // or pass an empty string ("") and set the __webpack_public_path__ variable from your code to use your own logic.
-/******/ if (!scriptUrl) throw new Error("Automatic publicPath is not supported in this browser");
-/******/ scriptUrl = scriptUrl.replace(/#.*$/, "").replace(/\?.*$/, "").replace(/\/[^\/]+$/, "/");
-/******/ __webpack_require__.p = scriptUrl;
-/******/ })();
-/******/
-/******/ /* webpack/runtime/consumes */
-/******/ (() => {
-/******/ var parseVersion = (str) => {
-/******/ // see webpack/lib/util/semver.js for original code
-/******/ var p=p=>{return p.split(".").map((p=>{return+p==p?+p:p}))},n=/^([^-+]+)?(?:-([^+]+))?(?:\+(.+))?$/.exec(str),r=n[1]?p(n[1]):[];return n[2]&&(r.length++,r.push.apply(r,p(n[2]))),n[3]&&(r.push([]),r.push.apply(r,p(n[3]))),r;
-/******/ }
-/******/ var versionLt = (a, b) => {
-/******/ // see webpack/lib/util/semver.js for original code
-/******/ a=parseVersion(a),b=parseVersion(b);for(var r=0;;){if(r>=a.length)return r=b.length)return"u"==n;var t=b[r],f=(typeof t)[0];if(n!=f)return"o"==n&&"n"==f||("s"==f||"u"==n);if("o"!=n&&"u"!=n&&e!=t)return e {
-/******/ // see webpack/lib/util/semver.js for original code
-/******/ var r=range[0],n="";if(1===range.length)return"*";if(r+.5){n+=0==r?">=":-1==r?"<":1==r?"^":2==r?"~":r>0?"=":"!=";for(var e=1,a=1;a0?".":"")+(e=2,t)}return n}var g=[];for(a=1;a {
-/******/ // see webpack/lib/util/semver.js for original code
-/******/ if(0 in range){version=parseVersion(version);var e=range[0],r=e<0;r&&(e=-e-1);for(var n=0,i=1,a=!0;;i++,n++){var f,s,g=i=version.length||"o"==(s=(typeof(f=version[n]))[0]))return!a||("u"==g?i>e&&!r:""==g!=r);if("u"==s){if(!a||"u"!=g)return!1}else if(a)if(g==s)if(i<=e){if(f!=range[i])return!1}else{if(r?f>range[i]:f {
-/******/ var scope = __webpack_require__.S[scopeName];
-/******/ if(!scope || !__webpack_require__.o(scope, key)) throw new Error("Shared module " + key + " doesn't exist in shared scope " + scopeName);
-/******/ return scope;
-/******/ };
-/******/ var findVersion = (scope, key) => {
-/******/ var versions = scope[key];
-/******/ var key = Object.keys(versions).reduce((a, b) => {
-/******/ return !a || versionLt(a, b) ? b : a;
-/******/ }, 0);
-/******/ return key && versions[key]
-/******/ };
-/******/ var findSingletonVersionKey = (scope, key) => {
-/******/ var versions = scope[key];
-/******/ return Object.keys(versions).reduce((a, b) => {
-/******/ return !a || (!versions[a].loaded && versionLt(a, b)) ? b : a;
-/******/ }, 0);
-/******/ };
-/******/ var getInvalidSingletonVersionMessage = (scope, key, version, requiredVersion) => {
-/******/ return "Unsatisfied version " + version + " from " + (version && scope[key][version].from) + " of shared singleton module " + key + " (required " + rangeToString(requiredVersion) + ")"
-/******/ };
-/******/ var getSingleton = (scope, scopeName, key, requiredVersion) => {
-/******/ var version = findSingletonVersionKey(scope, key);
-/******/ return get(scope[key][version]);
-/******/ };
-/******/ var getSingletonVersion = (scope, scopeName, key, requiredVersion) => {
-/******/ var version = findSingletonVersionKey(scope, key);
-/******/ if (!satisfy(requiredVersion, version)) typeof console !== "undefined" && console.warn && console.warn(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));
-/******/ return get(scope[key][version]);
-/******/ };
-/******/ var getStrictSingletonVersion = (scope, scopeName, key, requiredVersion) => {
-/******/ var version = findSingletonVersionKey(scope, key);
-/******/ if (!satisfy(requiredVersion, version)) throw new Error(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));
-/******/ return get(scope[key][version]);
-/******/ };
-/******/ var findValidVersion = (scope, key, requiredVersion) => {
-/******/ var versions = scope[key];
-/******/ var key = Object.keys(versions).reduce((a, b) => {
-/******/ if (!satisfy(requiredVersion, b)) return a;
-/******/ return !a || versionLt(a, b) ? b : a;
-/******/ }, 0);
-/******/ return key && versions[key]
-/******/ };
-/******/ var getInvalidVersionMessage = (scope, scopeName, key, requiredVersion) => {
-/******/ var versions = scope[key];
-/******/ return "No satisfying version (" + rangeToString(requiredVersion) + ") of shared module " + key + " found in shared scope " + scopeName + ".\n" +
-/******/ "Available versions: " + Object.keys(versions).map((key) => {
-/******/ return key + " from " + versions[key].from;
-/******/ }).join(", ");
-/******/ };
-/******/ var getValidVersion = (scope, scopeName, key, requiredVersion) => {
-/******/ var entry = findValidVersion(scope, key, requiredVersion);
-/******/ if(entry) return get(entry);
-/******/ throw new Error(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));
-/******/ };
-/******/ var warnInvalidVersion = (scope, scopeName, key, requiredVersion) => {
-/******/ typeof console !== "undefined" && console.warn && console.warn(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));
-/******/ };
-/******/ var get = (entry) => {
-/******/ entry.loaded = 1;
-/******/ return entry.get()
-/******/ };
-/******/ var init = (fn) => (function(scopeName, a, b, c) {
-/******/ var promise = __webpack_require__.I(scopeName);
-/******/ if (promise && promise.then) return promise.then(fn.bind(fn, scopeName, __webpack_require__.S[scopeName], a, b, c));
-/******/ return fn(scopeName, __webpack_require__.S[scopeName], a, b, c);
-/******/ });
-/******/
-/******/ var load = /*#__PURE__*/ init((scopeName, scope, key) => {
-/******/ ensureExistence(scopeName, key);
-/******/ return get(findVersion(scope, key));
-/******/ });
-/******/ var loadFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {
-/******/ return scope && __webpack_require__.o(scope, key) ? get(findVersion(scope, key)) : fallback();
-/******/ });
-/******/ var loadVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {
-/******/ ensureExistence(scopeName, key);
-/******/ return get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));
-/******/ });
-/******/ var loadSingleton = /*#__PURE__*/ init((scopeName, scope, key) => {
-/******/ ensureExistence(scopeName, key);
-/******/ return getSingleton(scope, scopeName, key);
-/******/ });
-/******/ var loadSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {
-/******/ ensureExistence(scopeName, key);
-/******/ return getSingletonVersion(scope, scopeName, key, version);
-/******/ });
-/******/ var loadStrictVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {
-/******/ ensureExistence(scopeName, key);
-/******/ return getValidVersion(scope, scopeName, key, version);
-/******/ });
-/******/ var loadStrictSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {
-/******/ ensureExistence(scopeName, key);
-/******/ return getStrictSingletonVersion(scope, scopeName, key, version);
-/******/ });
-/******/ var loadVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {
-/******/ if(!scope || !__webpack_require__.o(scope, key)) return fallback();
-/******/ return get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));
-/******/ });
-/******/ var loadSingletonFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {
-/******/ if(!scope || !__webpack_require__.o(scope, key)) return fallback();
-/******/ return getSingleton(scope, scopeName, key);
-/******/ });
-/******/ var loadSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {
-/******/ if(!scope || !__webpack_require__.o(scope, key)) return fallback();
-/******/ return getSingletonVersion(scope, scopeName, key, version);
-/******/ });
-/******/ var loadStrictVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {
-/******/ var entry = scope && __webpack_require__.o(scope, key) && findValidVersion(scope, key, version);
-/******/ return entry ? get(entry) : fallback();
-/******/ });
-/******/ var loadStrictSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {
-/******/ if(!scope || !__webpack_require__.o(scope, key)) return fallback();
-/******/ return getStrictSingletonVersion(scope, scopeName, key, version);
-/******/ });
-/******/ var installedModules = {};
-/******/ var moduleToHandlerMapping = {
-/******/ "webpack/sharing/consume/default/react": () => (loadSingletonVersionCheck("default", "react", [1,17,0,1])),
-/******/ "webpack/sharing/consume/default/@jupyterlab/notebook": () => (loadSingletonVersionCheck("default", "@jupyterlab/notebook", [1,3,4,7])),
-/******/ "webpack/sharing/consume/default/@jupyterlab/apputils": () => (loadSingletonVersionCheck("default", "@jupyterlab/apputils", [1,3,4,7])),
-/******/ "webpack/sharing/consume/default/@jupyterlab/settingregistry": () => (loadSingletonVersionCheck("default", "@jupyterlab/settingregistry", [1,3,4,7])),
-/******/ "webpack/sharing/consume/default/@jupyterlab/mainmenu": () => (loadSingletonVersionCheck("default", "@jupyterlab/mainmenu", [1,3,4,7])),
-/******/ "webpack/sharing/consume/default/@jupyterlab/ui-components": () => (loadSingletonVersionCheck("default", "@jupyterlab/ui-components", [1,3,4,7])),
-/******/ "webpack/sharing/consume/default/@lumino/widgets": () => (loadSingletonVersionCheck("default", "@lumino/widgets", [1,1,33,0])),
-/******/ "webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html": () => (loadStrictVersionCheckFallback("default", "react-sanitized-html", [1,2,0,0], () => (Promise.all([__webpack_require__.e("vendors-node_modules_react-sanitized-html_lib_index_js"), __webpack_require__.e("webpack_sharing_consume_default_sanitize-html_sanitize-html")]).then(() => (() => (__webpack_require__(/*! react-sanitized-html */ "./node_modules/react-sanitized-html/lib/index.js"))))))),
-/******/ "webpack/sharing/consume/default/sanitize-html/sanitize-html": () => (loadStrictVersionCheckFallback("default", "sanitize-html", [1,1,16,1], () => (__webpack_require__.e("vendors-node_modules_sanitize-html_dist_sanitize-html_js").then(() => (() => (__webpack_require__(/*! sanitize-html */ "./node_modules/sanitize-html/dist/sanitize-html.js")))))))
-/******/ };
-/******/ // no consumes in initial chunks
-/******/ var chunkMapping = {
-/******/ "webpack_sharing_consume_default_react": [
-/******/ "webpack/sharing/consume/default/react"
-/******/ ],
-/******/ "lib_index_js": [
-/******/ "webpack/sharing/consume/default/@jupyterlab/notebook",
-/******/ "webpack/sharing/consume/default/@jupyterlab/apputils",
-/******/ "webpack/sharing/consume/default/@jupyterlab/settingregistry",
-/******/ "webpack/sharing/consume/default/@jupyterlab/mainmenu",
-/******/ "webpack/sharing/consume/default/@jupyterlab/ui-components",
-/******/ "webpack/sharing/consume/default/@lumino/widgets",
-/******/ "webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html"
-/******/ ],
-/******/ "webpack_sharing_consume_default_sanitize-html_sanitize-html": [
-/******/ "webpack/sharing/consume/default/sanitize-html/sanitize-html"
-/******/ ]
-/******/ };
-/******/ __webpack_require__.f.consumes = (chunkId, promises) => {
-/******/ if(__webpack_require__.o(chunkMapping, chunkId)) {
-/******/ chunkMapping[chunkId].forEach((id) => {
-/******/ if(__webpack_require__.o(installedModules, id)) return promises.push(installedModules[id]);
-/******/ var onFactory = (factory) => {
-/******/ installedModules[id] = 0;
-/******/ __webpack_require__.m[id] = (module) => {
-/******/ delete __webpack_require__.c[id];
-/******/ module.exports = factory();
-/******/ }
-/******/ };
-/******/ var onError = (error) => {
-/******/ delete installedModules[id];
-/******/ __webpack_require__.m[id] = (module) => {
-/******/ delete __webpack_require__.c[id];
-/******/ throw error;
-/******/ }
-/******/ };
-/******/ try {
-/******/ var promise = moduleToHandlerMapping[id]();
-/******/ if(promise.then) {
-/******/ promises.push(installedModules[id] = promise.then(onFactory)['catch'](onError));
-/******/ } else onFactory(promise);
-/******/ } catch(e) { onError(e); }
-/******/ });
-/******/ }
-/******/ }
-/******/ })();
-/******/
-/******/ /* webpack/runtime/jsonp chunk loading */
-/******/ (() => {
-/******/ __webpack_require__.b = document.baseURI || self.location.href;
-/******/
-/******/ // object to store loaded and loading chunks
-/******/ // undefined = chunk not loaded, null = chunk preloaded/prefetched
-/******/ // [resolve, reject, Promise] = chunk loading, 0 = chunk loaded
-/******/ var installedChunks = {
-/******/ "neural_compressor_ext_lab": 0
-/******/ };
-/******/
-/******/ __webpack_require__.f.j = (chunkId, promises) => {
-/******/ // JSONP chunk loading for javascript
-/******/ var installedChunkData = __webpack_require__.o(installedChunks, chunkId) ? installedChunks[chunkId] : undefined;
-/******/ if(installedChunkData !== 0) { // 0 means "already installed".
-/******/
-/******/ // a Promise means "currently loading".
-/******/ if(installedChunkData) {
-/******/ promises.push(installedChunkData[2]);
-/******/ } else {
-/******/ if(!/^webpack_sharing_consume_default_(react|sanitize\-html_sanitize\-html)$/.test(chunkId)) {
-/******/ // setup Promise in chunk cache
-/******/ var promise = new Promise((resolve, reject) => (installedChunkData = installedChunks[chunkId] = [resolve, reject]));
-/******/ promises.push(installedChunkData[2] = promise);
-/******/
-/******/ // start chunk loading
-/******/ var url = __webpack_require__.p + __webpack_require__.u(chunkId);
-/******/ // create error before stack unwound to get useful stacktrace later
-/******/ var error = new Error();
-/******/ var loadingEnded = (event) => {
-/******/ if(__webpack_require__.o(installedChunks, chunkId)) {
-/******/ installedChunkData = installedChunks[chunkId];
-/******/ if(installedChunkData !== 0) installedChunks[chunkId] = undefined;
-/******/ if(installedChunkData) {
-/******/ var errorType = event && (event.type === 'load' ? 'missing' : event.type);
-/******/ var realSrc = event && event.target && event.target.src;
-/******/ error.message = 'Loading chunk ' + chunkId + ' failed.\n(' + errorType + ': ' + realSrc + ')';
-/******/ error.name = 'ChunkLoadError';
-/******/ error.type = errorType;
-/******/ error.request = realSrc;
-/******/ installedChunkData[1](error);
-/******/ }
-/******/ }
-/******/ };
-/******/ __webpack_require__.l(url, loadingEnded, "chunk-" + chunkId, chunkId);
-/******/ } else installedChunks[chunkId] = 0;
-/******/ }
-/******/ }
-/******/ };
-/******/
-/******/ // no prefetching
-/******/
-/******/ // no preloaded
-/******/
-/******/ // no HMR
-/******/
-/******/ // no HMR manifest
-/******/
-/******/ // no on chunks loaded
-/******/
-/******/ // install a JSONP callback for chunk loading
-/******/ var webpackJsonpCallback = (parentChunkLoadingFunction, data) => {
-/******/ var [chunkIds, moreModules, runtime] = data;
-/******/ // add "moreModules" to the modules object,
-/******/ // then flag all "chunkIds" as loaded and fire callback
-/******/ var moduleId, chunkId, i = 0;
-/******/ if(chunkIds.some((id) => (installedChunks[id] !== 0))) {
-/******/ for(moduleId in moreModules) {
-/******/ if(__webpack_require__.o(moreModules, moduleId)) {
-/******/ __webpack_require__.m[moduleId] = moreModules[moduleId];
-/******/ }
-/******/ }
-/******/ if(runtime) var result = runtime(__webpack_require__);
-/******/ }
-/******/ if(parentChunkLoadingFunction) parentChunkLoadingFunction(data);
-/******/ for(;i < chunkIds.length; i++) {
-/******/ chunkId = chunkIds[i];
-/******/ if(__webpack_require__.o(installedChunks, chunkId) && installedChunks[chunkId]) {
-/******/ installedChunks[chunkId][0]();
-/******/ }
-/******/ installedChunks[chunkId] = 0;
-/******/ }
-/******/
-/******/ }
-/******/
-/******/ var chunkLoadingGlobal = self["webpackChunkneural_compressor_ext_lab"] = self["webpackChunkneural_compressor_ext_lab"] || [];
-/******/ chunkLoadingGlobal.forEach(webpackJsonpCallback.bind(null, 0));
-/******/ chunkLoadingGlobal.push = webpackJsonpCallback.bind(null, chunkLoadingGlobal.push.bind(chunkLoadingGlobal));
-/******/ })();
-/******/
-/******/ /* webpack/runtime/nonce */
-/******/ (() => {
-/******/ __webpack_require__.nc = undefined;
-/******/ })();
-/******/
-/************************************************************************/
-/******/
-/******/ // module cache are used so entry inlining is disabled
-/******/ // startup
-/******/ // Load entry module and return exports
-/******/ var __webpack_exports__ = __webpack_require__("webpack/container/entry/neural_compressor_ext_lab");
-/******/ (_JUPYTERLAB = typeof _JUPYTERLAB === "undefined" ? {} : _JUPYTERLAB).neural_compressor_ext_lab = __webpack_exports__;
-/******/
-/******/ })()
-;
-//# sourceMappingURL=remoteEntry.34f9ad20791fd484f052.js.map
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.34f9ad20791fd484f052.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.34f9ad20791fd484f052.js.map
deleted file mode 100644
index f8175790c7a..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.34f9ad20791fd484f052.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"remoteEntry.34f9ad20791fd484f052.js","mappings":";;;;;;;;;;;AAAA;AACA;AACA;AACA,EAAE;AACF;AACA;AACA,EAAE;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI;AACJ;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA;AACA;AACA;AACA;AACA,CAAC;;;;;;UCpCD;UACA;;UAEA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;;UAEA;UACA;;UAEA;UACA;UACA;;UAEA;UACA;;UAEA;UACA;;;;;WC5BA;WACA;WACA;WACA;WACA;WACA,iCAAiC,WAAW;WAC5C;WACA;;;;;WCPA;WACA;WACA;WACA;WACA,yCAAyC,wCAAwC;WACjF;WACA;WACA;;;;;WCPA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;;;;;WCRA;WACA;WACA;WACA,8BAA8B,kgBAAkgB;WAChiB;;;;;WCJA;WACA;WACA;WACA;WACA,GAAG;WACH;WACA;WACA,CAAC;;;;;WCPD;;;;;WCAA;WACA;WACA;WACA;WACA,uBAAuB,4BAA4B;WACnD;WACA;WACA;WACA,iBAAiB,oBAAoB;WACrC;WACA,mGAAmG,YAAY;WAC/G;WACA;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,mEAAmE,iCAAiC;WACpG;WACA;WACA;WACA;;;;;WCzCA;WACA;WACA;WACA,uDAAuD,iBAAiB;WACxE;WACA,gDAAgD,aAAa;WAC7D;;;;;WCNA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,oJAAoJ;WACpJ;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,IAAI,aAAa;WACjB;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;;;;;WC7CA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;;;;;WCfA;WACA;WACA,WAAW,6BAA6B,iBAAiB,GAAG,qEAAqE;WACjI;WACA;WACA;WACA,qCAAqC,aAAa,EAAE,wDAAwD,2BAA2B,4BAA4B,2BAA2B,+CAA+C,mCAAmC;WAChR;WACA;WACA;WACA,qBAAqB,8BAA8B,SAAS,sDAAsD,gBAAgB,eAAe,KAAK,6DAA6D,SAAS,SAAS,QAAQ,eAAe,KAAK,eAAe,qGAAqG,WAAW,aAAa;WAC7Y;WACA;WACA;WACA,gBAAgB,8BAA8B,qBAAqB,YAAY,sBAAsB,SAAS,iDAAiD,6FAA6F,WAAW,uBAAuB,2BAA2B,wBAAwB,KAAK,oCAAoC,oBAAoB,wBAAwB,oBAAoB,SAAS,KAAK,yBAAyB,KAAK,gCAAgC,yBAAyB,QAAQ,eAAe,KAAK,eAAe,4DAA4D;WACtoB;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;W
ACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,CAAC;;WAED;WACA;WACA;WACA,CAAC;WACD;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM;WACN,KAAK,WAAW;WAChB,GAAG;WACH;WACA;;;;;WC3LA;;WAEA;WACA;WACA;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA,iCAAiC;;WAEjC;WACA;WACA;WACA,KAAK;WACL;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM;WACN;WACA;WACA;;WAEA;;WAEA;;WAEA;;WAEA;;WAEA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM,qBAAqB;WAC3B;WACA;WACA;WACA;WACA;WACA;;WAEA;;WAEA;WACA;WACA;;;;;WCrFA;;;;;UEAA;UACA;UACA;UACA","sources":["webpack://neural_compressor_ext_lab/webpack/container-entry","webpack://neural_compressor_ext_lab/webpack/bootstrap","webpack://neural_compressor_ext_lab/webpack/runtime/compat get default export","webpack://neural_compressor_ext_lab/webpack/runtime/define property getters","webpack://neural_compressor_ext_lab/webpack/runtime/ensure chunk","webpack://neural_compressor_ext_lab/webpack/runtime/get javascript chunk filename","webpack://neural_compressor_ext_lab/webpack/runtime/global","webpack://neural_compressor_ext_lab/webpack/runtime/hasOwnProperty shorthand","webpack://neural_compressor_ext_lab/webpack/runtime/load script","webpack://neural_compressor_ext_lab/webpack/runtime/make namespace 
object","webpack://neural_compressor_ext_lab/webpack/runtime/sharing","webpack://neural_compressor_ext_lab/webpack/runtime/publicPath","webpack://neural_compressor_ext_lab/webpack/runtime/consumes","webpack://neural_compressor_ext_lab/webpack/runtime/jsonp chunk loading","webpack://neural_compressor_ext_lab/webpack/runtime/nonce","webpack://neural_compressor_ext_lab/webpack/before-startup","webpack://neural_compressor_ext_lab/webpack/startup","webpack://neural_compressor_ext_lab/webpack/after-startup"],"sourcesContent":["var moduleMap = {\n\t\"./index\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ \"./lib/index.js\")))));\n\t},\n\t\"./extension\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ \"./lib/index.js\")))));\n\t},\n\t\"./style\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854\"), __webpack_require__.e(\"style_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./style/index.js */ \"./style/index.js\")))));\n\t}\n};\nvar get = (module, getScope) => {\n\t__webpack_require__.R = getScope;\n\tgetScope = (\n\t\t__webpack_require__.o(moduleMap, module)\n\t\t\t? 
moduleMap[module]()\n\t\t\t: Promise.resolve().then(() => {\n\t\t\t\tthrow new Error('Module \"' + module + '\" does not exist in container.');\n\t\t\t})\n\t);\n\t__webpack_require__.R = undefined;\n\treturn getScope;\n};\nvar init = (shareScope, initScope) => {\n\tif (!__webpack_require__.S) return;\n\tvar name = \"default\"\n\tvar oldScope = __webpack_require__.S[name];\n\tif(oldScope && oldScope !== shareScope) throw new Error(\"Container initialization failed as it has already been initialized with a different share scope\");\n\t__webpack_require__.S[name] = shareScope;\n\treturn __webpack_require__.I(name, initScope);\n};\n\n// This exports getters to disallow modifications\n__webpack_require__.d(exports, {\n\tget: () => (get),\n\tinit: () => (init)\n});","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\tid: moduleId,\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n// expose the modules object (__webpack_modules__)\n__webpack_require__.m = __webpack_modules__;\n\n// expose the module cache\n__webpack_require__.c = __webpack_module_cache__;\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = (exports, definition) 
=> {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.f = {};\n// This file contains only the entry chunk.\n// The chunk loading function for additional chunks\n__webpack_require__.e = (chunkId) => {\n\treturn Promise.all(Object.keys(__webpack_require__.f).reduce((promises, key) => {\n\t\t__webpack_require__.f[key](chunkId, promises);\n\t\treturn promises;\n\t}, []));\n};","// This function allow to reference async chunks\n__webpack_require__.u = (chunkId) => {\n\t// return url for filenames based on template\n\treturn \"\" + chunkId + \".\" + {\"webpack_sharing_consume_default_react\":\"19c51f7b56cfd16da3f9\",\"lib_index_js\":\"0c0187df9df8bc38b9c5\",\"vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854\":\"e09faf9ec3a764e40dc7\",\"style_index_js\":\"8d733cc8b74fabbd10b8\",\"vendors-node_modules_react-sanitized-html_lib_index_js\":\"500104f7c13c01fe1646\",\"webpack_sharing_consume_default_sanitize-html_sanitize-html\":\"635249bb6dc3884c24a3\",\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\":\"825dbf94ec7371e0b28d\"}[chunkId] + \".js\";\n};","__webpack_require__.g = (function() {\n\tif (typeof globalThis === 'object') return globalThis;\n\ttry {\n\t\treturn this || new Function('return this')();\n\t} catch (e) {\n\t\tif (typeof window === 'object') return window;\n\t}\n})();","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","var inProgress = {};\nvar dataWebpackPrefix = \"neural_compressor_ext_lab:\";\n// loadScript function to load a script via script tag\n__webpack_require__.l = (url, done, key, chunkId) => {\n\tif(inProgress[url]) { inProgress[url].push(done); return; }\n\tvar script, needAttach;\n\tif(key !== undefined) {\n\t\tvar scripts = 
document.getElementsByTagName(\"script\");\n\t\tfor(var i = 0; i < scripts.length; i++) {\n\t\t\tvar s = scripts[i];\n\t\t\tif(s.getAttribute(\"src\") == url || s.getAttribute(\"data-webpack\") == dataWebpackPrefix + key) { script = s; break; }\n\t\t}\n\t}\n\tif(!script) {\n\t\tneedAttach = true;\n\t\tscript = document.createElement('script');\n\n\t\tscript.charset = 'utf-8';\n\t\tscript.timeout = 120;\n\t\tif (__webpack_require__.nc) {\n\t\t\tscript.setAttribute(\"nonce\", __webpack_require__.nc);\n\t\t}\n\t\tscript.setAttribute(\"data-webpack\", dataWebpackPrefix + key);\n\t\tscript.src = url;\n\t}\n\tinProgress[url] = [done];\n\tvar onScriptComplete = (prev, event) => {\n\t\t// avoid mem leaks in IE.\n\t\tscript.onerror = script.onload = null;\n\t\tclearTimeout(timeout);\n\t\tvar doneFns = inProgress[url];\n\t\tdelete inProgress[url];\n\t\tscript.parentNode && script.parentNode.removeChild(script);\n\t\tdoneFns && doneFns.forEach((fn) => (fn(event)));\n\t\tif(prev) return prev(event);\n\t}\n\t;\n\tvar timeout = setTimeout(onScriptComplete.bind(null, undefined, { type: 'timeout', target: script }), 120000);\n\tscript.onerror = onScriptComplete.bind(null, script.onerror);\n\tscript.onload = onScriptComplete.bind(null, script.onload);\n\tneedAttach && document.head.appendChild(script);\n};","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","__webpack_require__.S = {};\nvar initPromises = {};\nvar initTokens = {};\n__webpack_require__.I = (name, initScope) => {\n\tif(!initScope) initScope = [];\n\t// handling circular init calls\n\tvar initToken = initTokens[name];\n\tif(!initToken) initToken = initTokens[name] = {};\n\tif(initScope.indexOf(initToken) >= 0) return;\n\tinitScope.push(initToken);\n\t// only runs 
once\n\tif(initPromises[name]) return initPromises[name];\n\t// creates a new share scope if needed\n\tif(!__webpack_require__.o(__webpack_require__.S, name)) __webpack_require__.S[name] = {};\n\t// runs all init snippets from all modules reachable\n\tvar scope = __webpack_require__.S[name];\n\tvar warn = (msg) => (typeof console !== \"undefined\" && console.warn && console.warn(msg));\n\tvar uniqueName = \"neural_compressor_ext_lab\";\n\tvar register = (name, version, factory, eager) => {\n\t\tvar versions = scope[name] = scope[name] || {};\n\t\tvar activeVersion = versions[version];\n\t\tif(!activeVersion || (!activeVersion.loaded && (!eager != !activeVersion.eager ? eager : uniqueName > activeVersion.from))) versions[version] = { get: factory, from: uniqueName, eager: !!eager };\n\t};\n\tvar initExternal = (id) => {\n\t\tvar handleError = (err) => (warn(\"Initialization of sharing external failed: \" + err));\n\t\ttry {\n\t\t\tvar module = __webpack_require__(id);\n\t\t\tif(!module) return;\n\t\t\tvar initFn = (module) => (module && module.init && module.init(__webpack_require__.S[name], initScope))\n\t\t\tif(module.then) return promises.push(module.then(initFn, handleError));\n\t\t\tvar initResult = initFn(module);\n\t\t\tif(initResult && initResult.then) return promises.push(initResult['catch'](handleError));\n\t\t} catch(err) { handleError(err); }\n\t}\n\tvar promises = [];\n\tswitch(name) {\n\t\tcase \"default\": {\n\t\t\tregister(\"neural_compressor_ext_lab\", \"0.1.0\", () => (Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => (__webpack_require__(/*! 
./lib/index.js */ \"./lib/index.js\"))))));\n\t\t\tregister(\"react-sanitized-html\", \"2.0.0\", () => (Promise.all([__webpack_require__.e(\"vendors-node_modules_react-sanitized-html_lib_index_js\"), __webpack_require__.e(\"webpack_sharing_consume_default_sanitize-html_sanitize-html\"), __webpack_require__.e(\"webpack_sharing_consume_default_react\")]).then(() => (() => (__webpack_require__(/*! ./node_modules/react-sanitized-html/lib/index.js */ \"./node_modules/react-sanitized-html/lib/index.js\"))))));\n\t\t\tregister(\"sanitize-html\", \"1.27.5\", () => (__webpack_require__.e(\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\").then(() => (() => (__webpack_require__(/*! ./node_modules/sanitize-html/dist/sanitize-html.js */ \"./node_modules/sanitize-html/dist/sanitize-html.js\"))))));\n\t\t}\n\t\tbreak;\n\t}\n\tif(!promises.length) return initPromises[name] = 1;\n\treturn initPromises[name] = Promise.all(promises).then(() => (initPromises[name] = 1));\n};","var scriptUrl;\nif (__webpack_require__.g.importScripts) scriptUrl = __webpack_require__.g.location + \"\";\nvar document = __webpack_require__.g.document;\nif (!scriptUrl && document) {\n\tif (document.currentScript)\n\t\tscriptUrl = document.currentScript.src\n\tif (!scriptUrl) {\n\t\tvar scripts = document.getElementsByTagName(\"script\");\n\t\tif(scripts.length) scriptUrl = scripts[scripts.length - 1].src\n\t}\n}\n// When supporting browsers where an automatic publicPath is not supported you must specify an output.publicPath manually via configuration\n// or pass an empty string (\"\") and set the __webpack_public_path__ variable from your code to use your own logic.\nif (!scriptUrl) throw new Error(\"Automatic publicPath is not supported in this browser\");\nscriptUrl = scriptUrl.replace(/#.*$/, \"\").replace(/\\?.*$/, \"\").replace(/\\/[^\\/]+$/, \"/\");\n__webpack_require__.p = scriptUrl;","var parseVersion = (str) => {\n\t// see webpack/lib/util/semver.js for original code\n\tvar p=p=>{return 
p.split(\".\").map((p=>{return+p==p?+p:p}))},n=/^([^-+]+)?(?:-([^+]+))?(?:\\+(.+))?$/.exec(str),r=n[1]?p(n[1]):[];return n[2]&&(r.length++,r.push.apply(r,p(n[2]))),n[3]&&(r.push([]),r.push.apply(r,p(n[3]))),r;\n}\nvar versionLt = (a, b) => {\n\t// see webpack/lib/util/semver.js for original code\n\ta=parseVersion(a),b=parseVersion(b);for(var r=0;;){if(r>=a.length)return r=b.length)return\"u\"==n;var t=b[r],f=(typeof t)[0];if(n!=f)return\"o\"==n&&\"n\"==f||(\"s\"==f||\"u\"==n);if(\"o\"!=n&&\"u\"!=n&&e!=t)return e {\n\t// see webpack/lib/util/semver.js for original code\n\tvar r=range[0],n=\"\";if(1===range.length)return\"*\";if(r+.5){n+=0==r?\">=\":-1==r?\"<\":1==r?\"^\":2==r?\"~\":r>0?\"=\":\"!=\";for(var e=1,a=1;a0?\".\":\"\")+(e=2,t)}return n}var g=[];for(a=1;a {\n\t// see webpack/lib/util/semver.js for original code\n\tif(0 in range){version=parseVersion(version);var e=range[0],r=e<0;r&&(e=-e-1);for(var n=0,i=1,a=!0;;i++,n++){var f,s,g=i=version.length||\"o\"==(s=(typeof(f=version[n]))[0]))return!a||(\"u\"==g?i>e&&!r:\"\"==g!=r);if(\"u\"==s){if(!a||\"u\"!=g)return!1}else if(a)if(g==s)if(i<=e){if(f!=range[i])return!1}else{if(r?f>range[i]:f {\n\tvar scope = __webpack_require__.S[scopeName];\n\tif(!scope || !__webpack_require__.o(scope, key)) throw new Error(\"Shared module \" + key + \" doesn't exist in shared scope \" + scopeName);\n\treturn scope;\n};\nvar findVersion = (scope, key) => {\n\tvar versions = scope[key];\n\tvar key = Object.keys(versions).reduce((a, b) => {\n\t\treturn !a || versionLt(a, b) ? b : a;\n\t}, 0);\n\treturn key && versions[key]\n};\nvar findSingletonVersionKey = (scope, key) => {\n\tvar versions = scope[key];\n\treturn Object.keys(versions).reduce((a, b) => {\n\t\treturn !a || (!versions[a].loaded && versionLt(a, b)) ? 
b : a;\n\t}, 0);\n};\nvar getInvalidSingletonVersionMessage = (scope, key, version, requiredVersion) => {\n\treturn \"Unsatisfied version \" + version + \" from \" + (version && scope[key][version].from) + \" of shared singleton module \" + key + \" (required \" + rangeToString(requiredVersion) + \")\"\n};\nvar getSingleton = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\treturn get(scope[key][version]);\n};\nvar getSingletonVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\tif (!satisfy(requiredVersion, version)) typeof console !== \"undefined\" && console.warn && console.warn(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));\n\treturn get(scope[key][version]);\n};\nvar getStrictSingletonVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\tif (!satisfy(requiredVersion, version)) throw new Error(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));\n\treturn get(scope[key][version]);\n};\nvar findValidVersion = (scope, key, requiredVersion) => {\n\tvar versions = scope[key];\n\tvar key = Object.keys(versions).reduce((a, b) => {\n\t\tif (!satisfy(requiredVersion, b)) return a;\n\t\treturn !a || versionLt(a, b) ? 
b : a;\n\t}, 0);\n\treturn key && versions[key]\n};\nvar getInvalidVersionMessage = (scope, scopeName, key, requiredVersion) => {\n\tvar versions = scope[key];\n\treturn \"No satisfying version (\" + rangeToString(requiredVersion) + \") of shared module \" + key + \" found in shared scope \" + scopeName + \".\\n\" +\n\t\t\"Available versions: \" + Object.keys(versions).map((key) => {\n\t\treturn key + \" from \" + versions[key].from;\n\t}).join(\", \");\n};\nvar getValidVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar entry = findValidVersion(scope, key, requiredVersion);\n\tif(entry) return get(entry);\n\tthrow new Error(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));\n};\nvar warnInvalidVersion = (scope, scopeName, key, requiredVersion) => {\n\ttypeof console !== \"undefined\" && console.warn && console.warn(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));\n};\nvar get = (entry) => {\n\tentry.loaded = 1;\n\treturn entry.get()\n};\nvar init = (fn) => (function(scopeName, a, b, c) {\n\tvar promise = __webpack_require__.I(scopeName);\n\tif (promise && promise.then) return promise.then(fn.bind(fn, scopeName, __webpack_require__.S[scopeName], a, b, c));\n\treturn fn(scopeName, __webpack_require__.S[scopeName], a, b, c);\n});\n\nvar load = /*#__PURE__*/ init((scopeName, scope, key) => {\n\tensureExistence(scopeName, key);\n\treturn get(findVersion(scope, key));\n});\nvar loadFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {\n\treturn scope && __webpack_require__.o(scope, key) ? 
get(findVersion(scope, key)) : fallback();\n});\nvar loadVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));\n});\nvar loadSingleton = /*#__PURE__*/ init((scopeName, scope, key) => {\n\tensureExistence(scopeName, key);\n\treturn getSingleton(scope, scopeName, key);\n});\nvar loadSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getSingletonVersion(scope, scopeName, key, version);\n});\nvar loadStrictVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getValidVersion(scope, scopeName, key, version);\n});\nvar loadStrictSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getStrictSingletonVersion(scope, scopeName, key, version);\n});\nvar loadVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));\n});\nvar loadSingletonFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getSingleton(scope, scopeName, key);\n});\nvar loadSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getSingletonVersion(scope, scopeName, key, version);\n});\nvar loadStrictVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tvar entry = scope && __webpack_require__.o(scope, key) && 
findValidVersion(scope, key, version);\n\treturn entry ? get(entry) : fallback();\n});\nvar loadStrictSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getStrictSingletonVersion(scope, scopeName, key, version);\n});\nvar installedModules = {};\nvar moduleToHandlerMapping = {\n\t\"webpack/sharing/consume/default/react\": () => (loadSingletonVersionCheck(\"default\", \"react\", [1,17,0,1])),\n\t\"webpack/sharing/consume/default/@jupyterlab/notebook\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/notebook\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/apputils\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/apputils\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/settingregistry\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/settingregistry\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/mainmenu\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/mainmenu\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/ui-components\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/ui-components\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@lumino/widgets\": () => (loadSingletonVersionCheck(\"default\", \"@lumino/widgets\", [1,1,33,0])),\n\t\"webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html\": () => (loadStrictVersionCheckFallback(\"default\", \"react-sanitized-html\", [1,2,0,0], () => (Promise.all([__webpack_require__.e(\"vendors-node_modules_react-sanitized-html_lib_index_js\"), __webpack_require__.e(\"webpack_sharing_consume_default_sanitize-html_sanitize-html\")]).then(() => (() => (__webpack_require__(/*! 
react-sanitized-html */ \"./node_modules/react-sanitized-html/lib/index.js\"))))))),\n\t\"webpack/sharing/consume/default/sanitize-html/sanitize-html\": () => (loadStrictVersionCheckFallback(\"default\", \"sanitize-html\", [1,1,16,1], () => (__webpack_require__.e(\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\").then(() => (() => (__webpack_require__(/*! sanitize-html */ \"./node_modules/sanitize-html/dist/sanitize-html.js\")))))))\n};\n// no consumes in initial chunks\nvar chunkMapping = {\n\t\"webpack_sharing_consume_default_react\": [\n\t\t\"webpack/sharing/consume/default/react\"\n\t],\n\t\"lib_index_js\": [\n\t\t\"webpack/sharing/consume/default/@jupyterlab/notebook\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/apputils\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/settingregistry\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/mainmenu\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/ui-components\",\n\t\t\"webpack/sharing/consume/default/@lumino/widgets\",\n\t\t\"webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html\"\n\t],\n\t\"webpack_sharing_consume_default_sanitize-html_sanitize-html\": [\n\t\t\"webpack/sharing/consume/default/sanitize-html/sanitize-html\"\n\t]\n};\n__webpack_require__.f.consumes = (chunkId, promises) => {\n\tif(__webpack_require__.o(chunkMapping, chunkId)) {\n\t\tchunkMapping[chunkId].forEach((id) => {\n\t\t\tif(__webpack_require__.o(installedModules, id)) return promises.push(installedModules[id]);\n\t\t\tvar onFactory = (factory) => {\n\t\t\t\tinstalledModules[id] = 0;\n\t\t\t\t__webpack_require__.m[id] = (module) => {\n\t\t\t\t\tdelete __webpack_require__.c[id];\n\t\t\t\t\tmodule.exports = factory();\n\t\t\t\t}\n\t\t\t};\n\t\t\tvar onError = (error) => {\n\t\t\t\tdelete installedModules[id];\n\t\t\t\t__webpack_require__.m[id] = (module) => {\n\t\t\t\t\tdelete __webpack_require__.c[id];\n\t\t\t\t\tthrow error;\n\t\t\t\t}\n\t\t\t};\n\t\t\ttry {\n\t\t\t\tvar promise = 
moduleToHandlerMapping[id]();\n\t\t\t\tif(promise.then) {\n\t\t\t\t\tpromises.push(installedModules[id] = promise.then(onFactory)['catch'](onError));\n\t\t\t\t} else onFactory(promise);\n\t\t\t} catch(e) { onError(e); }\n\t\t});\n\t}\n}","__webpack_require__.b = document.baseURI || self.location.href;\n\n// object to store loaded and loading chunks\n// undefined = chunk not loaded, null = chunk preloaded/prefetched\n// [resolve, reject, Promise] = chunk loading, 0 = chunk loaded\nvar installedChunks = {\n\t\"neural_compressor_ext_lab\": 0\n};\n\n__webpack_require__.f.j = (chunkId, promises) => {\n\t\t// JSONP chunk loading for javascript\n\t\tvar installedChunkData = __webpack_require__.o(installedChunks, chunkId) ? installedChunks[chunkId] : undefined;\n\t\tif(installedChunkData !== 0) { // 0 means \"already installed\".\n\n\t\t\t// a Promise means \"currently loading\".\n\t\t\tif(installedChunkData) {\n\t\t\t\tpromises.push(installedChunkData[2]);\n\t\t\t} else {\n\t\t\t\tif(!/^webpack_sharing_consume_default_(react|sanitize\\-html_sanitize\\-html)$/.test(chunkId)) {\n\t\t\t\t\t// setup Promise in chunk cache\n\t\t\t\t\tvar promise = new Promise((resolve, reject) => (installedChunkData = installedChunks[chunkId] = [resolve, reject]));\n\t\t\t\t\tpromises.push(installedChunkData[2] = promise);\n\n\t\t\t\t\t// start chunk loading\n\t\t\t\t\tvar url = __webpack_require__.p + __webpack_require__.u(chunkId);\n\t\t\t\t\t// create error before stack unwound to get useful stacktrace later\n\t\t\t\t\tvar error = new Error();\n\t\t\t\t\tvar loadingEnded = (event) => {\n\t\t\t\t\t\tif(__webpack_require__.o(installedChunks, chunkId)) {\n\t\t\t\t\t\t\tinstalledChunkData = installedChunks[chunkId];\n\t\t\t\t\t\t\tif(installedChunkData !== 0) installedChunks[chunkId] = undefined;\n\t\t\t\t\t\t\tif(installedChunkData) {\n\t\t\t\t\t\t\t\tvar errorType = event && (event.type === 'load' ? 
'missing' : event.type);\n\t\t\t\t\t\t\t\tvar realSrc = event && event.target && event.target.src;\n\t\t\t\t\t\t\t\terror.message = 'Loading chunk ' + chunkId + ' failed.\\n(' + errorType + ': ' + realSrc + ')';\n\t\t\t\t\t\t\t\terror.name = 'ChunkLoadError';\n\t\t\t\t\t\t\t\terror.type = errorType;\n\t\t\t\t\t\t\t\terror.request = realSrc;\n\t\t\t\t\t\t\t\tinstalledChunkData[1](error);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t};\n\t\t\t\t\t__webpack_require__.l(url, loadingEnded, \"chunk-\" + chunkId, chunkId);\n\t\t\t\t} else installedChunks[chunkId] = 0;\n\t\t\t}\n\t\t}\n};\n\n// no prefetching\n\n// no preloaded\n\n// no HMR\n\n// no HMR manifest\n\n// no on chunks loaded\n\n// install a JSONP callback for chunk loading\nvar webpackJsonpCallback = (parentChunkLoadingFunction, data) => {\n\tvar [chunkIds, moreModules, runtime] = data;\n\t// add \"moreModules\" to the modules object,\n\t// then flag all \"chunkIds\" as loaded and fire callback\n\tvar moduleId, chunkId, i = 0;\n\tif(chunkIds.some((id) => (installedChunks[id] !== 0))) {\n\t\tfor(moduleId in moreModules) {\n\t\t\tif(__webpack_require__.o(moreModules, moduleId)) {\n\t\t\t\t__webpack_require__.m[moduleId] = moreModules[moduleId];\n\t\t\t}\n\t\t}\n\t\tif(runtime) var result = runtime(__webpack_require__);\n\t}\n\tif(parentChunkLoadingFunction) parentChunkLoadingFunction(data);\n\tfor(;i < chunkIds.length; i++) {\n\t\tchunkId = chunkIds[i];\n\t\tif(__webpack_require__.o(installedChunks, chunkId) && installedChunks[chunkId]) {\n\t\t\tinstalledChunks[chunkId][0]();\n\t\t}\n\t\tinstalledChunks[chunkId] = 0;\n\t}\n\n}\n\nvar chunkLoadingGlobal = self[\"webpackChunkneural_compressor_ext_lab\"] = self[\"webpackChunkneural_compressor_ext_lab\"] || [];\nchunkLoadingGlobal.forEach(webpackJsonpCallback.bind(null, 0));\nchunkLoadingGlobal.push = webpackJsonpCallback.bind(null, chunkLoadingGlobal.push.bind(chunkLoadingGlobal));","__webpack_require__.nc = undefined;","","// module cache are used so entry inlining 
is disabled\n// startup\n// Load entry module and return exports\nvar __webpack_exports__ = __webpack_require__(\"webpack/container/entry/neural_compressor_ext_lab\");\n",""],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.e241a62ffe7f3e40b1d0.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.e241a62ffe7f3e40b1d0.js.map
deleted file mode 100644
index 7bff25b9d23..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/remoteEntry.e241a62ffe7f3e40b1d0.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"remoteEntry.e241a62ffe7f3e40b1d0.js","mappings":";;;;;;;;;;;AAAA;AACA;AACA;AACA,EAAE;AACF;AACA;AACA,EAAE;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI;AACJ;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA;AACA;AACA;AACA;AACA,CAAC;;;;;;UCpCD;UACA;;UAEA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;UACA;;UAEA;UACA;;UAEA;UACA;UACA;;UAEA;UACA;;UAEA;UACA;;;;;WC5BA;WACA;WACA;WACA;WACA;WACA,iCAAiC,WAAW;WAC5C;WACA;;;;;WCPA;WACA;WACA;WACA;WACA,yCAAyC,wCAAwC;WACjF;WACA;WACA;;;;;WCPA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;;;;;WCRA;WACA;WACA;WACA,8BAA8B,kgBAAkgB;WAChiB;;;;;WCJA;WACA;WACA;WACA;WACA,GAAG;WACH;WACA;WACA,CAAC;;;;;WCPD;;;;;WCAA;WACA;WACA;WACA;WACA,uBAAuB,4BAA4B;WACnD;WACA;WACA;WACA,iBAAiB,oBAAoB;WACrC;WACA,mGAAmG,YAAY;WAC/G;WACA;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,mEAAmE,iCAAiC;WACpG;WACA;WACA;WACA;;;;;WCzCA;WACA;WACA;WACA,uDAAuD,iBAAiB;WACxE;WACA,gDAAgD,aAAa;WAC7D;;;;;WCNA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,oJAAoJ;WACpJ;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,IAAI,aAAa;WACjB;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;;;;;WC7CA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;;;;;WCfA;WACA;WACA,WAAW,6BAA6B,iBAAiB,GAAG,qEAAqE;WACjI;WACA;WACA;WACA,qCAAqC,aAAa,EAAE,wDAAwD,2BAA2B,4BAA4B,2BAA2B,+CAA+C,mCAAmC;WAChR;WACA;WACA;WACA,qBAAqB,8BAA8B,SAAS,sDAAsD,gBAAgB,eAAe,KAAK,6DAA6D,SAAS,SAAS,QAAQ,eAAe,KAAK,eAAe,qGAAqG,WAAW,aAAa;WAC7Y;WACA;WACA;WACA,gBAAgB,8BAA8B,qBAAqB,YAAY,sBAAsB,SAAS,iDAAiD,6FAA6F,WAAW,uBAAuB,2BAA2B,wBAAwB,KAAK,oCAAoC,oBAAoB,wBAAwB,oBAAoB,SAAS,KAAK,yBAAyB,KAAK,gCAAgC,yBAAyB,QAAQ,eAAe,KAAK,eAAe,4DAA4D;WACtoB;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;W
ACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA,EAAE;WACF;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,CAAC;;WAED;WACA;WACA;WACA,CAAC;WACD;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA,CAAC;WACD;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM;WACN,KAAK,WAAW;WAChB,GAAG;WACH;WACA;;;;;WC3LA;;WAEA;WACA;WACA;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA,iCAAiC;;WAEjC;WACA;WACA;WACA,KAAK;WACL;WACA;WACA;WACA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM;WACN;WACA;WACA;;WAEA;;WAEA;;WAEA;;WAEA;;WAEA;;WAEA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA;WACA,MAAM,qBAAqB;WAC3B;WACA;WACA;WACA;WACA;WACA;;WAEA;;WAEA;WACA;WACA;;;;;WCrFA;;;;;UEAA;UACA;UACA;UACA","sources":["webpack://neural_compressor_ext_lab/webpack/container-entry","webpack://neural_compressor_ext_lab/webpack/bootstrap","webpack://neural_compressor_ext_lab/webpack/runtime/compat get default export","webpack://neural_compressor_ext_lab/webpack/runtime/define property getters","webpack://neural_compressor_ext_lab/webpack/runtime/ensure chunk","webpack://neural_compressor_ext_lab/webpack/runtime/get javascript chunk filename","webpack://neural_compressor_ext_lab/webpack/runtime/global","webpack://neural_compressor_ext_lab/webpack/runtime/hasOwnProperty shorthand","webpack://neural_compressor_ext_lab/webpack/runtime/load script","webpack://neural_compressor_ext_lab/webpack/runtime/make namespace 
object","webpack://neural_compressor_ext_lab/webpack/runtime/sharing","webpack://neural_compressor_ext_lab/webpack/runtime/publicPath","webpack://neural_compressor_ext_lab/webpack/runtime/consumes","webpack://neural_compressor_ext_lab/webpack/runtime/jsonp chunk loading","webpack://neural_compressor_ext_lab/webpack/runtime/nonce","webpack://neural_compressor_ext_lab/webpack/before-startup","webpack://neural_compressor_ext_lab/webpack/startup","webpack://neural_compressor_ext_lab/webpack/after-startup"],"sourcesContent":["var moduleMap = {\n\t\"./index\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ \"./lib/index.js\")))));\n\t},\n\t\"./extension\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./lib/index.js */ \"./lib/index.js\")))));\n\t},\n\t\"./style\": () => {\n\t\treturn Promise.all([__webpack_require__.e(\"vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854\"), __webpack_require__.e(\"style_index_js\")]).then(() => (() => ((__webpack_require__(/*! ./style/index.js */ \"./style/index.js\")))));\n\t}\n};\nvar get = (module, getScope) => {\n\t__webpack_require__.R = getScope;\n\tgetScope = (\n\t\t__webpack_require__.o(moduleMap, module)\n\t\t\t? 
moduleMap[module]()\n\t\t\t: Promise.resolve().then(() => {\n\t\t\t\tthrow new Error('Module \"' + module + '\" does not exist in container.');\n\t\t\t})\n\t);\n\t__webpack_require__.R = undefined;\n\treturn getScope;\n};\nvar init = (shareScope, initScope) => {\n\tif (!__webpack_require__.S) return;\n\tvar name = \"default\"\n\tvar oldScope = __webpack_require__.S[name];\n\tif(oldScope && oldScope !== shareScope) throw new Error(\"Container initialization failed as it has already been initialized with a different share scope\");\n\t__webpack_require__.S[name] = shareScope;\n\treturn __webpack_require__.I(name, initScope);\n};\n\n// This exports getters to disallow modifications\n__webpack_require__.d(exports, {\n\tget: () => (get),\n\tinit: () => (init)\n});","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\tid: moduleId,\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n// expose the modules object (__webpack_modules__)\n__webpack_require__.m = __webpack_modules__;\n\n// expose the module cache\n__webpack_require__.c = __webpack_module_cache__;\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = (exports, definition) 
=> {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.f = {};\n// This file contains only the entry chunk.\n// The chunk loading function for additional chunks\n__webpack_require__.e = (chunkId) => {\n\treturn Promise.all(Object.keys(__webpack_require__.f).reduce((promises, key) => {\n\t\t__webpack_require__.f[key](chunkId, promises);\n\t\treturn promises;\n\t}, []));\n};","// This function allow to reference async chunks\n__webpack_require__.u = (chunkId) => {\n\t// return url for filenames based on template\n\treturn \"\" + chunkId + \".\" + {\"webpack_sharing_consume_default_react\":\"19c51f7b56cfd16da3f9\",\"lib_index_js\":\"2c3b18119886a0a82200\",\"vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854\":\"e09faf9ec3a764e40dc7\",\"style_index_js\":\"8d733cc8b74fabbd10b8\",\"vendors-node_modules_react-sanitized-html_lib_index_js\":\"500104f7c13c01fe1646\",\"webpack_sharing_consume_default_sanitize-html_sanitize-html\":\"635249bb6dc3884c24a3\",\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\":\"825dbf94ec7371e0b28d\"}[chunkId] + \".js\";\n};","__webpack_require__.g = (function() {\n\tif (typeof globalThis === 'object') return globalThis;\n\ttry {\n\t\treturn this || new Function('return this')();\n\t} catch (e) {\n\t\tif (typeof window === 'object') return window;\n\t}\n})();","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","var inProgress = {};\nvar dataWebpackPrefix = \"neural_compressor_ext_lab:\";\n// loadScript function to load a script via script tag\n__webpack_require__.l = (url, done, key, chunkId) => {\n\tif(inProgress[url]) { inProgress[url].push(done); return; }\n\tvar script, needAttach;\n\tif(key !== undefined) {\n\t\tvar scripts = 
document.getElementsByTagName(\"script\");\n\t\tfor(var i = 0; i < scripts.length; i++) {\n\t\t\tvar s = scripts[i];\n\t\t\tif(s.getAttribute(\"src\") == url || s.getAttribute(\"data-webpack\") == dataWebpackPrefix + key) { script = s; break; }\n\t\t}\n\t}\n\tif(!script) {\n\t\tneedAttach = true;\n\t\tscript = document.createElement('script');\n\n\t\tscript.charset = 'utf-8';\n\t\tscript.timeout = 120;\n\t\tif (__webpack_require__.nc) {\n\t\t\tscript.setAttribute(\"nonce\", __webpack_require__.nc);\n\t\t}\n\t\tscript.setAttribute(\"data-webpack\", dataWebpackPrefix + key);\n\t\tscript.src = url;\n\t}\n\tinProgress[url] = [done];\n\tvar onScriptComplete = (prev, event) => {\n\t\t// avoid mem leaks in IE.\n\t\tscript.onerror = script.onload = null;\n\t\tclearTimeout(timeout);\n\t\tvar doneFns = inProgress[url];\n\t\tdelete inProgress[url];\n\t\tscript.parentNode && script.parentNode.removeChild(script);\n\t\tdoneFns && doneFns.forEach((fn) => (fn(event)));\n\t\tif(prev) return prev(event);\n\t}\n\t;\n\tvar timeout = setTimeout(onScriptComplete.bind(null, undefined, { type: 'timeout', target: script }), 120000);\n\tscript.onerror = onScriptComplete.bind(null, script.onerror);\n\tscript.onload = onScriptComplete.bind(null, script.onload);\n\tneedAttach && document.head.appendChild(script);\n};","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","__webpack_require__.S = {};\nvar initPromises = {};\nvar initTokens = {};\n__webpack_require__.I = (name, initScope) => {\n\tif(!initScope) initScope = [];\n\t// handling circular init calls\n\tvar initToken = initTokens[name];\n\tif(!initToken) initToken = initTokens[name] = {};\n\tif(initScope.indexOf(initToken) >= 0) return;\n\tinitScope.push(initToken);\n\t// only runs 
once\n\tif(initPromises[name]) return initPromises[name];\n\t// creates a new share scope if needed\n\tif(!__webpack_require__.o(__webpack_require__.S, name)) __webpack_require__.S[name] = {};\n\t// runs all init snippets from all modules reachable\n\tvar scope = __webpack_require__.S[name];\n\tvar warn = (msg) => (typeof console !== \"undefined\" && console.warn && console.warn(msg));\n\tvar uniqueName = \"neural_compressor_ext_lab\";\n\tvar register = (name, version, factory, eager) => {\n\t\tvar versions = scope[name] = scope[name] || {};\n\t\tvar activeVersion = versions[version];\n\t\tif(!activeVersion || (!activeVersion.loaded && (!eager != !activeVersion.eager ? eager : uniqueName > activeVersion.from))) versions[version] = { get: factory, from: uniqueName, eager: !!eager };\n\t};\n\tvar initExternal = (id) => {\n\t\tvar handleError = (err) => (warn(\"Initialization of sharing external failed: \" + err));\n\t\ttry {\n\t\t\tvar module = __webpack_require__(id);\n\t\t\tif(!module) return;\n\t\t\tvar initFn = (module) => (module && module.init && module.init(__webpack_require__.S[name], initScope))\n\t\t\tif(module.then) return promises.push(module.then(initFn, handleError));\n\t\t\tvar initResult = initFn(module);\n\t\t\tif(initResult && initResult.then) return promises.push(initResult['catch'](handleError));\n\t\t} catch(err) { handleError(err); }\n\t}\n\tvar promises = [];\n\tswitch(name) {\n\t\tcase \"default\": {\n\t\t\tregister(\"neural_compressor_ext_lab\", \"0.1.0\", () => (Promise.all([__webpack_require__.e(\"webpack_sharing_consume_default_react\"), __webpack_require__.e(\"lib_index_js\")]).then(() => (() => (__webpack_require__(/*! 
./lib/index.js */ \"./lib/index.js\"))))));\n\t\t\tregister(\"react-sanitized-html\", \"2.0.0\", () => (Promise.all([__webpack_require__.e(\"vendors-node_modules_react-sanitized-html_lib_index_js\"), __webpack_require__.e(\"webpack_sharing_consume_default_sanitize-html_sanitize-html\"), __webpack_require__.e(\"webpack_sharing_consume_default_react\")]).then(() => (() => (__webpack_require__(/*! ./node_modules/react-sanitized-html/lib/index.js */ \"./node_modules/react-sanitized-html/lib/index.js\"))))));\n\t\t\tregister(\"sanitize-html\", \"1.27.5\", () => (__webpack_require__.e(\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\").then(() => (() => (__webpack_require__(/*! ./node_modules/sanitize-html/dist/sanitize-html.js */ \"./node_modules/sanitize-html/dist/sanitize-html.js\"))))));\n\t\t}\n\t\tbreak;\n\t}\n\tif(!promises.length) return initPromises[name] = 1;\n\treturn initPromises[name] = Promise.all(promises).then(() => (initPromises[name] = 1));\n};","var scriptUrl;\nif (__webpack_require__.g.importScripts) scriptUrl = __webpack_require__.g.location + \"\";\nvar document = __webpack_require__.g.document;\nif (!scriptUrl && document) {\n\tif (document.currentScript)\n\t\tscriptUrl = document.currentScript.src\n\tif (!scriptUrl) {\n\t\tvar scripts = document.getElementsByTagName(\"script\");\n\t\tif(scripts.length) scriptUrl = scripts[scripts.length - 1].src\n\t}\n}\n// When supporting browsers where an automatic publicPath is not supported you must specify an output.publicPath manually via configuration\n// or pass an empty string (\"\") and set the __webpack_public_path__ variable from your code to use your own logic.\nif (!scriptUrl) throw new Error(\"Automatic publicPath is not supported in this browser\");\nscriptUrl = scriptUrl.replace(/#.*$/, \"\").replace(/\\?.*$/, \"\").replace(/\\/[^\\/]+$/, \"/\");\n__webpack_require__.p = scriptUrl;","var parseVersion = (str) => {\n\t// see webpack/lib/util/semver.js for original code\n\tvar p=p=>{return 
p.split(\".\").map((p=>{return+p==p?+p:p}))},n=/^([^-+]+)?(?:-([^+]+))?(?:\\+(.+))?$/.exec(str),r=n[1]?p(n[1]):[];return n[2]&&(r.length++,r.push.apply(r,p(n[2]))),n[3]&&(r.push([]),r.push.apply(r,p(n[3]))),r;\n}\nvar versionLt = (a, b) => {\n\t// see webpack/lib/util/semver.js for original code\n\ta=parseVersion(a),b=parseVersion(b);for(var r=0;;){if(r>=a.length)return r=b.length)return\"u\"==n;var t=b[r],f=(typeof t)[0];if(n!=f)return\"o\"==n&&\"n\"==f||(\"s\"==f||\"u\"==n);if(\"o\"!=n&&\"u\"!=n&&e!=t)return e {\n\t// see webpack/lib/util/semver.js for original code\n\tvar r=range[0],n=\"\";if(1===range.length)return\"*\";if(r+.5){n+=0==r?\">=\":-1==r?\"<\":1==r?\"^\":2==r?\"~\":r>0?\"=\":\"!=\";for(var e=1,a=1;a0?\".\":\"\")+(e=2,t)}return n}var g=[];for(a=1;a {\n\t// see webpack/lib/util/semver.js for original code\n\tif(0 in range){version=parseVersion(version);var e=range[0],r=e<0;r&&(e=-e-1);for(var n=0,i=1,a=!0;;i++,n++){var f,s,g=i=version.length||\"o\"==(s=(typeof(f=version[n]))[0]))return!a||(\"u\"==g?i>e&&!r:\"\"==g!=r);if(\"u\"==s){if(!a||\"u\"!=g)return!1}else if(a)if(g==s)if(i<=e){if(f!=range[i])return!1}else{if(r?f>range[i]:f {\n\tvar scope = __webpack_require__.S[scopeName];\n\tif(!scope || !__webpack_require__.o(scope, key)) throw new Error(\"Shared module \" + key + \" doesn't exist in shared scope \" + scopeName);\n\treturn scope;\n};\nvar findVersion = (scope, key) => {\n\tvar versions = scope[key];\n\tvar key = Object.keys(versions).reduce((a, b) => {\n\t\treturn !a || versionLt(a, b) ? b : a;\n\t}, 0);\n\treturn key && versions[key]\n};\nvar findSingletonVersionKey = (scope, key) => {\n\tvar versions = scope[key];\n\treturn Object.keys(versions).reduce((a, b) => {\n\t\treturn !a || (!versions[a].loaded && versionLt(a, b)) ? 
b : a;\n\t}, 0);\n};\nvar getInvalidSingletonVersionMessage = (scope, key, version, requiredVersion) => {\n\treturn \"Unsatisfied version \" + version + \" from \" + (version && scope[key][version].from) + \" of shared singleton module \" + key + \" (required \" + rangeToString(requiredVersion) + \")\"\n};\nvar getSingleton = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\treturn get(scope[key][version]);\n};\nvar getSingletonVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\tif (!satisfy(requiredVersion, version)) typeof console !== \"undefined\" && console.warn && console.warn(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));\n\treturn get(scope[key][version]);\n};\nvar getStrictSingletonVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar version = findSingletonVersionKey(scope, key);\n\tif (!satisfy(requiredVersion, version)) throw new Error(getInvalidSingletonVersionMessage(scope, key, version, requiredVersion));\n\treturn get(scope[key][version]);\n};\nvar findValidVersion = (scope, key, requiredVersion) => {\n\tvar versions = scope[key];\n\tvar key = Object.keys(versions).reduce((a, b) => {\n\t\tif (!satisfy(requiredVersion, b)) return a;\n\t\treturn !a || versionLt(a, b) ? 
b : a;\n\t}, 0);\n\treturn key && versions[key]\n};\nvar getInvalidVersionMessage = (scope, scopeName, key, requiredVersion) => {\n\tvar versions = scope[key];\n\treturn \"No satisfying version (\" + rangeToString(requiredVersion) + \") of shared module \" + key + \" found in shared scope \" + scopeName + \".\\n\" +\n\t\t\"Available versions: \" + Object.keys(versions).map((key) => {\n\t\treturn key + \" from \" + versions[key].from;\n\t}).join(\", \");\n};\nvar getValidVersion = (scope, scopeName, key, requiredVersion) => {\n\tvar entry = findValidVersion(scope, key, requiredVersion);\n\tif(entry) return get(entry);\n\tthrow new Error(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));\n};\nvar warnInvalidVersion = (scope, scopeName, key, requiredVersion) => {\n\ttypeof console !== \"undefined\" && console.warn && console.warn(getInvalidVersionMessage(scope, scopeName, key, requiredVersion));\n};\nvar get = (entry) => {\n\tentry.loaded = 1;\n\treturn entry.get()\n};\nvar init = (fn) => (function(scopeName, a, b, c) {\n\tvar promise = __webpack_require__.I(scopeName);\n\tif (promise && promise.then) return promise.then(fn.bind(fn, scopeName, __webpack_require__.S[scopeName], a, b, c));\n\treturn fn(scopeName, __webpack_require__.S[scopeName], a, b, c);\n});\n\nvar load = /*#__PURE__*/ init((scopeName, scope, key) => {\n\tensureExistence(scopeName, key);\n\treturn get(findVersion(scope, key));\n});\nvar loadFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {\n\treturn scope && __webpack_require__.o(scope, key) ? 
get(findVersion(scope, key)) : fallback();\n});\nvar loadVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));\n});\nvar loadSingleton = /*#__PURE__*/ init((scopeName, scope, key) => {\n\tensureExistence(scopeName, key);\n\treturn getSingleton(scope, scopeName, key);\n});\nvar loadSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getSingletonVersion(scope, scopeName, key, version);\n});\nvar loadStrictVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getValidVersion(scope, scopeName, key, version);\n});\nvar loadStrictSingletonVersionCheck = /*#__PURE__*/ init((scopeName, scope, key, version) => {\n\tensureExistence(scopeName, key);\n\treturn getStrictSingletonVersion(scope, scopeName, key, version);\n});\nvar loadVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn get(findValidVersion(scope, key, version) || warnInvalidVersion(scope, scopeName, key, version) || findVersion(scope, key));\n});\nvar loadSingletonFallback = /*#__PURE__*/ init((scopeName, scope, key, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getSingleton(scope, scopeName, key);\n});\nvar loadSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getSingletonVersion(scope, scopeName, key, version);\n});\nvar loadStrictVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tvar entry = scope && __webpack_require__.o(scope, key) && 
findValidVersion(scope, key, version);\n\treturn entry ? get(entry) : fallback();\n});\nvar loadStrictSingletonVersionCheckFallback = /*#__PURE__*/ init((scopeName, scope, key, version, fallback) => {\n\tif(!scope || !__webpack_require__.o(scope, key)) return fallback();\n\treturn getStrictSingletonVersion(scope, scopeName, key, version);\n});\nvar installedModules = {};\nvar moduleToHandlerMapping = {\n\t\"webpack/sharing/consume/default/react\": () => (loadSingletonVersionCheck(\"default\", \"react\", [1,17,0,1])),\n\t\"webpack/sharing/consume/default/@jupyterlab/notebook\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/notebook\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/apputils\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/apputils\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/settingregistry\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/settingregistry\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/mainmenu\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/mainmenu\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@jupyterlab/ui-components\": () => (loadSingletonVersionCheck(\"default\", \"@jupyterlab/ui-components\", [1,3,4,7])),\n\t\"webpack/sharing/consume/default/@lumino/widgets\": () => (loadSingletonVersionCheck(\"default\", \"@lumino/widgets\", [1,1,33,0])),\n\t\"webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html\": () => (loadStrictVersionCheckFallback(\"default\", \"react-sanitized-html\", [1,2,0,0], () => (Promise.all([__webpack_require__.e(\"vendors-node_modules_react-sanitized-html_lib_index_js\"), __webpack_require__.e(\"webpack_sharing_consume_default_sanitize-html_sanitize-html\")]).then(() => (() => (__webpack_require__(/*! 
react-sanitized-html */ \"./node_modules/react-sanitized-html/lib/index.js\"))))))),\n\t\"webpack/sharing/consume/default/sanitize-html/sanitize-html\": () => (loadStrictVersionCheckFallback(\"default\", \"sanitize-html\", [1,1,16,1], () => (__webpack_require__.e(\"vendors-node_modules_sanitize-html_dist_sanitize-html_js\").then(() => (() => (__webpack_require__(/*! sanitize-html */ \"./node_modules/sanitize-html/dist/sanitize-html.js\")))))))\n};\n// no consumes in initial chunks\nvar chunkMapping = {\n\t\"webpack_sharing_consume_default_react\": [\n\t\t\"webpack/sharing/consume/default/react\"\n\t],\n\t\"lib_index_js\": [\n\t\t\"webpack/sharing/consume/default/@jupyterlab/notebook\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/apputils\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/settingregistry\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/mainmenu\",\n\t\t\"webpack/sharing/consume/default/@jupyterlab/ui-components\",\n\t\t\"webpack/sharing/consume/default/@lumino/widgets\",\n\t\t\"webpack/sharing/consume/default/react-sanitized-html/react-sanitized-html\"\n\t],\n\t\"webpack_sharing_consume_default_sanitize-html_sanitize-html\": [\n\t\t\"webpack/sharing/consume/default/sanitize-html/sanitize-html\"\n\t]\n};\n__webpack_require__.f.consumes = (chunkId, promises) => {\n\tif(__webpack_require__.o(chunkMapping, chunkId)) {\n\t\tchunkMapping[chunkId].forEach((id) => {\n\t\t\tif(__webpack_require__.o(installedModules, id)) return promises.push(installedModules[id]);\n\t\t\tvar onFactory = (factory) => {\n\t\t\t\tinstalledModules[id] = 0;\n\t\t\t\t__webpack_require__.m[id] = (module) => {\n\t\t\t\t\tdelete __webpack_require__.c[id];\n\t\t\t\t\tmodule.exports = factory();\n\t\t\t\t}\n\t\t\t};\n\t\t\tvar onError = (error) => {\n\t\t\t\tdelete installedModules[id];\n\t\t\t\t__webpack_require__.m[id] = (module) => {\n\t\t\t\t\tdelete __webpack_require__.c[id];\n\t\t\t\t\tthrow error;\n\t\t\t\t}\n\t\t\t};\n\t\t\ttry {\n\t\t\t\tvar promise = 
moduleToHandlerMapping[id]();\n\t\t\t\tif(promise.then) {\n\t\t\t\t\tpromises.push(installedModules[id] = promise.then(onFactory)['catch'](onError));\n\t\t\t\t} else onFactory(promise);\n\t\t\t} catch(e) { onError(e); }\n\t\t});\n\t}\n}","__webpack_require__.b = document.baseURI || self.location.href;\n\n// object to store loaded and loading chunks\n// undefined = chunk not loaded, null = chunk preloaded/prefetched\n// [resolve, reject, Promise] = chunk loading, 0 = chunk loaded\nvar installedChunks = {\n\t\"neural_compressor_ext_lab\": 0\n};\n\n__webpack_require__.f.j = (chunkId, promises) => {\n\t\t// JSONP chunk loading for javascript\n\t\tvar installedChunkData = __webpack_require__.o(installedChunks, chunkId) ? installedChunks[chunkId] : undefined;\n\t\tif(installedChunkData !== 0) { // 0 means \"already installed\".\n\n\t\t\t// a Promise means \"currently loading\".\n\t\t\tif(installedChunkData) {\n\t\t\t\tpromises.push(installedChunkData[2]);\n\t\t\t} else {\n\t\t\t\tif(!/^webpack_sharing_consume_default_(react|sanitize\\-html_sanitize\\-html)$/.test(chunkId)) {\n\t\t\t\t\t// setup Promise in chunk cache\n\t\t\t\t\tvar promise = new Promise((resolve, reject) => (installedChunkData = installedChunks[chunkId] = [resolve, reject]));\n\t\t\t\t\tpromises.push(installedChunkData[2] = promise);\n\n\t\t\t\t\t// start chunk loading\n\t\t\t\t\tvar url = __webpack_require__.p + __webpack_require__.u(chunkId);\n\t\t\t\t\t// create error before stack unwound to get useful stacktrace later\n\t\t\t\t\tvar error = new Error();\n\t\t\t\t\tvar loadingEnded = (event) => {\n\t\t\t\t\t\tif(__webpack_require__.o(installedChunks, chunkId)) {\n\t\t\t\t\t\t\tinstalledChunkData = installedChunks[chunkId];\n\t\t\t\t\t\t\tif(installedChunkData !== 0) installedChunks[chunkId] = undefined;\n\t\t\t\t\t\t\tif(installedChunkData) {\n\t\t\t\t\t\t\t\tvar errorType = event && (event.type === 'load' ? 
'missing' : event.type);\n\t\t\t\t\t\t\t\tvar realSrc = event && event.target && event.target.src;\n\t\t\t\t\t\t\t\terror.message = 'Loading chunk ' + chunkId + ' failed.\\n(' + errorType + ': ' + realSrc + ')';\n\t\t\t\t\t\t\t\terror.name = 'ChunkLoadError';\n\t\t\t\t\t\t\t\terror.type = errorType;\n\t\t\t\t\t\t\t\terror.request = realSrc;\n\t\t\t\t\t\t\t\tinstalledChunkData[1](error);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t};\n\t\t\t\t\t__webpack_require__.l(url, loadingEnded, \"chunk-\" + chunkId, chunkId);\n\t\t\t\t} else installedChunks[chunkId] = 0;\n\t\t\t}\n\t\t}\n};\n\n// no prefetching\n\n// no preloaded\n\n// no HMR\n\n// no HMR manifest\n\n// no on chunks loaded\n\n// install a JSONP callback for chunk loading\nvar webpackJsonpCallback = (parentChunkLoadingFunction, data) => {\n\tvar [chunkIds, moreModules, runtime] = data;\n\t// add \"moreModules\" to the modules object,\n\t// then flag all \"chunkIds\" as loaded and fire callback\n\tvar moduleId, chunkId, i = 0;\n\tif(chunkIds.some((id) => (installedChunks[id] !== 0))) {\n\t\tfor(moduleId in moreModules) {\n\t\t\tif(__webpack_require__.o(moreModules, moduleId)) {\n\t\t\t\t__webpack_require__.m[moduleId] = moreModules[moduleId];\n\t\t\t}\n\t\t}\n\t\tif(runtime) var result = runtime(__webpack_require__);\n\t}\n\tif(parentChunkLoadingFunction) parentChunkLoadingFunction(data);\n\tfor(;i < chunkIds.length; i++) {\n\t\tchunkId = chunkIds[i];\n\t\tif(__webpack_require__.o(installedChunks, chunkId) && installedChunks[chunkId]) {\n\t\t\tinstalledChunks[chunkId][0]();\n\t\t}\n\t\tinstalledChunks[chunkId] = 0;\n\t}\n\n}\n\nvar chunkLoadingGlobal = self[\"webpackChunkneural_compressor_ext_lab\"] = self[\"webpackChunkneural_compressor_ext_lab\"] || [];\nchunkLoadingGlobal.forEach(webpackJsonpCallback.bind(null, 0));\nchunkLoadingGlobal.push = webpackJsonpCallback.bind(null, chunkLoadingGlobal.push.bind(chunkLoadingGlobal));","__webpack_require__.nc = undefined;","","// module cache are used so entry inlining 
is disabled\n// startup\n// Load entry module and return exports\nvar __webpack_exports__ = __webpack_require__(\"webpack/container/entry/neural_compressor_ext_lab\");\n",""],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style.js
deleted file mode 100644
index 7b10c692587..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style.js
+++ /dev/null
@@ -1,4 +0,0 @@
-/* This is a generated file of CSS imports */
-/* It was generated by @jupyterlab/builder in Build.ensureAssets() */
-
-import 'neural_compressor_ext_lab/style/index.js';
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style_index_js.8d733cc8b74fabbd10b8.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style_index_js.8d733cc8b74fabbd10b8.js
deleted file mode 100644
index e7bdb005a65..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style_index_js.8d733cc8b74fabbd10b8.js
+++ /dev/null
@@ -1,88 +0,0 @@
-"use strict";
-(self["webpackChunkneural_compressor_ext_lab"] = self["webpackChunkneural_compressor_ext_lab"] || []).push([["style_index_js"],{
-
-/***/ "./node_modules/css-loader/dist/cjs.js!./style/base.css":
-/*!**************************************************************!*\
- !*** ./node_modules/css-loader/dist/cjs.js!./style/base.css ***!
- \**************************************************************/
-/***/ ((module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (__WEBPACK_DEFAULT_EXPORT__)
-/* harmony export */ });
-/* harmony import */ var _node_modules_css_loader_dist_runtime_sourceMaps_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../node_modules/css-loader/dist/runtime/sourceMaps.js */ "./node_modules/css-loader/dist/runtime/sourceMaps.js");
-/* harmony import */ var _node_modules_css_loader_dist_runtime_sourceMaps_js__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_node_modules_css_loader_dist_runtime_sourceMaps_js__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _node_modules_css_loader_dist_runtime_api_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../node_modules/css-loader/dist/runtime/api.js */ "./node_modules/css-loader/dist/runtime/api.js");
-/* harmony import */ var _node_modules_css_loader_dist_runtime_api_js__WEBPACK_IMPORTED_MODULE_1___default = /*#__PURE__*/__webpack_require__.n(_node_modules_css_loader_dist_runtime_api_js__WEBPACK_IMPORTED_MODULE_1__);
-/* harmony import */ var _node_modules_css_loader_dist_runtime_getUrl_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../node_modules/css-loader/dist/runtime/getUrl.js */ "./node_modules/css-loader/dist/runtime/getUrl.js");
-/* harmony import */ var _node_modules_css_loader_dist_runtime_getUrl_js__WEBPACK_IMPORTED_MODULE_2___default = /*#__PURE__*/__webpack_require__.n(_node_modules_css_loader_dist_runtime_getUrl_js__WEBPACK_IMPORTED_MODULE_2__);
-// Imports
-
-
-
-var ___CSS_LOADER_URL_IMPORT_0___ = new URL(/* asset import */ __webpack_require__(/*! data:image/svg+xml, */ "data:image/svg+xml, "), __webpack_require__.b);
-var ___CSS_LOADER_EXPORT___ = _node_modules_css_loader_dist_runtime_api_js__WEBPACK_IMPORTED_MODULE_1___default()((_node_modules_css_loader_dist_runtime_sourceMaps_js__WEBPACK_IMPORTED_MODULE_0___default()));
-var ___CSS_LOADER_URL_REPLACEMENT_0___ = _node_modules_css_loader_dist_runtime_getUrl_js__WEBPACK_IMPORTED_MODULE_2___default()(___CSS_LOADER_URL_IMPORT_0___);
-// Module
-___CSS_LOADER_EXPORT___.push([module.id, "/*\n See the JupyterLab Developer Guide for useful CSS Patterns:\n\n https://jupyterlab.readthedocs.io/en/stable/developer/css.html\n*/\n.lds-ripple {\n display: flex;\n position: absolute; \n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 80px;\n height: 80px;\n}\n.lds-ripple div {\n position: absolute; \n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n border: 4px solid rgb(245, 131, 55);\n opacity: 1;\n border-radius: 50%;\n animation: lds-ripple 1s cubic-bezier(0, 0.2, 0.8, 1) infinite;\n}\n.lds-ripple div:nth-child(2) {\n animation-delay: -0.5s;\n}\n@keyframes lds-ripple {\n 0% {\n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 0;\n height: 0;\n opacity: 0;\n }\n 4.9% {\n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 0;\n height: 0;\n opacity: 0;\n }\n 5% {\n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 0;\n height: 0;\n opacity: 1;\n }\n 100% {\n top: 0px;\n left: 0px;\n width: 72px;\n height: 72px;\n opacity: 0;\n }\n}\n\n\n/* CSS */\n.button-62 {\n background: linear-gradient(to bottom right, #EF4765, #FF9A5A);\n border: 0;\n border-radius: 12px;\n color: #FFFFFF;\n cursor: pointer;\n display: inline-block;\n font-family: -apple-system,system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif;\n font-size: 16px;\n font-weight: 500;\n line-height: 2.5;\n outline: transparent;\n padding: 0 1rem;\n text-align: center;\n text-decoration: none;\n transition: box-shadow .2s ease-in-out;\n user-select: none;\n -webkit-user-select: none;\n touch-action: manipulation;\n white-space: nowrap;\n}\n\n.button-62:not([disabled]):focus {\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\n}\n\n.button-62:not([disabled]):hover {\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), 
.125rem .125rem 1rem rgba(255, 154, 90, 0.5);\n}\n\n.aselector select {\n background-color: initial;\n border: none;\n border-radius: 0;\n box-shadow: none;\n color: var(--jp-ui-font-color0);\n display: block;\n font-size: var(--jp-ui-font-size1);\n height: 24px;\n line-height: 14px;\n padding: 0 25px 0 10px;\n text-align: left;\n -moz-appearance: none;\n -webkit-appearance: none;\n}\n\n/* Use our own theme for hover and option styles */\n.aselector select:hover,\n.aselector select > option {\n background-color: var(--jp-layout-color2);\n color: var(--jp-ui-font-color0);\n}\nselect {\n box-sizing: border-box;\n}\n\n.font{\nbackground-color: initial;\nborder: none;\nheight: 21px;\nborder-radius: 0;\nfont-weight:500;\ncolor: var(--jp-ui-font-color0);\ndisplay: block;\nline-height: 22.5px;\npadding: 0 25px 0 10px;\nfont-size: var(--jp-ui-font-size1);\n}\n.wrapper {\n display: flex;\n}\n.f1ozlkqi {\n pointer-events: none;\n}\n\n.palybutton{\n background-image: '/home2/longxin/Neural_Coder_EXT/style/icons8-circled-play.gif';\n}\n.loading{\n \n background-image: url(" + ___CSS_LOADER_URL_REPLACEMENT_0___ + ");\n background-size: contain; \n}\n\n.dialog{\nbody {\n margin: 0;\n height: 100vh;\n width:600px;\n display: flex;\n align-items: center;\n justify-content: center;\n overflow: hidden;\n font-family: \"Poppins\", sans-serif;\n background: #e3d0b6;\n}\n\n#cookie-policy {\n display: flex;\n flex-direction: column;\n justify-content: center;\n align-items: center;\n width: 460px;\n height: 600px;\n background: #f3efe6;\n border-radius: 12px;\n transform: scale(.8);\n}\n\n#cookie-wrapper {\n height: 240px;\n width: 240px;\n margin: 30px 0;\n position: relative;\n left: -40px;\n}\n\nh1 {\n color: #6c3a1f;\n text-align: center;\n font-size: 36px;\n margin: 0;\n}\n\np {\n color: #a28561;\n font-size: 14px;\n margin-top: 0;\n padding: 0 60px;\n text-align: center;\n}\na {\n margin-top: 18px;\n font-size: 14px;\n color: #a28561;\n text-decoration: none;\n pointer-events: 
none;\n}\na:hover {\n color: #846b4d;\n}\n\nspan {\n font-family: \"Amatic SC\", cursive;\n font-weight: 400;\n font-size: 20px;\n position: relative;\n top: -18px;\n left: 3px;\n color: #a28561; \n}\n\n#heart-no, #thought-heart-yes, #mouth, #face-no, #thought-1, #thought-2, #thought-heart-na, #q-mark, #eyes, #leg-l, #leg-r {\n opacity: 0;\n}\n}\n.dia_button {\n color: white;\n background: #dd794a;\n margin-top: 12px;\n cursor: pointer;\n font-size: 24px;\n font-family: \"Poppins\", sans-serif;\n border-radius: 9px;\n border: none;\n width: 72%;\n padding: 12px 0;\n transition: 150ms ease-out;\n pointer-events: none;\n}\n\n.dia_button:hover {\n background: #d66029;\n}\n\n.pad{\n padding-left:6%\n}\n\n:root {\n /* Elevation\n *\n * We style box-shadows using Material Design's idea of elevation. These particular numbers are taken from here:\n *\n * https://github.com/material-components/material-components-web\n * https://material-components-web.appspot.com/elevation.html\n */\n\n --jp-shadow-base-lightness: 0;\n --jp-shadow-umbra-color: rgba(\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n 0.2\n );\n --jp-shadow-penumbra-color: rgba(\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n 0.14\n );\n --jp-shadow-ambient-color: rgba(\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n 0.12\n );\n --jp-elevation-z0: none;\n --jp-elevation-z1: 0px 2px 1px -1px var(--jp-shadow-umbra-color),\n 0px 1px 1px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 3px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z2: 0px 3px 1px -2px var(--jp-shadow-umbra-color),\n 0px 2px 2px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 5px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z4: 0px 2px 4px -1px var(--jp-shadow-umbra-color),\n 0px 4px 5px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 10px 0px 
var(--jp-shadow-ambient-color);\n --jp-elevation-z6: 0px 3px 5px -1px var(--jp-shadow-umbra-color),\n 0px 6px 10px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 18px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z8: 0px 5px 5px -3px var(--jp-shadow-umbra-color),\n 0px 8px 10px 1px var(--jp-shadow-penumbra-color),\n 0px 3px 14px 2px var(--jp-shadow-ambient-color);\n --jp-elevation-z12: 0px 7px 8px -4px var(--jp-shadow-umbra-color),\n 0px 12px 17px 2px var(--jp-shadow-penumbra-color),\n 0px 5px 22px 4px var(--jp-shadow-ambient-color);\n --jp-elevation-z16: 0px 8px 10px -5px var(--jp-shadow-umbra-color),\n 0px 16px 24px 2px var(--jp-shadow-penumbra-color),\n 0px 6px 30px 5px var(--jp-shadow-ambient-color);\n --jp-elevation-z20: 0px 10px 13px -6px var(--jp-shadow-umbra-color),\n 0px 20px 31px 3px var(--jp-shadow-penumbra-color),\n 0px 8px 38px 7px var(--jp-shadow-ambient-color);\n --jp-elevation-z24: 0px 11px 15px -7px var(--jp-shadow-umbra-color),\n 0px 24px 38px 3px var(--jp-shadow-penumbra-color),\n 0px 9px 46px 8px var(--jp-shadow-ambient-color);\n\n /* Borders\n *\n * The following variables, specify the visual styling of borders in JupyterLab.\n */\n\n --jp-border-width: 1px;\n --jp-border-color0: var(--md-grey-400);\n --jp-border-color1: var(--md-grey-400);\n --jp-border-color2: var(--md-grey-300);\n --jp-border-color3: var(--md-grey-200);\n --jp-inverse-border-color: var(--md-grey-600);\n --jp-border-radius: 2px;\n\n /* UI Fonts\n *\n * The UI font CSS variables are used for the typography all of the JupyterLab\n * user interface elements that are not directly user generated content.\n *\n * The font sizing here is done assuming that the body font size of --jp-ui-font-size1\n * is applied to a parent element. 
When children elements, such as headings, are sized\n * in em all things will be computed relative to that body size.\n */\n\n --jp-ui-font-scale-factor: 1.2;\n --jp-ui-font-size0: 0.83333em;\n --jp-ui-font-size1: 13px; /* Base font size */\n --jp-ui-font-size2: 1.2em;\n --jp-ui-font-size3: 1.44em;\n\n --jp-ui-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica,\n Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';\n\n /*\n * Use these font colors against the corresponding main layout colors.\n * In a light theme, these go from dark to light.\n */\n\n /* Defaults use Material Design specification */\n --jp-ui-font-color0: rgba(0, 0, 0, 1);\n --jp-ui-font-color1: rgba(0, 0, 0, 0.87);\n --jp-ui-font-color2: rgba(0, 0, 0, 0.54);\n --jp-ui-font-color3: rgba(0, 0, 0, 0.38);\n\n /*\n * Use these against the brand/accent/warn/error colors.\n * These will typically go from light to darker, in both a dark and light theme.\n */\n\n --jp-ui-inverse-font-color0: rgba(255, 255, 255, 1);\n --jp-ui-inverse-font-color1: rgba(255, 255, 255, 1);\n --jp-ui-inverse-font-color2: rgba(255, 255, 255, 0.7);\n --jp-ui-inverse-font-color3: rgba(255, 255, 255, 0.5);\n\n /* Content Fonts\n *\n * Content font variables are used for typography of user generated content.\n *\n * The font sizing here is done assuming that the body font size of --jp-content-font-size1\n * is applied to a parent element. When children elements, such as headings, are sized\n * in em all things will be computed relative to that body size.\n */\n\n --jp-content-line-height: 1.6;\n --jp-content-font-scale-factor: 1.2;\n --jp-content-font-size0: 0.83333em;\n --jp-content-font-size1: 14px; /* Base font size */\n --jp-content-font-size2: 1.2em;\n --jp-content-font-size3: 1.44em;\n --jp-content-font-size4: 1.728em;\n --jp-content-font-size5: 2.0736em;\n\n /* This gives a magnification of about 125% in presentation mode over normal. 
*/\n --jp-content-presentation-font-size1: 17px;\n\n --jp-content-heading-line-height: 1;\n --jp-content-heading-margin-top: 1.2em;\n --jp-content-heading-margin-bottom: 0.8em;\n --jp-content-heading-font-weight: 500;\n\n /* Defaults use Material Design specification */\n --jp-content-font-color0: rgba(0, 0, 0, 1);\n --jp-content-font-color1: rgba(0, 0, 0, 0.87);\n --jp-content-font-color2: rgba(0, 0, 0, 0.54);\n --jp-content-font-color3: rgba(0, 0, 0, 0.38);\n\n --jp-content-link-color: var(--md-blue-700);\n\n --jp-content-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI',\n Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',\n 'Segoe UI Symbol';\n\n /*\n * Code Fonts\n *\n * Code font variables are used for typography of code and other monospaces content.\n */\n\n --jp-code-font-size: 13px;\n --jp-code-line-height: 1.3077; /* 17px for 13px base */\n --jp-code-padding: 5px; /* 5px for 13px base, codemirror highlighting needs integer px value */\n --jp-code-font-family-default: Menlo, Consolas, 'DejaVu Sans Mono', monospace;\n --jp-code-font-family: var(--jp-code-font-family-default);\n\n /* This gives a magnification of about 125% in presentation mode over normal. */\n --jp-code-presentation-font-size: 16px;\n\n /* may need to tweak cursor width if you change font size */\n --jp-code-cursor-width0: 1.4px;\n --jp-code-cursor-width1: 2px;\n --jp-code-cursor-width2: 4px;\n\n /* Layout\n *\n * The following are the main layout colors use in JupyterLab. In a light\n * theme these would go from light to dark.\n */\n\n --jp-layout-color0: white;\n --jp-layout-color1: white;\n --jp-layout-color2: var(--md-grey-200);\n --jp-layout-color3: var(--md-grey-400);\n --jp-layout-color4: var(--md-grey-600);\n\n /* Inverse Layout\n *\n * The following are the inverse layout colors use in JupyterLab. 
In a light\n * theme these would go from dark to light.\n */\n\n --jp-inverse-layout-color0: #111111;\n --jp-inverse-layout-color1: var(--md-grey-900);\n --jp-inverse-layout-color2: var(--md-grey-800);\n --jp-inverse-layout-color3: var(--md-grey-700);\n --jp-inverse-layout-color4: var(--md-grey-600);\n\n /* Brand/accent */\n\n --jp-brand-color0: var(--md-blue-900);\n --jp-brand-color1: var(--md-blue-700);\n --jp-brand-color2: var(--md-blue-300);\n --jp-brand-color3: var(--md-blue-100);\n --jp-brand-color4: var(--md-blue-50);\n\n --jp-accent-color0: var(--md-green-900);\n --jp-accent-color1: var(--md-green-700);\n --jp-accent-color2: var(--md-green-300);\n --jp-accent-color3: var(--md-green-100);\n\n /* State colors (warn, error, success, info) */\n\n --jp-warn-color0: var(--md-orange-900);\n --jp-warn-color1: var(--md-orange-700);\n --jp-warn-color2: var(--md-orange-300);\n --jp-warn-color3: var(--md-orange-100);\n\n --jp-error-color0: var(--md-red-900);\n --jp-error-color1: var(--md-red-700);\n --jp-error-color2: var(--md-red-300);\n --jp-error-color3: var(--md-red-100);\n\n --jp-success-color0: var(--md-green-900);\n --jp-success-color1: var(--md-green-700);\n --jp-success-color2: var(--md-green-300);\n --jp-success-color3: var(--md-green-100);\n\n --jp-info-color0: var(--md-cyan-900);\n --jp-info-color1: var(--md-cyan-700);\n --jp-info-color2: var(--md-cyan-300);\n --jp-info-color3: var(--md-cyan-100);\n\n /* Cell specific styles */\n\n --jp-cell-padding: 5px;\n\n --jp-cell-collapser-width: 8px;\n --jp-cell-collapser-min-height: 20px;\n --jp-cell-collapser-not-active-hover-opacity: 0.6;\n\n --jp-cell-editor-background: var(--md-grey-100);\n --jp-cell-editor-border-color: var(--md-grey-300);\n --jp-cell-editor-box-shadow: inset 0 0 2px var(--md-blue-300);\n --jp-cell-editor-active-background: var(--jp-layout-color0);\n --jp-cell-editor-active-border-color: var(--jp-brand-color1);\n\n --jp-cell-prompt-width: 64px;\n --jp-cell-prompt-font-family: 
var(--jp-code-font-family-default);\n --jp-cell-prompt-letter-spacing: 0px;\n --jp-cell-prompt-opacity: 1;\n --jp-cell-prompt-not-active-opacity: 0.5;\n --jp-cell-prompt-not-active-font-color: var(--md-grey-700);\n /* A custom blend of MD grey and blue 600\n * See https://meyerweb.com/eric/tools/color-blend/#546E7A:1E88E5:5:hex */\n --jp-cell-inprompt-font-color: #307fc1;\n /* A custom blend of MD grey and orange 600\n * https://meyerweb.com/eric/tools/color-blend/#546E7A:F4511E:5:hex */\n --jp-cell-outprompt-font-color: #bf5b3d;\n\n /* Notebook specific styles */\n\n --jp-notebook-padding: 10px;\n --jp-notebook-select-background: var(--jp-layout-color1);\n --jp-notebook-multiselected-color: var(--md-blue-50);\n\n /* The scroll padding is calculated to fill enough space at the bottom of the\n notebook to show one single-line cell (with appropriate padding) at the top\n when the notebook is scrolled all the way to the bottom. We also subtract one\n pixel so that no scrollbar appears if we have just one single-line cell in the\n notebook. 
This padding is to enable a 'scroll past end' feature in a notebook.\n */\n --jp-notebook-scroll-padding: calc(\n 100% - var(--jp-code-font-size) * var(--jp-code-line-height) -\n var(--jp-code-padding) - var(--jp-cell-padding) - 1px\n );\n\n /* Rendermime styles */\n\n --jp-rendermime-error-background: #fdd;\n --jp-rendermime-table-row-background: var(--md-grey-100);\n --jp-rendermime-table-row-hover-background: var(--md-light-blue-50);\n\n /* Dialog specific styles */\n\n --jp-dialog-background: rgba(0, 0, 0, 0.25);\n\n /* Console specific styles */\n\n --jp-console-padding: 10px;\n\n /* Toolbar specific styles */\n\n --jp-toolbar-border-color: var(--jp-border-color1);\n --jp-toolbar-micro-height: 8px;\n --jp-toolbar-background: var(--jp-layout-color1);\n --jp-toolbar-box-shadow: 0px 0px 2px 0px rgba(0, 0, 0, 0.24);\n --jp-toolbar-header-margin: 4px 4px 0px 4px;\n --jp-toolbar-active-background: var(--md-grey-300);\n\n /* Statusbar specific styles */\n\n --jp-statusbar-height: 24px;\n\n /* Input field styles */\n\n --jp-input-box-shadow: inset 0 0 2px var(--md-blue-300);\n --jp-input-active-background: var(--jp-layout-color1);\n --jp-input-hover-background: var(--jp-layout-color1);\n --jp-input-background: var(--md-grey-100);\n --jp-input-border-color: var(--jp-inverse-border-color);\n --jp-input-active-border-color: var(--jp-brand-color1);\n --jp-input-active-box-shadow-color: rgba(19, 124, 189, 0.3);\n\n /* General editor styles */\n\n --jp-editor-selected-background: #d9d9d9;\n --jp-editor-selected-focused-background: #d7d4f0;\n --jp-editor-cursor-color: var(--jp-ui-font-color0);\n\n /* Code mirror specific styles */\n\n --jp-mirror-editor-keyword-color: #008000;\n --jp-mirror-editor-atom-color: #88f;\n --jp-mirror-editor-number-color: #080;\n --jp-mirror-editor-def-color: #00f;\n --jp-mirror-editor-variable-color: var(--md-grey-900);\n --jp-mirror-editor-variable-2-color: #05a;\n --jp-mirror-editor-variable-3-color: #085;\n 
--jp-mirror-editor-punctuation-color: #05a;\n --jp-mirror-editor-property-color: #05a;\n --jp-mirror-editor-operator-color: #aa22ff;\n --jp-mirror-editor-comment-color: #408080;\n --jp-mirror-editor-string-color: #ba2121;\n --jp-mirror-editor-string-2-color: #708;\n --jp-mirror-editor-meta-color: #aa22ff;\n --jp-mirror-editor-qualifier-color: #555;\n --jp-mirror-editor-builtin-color: #008000;\n --jp-mirror-editor-bracket-color: #997;\n --jp-mirror-editor-tag-color: #170;\n --jp-mirror-editor-attribute-color: #00c;\n --jp-mirror-editor-header-color: blue;\n --jp-mirror-editor-quote-color: #090;\n --jp-mirror-editor-link-color: #00c;\n --jp-mirror-editor-error-color: #f00;\n --jp-mirror-editor-hr-color: #999;\n\n /* Vega extension styles */\n\n --jp-vega-background: white;\n\n /* Sidebar-related styles */\n\n --jp-sidebar-min-width: 250px;\n\n /* Search-related styles */\n\n --jp-search-toggle-off-opacity: 0.5;\n --jp-search-toggle-hover-opacity: 0.8;\n --jp-search-toggle-on-opacity: 1;\n --jp-search-selected-match-background-color: rgb(245, 200, 0);\n --jp-search-selected-match-color: black;\n --jp-search-unselected-match-background-color: var(\n --jp-inverse-layout-color0\n );\n --jp-search-unselected-match-color: var(--jp-ui-inverse-font-color0);\n\n /* Icon colors that work well with light or dark backgrounds */\n --jp-icon-contrast-color0: var(--md-purple-600);\n --jp-icon-contrast-color1: var(--md-green-600);\n --jp-icon-contrast-color2: var(--md-pink-600);\n --jp-icon-contrast-color3: var(--md-blue-600);\n}\n\n/*-----------------------------------------------------------------------------\n| Copyright (c) Jupyter Development Team.\n| Distributed under the terms of the Modified BSD License.\n|----------------------------------------------------------------------------*/\n\n/* Set the default typography for monospace elements */\ntt,\ncode,\nkbd,\nsamp,\npre {\n font-family: var(--jp-code-font-family);\n font-size: var(--jp-code-font-size);\n line-height: 
var(--jp-code-line-height);\n}\n\n", "",{"version":3,"sources":["webpack://./style/base.css"],"names":[],"mappings":"AAAA;;;;CAIC;AACD;EACE,aAAa;EACb,kBAAkB;EAClB,OAAO;EACP,MAAM;EACN,QAAQ;EACR,SAAS;EACT,YAAY;EACZ,WAAW;EACX,YAAY;AACd;AACA;EACE,kBAAkB;EAClB,OAAO;EACP,MAAM;EACN,QAAQ;EACR,SAAS;EACT,YAAY;EACZ,mCAAmC;EACnC,UAAU;EACV,kBAAkB;EAClB,8DAA8D;AAChE;AACA;EACE,sBAAsB;AACxB;AACA;EACE;IACE,OAAO;IACP,MAAM;IACN,QAAQ;IACR,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,UAAU;EACZ;EACA;IACE,OAAO;IACP,MAAM;IACN,QAAQ;IACR,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,UAAU;EACZ;EACA;IACE,OAAO;IACP,MAAM;IACN,QAAQ;IACR,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,UAAU;EACZ;EACA;IACE,QAAQ;IACR,SAAS;IACT,WAAW;IACX,YAAY;IACZ,UAAU;EACZ;AACF;;;AAGA,QAAQ;AACR;EACE,8DAA8D;EAC9D,SAAS;EACT,mBAAmB;EACnB,cAAc;EACd,eAAe;EACf,qBAAqB;EACrB,iFAAiF;EACjF,eAAe;EACf,gBAAgB;EAChB,gBAAgB;EAChB,oBAAoB;EACpB,eAAe;EACf,kBAAkB;EAClB,qBAAqB;EACrB,sCAAsC;EACtC,iBAAiB;EACjB,yBAAyB;EACzB,0BAA0B;EAC1B,mBAAmB;AACrB;;AAEA;EACE,uIAAuI;AACzI;;AAEA;EACE,uIAAuI;AACzI;;AAEA;EACE,yBAAyB;EACzB,YAAY;EACZ,gBAAgB;EAChB,gBAAgB;EAChB,+BAA+B;EAC/B,cAAc;EACd,kCAAkC;EAClC,YAAY;EACZ,iBAAiB;EACjB,sBAAsB;EACtB,gBAAgB;EAChB,qBAAqB;EACrB,wBAAwB;AAC1B;;AAEA,kDAAkD;AAClD;;EAEE,yCAAyC;EACzC,+BAA+B;AACjC;AACA;EACE,sBAAsB;AACxB;;AAEA;AACA,yBAAyB;AACzB,YAAY;AACZ,YAAY;AACZ,gBAAgB;AAChB,eAAe;AACf,+BAA+B;AAC/B,cAAc;AACd,mBAAmB;AACnB,sBAAsB;AACtB,kCAAkC;AAClC;AACA;EACE,aAAa;AACf;AACA;EACE,oBAAoB;AACtB;;AAEA;EACE,iFAAiF;AACnF;AACA;;EAEE,yDAAm9B;EACn9B,wBAAwB;AAC1B;;AAEA;AACA;EACE,SAAS;EACT,aAAa;EACb,WAAW;EACX,aAAa;EACb,mBAAmB;EACnB,uBAAuB;EACvB,gBAAgB;EAChB,kCAAkC;EAClC,mBAAmB;AACrB;;AAEA;EACE,aAAa;EACb,sBAAsB;EACtB,uBAAuB;EACvB,mBAAmB;EACnB,YAAY;EACZ,aAAa;EACb,mBAAmB;EACnB,mBAAmB;EACnB,oBAAoB;AACtB;;AAEA;EACE,aAAa;EACb,YAAY;EACZ,cAAc;EACd,kBAAkB;EAClB,WAAW;AACb;;AAEA;EACE,cAAc;EACd,kBAAkB;EAClB,eAAe;EACf,SAAS;AACX;;AAEA;EACE,cAAc;EACd,eAAe;EACf,aAAa;EACb,eAAe;EACf,kBAAkB;AACpB;AACA;EACE,gBAAgB;EAChB,eAAe;EACf,cAAc;EACd,qBAAqB;EACrB,oBAAoB;AACtB;AACA;EACE
,cAAc;AAChB;;AAEA;EACE,iCAAiC;EACjC,gBAAgB;EAChB,eAAe;EACf,kBAAkB;EAClB,UAAU;EACV,SAAS;EACT,cAAc;AAChB;;AAEA;EACE,UAAU;AACZ;AACA;AACA;EACE,YAAY;EACZ,mBAAmB;EACnB,gBAAgB;EAChB,eAAe;EACf,eAAe;EACf,kCAAkC;EAClC,kBAAkB;EAClB,YAAY;EACZ,UAAU;EACV,eAAe;EACf,0BAA0B;EAC1B,oBAAoB;AACtB;;AAEA;EACE,mBAAmB;AACrB;;AAEA;EACE;AACF;;AAEA;EACE;;;;;;IAME;;EAEF,6BAA6B;EAC7B;;;;;GAKC;EACD;;;;;GAKC;EACD;;;;;GAKC;EACD,uBAAuB;EACvB;;kDAEgD;EAChD;;kDAEgD;EAChD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;;EAEjD;;;IAGE;;EAEF,sBAAsB;EACtB,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;EACtC,6CAA6C;EAC7C,uBAAuB;;EAEvB;;;;;;;;IAQE;;EAEF,8BAA8B;EAC9B,6BAA6B;EAC7B,wBAAwB,EAAE,mBAAmB;EAC7C,yBAAyB;EACzB,0BAA0B;;EAE1B;+EAC6E;;EAE7E;;;IAGE;;EAEF,+CAA+C;EAC/C,qCAAqC;EACrC,wCAAwC;EACxC,wCAAwC;EACxC,wCAAwC;;EAExC;;;IAGE;;EAEF,mDAAmD;EACnD,mDAAmD;EACnD,qDAAqD;EACrD,qDAAqD;;EAErD;;;;;;;IAOE;;EAEF,6BAA6B;EAC7B,mCAAmC;EACnC,kCAAkC;EAClC,6BAA6B,EAAE,mBAAmB;EAClD,8BAA8B;EAC9B,+BAA+B;EAC/B,gCAAgC;EAChC,iCAAiC;;EAEjC,+EAA+E;EAC/E,0CAA0C;;EAE1C,mCAAmC;EACnC,sCAAsC;EACtC,yCAAyC;EACzC,qCAAqC;;EAErC,+CAA+C;EAC/C,0CAA0C;EAC1C,6CAA6C;EAC7C,6CAA6C;EAC7C,6CAA6C;;EAE7C,2CAA2C;;EAE3C;;qBAEmB;;EAEnB;;;;IAIE;;EAEF,yBAAyB;EACzB,6BAA6B,EAAE,uBAAuB;EACtD,sBAAsB,EAAE,sEAAsE;EAC9F,6EAA6E;EAC7E,yDAAyD;;EAEzD,+EAA+E;EAC/E,sCAAsC;;EAEtC,2DAA2D;EAC3D,8BAA8B;EAC9B,4BAA4B;EAC5B,4BAA4B;;EAE5B;;;;IAIE;;EAEF,yBAAyB;EACzB,yBAAyB;EACzB,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;;EAEtC;;;;IAIE;;EAEF,mCAAmC;EACnC,8CAA8C;EAC9C,8CAA8C;EAC9C,8CAA8C;EAC9C,8CAA8C;;EAE9C,iBAAiB;;EAEjB,qCAAqC;EACrC,qCAAqC;EACrC,qCAAqC;EACrC,qCAAqC;EACrC,oCAAoC;;EAEpC,uCAAuC;EACvC,uCAAuC;EACvC,uCAAuC;EACvC,uCAAuC;;EAEvC,8CAA8C;;EAE9C,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;;EAEtC,oCAAoC;EACpC,oCAAoC;EACpC,oCAAoC;EACpC,oCAAoC;;EAEpC,wCAAwC;EACxC,wCAAwC;EACxC,wCAAwC;EACxC,wCAAwC;;EAExC,oCAAoC;EACpC,oCAAoC;EACpC,oCAAoC;EACpC,oCAAoC;;EAEpC,yBAAyB;;EAEzB,sBAAsB;;EAEtB,8BAA8B;EAC9B,oCAAoC;EACpC,iDAAiD;;EAEjD,+CAA+C;EAC/C,iDAAiD;E
ACjD,6DAA6D;EAC7D,2DAA2D;EAC3D,4DAA4D;;EAE5D,4BAA4B;EAC5B,gEAAgE;EAChE,oCAAoC;EACpC,2BAA2B;EAC3B,wCAAwC;EACxC,0DAA0D;EAC1D;2EACyE;EACzE,sCAAsC;EACtC;uEACqE;EACrE,uCAAuC;;EAEvC,6BAA6B;;EAE7B,2BAA2B;EAC3B,wDAAwD;EACxD,oDAAoD;;EAEpD;;;;;GAKC;EACD;;;GAGC;;EAED,sBAAsB;;EAEtB,sCAAsC;EACtC,wDAAwD;EACxD,mEAAmE;;EAEnE,2BAA2B;;EAE3B,2CAA2C;;EAE3C,4BAA4B;;EAE5B,0BAA0B;;EAE1B,4BAA4B;;EAE5B,kDAAkD;EAClD,8BAA8B;EAC9B,gDAAgD;EAChD,4DAA4D;EAC5D,2CAA2C;EAC3C,kDAAkD;;EAElD,8BAA8B;;EAE9B,2BAA2B;;EAE3B,uBAAuB;;EAEvB,uDAAuD;EACvD,qDAAqD;EACrD,oDAAoD;EACpD,yCAAyC;EACzC,uDAAuD;EACvD,sDAAsD;EACtD,2DAA2D;;EAE3D,0BAA0B;;EAE1B,wCAAwC;EACxC,gDAAgD;EAChD,kDAAkD;;EAElD,gCAAgC;;EAEhC,yCAAyC;EACzC,mCAAmC;EACnC,qCAAqC;EACrC,kCAAkC;EAClC,qDAAqD;EACrD,yCAAyC;EACzC,yCAAyC;EACzC,0CAA0C;EAC1C,uCAAuC;EACvC,0CAA0C;EAC1C,yCAAyC;EACzC,wCAAwC;EACxC,uCAAuC;EACvC,sCAAsC;EACtC,wCAAwC;EACxC,yCAAyC;EACzC,sCAAsC;EACtC,kCAAkC;EAClC,wCAAwC;EACxC,qCAAqC;EACrC,oCAAoC;EACpC,mCAAmC;EACnC,oCAAoC;EACpC,iCAAiC;;EAEjC,0BAA0B;;EAE1B,2BAA2B;;EAE3B,2BAA2B;;EAE3B,6BAA6B;;EAE7B,0BAA0B;;EAE1B,mCAAmC;EACnC,qCAAqC;EACrC,gCAAgC;EAChC,6DAA6D;EAC7D,uCAAuC;EACvC;;GAEC;EACD,oEAAoE;;EAEpE,8DAA8D;EAC9D,+CAA+C;EAC/C,8CAA8C;EAC9C,6CAA6C;EAC7C,6CAA6C;AAC/C;;AAEA;;;8EAG8E;;AAE9E,sDAAsD;AACtD;;;;;EAKE,uCAAuC;EACvC,mCAAmC;EACnC,uCAAuC;AACzC","sourcesContent":["/*\n See the JupyterLab Developer Guide for useful CSS Patterns:\n\n https://jupyterlab.readthedocs.io/en/stable/developer/css.html\n*/\n.lds-ripple {\n display: flex;\n position: absolute; \n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 80px;\n height: 80px;\n}\n.lds-ripple div {\n position: absolute; \n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n border: 4px solid rgb(245, 131, 55);\n opacity: 1;\n border-radius: 50%;\n animation: lds-ripple 1s cubic-bezier(0, 0.2, 0.8, 1) infinite;\n}\n.lds-ripple div:nth-child(2) {\n animation-delay: -0.5s;\n}\n@keyframes lds-ripple {\n 0% {\n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: 
auto; \n width: 0;\n height: 0;\n opacity: 0;\n }\n 4.9% {\n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 0;\n height: 0;\n opacity: 0;\n }\n 5% {\n left: 0; \n top: 0; \n right: 0; \n bottom: 0;\n margin: auto; \n width: 0;\n height: 0;\n opacity: 1;\n }\n 100% {\n top: 0px;\n left: 0px;\n width: 72px;\n height: 72px;\n opacity: 0;\n }\n}\n\n\n/* CSS */\n.button-62 {\n background: linear-gradient(to bottom right, #EF4765, #FF9A5A);\n border: 0;\n border-radius: 12px;\n color: #FFFFFF;\n cursor: pointer;\n display: inline-block;\n font-family: -apple-system,system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif;\n font-size: 16px;\n font-weight: 500;\n line-height: 2.5;\n outline: transparent;\n padding: 0 1rem;\n text-align: center;\n text-decoration: none;\n transition: box-shadow .2s ease-in-out;\n user-select: none;\n -webkit-user-select: none;\n touch-action: manipulation;\n white-space: nowrap;\n}\n\n.button-62:not([disabled]):focus {\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\n}\n\n.button-62:not([disabled]):hover {\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\n}\n\n.aselector select {\n background-color: initial;\n border: none;\n border-radius: 0;\n box-shadow: none;\n color: var(--jp-ui-font-color0);\n display: block;\n font-size: var(--jp-ui-font-size1);\n height: 24px;\n line-height: 14px;\n padding: 0 25px 0 10px;\n text-align: left;\n -moz-appearance: none;\n -webkit-appearance: none;\n}\n\n/* Use our own theme for hover and option styles */\n.aselector select:hover,\n.aselector select > option {\n background-color: var(--jp-layout-color2);\n color: var(--jp-ui-font-color0);\n}\nselect {\n box-sizing: border-box;\n}\n\n.font{\nbackground-color: initial;\nborder: none;\nheight: 21px;\nborder-radius: 0;\nfont-weight:500;\ncolor: 
var(--jp-ui-font-color0);\ndisplay: block;\nline-height: 22.5px;\npadding: 0 25px 0 10px;\nfont-size: var(--jp-ui-font-size1);\n}\n.wrapper {\n display: flex;\n}\n.f1ozlkqi {\n pointer-events: none;\n}\n\n.palybutton{\n background-image: '/home2/longxin/Neural_Coder_EXT/style/icons8-circled-play.gif';\n}\n.loading{\n \n background-image: url(\"data:image/svg+xml, \");\n background-size: contain; \n}\n\n.dialog{\nbody {\n margin: 0;\n height: 100vh;\n width:600px;\n display: flex;\n align-items: center;\n justify-content: center;\n overflow: hidden;\n font-family: \"Poppins\", sans-serif;\n background: #e3d0b6;\n}\n\n#cookie-policy {\n display: flex;\n flex-direction: column;\n justify-content: center;\n align-items: center;\n width: 460px;\n height: 600px;\n background: #f3efe6;\n border-radius: 12px;\n transform: scale(.8);\n}\n\n#cookie-wrapper {\n height: 240px;\n width: 240px;\n margin: 30px 0;\n position: relative;\n left: -40px;\n}\n\nh1 {\n color: #6c3a1f;\n text-align: center;\n font-size: 36px;\n margin: 0;\n}\n\np {\n color: #a28561;\n font-size: 14px;\n margin-top: 0;\n padding: 0 60px;\n text-align: center;\n}\na {\n margin-top: 18px;\n font-size: 14px;\n color: #a28561;\n text-decoration: none;\n pointer-events: none;\n}\na:hover {\n color: #846b4d;\n}\n\nspan {\n font-family: \"Amatic SC\", cursive;\n font-weight: 400;\n font-size: 20px;\n position: relative;\n top: -18px;\n left: 3px;\n color: #a28561; \n}\n\n#heart-no, #thought-heart-yes, #mouth, #face-no, #thought-1, #thought-2, #thought-heart-na, #q-mark, #eyes, #leg-l, #leg-r {\n opacity: 0;\n}\n}\n.dia_button {\n color: white;\n background: #dd794a;\n margin-top: 12px;\n cursor: pointer;\n font-size: 24px;\n font-family: \"Poppins\", sans-serif;\n border-radius: 9px;\n border: none;\n width: 72%;\n padding: 12px 0;\n transition: 150ms ease-out;\n pointer-events: none;\n}\n\n.dia_button:hover {\n background: #d66029;\n}\n\n.pad{\n padding-left:6%\n}\n\n:root {\n /* Elevation\n *\n * We style 
box-shadows using Material Design's idea of elevation. These particular numbers are taken from here:\n *\n * https://github.com/material-components/material-components-web\n * https://material-components-web.appspot.com/elevation.html\n */\n\n --jp-shadow-base-lightness: 0;\n --jp-shadow-umbra-color: rgba(\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n 0.2\n );\n --jp-shadow-penumbra-color: rgba(\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n 0.14\n );\n --jp-shadow-ambient-color: rgba(\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n var(--jp-shadow-base-lightness),\n 0.12\n );\n --jp-elevation-z0: none;\n --jp-elevation-z1: 0px 2px 1px -1px var(--jp-shadow-umbra-color),\n 0px 1px 1px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 3px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z2: 0px 3px 1px -2px var(--jp-shadow-umbra-color),\n 0px 2px 2px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 5px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z4: 0px 2px 4px -1px var(--jp-shadow-umbra-color),\n 0px 4px 5px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 10px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z6: 0px 3px 5px -1px var(--jp-shadow-umbra-color),\n 0px 6px 10px 0px var(--jp-shadow-penumbra-color),\n 0px 1px 18px 0px var(--jp-shadow-ambient-color);\n --jp-elevation-z8: 0px 5px 5px -3px var(--jp-shadow-umbra-color),\n 0px 8px 10px 1px var(--jp-shadow-penumbra-color),\n 0px 3px 14px 2px var(--jp-shadow-ambient-color);\n --jp-elevation-z12: 0px 7px 8px -4px var(--jp-shadow-umbra-color),\n 0px 12px 17px 2px var(--jp-shadow-penumbra-color),\n 0px 5px 22px 4px var(--jp-shadow-ambient-color);\n --jp-elevation-z16: 0px 8px 10px -5px var(--jp-shadow-umbra-color),\n 0px 16px 24px 2px var(--jp-shadow-penumbra-color),\n 0px 6px 30px 5px var(--jp-shadow-ambient-color);\n --jp-elevation-z20: 0px 10px 13px -6px 
var(--jp-shadow-umbra-color),\n 0px 20px 31px 3px var(--jp-shadow-penumbra-color),\n 0px 8px 38px 7px var(--jp-shadow-ambient-color);\n --jp-elevation-z24: 0px 11px 15px -7px var(--jp-shadow-umbra-color),\n 0px 24px 38px 3px var(--jp-shadow-penumbra-color),\n 0px 9px 46px 8px var(--jp-shadow-ambient-color);\n\n /* Borders\n *\n * The following variables, specify the visual styling of borders in JupyterLab.\n */\n\n --jp-border-width: 1px;\n --jp-border-color0: var(--md-grey-400);\n --jp-border-color1: var(--md-grey-400);\n --jp-border-color2: var(--md-grey-300);\n --jp-border-color3: var(--md-grey-200);\n --jp-inverse-border-color: var(--md-grey-600);\n --jp-border-radius: 2px;\n\n /* UI Fonts\n *\n * The UI font CSS variables are used for the typography all of the JupyterLab\n * user interface elements that are not directly user generated content.\n *\n * The font sizing here is done assuming that the body font size of --jp-ui-font-size1\n * is applied to a parent element. When children elements, such as headings, are sized\n * in em all things will be computed relative to that body size.\n */\n\n --jp-ui-font-scale-factor: 1.2;\n --jp-ui-font-size0: 0.83333em;\n --jp-ui-font-size1: 13px; /* Base font size */\n --jp-ui-font-size2: 1.2em;\n --jp-ui-font-size3: 1.44em;\n\n --jp-ui-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica,\n Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';\n\n /*\n * Use these font colors against the corresponding main layout colors.\n * In a light theme, these go from dark to light.\n */\n\n /* Defaults use Material Design specification */\n --jp-ui-font-color0: rgba(0, 0, 0, 1);\n --jp-ui-font-color1: rgba(0, 0, 0, 0.87);\n --jp-ui-font-color2: rgba(0, 0, 0, 0.54);\n --jp-ui-font-color3: rgba(0, 0, 0, 0.38);\n\n /*\n * Use these against the brand/accent/warn/error colors.\n * These will typically go from light to darker, in both a dark and light theme.\n */\n\n --jp-ui-inverse-font-color0: 
rgba(255, 255, 255, 1);\n --jp-ui-inverse-font-color1: rgba(255, 255, 255, 1);\n --jp-ui-inverse-font-color2: rgba(255, 255, 255, 0.7);\n --jp-ui-inverse-font-color3: rgba(255, 255, 255, 0.5);\n\n /* Content Fonts\n *\n * Content font variables are used for typography of user generated content.\n *\n * The font sizing here is done assuming that the body font size of --jp-content-font-size1\n * is applied to a parent element. When children elements, such as headings, are sized\n * in em all things will be computed relative to that body size.\n */\n\n --jp-content-line-height: 1.6;\n --jp-content-font-scale-factor: 1.2;\n --jp-content-font-size0: 0.83333em;\n --jp-content-font-size1: 14px; /* Base font size */\n --jp-content-font-size2: 1.2em;\n --jp-content-font-size3: 1.44em;\n --jp-content-font-size4: 1.728em;\n --jp-content-font-size5: 2.0736em;\n\n /* This gives a magnification of about 125% in presentation mode over normal. */\n --jp-content-presentation-font-size1: 17px;\n\n --jp-content-heading-line-height: 1;\n --jp-content-heading-margin-top: 1.2em;\n --jp-content-heading-margin-bottom: 0.8em;\n --jp-content-heading-font-weight: 500;\n\n /* Defaults use Material Design specification */\n --jp-content-font-color0: rgba(0, 0, 0, 1);\n --jp-content-font-color1: rgba(0, 0, 0, 0.87);\n --jp-content-font-color2: rgba(0, 0, 0, 0.54);\n --jp-content-font-color3: rgba(0, 0, 0, 0.38);\n\n --jp-content-link-color: var(--md-blue-700);\n\n --jp-content-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI',\n Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',\n 'Segoe UI Symbol';\n\n /*\n * Code Fonts\n *\n * Code font variables are used for typography of code and other monospaces content.\n */\n\n --jp-code-font-size: 13px;\n --jp-code-line-height: 1.3077; /* 17px for 13px base */\n --jp-code-padding: 5px; /* 5px for 13px base, codemirror highlighting needs integer px value */\n --jp-code-font-family-default: Menlo, Consolas, 'DejaVu Sans Mono', 
monospace;\n --jp-code-font-family: var(--jp-code-font-family-default);\n\n /* This gives a magnification of about 125% in presentation mode over normal. */\n --jp-code-presentation-font-size: 16px;\n\n /* may need to tweak cursor width if you change font size */\n --jp-code-cursor-width0: 1.4px;\n --jp-code-cursor-width1: 2px;\n --jp-code-cursor-width2: 4px;\n\n /* Layout\n *\n * The following are the main layout colors use in JupyterLab. In a light\n * theme these would go from light to dark.\n */\n\n --jp-layout-color0: white;\n --jp-layout-color1: white;\n --jp-layout-color2: var(--md-grey-200);\n --jp-layout-color3: var(--md-grey-400);\n --jp-layout-color4: var(--md-grey-600);\n\n /* Inverse Layout\n *\n * The following are the inverse layout colors use in JupyterLab. In a light\n * theme these would go from dark to light.\n */\n\n --jp-inverse-layout-color0: #111111;\n --jp-inverse-layout-color1: var(--md-grey-900);\n --jp-inverse-layout-color2: var(--md-grey-800);\n --jp-inverse-layout-color3: var(--md-grey-700);\n --jp-inverse-layout-color4: var(--md-grey-600);\n\n /* Brand/accent */\n\n --jp-brand-color0: var(--md-blue-900);\n --jp-brand-color1: var(--md-blue-700);\n --jp-brand-color2: var(--md-blue-300);\n --jp-brand-color3: var(--md-blue-100);\n --jp-brand-color4: var(--md-blue-50);\n\n --jp-accent-color0: var(--md-green-900);\n --jp-accent-color1: var(--md-green-700);\n --jp-accent-color2: var(--md-green-300);\n --jp-accent-color3: var(--md-green-100);\n\n /* State colors (warn, error, success, info) */\n\n --jp-warn-color0: var(--md-orange-900);\n --jp-warn-color1: var(--md-orange-700);\n --jp-warn-color2: var(--md-orange-300);\n --jp-warn-color3: var(--md-orange-100);\n\n --jp-error-color0: var(--md-red-900);\n --jp-error-color1: var(--md-red-700);\n --jp-error-color2: var(--md-red-300);\n --jp-error-color3: var(--md-red-100);\n\n --jp-success-color0: var(--md-green-900);\n --jp-success-color1: var(--md-green-700);\n --jp-success-color2: 
var(--md-green-300);\n --jp-success-color3: var(--md-green-100);\n\n --jp-info-color0: var(--md-cyan-900);\n --jp-info-color1: var(--md-cyan-700);\n --jp-info-color2: var(--md-cyan-300);\n --jp-info-color3: var(--md-cyan-100);\n\n /* Cell specific styles */\n\n --jp-cell-padding: 5px;\n\n --jp-cell-collapser-width: 8px;\n --jp-cell-collapser-min-height: 20px;\n --jp-cell-collapser-not-active-hover-opacity: 0.6;\n\n --jp-cell-editor-background: var(--md-grey-100);\n --jp-cell-editor-border-color: var(--md-grey-300);\n --jp-cell-editor-box-shadow: inset 0 0 2px var(--md-blue-300);\n --jp-cell-editor-active-background: var(--jp-layout-color0);\n --jp-cell-editor-active-border-color: var(--jp-brand-color1);\n\n --jp-cell-prompt-width: 64px;\n --jp-cell-prompt-font-family: var(--jp-code-font-family-default);\n --jp-cell-prompt-letter-spacing: 0px;\n --jp-cell-prompt-opacity: 1;\n --jp-cell-prompt-not-active-opacity: 0.5;\n --jp-cell-prompt-not-active-font-color: var(--md-grey-700);\n /* A custom blend of MD grey and blue 600\n * See https://meyerweb.com/eric/tools/color-blend/#546E7A:1E88E5:5:hex */\n --jp-cell-inprompt-font-color: #307fc1;\n /* A custom blend of MD grey and orange 600\n * https://meyerweb.com/eric/tools/color-blend/#546E7A:F4511E:5:hex */\n --jp-cell-outprompt-font-color: #bf5b3d;\n\n /* Notebook specific styles */\n\n --jp-notebook-padding: 10px;\n --jp-notebook-select-background: var(--jp-layout-color1);\n --jp-notebook-multiselected-color: var(--md-blue-50);\n\n /* The scroll padding is calculated to fill enough space at the bottom of the\n notebook to show one single-line cell (with appropriate padding) at the top\n when the notebook is scrolled all the way to the bottom. We also subtract one\n pixel so that no scrollbar appears if we have just one single-line cell in the\n notebook. 
This padding is to enable a 'scroll past end' feature in a notebook.\n */\n --jp-notebook-scroll-padding: calc(\n 100% - var(--jp-code-font-size) * var(--jp-code-line-height) -\n var(--jp-code-padding) - var(--jp-cell-padding) - 1px\n );\n\n /* Rendermime styles */\n\n --jp-rendermime-error-background: #fdd;\n --jp-rendermime-table-row-background: var(--md-grey-100);\n --jp-rendermime-table-row-hover-background: var(--md-light-blue-50);\n\n /* Dialog specific styles */\n\n --jp-dialog-background: rgba(0, 0, 0, 0.25);\n\n /* Console specific styles */\n\n --jp-console-padding: 10px;\n\n /* Toolbar specific styles */\n\n --jp-toolbar-border-color: var(--jp-border-color1);\n --jp-toolbar-micro-height: 8px;\n --jp-toolbar-background: var(--jp-layout-color1);\n --jp-toolbar-box-shadow: 0px 0px 2px 0px rgba(0, 0, 0, 0.24);\n --jp-toolbar-header-margin: 4px 4px 0px 4px;\n --jp-toolbar-active-background: var(--md-grey-300);\n\n /* Statusbar specific styles */\n\n --jp-statusbar-height: 24px;\n\n /* Input field styles */\n\n --jp-input-box-shadow: inset 0 0 2px var(--md-blue-300);\n --jp-input-active-background: var(--jp-layout-color1);\n --jp-input-hover-background: var(--jp-layout-color1);\n --jp-input-background: var(--md-grey-100);\n --jp-input-border-color: var(--jp-inverse-border-color);\n --jp-input-active-border-color: var(--jp-brand-color1);\n --jp-input-active-box-shadow-color: rgba(19, 124, 189, 0.3);\n\n /* General editor styles */\n\n --jp-editor-selected-background: #d9d9d9;\n --jp-editor-selected-focused-background: #d7d4f0;\n --jp-editor-cursor-color: var(--jp-ui-font-color0);\n\n /* Code mirror specific styles */\n\n --jp-mirror-editor-keyword-color: #008000;\n --jp-mirror-editor-atom-color: #88f;\n --jp-mirror-editor-number-color: #080;\n --jp-mirror-editor-def-color: #00f;\n --jp-mirror-editor-variable-color: var(--md-grey-900);\n --jp-mirror-editor-variable-2-color: #05a;\n --jp-mirror-editor-variable-3-color: #085;\n 
--jp-mirror-editor-punctuation-color: #05a;\n --jp-mirror-editor-property-color: #05a;\n --jp-mirror-editor-operator-color: #aa22ff;\n --jp-mirror-editor-comment-color: #408080;\n --jp-mirror-editor-string-color: #ba2121;\n --jp-mirror-editor-string-2-color: #708;\n --jp-mirror-editor-meta-color: #aa22ff;\n --jp-mirror-editor-qualifier-color: #555;\n --jp-mirror-editor-builtin-color: #008000;\n --jp-mirror-editor-bracket-color: #997;\n --jp-mirror-editor-tag-color: #170;\n --jp-mirror-editor-attribute-color: #00c;\n --jp-mirror-editor-header-color: blue;\n --jp-mirror-editor-quote-color: #090;\n --jp-mirror-editor-link-color: #00c;\n --jp-mirror-editor-error-color: #f00;\n --jp-mirror-editor-hr-color: #999;\n\n /* Vega extension styles */\n\n --jp-vega-background: white;\n\n /* Sidebar-related styles */\n\n --jp-sidebar-min-width: 250px;\n\n /* Search-related styles */\n\n --jp-search-toggle-off-opacity: 0.5;\n --jp-search-toggle-hover-opacity: 0.8;\n --jp-search-toggle-on-opacity: 1;\n --jp-search-selected-match-background-color: rgb(245, 200, 0);\n --jp-search-selected-match-color: black;\n --jp-search-unselected-match-background-color: var(\n --jp-inverse-layout-color0\n );\n --jp-search-unselected-match-color: var(--jp-ui-inverse-font-color0);\n\n /* Icon colors that work well with light or dark backgrounds */\n --jp-icon-contrast-color0: var(--md-purple-600);\n --jp-icon-contrast-color1: var(--md-green-600);\n --jp-icon-contrast-color2: var(--md-pink-600);\n --jp-icon-contrast-color3: var(--md-blue-600);\n}\n\n/*-----------------------------------------------------------------------------\n| Copyright (c) Jupyter Development Team.\n| Distributed under the terms of the Modified BSD License.\n|----------------------------------------------------------------------------*/\n\n/* Set the default typography for monospace elements */\ntt,\ncode,\nkbd,\nsamp,\npre {\n font-family: var(--jp-code-font-family);\n font-size: var(--jp-code-font-size);\n line-height: 
var(--jp-code-line-height);\n}\n\n"],"sourceRoot":""}]);
-// Exports
-/* harmony default export */ const __WEBPACK_DEFAULT_EXPORT__ = (___CSS_LOADER_EXPORT___);
-
-
-/***/ }),
-
-/***/ "./style/base.css":
-/*!************************!*\
- !*** ./style/base.css ***!
- \************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */ "default": () => (__WEBPACK_DEFAULT_EXPORT__)
-/* harmony export */ });
-/* harmony import */ var _node_modules_style_loader_dist_runtime_injectStylesIntoStyleTag_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! !../node_modules/style-loader/dist/runtime/injectStylesIntoStyleTag.js */ "./node_modules/style-loader/dist/runtime/injectStylesIntoStyleTag.js");
-/* harmony import */ var _node_modules_style_loader_dist_runtime_injectStylesIntoStyleTag_js__WEBPACK_IMPORTED_MODULE_0___default = /*#__PURE__*/__webpack_require__.n(_node_modules_style_loader_dist_runtime_injectStylesIntoStyleTag_js__WEBPACK_IMPORTED_MODULE_0__);
-/* harmony import */ var _node_modules_css_loader_dist_cjs_js_base_css__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! !!../node_modules/css-loader/dist/cjs.js!./base.css */ "./node_modules/css-loader/dist/cjs.js!./style/base.css");
-
-
-
-var options = {};
-
-options.insert = "head";
-options.singleton = false;
-
-var update = _node_modules_style_loader_dist_runtime_injectStylesIntoStyleTag_js__WEBPACK_IMPORTED_MODULE_0___default()(_node_modules_css_loader_dist_cjs_js_base_css__WEBPACK_IMPORTED_MODULE_1__["default"], options);
-
-
-
-/* harmony default export */ const __WEBPACK_DEFAULT_EXPORT__ = (_node_modules_css_loader_dist_cjs_js_base_css__WEBPACK_IMPORTED_MODULE_1__["default"].locals || {});
-
-/***/ }),
-
-/***/ "./style/index.js":
-/*!************************!*\
- !*** ./style/index.js ***!
- \************************/
-/***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
-
-__webpack_require__.r(__webpack_exports__);
-/* harmony import */ var _base_css__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./base.css */ "./style/base.css");
-
-
-
-/***/ }),
-
-/***/ "data:image/svg+xml, ":
-/*!********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************!*\
- !*** data:image/svg+xml, ***!
- \********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************/
-/***/ ((module) => {
-
-module.exports = "data:image/svg+xml, ";
-
-/***/ })
-
-}]);
-//# sourceMappingURL=style_index_js.8d733cc8b74fabbd10b8.js.map
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style_index_js.8d733cc8b74fabbd10b8.js.map b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style_index_js.8d733cc8b74fabbd10b8.js.map
deleted file mode 100644
index c39a124ce9d..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/style_index_js.8d733cc8b74fabbd10b8.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":"style_index_js.8d733cc8b74fabbd10b8.js","mappings":";;;;;;;;;;;;;;;;;;;AAAA;AAC0G;AACjB;AACO;AAChG,4CAA4C,yuEAAmmC;AAC/oC,8BAA8B,mFAA2B,CAAC,4FAAqC;AAC/F,yCAAyC,sFAA+B;AACxE;AACA,sMAAsM,kBAAkB,wBAAwB,aAAa,YAAY,cAAc,cAAc,kBAAkB,gBAAgB,iBAAiB,GAAG,mBAAmB,wBAAwB,aAAa,YAAY,cAAc,cAAc,kBAAkB,wCAAwC,eAAe,uBAAuB,mEAAmE,GAAG,gCAAgC,2BAA2B,GAAG,yBAAyB,QAAQ,eAAe,cAAc,gBAAgB,gBAAgB,oBAAoB,eAAe,gBAAgB,iBAAiB,KAAK,UAAU,eAAe,cAAc,gBAAgB,gBAAgB,oBAAoB,eAAe,gBAAgB,iBAAiB,KAAK,QAAQ,eAAe,cAAc,gBAAgB,gBAAgB,oBAAoB,eAAe,gBAAgB,iBAAiB,KAAK,UAAU,eAAe,gBAAgB,kBAAkB,mBAAmB,iBAAiB,KAAK,GAAG,6BAA6B,mEAAmE,cAAc,wBAAwB,mBAAmB,oBAAoB,0BAA0B,wFAAwF,oBAAoB,qBAAqB,qBAAqB,yBAAyB,oBAAoB,uBAAuB,0BAA0B,2CAA2C,sBAAsB,8BAA8B,+BAA+B,wBAAwB,GAAG,sCAAsC,4IAA4I,GAAG,sCAAsC,4IAA4I,GAAG,uBAAuB,8BAA8B,iBAAiB,qBAAqB,qBAAqB,oCAAoC,mBAAmB,uCAAuC,iBAAiB,sBAAsB,2BAA2B,qBAAqB,0BAA0B,6BAA6B,GAAG,+GAA+G,8CAA8C,oCAAoC,GAAG,UAAU,2BAA2B,GAAG,UAAU,4BAA4B,eAAe,eAAe,mBAAmB,kBAAkB,kCAAkC,iBAAiB,sBAAsB,yBAAyB,qCAAqC,GAAG,YAAY,kBAAkB,GAAG,aAAa,yBAAyB,GAAG,gBAAgB,sFAAsF,GAAG,WAAW,0EAA0E,+BAA+B,GAAG,YAAY,QAAQ,cAAc,kBAAkB,gBAAgB,kBAAkB,wBAAwB,4BAA4B,qBAAqB,yCAAyC,wBAAwB,GAAG,oBAAoB,kBAAkB,2BAA2B,4BAA4B,wBAAwB,iBAAiB,kBAAkB,wBAAwB,wBAAwB,yBAAyB,GAAG,qBAAqB,kBAAkB,iBAAiB,mBAAmB,uBAAuB,gBAAgB,GAAG,QAAQ,mBAAmB,uBAAuB,oBAAoB,cAAc,GAAG,OAAO,mBAAmB,oBAAoB,kBAAkB,oBAAoB,uBAAuB,GAAG,KAAK,qBAAqB,oBAAoB,mBAAmB,0BAA0B,yBAAyB,GAAG,WAAW,mBAAmB,GAAG,UAAU,wCAAwC,qBAAqB,oBAAoB,uBAAuB,eAAe,cAAc,oBAAoB,GAAG,gIAAgI,eAAe,GAAG,GAAG,eAAe,iBAAiB,wBAAwB,qBAAqB,oBAAoB,oBAAoB,yCAAyC,uBAAuB,iBAAiB,eAAe,oBAAoB,+BAA+B,yBAAyB,GAAG,uBAAuB,wBAAwB,GAAG,SAAS,sBAAsB,WAAW,iUAAiU,mKAAmK,uKAAuK,sKAAsK,4BAA4B,gLAAgL,gLAAgL,iLAAiL,kLAAkL,kLAAkL,oLAAoL,qLAAqL,sLAAsL,sLAAsL,8IAA8I,2CAA2C,2CAA2C,2CAA2C,2CAA2C,kDAAkD,4BAA4B,+dAA+d,kCAAkC,8BAA8B,kDAAkD,+BAA+B,sKAAsK,8OAA8O,6CAA6C,6CAA6C,6CAA6C,yNAAyN,wDAAwD,0DAA0D,0DAA0D,0ZAA0Z,wCAAwC,uCAAuC,mCAAmC,uDAAuD,oCAAoC,qCAAqC,sCAAsC,qIAAqI,0CAA0C,2CAA2C,8CAA8C,0CAA0C,qGAAqG,kDAAkD,kDAAkD,kDAAkD,kDAAkD,gLAAgL,8J
AA8J,mCAAmC,oDAAoD,yJAAyJ,8DAA8D,iIAAiI,qGAAqG,iCAAiC,iCAAiC,wLAAwL,8BAA8B,2CAA2C,2CAA2C,2CAA2C,6MAA6M,mDAAmD,mDAAmD,mDAAmD,mDAAmD,oEAAoE,0CAA0C,0CAA0C,0CAA0C,yCAAyC,8CAA8C,4CAA4C,4CAA4C,4CAA4C,kGAAkG,2CAA2C,2CAA2C,2CAA2C,2CAA2C,yCAAyC,yCAAyC,yCAAyC,+CAA+C,6CAA6C,6CAA6C,6CAA6C,2CAA2C,yCAAyC,yCAAyC,yCAAyC,6DAA6D,qCAAqC,yCAAyC,sDAAsD,sDAAsD,sDAAsD,kEAAkE,gEAAgE,iEAAiE,mCAAmC,qEAAqE,yCAAyC,gCAAgC,6CAA6C,+DAA+D,sKAAsK,qKAAqK,sEAAsE,6DAA6D,yDAAyD,2kBAA2kB,0EAA0E,6DAA6D,wEAAwE,oFAAoF,oEAAoE,4FAA4F,mCAAmC,qDAAqD,iEAAiE,gDAAgD,uDAAuD,uEAAuE,4FAA4F,0DAA0D,yDAAyD,8CAA8C,4DAA4D,2DAA2D,gEAAgE,gFAAgF,qDAAqD,uDAAuD,uFAAuF,wCAAwC,0CAA0C,uCAAuC,0DAA0D,8CAA8C,8CAA8C,+CAA+C,4CAA4C,+CAA+C,8CAA8C,6CAA6C,4CAA4C,2CAA2C,6CAA6C,8CAA8C,2CAA2C,uCAAuC,6CAA6C,0CAA0C,yCAAyC,wCAAwC,yCAAyC,sCAAsC,mEAAmE,sEAAsE,2EAA2E,0CAA0C,qCAAqC,kEAAkE,4CAA4C,6FAA6F,yEAAyE,yHAAyH,mDAAmD,kDAAkD,kDAAkD,GAAG,sWAAsW,4CAA4C,wCAAwC,4CAA4C,GAAG,WAAW,oFAAoF,KAAK,KAAK,UAAU,YAAY,WAAW,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,KAAK,KAAK,YAAY,WAAW,UAAU,UAAU,UAAU,UAAU,YAAY,WAAW,YAAY,aAAa,MAAM,KAAK,YAAY,MAAM,KAAK,KAAK,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,KAAK,KAAK,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,KAAK,KAAK,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,UAAU,KAAK,KAAK,UAAU,UAAU,UAAU,UAAU,UAAU,KAAK,OAAO,UAAU,KAAK,YAAY,WAAW,YAAY,WAAW,UAAU,YAAY,aAAa,WAAW,YAAY,aAAa,aAAa,WAAW,YAAY,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,OAAO,KAAK,YAAY,OAAO,KAAK,YAAY,OAAO,KAAK,YAAY,WAAW,YAAY,aAAa,aAAa,WAAW,YAAY,WAAW,YAAY,aAAa,aAAa,aAAa,aAAa,OAAO,YAAY,OAAO,YAAY,aAAa,MAAM,KAAK,YAAY,OAAO,KAAK,YAAY,WAAW,UAAU,YAAY,WAAW,YAAY,WAAW,YAAY,aAAa,aAAa,MAAM,KAAK,UAAU,KAAK,KAAK,YAAY,OAAO,KAAK,YAAY,MAAM,MAAM,aAAa,cAAc,OAAO,KAAK,KAAK,UAAU,UAAU,UAAU,UAAU,YAAY,aAAa,aAAa,aAAa,aAAa,OAAO,KAAK,UAAU,YAAY,aAAa,aAAa,WAAW,UAAU,YAAY,aAAa,aAAa,OAAO,KAAK,UAAU,UAAU,UAAU,YAAY,WAAW,MAAM,KAAK,UAAU,YAAY,WAAW,UAAU,MAAM,KAAK,UAAU,UAAU,UAAU,UAAU,YAAY,MAAM,KAAK,YAAY,WAAW,UAAU,YAAY,aAAa,MAAM,KAAK,UAAU,OAAO,KAAK,YAAY,aAAa,WAAW,YAAY,WAAW,UAAU,UAAU,OAAO,KAAK,UAAU,KAAK,KAAK,KAAK,UAAU,YAAY,aAAa,WAAW,UAAU,YAAY,aAAa,WA
AW,UAAU,UAAU,YAAY,aAAa,OAAO,KAAK,YAAY,OAAO,KAAK,KAAK,MAAM,KAAK,UAAU,MAAM,YAAY,UAAU,KAAK,SAAS,KAAK,SAAS,KAAK,YAAY,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,QAAQ,QAAQ,MAAM,YAAY,aAAa,aAAa,aAAa,aAAa,aAAa,cAAc,aAAa,MAAM,YAAY,aAAa,yBAAyB,aAAa,cAAc,MAAM,QAAQ,QAAQ,MAAM,YAAY,aAAa,aAAa,aAAa,cAAc,QAAQ,MAAM,YAAY,aAAa,aAAa,cAAc,YAAY,MAAM,YAAY,aAAa,aAAa,yBAAyB,aAAa,aAAa,aAAa,cAAc,aAAa,cAAc,aAAa,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,aAAa,cAAc,cAAc,OAAO,QAAQ,SAAS,MAAM,YAAY,yBAAyB,yBAAyB,aAAa,cAAc,aAAa,cAAc,aAAa,aAAa,aAAa,cAAc,SAAS,MAAM,YAAY,aAAa,aAAa,aAAa,cAAc,SAAS,MAAM,YAAY,aAAa,aAAa,aAAa,cAAc,cAAc,aAAa,aAAa,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,cAAc,cAAc,aAAa,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,cAAc,cAAc,cAAc,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,aAAa,cAAc,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,MAAM,OAAO,aAAa,MAAM,OAAO,cAAc,cAAc,aAAa,aAAa,cAAc,UAAU,KAAK,OAAO,MAAM,aAAa,aAAa,aAAa,cAAc,cAAc,cAAc,cAAc,cAAc,cAAc,aAAa,aAAa,aAAa,aAAa,aAAa,cAAc,cAAc,cAAc,cAAc,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,cAAc,cAAc,aAAa,aAAa,cAAc,cAAc,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,cAAc,cAAc,cAAc,cAAc,cAAc,cAAc,aAAa,aAAa,aAAa,aAAa,aAAa,OAAO,KAAK,aAAa,aAAa,aAAa,aAAa,aAAa,aAAa,OAAO,OAAO,QAAQ,aAAa,UAAU,YAAY,aAAa,aAAa,sLAAsL,kBAAkB,wBAAwB,aAAa,YAAY,cAAc,cAAc,kBAAkB,gBAAgB,iBAAiB,GAAG,mBAAmB,wBAAwB,aAAa,YAAY,cAAc,cAAc,kBAAkB,wCAAwC,eAAe,uBAAuB,mEAAmE,GAAG,gCAAgC,2BAA2B,GAAG,yBAAyB,QAAQ,eAAe,cAAc,gBAAgB,gBAAgB,oBAAoB,eAAe,gBAAgB,iBAAiB,KAAK,UAAU,eAAe,cAAc,gBAAgB,gBAAgB,oBAAoB,eAAe,gBAAgB,iBAAiB,KAAK,QAAQ,eAAe,cAAc,gBAAgB,gBAAgB,oBAAoB,eAAe,gBAAgB,iBAAiB,KAAK,UAAU,eAAe,gBAAgB,kBAAkB,mBAAmB,iBAAiB,KAAK,GAAG,6BAA6B,mEAAmE,cAAc,wBAAwB,mBAAmB,oBAAoB,0BAA0B,wFAAwF,oBAAoB,qBAAqB,qBAAqB,yBAAyB,oBAAoB,uBAAuB,0BAA0B,2CAA2C,sBAAsB,8BAA8B,+BAA+B,wBAAwB,GAAG,sCAAsC,4IAA4I,GAAG,sCAAsC,4IAA4I,GAAG,uBAAuB,8BAA8B,iBAAiB,qBAAqB,qBAAqB,oCAAoC,mBAAmB,uCAAuC,iBAAiB,sBAAsB,2BAA2B,qBAAqB,0BAA0B,6BAA6B,GAAG,+GAA+G,8CAA8C,oCA
AoC,GAAG,UAAU,2BAA2B,GAAG,UAAU,4BAA4B,eAAe,eAAe,mBAAmB,kBAAkB,kCAAkC,iBAAiB,sBAAsB,yBAAyB,qCAAqC,GAAG,YAAY,kBAAkB,GAAG,aAAa,yBAAyB,GAAG,gBAAgB,sFAAsF,GAAG,WAAW,2JAA2J,gCAAgC,gBAAgB,sBAAsB,qVAAqV,mBAAmB,gVAAgV,mBAAmB,kDAAkD,+BAA+B,GAAG,YAAY,QAAQ,cAAc,kBAAkB,gBAAgB,kBAAkB,wBAAwB,4BAA4B,qBAAqB,yCAAyC,wBAAwB,GAAG,oBAAoB,kBAAkB,2BAA2B,4BAA4B,wBAAwB,iBAAiB,kBAAkB,wBAAwB,wBAAwB,yBAAyB,GAAG,qBAAqB,kBAAkB,iBAAiB,mBAAmB,uBAAuB,gBAAgB,GAAG,QAAQ,mBAAmB,uBAAuB,oBAAoB,cAAc,GAAG,OAAO,mBAAmB,oBAAoB,kBAAkB,oBAAoB,uBAAuB,GAAG,KAAK,qBAAqB,oBAAoB,mBAAmB,0BAA0B,yBAAyB,GAAG,WAAW,mBAAmB,GAAG,UAAU,wCAAwC,qBAAqB,oBAAoB,uBAAuB,eAAe,cAAc,oBAAoB,GAAG,gIAAgI,eAAe,GAAG,GAAG,eAAe,iBAAiB,wBAAwB,qBAAqB,oBAAoB,oBAAoB,yCAAyC,uBAAuB,iBAAiB,eAAe,oBAAoB,+BAA+B,yBAAyB,GAAG,uBAAuB,wBAAwB,GAAG,SAAS,sBAAsB,WAAW,iUAAiU,mKAAmK,uKAAuK,sKAAsK,4BAA4B,gLAAgL,gLAAgL,iLAAiL,kLAAkL,kLAAkL,oLAAoL,qLAAqL,sLAAsL,sLAAsL,8IAA8I,2CAA2C,2CAA2C,2CAA2C,2CAA2C,kDAAkD,4BAA4B,+dAA+d,kCAAkC,8BAA8B,kDAAkD,+BAA+B,sKAAsK,8OAA8O,6CAA6C,6CAA6C,6CAA6C,yNAAyN,wDAAwD,0DAA0D,0DAA0D,0ZAA0Z,wCAAwC,uCAAuC,mCAAmC,uDAAuD,oCAAoC,qCAAqC,sCAAsC,qIAAqI,0CAA0C,2CAA2C,8CAA8C,0CAA0C,qGAAqG,kDAAkD,kDAAkD,kDAAkD,kDAAkD,gLAAgL,8JAA8J,mCAAmC,oDAAoD,yJAAyJ,8DAA8D,iIAAiI,qGAAqG,iCAAiC,iCAAiC,wLAAwL,8BAA8B,2CAA2C,2CAA2C,2CAA2C,6MAA6M,mDAAmD,mDAAmD,mDAAmD,mDAAmD,oEAAoE,0CAA0C,0CAA0C,0CAA0C,yCAAyC,8CAA8C,4CAA4C,4CAA4C,4CAA4C,kGAAkG,2CAA2C,2CAA2C,2CAA2C,2CAA2C,yCAAyC,yCAAyC,yCAAyC,+CAA+C,6CAA6C,6CAA6C,6CAA6C,2CAA2C,yCAAyC,yCAAyC,yCAAyC,6DAA6D,qCAAqC,yCAAyC,sDAAsD,sDAAsD,sDAAsD,kEAAkE,gEAAgE,iEAAiE,mCAAmC,qEAAqE,yCAAyC,gCAAgC,6CAA6C,+DAA+D,sKAAsK,qKAAqK,sEAAsE,6DAA6D,yDAAyD,2kBAA2kB,0EAA0E,6DAA6D,wEAAwE,oFAAoF,oEAAoE,4FAA4F,mCAAmC,qDAAqD,iEAAiE,gDAAgD,uDAAuD,uEAAuE,4FAA4F,0DAA0D,yDAAyD,8CAA8C,4DAA4D,2DAA2D,gEAAgE,gFAAgF,qDAAqD,uDAAuD,uFAAuF,wCAAwC,0CAA0C,uCAAuC,0DAA0D,8CAA8C,8CAA8C,+CAA+C,4CAA4C,+CAA+C,8CAA8C,6CAA6C,4CAA4C,2CAA2C,6CAA6C,8CAA8C,2CAA2C,uCAAuC,6CAA6C,0CAA0C,yCAAyC,wCAAwC,yCAAyC,sCAAsC,mEAAmE,sEAAsE,2EAA2E,0CAA0C,qCAAqC,kEAAkE,4CAA4C,6FAA6F,yEAAyE
,yHAAyH,mDAAmD,kDAAkD,kDAAkD,GAAG,sWAAsW,4CAA4C,wCAAwC,4CAA4C,GAAG,uBAAuB;AACp/2C;AACA,iEAAe,uBAAuB,EAAC;;;;;;;;;;;;;;;;;;ACVkD;AACzF,YAAsF;;AAEtF;;AAEA;AACA;;AAEA,aAAa,0GAAG,CAAC,qFAAO;;;;AAIxB,iEAAe,4FAAc,MAAM;;;;;;;;;;;;ACZf","sources":["webpack://neural_compressor_ext_lab/./style/base.css","webpack://neural_compressor_ext_lab/./style/base.css?1944","webpack://neural_compressor_ext_lab/./style/index.js"],"sourcesContent":["// Imports\nimport ___CSS_LOADER_API_SOURCEMAP_IMPORT___ from \"../node_modules/css-loader/dist/runtime/sourceMaps.js\";\nimport ___CSS_LOADER_API_IMPORT___ from \"../node_modules/css-loader/dist/runtime/api.js\";\nimport ___CSS_LOADER_GET_URL_IMPORT___ from \"../node_modules/css-loader/dist/runtime/getUrl.js\";\nvar ___CSS_LOADER_URL_IMPORT_0___ = new URL(\"data:image/svg+xml, \", import.meta.url);\nvar ___CSS_LOADER_EXPORT___ = ___CSS_LOADER_API_IMPORT___(___CSS_LOADER_API_SOURCEMAP_IMPORT___);\nvar ___CSS_LOADER_URL_REPLACEMENT_0___ = ___CSS_LOADER_GET_URL_IMPORT___(___CSS_LOADER_URL_IMPORT_0___);\n// Module\n___CSS_LOADER_EXPORT___.push([module.id, \"/*\\n See the JupyterLab Developer Guide for useful CSS Patterns:\\n\\n https://jupyterlab.readthedocs.io/en/stable/developer/css.html\\n*/\\n.lds-ripple {\\n display: flex;\\n position: absolute; \\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 80px;\\n height: 80px;\\n}\\n.lds-ripple div {\\n position: absolute; \\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n border: 4px solid rgb(245, 131, 55);\\n opacity: 1;\\n border-radius: 50%;\\n animation: lds-ripple 1s cubic-bezier(0, 0.2, 0.8, 1) infinite;\\n}\\n.lds-ripple div:nth-child(2) {\\n animation-delay: -0.5s;\\n}\\n@keyframes lds-ripple {\\n 0% {\\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 0;\\n height: 0;\\n opacity: 0;\\n }\\n 4.9% {\\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 0;\\n height: 0;\\n opacity: 
0;\\n }\\n 5% {\\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 0;\\n height: 0;\\n opacity: 1;\\n }\\n 100% {\\n top: 0px;\\n left: 0px;\\n width: 72px;\\n height: 72px;\\n opacity: 0;\\n }\\n}\\n\\n\\n/* CSS */\\n.button-62 {\\n background: linear-gradient(to bottom right, #EF4765, #FF9A5A);\\n border: 0;\\n border-radius: 12px;\\n color: #FFFFFF;\\n cursor: pointer;\\n display: inline-block;\\n font-family: -apple-system,system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif;\\n font-size: 16px;\\n font-weight: 500;\\n line-height: 2.5;\\n outline: transparent;\\n padding: 0 1rem;\\n text-align: center;\\n text-decoration: none;\\n transition: box-shadow .2s ease-in-out;\\n user-select: none;\\n -webkit-user-select: none;\\n touch-action: manipulation;\\n white-space: nowrap;\\n}\\n\\n.button-62:not([disabled]):focus {\\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\\n}\\n\\n.button-62:not([disabled]):hover {\\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\\n}\\n\\n.aselector select {\\n background-color: initial;\\n border: none;\\n border-radius: 0;\\n box-shadow: none;\\n color: var(--jp-ui-font-color0);\\n display: block;\\n font-size: var(--jp-ui-font-size1);\\n height: 24px;\\n line-height: 14px;\\n padding: 0 25px 0 10px;\\n text-align: left;\\n -moz-appearance: none;\\n -webkit-appearance: none;\\n}\\n\\n/* Use our own theme for hover and option styles */\\n.aselector select:hover,\\n.aselector select > option {\\n background-color: var(--jp-layout-color2);\\n color: var(--jp-ui-font-color0);\\n}\\nselect {\\n box-sizing: border-box;\\n}\\n\\n.font{\\nbackground-color: initial;\\nborder: none;\\nheight: 21px;\\nborder-radius: 0;\\nfont-weight:500;\\ncolor: var(--jp-ui-font-color0);\\ndisplay: block;\\nline-height: 22.5px;\\npadding: 0 
25px 0 10px;\\nfont-size: var(--jp-ui-font-size1);\\n}\\n.wrapper {\\n display: flex;\\n}\\n.f1ozlkqi {\\n pointer-events: none;\\n}\\n\\n.palybutton{\\n background-image: '/home2/longxin/Neural_Coder_EXT/style/icons8-circled-play.gif';\\n}\\n.loading{\\n \\n background-image: url(\" + ___CSS_LOADER_URL_REPLACEMENT_0___ + \");\\n background-size: contain; \\n}\\n\\n.dialog{\\nbody {\\n margin: 0;\\n height: 100vh;\\n width:600px;\\n display: flex;\\n align-items: center;\\n justify-content: center;\\n overflow: hidden;\\n font-family: \\\"Poppins\\\", sans-serif;\\n background: #e3d0b6;\\n}\\n\\n#cookie-policy {\\n display: flex;\\n flex-direction: column;\\n justify-content: center;\\n align-items: center;\\n width: 460px;\\n height: 600px;\\n background: #f3efe6;\\n border-radius: 12px;\\n transform: scale(.8);\\n}\\n\\n#cookie-wrapper {\\n height: 240px;\\n width: 240px;\\n margin: 30px 0;\\n position: relative;\\n left: -40px;\\n}\\n\\nh1 {\\n color: #6c3a1f;\\n text-align: center;\\n font-size: 36px;\\n margin: 0;\\n}\\n\\np {\\n color: #a28561;\\n font-size: 14px;\\n margin-top: 0;\\n padding: 0 60px;\\n text-align: center;\\n}\\na {\\n margin-top: 18px;\\n font-size: 14px;\\n color: #a28561;\\n text-decoration: none;\\n pointer-events: none;\\n}\\na:hover {\\n color: #846b4d;\\n}\\n\\nspan {\\n font-family: \\\"Amatic SC\\\", cursive;\\n font-weight: 400;\\n font-size: 20px;\\n position: relative;\\n top: -18px;\\n left: 3px;\\n color: #a28561; \\n}\\n\\n#heart-no, #thought-heart-yes, #mouth, #face-no, #thought-1, #thought-2, #thought-heart-na, #q-mark, #eyes, #leg-l, #leg-r {\\n opacity: 0;\\n}\\n}\\n.dia_button {\\n color: white;\\n background: #dd794a;\\n margin-top: 12px;\\n cursor: pointer;\\n font-size: 24px;\\n font-family: \\\"Poppins\\\", sans-serif;\\n border-radius: 9px;\\n border: none;\\n width: 72%;\\n padding: 12px 0;\\n transition: 150ms ease-out;\\n pointer-events: none;\\n}\\n\\n.dia_button:hover {\\n background: #d66029;\\n}\\n\\n.pad{\\n 
padding-left:6%\\n}\\n\\n:root {\\n /* Elevation\\n *\\n * We style box-shadows using Material Design's idea of elevation. These particular numbers are taken from here:\\n *\\n * https://github.com/material-components/material-components-web\\n * https://material-components-web.appspot.com/elevation.html\\n */\\n\\n --jp-shadow-base-lightness: 0;\\n --jp-shadow-umbra-color: rgba(\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n 0.2\\n );\\n --jp-shadow-penumbra-color: rgba(\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n 0.14\\n );\\n --jp-shadow-ambient-color: rgba(\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n 0.12\\n );\\n --jp-elevation-z0: none;\\n --jp-elevation-z1: 0px 2px 1px -1px var(--jp-shadow-umbra-color),\\n 0px 1px 1px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 3px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z2: 0px 3px 1px -2px var(--jp-shadow-umbra-color),\\n 0px 2px 2px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 5px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z4: 0px 2px 4px -1px var(--jp-shadow-umbra-color),\\n 0px 4px 5px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 10px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z6: 0px 3px 5px -1px var(--jp-shadow-umbra-color),\\n 0px 6px 10px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 18px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z8: 0px 5px 5px -3px var(--jp-shadow-umbra-color),\\n 0px 8px 10px 1px var(--jp-shadow-penumbra-color),\\n 0px 3px 14px 2px var(--jp-shadow-ambient-color);\\n --jp-elevation-z12: 0px 7px 8px -4px var(--jp-shadow-umbra-color),\\n 0px 12px 17px 2px var(--jp-shadow-penumbra-color),\\n 0px 5px 22px 4px var(--jp-shadow-ambient-color);\\n --jp-elevation-z16: 0px 8px 10px -5px var(--jp-shadow-umbra-color),\\n 0px 16px 24px 2px 
var(--jp-shadow-penumbra-color),\\n 0px 6px 30px 5px var(--jp-shadow-ambient-color);\\n --jp-elevation-z20: 0px 10px 13px -6px var(--jp-shadow-umbra-color),\\n 0px 20px 31px 3px var(--jp-shadow-penumbra-color),\\n 0px 8px 38px 7px var(--jp-shadow-ambient-color);\\n --jp-elevation-z24: 0px 11px 15px -7px var(--jp-shadow-umbra-color),\\n 0px 24px 38px 3px var(--jp-shadow-penumbra-color),\\n 0px 9px 46px 8px var(--jp-shadow-ambient-color);\\n\\n /* Borders\\n *\\n * The following variables, specify the visual styling of borders in JupyterLab.\\n */\\n\\n --jp-border-width: 1px;\\n --jp-border-color0: var(--md-grey-400);\\n --jp-border-color1: var(--md-grey-400);\\n --jp-border-color2: var(--md-grey-300);\\n --jp-border-color3: var(--md-grey-200);\\n --jp-inverse-border-color: var(--md-grey-600);\\n --jp-border-radius: 2px;\\n\\n /* UI Fonts\\n *\\n * The UI font CSS variables are used for the typography all of the JupyterLab\\n * user interface elements that are not directly user generated content.\\n *\\n * The font sizing here is done assuming that the body font size of --jp-ui-font-size1\\n * is applied to a parent element. 
When children elements, such as headings, are sized\\n * in em all things will be computed relative to that body size.\\n */\\n\\n --jp-ui-font-scale-factor: 1.2;\\n --jp-ui-font-size0: 0.83333em;\\n --jp-ui-font-size1: 13px; /* Base font size */\\n --jp-ui-font-size2: 1.2em;\\n --jp-ui-font-size3: 1.44em;\\n\\n --jp-ui-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica,\\n Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';\\n\\n /*\\n * Use these font colors against the corresponding main layout colors.\\n * In a light theme, these go from dark to light.\\n */\\n\\n /* Defaults use Material Design specification */\\n --jp-ui-font-color0: rgba(0, 0, 0, 1);\\n --jp-ui-font-color1: rgba(0, 0, 0, 0.87);\\n --jp-ui-font-color2: rgba(0, 0, 0, 0.54);\\n --jp-ui-font-color3: rgba(0, 0, 0, 0.38);\\n\\n /*\\n * Use these against the brand/accent/warn/error colors.\\n * These will typically go from light to darker, in both a dark and light theme.\\n */\\n\\n --jp-ui-inverse-font-color0: rgba(255, 255, 255, 1);\\n --jp-ui-inverse-font-color1: rgba(255, 255, 255, 1);\\n --jp-ui-inverse-font-color2: rgba(255, 255, 255, 0.7);\\n --jp-ui-inverse-font-color3: rgba(255, 255, 255, 0.5);\\n\\n /* Content Fonts\\n *\\n * Content font variables are used for typography of user generated content.\\n *\\n * The font sizing here is done assuming that the body font size of --jp-content-font-size1\\n * is applied to a parent element. 
When children elements, such as headings, are sized\\n * in em all things will be computed relative to that body size.\\n */\\n\\n --jp-content-line-height: 1.6;\\n --jp-content-font-scale-factor: 1.2;\\n --jp-content-font-size0: 0.83333em;\\n --jp-content-font-size1: 14px; /* Base font size */\\n --jp-content-font-size2: 1.2em;\\n --jp-content-font-size3: 1.44em;\\n --jp-content-font-size4: 1.728em;\\n --jp-content-font-size5: 2.0736em;\\n\\n /* This gives a magnification of about 125% in presentation mode over normal. */\\n --jp-content-presentation-font-size1: 17px;\\n\\n --jp-content-heading-line-height: 1;\\n --jp-content-heading-margin-top: 1.2em;\\n --jp-content-heading-margin-bottom: 0.8em;\\n --jp-content-heading-font-weight: 500;\\n\\n /* Defaults use Material Design specification */\\n --jp-content-font-color0: rgba(0, 0, 0, 1);\\n --jp-content-font-color1: rgba(0, 0, 0, 0.87);\\n --jp-content-font-color2: rgba(0, 0, 0, 0.54);\\n --jp-content-font-color3: rgba(0, 0, 0, 0.38);\\n\\n --jp-content-link-color: var(--md-blue-700);\\n\\n --jp-content-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI',\\n Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',\\n 'Segoe UI Symbol';\\n\\n /*\\n * Code Fonts\\n *\\n * Code font variables are used for typography of code and other monospaces content.\\n */\\n\\n --jp-code-font-size: 13px;\\n --jp-code-line-height: 1.3077; /* 17px for 13px base */\\n --jp-code-padding: 5px; /* 5px for 13px base, codemirror highlighting needs integer px value */\\n --jp-code-font-family-default: Menlo, Consolas, 'DejaVu Sans Mono', monospace;\\n --jp-code-font-family: var(--jp-code-font-family-default);\\n\\n /* This gives a magnification of about 125% in presentation mode over normal. 
*/\\n --jp-code-presentation-font-size: 16px;\\n\\n /* may need to tweak cursor width if you change font size */\\n --jp-code-cursor-width0: 1.4px;\\n --jp-code-cursor-width1: 2px;\\n --jp-code-cursor-width2: 4px;\\n\\n /* Layout\\n *\\n * The following are the main layout colors use in JupyterLab. In a light\\n * theme these would go from light to dark.\\n */\\n\\n --jp-layout-color0: white;\\n --jp-layout-color1: white;\\n --jp-layout-color2: var(--md-grey-200);\\n --jp-layout-color3: var(--md-grey-400);\\n --jp-layout-color4: var(--md-grey-600);\\n\\n /* Inverse Layout\\n *\\n * The following are the inverse layout colors use in JupyterLab. In a light\\n * theme these would go from dark to light.\\n */\\n\\n --jp-inverse-layout-color0: #111111;\\n --jp-inverse-layout-color1: var(--md-grey-900);\\n --jp-inverse-layout-color2: var(--md-grey-800);\\n --jp-inverse-layout-color3: var(--md-grey-700);\\n --jp-inverse-layout-color4: var(--md-grey-600);\\n\\n /* Brand/accent */\\n\\n --jp-brand-color0: var(--md-blue-900);\\n --jp-brand-color1: var(--md-blue-700);\\n --jp-brand-color2: var(--md-blue-300);\\n --jp-brand-color3: var(--md-blue-100);\\n --jp-brand-color4: var(--md-blue-50);\\n\\n --jp-accent-color0: var(--md-green-900);\\n --jp-accent-color1: var(--md-green-700);\\n --jp-accent-color2: var(--md-green-300);\\n --jp-accent-color3: var(--md-green-100);\\n\\n /* State colors (warn, error, success, info) */\\n\\n --jp-warn-color0: var(--md-orange-900);\\n --jp-warn-color1: var(--md-orange-700);\\n --jp-warn-color2: var(--md-orange-300);\\n --jp-warn-color3: var(--md-orange-100);\\n\\n --jp-error-color0: var(--md-red-900);\\n --jp-error-color1: var(--md-red-700);\\n --jp-error-color2: var(--md-red-300);\\n --jp-error-color3: var(--md-red-100);\\n\\n --jp-success-color0: var(--md-green-900);\\n --jp-success-color1: var(--md-green-700);\\n --jp-success-color2: var(--md-green-300);\\n --jp-success-color3: var(--md-green-100);\\n\\n --jp-info-color0: 
var(--md-cyan-900);\\n --jp-info-color1: var(--md-cyan-700);\\n --jp-info-color2: var(--md-cyan-300);\\n --jp-info-color3: var(--md-cyan-100);\\n\\n /* Cell specific styles */\\n\\n --jp-cell-padding: 5px;\\n\\n --jp-cell-collapser-width: 8px;\\n --jp-cell-collapser-min-height: 20px;\\n --jp-cell-collapser-not-active-hover-opacity: 0.6;\\n\\n --jp-cell-editor-background: var(--md-grey-100);\\n --jp-cell-editor-border-color: var(--md-grey-300);\\n --jp-cell-editor-box-shadow: inset 0 0 2px var(--md-blue-300);\\n --jp-cell-editor-active-background: var(--jp-layout-color0);\\n --jp-cell-editor-active-border-color: var(--jp-brand-color1);\\n\\n --jp-cell-prompt-width: 64px;\\n --jp-cell-prompt-font-family: var(--jp-code-font-family-default);\\n --jp-cell-prompt-letter-spacing: 0px;\\n --jp-cell-prompt-opacity: 1;\\n --jp-cell-prompt-not-active-opacity: 0.5;\\n --jp-cell-prompt-not-active-font-color: var(--md-grey-700);\\n /* A custom blend of MD grey and blue 600\\n * See https://meyerweb.com/eric/tools/color-blend/#546E7A:1E88E5:5:hex */\\n --jp-cell-inprompt-font-color: #307fc1;\\n /* A custom blend of MD grey and orange 600\\n * https://meyerweb.com/eric/tools/color-blend/#546E7A:F4511E:5:hex */\\n --jp-cell-outprompt-font-color: #bf5b3d;\\n\\n /* Notebook specific styles */\\n\\n --jp-notebook-padding: 10px;\\n --jp-notebook-select-background: var(--jp-layout-color1);\\n --jp-notebook-multiselected-color: var(--md-blue-50);\\n\\n /* The scroll padding is calculated to fill enough space at the bottom of the\\n notebook to show one single-line cell (with appropriate padding) at the top\\n when the notebook is scrolled all the way to the bottom. We also subtract one\\n pixel so that no scrollbar appears if we have just one single-line cell in the\\n notebook. 
This padding is to enable a 'scroll past end' feature in a notebook.\\n */\\n --jp-notebook-scroll-padding: calc(\\n 100% - var(--jp-code-font-size) * var(--jp-code-line-height) -\\n var(--jp-code-padding) - var(--jp-cell-padding) - 1px\\n );\\n\\n /* Rendermime styles */\\n\\n --jp-rendermime-error-background: #fdd;\\n --jp-rendermime-table-row-background: var(--md-grey-100);\\n --jp-rendermime-table-row-hover-background: var(--md-light-blue-50);\\n\\n /* Dialog specific styles */\\n\\n --jp-dialog-background: rgba(0, 0, 0, 0.25);\\n\\n /* Console specific styles */\\n\\n --jp-console-padding: 10px;\\n\\n /* Toolbar specific styles */\\n\\n --jp-toolbar-border-color: var(--jp-border-color1);\\n --jp-toolbar-micro-height: 8px;\\n --jp-toolbar-background: var(--jp-layout-color1);\\n --jp-toolbar-box-shadow: 0px 0px 2px 0px rgba(0, 0, 0, 0.24);\\n --jp-toolbar-header-margin: 4px 4px 0px 4px;\\n --jp-toolbar-active-background: var(--md-grey-300);\\n\\n /* Statusbar specific styles */\\n\\n --jp-statusbar-height: 24px;\\n\\n /* Input field styles */\\n\\n --jp-input-box-shadow: inset 0 0 2px var(--md-blue-300);\\n --jp-input-active-background: var(--jp-layout-color1);\\n --jp-input-hover-background: var(--jp-layout-color1);\\n --jp-input-background: var(--md-grey-100);\\n --jp-input-border-color: var(--jp-inverse-border-color);\\n --jp-input-active-border-color: var(--jp-brand-color1);\\n --jp-input-active-box-shadow-color: rgba(19, 124, 189, 0.3);\\n\\n /* General editor styles */\\n\\n --jp-editor-selected-background: #d9d9d9;\\n --jp-editor-selected-focused-background: #d7d4f0;\\n --jp-editor-cursor-color: var(--jp-ui-font-color0);\\n\\n /* Code mirror specific styles */\\n\\n --jp-mirror-editor-keyword-color: #008000;\\n --jp-mirror-editor-atom-color: #88f;\\n --jp-mirror-editor-number-color: #080;\\n --jp-mirror-editor-def-color: #00f;\\n --jp-mirror-editor-variable-color: var(--md-grey-900);\\n --jp-mirror-editor-variable-2-color: #05a;\\n 
--jp-mirror-editor-variable-3-color: #085;\\n --jp-mirror-editor-punctuation-color: #05a;\\n --jp-mirror-editor-property-color: #05a;\\n --jp-mirror-editor-operator-color: #aa22ff;\\n --jp-mirror-editor-comment-color: #408080;\\n --jp-mirror-editor-string-color: #ba2121;\\n --jp-mirror-editor-string-2-color: #708;\\n --jp-mirror-editor-meta-color: #aa22ff;\\n --jp-mirror-editor-qualifier-color: #555;\\n --jp-mirror-editor-builtin-color: #008000;\\n --jp-mirror-editor-bracket-color: #997;\\n --jp-mirror-editor-tag-color: #170;\\n --jp-mirror-editor-attribute-color: #00c;\\n --jp-mirror-editor-header-color: blue;\\n --jp-mirror-editor-quote-color: #090;\\n --jp-mirror-editor-link-color: #00c;\\n --jp-mirror-editor-error-color: #f00;\\n --jp-mirror-editor-hr-color: #999;\\n\\n /* Vega extension styles */\\n\\n --jp-vega-background: white;\\n\\n /* Sidebar-related styles */\\n\\n --jp-sidebar-min-width: 250px;\\n\\n /* Search-related styles */\\n\\n --jp-search-toggle-off-opacity: 0.5;\\n --jp-search-toggle-hover-opacity: 0.8;\\n --jp-search-toggle-on-opacity: 1;\\n --jp-search-selected-match-background-color: rgb(245, 200, 0);\\n --jp-search-selected-match-color: black;\\n --jp-search-unselected-match-background-color: var(\\n --jp-inverse-layout-color0\\n );\\n --jp-search-unselected-match-color: var(--jp-ui-inverse-font-color0);\\n\\n /* Icon colors that work well with light or dark backgrounds */\\n --jp-icon-contrast-color0: var(--md-purple-600);\\n --jp-icon-contrast-color1: var(--md-green-600);\\n --jp-icon-contrast-color2: var(--md-pink-600);\\n --jp-icon-contrast-color3: var(--md-blue-600);\\n}\\n\\n/*-----------------------------------------------------------------------------\\n| Copyright (c) Jupyter Development Team.\\n| Distributed under the terms of the Modified BSD License.\\n|----------------------------------------------------------------------------*/\\n\\n/* Set the default typography for monospace elements */\\ntt,\\ncode,\\nkbd,\\nsamp,\\npre {\\n 
font-family: var(--jp-code-font-family);\\n font-size: var(--jp-code-font-size);\\n line-height: var(--jp-code-line-height);\\n}\\n\\n\", \"\",{\"version\":3,\"sources\":[\"webpack://./style/base.css\"],\"names\":[],\"mappings\":\"AAAA;;;;CAIC;AACD;EACE,aAAa;EACb,kBAAkB;EAClB,OAAO;EACP,MAAM;EACN,QAAQ;EACR,SAAS;EACT,YAAY;EACZ,WAAW;EACX,YAAY;AACd;AACA;EACE,kBAAkB;EAClB,OAAO;EACP,MAAM;EACN,QAAQ;EACR,SAAS;EACT,YAAY;EACZ,mCAAmC;EACnC,UAAU;EACV,kBAAkB;EAClB,8DAA8D;AAChE;AACA;EACE,sBAAsB;AACxB;AACA;EACE;IACE,OAAO;IACP,MAAM;IACN,QAAQ;IACR,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,UAAU;EACZ;EACA;IACE,OAAO;IACP,MAAM;IACN,QAAQ;IACR,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,UAAU;EACZ;EACA;IACE,OAAO;IACP,MAAM;IACN,QAAQ;IACR,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,UAAU;EACZ;EACA;IACE,QAAQ;IACR,SAAS;IACT,WAAW;IACX,YAAY;IACZ,UAAU;EACZ;AACF;;;AAGA,QAAQ;AACR;EACE,8DAA8D;EAC9D,SAAS;EACT,mBAAmB;EACnB,cAAc;EACd,eAAe;EACf,qBAAqB;EACrB,iFAAiF;EACjF,eAAe;EACf,gBAAgB;EAChB,gBAAgB;EAChB,oBAAoB;EACpB,eAAe;EACf,kBAAkB;EAClB,qBAAqB;EACrB,sCAAsC;EACtC,iBAAiB;EACjB,yBAAyB;EACzB,0BAA0B;EAC1B,mBAAmB;AACrB;;AAEA;EACE,uIAAuI;AACzI;;AAEA;EACE,uIAAuI;AACzI;;AAEA;EACE,yBAAyB;EACzB,YAAY;EACZ,gBAAgB;EAChB,gBAAgB;EAChB,+BAA+B;EAC/B,cAAc;EACd,kCAAkC;EAClC,YAAY;EACZ,iBAAiB;EACjB,sBAAsB;EACtB,gBAAgB;EAChB,qBAAqB;EACrB,wBAAwB;AAC1B;;AAEA,kDAAkD;AAClD;;EAEE,yCAAyC;EACzC,+BAA+B;AACjC;AACA;EACE,sBAAsB;AACxB;;AAEA;AACA,yBAAyB;AACzB,YAAY;AACZ,YAAY;AACZ,gBAAgB;AAChB,eAAe;AACf,+BAA+B;AAC/B,cAAc;AACd,mBAAmB;AACnB,sBAAsB;AACtB,kCAAkC;AAClC;AACA;EACE,aAAa;AACf;AACA;EACE,oBAAoB;AACtB;;AAEA;EACE,iFAAiF;AACnF;AACA;;EAEE,yDAAm9B;EACn9B,wBAAwB;AAC1B;;AAEA;AACA;EACE,SAAS;EACT,aAAa;EACb,WAAW;EACX,aAAa;EACb,mBAAmB;EACnB,uBAAuB;EACvB,gBAAgB;EAChB,kCAAkC;EAClC,mBAAmB;AACrB;;AAEA;EACE,aAAa;EACb,sBAAsB;EACtB,uBAAuB;EACvB,mBAAmB;EACnB,YAAY;EACZ,aAAa;EACb,mBAAmB;EACnB,mBAAmB;EACnB,oBAAoB;AACtB;;AAEA;EACE,aAAa;EACb,YAAY;EACZ,cAAc;EACd,kBAAkB;EAClB,WAAW;AACb;;AAEA;EACE,cAAc;EACd,kBAAkB;EAClB,eAAe;EACf,SAAS;AACX;;AAEA;EACE,cAAc;EACd,eAAe;EA
Cf,aAAa;EACb,eAAe;EACf,kBAAkB;AACpB;AACA;EACE,gBAAgB;EAChB,eAAe;EACf,cAAc;EACd,qBAAqB;EACrB,oBAAoB;AACtB;AACA;EACE,cAAc;AAChB;;AAEA;EACE,iCAAiC;EACjC,gBAAgB;EAChB,eAAe;EACf,kBAAkB;EAClB,UAAU;EACV,SAAS;EACT,cAAc;AAChB;;AAEA;EACE,UAAU;AACZ;AACA;AACA;EACE,YAAY;EACZ,mBAAmB;EACnB,gBAAgB;EAChB,eAAe;EACf,eAAe;EACf,kCAAkC;EAClC,kBAAkB;EAClB,YAAY;EACZ,UAAU;EACV,eAAe;EACf,0BAA0B;EAC1B,oBAAoB;AACtB;;AAEA;EACE,mBAAmB;AACrB;;AAEA;EACE;AACF;;AAEA;EACE;;;;;;IAME;;EAEF,6BAA6B;EAC7B;;;;;GAKC;EACD;;;;;GAKC;EACD;;;;;GAKC;EACD,uBAAuB;EACvB;;kDAEgD;EAChD;;kDAEgD;EAChD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;EACjD;;mDAEiD;;EAEjD;;;IAGE;;EAEF,sBAAsB;EACtB,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;EACtC,6CAA6C;EAC7C,uBAAuB;;EAEvB;;;;;;;;IAQE;;EAEF,8BAA8B;EAC9B,6BAA6B;EAC7B,wBAAwB,EAAE,mBAAmB;EAC7C,yBAAyB;EACzB,0BAA0B;;EAE1B;+EAC6E;;EAE7E;;;IAGE;;EAEF,+CAA+C;EAC/C,qCAAqC;EACrC,wCAAwC;EACxC,wCAAwC;EACxC,wCAAwC;;EAExC;;;IAGE;;EAEF,mDAAmD;EACnD,mDAAmD;EACnD,qDAAqD;EACrD,qDAAqD;;EAErD;;;;;;;IAOE;;EAEF,6BAA6B;EAC7B,mCAAmC;EACnC,kCAAkC;EAClC,6BAA6B,EAAE,mBAAmB;EAClD,8BAA8B;EAC9B,+BAA+B;EAC/B,gCAAgC;EAChC,iCAAiC;;EAEjC,+EAA+E;EAC/E,0CAA0C;;EAE1C,mCAAmC;EACnC,sCAAsC;EACtC,yCAAyC;EACzC,qCAAqC;;EAErC,+CAA+C;EAC/C,0CAA0C;EAC1C,6CAA6C;EAC7C,6CAA6C;EAC7C,6CAA6C;;EAE7C,2CAA2C;;EAE3C;;qBAEmB;;EAEnB;;;;IAIE;;EAEF,yBAAyB;EACzB,6BAA6B,EAAE,uBAAuB;EACtD,sBAAsB,EAAE,sEAAsE;EAC9F,6EAA6E;EAC7E,yDAAyD;;EAEzD,+EAA+E;EAC/E,sCAAsC;;EAEtC,2DAA2D;EAC3D,8BAA8B;EAC9B,4BAA4B;EAC5B,4BAA4B;;EAE5B;;;;IAIE;;EAEF,yBAAyB;EACzB,yBAAyB;EACzB,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;;EAEtC;;;;IAIE;;EAEF,mCAAmC;EACnC,8CAA8C;EAC9C,8CAA8C;EAC9C,8CAA8C;EAC9C,8CAA8C;;EAE9C,iBAAiB;;EAEjB,qCAAqC;EACrC,qCAAqC;EACrC,qCAAqC;EACrC,qCAAqC;EACrC,oCAAoC;;EAEpC,uCAAuC;EACvC,uCAAuC;EACvC,uCAAuC;EACvC,uCAAuC;;EAEvC,8CAA8C;;EAE9C,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;EACtC,sCAAsC;;EAEtC,oCAAoC;EACpC,oCAAoC;EACpC,oCAAoC;EACpC,oCAAoC;;EAEpC,wCAAwC;EACxC,wCAAwC;EACxC,wCAAwC;EACxC,wCAAwC;;EAExC,oCAAoC;EACpC,oCAAoC;EACpC,oC
AAoC;EACpC,oCAAoC;;EAEpC,yBAAyB;;EAEzB,sBAAsB;;EAEtB,8BAA8B;EAC9B,oCAAoC;EACpC,iDAAiD;;EAEjD,+CAA+C;EAC/C,iDAAiD;EACjD,6DAA6D;EAC7D,2DAA2D;EAC3D,4DAA4D;;EAE5D,4BAA4B;EAC5B,gEAAgE;EAChE,oCAAoC;EACpC,2BAA2B;EAC3B,wCAAwC;EACxC,0DAA0D;EAC1D;2EACyE;EACzE,sCAAsC;EACtC;uEACqE;EACrE,uCAAuC;;EAEvC,6BAA6B;;EAE7B,2BAA2B;EAC3B,wDAAwD;EACxD,oDAAoD;;EAEpD;;;;;GAKC;EACD;;;GAGC;;EAED,sBAAsB;;EAEtB,sCAAsC;EACtC,wDAAwD;EACxD,mEAAmE;;EAEnE,2BAA2B;;EAE3B,2CAA2C;;EAE3C,4BAA4B;;EAE5B,0BAA0B;;EAE1B,4BAA4B;;EAE5B,kDAAkD;EAClD,8BAA8B;EAC9B,gDAAgD;EAChD,4DAA4D;EAC5D,2CAA2C;EAC3C,kDAAkD;;EAElD,8BAA8B;;EAE9B,2BAA2B;;EAE3B,uBAAuB;;EAEvB,uDAAuD;EACvD,qDAAqD;EACrD,oDAAoD;EACpD,yCAAyC;EACzC,uDAAuD;EACvD,sDAAsD;EACtD,2DAA2D;;EAE3D,0BAA0B;;EAE1B,wCAAwC;EACxC,gDAAgD;EAChD,kDAAkD;;EAElD,gCAAgC;;EAEhC,yCAAyC;EACzC,mCAAmC;EACnC,qCAAqC;EACrC,kCAAkC;EAClC,qDAAqD;EACrD,yCAAyC;EACzC,yCAAyC;EACzC,0CAA0C;EAC1C,uCAAuC;EACvC,0CAA0C;EAC1C,yCAAyC;EACzC,wCAAwC;EACxC,uCAAuC;EACvC,sCAAsC;EACtC,wCAAwC;EACxC,yCAAyC;EACzC,sCAAsC;EACtC,kCAAkC;EAClC,wCAAwC;EACxC,qCAAqC;EACrC,oCAAoC;EACpC,mCAAmC;EACnC,oCAAoC;EACpC,iCAAiC;;EAEjC,0BAA0B;;EAE1B,2BAA2B;;EAE3B,2BAA2B;;EAE3B,6BAA6B;;EAE7B,0BAA0B;;EAE1B,mCAAmC;EACnC,qCAAqC;EACrC,gCAAgC;EAChC,6DAA6D;EAC7D,uCAAuC;EACvC;;GAEC;EACD,oEAAoE;;EAEpE,8DAA8D;EAC9D,+CAA+C;EAC/C,8CAA8C;EAC9C,6CAA6C;EAC7C,6CAA6C;AAC/C;;AAEA;;;8EAG8E;;AAE9E,sDAAsD;AACtD;;;;;EAKE,uCAAuC;EACvC,mCAAmC;EACnC,uCAAuC;AACzC\",\"sourcesContent\":[\"/*\\n See the JupyterLab Developer Guide for useful CSS Patterns:\\n\\n https://jupyterlab.readthedocs.io/en/stable/developer/css.html\\n*/\\n.lds-ripple {\\n display: flex;\\n position: absolute; \\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 80px;\\n height: 80px;\\n}\\n.lds-ripple div {\\n position: absolute; \\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n border: 4px solid rgb(245, 131, 55);\\n opacity: 1;\\n border-radius: 50%;\\n animation: lds-ripple 1s cubic-bezier(0, 0.2, 0.8, 1) 
infinite;\\n}\\n.lds-ripple div:nth-child(2) {\\n animation-delay: -0.5s;\\n}\\n@keyframes lds-ripple {\\n 0% {\\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 0;\\n height: 0;\\n opacity: 0;\\n }\\n 4.9% {\\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 0;\\n height: 0;\\n opacity: 0;\\n }\\n 5% {\\n left: 0; \\n top: 0; \\n right: 0; \\n bottom: 0;\\n margin: auto; \\n width: 0;\\n height: 0;\\n opacity: 1;\\n }\\n 100% {\\n top: 0px;\\n left: 0px;\\n width: 72px;\\n height: 72px;\\n opacity: 0;\\n }\\n}\\n\\n\\n/* CSS */\\n.button-62 {\\n background: linear-gradient(to bottom right, #EF4765, #FF9A5A);\\n border: 0;\\n border-radius: 12px;\\n color: #FFFFFF;\\n cursor: pointer;\\n display: inline-block;\\n font-family: -apple-system,system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif;\\n font-size: 16px;\\n font-weight: 500;\\n line-height: 2.5;\\n outline: transparent;\\n padding: 0 1rem;\\n text-align: center;\\n text-decoration: none;\\n transition: box-shadow .2s ease-in-out;\\n user-select: none;\\n -webkit-user-select: none;\\n touch-action: manipulation;\\n white-space: nowrap;\\n}\\n\\n.button-62:not([disabled]):focus {\\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\\n}\\n\\n.button-62:not([disabled]):hover {\\n box-shadow: 0 0 .25rem rgba(0, 0, 0, 0.5), -.125rem -.125rem 1rem rgba(239, 71, 101, 0.5), .125rem .125rem 1rem rgba(255, 154, 90, 0.5);\\n}\\n\\n.aselector select {\\n background-color: initial;\\n border: none;\\n border-radius: 0;\\n box-shadow: none;\\n color: var(--jp-ui-font-color0);\\n display: block;\\n font-size: var(--jp-ui-font-size1);\\n height: 24px;\\n line-height: 14px;\\n padding: 0 25px 0 10px;\\n text-align: left;\\n -moz-appearance: none;\\n -webkit-appearance: none;\\n}\\n\\n/* Use our own theme for hover and option styles */\\n.aselector 
select:hover,\\n.aselector select > option {\\n background-color: var(--jp-layout-color2);\\n color: var(--jp-ui-font-color0);\\n}\\nselect {\\n box-sizing: border-box;\\n}\\n\\n.font{\\nbackground-color: initial;\\nborder: none;\\nheight: 21px;\\nborder-radius: 0;\\nfont-weight:500;\\ncolor: var(--jp-ui-font-color0);\\ndisplay: block;\\nline-height: 22.5px;\\npadding: 0 25px 0 10px;\\nfont-size: var(--jp-ui-font-size1);\\n}\\n.wrapper {\\n display: flex;\\n}\\n.f1ozlkqi {\\n pointer-events: none;\\n}\\n\\n.palybutton{\\n background-image: '/home2/longxin/Neural_Coder_EXT/style/icons8-circled-play.gif';\\n}\\n.loading{\\n \\n background-image: url(\\\"data:image/svg+xml, \\\");\\n background-size: contain; \\n}\\n\\n.dialog{\\nbody {\\n margin: 0;\\n height: 100vh;\\n width:600px;\\n display: flex;\\n align-items: center;\\n justify-content: center;\\n overflow: hidden;\\n font-family: \\\"Poppins\\\", sans-serif;\\n background: #e3d0b6;\\n}\\n\\n#cookie-policy {\\n display: flex;\\n flex-direction: column;\\n justify-content: center;\\n align-items: center;\\n width: 460px;\\n height: 600px;\\n background: #f3efe6;\\n border-radius: 12px;\\n transform: scale(.8);\\n}\\n\\n#cookie-wrapper {\\n height: 240px;\\n width: 240px;\\n margin: 30px 0;\\n position: relative;\\n left: -40px;\\n}\\n\\nh1 {\\n color: #6c3a1f;\\n text-align: center;\\n font-size: 36px;\\n margin: 0;\\n}\\n\\np {\\n color: #a28561;\\n font-size: 14px;\\n margin-top: 0;\\n padding: 0 60px;\\n text-align: center;\\n}\\na {\\n margin-top: 18px;\\n font-size: 14px;\\n color: #a28561;\\n text-decoration: none;\\n pointer-events: none;\\n}\\na:hover {\\n color: #846b4d;\\n}\\n\\nspan {\\n font-family: \\\"Amatic SC\\\", cursive;\\n font-weight: 400;\\n font-size: 20px;\\n position: relative;\\n top: -18px;\\n left: 3px;\\n color: #a28561; \\n}\\n\\n#heart-no, #thought-heart-yes, #mouth, #face-no, #thought-1, #thought-2, #thought-heart-na, #q-mark, #eyes, #leg-l, #leg-r {\\n opacity: 
0;\\n}\\n}\\n.dia_button {\\n color: white;\\n background: #dd794a;\\n margin-top: 12px;\\n cursor: pointer;\\n font-size: 24px;\\n font-family: \\\"Poppins\\\", sans-serif;\\n border-radius: 9px;\\n border: none;\\n width: 72%;\\n padding: 12px 0;\\n transition: 150ms ease-out;\\n pointer-events: none;\\n}\\n\\n.dia_button:hover {\\n background: #d66029;\\n}\\n\\n.pad{\\n padding-left:6%\\n}\\n\\n:root {\\n /* Elevation\\n *\\n * We style box-shadows using Material Design's idea of elevation. These particular numbers are taken from here:\\n *\\n * https://github.com/material-components/material-components-web\\n * https://material-components-web.appspot.com/elevation.html\\n */\\n\\n --jp-shadow-base-lightness: 0;\\n --jp-shadow-umbra-color: rgba(\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n 0.2\\n );\\n --jp-shadow-penumbra-color: rgba(\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n 0.14\\n );\\n --jp-shadow-ambient-color: rgba(\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n var(--jp-shadow-base-lightness),\\n 0.12\\n );\\n --jp-elevation-z0: none;\\n --jp-elevation-z1: 0px 2px 1px -1px var(--jp-shadow-umbra-color),\\n 0px 1px 1px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 3px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z2: 0px 3px 1px -2px var(--jp-shadow-umbra-color),\\n 0px 2px 2px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 5px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z4: 0px 2px 4px -1px var(--jp-shadow-umbra-color),\\n 0px 4px 5px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 10px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z6: 0px 3px 5px -1px var(--jp-shadow-umbra-color),\\n 0px 6px 10px 0px var(--jp-shadow-penumbra-color),\\n 0px 1px 18px 0px var(--jp-shadow-ambient-color);\\n --jp-elevation-z8: 0px 5px 5px -3px var(--jp-shadow-umbra-color),\\n 0px 8px 
10px 1px var(--jp-shadow-penumbra-color),\\n 0px 3px 14px 2px var(--jp-shadow-ambient-color);\\n --jp-elevation-z12: 0px 7px 8px -4px var(--jp-shadow-umbra-color),\\n 0px 12px 17px 2px var(--jp-shadow-penumbra-color),\\n 0px 5px 22px 4px var(--jp-shadow-ambient-color);\\n --jp-elevation-z16: 0px 8px 10px -5px var(--jp-shadow-umbra-color),\\n 0px 16px 24px 2px var(--jp-shadow-penumbra-color),\\n 0px 6px 30px 5px var(--jp-shadow-ambient-color);\\n --jp-elevation-z20: 0px 10px 13px -6px var(--jp-shadow-umbra-color),\\n 0px 20px 31px 3px var(--jp-shadow-penumbra-color),\\n 0px 8px 38px 7px var(--jp-shadow-ambient-color);\\n --jp-elevation-z24: 0px 11px 15px -7px var(--jp-shadow-umbra-color),\\n 0px 24px 38px 3px var(--jp-shadow-penumbra-color),\\n 0px 9px 46px 8px var(--jp-shadow-ambient-color);\\n\\n /* Borders\\n *\\n * The following variables, specify the visual styling of borders in JupyterLab.\\n */\\n\\n --jp-border-width: 1px;\\n --jp-border-color0: var(--md-grey-400);\\n --jp-border-color1: var(--md-grey-400);\\n --jp-border-color2: var(--md-grey-300);\\n --jp-border-color3: var(--md-grey-200);\\n --jp-inverse-border-color: var(--md-grey-600);\\n --jp-border-radius: 2px;\\n\\n /* UI Fonts\\n *\\n * The UI font CSS variables are used for the typography all of the JupyterLab\\n * user interface elements that are not directly user generated content.\\n *\\n * The font sizing here is done assuming that the body font size of --jp-ui-font-size1\\n * is applied to a parent element. 
When children elements, such as headings, are sized\\n * in em all things will be computed relative to that body size.\\n */\\n\\n --jp-ui-font-scale-factor: 1.2;\\n --jp-ui-font-size0: 0.83333em;\\n --jp-ui-font-size1: 13px; /* Base font size */\\n --jp-ui-font-size2: 1.2em;\\n --jp-ui-font-size3: 1.44em;\\n\\n --jp-ui-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica,\\n Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';\\n\\n /*\\n * Use these font colors against the corresponding main layout colors.\\n * In a light theme, these go from dark to light.\\n */\\n\\n /* Defaults use Material Design specification */\\n --jp-ui-font-color0: rgba(0, 0, 0, 1);\\n --jp-ui-font-color1: rgba(0, 0, 0, 0.87);\\n --jp-ui-font-color2: rgba(0, 0, 0, 0.54);\\n --jp-ui-font-color3: rgba(0, 0, 0, 0.38);\\n\\n /*\\n * Use these against the brand/accent/warn/error colors.\\n * These will typically go from light to darker, in both a dark and light theme.\\n */\\n\\n --jp-ui-inverse-font-color0: rgba(255, 255, 255, 1);\\n --jp-ui-inverse-font-color1: rgba(255, 255, 255, 1);\\n --jp-ui-inverse-font-color2: rgba(255, 255, 255, 0.7);\\n --jp-ui-inverse-font-color3: rgba(255, 255, 255, 0.5);\\n\\n /* Content Fonts\\n *\\n * Content font variables are used for typography of user generated content.\\n *\\n * The font sizing here is done assuming that the body font size of --jp-content-font-size1\\n * is applied to a parent element. 
When children elements, such as headings, are sized\\n * in em all things will be computed relative to that body size.\\n */\\n\\n --jp-content-line-height: 1.6;\\n --jp-content-font-scale-factor: 1.2;\\n --jp-content-font-size0: 0.83333em;\\n --jp-content-font-size1: 14px; /* Base font size */\\n --jp-content-font-size2: 1.2em;\\n --jp-content-font-size3: 1.44em;\\n --jp-content-font-size4: 1.728em;\\n --jp-content-font-size5: 2.0736em;\\n\\n /* This gives a magnification of about 125% in presentation mode over normal. */\\n --jp-content-presentation-font-size1: 17px;\\n\\n --jp-content-heading-line-height: 1;\\n --jp-content-heading-margin-top: 1.2em;\\n --jp-content-heading-margin-bottom: 0.8em;\\n --jp-content-heading-font-weight: 500;\\n\\n /* Defaults use Material Design specification */\\n --jp-content-font-color0: rgba(0, 0, 0, 1);\\n --jp-content-font-color1: rgba(0, 0, 0, 0.87);\\n --jp-content-font-color2: rgba(0, 0, 0, 0.54);\\n --jp-content-font-color3: rgba(0, 0, 0, 0.38);\\n\\n --jp-content-link-color: var(--md-blue-700);\\n\\n --jp-content-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI',\\n Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',\\n 'Segoe UI Symbol';\\n\\n /*\\n * Code Fonts\\n *\\n * Code font variables are used for typography of code and other monospaces content.\\n */\\n\\n --jp-code-font-size: 13px;\\n --jp-code-line-height: 1.3077; /* 17px for 13px base */\\n --jp-code-padding: 5px; /* 5px for 13px base, codemirror highlighting needs integer px value */\\n --jp-code-font-family-default: Menlo, Consolas, 'DejaVu Sans Mono', monospace;\\n --jp-code-font-family: var(--jp-code-font-family-default);\\n\\n /* This gives a magnification of about 125% in presentation mode over normal. 
*/\\n --jp-code-presentation-font-size: 16px;\\n\\n /* may need to tweak cursor width if you change font size */\\n --jp-code-cursor-width0: 1.4px;\\n --jp-code-cursor-width1: 2px;\\n --jp-code-cursor-width2: 4px;\\n\\n /* Layout\\n *\\n * The following are the main layout colors use in JupyterLab. In a light\\n * theme these would go from light to dark.\\n */\\n\\n --jp-layout-color0: white;\\n --jp-layout-color1: white;\\n --jp-layout-color2: var(--md-grey-200);\\n --jp-layout-color3: var(--md-grey-400);\\n --jp-layout-color4: var(--md-grey-600);\\n\\n /* Inverse Layout\\n *\\n * The following are the inverse layout colors use in JupyterLab. In a light\\n * theme these would go from dark to light.\\n */\\n\\n --jp-inverse-layout-color0: #111111;\\n --jp-inverse-layout-color1: var(--md-grey-900);\\n --jp-inverse-layout-color2: var(--md-grey-800);\\n --jp-inverse-layout-color3: var(--md-grey-700);\\n --jp-inverse-layout-color4: var(--md-grey-600);\\n\\n /* Brand/accent */\\n\\n --jp-brand-color0: var(--md-blue-900);\\n --jp-brand-color1: var(--md-blue-700);\\n --jp-brand-color2: var(--md-blue-300);\\n --jp-brand-color3: var(--md-blue-100);\\n --jp-brand-color4: var(--md-blue-50);\\n\\n --jp-accent-color0: var(--md-green-900);\\n --jp-accent-color1: var(--md-green-700);\\n --jp-accent-color2: var(--md-green-300);\\n --jp-accent-color3: var(--md-green-100);\\n\\n /* State colors (warn, error, success, info) */\\n\\n --jp-warn-color0: var(--md-orange-900);\\n --jp-warn-color1: var(--md-orange-700);\\n --jp-warn-color2: var(--md-orange-300);\\n --jp-warn-color3: var(--md-orange-100);\\n\\n --jp-error-color0: var(--md-red-900);\\n --jp-error-color1: var(--md-red-700);\\n --jp-error-color2: var(--md-red-300);\\n --jp-error-color3: var(--md-red-100);\\n\\n --jp-success-color0: var(--md-green-900);\\n --jp-success-color1: var(--md-green-700);\\n --jp-success-color2: var(--md-green-300);\\n --jp-success-color3: var(--md-green-100);\\n\\n --jp-info-color0: 
var(--md-cyan-900);\\n --jp-info-color1: var(--md-cyan-700);\\n --jp-info-color2: var(--md-cyan-300);\\n --jp-info-color3: var(--md-cyan-100);\\n\\n /* Cell specific styles */\\n\\n --jp-cell-padding: 5px;\\n\\n --jp-cell-collapser-width: 8px;\\n --jp-cell-collapser-min-height: 20px;\\n --jp-cell-collapser-not-active-hover-opacity: 0.6;\\n\\n --jp-cell-editor-background: var(--md-grey-100);\\n --jp-cell-editor-border-color: var(--md-grey-300);\\n --jp-cell-editor-box-shadow: inset 0 0 2px var(--md-blue-300);\\n --jp-cell-editor-active-background: var(--jp-layout-color0);\\n --jp-cell-editor-active-border-color: var(--jp-brand-color1);\\n\\n --jp-cell-prompt-width: 64px;\\n --jp-cell-prompt-font-family: var(--jp-code-font-family-default);\\n --jp-cell-prompt-letter-spacing: 0px;\\n --jp-cell-prompt-opacity: 1;\\n --jp-cell-prompt-not-active-opacity: 0.5;\\n --jp-cell-prompt-not-active-font-color: var(--md-grey-700);\\n /* A custom blend of MD grey and blue 600\\n * See https://meyerweb.com/eric/tools/color-blend/#546E7A:1E88E5:5:hex */\\n --jp-cell-inprompt-font-color: #307fc1;\\n /* A custom blend of MD grey and orange 600\\n * https://meyerweb.com/eric/tools/color-blend/#546E7A:F4511E:5:hex */\\n --jp-cell-outprompt-font-color: #bf5b3d;\\n\\n /* Notebook specific styles */\\n\\n --jp-notebook-padding: 10px;\\n --jp-notebook-select-background: var(--jp-layout-color1);\\n --jp-notebook-multiselected-color: var(--md-blue-50);\\n\\n /* The scroll padding is calculated to fill enough space at the bottom of the\\n notebook to show one single-line cell (with appropriate padding) at the top\\n when the notebook is scrolled all the way to the bottom. We also subtract one\\n pixel so that no scrollbar appears if we have just one single-line cell in the\\n notebook. 
This padding is to enable a 'scroll past end' feature in a notebook.\\n */\\n --jp-notebook-scroll-padding: calc(\\n 100% - var(--jp-code-font-size) * var(--jp-code-line-height) -\\n var(--jp-code-padding) - var(--jp-cell-padding) - 1px\\n );\\n\\n /* Rendermime styles */\\n\\n --jp-rendermime-error-background: #fdd;\\n --jp-rendermime-table-row-background: var(--md-grey-100);\\n --jp-rendermime-table-row-hover-background: var(--md-light-blue-50);\\n\\n /* Dialog specific styles */\\n\\n --jp-dialog-background: rgba(0, 0, 0, 0.25);\\n\\n /* Console specific styles */\\n\\n --jp-console-padding: 10px;\\n\\n /* Toolbar specific styles */\\n\\n --jp-toolbar-border-color: var(--jp-border-color1);\\n --jp-toolbar-micro-height: 8px;\\n --jp-toolbar-background: var(--jp-layout-color1);\\n --jp-toolbar-box-shadow: 0px 0px 2px 0px rgba(0, 0, 0, 0.24);\\n --jp-toolbar-header-margin: 4px 4px 0px 4px;\\n --jp-toolbar-active-background: var(--md-grey-300);\\n\\n /* Statusbar specific styles */\\n\\n --jp-statusbar-height: 24px;\\n\\n /* Input field styles */\\n\\n --jp-input-box-shadow: inset 0 0 2px var(--md-blue-300);\\n --jp-input-active-background: var(--jp-layout-color1);\\n --jp-input-hover-background: var(--jp-layout-color1);\\n --jp-input-background: var(--md-grey-100);\\n --jp-input-border-color: var(--jp-inverse-border-color);\\n --jp-input-active-border-color: var(--jp-brand-color1);\\n --jp-input-active-box-shadow-color: rgba(19, 124, 189, 0.3);\\n\\n /* General editor styles */\\n\\n --jp-editor-selected-background: #d9d9d9;\\n --jp-editor-selected-focused-background: #d7d4f0;\\n --jp-editor-cursor-color: var(--jp-ui-font-color0);\\n\\n /* Code mirror specific styles */\\n\\n --jp-mirror-editor-keyword-color: #008000;\\n --jp-mirror-editor-atom-color: #88f;\\n --jp-mirror-editor-number-color: #080;\\n --jp-mirror-editor-def-color: #00f;\\n --jp-mirror-editor-variable-color: var(--md-grey-900);\\n --jp-mirror-editor-variable-2-color: #05a;\\n 
--jp-mirror-editor-variable-3-color: #085;\\n --jp-mirror-editor-punctuation-color: #05a;\\n --jp-mirror-editor-property-color: #05a;\\n --jp-mirror-editor-operator-color: #aa22ff;\\n --jp-mirror-editor-comment-color: #408080;\\n --jp-mirror-editor-string-color: #ba2121;\\n --jp-mirror-editor-string-2-color: #708;\\n --jp-mirror-editor-meta-color: #aa22ff;\\n --jp-mirror-editor-qualifier-color: #555;\\n --jp-mirror-editor-builtin-color: #008000;\\n --jp-mirror-editor-bracket-color: #997;\\n --jp-mirror-editor-tag-color: #170;\\n --jp-mirror-editor-attribute-color: #00c;\\n --jp-mirror-editor-header-color: blue;\\n --jp-mirror-editor-quote-color: #090;\\n --jp-mirror-editor-link-color: #00c;\\n --jp-mirror-editor-error-color: #f00;\\n --jp-mirror-editor-hr-color: #999;\\n\\n /* Vega extension styles */\\n\\n --jp-vega-background: white;\\n\\n /* Sidebar-related styles */\\n\\n --jp-sidebar-min-width: 250px;\\n\\n /* Search-related styles */\\n\\n --jp-search-toggle-off-opacity: 0.5;\\n --jp-search-toggle-hover-opacity: 0.8;\\n --jp-search-toggle-on-opacity: 1;\\n --jp-search-selected-match-background-color: rgb(245, 200, 0);\\n --jp-search-selected-match-color: black;\\n --jp-search-unselected-match-background-color: var(\\n --jp-inverse-layout-color0\\n );\\n --jp-search-unselected-match-color: var(--jp-ui-inverse-font-color0);\\n\\n /* Icon colors that work well with light or dark backgrounds */\\n --jp-icon-contrast-color0: var(--md-purple-600);\\n --jp-icon-contrast-color1: var(--md-green-600);\\n --jp-icon-contrast-color2: var(--md-pink-600);\\n --jp-icon-contrast-color3: var(--md-blue-600);\\n}\\n\\n/*-----------------------------------------------------------------------------\\n| Copyright (c) Jupyter Development Team.\\n| Distributed under the terms of the Modified BSD License.\\n|----------------------------------------------------------------------------*/\\n\\n/* Set the default typography for monospace elements */\\ntt,\\ncode,\\nkbd,\\nsamp,\\npre {\\n 
font-family: var(--jp-code-font-family);\\n font-size: var(--jp-code-font-size);\\n line-height: var(--jp-code-line-height);\\n}\\n\\n\"],\"sourceRoot\":\"\"}]);\n// Exports\nexport default ___CSS_LOADER_EXPORT___;\n","import api from \"!../node_modules/style-loader/dist/runtime/injectStylesIntoStyleTag.js\";\n import content from \"!!../node_modules/css-loader/dist/cjs.js!./base.css\";\n\nvar options = {};\n\noptions.insert = \"head\";\noptions.singleton = false;\n\nvar update = api(content, options);\n\n\n\nexport default content.locals || {};","import './base.css';\n"],"names":[],"sourceRoot":""}
\ No newline at end of file
diff --git a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854.e09faf9ec3a764e40dc7.js b/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854.e09faf9ec3a764e40dc7.js
deleted file mode 100644
index 34c1fdb7608..00000000000
--- a/neural_coder/extensions/neural_compressor_ext_lab/neural_compressor_ext_lab/labextension/static/vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854.e09faf9ec3a764e40dc7.js
+++ /dev/null
@@ -1,463 +0,0 @@
-"use strict";
-(self["webpackChunkneural_compressor_ext_lab"] = self["webpackChunkneural_compressor_ext_lab"] || []).push([["vendors-node_modules_css-loader_dist_runtime_api_js-node_modules_css-loader_dist_runtime_getU-849854"],{
-
-/***/ "./node_modules/css-loader/dist/runtime/api.js":
-/*!*****************************************************!*\
- !*** ./node_modules/css-loader/dist/runtime/api.js ***!
- \*****************************************************/
-/***/ ((module) => {
-
-
-
-/*
- MIT License http://www.opensource.org/licenses/mit-license.php
- Author Tobias Koppers @sokra
-*/
-module.exports = function (cssWithMappingToString) {
- var list = []; // return the list of modules as css string
-
- list.toString = function toString() {
- return this.map(function (item) {
- var content = "";
- var needLayer = typeof item[5] !== "undefined";
-
- if (item[4]) {
- content += "@supports (".concat(item[4], ") {");
- }
-
- if (item[2]) {
- content += "@media ".concat(item[2], " {");
- }
-
- if (needLayer) {
- content += "@layer".concat(item[5].length > 0 ? " ".concat(item[5]) : "", " {");
- }
-
- content += cssWithMappingToString(item);
-
- if (needLayer) {
- content += "}";
- }
-
- if (item[2]) {
- content += "}";
- }
-
- if (item[4]) {
- content += "}";
- }
-
- return content;
- }).join("");
- }; // import a list of modules into the list
-
-
- list.i = function i(modules, media, dedupe, supports, layer) {
- if (typeof modules === "string") {
- modules = [[null, modules, undefined]];
- }
-
- var alreadyImportedModules = {};
-
- if (dedupe) {
- for (var k = 0; k < this.length; k++) {
- var id = this[k][0];
-
- if (id != null) {
- alreadyImportedModules[id] = true;
- }
- }
- }
-
- for (var _k = 0; _k < modules.length; _k++) {
- var item = [].concat(modules[_k]);
-
- if (dedupe && alreadyImportedModules[item[0]]) {
- continue;
- }
-
- if (typeof layer !== "undefined") {
- if (typeof item[5] === "undefined") {
- item[5] = layer;
- } else {
- item[1] = "@layer".concat(item[5].length > 0 ? " ".concat(item[5]) : "", " {").concat(item[1], "}");
- item[5] = layer;
- }
- }
-
- if (media) {
- if (!item[2]) {
- item[2] = media;
- } else {
- item[1] = "@media ".concat(item[2], " {").concat(item[1], "}");
- item[2] = media;
- }
- }
-
- if (supports) {
- if (!item[4]) {
- item[4] = "".concat(supports);
- } else {
- item[1] = "@supports (".concat(item[4], ") {").concat(item[1], "}");
- item[4] = supports;
- }
- }
-
- list.push(item);
- }
- };
-
- return list;
-};
-
-/***/ }),
-
-/***/ "./node_modules/css-loader/dist/runtime/getUrl.js":
-/*!********************************************************!*\
- !*** ./node_modules/css-loader/dist/runtime/getUrl.js ***!
- \********************************************************/
-/***/ ((module) => {
-
-
-
-module.exports = function (url, options) {
- if (!options) {
- options = {};
- }
-
- if (!url) {
- return url;
- }
-
- url = String(url.__esModule ? url.default : url); // If url is already wrapped in quotes, remove them
-
- if (/^['"].*['"]$/.test(url)) {
- url = url.slice(1, -1);
- }
-
- if (options.hash) {
- url += options.hash;
- } // Should url be wrapped?
- // See https://drafts.csswg.org/css-values-3/#urls
-
-
- if (/["'() \t\n]|(%20)/.test(url) || options.needQuotes) {
- return "\"".concat(url.replace(/"/g, '\\"').replace(/\n/g, "\\n"), "\"");
- }
-
- return url;
-};
-
-/***/ }),
-
-/***/ "./node_modules/css-loader/dist/runtime/sourceMaps.js":
-/*!************************************************************!*\
- !*** ./node_modules/css-loader/dist/runtime/sourceMaps.js ***!
- \************************************************************/
-/***/ ((module) => {
-
-
-
-module.exports = function (item) {
- var content = item[1];
- var cssMapping = item[3];
-
- if (!cssMapping) {
- return content;
- }
-
- if (typeof btoa === "function") {
- var base64 = btoa(unescape(encodeURIComponent(JSON.stringify(cssMapping))));
- var data = "sourceMappingURL=data:application/json;charset=utf-8;base64,".concat(base64);
- var sourceMapping = "/*# ".concat(data, " */");
- var sourceURLs = cssMapping.sources.map(function (source) {
- return "/*# sourceURL=".concat(cssMapping.sourceRoot || "").concat(source, " */");
- });
- return [content].concat(sourceURLs).concat([sourceMapping]).join("\n");
- }
-
- return [content].join("\n");
-};
-
-/***/ }),
-
-/***/ "./node_modules/style-loader/dist/runtime/injectStylesIntoStyleTag.js":
-/*!****************************************************************************!*\
- !*** ./node_modules/style-loader/dist/runtime/injectStylesIntoStyleTag.js ***!
- \****************************************************************************/
-/***/ ((module, __unused_webpack_exports, __webpack_require__) => {
-
-
-
-var isOldIE = function isOldIE() {
- var memo;
- return function memorize() {
- if (typeof memo === 'undefined') {
- // Test for IE <= 9 as proposed by Browserhacks
- // @see http://browserhacks.com/#hack-e71d8692f65334173fee715c222cb805
- // Tests for existence of standard globals is to allow style-loader
- // to operate correctly into non-standard environments
- // @see https://github.com/webpack-contrib/style-loader/issues/177
- memo = Boolean(window && document && document.all && !window.atob);
- }
-
- return memo;
- };
-}();
-
-var getTarget = function getTarget() {
- var memo = {};
- return function memorize(target) {
- if (typeof memo[target] === 'undefined') {
- var styleTarget = document.querySelector(target); // Special case to return head of iframe instead of iframe itself
-
- if (window.HTMLIFrameElement && styleTarget instanceof window.HTMLIFrameElement) {
- try {
- // This will throw an exception if access to iframe is blocked
- // due to cross-origin restrictions
- styleTarget = styleTarget.contentDocument.head;
- } catch (e) {
- // istanbul ignore next
- styleTarget = null;
- }
- }
-
- memo[target] = styleTarget;
- }
-
- return memo[target];
- };
-}();
-
-var stylesInDom = [];
-
-function getIndexByIdentifier(identifier) {
- var result = -1;
-
- for (var i = 0; i < stylesInDom.length; i++) {
- if (stylesInDom[i].identifier === identifier) {
- result = i;
- break;
- }
- }
-
- return result;
-}
-
-function modulesToDom(list, options) {
- var idCountMap = {};
- var identifiers = [];
-
- for (var i = 0; i < list.length; i++) {
- var item = list[i];
- var id = options.base ? item[0] + options.base : item[0];
- var count = idCountMap[id] || 0;
- var identifier = "".concat(id, " ").concat(count);
- idCountMap[id] = count + 1;
- var index = getIndexByIdentifier(identifier);
- var obj = {
- css: item[1],
- media: item[2],
- sourceMap: item[3]
- };
-
- if (index !== -1) {
- stylesInDom[index].references++;
- stylesInDom[index].updater(obj);
- } else {
- stylesInDom.push({
- identifier: identifier,
- updater: addStyle(obj, options),
- references: 1
- });
- }
-
- identifiers.push(identifier);
- }
-
- return identifiers;
-}
-
-function insertStyleElement(options) {
- var style = document.createElement('style');
- var attributes = options.attributes || {};
-
- if (typeof attributes.nonce === 'undefined') {
- var nonce = true ? __webpack_require__.nc : 0;
-
- if (nonce) {
- attributes.nonce = nonce;
- }
- }
-
- Object.keys(attributes).forEach(function (key) {
- style.setAttribute(key, attributes[key]);
- });
-
- if (typeof options.insert === 'function') {
- options.insert(style);
- } else {
- var target = getTarget(options.insert || 'head');
-
- if (!target) {
- throw new Error("Couldn't find a style target. This probably means that the value for the 'insert' parameter is invalid.");
- }
-
- target.appendChild(style);
- }
-
- return style;
-}
-
-function removeStyleElement(style) {
- // istanbul ignore if
- if (style.parentNode === null) {
- return false;
- }
-
- style.parentNode.removeChild(style);
-}
-/* istanbul ignore next */
-
-
-var replaceText = function replaceText() {
- var textStore = [];
- return function replace(index, replacement) {
- textStore[index] = replacement;
- return textStore.filter(Boolean).join('\n');
- };
-}();
-
-function applyToSingletonTag(style, index, remove, obj) {
- var css = remove ? '' : obj.media ? "@media ".concat(obj.media, " {").concat(obj.css, "}") : obj.css; // For old IE
-
- /* istanbul ignore if */
-
- if (style.styleSheet) {
- style.styleSheet.cssText = replaceText(index, css);
- } else {
- var cssNode = document.createTextNode(css);
- var childNodes = style.childNodes;
-
- if (childNodes[index]) {
- style.removeChild(childNodes[index]);
- }
-
- if (childNodes.length) {
- style.insertBefore(cssNode, childNodes[index]);
- } else {
- style.appendChild(cssNode);
- }
- }
-}
-
-function applyToTag(style, options, obj) {
- var css = obj.css;
- var media = obj.media;
- var sourceMap = obj.sourceMap;
-
- if (media) {
- style.setAttribute('media', media);
- } else {
- style.removeAttribute('media');
- }
-
- if (sourceMap && typeof btoa !== 'undefined') {
- css += "\n/*# sourceMappingURL=data:application/json;base64,".concat(btoa(unescape(encodeURIComponent(JSON.stringify(sourceMap)))), " */");
- } // For old IE
-
- /* istanbul ignore if */
-
-
- if (style.styleSheet) {
- style.styleSheet.cssText = css;
- } else {
- while (style.firstChild) {
- style.removeChild(style.firstChild);
- }
-
- style.appendChild(document.createTextNode(css));
- }
-}
-
-var singleton = null;
-var singletonCounter = 0;
-
-function addStyle(obj, options) {
- var style;
- var update;
- var remove;
-
- if (options.singleton) {
- var styleIndex = singletonCounter++;
- style = singleton || (singleton = insertStyleElement(options));
- update = applyToSingletonTag.bind(null, style, styleIndex, false);
- remove = applyToSingletonTag.bind(null, style, styleIndex, true);
- } else {
- style = insertStyleElement(options);
- update = applyToTag.bind(null, style, options);
-
- remove = function remove() {
- removeStyleElement(style);
- };
- }
-
- update(obj);
- return function updateStyle(newObj) {
- if (newObj) {
- if (newObj.css === obj.css && newObj.media === obj.media && newObj.sourceMap === obj.sourceMap) {
- return;
- }
-
- update(obj = newObj);
- } else {
- remove();
- }
- };
-}
-
-module.exports = function (list, options) {
- options = options || {}; // Force single-tag solution on IE6-9, which has a hard limit on the # of