diff --git a/CMakeLists.txt b/CMakeLists.txt index 3db2fc3fed..15dd57cd5e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -541,7 +541,7 @@ if(CONFIG_AV1_ENCODER) ${LIBBROTLICOMMON_LIBRARIES}) target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS}) else() - pkg_check_modules(LIBJXL REQUIRED libjxl) + pkg_check_modules(LIBJXL REQUIRED libjxl libjxl_threads) target_link_libraries(aom PRIVATE ${LIBJXL_LDFLAGS} ${LIBJXL_LIBRARIES}) target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS}) if(LIBJXL_CFLAGS) diff --git a/aom/aomcx.h b/aom/aomcx.h index 9c7402a6ec..9ffeccc153 100644 --- a/aom/aomcx.h +++ b/aom/aomcx.h @@ -1547,6 +1547,7 @@ typedef enum { AOM_TUNE_VMAF_MAX_GAIN = 6, AOM_TUNE_VMAF_NEG_MAX_GAIN = 7, AOM_TUNE_BUTTERAUGLI = 8, + AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY = 9, } aom_tune_metric; /*!\brief Distortion metric to use for RD optimization. diff --git a/aom_dsp/butteraugli.c b/aom_dsp/butteraugli.c index 038efcd313..55b69831b5 100644 --- a/aom_dsp/butteraugli.c +++ b/aom_dsp/butteraugli.c @@ -11,9 +11,11 @@ #include #include +#include #include "aom_dsp/butteraugli.h" #include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" #include "third_party/libyuv/include/libyuv/convert_argb.h" int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, @@ -21,7 +23,7 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, aom_matrix_coefficients_t matrix_coefficients, aom_color_range_t color_range, float *dist_map) { (void)bit_depth; - assert(bit_depth == 8); + assert(bit_depth <= 10); const int width = source->y_crop_width; const int height = source->y_crop_height; const int ss_x = source->subsampling_x; @@ -37,7 +39,7 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, } const size_t stride_argb = width * 4; - const size_t buffer_size = height * stride_argb; + const size_t buffer_size = height * stride_argb * (bit_depth > 8 ? 
2 : 1); uint8_t *src_argb = (uint8_t *)aom_malloc(buffer_size); uint8_t *distorted_argb = (uint8_t *)aom_malloc(buffer_size); if (!src_argb || !distorted_argb) { @@ -46,30 +48,57 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, return 0; } + if (ss_x == 1 && ss_y == 1) { - I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, - source->uv_stride, source->v_buffer, source->uv_stride, - src_argb, stride_argb, yuv_constants, width, height); - I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride, - distorted->u_buffer, distorted->uv_stride, - distorted->v_buffer, distorted->uv_stride, distorted_argb, - stride_argb, yuv_constants, width, height); + if (bit_depth == 8) { + I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, + source->uv_stride, source->v_buffer, source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride, + distorted->u_buffer, distorted->uv_stride, + distorted->v_buffer, distorted->uv_stride, distorted_argb, + stride_argb, yuv_constants, width, height); + } else { + I010ToARGBMatrix(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride, + CONVERT_TO_SHORTPTR(source->u_buffer), source->uv_stride, + CONVERT_TO_SHORTPTR(source->v_buffer), source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I010ToARGBMatrix(CONVERT_TO_SHORTPTR(distorted->y_buffer), distorted->y_stride, + CONVERT_TO_SHORTPTR(distorted->u_buffer), distorted->uv_stride, + CONVERT_TO_SHORTPTR(distorted->v_buffer), distorted->uv_stride, + distorted_argb, stride_argb, yuv_constants, width, height); + } } else if (ss_x == 1 && ss_y == 0) { - I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, - source->uv_stride, source->v_buffer, source->uv_stride, - src_argb, stride_argb, yuv_constants, width, height); - I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride, - distorted->u_buffer, distorted->uv_stride, - 
distorted->v_buffer, distorted->uv_stride, distorted_argb, - stride_argb, yuv_constants, width, height); + if (bit_depth == 8) { + I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, + source->uv_stride, source->v_buffer, source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride, + distorted->u_buffer, distorted->uv_stride, + distorted->v_buffer, distorted->uv_stride, distorted_argb, + stride_argb, yuv_constants, width, height); + } else { + I210ToARGBMatrix(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride, + CONVERT_TO_SHORTPTR(source->u_buffer), source->uv_stride, + CONVERT_TO_SHORTPTR(source->v_buffer), source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I210ToARGBMatrix(CONVERT_TO_SHORTPTR(distorted->y_buffer), distorted->y_stride, + CONVERT_TO_SHORTPTR(distorted->u_buffer), distorted->uv_stride, + CONVERT_TO_SHORTPTR(distorted->v_buffer), distorted->uv_stride, + distorted_argb, stride_argb, yuv_constants, width, height); + } } else if (ss_x == 0 && ss_y == 0) { - I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, - source->uv_stride, source->v_buffer, source->uv_stride, - src_argb, stride_argb, yuv_constants, width, height); - I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride, - distorted->u_buffer, distorted->uv_stride, - distorted->v_buffer, distorted->uv_stride, distorted_argb, - stride_argb, yuv_constants, width, height); + if (bit_depth == 8) { + I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, + source->uv_stride, source->v_buffer, source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride, + distorted->u_buffer, distorted->uv_stride, + distorted->v_buffer, distorted->uv_stride, distorted_argb, + stride_argb, yuv_constants, width, height); + } else { + return 0; + } } else { aom_free(src_argb); 
aom_free(distorted_argb); @@ -77,8 +106,13 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, } JxlPixelFormat pixel_format = { 4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 }; + if (bit_depth == 10) { + pixel_format.data_type = JXL_TYPE_UINT16; + } JxlButteraugliApi *api = JxlButteraugliApiCreate(NULL); - JxlButteraugliApiSetHFAsymmetry(api, 0.8f); + JxlParallelRunner runner = JxlThreadParallelRunnerCreate(NULL, 6); + JxlButteraugliApiSetParallelRunner(api, JxlThreadParallelRunner, runner); + JxlButteraugliApiSetHFAsymmetry(api, 0.5f); JxlButteraugliResult *result = JxlButteraugliCompute( api, width, height, &pixel_format, src_argb, buffer_size, &pixel_format, diff --git a/av1/arg_defs.c b/av1/arg_defs.c index 882b03aa07..219f4c1dcc 100644 --- a/av1/arg_defs.c +++ b/av1/arg_defs.c @@ -47,6 +47,7 @@ static const struct arg_enum_list tuning_enum[] = { { "vmaf", AOM_TUNE_VMAF_MAX_GAIN }, { "vmaf_neg", AOM_TUNE_VMAF_NEG_MAX_GAIN }, { "butteraugli", AOM_TUNE_BUTTERAUGLI }, + { "image_perceptual_quality", AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY }, { NULL, 0 } }; @@ -535,8 +536,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = { ARG_DEF(NULL, "deltaq-mode", 1, "Delta qindex mode (0: off, 1: deltaq objective (default), " "2: deltaq placeholder, 3: key frame visual quality, 4: user " - "rating based visual quality optimization). " - "Currently this requires enable-tpl-model as a prerequisite."), + "rating based visual quality optimization, \n" + " 5: HDR deltaq optimization). 
" + "Currently, deltaq-mode=1 and 2 require enable-tpl-model as a prerequisite."), .deltaq_strength = ARG_DEF(NULL, "deltaq-strength", 1, "Deltaq strength for" " --deltaq-mode=4 (%)"), diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c index 9a2b9858fe..b93e56492e 100644 --- a/av1/av1_cx_iface.c +++ b/av1/av1_cx_iface.c @@ -798,7 +798,8 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx, } #endif - RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_BUTTERAUGLI); + RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, + AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY); RANGE_CHECK(extra_cfg, dist_metric, AOM_DIST_METRIC_PSNR, AOM_DIST_METRIC_QM_PSNR); @@ -878,9 +879,6 @@ static aom_codec_err_t validate_img(aom_codec_alg_priv_t *ctx, #if CONFIG_TUNE_BUTTERAUGLI if (ctx->extra_cfg.tuning == AOM_TUNE_BUTTERAUGLI) { - if (img->bit_depth > 8) { - ERROR("Only 8 bit depth images supported in tune=butteraugli mode."); - } if (img->mc != 0 && img->mc != AOM_CICP_MC_BT_709 && img->mc != AOM_CICP_MC_BT_601 && img->mc != AOM_CICP_MC_BT_470_B_G) { ERROR( diff --git a/av1/encoder/allintra_vis.c b/av1/encoder/allintra_vis.c index a8fb2f5ffc..42e4d03744 100644 --- a/av1/encoder/allintra_vis.c +++ b/av1/encoder/allintra_vis.c @@ -366,6 +366,140 @@ static void automatic_intra_tools_off(AV1_COMP *cpi, } } +// Compute the "mean subtracted contrast normalized coefficients (MSCN)", +// defined in the following paper: +// "No-Reference Image Quality Assessment in the Spatial Domain", +// DOI: 10.1109/TIP.2012.2214050 +// +// The MSCN coefficients reflect normalized signal information regardless +// of pixel intensity. We could think as a contrast enhanced image map. +// The absolute sum of MSCN coefficients of a block could represent +// the amount of information, or complexity of a block. +// Here, we seek the ratio of the most complex and the most plain block, +// as a complexity indicator of the image. 
+static void build_mscn_map(AV1_COMP *cpi, double *mscn_map) { + const uint8_t *buffer = cpi->source->y_buffer; + const int buf_stride = cpi->source->y_stride; + const int frame_width = cpi->frame_info.frame_width; + const int frame_height = cpi->frame_info.frame_height; + const int half_win = 3; + // h = round(fspecial('gaussian', 7, 3.0) * 1000) + const int gauss_kernel[] = { 11, 15, 18, 19, 18, 15, 11, 15, 20, 23, + 25, 23, 20, 15, 18, 23, 27, 29, 27, 23, + 18, 19, 25, 29, 31, 29, 25, 19, 18, 23, + 27, 29, 27, 23, 18, 15, 20, 23, 25, 23, + 20, 15, 11, 15, 18, 19, 18, 15, 11 }; + // Generate mscn map with Gaussian kernel weights. + double *mean_map = aom_calloc(frame_width * frame_height, sizeof(*mean_map)); + for (int row = 0; row < frame_height; ++row) { + for (int col = 0; col < frame_width; ++col) { + double weighted_sum = 0; + int count = 0; + for (int dy = -half_win; dy <= half_win; ++dy) { + for (int dx = -half_win; dx <= half_win; ++dx) { + if (row + dy < 0 || row + dy >= frame_height || col + dx < 0 || + col + dx >= frame_width) { + continue; + } + const int pix = buffer[(row + dy) * buf_stride + col + dx]; + weighted_sum += + pix * gauss_kernel[(dy + half_win) * (2 * half_win + 1) + + (dx + half_win)]; + count += gauss_kernel[(dy + half_win) * (2 * half_win + 1) + + (dx + half_win)]; + } + } + const double weighted_mean = weighted_sum / count; + mean_map[row * frame_width + col] = weighted_mean; + } + } + for (int row = 0; row < frame_height; ++row) { + for (int col = 0; col < frame_width; ++col) { + double weighted_sum = 0; + double count = 0; + const double mean = mean_map[row * frame_width + col]; + for (int dy = -half_win; dy <= half_win; ++dy) { + for (int dx = -half_win; dx <= half_win; ++dx) { + if (row + dy < 0 || row + dy >= frame_height || col + dx < 0 || + col + dx >= frame_width) { + continue; + } + const int pix = buffer[(row + dy) * buf_stride + col + dx]; + const double weight = + gauss_kernel[(dy + half_win) * (2 * half_win + 1) + + (dx 
+ half_win)]; + weighted_sum += weight * (pix - mean) * (pix - mean); + count += weight; + } + } + const double sigma = sqrt(weighted_sum / count); + mscn_map[row * frame_width + col] = + (buffer[row * buf_stride + col] - mean) / (sigma + 1.0); + } + } + aom_free(mean_map); +} + +// beta (= cpi->norm_wiener_variance / sb_wiener_var) is the scaling factor +// that determines the quantizer used for a super block, +// used in "av1_get_sbq_perceptual_ai()". +// Its lower bound is determined by the "min_max_scale" which prevents using +// a large quantizer that quantizes all transform coefficients from non-zero +// to zero. +// Its upper bound is determined in this function, with the help of the +// global_mscn_contrast, which measures the complexity contrast between the most +// difficult and the most plain super block. +static double get_dynamic_range(AV1_COMP *const cpi, const int sb_step) { + const AV1_COMMON *const cm = &cpi->common; + const int frame_width = cpi->frame_info.frame_width; + const int frame_height = cpi->frame_info.frame_height; + double *mscn_map = aom_calloc(frame_width * frame_height, sizeof(*mscn_map)); + build_mscn_map(cpi, mscn_map); + double max_block_mscn = 0.0; + double min_block_mscn = 1000.0; + for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sb_step) { + for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sb_step) { + int pix_count = 0; + double block_sum_mscn = 0.0; + for (int row = 0; row < mi_size_high[sb_step] * MI_SIZE; ++row) { + for (int col = 0; col < mi_size_wide[sb_step] * MI_SIZE; ++col) { + const int r = mi_row * MI_SIZE + row; + const int c = mi_col * MI_SIZE + col; + if (r >= frame_height || c >= frame_width) continue; + block_sum_mscn += fabs(mscn_map[r * frame_width + c]); + ++pix_count; + } + } + const double block_avg_mscn = block_sum_mscn / pix_count; + max_block_mscn = AOMMAX(block_avg_mscn, max_block_mscn); + min_block_mscn = AOMMIN(block_avg_mscn, min_block_mscn); + } + } + double 
global_mscn_contrast = max_block_mscn / (min_block_mscn + 0.01); + global_mscn_contrast = AOMMIN(global_mscn_contrast, 20.0); + double max_beta = 0.0; + double min_beta = 1000.0; + for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sb_step) { + for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sb_step) { + const int sb_wiener_var = + get_var_perceptual_ai(cpi, cm->seq_params->sb_size, mi_row, mi_col); + double beta = (double)cpi->norm_wiener_variance / sb_wiener_var; + double min_max_scale = AOMMAX( + 1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col)); + beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale); + min_beta = AOMMIN(beta, min_beta); + max_beta = AOMMAX(beta, max_beta); + } + } + const double scaling_factor = 1.0; + max_beta = min_beta * global_mscn_contrast * scaling_factor; + max_beta = AOMMIN(max_beta, 6.0); + max_beta = AOMMAX(max_beta, 2.0); + + aom_free(mscn_map); + return max_beta; +} + void av1_set_mb_wiener_variance(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; uint8_t *buffer = cpi->source->y_buffer; @@ -556,6 +690,8 @@ void av1_set_mb_wiener_variance(AV1_COMP *cpi) { pick_norm_factor_and_block_size(cpi, &norm_block_size); const int norm_step = mi_size_wide[norm_block_size]; + cpi->dynamic_range_upper_bound = get_dynamic_range(cpi, norm_step); + double sb_wiener_log = 0; double sb_count = 0; for (int its_cnt = 0; its_cnt < 2; ++its_cnt) { @@ -570,8 +706,7 @@ void av1_set_mb_wiener_variance(AV1_COMP *cpi) { double min_max_scale = AOMMAX( 1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col)); beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale); - beta = AOMMIN(beta, 4); - beta = AOMMAX(beta, 0.25); + beta = AOMMIN(beta, cpi->dynamic_range_upper_bound); sb_wiener_var = (int)(cpi->norm_wiener_variance / beta); @@ -600,10 +735,8 @@ int av1_get_sbq_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, double beta = (double)cpi->norm_wiener_variance / sb_wiener_var; double min_max_scale = 
AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col)); beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale); + beta = AOMMIN(beta, cpi->dynamic_range_upper_bound); - // Cap beta such that the delta q value is not much far away from the base q. - beta = AOMMIN(beta, 4); - beta = AOMMAX(beta, 0.25); offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta); const DeltaQInfo *const delta_q_info = &cm->delta_q_info; offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1); diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c index e876fd3b31..8dbd76fa11 100644 --- a/av1/encoder/av1_quantize.c +++ b/av1/encoder/av1_quantize.c @@ -770,7 +770,8 @@ static int adjust_hdr_cb_deltaq(int base_qindex) { const double dcbQP = CHROMA_CB_QP_SCALE * chromaQp * QP_SCALE_FACTOR; int dqpCb = (int)(dcbQP + (dcbQP < 0 ? -0.5 : 0.5)); dqpCb = AOMMIN(0, dqpCb); - dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR); + // dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR); + dqpCb = (int)CLIP(dqpCb, -CHROMA_DQP_MAX, CHROMA_DQP_MAX); return dqpCb; } @@ -780,7 +781,8 @@ static int adjust_hdr_cr_deltaq(int base_qindex) { const double dcrQP = CHROMA_CR_QP_SCALE * chromaQp * QP_SCALE_FACTOR; int dqpCr = (int)(dcrQP + (dcrQP < 0 ? 
-0.5 : 0.5)); dqpCr = AOMMIN(0, dqpCr); - dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR); + //dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR); + dqpCr = (int)CLIP(dqpCr, -CHROMA_DQP_MAX, CHROMA_DQP_MAX); return dqpCr; } diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c index be74763182..45f021bfa7 100644 --- a/av1/encoder/encodeframe_utils.c +++ b/av1/encoder/encodeframe_utils.c @@ -35,7 +35,8 @@ void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit, double num_of_mi = 0.0; double geom_mean_of_scale = 0.0; - assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM); + assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY); for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 23e379f6fc..c1df6bde98 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c @@ -2566,9 +2566,18 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) { #endif #if !CONFIG_RD_COMMAND - // Determine whether to use screen content tools using two fast encoding. - if (!cpi->sf.hl_sf.disable_extra_sc_testing) + if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY) { + // Screen content optimizations are bad for Psy tuning, + // disable them and avoid the extra testing to speed us up. + FeatureFlags *const features = &cm->features; + features->allow_screen_content_tools = 0; + features->allow_intrabc = 0; + cpi->use_screen_content_tools = 0; + cpi->is_screen_content_type = 0; + } else if (!cpi->sf.hl_sf.disable_extra_sc_testing) { + // Determine whether to use screen content tools using two fast encoding. 
av1_determine_sc_tools_with_encoding(cpi, q); + } #endif // !CONFIG_RD_COMMAND #if CONFIG_TUNE_VMAF @@ -3458,7 +3467,8 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, } } - if (oxcf->tune_cfg.tuning == AOM_TUNE_SSIM) { + if (oxcf->tune_cfg.tuning == AOM_TUNE_SSIM || + oxcf->tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY) { av1_set_mb_ssim_rdmult_scaling(cpi); } diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index aeb277a823..9d46cb633d 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h @@ -88,11 +88,12 @@ extern "C" { #define TF_LOOKAHEAD_IDX_THR 7 #define HDR_QP_LEVELS 10 -#define CHROMA_CB_QP_SCALE 1.04 -#define CHROMA_CR_QP_SCALE 1.04 +#define CHROMA_CB_QP_SCALE 1.39 +#define CHROMA_CR_QP_SCALE 1.39 #define CHROMA_QP_SCALE -0.46 #define CHROMA_QP_OFFSET 9.26 #define QP_SCALE_FACTOR 2.0 +#define CHROMA_DQP_MAX 80 #define DISABLE_HDR_LUMA_DELTAQ 1 // Rational number with an int64 numerator @@ -3296,6 +3297,12 @@ typedef struct AV1_COMP { */ int64_t norm_wiener_variance; + /*! + * The upper bound that determines the minimum allowed q for a super block + * in all intra mode, deltaq-mode=3. + */ + double dynamic_range_upper_bound; + /*! * Buffer to store delta-q values for delta-q mode 4. */ diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c index b6ac2d9e7a..ec7a9fb762 100644 --- a/av1/encoder/encoder_utils.c +++ b/av1/encoder/encoder_utils.c @@ -1274,7 +1274,7 @@ void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) { // Loop through each 16x16 block. for (int row = 0; row < num_rows; ++row) { for (int col = 0; col < num_cols; ++col) { - double var = 0.0, num_of_var = 0.0; + double var = 0.0, num_of_var = 0.0, var_log = 0.0; const int index = row * num_cols + col; // Loop through each 8x8 block. 
@@ -1291,31 +1291,89 @@ void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) { buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y; buf.stride = y_stride; + double blk_var; if (use_hbd) { - var += av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8, - xd->bd); + blk_var = av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8, + xd->bd); } else { - var += av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8); + blk_var = av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8); } + var_log += log(AOMMAX(blk_var, 1)); + var += blk_var; num_of_var += 1.0; } } - var = var / num_of_var; - // Curve fitting with an exponential model on all 16x16 blocks from the - // midres dataset. - var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222; + if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY) { + var = exp(var_log / num_of_var); + const int cq_level = cpi->oxcf.rc_cfg.cq_level; + const double hq_level = 30 * 4; + const double delta = + cq_level < hq_level + ? 2.0 * (double)(hq_level - cq_level) / hq_level + : 10.0 * (double)(cq_level - hq_level) / (MAXQ - hq_level); + // Curve fitting with an exponential model on user rating dataset. + var = 39.126 * (1 - exp(-0.0009413 * var)) + 1.236 + delta; + } else { + var = var / num_of_var; + // Curve fitting with an exponential model on all 16x16 blocks from the + // midres dataset. 
+ var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222; + } cpi->ssim_rdmult_scaling_factors[index] = var; log_sum += log(var); } } - log_sum = exp(log_sum / (double)(num_rows * num_cols)); - for (int row = 0; row < num_rows; ++row) { - for (int col = 0; col < num_cols; ++col) { - const int index = row * num_cols + col; - cpi->ssim_rdmult_scaling_factors[index] /= log_sum; + if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY && + cpi->oxcf.q_cfg.deltaq_mode != NO_DELTA_Q) { + const int sb_size = cpi->common.seq_params->sb_size; + const int num_mi_w_sb = mi_size_wide[sb_size]; + const int num_mi_h_sb = mi_size_high[sb_size]; + const int num_cols_sb = + (mi_params->mi_cols + num_mi_w_sb - 1) / num_mi_w_sb; + const int num_rows_sb = + (mi_params->mi_rows + num_mi_h_sb - 1) / num_mi_h_sb; + const int num_blk_w = num_mi_w_sb / num_mi_w; + const int num_blk_h = num_mi_h_sb / num_mi_h; + assert(num_blk_w * num_mi_w == num_mi_w_sb); + assert(num_blk_h * num_mi_h == num_mi_h_sb); + + for (int row = 0; row < num_rows_sb; ++row) { + for (int col = 0; col < num_cols_sb; ++col) { + double log_sum_sb = 0.0; + double blk_count = 0.0; + for (int blk_row = row * num_blk_h; + blk_row < (row + 1) * num_blk_h && blk_row < num_rows; ++blk_row) { + for (int blk_col = col * num_blk_w; + blk_col < (col + 1) * num_blk_w && blk_col < num_cols; + ++blk_col) { + const int index = blk_row * num_cols + blk_col; + log_sum_sb += log(cpi->ssim_rdmult_scaling_factors[index]); + blk_count += 1.0; + } + } + log_sum_sb = exp(log_sum_sb / blk_count); + for (int blk_row = row * num_blk_h; + blk_row < (row + 1) * num_blk_h && blk_row < num_rows; ++blk_row) { + for (int blk_col = col * num_blk_w; + blk_col < (col + 1) * num_blk_w && blk_col < num_cols; + ++blk_col) { + const int index = blk_row * num_cols + blk_col; + cpi->ssim_rdmult_scaling_factors[index] /= log_sum_sb; + } + } + } + } + } else { + log_sum = exp(log_sum / (double)(num_rows * num_cols)); + + for (int row = 0; row < 
num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + const int index = row * num_cols + col; + cpi->ssim_rdmult_scaling_factors[index] /= log_sum; + } } } } diff --git a/av1/encoder/lookahead.h b/av1/encoder/lookahead.h index c9e1c9a52b..6d75e4b987 100644 --- a/av1/encoder/lookahead.h +++ b/av1/encoder/lookahead.h @@ -25,8 +25,8 @@ extern "C" { #endif /*!\cond */ -#define MAX_LAG_BUFFERS 48 -#define MAX_LAP_BUFFERS 48 +#define MAX_LAG_BUFFERS 120 +#define MAX_LAP_BUFFERS 120 #define MAX_TOTAL_BUFFERS (MAX_LAG_BUFFERS + MAX_LAP_BUFFERS) #define LAP_LAG_IN_FRAMES 17 diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c index 1e0a539a5b..7914946180 100644 --- a/av1/encoder/partition_search.c +++ b/av1/encoder/partition_search.c @@ -642,7 +642,8 @@ static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, } #endif // !CONFIG_REALTIME_ONLY - if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM) { + if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY) { av1_set_ssim_rdmult(cpi, &x->errorperbit, bsize, mi_row, mi_col, &x->rdmult); } diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index c6b056ff4e..25e9df9d1f 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c @@ -950,8 +950,6 @@ static void set_good_speed_features_framesize_independent( if (speed >= 2) { sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF; - sf->fp_sf.skip_motion_search_threshold = 25; - sf->gm_sf.disable_gm_search_based_on_stats = 1; sf->part_sf.reuse_best_prediction_for_part_ab = @@ -1139,8 +1137,6 @@ static void set_good_speed_features_framesize_independent( } if (speed >= 5) { - sf->fp_sf.reduce_mv_step_param = 4; - sf->part_sf.simple_motion_search_prune_agg = allow_screen_content_tools ? 
SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL3; sf->part_sf.ext_partition_eval_thresh = @@ -1170,7 +1166,6 @@ static void set_good_speed_features_framesize_independent( sf->winner_mode_sf.dc_blk_pred_level = 1; - sf->fp_sf.disable_recon = 1; } if (speed >= 6) { @@ -1209,7 +1204,6 @@ static void set_good_speed_features_framesize_independent( sf->winner_mode_sf.dc_blk_pred_level = 2; sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF; - sf->fp_sf.skip_zeromv_motion_search = 1; } } diff --git a/av1/encoder/tune_butteraugli.c b/av1/encoder/tune_butteraugli.c index 80a0fc27c3..4a226067f8 100644 --- a/av1/encoder/tune_butteraugli.c +++ b/av1/encoder/tune_butteraugli.c @@ -18,6 +18,7 @@ #include "av1/encoder/encoder_utils.h" #include "av1/encoder/extend.h" #include "av1/encoder/var_based_part.h" +#include "aom_ports/mem.h" static const int resize_factor = 2; @@ -56,57 +57,115 @@ static void set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi, double log_sum = 0.0; double blk_count = 0.0; - // Loop through each block. - for (int row = 0; row < num_rows; ++row) { - for (int col = 0; col < num_cols; ++col) { - const int index = row * num_cols + col; - const int y_start = row * block_h; - const int x_start = col * block_w; - float dbutteraugli = 0.0f; - float dmse = 0.0f; - float px_count = 0.0f; - - // Loop through each pixel. - for (int y = y_start; y < y_start + block_h && y < height; y++) { - for (int x = x_start; x < x_start + block_w && x < width; x++) { - dbutteraugli += powf(diffmap[y * width + x], 12.0f); - float px_diff = source->y_buffer[y * source->y_stride + x] - - recon->y_buffer[y * recon->y_stride + x]; - dmse += px_diff * px_diff; - px_count += 1.0f; + + if (cm->seq_params->use_highbitdepth) + { + // Loop through each block. 
+ for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + const int index = row * num_cols + col; + const int y_start = row * block_h; + const int x_start = col * block_w; + float dbutteraugli = 0.0f; + float dmse = 0.0f; + float px_count = 0.0f; + + // Loop through each pixel. + for (int y = y_start; y < y_start + block_h && y < height; y++) { + for (int x = x_start; x < x_start + block_w && x < width; x++) { + dbutteraugli += powf(diffmap[y * width + x], 12.0f); + float px_diff = CONVERT_TO_SHORTPTR(source->y_buffer)[y * source->y_stride + x] - + CONVERT_TO_SHORTPTR(recon->y_buffer)[y * recon->y_stride + x]; + dmse += px_diff * px_diff; + px_count += 1.0f; + } } - } - const int y_end = AOMMIN((y_start >> ss_y) + (block_h >> ss_y), - (height + ss_y) >> ss_y); - for (int y = y_start >> ss_y; y < y_end; y++) { - const int x_end = AOMMIN((x_start >> ss_x) + (block_w >> ss_x), - (width + ss_x) >> ss_x); - for (int x = x_start >> ss_x; x < x_end; x++) { - const int src_px_index = y * source->uv_stride + x; - const int recon_px_index = y * recon->uv_stride + x; - const float px_diff_u = (float)(source->u_buffer[src_px_index] - - recon->u_buffer[recon_px_index]); - const float px_diff_v = (float)(source->v_buffer[src_px_index] - - recon->v_buffer[recon_px_index]); - dmse += px_diff_u * px_diff_u + px_diff_v * px_diff_v; - px_count += 2.0f; + const int y_end = AOMMIN((y_start >> ss_y) + (block_h >> ss_y), + (height + ss_y) >> ss_y); + for (int y = y_start >> ss_y; y < y_end; y++) { + const int x_end = AOMMIN((x_start >> ss_x) + (block_w >> ss_x), + (width + ss_x) >> ss_x); + for (int x = x_start >> ss_x; x < x_end; x++) { + const int src_px_index = y * source->uv_stride + x; + const int recon_px_index = y * recon->uv_stride + x; + const float px_diff_u = (float)(CONVERT_TO_SHORTPTR(source->u_buffer)[src_px_index] - + CONVERT_TO_SHORTPTR(recon->u_buffer)[recon_px_index]); + const float px_diff_v = 
(float)(CONVERT_TO_SHORTPTR(source->v_buffer)[src_px_index] - + CONVERT_TO_SHORTPTR(recon->v_buffer)[recon_px_index]); + dmse += px_diff_u * px_diff_u + px_diff_v * px_diff_v; + px_count += 2.0f; + } + } + + dbutteraugli = powf(dbutteraugli, 1.0f / 12.0f); + dmse = dmse / px_count; + const float eps = 0.01f; + double weight; + if (dbutteraugli < eps || dmse < eps) { + weight = -1.0; + } else { + blk_count += 1.0; + weight = dmse / dbutteraugli; + weight = AOMMIN(weight, 5.0); + weight += K; + log_sum += log(weight); } + cpi->butteraugli_info.rdmult_scaling_factors[index] = weight; } + } + } else { + // Loop through each block. + for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + const int index = row * num_cols + col; + const int y_start = row * block_h; + const int x_start = col * block_w; + float dbutteraugli = 0.0f; + float dmse = 0.0f; + float px_count = 0.0f; + + // Loop through each pixel. + for (int y = y_start; y < y_start + block_h && y < height; y++) { + for (int x = x_start; x < x_start + block_w && x < width; x++) { + dbutteraugli += powf(diffmap[y * width + x], 12.0f); + float px_diff = source->y_buffer[y * source->y_stride + x] - + recon->y_buffer[y * recon->y_stride + x]; + dmse += px_diff * px_diff; + px_count += 1.0f; + } + } + const int y_end = AOMMIN((y_start >> ss_y) + (block_h >> ss_y), + (height + ss_y) >> ss_y); + for (int y = y_start >> ss_y; y < y_end; y++) { + const int x_end = AOMMIN((x_start >> ss_x) + (block_w >> ss_x), + (width + ss_x) >> ss_x); + for (int x = x_start >> ss_x; x < x_end; x++) { + const int src_px_index = y * source->uv_stride + x; + const int recon_px_index = y * recon->uv_stride + x; + const float px_diff_u = (float)(source->u_buffer[src_px_index] - + recon->u_buffer[recon_px_index]); + const float px_diff_v = (float)(source->v_buffer[src_px_index] - + recon->v_buffer[recon_px_index]); + dmse += px_diff_u * px_diff_u + px_diff_v * px_diff_v; + px_count += 2.0f; + } + } - 
dbutteraugli = powf(dbutteraugli, 1.0f / 12.0f); - dmse = dmse / px_count; - const float eps = 0.01f; - double weight; - if (dbutteraugli < eps || dmse < eps) { - weight = -1.0; - } else { - blk_count += 1.0; - weight = dmse / dbutteraugli; - weight = AOMMIN(weight, 5.0); - weight += K; - log_sum += log(weight); + dbutteraugli = powf(dbutteraugli, 1.0f / 12.0f); + dmse = dmse / px_count; + const float eps = 0.01f; + double weight; + if (dbutteraugli < eps || dmse < eps) { + weight = -1.0; + } else { + blk_count += 1.0; + weight = dmse / dbutteraugli; + weight = AOMMIN(weight, 5.0); + weight += K; + log_sum += log(weight); + } + cpi->butteraugli_info.rdmult_scaling_factors[index] = weight; } - cpi->butteraugli_info.rdmult_scaling_factors[index] = weight; } } // Geometric average of the weights. @@ -164,7 +223,7 @@ void av1_set_butteraugli_rdmult(const AV1_COMP *cpi, MACROBLOCK *x, av1_set_error_per_bit(&x->errorperbit, *rdmult); } -static void copy_plane(const uint8_t *src, int src_stride, uint8_t *dst, +static void copy_plane_lowbd(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h) { for (int row = 0; row < h; row++) { memcpy(dst, src, w); @@ -173,29 +232,66 @@ static void copy_plane(const uint8_t *src, int src_stride, uint8_t *dst, } } -static void copy_img(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, +static void copy_img_lowbd(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int width, int height) { - copy_plane(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, width, + copy_plane_lowbd(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, width, height); const int width_uv = (width + src->subsampling_x) >> src->subsampling_x; const int height_uv = (height + src->subsampling_y) >> src->subsampling_y; - copy_plane(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, + copy_plane_lowbd(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, width_uv, height_uv); - 
copy_plane(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, + copy_plane_lowbd(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, width_uv, height_uv); } +static void zero_plane_lowbd(uint8_t *dst, int dst_stride, int h) { + for (int row = 0; row < h; row++) { + memset(dst, 0, dst_stride); + dst += dst_stride; + } +} + +static void zero_img_lowbd(YV12_BUFFER_CONFIG *dst) { + zero_plane_lowbd(dst->y_buffer, dst->y_stride, dst->y_height); + zero_plane_lowbd(dst->u_buffer, dst->uv_stride, dst->uv_height); + zero_plane_lowbd(dst->v_buffer, dst->uv_stride, dst->uv_height); +} + + + -static void zero_plane(uint8_t *dst, int dst_stride, int h) { + +static void copy_plane_highbd(const uint16_t *src, int src_stride, uint16_t *dst, + int dst_stride, int w, int h) { + for (int row = 0; row < h; row++) { + memcpy(dst, src, w); + src += src_stride; + dst += dst_stride; + } +} + +static void copy_img_highbd(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, + int width, int height) { + copy_plane_highbd(CONVERT_TO_SHORTPTR(src->y_buffer), src->y_stride, CONVERT_TO_SHORTPTR(dst->y_buffer), dst->y_stride, width, + height); + const int width_uv = (width + src->subsampling_x) >> src->subsampling_x; + const int height_uv = (height + src->subsampling_y) >> src->subsampling_y; + copy_plane_highbd(CONVERT_TO_SHORTPTR(src->u_buffer), src->uv_stride, CONVERT_TO_SHORTPTR(dst->u_buffer), dst->uv_stride, + width_uv, height_uv); + copy_plane_highbd(CONVERT_TO_SHORTPTR(src->v_buffer), src->uv_stride, CONVERT_TO_SHORTPTR(dst->v_buffer), dst->uv_stride, + width_uv, height_uv); +} + +static void zero_plane_highbd(uint16_t *dst, int dst_stride, int h) { for (int row = 0; row < h; row++) { memset(dst, 0, dst_stride); dst += dst_stride; } } -static void zero_img(YV12_BUFFER_CONFIG *dst) { - zero_plane(dst->y_buffer, dst->y_stride, dst->y_height); - zero_plane(dst->u_buffer, dst->uv_stride, dst->uv_height); - zero_plane(dst->v_buffer, dst->uv_stride, dst->uv_height); 
+static void zero_img_highbd(YV12_BUFFER_CONFIG *dst) { + zero_plane_highbd(CONVERT_TO_SHORTPTR(dst->y_buffer), dst->y_stride, dst->y_height); + zero_plane_highbd(CONVERT_TO_SHORTPTR(dst->u_buffer), dst->uv_stride, dst->uv_height); + zero_plane_highbd(CONVERT_TO_SHORTPTR(dst->v_buffer), dst->uv_stride, dst->uv_height); } void av1_setup_butteraugli_source(AV1_COMP *cpi) { @@ -223,9 +319,15 @@ void av1_setup_butteraugli_source(AV1_COMP *cpi) { av1_resize_and_extend_frame_nonnormative(cpi->source, resized_dst, bit_depth, av1_num_planes(cm)); - zero_img(cpi->source); - copy_img(resized_dst, cpi->source, width / resize_factor, - height / resize_factor); + if (cm->seq_params->use_highbitdepth) { + zero_img_highbd(cpi->source); + copy_img_highbd(resized_dst, cpi->source, width / resize_factor, + height / resize_factor); + } else { + zero_img_lowbd(cpi->source); + copy_img_lowbd(resized_dst, cpi->source, width / resize_factor, + height / resize_factor); + } } void av1_setup_butteraugli_rdmult_and_restore_source(AV1_COMP *cpi, double K) { @@ -242,8 +344,14 @@ void av1_setup_butteraugli_rdmult_and_restore_source(AV1_COMP *cpi, double K) { &resized_recon, width / resize_factor, height / resize_factor, ss_x, ss_y, cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels, cm->features.byte_alignment); - copy_img(&cpi->common.cur_frame->buf, &resized_recon, width / resize_factor, - height / resize_factor); + + if (cm->seq_params->use_highbitdepth) { + copy_img_highbd(&cpi->common.cur_frame->buf, &resized_recon, width / resize_factor, + height / resize_factor); + } else { + copy_img_lowbd(&cpi->common.cur_frame->buf, &resized_recon, width / resize_factor, + height / resize_factor); + } set_mb_butteraugli_rdmult_scaling(cpi, &cpi->butteraugli_info.resized_source, &resized_recon, K); @@ -262,13 +370,15 @@ void av1_setup_butteraugli_rdmult(AV1_COMP *cpi) { av1_set_frame_size(cpi, cm->superres_upscaled_width, cm->superres_upscaled_height); - cpi->source = - 
av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source, - cm->features.interp_filter, 0, false, false); + cpi->source = av1_realloc_and_scale_if_required( + cm, cpi->unscaled_source, &cpi->scaled_source, cm->features.interp_filter, + 0, false, false, cpi->oxcf.border_in_pixels, + cpi->oxcf.tool_cfg.enable_global_motion); if (cpi->unscaled_last_source != NULL) { - cpi->last_source = av1_scale_if_required( + cpi->last_source = av1_realloc_and_scale_if_required( cm, cpi->unscaled_last_source, &cpi->scaled_last_source, - cm->features.interp_filter, 0, false, false); + cm->features.interp_filter, 0, false, false, cpi->oxcf.border_in_pixels, + cpi->oxcf.tool_cfg.enable_global_motion); } av1_setup_butteraugli_source(cpi); @@ -295,7 +405,7 @@ void av1_setup_butteraugli_rdmult(AV1_COMP *cpi) { // cpi->sf.part_sf.fixed_partition_size = BLOCK_32X32; av1_set_quantizer(cpi, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q_index, - q_cfg->enable_chroma_deltaq); + q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq); av1_set_speed_features_qindex_dependent(cpi, oxcf->speed); if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq) av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params, diff --git a/av1/encoder/txb_rdopt.c b/av1/encoder/txb_rdopt.c index 77bc3cd298..e3fe72a0d8 100644 --- a/av1/encoder/txb_rdopt.c +++ b/av1/encoder/txb_rdopt.c @@ -241,10 +241,11 @@ static AOM_FORCE_INLINE void update_coeff_eob( static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob, int nz_num, int *nz_ci, int64_t rdmult, int skip_cost, int non_skip_cost, - tran_low_t *qcoeff, tran_low_t *dqcoeff) { + tran_low_t *qcoeff, tran_low_t *dqcoeff, + int sharpness) { const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist); const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0); - if (rd_new_eob < rd) { + if (rd_new_eob < rd && sharpness == 0) { for (int i = 0; i < nz_num; ++i) { const int ci = nz_ci[i]; qcoeff[ci] = 0; @@ -329,7 
+330,7 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, const LV_MAP_EOB_COST *txb_eob_costs = &coeff_costs->eob_costs[eob_multi_size][plane_type]; - const int rshift = 2; + const int rshift = sharpness + 2; const int64_t rdmult = (((int64_t)x->rdmult * @@ -395,9 +396,9 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, default: assert(false); } - if (si == -1 && nz_num <= max_nz_num && sharpness == 0) { + if (si == -1 && nz_num <= max_nz_num) { update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost, - non_skip_cost, qcoeff, dqcoeff); + non_skip_cost, qcoeff, dqcoeff, sharpness); } #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \