Skip to content
This repository has been archived by the owner on Jun 4, 2024. It is now read-only.

Commit

Permalink
All patches except all-frame-HDR aq1
Browse files Browse the repository at this point in the history
  • Loading branch information
BlueSwordM committed Feb 12, 2022
1 parent 42cbda0 commit 177c4c9
Show file tree
Hide file tree
Showing 16 changed files with 490 additions and 138 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ if(CONFIG_AV1_ENCODER)
${LIBBROTLICOMMON_LIBRARIES})
target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS})
else()
pkg_check_modules(LIBJXL REQUIRED libjxl)
pkg_check_modules(LIBJXL REQUIRED libjxl libjxl_threads)
target_link_libraries(aom PRIVATE ${LIBJXL_LDFLAGS} ${LIBJXL_LIBRARIES})
target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS})
if(LIBJXL_CFLAGS)
Expand Down
1 change: 1 addition & 0 deletions aom/aomcx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1547,6 +1547,7 @@ typedef enum {
AOM_TUNE_VMAF_MAX_GAIN = 6,
AOM_TUNE_VMAF_NEG_MAX_GAIN = 7,
AOM_TUNE_BUTTERAUGLI = 8,
AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY = 9,
} aom_tune_metric;

/*!\brief Distortion metric to use for RD optimization.
Expand Down
82 changes: 58 additions & 24 deletions aom_dsp/butteraugli.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@

#include <assert.h>
#include <jxl/butteraugli.h>
#include <jxl/thread_parallel_runner.h>

#include "aom_dsp/butteraugli.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "third_party/libyuv/include/libyuv/convert_argb.h"

int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *distorted, int bit_depth,
aom_matrix_coefficients_t matrix_coefficients,
aom_color_range_t color_range, float *dist_map) {
(void)bit_depth;
assert(bit_depth == 8);
assert(bit_depth <= 10);
const int width = source->y_crop_width;
const int height = source->y_crop_height;
const int ss_x = source->subsampling_x;
Expand All @@ -37,7 +39,7 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
}

const size_t stride_argb = width * 4;
const size_t buffer_size = height * stride_argb;
const size_t buffer_size = height * stride_argb * (bit_depth > 8 ? 2 : 1);
uint8_t *src_argb = (uint8_t *)aom_malloc(buffer_size);
uint8_t *distorted_argb = (uint8_t *)aom_malloc(buffer_size);
if (!src_argb || !distorted_argb) {
Expand All @@ -46,39 +48,71 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
return 0;
}


if (ss_x == 1 && ss_y == 1) {
I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
source->uv_stride, source->v_buffer, source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
distorted->u_buffer, distorted->uv_stride,
distorted->v_buffer, distorted->uv_stride, distorted_argb,
stride_argb, yuv_constants, width, height);
if (bit_depth == 8) {
I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
source->uv_stride, source->v_buffer, source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
distorted->u_buffer, distorted->uv_stride,
distorted->v_buffer, distorted->uv_stride, distorted_argb,
stride_argb, yuv_constants, width, height);
} else {
I010ToARGBMatrix(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride,
CONVERT_TO_SHORTPTR(source->u_buffer), source->uv_stride,
CONVERT_TO_SHORTPTR(source->v_buffer), source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I010ToARGBMatrix(CONVERT_TO_SHORTPTR(distorted->y_buffer), distorted->y_stride,
CONVERT_TO_SHORTPTR(distorted->u_buffer), distorted->uv_stride,
CONVERT_TO_SHORTPTR(distorted->v_buffer), distorted->uv_stride,
distorted_argb, stride_argb, yuv_constants, width, height);
}
} else if (ss_x == 1 && ss_y == 0) {
I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
source->uv_stride, source->v_buffer, source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
distorted->u_buffer, distorted->uv_stride,
distorted->v_buffer, distorted->uv_stride, distorted_argb,
stride_argb, yuv_constants, width, height);
if (bit_depth == 8) {
I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
source->uv_stride, source->v_buffer, source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
distorted->u_buffer, distorted->uv_stride,
distorted->v_buffer, distorted->uv_stride, distorted_argb,
stride_argb, yuv_constants, width, height);
} else {
I210ToARGBMatrix(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride,
CONVERT_TO_SHORTPTR(source->u_buffer), source->uv_stride,
CONVERT_TO_SHORTPTR(source->v_buffer), source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I210ToARGBMatrix(CONVERT_TO_SHORTPTR(distorted->y_buffer), distorted->y_stride,
CONVERT_TO_SHORTPTR(distorted->u_buffer), distorted->uv_stride,
CONVERT_TO_SHORTPTR(distorted->v_buffer), distorted->uv_stride,
distorted_argb, stride_argb, yuv_constants, width, height);
}
} else if (ss_x == 0 && ss_y == 0) {
I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
source->uv_stride, source->v_buffer, source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
distorted->u_buffer, distorted->uv_stride,
distorted->v_buffer, distorted->uv_stride, distorted_argb,
stride_argb, yuv_constants, width, height);
if (bit_depth == 8) {
I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
source->uv_stride, source->v_buffer, source->uv_stride,
src_argb, stride_argb, yuv_constants, width, height);
I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
distorted->u_buffer, distorted->uv_stride,
distorted->v_buffer, distorted->uv_stride, distorted_argb,
stride_argb, yuv_constants, width, height);
} else {
return 0;
}
} else {
aom_free(src_argb);
aom_free(distorted_argb);
return 0;
}

JxlPixelFormat pixel_format = { 4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 };
if (bit_depth == 10) {
pixel_format.data_type = JXL_TYPE_UINT16;
}
JxlButteraugliApi *api = JxlButteraugliApiCreate(NULL);
JxlButteraugliApiSetHFAsymmetry(api, 0.8f);
JxlParallelRunner runner = JxlThreadParallelRunnerCreate(NULL, 6);
JxlButteraugliApiSetParallelRunner(api, JxlThreadParallelRunner, runner);
JxlButteraugliApiSetHFAsymmetry(api, 0.5f);

JxlButteraugliResult *result = JxlButteraugliCompute(
api, width, height, &pixel_format, src_argb, buffer_size, &pixel_format,
Expand Down
6 changes: 4 additions & 2 deletions av1/arg_defs.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ static const struct arg_enum_list tuning_enum[] = {
{ "vmaf", AOM_TUNE_VMAF_MAX_GAIN },
{ "vmaf_neg", AOM_TUNE_VMAF_NEG_MAX_GAIN },
{ "butteraugli", AOM_TUNE_BUTTERAUGLI },
{ "image_perceptual_quality", AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY },
{ NULL, 0 }
};

Expand Down Expand Up @@ -535,8 +536,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
ARG_DEF(NULL, "deltaq-mode", 1,
"Delta qindex mode (0: off, 1: deltaq objective (default), "
"2: deltaq placeholder, 3: key frame visual quality, 4: user "
"rating based visual quality optimization). "
"Currently this requires enable-tpl-model as a prerequisite."),
"rating based visual quality optimization, \n"
" 5: HDR deltaq optimization). "
"Currently, deltaq-mode=1 and 2 require enable-tpl-model as a prerequisite."),
.deltaq_strength = ARG_DEF(NULL, "deltaq-strength", 1,
"Deltaq strength for"
" --deltaq-mode=4 (%)"),
Expand Down
6 changes: 2 additions & 4 deletions av1/av1_cx_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,8 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
}
#endif

RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_BUTTERAUGLI);
RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR,
AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY);

RANGE_CHECK(extra_cfg, dist_metric, AOM_DIST_METRIC_PSNR,
AOM_DIST_METRIC_QM_PSNR);
Expand Down Expand Up @@ -878,9 +879,6 @@ static aom_codec_err_t validate_img(aom_codec_alg_priv_t *ctx,

#if CONFIG_TUNE_BUTTERAUGLI
if (ctx->extra_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
if (img->bit_depth > 8) {
ERROR("Only 8 bit depth images supported in tune=butteraugli mode.");
}
if (img->mc != 0 && img->mc != AOM_CICP_MC_BT_709 &&
img->mc != AOM_CICP_MC_BT_601 && img->mc != AOM_CICP_MC_BT_470_B_G) {
ERROR(
Expand Down
143 changes: 138 additions & 5 deletions av1/encoder/allintra_vis.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,140 @@ static void automatic_intra_tools_off(AV1_COMP *cpi,
}
}

// Compute the "mean subtracted contrast normalized coefficients (MSCN)",
// defined in the following paper:
// "No-Reference Image Quality Assessment in the Spatial Domain",
// DOI: 10.1109/TIP.2012.2214050
//
// The MSCN coefficients reflect normalized signal information regardless
// of pixel intensity; the result can be thought of as a contrast-enhanced
// image map. The absolute sum of the MSCN coefficients of a block can
// represent the amount of information, or complexity, of that block.
//
// For every luma pixel of cpi->source this writes
//   mscn = (pix - mean) / (sigma + 1.0)
// to mscn_map[row * frame_width + col], where mean and sigma are the
// weighted mean and weighted standard deviation over the 7x7 window centred
// on the pixel. Window taps that fall outside the frame are skipped and the
// weights are renormalized over the taps that remain.
//
// cpi:      encoder instance; reads cpi->source (luma plane and stride) and
//           cpi->frame_info (frame width and height).
// mscn_map: output, must hold at least frame_width * frame_height doubles.
//
// NOTE(review): samples are read through a uint8_t pointer, so this looks
// 8-bit only -- confirm callers never reach here with a high-bitdepth source.
static void build_mscn_map(AV1_COMP *cpi, double *mscn_map) {
  // h = round(fspecial('gaussian', 7, 3.0) * 1000)
  static const int gauss_kernel[] = { 11, 15, 18, 19, 18, 15, 11, 15, 20, 23,
                                      25, 23, 20, 15, 18, 23, 27, 29, 27, 23,
                                      18, 19, 25, 29, 31, 29, 25, 19, 18, 23,
                                      27, 29, 27, 23, 18, 15, 20, 23, 25, 23,
                                      20, 15, 11, 15, 18, 19, 18, 15, 11 };
  const uint8_t *buffer = cpi->source->y_buffer;
  const int buf_stride = cpi->source->y_stride;
  const int frame_width = cpi->frame_info.frame_width;
  const int frame_height = cpi->frame_info.frame_height;
  const int half_win = 3;
  const int win_size = 2 * half_win + 1;

  for (int row = 0; row < frame_height; ++row) {
    // Clip the window vertically so only in-frame taps are visited.
    const int rows_below = frame_height - 1 - row;
    const int dy_lo = row < half_win ? -row : -half_win;
    const int dy_hi = rows_below < half_win ? rows_below : half_win;

    for (int col = 0; col < frame_width; ++col) {
      // Clip the window horizontally in the same way.
      const int cols_right = frame_width - 1 - col;
      const int dx_lo = col < half_win ? -col : -half_win;
      const int dx_hi = cols_right < half_win ? cols_right : half_win;

      // Pass 1: weighted local mean. The mean of a pixel is only ever
      // consumed by that same pixel, so it is kept in a local instead of a
      // frame-sized scratch buffer (which would be one more allocation that
      // can fail).
      double weighted_sum = 0;
      int weight_total = 0;  // Never 0: the centre tap is always in-frame.
      for (int dy = dy_lo; dy <= dy_hi; ++dy) {
        for (int dx = dx_lo; dx <= dx_hi; ++dx) {
          const int pix = buffer[(row + dy) * buf_stride + col + dx];
          const int weight =
              gauss_kernel[(dy + half_win) * win_size + (dx + half_win)];
          weighted_sum += pix * weight;
          weight_total += weight;
        }
      }
      const double mean = weighted_sum / weight_total;

      // Pass 2: weighted local variance around that mean.
      double weighted_sq_dev = 0;
      for (int dy = dy_lo; dy <= dy_hi; ++dy) {
        for (int dx = dx_lo; dx <= dx_hi; ++dx) {
          const int pix = buffer[(row + dy) * buf_stride + col + dx];
          const double weight =
              gauss_kernel[(dy + half_win) * win_size + (dx + half_win)];
          weighted_sq_dev += weight * (pix - mean) * (pix - mean);
        }
      }
      const double sigma = sqrt(weighted_sq_dev / (double)weight_total);

      // The +1.0 keeps the division finite on perfectly flat regions.
      // size_t indexing avoids int overflow on very large frames.
      mscn_map[(size_t)row * frame_width + col] =
          (buffer[row * buf_stride + col] - mean) / (sigma + 1.0);
    }
  }
}

// beta (= cpi->norm_wiener_variance / sb_wiener_var) is the scaling factor
// that determines the quantizer used for a super block,
// used in "av1_get_sbq_perceptual_ai()".
// Its lower bound is determined by the "min_max_scale" which prevents using
// a large quantizer that quantizes all transform coefficients from non-zero
// to zero.
// Its upper bound is determined in this function, with the help of the
// global_mscn_contrast, which measures the complexity contrast between the
// most difficult and the most plain block.
//
// cpi:     encoder instance (source frame, frame_info, norm_wiener_variance).
// sb_step: block step in mode-info (mi) units; the frame is visited in
//          square blocks of sb_step * MI_SIZE luma pixels per side.
//
// Returns the upper bound for beta, always within [2.0, 6.0]. If the
// temporary MSCN map cannot be allocated, the lower end of that range (2.0)
// is returned, which is the same value the function yields when no contrast
// is measured.
// NOTE(review): consider reporting the allocation failure through the
// encoder's error mechanism instead of degrading silently.
static double get_dynamic_range(AV1_COMP *const cpi, const int sb_step) {
  const AV1_COMMON *const cm = &cpi->common;
  const int frame_width = cpi->frame_info.frame_width;
  const int frame_height = cpi->frame_info.frame_height;
  const double min_upper_bound = 2.0;
  const double max_upper_bound = 6.0;

  // Widen before multiplying so large frames cannot overflow int.
  double *mscn_map =
      aom_calloc((size_t)frame_width * frame_height, sizeof(*mscn_map));
  if (mscn_map == NULL) return min_upper_bound;
  build_mscn_map(cpi, mscn_map);

  // sb_step counts mi units, so the pixel extent of a block is
  // sb_step * MI_SIZE. It must not be used as a BLOCK_SIZE index into
  // mi_size_wide[] / mi_size_high[].
  const int block_px = sb_step * MI_SIZE;

  // Find the most and the least complex block, measured by the average
  // absolute MSCN coefficient.
  double max_block_mscn = 0.0;
  double min_block_mscn = 1000.0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sb_step) {
    const int row_begin = mi_row * MI_SIZE;
    const int row_end = AOMMIN(row_begin + block_px, frame_height);
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sb_step) {
      const int col_begin = mi_col * MI_SIZE;
      const int col_end = AOMMIN(col_begin + block_px, frame_width);
      int pix_count = 0;
      double block_sum_mscn = 0.0;
      for (int r = row_begin; r < row_end; ++r) {
        for (int c = col_begin; c < col_end; ++c) {
          block_sum_mscn += fabs(mscn_map[(size_t)r * frame_width + c]);
          ++pix_count;
        }
      }
      // A block lying entirely in the mi padding outside the frame carries
      // no samples; skip it rather than computing 0 / 0.
      if (pix_count == 0) continue;
      const double block_avg_mscn = block_sum_mscn / pix_count;
      max_block_mscn = AOMMAX(block_avg_mscn, max_block_mscn);
      min_block_mscn = AOMMIN(block_avg_mscn, min_block_mscn);
    }
  }
  aom_free(mscn_map);

  // The 0.01 offset keeps the ratio finite when the plainest block is flat.
  double global_mscn_contrast = max_block_mscn / (min_block_mscn + 0.01);
  global_mscn_contrast = AOMMIN(global_mscn_contrast, 20.0);

  // Smallest beta over all blocks, after applying the same min_max_scale
  // lower bound that is used when the per-block quantizer is chosen.
  double min_beta = 1000.0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sb_step) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sb_step) {
      const int sb_wiener_var =
          get_var_perceptual_ai(cpi, cm->seq_params->sb_size, mi_row, mi_col);
      double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
      double min_max_scale = AOMMAX(
          1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
      beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
      min_beta = AOMMIN(beta, min_beta);
    }
  }

  // The upper bound is the smallest beta stretched by the measured contrast,
  // then clamped to [min_upper_bound, max_upper_bound].
  const double scaling_factor = 1.0;
  double max_beta = min_beta * global_mscn_contrast * scaling_factor;
  max_beta = AOMMIN(max_beta, max_upper_bound);
  max_beta = AOMMAX(max_beta, min_upper_bound);
  return max_beta;
}

void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
uint8_t *buffer = cpi->source->y_buffer;
Expand Down Expand Up @@ -556,6 +690,8 @@ void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
pick_norm_factor_and_block_size(cpi, &norm_block_size);
const int norm_step = mi_size_wide[norm_block_size];

cpi->dynamic_range_upper_bound = get_dynamic_range(cpi, norm_step);

double sb_wiener_log = 0;
double sb_count = 0;
for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
Expand All @@ -570,8 +706,7 @@ void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
double min_max_scale = AOMMAX(
1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
beta = AOMMIN(beta, 4);
beta = AOMMAX(beta, 0.25);
beta = AOMMIN(beta, cpi->dynamic_range_upper_bound);

sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

Expand Down Expand Up @@ -600,10 +735,8 @@ int av1_get_sbq_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
beta = AOMMIN(beta, cpi->dynamic_range_upper_bound);

// Cap beta such that the delta q value is not much far away from the base q.
beta = AOMMIN(beta, 4);
beta = AOMMAX(beta, 0.25);
offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
Expand Down
6 changes: 4 additions & 2 deletions av1/encoder/av1_quantize.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,8 @@ static int adjust_hdr_cb_deltaq(int base_qindex) {
const double dcbQP = CHROMA_CB_QP_SCALE * chromaQp * QP_SCALE_FACTOR;
int dqpCb = (int)(dcbQP + (dcbQP < 0 ? -0.5 : 0.5));
dqpCb = AOMMIN(0, dqpCb);
dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
// dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
dqpCb = (int)CLIP(dqpCb, -CHROMA_DQP_MAX, CHROMA_DQP_MAX);
return dqpCb;
}

Expand All @@ -780,7 +781,8 @@ static int adjust_hdr_cr_deltaq(int base_qindex) {
const double dcrQP = CHROMA_CR_QP_SCALE * chromaQp * QP_SCALE_FACTOR;
int dqpCr = (int)(dcrQP + (dcrQP < 0 ? -0.5 : 0.5));
dqpCr = AOMMIN(0, dqpCr);
dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
//dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
dqpCr = (int)CLIP(dqpCr, -CHROMA_DQP_MAX, CHROMA_DQP_MAX);
return dqpCr;
}

Expand Down
3 changes: 2 additions & 1 deletion av1/encoder/encodeframe_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
double num_of_mi = 0.0;
double geom_mean_of_scale = 0.0;

assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM);
assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM ||
cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY);

for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
Expand Down
Loading

0 comments on commit 177c4c9

Please sign in to comment.