Skip to content
This repository has been archived by the owner on Jun 4, 2024. It is now read-only.

Commit

Permalink
Multi-thread recon frame padding
Browse files Browse the repository at this point in the history
  • Loading branch information
mikrbosss committed Sep 7, 2022
1 parent a796bc8 commit 313d479
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 48 deletions.
2 changes: 2 additions & 0 deletions aom_scale/aom_scale_rtcd.pl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ ()
add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, int hstart1, int hend1, int vstart1, int vend1, struct yv12_buffer_config *dst_bc, int hstart2, int vstart2";
add_proto qw/void aom_yv12_partial_coloc_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";

add_proto qw/void aom_extend_frame_borders_plane_row/, "const struct yv12_buffer_config *ybf, int plane, int v_start, int v_end";

add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
specialize qw/aom_extend_frame_borders dspr2/;

Expand Down
83 changes: 61 additions & 22 deletions aom_scale/generic/yv12extend.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@

static void extend_plane(uint8_t *const src, int src_stride, int width,
int height, int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int extend_bottom, int extend_right, int v_start,
int v_end) {
assert(src != NULL);
int i;
const int linesize = extend_left + extend_right + width;
assert(linesize <= src_stride);

/* copy the left and right most columns out */
uint8_t *src_ptr1 = src;
uint8_t *src_ptr2 = src + width - 1;
uint8_t *dst_ptr1 = src - extend_left;
uint8_t *src_ptr1 = src + v_start * src_stride;
uint8_t *src_ptr2 = src + v_start * src_stride + width - 1;
uint8_t *dst_ptr1 = src + v_start * src_stride - extend_left;
uint8_t *dst_ptr2 = src_ptr2 + 1;

for (i = 0; i < height; ++i) {
for (i = v_start; i < v_end; ++i) {
memset(dst_ptr1, src_ptr1[0], extend_left);
memset(dst_ptr2, src_ptr2[0], extend_right);
src_ptr1 += src_stride;
Expand Down Expand Up @@ -65,19 +66,20 @@ static void extend_plane(uint8_t *const src, int src_stride, int width,
#if CONFIG_AV1_HIGHBITDEPTH
static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
int height, int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int extend_bottom, int extend_right, int v_start,
int v_end) {
int i;
const int linesize = extend_left + extend_right + width;
assert(linesize <= src_stride);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);

/* copy the left and right most columns out */
uint16_t *src_ptr1 = src;
uint16_t *src_ptr2 = src + width - 1;
uint16_t *dst_ptr1 = src - extend_left;
uint16_t *src_ptr1 = src + v_start * src_stride;
uint16_t *src_ptr2 = src + v_start * src_stride + width - 1;
uint16_t *dst_ptr1 = src + v_start * src_stride - extend_left;
uint16_t *dst_ptr2 = src_ptr2 + 1;

for (i = 0; i < height; ++i) {
for (i = v_start; i < v_end; ++i) {
aom_memset16(dst_ptr1, src_ptr1[0], extend_left);
aom_memset16(dst_ptr2, src_ptr2[0], extend_right);
src_ptr1 += src_stride;
Expand Down Expand Up @@ -107,6 +109,41 @@ static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
}
#endif // CONFIG_AV1_HIGHBITDEPTH

/* Extends the borders of a single plane of `ybf`, restricted to the row
 * range given by `v_start` and `v_end`.
 *
 * ybf      Frame buffer whose plane is extended.
 * plane    Plane index; 0 selects the luma geometry, any value > 0 selects
 *          the chroma geometry (index 1 of the per-plane arrays).
 * v_start  First row of the range handed to the plane-extension helper.
 * v_end    End row of the range handed to the plane-extension helper.
 *
 * A non-zero top extension is requested only when `v_start` is 0, and a
 * non-zero bottom extension only when `v_end` equals the plane's crop height;
 * in every other case 0 is passed for that side. The left and right extension
 * sizes are always passed.
 */
void aom_extend_frame_borders_plane_row_c(const YV12_BUFFER_CONFIG *ybf,
                                          int plane, int v_start, int v_end) {
  const int border = ybf->border;
  const int shift_x = ybf->subsampling_x;
  const int shift_y = ybf->subsampling_y;

  /* The allocated luma size may exceed the crop size by 0..15 pixels. */
  assert(ybf->y_height - ybf->y_crop_height < 16);
  assert(ybf->y_width - ybf->y_crop_width < 16);
  assert(ybf->y_height - ybf->y_crop_height >= 0);
  assert(ybf->y_width - ybf->y_crop_width >= 0);

  /* All chroma planes share index 1 of the per-plane geometry arrays. */
  const int is_uv = plane > 0;
  const int crop_w = ybf->crop_widths[is_uv];
  const int crop_h = ybf->crop_heights[is_uv];
  const int stride = ybf->strides[is_uv];

  /* Border sizes for this plane, scaled down by the chroma subsampling. */
  const int top_ext = border >> (is_uv ? shift_y : 0);
  const int left_ext = border >> (is_uv ? shift_x : 0);
  const int bottom_ext = top_ext + ybf->heights[is_uv] - crop_h;
  const int right_ext = left_ext + ybf->widths[is_uv] - crop_w;

  /* Top/bottom extension applies only to the row range that touches the
   * corresponding edge of the plane. */
  int top_arg = 0;
  int bottom_arg = 0;
  if (v_start == 0) top_arg = top_ext;
  if (v_end == crop_h) bottom_arg = bottom_ext;

#if CONFIG_AV1_HIGHBITDEPTH
  if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
    extend_plane_high(ybf->buffers[plane], stride, crop_w, crop_h, top_arg,
                      left_ext, bottom_arg, right_ext, v_start, v_end);
    return;
  }
#endif

  extend_plane(ybf->buffers[plane], stride, crop_w, crop_h, top_arg, left_ext,
               bottom_arg, right_ext, v_start, v_end);
}

void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
const int num_planes) {
assert(ybf->border % 2 == 0);
Expand All @@ -124,7 +161,8 @@ void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
ybf->buffers[plane], ybf->strides[is_uv], ybf->crop_widths[is_uv],
ybf->crop_heights[is_uv], plane_border, plane_border,
plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv], 0,
ybf->crop_heights[is_uv]);
}
return;
}
Expand All @@ -137,7 +175,8 @@ void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
plane_border, plane_border,
plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv], 0,
ybf->crop_heights[is_uv]);
}
}

Expand All @@ -161,7 +200,7 @@ static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size,
const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top,
left, bottom, right);
left, bottom, right, 0, ybf->crop_heights[is_uv]);
}
return;
}
Expand All @@ -175,7 +214,7 @@ static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size,
const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top, left,
bottom, right);
bottom, right, 0, ybf->crop_heights[is_uv]);
}
}

Expand All @@ -199,17 +238,17 @@ void aom_extend_frame_borders_y_c(YV12_BUFFER_CONFIG *ybf) {
assert(ybf->y_width - ybf->y_crop_width >= 0);
#if CONFIG_AV1_HIGHBITDEPTH
if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
ybf->y_crop_height, ext_size, ext_size,
ext_size + ybf->y_height - ybf->y_crop_height,
ext_size + ybf->y_width - ybf->y_crop_width);
extend_plane_high(
ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height,
ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height,
ext_size + ybf->y_width - ybf->y_crop_width, 0, ybf->y_crop_height);
return;
}
#endif
extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
ybf->y_crop_height, ext_size, ext_size,
ext_size + ybf->y_height - ybf->y_crop_height,
ext_size + ybf->y_width - ybf->y_crop_width);
extend_plane(
ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height,
ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height,
ext_size + ybf->y_width - ybf->y_crop_width, 0, ybf->y_crop_height);
}

#if CONFIG_AV1_HIGHBITDEPTH
Expand Down
6 changes: 6 additions & 0 deletions av1/common/av1_common_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -1048,6 +1048,12 @@ typedef struct AV1Common {
*/
int spatial_layer_id;

/*!
* Extension of frame borders is multi-threaded along with cdef/loop
* restoration.
*/
int extend_border_mt[MAX_MB_PLANE];

#if TXCOEFF_TIMER
int64_t cum_txcoeff_timer;
int64_t txcoeff_timer;
Expand Down
8 changes: 6 additions & 2 deletions av1/common/restoration.c
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,8 @@ static void filter_frame_on_unit(const RestorationTileLimits *limits,
void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
YV12_BUFFER_CONFIG *frame,
AV1_COMMON *cm, int optimized_lr,
int num_planes) {
int num_planes,
int do_extend_border_mt) {
const SequenceHeader *const seq_params = cm->seq_params;
const int bit_depth = seq_params->bit_depth;
const int highbd = seq_params->use_highbitdepth;
Expand All @@ -1123,6 +1124,7 @@ void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,

lr_ctxt->on_rest_unit = filter_frame_on_unit;
lr_ctxt->frame = frame;
lr_ctxt->cm = cm;
for (int plane = 0; plane < num_planes; ++plane) {
RestorationInfo *rsi = &cm->rst_info[plane];
RestorationType rtype = rsi->frame_restoration_type;
Expand All @@ -1136,6 +1138,7 @@ void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
const int plane_width = frame->crop_widths[is_uv];
const int plane_height = frame->crop_heights[is_uv];
FilterFrameCtxt *lr_plane_ctxt = &lr_ctxt->ctxt[plane];
cm->extend_border_mt[plane] = do_extend_border_mt;

av1_extend_frame(frame->buffers[plane], plane_width, plane_height,
frame->strides[is_uv], RESTORATION_BORDER,
Expand Down Expand Up @@ -1196,7 +1199,8 @@ void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;

av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
optimized_lr, num_planes);
optimized_lr, num_planes,
/* do_extend_border_mt */ 0);

foreach_rest_unit_in_planes(loop_rest_ctxt, cm, num_planes);

Expand Down
4 changes: 3 additions & 1 deletion av1/common/restoration.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ typedef struct AV1LrStruct {
FilterFrameCtxt ctxt[MAX_MB_PLANE];
YV12_BUFFER_CONFIG *frame;
YV12_BUFFER_CONFIG *dst;
struct AV1Common *cm;
} AV1LrStruct;

extern const sgr_params_type av1_sgr_params[SGRPROJ_PARAMS];
Expand Down Expand Up @@ -463,7 +464,8 @@ void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
YV12_BUFFER_CONFIG *frame,
struct AV1Common *cm,
int optimized_lr, int num_planes);
int optimized_lr, int num_planes,
int do_extend_border_mt);
void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
struct AV1Common *cm, int num_planes);
void av1_foreach_rest_unit_in_row(
Expand Down
48 changes: 36 additions & 12 deletions av1/common/thread_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,12 @@ static int loop_restoration_row_worker(void *arg1, void *arg2) {
copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, ctxt[plane].tile_rect.left,
ctxt[plane].tile_rect.right, cur_job_info->v_copy_start,
cur_job_info->v_copy_end);

if (lr_ctxt->cm->extend_border_mt[plane]) {
aom_extend_frame_borders_plane_row(lr_ctxt->frame, plane,
cur_job_info->v_copy_start,
cur_job_info->v_copy_end);
}
} else {
break;
}
Expand Down Expand Up @@ -918,15 +924,16 @@ static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
AV1_COMMON *cm, int optimized_lr,
AVxWorker *workers, int num_workers,
AV1LrSync *lr_sync, void *lr_ctxt) {
AV1LrSync *lr_sync, void *lr_ctxt,
int do_extend_border) {
assert(!cm->features.all_lossless);

const int num_planes = av1_num_planes(cm);

AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;

av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
optimized_lr, num_planes);
av1_loop_restoration_filter_frame_init(
loop_rest_ctxt, frame, cm, optimized_lr, num_planes, do_extend_border);

foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync,
cm);
Expand Down Expand Up @@ -1002,13 +1009,27 @@ static AOM_INLINE int get_cdef_row_next_job(AV1CdefSync *const cdef_sync,
static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
AV1CdefWorkerData *const cdef_worker = (AV1CdefWorkerData *)arg2;
const int nvfb =
(cdef_worker->cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
AV1_COMMON *cm = cdef_worker->cm;
const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int cur_fbr;
const int num_planes = av1_num_planes(cm);
while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) {
av1_cdef_fb_row(cdef_worker->cm, cdef_worker->xd, cdef_worker->linebuf,
cdef_worker->colbuf, cdef_worker->srcbuf, cur_fbr,
MACROBLOCKD *xd = cdef_worker->xd;
av1_cdef_fb_row(cm, xd, cdef_worker->linebuf, cdef_worker->colbuf,
cdef_worker->srcbuf, cur_fbr,
cdef_worker->cdef_init_fb_row_fn, cdef_sync);
for (int plane = 0; plane < num_planes; ++plane) {
if (cm->extend_border_mt[plane]) {
const YV12_BUFFER_CONFIG *ybf = &cm->cur_frame->buf;
const int is_uv = plane > 0;
const int mi_high = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
const int unit_height = MI_SIZE_64X64 << mi_high;
const int v_start = cur_fbr * unit_height;
const int v_end =
AOMMIN(v_start + unit_height, ybf->crop_heights[is_uv]);
aom_extend_frame_borders_plane_row(ybf, plane, v_start, v_end);
}
}
}
return 1;
}
Expand All @@ -1017,12 +1038,15 @@ static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
static void prepare_cdef_frame_workers(
AV1_COMMON *const cm, MACROBLOCKD *xd, AV1CdefWorkerData *const cdef_worker,
AVxWorkerHook hook, AVxWorker *const workers, AV1CdefSync *const cdef_sync,
int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn) {
int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
int do_extend_border) {
const int num_planes = av1_num_planes(cm);

cdef_worker[0].srcbuf = cm->cdef_info.srcbuf;
for (int plane = 0; plane < num_planes; plane++)
for (int plane = 0; plane < num_planes; plane++) {
cdef_worker[0].colbuf[plane] = cm->cdef_info.colbuf[plane];
cm->extend_border_mt[plane] = do_extend_border;
}
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &workers[i];
cdef_worker[i].cm = cm;
Expand Down Expand Up @@ -1111,8 +1135,8 @@ void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
AV1CdefWorkerData *const cdef_worker,
AVxWorker *const workers, AV1CdefSync *const cdef_sync,
int num_workers,
cdef_init_fb_row_t cdef_init_fb_row_fn) {
int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
int do_extend_border) {
YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
const int num_planes = av1_num_planes(cm);

Expand All @@ -1122,7 +1146,7 @@ void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
reset_cdef_job_info(cdef_sync);
prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook,
workers, cdef_sync, num_workers,
cdef_init_fb_row_fn);
cdef_init_fb_row_fn, do_extend_border);
launch_cdef_workers(workers, num_workers);
sync_cdef_workers(workers, cm, num_workers);
}
Expand Down
5 changes: 3 additions & 2 deletions av1/common/thread_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ typedef struct AV1CdefSyncData {
void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
AV1CdefWorkerData *const cdef_worker,
AVxWorker *const workers, AV1CdefSync *const cdef_sync,
int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn);
int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
int do_extend_border);
void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
const MACROBLOCKD *const xd,
CdefBlockInfo *const fb_info,
Expand Down Expand Up @@ -163,7 +164,7 @@ void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
struct AV1Common *cm,
int optimized_lr, AVxWorker *workers,
int num_workers, AV1LrSync *lr_sync,
void *lr_ctxt);
void *lr_ctxt, int do_extend_border);
void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
int num_workers, int num_rows_lr,
Expand Down
10 changes: 7 additions & 3 deletions av1/decoder/decodeframe.c
Original file line number Diff line number Diff line change
Expand Up @@ -5280,6 +5280,9 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE;
// Frame border extension is not required in the decoder
// as it happens in extend_mc_border().
int do_extend_border_mt = 0;
if (!optimized_loop_restoration) {
if (do_loop_restoration)
av1_loop_restoration_save_boundary_lines(&pbi->common.cur_frame->buf,
Expand All @@ -5289,7 +5292,8 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
if (pbi->num_workers > 1) {
av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
pbi->tile_workers, &pbi->cdef_sync,
pbi->num_workers, av1_cdef_init_fb_row_mt);
pbi->num_workers, av1_cdef_init_fb_row_mt,
do_extend_border_mt);
} else {
av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
av1_cdef_init_fb_row);
Expand All @@ -5305,7 +5309,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
av1_loop_restoration_filter_frame_mt(
(YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
&pbi->lr_ctxt);
&pbi->lr_ctxt, do_extend_border_mt);
} else {
av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
cm, optimized_loop_restoration,
Expand All @@ -5320,7 +5324,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
av1_loop_restoration_filter_frame_mt(
(YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
&pbi->lr_ctxt);
&pbi->lr_ctxt, do_extend_border_mt);
} else {
av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
cm, optimized_loop_restoration,
Expand Down
Loading

0 comments on commit 313d479

Please sign in to comment.