Skip to content

Commit

Permalink
Use shadow buffers for dGPU VF + iGPU (backing virtio-GPU) output
Browse files Browse the repository at this point in the history
To get the best performance, we must guarantee that the scan-out buffers
used for composition in SurfaceFlinger reside in GPU local memory. However,
importing these buffers into virtio-GPU migrates them from local memory
to system memory, which severely degrades performance. To avoid
migrating these client-composited buffers, allocate a shadow buffer
for each of them and import the shadow buffers into virtio-GPU for
scan-out. Right before the atomic commit, use the GPU blit engine to
copy the buffer content to its shadow buffer.

Use shadow buffers only when a dGPU is present and virtio-GPU does not
advertise the ALLOW_P2P feature.

There are several GPU instructions to blit memory:
- XY_FAST_COPY_BLT (BSpec: 47982),
- XY_SRC_COPY_BLT (BSpec: 48002),
- XY_BLOCK_COPY_BLT (BSpec: 3678).
In our experiments, XY_FAST_COPY_BLT was much faster than the other two instructions.

Tracked-On: OAM-124182
Signed-off-by: Weifeng Liu <weifeng.liu@intel.com>
  • Loading branch information
phreer committed Oct 14, 2024
1 parent 952ce91 commit e5ba1c8
Show file tree
Hide file tree
Showing 11 changed files with 915 additions and 13 deletions.
10 changes: 9 additions & 1 deletion Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,18 @@
cc_library_static {
name: "libdrmhwc_utils",

srcs: ["utils/Worker.cpp"],
srcs: [
"utils/Worker.cpp",
"utils/intel_blit.cpp"
],

include_dirs: ["vendor/intel/external/drm-hwcomposer"],

shared_libs: [
"libdrm",
"libutils",
],

cflags: [
"-Wall",
"-Werror",
Expand Down
11 changes: 11 additions & 0 deletions bufferinfo/BufferInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cstdint>
#include "utils/intel_blit.h"

constexpr int kBufferMaxPlanes = 4;

Expand Down Expand Up @@ -50,6 +51,16 @@ struct BufferInfo {
/* sizes[] is used only by mapper@4 metadata getter for internal purposes */
uint32_t sizes[kBufferMaxPlanes];
int prime_fds[kBufferMaxPlanes];
uint32_t prime_buffer_handles[kBufferMaxPlanes];
bool use_shadow_fds;
struct intel_info info;
/*
* Shadow buffers in system memory. We will blit content of prime_fds to
* shadow_fds right before atomic commit and use the shadow buffers as frame
* buffers.
**/
int shadow_fds[kBufferMaxPlanes];
uint32_t shadow_buffer_handles[kBufferMaxPlanes];
uint64_t modifiers[kBufferMaxPlanes];

BufferColorSpace color_space;
Expand Down
10 changes: 10 additions & 0 deletions bufferinfo/BufferInfoMapperMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <cinttypes>

#include "utils/log.h"
#include "utils/intel_blit.h"

namespace android {

Expand Down Expand Up @@ -81,6 +82,15 @@ BufferInfoMapperMetadata::GetFds(buffer_handle_t handle, BufferInfo *bo) {
ALOGE("Invalid prime fd");
return android::BAD_VALUE;
}

int dgpu_fd = intel_dgpu_fd();
if (dgpu_fd >= 0) {
int ret = drmPrimeFDToHandle(dgpu_fd, bo->prime_fds[i], &bo->prime_buffer_handles[i]);
if (ret) {
ALOGE("Cannot convert prime fd to handle\n");
return android::BAD_VALUE;
}
}
}

return 0;
Expand Down
2 changes: 2 additions & 0 deletions compositor/LayerData.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,15 @@ struct LayerData {
clonned.fb = fb;
clonned.pi = pi;
clonned.acquire_fence = std::move(acquire_fence);
clonned.blit_fence = std::move(blit_fence);
return clonned;
}

std::optional<BufferInfo> bi;
std::shared_ptr<DrmFbIdHandle> fb;
PresentInfo pi;
UniqueFd acquire_fence;
UniqueFd blit_fence;
};

} // namespace android
21 changes: 20 additions & 1 deletion drm/DrmAtomicStateManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <drm/drm_fourcc.h>
#include <cmath>
#undef NDEBUG /* Required for assert to work */

#define ATRACE_TAG ATRACE_TAG_GRAPHICS
Expand All @@ -26,6 +28,7 @@
#include <sched.h>
#include <sync/sync.h>
#include <utils/Trace.h>
#include "utils/intel_blit.h"

#include <array>
#include <cassert>
Expand Down Expand Up @@ -105,14 +108,30 @@ auto DrmAtomicStateManager::CommitFrame(AtomicCommitArgs &args) -> int {
auto unused_planes = new_frame_state.used_planes;

bool has_hdr_layer = false;

if (args.composition) {
new_frame_state.used_planes.clear();

for (auto &joining : args.composition->plan) {
DrmPlane *plane = joining.plane->Get();
LayerData &layer = joining.layer;

if (layer.bi->use_shadow_fds) {
int ret = 0;
int out_handle;
// Using any tiling mode other than linear results in corrupted images.
uint32_t tiling = I915_TILING_NONE;
// TODO: handle multi-plane buffer
ret = intel_blit(&layer.bi->info, layer.bi->shadow_buffer_handles[0],
layer.bi->prime_buffer_handles[0],
layer.bi->pitches[0], 4, tiling,
layer.bi->width, layer.bi->height,
layer.acquire_fence.Get(), &out_handle);
if (ret) {
ALOGE("failed to blit scan-out buffer\n");
}
layer.blit_fence = android::UniqueFd(out_handle);
}

if (layer.bi->color_space >= BufferColorSpace::kItuRec2020) {
has_hdr_layer = true;
}
Expand Down
17 changes: 9 additions & 8 deletions drm/DrmFbImporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ auto DrmFbIdHandle::CreateInstance(BufferInfo *bo, GemHandle first_gem_handle,
local->gem_handles_[0] = first_gem_handle;
int32_t err = 0;

int *fds = bo->use_shadow_fds ? bo->shadow_fds : bo->prime_fds;
/* Framebuffer object creation require gem handle for every used plane */
for (size_t i = 1; i < local->gem_handles_.size(); i++) {
if (bo->prime_fds[i] > 0) {
if (bo->prime_fds[i] != bo->prime_fds[0]) {
err = drmPrimeFDToHandle(drm.GetFd(), bo->prime_fds[i],
if (fds[i] > 0) {
if (fds[i] != fds[0]) {
err = drmPrimeFDToHandle(drm.GetFd(), fds[i],
&local->gem_handles_.at(i));
if (err != 0) {
ALOGE("failed to import prime fd %d errno=%d", bo->prime_fds[i],
errno);
ALOGE("failed to import prime fd %d errno=%d", fds[i], errno);
}
} else {
local->gem_handles_.at(i) = local->gem_handles_[0];
Expand Down Expand Up @@ -129,11 +129,12 @@ auto DrmFbImporter::GetOrCreateFbId(BufferInfo *bo)
-> std::shared_ptr<DrmFbIdHandle> {
/* Lookup DrmFbIdHandle in cache first. First handle serves as a cache key. */
GemHandle first_handle = 0;
int32_t err = drmPrimeFDToHandle(drm_->GetFd(), bo->prime_fds[0],
&first_handle);
int *fds = bo->use_shadow_fds ? bo->shadow_fds : bo->prime_fds;

int32_t err = drmPrimeFDToHandle(drm_->GetFd(), fds[0], &first_handle);

if (err != 0) {
ALOGE("Failed to import prime fd %d ret=%d", bo->prime_fds[0], err);
ALOGE("Failed to import prime fd %d ret=%d", fds[0], err);
return {};
}

Expand Down
5 changes: 3 additions & 2 deletions drm/DrmPlane.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ auto DrmPlane::AtomicSetState(drmModeAtomicReq &pset, LayerData &layer,
}
}

if (layer.acquire_fence &&
!in_fence_fd_property_.AtomicSet(pset, layer.acquire_fence.Get())) {
int fence = layer.bi->use_shadow_fds ? layer.blit_fence.Get() : layer.acquire_fence.Get();
if (fence > 0 &&
!in_fence_fd_property_.AtomicSet(pset, fence)) {
return -EINVAL;
}

Expand Down
27 changes: 26 additions & 1 deletion hwc2_device/HwcLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@
* limitations under the License.
*/

#include <xf86drm.h>
#define LOG_TAG "hwc-layer"

#include "HwcLayer.h"

#include "HwcDisplay.h"
#include "bufferinfo/BufferInfoGetter.h"
#include "utils/log.h"
#include "utils/intel_blit.h"

namespace android {

Expand Down Expand Up @@ -252,6 +254,29 @@ void HwcLayer::ImportFb() {
return;
}

int kms_fd = parent_->GetPipe().device->GetFd();
layer_data_.bi->use_shadow_fds = (intel_dgpu_fd() >= 0) && !virtio_gpu_allow_p2p(kms_fd);
if (layer_data_.bi->use_shadow_fds) {
uint32_t handle;
int ret = intel_create_buffer(layer_data_.bi->width, layer_data_.bi->height,
layer_data_.bi->format, layer_data_.bi->modifiers[0],
&handle);
ALOGI("create shadow buffer, modifier=0x%lx\n", (unsigned long) layer_data_.bi->modifiers[0]);
if (ret) {
ALOGE("Failed to create shadow buffer\n");
layer_data_.bi->use_shadow_fds = false;
} else {
layer_data_.bi->shadow_buffer_handles[0] = handle;
ret = drmPrimeHandleToFD(intel_dgpu_fd(), handle, 0, &layer_data_.bi->shadow_fds[0]);
if (ret) {
ALOGE("Failed to export shadow buffer\n");
layer_data_.bi->use_shadow_fds = false;
drmCloseBufferHandle(intel_dgpu_fd(), handle);
}
intel_blit_init(&layer_data_.bi->info);
}
}

layer_data_
.fb = parent_->GetPipe().device->GetDrmFbImporter().GetOrCreateFbId(
&layer_data_.bi.value());
Expand Down Expand Up @@ -357,4 +382,4 @@ void HwcLayer::SwChainClearCache() {
swchain_reassembled_ = false;
}

} // namespace android
} // namespace android
118 changes: 118 additions & 0 deletions utils/i915_prelim.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright 2017 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef I915_PRELIM
#define I915_PRELIM

#include <i915_drm.h>

/*
 * PRELIM_-prefixed i915 constants used together with the prelim_* structs
 * declared in this header.
 */

/* Base bit OR-ed into every PRELIM query id defined here. */
#define PRELIM_DRM_I915_QUERY (1 << 16)
/*
 * Query id for the supported memory regions; presumably answered with a
 * struct prelim_drm_i915_query_memory_regions -- confirm against the caller.
 */
#define PRELIM_DRM_I915_QUERY_MEMORY_REGIONS (PRELIM_DRM_I915_QUERY | 4)
/*
 * Object-param namespace bit and the memory-regions param id. They are meant
 * to be combined as
 *   .param = PRELIM_I915_OBJECT_PARAM | PRELIM_I915_PARAM_MEMORY_REGIONS
 * Both macros are defined again, with identical values, inside
 * struct prelim_drm_i915_gem_object_param.
 */
#define PRELIM_I915_OBJECT_PARAM (1ull << 48)
#define PRELIM_I915_PARAM_MEMORY_REGIONS ((1 << 16) | 0x1)
/* Base bit OR-ed into every PRELIM user-extension id defined here. */
#define PRELIM_I915_USER_EXT (1 << 16)
/*
 * Extension id for the set-param create extension (defined again, with the
 * same value, inside struct prelim_drm_i915_gem_create_ext).
 */
#define PRELIM_I915_GEM_CREATE_EXT_SETPARAM (PRELIM_I915_USER_EXT | 1)
/*
 * Read/write ioctl that reuses the DRM_I915_GEM_CREATE command number but
 * takes a struct prelim_drm_i915_gem_create_ext as its argument.
 */
#define PRELIM_DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct prelim_drm_i915_gem_create_ext)

/* The prelim class:instance type is simply an alias of the type from <i915_drm.h>. */
#define prelim_drm_i915_gem_memory_class_instance drm_i915_gem_memory_class_instance
/*
 * struct prelim_drm_i915_gem_object_param
 *
 * Describes a single parameter to apply to a GEM object: which object
 * (handle), which parameter (param) and its value or payload (data, size).
 * It is embedded in struct prelim_drm_i915_gem_create_ext_setparam.
 *
 * NOTE(review): this mirrors a kernel uAPI layout; field order and widths
 * presumably must not change -- confirm against the kernel header in use.
 */
struct prelim_drm_i915_gem_object_param {
/* Object handle (0 for I915_GEM_CREATE_EXT_SETPARAM) */
__u32 handle;

/* Data pointer size */
__u32 size;

/*
* PRELIM_I915_OBJECT_PARAM:
*
* Select object namespace for the param.
*
* (Identical re-definition of the macro near the top of this header.)
*/
#define PRELIM_I915_OBJECT_PARAM (1ull << 48)

/*
* PRELIM_I915_PARAM_MEMORY_REGIONS:
*
* Set the data pointer with the desired set of placements in priority
* order(each entry must be unique and supported by the device), as an array of
* prelim_drm_i915_gem_memory_class_instance, or an equivalent layout of class:instance
* pair encodings. See PRELIM_DRM_I915_QUERY_MEMORY_REGIONS for how to query the
* supported regions.
*
* Note that this requires the PRELIM_I915_OBJECT_PARAM namespace:
* .param = PRELIM_I915_OBJECT_PARAM | PRELIM_I915_PARAM_MEMORY_REGIONS
*
* (Identical re-definition of the macro near the top of this header.)
*/
#define PRELIM_I915_PARAM_MEMORY_REGIONS ((1 << 16) | 0x1)
/* Parameter selector: namespace bit OR-ed with the parameter id (see above). */
__u64 param;

/* Data value or pointer */
__u64 data;
};

/*
 * struct prelim_drm_i915_gem_create_ext_setparam
 *
 * A user extension that carries one object parameter: the generic
 * i915_user_extension header followed by the parameter itself.
 *
 * NOTE(review): presumably linked through
 * prelim_drm_i915_gem_create_ext.extensions with base.name set to
 * PRELIM_I915_GEM_CREATE_EXT_SETPARAM -- confirm against the code that
 * issues PRELIM_DRM_IOCTL_I915_GEM_CREATE_EXT.
 */
struct prelim_drm_i915_gem_create_ext_setparam {
/* Generic extension header from <i915_drm.h>. */
struct i915_user_extension base;
/* The parameter to apply to the object. */
struct prelim_drm_i915_gem_object_param param;
};

/**
* struct prelim_drm_i915_memory_region_info
*
* Describes one region as known to the driver. Instances of this struct form
* the regions[] array of struct prelim_drm_i915_query_memory_regions.
*
* "MBZ" in the field comments below is the usual kernel shorthand for
* "must be zero".
*/
struct prelim_drm_i915_memory_region_info {
/** class:instance pair encoding */
struct drm_i915_gem_memory_class_instance region;

/** MBZ */
__u32 rsvd0;

/** MBZ */
__u64 caps;

/** MBZ */
__u64 flags;

/** Memory probed by the driver (-1 = unknown) */
__u64 probed_size;

/** Estimate of memory remaining (-1 = unknown) */
__u64 unallocated_size;

/** MBZ */
__u64 rsvd1[8];
};

/**
* struct prelim_drm_i915_query_memory_regions
*
* A fixed header followed by a variable-length array of region descriptors;
* num_regions gives the number of entries in regions[].
*
* NOTE(review): presumably the result layout of the
* PRELIM_DRM_I915_QUERY_MEMORY_REGIONS query -- confirm against the caller.
*/
struct prelim_drm_i915_query_memory_regions {
/** @num_regions: Number of supported regions */
__u32 num_regions;

/** @rsvd: MBZ */
__u32 rsvd[3];

/** @regions: Info about each supported region */
struct prelim_drm_i915_memory_region_info regions[];
};


/**
* struct prelim_drm_i915_gem_create_ext
*
* Argument block of PRELIM_DRM_IOCTL_I915_GEM_CREATE_EXT: the caller supplies
* the requested size and receives the allocated size and the object handle.
*/
struct prelim_drm_i915_gem_create_ext {

/**
* Requested size for the object.
*
* The (page-aligned) allocated size for the object will be returned.
*/
__u64 size;
/**
* Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
/** Padding. NOTE(review): presumably must be zero -- confirm. */
__u32 pad;
/*
* Extension id accepted by this ioctl (identical re-definition of the macro
* near the top of this header), and the complement mask of that id.
*/
#define PRELIM_I915_GEM_CREATE_EXT_SETPARAM (PRELIM_I915_USER_EXT | 1)
#define PRELIM_I915_GEM_CREATE_EXT_FLAGS_UNKNOWN \
(~PRELIM_I915_GEM_CREATE_EXT_SETPARAM)
/**
* NOTE(review): presumably a user pointer to the first i915_user_extension
* in a chain (e.g. a struct prelim_drm_i915_gem_create_ext_setparam), or 0
* when there are no extensions -- confirm against the kernel header.
*/
__u64 extensions;
};
#endif
Loading

0 comments on commit e5ba1c8

Please sign in to comment.