Skip to content

Commit

Permalink
DO NOT SUBMIT adding vulkan logging ??
Browse files Browse the repository at this point in the history
  • Loading branch information
benvanik committed Aug 22, 2023
1 parent d2a70c6 commit 5061009
Show file tree
Hide file tree
Showing 13 changed files with 83 additions and 47 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ option(IREE_BYTECODE_MODULE_ENABLE_TSAN "Enable thread sanitizer in IREE modules
option(IREE_ENABLE_UBSAN "Enable undefined behavior sanitizer" OFF)
option(IREE_ENABLE_SPLIT_DWARF "Enable gsplit-dwarf for debug information if the platform supports it" OFF)
option(IREE_ENABLE_THIN_ARCHIVES "Enables thin ar archives (elf systems only). Disable for released static archives" OFF)
option(IREE_LINK_COMPILER_SHARED_LIBRARY "Links IREE tools using the compiler compiled into a shared library" ON)
option(IREE_LINK_COMPILER_SHARED_LIBRARY "Links IREE tools using the compiler compiled into a shared library" OFF)

# STREQUAL feels wrong here - we don't care about the exact true-value used,
# ON or TRUE or something else. But we haven't been able to think of a less bad
Expand Down
3 changes: 3 additions & 0 deletions build_tools/scripts/check_vulkan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@ fi

echo "${VULKAN_INSTANCE?}"
echo "${VK_PHYSICAL_DEVICE_PROPERTIES?}"

cat /tmp/vulkaninfo.stdout
cat /tmp/vulkaninfo.stderr
18 changes: 18 additions & 0 deletions runtime/src/iree/hal/drivers/vulkan/base_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,15 @@ iree_status_t iree_hal_vulkan_find_memory_type(

iree_hal_memory_type_t requested_type = params->type;
if (device_props->deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) {
fprintf(stderr, "iree_hal_vulkan_find_memory_type integrated gpu\n");
// Integrated GPUs have tiny device local heaps commonly used for
// framebuffers and other bounded resources. We don't currently try to use
// them but could for very small transients.
if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
requested_type &= ~IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
requested_type |= IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
fprintf(stderr,
"iree_hal_vulkan_find_memory_type flip to device visible\n");
}
}

Expand All @@ -61,30 +64,40 @@ iree_status_t iree_hal_vulkan_find_memory_type(
// Device-local, host-visible.
require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
prefer_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
fprintf(stderr,
"iree_hal_vulkan_find_memory_type req host visible, pref device "
"local\n");
} else {
// Device-local only.
require_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
fprintf(stderr, "iree_hal_vulkan_find_memory_type device local only\n");
}
} else {
if (iree_all_bits_set(requested_type,
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
// Host-local, device-visible.
require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
fprintf(stderr,
"iree_hal_vulkan_find_memory_type host local device visible\n");
} else {
// Host-local only.
require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
fprintf(stderr, "iree_hal_vulkan_find_memory_type host visible only\n");
}
}
if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
require_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
fprintf(stderr, "iree_hal_vulkan_find_memory_type host cached\n");
}
if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
require_flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
fprintf(stderr, "iree_hal_vulkan_find_memory_type host coherent\n");
}
if (iree_any_bit_set(requested_type,
IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED |
IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT)) {
require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
fprintf(stderr, "iree_hal_vulkan_find_memory_type mapping req\n");
}

int most_bits_count = 0;
Expand All @@ -95,6 +108,7 @@ iree_status_t iree_hal_vulkan_find_memory_type(
!iree_hal_vulkan_is_memory_type_usable(flags) ||
!iree_all_bits_set(allowed_type_indices, 1u << i)) {
// Excluded (required bits missing or memory type is not usable).
fprintf(stderr, "iree_hal_vulkan_find_memory_type excluding bit %u\n", i);
continue;
}
// When all required bits are satisfied try to find the memory type that
Expand All @@ -103,9 +117,13 @@ iree_status_t iree_hal_vulkan_find_memory_type(
if (most_bits_idx == -1) {
most_bits_count = bit_count;
most_bits_idx = (int)i;
fprintf(stderr,
"iree_hal_vulkan_find_memory_type first bit found at %u\n", i);
} else if (bit_count > most_bits_count) {
most_bits_count = bit_count;
most_bits_idx = (int)i;
fprintf(stderr, "iree_hal_vulkan_find_memory_type better found at %u\n",
i);
}
}
if (most_bits_idx == -1) {
Expand Down
18 changes: 16 additions & 2 deletions runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ static iree_status_t iree_hal_vulkan_native_allocator_commit_and_wrap(
VkMemoryRequirements requirements = {0};
logical_device->syms()->vkGetBufferMemoryRequirements(*logical_device, handle,
&requirements);
fprintf(stderr, "vkGetBufferMemoryRequirements alignment %u bits %08X\n",
(uint32_t)requirements.alignment, requirements.memoryTypeBits);
uint32_t memory_type_index = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_find_memory_type(
&allocator->device_props, &allocator->memory_props, params,
Expand All @@ -287,13 +289,25 @@ static iree_status_t iree_hal_vulkan_native_allocator_commit_and_wrap(
allocator->device_props_11.maxMemoryAllocationSize, out_buffer);
}

fprintf(stderr, "vkAllocateMemory\nreq size %u\nmemory type index %u\n",
(uint32_t)requirements.size, memory_type_index);

iree_device_size_t aligned_size =
iree_device_align(allocation_size, 16 * 1024);

// Allocate the device memory we'll attach the buffer to.
VkMemoryAllocateInfo allocate_info = {};
allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocate_info.pNext = NULL;
allocate_info.allocationSize = requirements.size;
allocate_info.allocationSize = iree_max(aligned_size, requirements.size);
allocate_info.memoryTypeIndex = memory_type_index;
VkDeviceMemory device_memory = VK_NULL_HANDLE;
fprintf(stderr,
"vkAllocateMemory(%p, allocationSize=%" PRIu64
", memoryTypeIndex=%u, pNext=%p, %p, %p)\n",
(void*)logical_device->value(),
(uint64_t)allocate_info.allocationSize, allocate_info.memoryTypeIndex,
allocate_info.pNext, logical_device->allocator(), &device_memory);
VK_RETURN_IF_ERROR(logical_device->syms()->vkAllocateMemory(
*logical_device, &allocate_info,
logical_device->allocator(), &device_memory),
Expand All @@ -307,7 +321,7 @@ static iree_status_t iree_hal_vulkan_native_allocator_commit_and_wrap(
internal_release_callback.user_data = NULL;
iree_status_t status = iree_hal_vulkan_native_buffer_wrap(
(iree_hal_allocator_t*)allocator, params->type, params->access,
params->usage, allocation_size,
params->usage, aligned_size,
/*byte_offset=*/0,
/*byte_length=*/allocation_size, logical_device, device_memory, handle,
internal_release_callback, iree_hal_buffer_release_callback_null(),
Expand Down
16 changes: 15 additions & 1 deletion runtime/src/iree/hal/drivers/vulkan/vma_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,19 @@ static void VKAPI_PTR iree_hal_vulkan_vma_free_callback(
static void iree_hal_vulkan_vma_allocator_destroy(
iree_hal_allocator_t* IREE_RESTRICT base_allocator);

static PFN_vkAllocateMemory allocate_memory_ptr = NULL;
static VkResult HOOKED_vkAllocateMemory(
VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo,
const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory) {
fprintf(stderr,
"vkAllocateMemory(%p, allocationSize=%" PRIu64
", memoryTypeIndex=%u, pNext=%p, %p, %p)\n",
(void*)device, (uint64_t)pAllocateInfo->allocationSize,
pAllocateInfo->memoryTypeIndex, pAllocateInfo->pNext, pAllocator,
pMemory);
return allocate_memory_ptr(device, pAllocateInfo, pAllocator, pMemory);
}

iree_status_t iree_hal_vulkan_vma_allocator_create(
const iree_hal_vulkan_device_options_t* options, VkInstance instance,
VkPhysicalDevice physical_device, VkDeviceHandle* logical_device,
Expand All @@ -287,7 +300,8 @@ iree_status_t iree_hal_vulkan_vma_allocator_create(
syms->vkGetPhysicalDeviceProperties;
vulkan_fns.vkGetPhysicalDeviceMemoryProperties =
syms->vkGetPhysicalDeviceMemoryProperties;
vulkan_fns.vkAllocateMemory = syms->vkAllocateMemory;
allocate_memory_ptr = syms->vkAllocateMemory;
vulkan_fns.vkAllocateMemory = HOOKED_vkAllocateMemory;
vulkan_fns.vkFreeMemory = syms->vkFreeMemory;
vulkan_fns.vkMapMemory = syms->vkMapMemory;
vulkan_fns.vkUnmapMemory = syms->vkUnmapMemory;
Expand Down
6 changes: 6 additions & 0 deletions runtime/src/iree/hal/drivers/vulkan/vma_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
// to be omitted and not have VMA poking around where it shouldn't.
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0

// Prints a printf-style debug message to stderr followed by a newline.
// At least one argument must follow |format| (an empty __VA_ARGS__ would leave
// a trailing comma in the fprintf call).
// Uses `while (0)` so the expansion does not depend on `false` being defined
// in the translation unit where the macro is expanded.
#define VMA_DEBUG_LOG_FORMAT(format, ...)   \
  do {                                      \
    fprintf(stderr, (format), __VA_ARGS__); \
    fprintf(stderr, "\n");                  \
  } while (0)

#include <vk_mem_alloc.h> // IWYU pragma: export

#endif // IREE_HAL_DRIVERS_VULKAN_VMA_IMPL_H_
2 changes: 1 addition & 1 deletion runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ static iree_hal_vulkan_device_t* iree_hal_vulkan_device_cast(
// Resets |out_options| to its defaults: the whole struct is zeroed, then
// |flags| and |large_heap_block_size| (64 * 1024 * 1024) are assigned.
IREE_API_EXPORT void iree_hal_vulkan_device_options_initialize(
iree_hal_vulkan_device_options_t* out_options) {
memset(out_options, 0, sizeof(*out_options));
// NOTE(review): this assignment is immediately overwritten by the next line,
// so |flags| ends up 0. Presumably one of the two lines is the removed side
// of a diff hunk - confirm which default is intended.
out_options->flags = IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR;
out_options->flags = 0; // IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR;
out_options->large_heap_block_size = 64 * 1024 * 1024;
}

Expand Down
1 change: 0 additions & 1 deletion tests/e2e/models/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ iree_lit_test_suite(
[
"collatz.mlir",
"edge_detection.mlir",
"fragment_000.mlir",
"fullyconnected.mlir",
"mnist_fake_weights.mlir",
"unidirectional_lstm.mlir",
Expand Down
1 change: 0 additions & 1 deletion tests/e2e/models/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ iree_lit_test_suite(
SRCS
"collatz.mlir"
"edge_detection.mlir"
"fragment_000.mlir"
"fullyconnected.mlir"
"mnist_fake_weights.mlir"
"unidirectional_lstm.mlir"
Expand Down
39 changes: 0 additions & 39 deletions tests/e2e/models/fragment_000.mlir

This file was deleted.

12 changes: 12 additions & 0 deletions tests/e2e/models/mnist_train_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ if(IREE_TARGET_BACKEND_CUDA AND IREE_HAL_DRIVER_CUDA)
endif()

if(IREE_TARGET_BACKEND_VULKAN_SPIRV AND IREE_HAL_DRIVER_VULKAN)
# Runs mnist_train_test.py with the vulkan-spirv target backend and the vulkan
# driver, passing the script's --vma flag.
iree_py_test(
NAME
mnist_train_test_vulkan_vma
SRCS
"mnist_train_test.py"
ARGS
"--target_backend=vulkan-spirv"
"--driver=vulkan"
"--vma"
LABELS
"driver=vulkan"
)
iree_py_test(
NAME
mnist_train_test_vulkan
Expand Down
9 changes: 9 additions & 0 deletions tests/e2e/models/mnist_train_test/mnist_train_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import numpy as np

from iree.compiler.tools import InputType, compile_file
from iree import runtime as rt
from iree.runtime import load_vm_flatbuffer_file

MODEL_ARTIFACTS_URL = "https://storage.googleapis.com/iree-model-artifacts/mnist_train.a49ba1535a45ac0f3e6be22a7ed5dddf4a53cd1f41126af938f0667b998f8e11.tar"
Expand Down Expand Up @@ -76,6 +77,7 @@ def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--target_backend", type=str, default="llvm-cpu")
parser.add_argument("--driver", type=str, default="local-task")
parser.add_argument("--vma", default=False, action="store_true")
return parser.parse_known_args()


Expand Down Expand Up @@ -115,6 +117,13 @@ def extract_test_data(archive_path: str, out_dir: str):

class MnistTrainTest(unittest.TestCase):
def test_mnist_training(self):
if args.vma:
rt.flags.parse_flags("--vulkan_vma_allocator=true")
else:
rt.flags.parse_flags("--vulkan_vma_allocator=false")
rt.flags.parse_flags("--vulkan_validation_layers=true")
rt.flags.parse_flags("--vulkan_debug_utils=true")
rt.flags.parse_flags("--vulkan_debug_verbosity=4")
with tempfile.TemporaryDirectory() as tmp_dir:
archive_path = os.path.join(tmp_dir, "mnist_train.tar")
download_test_data(archive_path)
Expand Down
3 changes: 2 additions & 1 deletion tests/e2e/tensor_ops/tensor_cast.mlir
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// RUN: iree-run-mlir --Xcompiler,iree-hal-target-backends=llvm-cpu %s | FileCheck %s
// RUN: [[ $IREE_VMVX_DISABLE == 1 ]] || (iree-run-mlir --Xcompiler,iree-hal-target-backends=vmvx %s | FileCheck %s)
// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir --Xcompiler,iree-hal-target-backends=vulkan-spirv %s | FileCheck %s)
// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir --Xcompiler,iree-hal-target-backends=vulkan-spirv --trace_execution --vulkan_debug_verbosity=4 --vulkan_debug_utils=true --vulkan_validation_layers=true --vulkan_vma_allocator=true %s | FileCheck %s)
// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir --Xcompiler,iree-hal-target-backends=vulkan-spirv --trace_execution --vulkan_debug_verbosity=4 --vulkan_debug_utils=true --vulkan_validation_layers=true %s | FileCheck %s)

func.func @tensor_cast() -> tensor<2x?xf32> {
%input = util.unfoldable_constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32>
Expand Down

0 comments on commit 5061009

Please sign in to comment.