From 028d4bde847e7c1c963ce96ea9d8f9848da116df Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Wed, 21 Aug 2024 18:25:49 -0500 Subject: [PATCH] clean up --- build_tools/ci/build_test_cpp.sh | 10 + build_tools/ci/cpu_comparison/run_test.py | 265 +++++++++--------- cmake/iree_aie_bootgen.cmake | 5 +- cmake/iree_aie_xrt.cmake | 3 +- .../aievec/VectorToAIEVecConversions.cpp | 8 - .../AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp | 20 +- .../Target/AMDAIETargetCDODirect.cpp | 4 +- .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 69 +++-- .../AMDAIELocalizeLogicalObjectFifo.cpp | 5 - .../aie_runtime/iree_aie_configure.cc | 15 +- .../aie_runtime/iree_aie_configure.h | 10 +- .../src/iree-amd-aie/driver/xrt/xrt_driver.cc | 3 +- 12 files changed, 207 insertions(+), 210 deletions(-) diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh index ad6b4e460..d303a3677 100644 --- a/build_tools/ci/build_test_cpp.sh +++ b/build_tools/ci/build_test_cpp.sh @@ -32,6 +32,9 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then export CMAKE_TOOLCHAIN_FILE="$this_dir/linux_default_toolchain.cmake" export CC=clang export CXX=clang++ +else + export CC=clang-cl.exe + export CXX=clang-cl.exe fi export CCACHE_DIR="${cache_dir}/ccache" export CCACHE_MAXSIZE="700M" @@ -61,6 +64,13 @@ cmake -S "$iree_dir" -B "$build_dir" \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="$install_dir" \ -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \ + -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \ + -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \ + -DCMAKE_C_COMPILER="${CC}" \ + -DCMAKE_CXX_COMPILER="${CXX}" \ + -DLLVM_TARGET_ARCH=X86 \ + -DLLVM_TARGETS_TO_BUILD=X86 \ -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_SAMPLES=OFF \ -DIREE_BUILD_PYTHON_BINDINGS=ON \ diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py index 587a34724..e02bd0144 100755 --- a/build_tools/ci/cpu_comparison/run_test.py +++ b/build_tools/ci/cpu_comparison/run_test.py @@ -60,7 +60,7 @@ def find_executable(install_dir: Path, executable_name): ) -def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True): +def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True, env=None): if workdir is None: workdir = Path.cwd() if not isinstance(cmd, list): @@ -68,8 +68,11 @@ def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True): for i, c in enumerate(cmd): if isinstance(c, Path): cmd[i] = str(c) - env = os.environ - env["XILINX_XRT"] = "C:\\bui\\tools" + if env is None: + env = {} + + env = {**env, **os.environ} + if verbose: _cmd = " ".join(cmd) if verbose > 1: @@ -98,14 +101,14 @@ def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True): def generate_aie_vmfb( - config, - name, - tile_pipeline, - lower_to_aie_pipeline, - use_ukernel, - test_file, - input_args, - function_name, + config, + name, + tile_pipeline, + lower_to_aie_pipeline, + use_ukernel, + test_file, + input_args, + function_name, ): """ Compile a test file for IREE's AIE backend, returning path to the compiled @@ -177,7 +180,12 @@ def generate_aie_output(config, aie_vmfb, input_args, function_name, name): shell_out(config.reset_npu_script, verbose=config.verbose) start = time.monotonic_ns() - shell_out(run_args, config.output_dir, config.verbose) + shell_out( + run_args, + config.output_dir, + config.verbose, + env={"XILINX_XRT": str(config.xrt_dir)}, + ) run_time = time.monotonic_ns() - start if config.verbose: @@ -187,11 +195,11 @@ def generate_aie_output(config, aie_vmfb, input_args, function_name, name): def generate_llvm_cpu_output( - config, - name, - test_file, - input_args, - function_name, + config, + name, + test_file, + input_args, + function_name, ): """ Compile and run a test file for IREE's CPU backend, returning a numpy array @@ -229,20 +237,21 @@ class TestConfig: """ def __init__( - self, - output_dir, - iree_install_dir, - peano_dir, - xrt_dir, - vitis_dir, - file_dir, - iree_compile_exe, - iree_run_exe, - verbose, - return_on_fail, - reset_npu_between_runs, - do_not_run_aie, - additional_aie_compilation_flags, + self, + output_dir, + iree_install_dir, + peano_dir, + xrt_dir, + vitis_dir, + file_dir, + iree_compile_exe, + iree_run_exe, + verbose, + return_on_fail, + reset_npu_between_runs, + do_not_run_aie, + get_component_log, + additional_aie_compilation_flags, ): self.output_dir = output_dir self.iree_install_dir = iree_install_dir @@ -264,18 +273,6 @@ def __init__( self.xrt_hash = "undetermined" self.xrt_release = "undetermined" self.peano_commit_hash = "undetermined" - xrt_bin_dir = xrt_dir - if platform.system() != "Windows": - xrt_bin_dir /= "bin" - # xrt_smi_exe = xrt_bin_dir / ( - # "xrt-smi" + ".exe" if platform.system() == "Windows" else "" - # ) - # if not xrt_smi_exe.exists(): - # xrt_smi_exe = xrt_bin_dir / ( - # "xbutil" + ".exe" if platform.system() == "Windows" else "" - # ) - # if not xrt_smi_exe.exists(): - # raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}") self.reset_npu_script = file_dir.parent / "reset_npu.sh" if reset_npu_between_runs and not self.reset_npu_script.exists(): @@ -283,6 +280,24 @@ def __init__( f"The file {self.reset_npu_script} does not exist, and reset_npu_script=True" ) + # Populated at runtime + self.failures = [] + + if not isinstance(self.verbose, bool) and not isinstance(self.verbose, int): + raise ValueError( + f"verbose must be a boolean or integer, not {type(verbose)}" + ) + + if not get_component_log: + return + + xrt_bin_dir = xrt_dir / "bin" + xrt_smi_exe = xrt_bin_dir / "xrt-smi" + if not xrt_smi_exe.exists(): + xrt_smi_exe = xrt_bin_dir / "xbutil" + if not xrt_smi_exe.exists(): + raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}") + # Get the string output of the xrt-smi 'examine' command. Expect the # string to look something like: # @@ -298,31 +313,30 @@ def __init__( # ... # ``` # - # system_info, xrt_info = ( - # subprocess.check_output([xrt_smi_exe, "examine"]) - # .decode("utf-8") - # .split("XRT") - # ) - # - # linux_kernel = re.findall(r"Release\s+:\s(.*)", system_info, re.MULTILINE) - # if linux_kernel: - # self.linux_kernel = linux_kernel[0] - # - # xrt_release = re.findall(r"Version\s+:\s(.*)", xrt_info, re.MULTILINE) - # if xrt_release: - # self.xrt_release = xrt_release[0] - # - # xrt_hash_date = re.findall(r"Hash Date\s+:\s(.*)", xrt_info, re.MULTILINE) - # if xrt_hash_date: - # self.xrt_hash_date = xrt_hash_date[0] - # - # xrt_hash = re.findall(r"Hash\s+:\s(.*)", xrt_info, re.MULTILINE) - # if xrt_hash: - # self.xrt_hash = xrt_hash[0] + system_info, xrt_info = ( + subprocess.check_output([xrt_smi_exe, "examine"]) + .decode("utf-8") + .split("XRT") + ) + + linux_kernel = re.findall(r"Release\s+:\s(.*)", system_info, re.MULTILINE) + if linux_kernel: + self.linux_kernel = linux_kernel[0] + + xrt_release = re.findall(r"Version\s+:\s(.*)", xrt_info, re.MULTILINE) + if xrt_release: + self.xrt_release = xrt_release[0] + + xrt_hash_date = re.findall(r"Hash Date\s+:\s(.*)", xrt_info, re.MULTILINE) + if xrt_hash_date: + self.xrt_hash_date = xrt_hash_date[0] + + xrt_hash = re.findall(r"Hash\s+:\s(.*)", xrt_info, re.MULTILINE) + if xrt_hash: + self.xrt_hash = xrt_hash[0] # Try and get the peano commit hash. This is a bit of a hack, if it fails # peano_commit_has is left as "undetermined". - self.peano_commit_hash = "undetermined" peano_clang_path = peano_dir / "bin" / "clang" if peano_clang_path.exists(): _, clang_v_output = shell_out( @@ -336,14 +350,6 @@ def __init__( if peano_commit_hash: self.peano_commit_hash = peano_commit_hash[0] - # Populated at runtime - self.failures = [] - - if not isinstance(self.verbose, bool) and not isinstance(self.verbose, int): - raise ValueError( - f"verbose must be a boolean or integer, not {type(verbose)}" - ) - def __str__(self): return dedent( f""" @@ -405,18 +411,18 @@ def name_from_mlir_filename(mlir_filename): def aie_vs_baseline( - config, - test_file, - input_args, - baseline_value, - use_ukernel, - tile_pipeline, - lower_to_aie_pipeline, - function_name, - seed, - rtol, - atol, - n_repeats, + config, + test_file, + input_args, + baseline_value, + use_ukernel, + tile_pipeline, + lower_to_aie_pipeline, + function_name, + seed, + rtol, + atol, + n_repeats, ): """ If the outputs differ, add the test file to a list of failures. @@ -483,16 +489,16 @@ def aie_vs_baseline( def aie_vs_llvm_cpu( - config, - test_file, - use_ukernel=False, - tile_pipeline="pad-pack", - lower_to_aie_pipeline="air", - function_name=None, - seed=1, - rtol=1e-6, - atol=1e-6, - n_repeats=1, + config, + test_file, + use_ukernel=False, + tile_pipeline="pad-pack", + lower_to_aie_pipeline="air", + function_name=None, + seed=1, + rtol=1e-6, + atol=1e-6, + n_repeats=1, ): """ Compare the output obtained when compiling and running on IREE's @@ -527,16 +533,16 @@ def aie_vs_llvm_cpu( def aie_vs_np_matmul( - config, - test_file, - use_ukernel=False, - tile_pipeline="pad-pack", - lower_to_aie_pipeline="air", - function_name=None, - seed=1, - rtol=1e-6, - atol=1e-6, - n_repeats=1, + config, + test_file, + use_ukernel=False, + tile_pipeline="pad-pack", + lower_to_aie_pipeline="air", + function_name=None, + seed=1, + rtol=1e-6, + atol=1e-6, + n_repeats=1, ): """ """ @@ -604,7 +610,7 @@ def run(self, config): output_dir = config.output_dir for name in [ - # "two_matmul_switching", + "two_matmul_switching", "matmul_f32_8_8_4", "matmul_f32_8_4_8", ]: @@ -675,23 +681,22 @@ def run(self, config): def get_test_partition(): - return [ - # ConvolutionSet(), - MatmulSet(), SmokeSet()] + return [ConvolutionSet(), MatmulSet(), SmokeSet()] def all_tests( - output_dir, - iree_install_dir, - peano_dir, - xrt_dir, - vitis_dir, - return_on_fail, - verbose, - reset_npu_between_runs, - do_not_run_aie, - test_set, - additional_aie_compilation_flags, + output_dir, + iree_install_dir, + peano_dir, + xrt_dir, + vitis_dir, + return_on_fail, + verbose, + reset_npu_between_runs, + do_not_run_aie, + get_component_log, + test_set, + additional_aie_compilation_flags, ): """ There are a few ways to add tests to this script: @@ -732,6 +737,7 @@ def all_tests( return_on_fail, reset_npu_between_runs, do_not_run_aie, + get_component_log, additional_aie_compilation_flags, ) if verbose: @@ -741,8 +747,8 @@ def all_tests( verify_determinism() # Verify a very basic script runs before running the more complex tests - # if platform.system() != "Windows": - # shell_out(["pwd"], verbose=config.verbose) + if platform.system() != "Windows": + shell_out(["pwd"], verbose=config.verbose) partition = get_test_partition() partition_names = [p.name for p in partition] @@ -758,8 +764,6 @@ def all_tests( partition = map_to_partition[test] partition.run(config) - # for p in partition: - if config.failures: # Convert the list of failed tests into a map: test name to the # number of failures (config.failures list may contain duplicates) @@ -801,7 +805,7 @@ def all_tests( ), ) - parser.add_argument('-v', '--verbose', action='count', default=0) + parser.add_argument("-v", "--verbose", action="count", default=0) parser.add_argument( "--reset-npu-between-runs", @@ -826,12 +830,18 @@ def all_tests( ), ) + parser.add_argument( + "--get-component-log", + action="store_true", + help="Print environment information (such as info about XRT and the kernel", + ) + partition = get_test_partition() partition_names = [p.name for p in partition] partition_names_and_all = partition_names + ["All"] help_string = ( - "A comma-separated list of test sets. Available test sets are: " - + ", ".join(partition_names_and_all) + "A comma-separated list of test sets. Available test sets are: " + + ", ".join(partition_names_and_all) ) parser.add_argument( @@ -868,6 +878,7 @@ def all_tests( args.verbose, args.reset_npu_between_runs, args.do_not_run_aie, + args.get_component_log, test_set_list, args.additional_aie_compilation_flags, ) diff --git a/cmake/iree_aie_bootgen.cmake b/cmake/iree_aie_bootgen.cmake index 526ed9a6f..11238be4c 100644 --- a/cmake/iree_aie_bootgen.cmake +++ b/cmake/iree_aie_bootgen.cmake @@ -26,9 +26,10 @@ replace_string_in_file("${_BOOTGEN_SOURCE_DIR}/main.cpp" file(GLOB _bootgen_sources "${_BOOTGEN_SOURCE_DIR}/*.c" "${_BOOTGEN_SOURCE_DIR}/*.cpp") add_library(iree-aie-bootgen STATIC ${_bootgen_sources}) -if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") +if(WIN32) target_compile_definitions(iree-aie-bootgen PUBLIC YY_NO_UNISTD_H) -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") +endif() +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") set(_bootgen_c_warning_ignores -Wno-cast-qual -Wno-covered-switch-default diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake index 47db2979d..99539ebf3 100644 --- a/cmake/iree_aie_xrt.cmake +++ b/cmake/iree_aie_xrt.cmake @@ -27,7 +27,8 @@ FetchContent_Declare( GIT_PROGRESS TRUE DOWNLOAD_NO_EXTRACT FALSE # prevents configure from rerunning all the time - URL_HASH MD5=84bc7c861606dc66bcfbeb660fcddfd2) + DOWNLOAD_EXTRACT_TIMESTAMP TRUE + URL_HASH MD5=84BC7C861606DC66BCFBEB660FCDDFD2) FetchContent_MakeAvailable(Boost) set(IREE_AIE_BOOST_LIBS any diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp index d2a6e4e49..12769f2c9 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp +++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp @@ -840,14 +840,6 @@ static void configureAIEVecCommonLegalizations(ConversionTarget &target) { [](arith::SubFOp op) { return !isa(op.getType()); }); } -static void configureAIEVecV1Legalizations(ConversionTarget &target) { - target.addDynamicallyLegalOp( - [](arith::MulIOp op) { return !isa(op.getType()); }); - target.addDynamicallyLegalOp( - [](arith::MulFOp op) { return !isa(op.getType()); }); - target.addLegalDialect(); -} - static void configureAIEVecV2Legalizations(ConversionTarget &target) { target.addLegalOp(); target.addLegalOp(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp index c746c0877..ff5ca0da8 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp @@ -18,20 +18,6 @@ static const char kPackingConfigAttrName[] = "packing_config"; -namespace mlir::iree_compiler { - -/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of 64-bit -/// integer type whose values is obtained from `values`. -static ArrayAttr getIndexArrayAttr(MLIRContext *context, - ArrayRef values) { - return ArrayAttr::get( - context, llvm::map_to_vector(values, [&](int64_t value) -> Attribute { - return IntegerAttr::get(IndexType::get(context), APInt(64, value)); - })); -} - -} // namespace mlir::iree_compiler - namespace mlir::iree_compiler::AMDAIE { //===----------------------------------------------------------------------===// @@ -73,6 +59,7 @@ void AMDAIEDialect::initializeAMDAIEAttrs() { addAttributes< #define GET_ATTRDEF_LIST #include "iree-amd-aie/IR/AMDAIEAttrs.cpp.inc" // IWYU pragma: keeep + >(); } @@ -84,11 +71,6 @@ namespace mlir::iree_compiler { // Helpers for forming `amdaie.packing_config_level` attribute. // ===----------------------------------------------------------------------===// -static AMDAIE::PermLevelAttr getPermLevelAttr( - MLIRContext *context, ArrayRef permLevelVal) { - return AMDAIE::PermLevelAttr::get(context, permLevelVal); -} - static AMDAIE::PermLevelsAttr getPermLevelsAttr( MLIRContext *context, ArrayRef> permLevelsVal) { SmallVector permLevels; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp index 29216d069..76f289cb8 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp @@ -168,8 +168,8 @@ LogicalResult addAieElfsToCDO(const AMDAIEDeviceModel &deviceModel, else fileName = "core_" + std::to_string(tileLoc.col) + "_" + std::to_string(tileLoc.row) + ".elf"; - if (failed(addElfToTile(deviceModel, tileLoc, workDirPath / fileName, - aieSim))) { + Path elfPath = workDirPath / fileName; + if (failed(addElfToTile(deviceModel, tileLoc, elfPath, aieSim))) { return failure(); } } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 7bbd4f7fb..8bd202708 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -429,12 +429,7 @@ static LogicalResult assembleFileUsingChess( args.emplace_back("-o"); args.emplace_back(outputFile); std::vector env = makeChessEnv(vitisDir); - if (failed(runTool(xChessCCExe, args, verbose, env))) { - llvm::errs() << "Failed to assemble " << inputFile << " with chess"; - return failure(); - } - - return success(); + return runTool(xChessCCExe, args, verbose, env); } std::vector makePeanoOptArgs() { @@ -532,7 +527,7 @@ static_assert(std::is_same_v vitisDir, const std::string &targetArch, bool verbose, Path peanoDir, const std::optional &ukernel) { auto tileOps = deviceOp.getOps(); @@ -659,8 +654,13 @@ static LogicalResult generateCoreElfFiles( flags.emplace_back("-Wl,-T," + ldscriptPath.string()); #ifdef _WIN32 - // disable crt + // for some reason on windows libc/m and crt are deposited into an + // unconventional place (lib) + // https://github.com/Xilinx/llvm-aie/pull/155#issuecomment-2298247620 + // disable using crt by default ie remove automatic of un-path-qualified + // crt0.o crt1.o flags.emplace_back("-nostartfiles"); + // put them back by hand where they are in the wheel/distro flags.emplace_back((peanoDir / "lib" / "crt0.o").string()); flags.emplace_back((peanoDir / "lib" / "crt1.o").string()); flags.emplace_back("-Wl,-L" + (peanoDir / "lib").string()); @@ -671,18 +671,20 @@ static LogicalResult generateCoreElfFiles( if (verbose) flags.emplace_back("-v"); // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected // automatically into the ld.lld invocation - return runTool((peanoDir / "bin" / "clang").string(), flags, verbose); + if (failed( + runTool((peanoDir / "bin" / "clang").string(), flags, verbose))) { + return failure(); + } } + return success(); } static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, - const Path &tempDir) { - + Path &tempDir) { auto copy = cast(deviceOp.getParentOp()->clone()); deviceOp = *copy.getOps().begin(); - std::string errorMessage; PassManager passManager(context, AIE::DeviceOp::getOperationName()); applyConfigToPassManager(passManager, printIRBeforeAll, printIRAfterAll, @@ -761,10 +763,10 @@ static json::Object makeKernelJSON(const std::string &name, } static LogicalResult generateXCLBin( - const std::string &Output, const Path &tempDir, - const std::string &xclBinKernelID, const std::string &xclBinKernelName, - const std::string &xclBinInstanceName, const Path &amdAIEInstallDir, - bool verbose, const std::optional &inputXclbin) { + const std::string &Output, Path &tempDir, const std::string &xclBinKernelID, + const std::string &xclBinKernelName, const std::string &xclBinInstanceName, + const Path &amdAIEInstallDir, bool verbose, + const std::optional &inputXclbin) { std::string errorMessage; // Create mem_topology.json. Path memTopologyJsonFile = tempDir / "mem_topology.json"; @@ -923,9 +925,11 @@ static LogicalResult generateXCLBin( FailureOr xclbinutilBin = findAMDAIETool("iree-aie-xclbinutil", amdAIEInstallDir); - if (failed(xclbinutilBin)) return xclbinutilBin; + if (failed(xclbinutilBin)) return failure(); - if (inputXclbin) { + if (!inputXclbin) { + flags.insert(flags.end(), {"--add-replace-section", memArg}); + } else { // Create aie_partition.json. Path aieInputPartitionJsonFile = tempDir / "aie_input_partition.json"; std::string inputPartArg = @@ -977,8 +981,6 @@ static LogicalResult generateXCLBin( return failure(); } flags.insert(flags.end(), {"--input", *inputXclbin}); - } else { - flags.insert(flags.end(), {"--add-replace-section", memArg}); } flags.insert(flags.end(), {"--add-kernel", kernelsJsonFile.string(), "--add-replace-section", partArg, "--force", @@ -1057,9 +1059,9 @@ struct RemoveAlignment2FromLLVMLoadPass static LogicalResult generateUnifiedObject( MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, - bool timing, bool useChess, bool verbose, Path tempDir, + bool timing, bool useChess, bool verbose, Path &tempDir, std::optional vitisDir, const std::string &targetArch, - Path peanoDir) { + Path &peanoDir) { assert(deviceOp->getParentOp() && isa(deviceOp->getParentOp()) && "DeviceOp must be in a module parent"); @@ -1107,7 +1109,7 @@ static LogicalResult generateUnifiedObject( std::string inputLLChessHackedStr = chesshack(inputLLStr); FailureOr maybeVitisDir = findVitis(vitisDir); if (failed(maybeVitisDir)) return failure(); - FailureOr chessIntrinsicsObjFile = assembleStringUsingChess( + FailureOr chessIntrinsicsObjFile = assembleStringUsingChess( /*inputFileStr=*/inputLLChessHackedStr, /*inputFileName=*/"input.chesshacked.ll", /*outputFileName=*/outputFile, @@ -1219,29 +1221,36 @@ LogicalResult aie2xclbin( for (uint32_t w : npuInstructions) output->os() << llvm::format("%08X\n", w); output->keep(); - Path unifiedObj = Path(tempDir) / "input.o"; + Path tempDirPath{tempDir}; + tempDirPath.make_preferred(); + Path peanoDirPath{peanoDir}; + peanoDirPath.make_preferred(); + std::optional vitisDirPath{vitisDir}; + if (vitisDirPath) vitisDirPath->make_preferred(); + + Path unifiedObj = tempDirPath / "input.o"; if (failed(generateUnifiedObject( ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll, - printIRModuleScope, timing, useChess, verbose, tempDir, vitisDir, - targetArch, peanoDir))) { + printIRModuleScope, timing, useChess, verbose, tempDirPath, + vitisDirPath, targetArch, peanoDirPath))) { llvm::errs() << "Failed to generate unified object\n"; return failure(); } - if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDir, - useChess, vitisDir, targetArch, verbose, + if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDirPath, + useChess, vitisDirPath, targetArch, verbose, peanoDir, ukernel))) { llvm::errs() << "Failed to generate core ELF file(s)\n"; return failure(); } if (failed(generateCDO(ctx, deviceOp, printIRBeforeAll, printIRAfterAll, - printIRModuleScope, timing, tempDir))) { + printIRModuleScope, timing, tempDirPath))) { llvm::errs() << "Failed to generate CDO\n"; return failure(); } - if (failed(generateXCLBin(outputXCLBin, tempDir, xclBinKernelID, + if (failed(generateXCLBin(outputXCLBin, tempDirPath, xclBinKernelID, xclBinKernelName, xclBinInstanceName, amdAIEInstallDir, verbose, InputXCLBin))) { llvm::errs() << "Failed to generate XCLBin\n"; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp index b3b3f8caa..6d7b3f7af 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp @@ -52,11 +52,6 @@ scf::ForallOp getThreadMappedForallAncestor(Operation *op) { return getMappedForallAncestor(op); } -scf::ForallOp getThreadOrBlockMappedForallAncestor(Operation *op) { - return getMappedForallAncestor(op); -} - class AMDAIELocalizeLogicalObjectfifoPass : public impl::AMDAIELocalizeLogicalObjectfifoBase< AMDAIELocalizeLogicalObjectfifoPass> { diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc index 7feebe07d..da4f04169 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc @@ -15,6 +15,9 @@ #include "iree_aie_router.h" #include "iree_aie_runtime.h" #include "llvm/ADT/StringExtras.h" +#ifdef _WIN32 +#include "llvm/Support/Windows/WindowsSupport.h" +#endif #define DEBUG_TYPE "iree-aie-cdo-emitter" @@ -210,19 +213,13 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel, } LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel, - const TileLoc &tileLoc, const Path &elfPath, + const TileLoc &tileLoc, Path &elfPath, bool aieSim) { auto devInst = const_cast(&deviceModel.devInst); - // this isn't the case elsewhere but for whatever reason - // fopen (what XAie_LoadElf ultimately calls) braeks for >=256 -#ifdef _WIN32 - if (elfPath.string().size() >= 256) { - llvm::errs() << "Windows paths must be less than 256 chars for elf loading " - "to work (seriously):" - << elfPath.string() << "\n"; + if (!std::filesystem::exists(elfPath)) { + llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n"; return failure(); } -#endif TRY_XAIE_API_LOGICAL_RESULT(XAie_LoadElf, devInst, tileLoc, elfPath.string().c_str(), /*loadSym*/ aieSim); diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h index 5f7623289..f738cdd07 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h @@ -102,7 +102,7 @@ BOTH_OSTREAM_OPS_FORALL_CDO_TYPES(OSTREAM_OP_DECL, BOTH_OSTREAM_OP) void initializeCDOGenerator(byte_ordering endianness, bool cdoDebug); /// Generates one of the aie_cdo*.bins. Takes a callback that makes the actual -/// calls to aie-rt but envelopes it with a prolog and an epilogue of calls to +/// calls to aie-rt but envelops it with a prolog and an epilogue of calls to /// cdo-driver that: /// /// 1. Starts the "cdo filestream" (literally just fopens a file) @@ -113,25 +113,25 @@ void initializeCDOGenerator(byte_ordering endianness, bool cdoDebug); /// CDO, checksum, etc. /// 5. Finishes the CDO(fcloses the file) /// -/// Note, all of the cdo APIs are simple and available at +/// Note, all the cdo APIs are simple and available at /// iree-amd-aie/third_party/bootgen/cdo-driver/cdo_driver.c LogicalResult generateCDOBinary(const std::filesystem::path &outputPath, const std::function &cb); /// "Loads" an elf which will be loaded to the program memory of a tile. Loads -/// is in quotes because where/how the elf is actaully loaded is determined by +/// is in quotes because where/how the elf is actually loaded is determined by /// the aie-rt backend; the CDO backend copies the elf byte by byte into the /// CDO. LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc, - const std::filesystem::path &elfPath, bool aieSim); + std::filesystem::path &elfPath, bool aieSim); /// Turn off and turn it back on again... LogicalResult resetUnResetCore(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc); /// Sets/programs locks with explicit initializers; note initialize here is a -/// misnomer because "unintialized" locks actually have their counters +/// misnomer because "uninitialized" locks actually have their counters /// initialized to zero anyway by the hardware. LogicalResult initializeLock(const AMDAIEDeviceModel &deviceModel, const Lock &lock); diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc index 42b8adbd3..0a2f24fe5 100644 --- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc +++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc @@ -92,7 +92,6 @@ iree_status_t iree_hal_xrt_driver_create_internal( "No XRT devices found"); } // Get handle to xrt device - std::cerr << xrt::system::enumerate_devices() << "\n"; try { global_device = xrt::device(0); } catch (std::runtime_error& e) { @@ -100,7 +99,7 @@ iree_status_t iree_hal_xrt_driver_create_internal( e.what()); } driver->device = &global_device; - *out_driver = (iree_hal_driver_t*)driver; + *out_driver = reinterpret_cast(driver); return iree_ok_status(); }