Skip to content

Commit

Permalink
Merge branch 'main' into mm_cascade
Browse files Browse the repository at this point in the history
  • Loading branch information
Yu-Zhewen authored May 28, 2024
2 parents 9e78f4d + b693d4e commit 1c2e38d
Show file tree
Hide file tree
Showing 22 changed files with 688 additions and 162 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ find_library(ELF_LIB elf)
cmake_dependent_option(AIE_ENABLE_AIRBIN
"Enables emitting AIRBIN ELF binaries." OFF "ELF_LIB" OFF)

# If we need runtime libs, then statically link them.
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE)
add_flag_if_supported("-Werror=unused" WERROR_USED)
Expand Down
1 change: 0 additions & 1 deletion aie_runtime_lib/AIE/aiesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@


set(INSTALLS
Makefile
genwrapper_for_ps.cpp)


Expand Down
55 changes: 0 additions & 55 deletions aie_runtime_lib/AIE/aiesim/Makefile

This file was deleted.

1 change: 0 additions & 1 deletion aie_runtime_lib/AIE2/aiesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@


set(INSTALLS
Makefile
genwrapper_for_ps.cpp)


Expand Down
55 changes: 0 additions & 55 deletions aie_runtime_lib/AIE2/aiesim/Makefile

This file was deleted.

9 changes: 6 additions & 3 deletions cmake/toolchainFiles/toolchain_x86_64.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Copyright (C) 2018-2022, Xilinx Inc. All rights reserved.
# Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2018-2024 Advanced Micro Devices, Inc. or its affiliates

7 changes: 7 additions & 0 deletions docs/Building.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ and llvm.
source utils/env_setup.sh <mlir-aie>/install <llvm dir>/install
```

Note that when returning to this install in a fresh environment, you must rerun the `utils/env_setup.sh` script to set up your environment, and also activate the Python virtual environment with the following command.
```
source sandbox/bin/activate
```

## Building on X86 targetting the VCK5000

In order to build and run on PCIe cards, you first have to build and install the aie-rt library. We chose to install the library in /opt/xaiengine but it is not required for the tools to be installed there. Just ensure that when building mlir-aie and mlir-air, that you point to the directory in which the aie-rt library was installed.
Expand Down Expand Up @@ -147,6 +152,8 @@ Then, set `${ROCM_ROOT}` to the ROCm install from the previous path. Then, run t

The PCIe AIR runtime requires the use of the [AIR PCIe kernel driver](https://github.com/Xilinx/ROCm-air-platforms/tree/main/driver). The driver directory in the [ROCm-air-platforms](https://github.com/Xilinx/ROCm-air-platforms) repository contains documentation on how to compile and load the AIR PCIe kernel driver.

After this is complete, refer back to Step 5 of `Building on X86 for mlir-aie development` to set up the rest of your environment.

### Sysroot
Since the AIE tools are cross-compiling, in order to actually compile code, we need a 'sysroot' directory,
containing an ARM rootfs. This rootfs must match what will be available in the runtime environment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "aie/Dialect/AIE/Transforms/AIEPasses.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
Expand Down
161 changes: 158 additions & 3 deletions lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023, Advanced Micro Devices, Inc.
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//
// This file contains conversions and rewrites to the Vector dialect to make
Expand Down Expand Up @@ -39,6 +39,55 @@ using namespace xilinx::aievec;
//================== Common AIE canonicalization analysis ====================//
//============================================================================//

// Returns true if `op` is an additive, gemm-like contraction whose `rhs`
// operand is implicitly transposed. Concretely, naming the three innermost
// iterators (m, n, k), the op must satisfy:
//   * combining kind is ADD;
//   * lhs, rhs and acc are all shaped, have the same rank, and rank >= 2;
//   * the three innermost iterators are (parallel, parallel, reduction);
//   * the two innermost results of the indexing maps are
//       lhs: (m, k)    rhs: (n, k)    acc: (m, n)
// The leading (outer) results of each indexing map are dropped before the
// comparison and are not inspected.
static bool isGemmBTransposedContractionOp(vector::ContractionOp op) {
  if (op.getKind() != vector::CombiningKind::ADD)
    return false;

  // The accumulator of a contraction may be a scalar (full reduction). Such
  // an op can never be gemm-like, and a plain `cast` would assert on it, so
  // bail out gracefully instead.
  auto accType = dyn_cast<ShapedType>(op.getAccType());
  if (!accType)
    return false;

  // Get and check shape of operands
  auto lhsShape = op.getLhsType().getShape();
  auto rhsShape = op.getRhsType().getShape();
  auto accShape = accType.getShape();
  if (lhsShape.size() < 2 || rhsShape.size() < 2 || accShape.size() < 2)
    return false;

  // The result positions dropped below are derived from the lhs map and
  // applied to all three maps. With mismatched ranks that would index past
  // the end of the shorter maps, and the inner maps could not match the
  // 2-result reference maps anyway, so reject such ops up front.
  if (lhsShape.size() != rhsShape.size() || lhsShape.size() != accShape.size())
    return false;

  // Check that the innermost iterators match gemm-like iterators
  SmallVector<vector::IteratorType> iterators = op.getIteratorTypesArray();
  if (iterators.size() < 3)
    return false;
  auto innerMostIterators =
      SmallVector<vector::IteratorType>(iterators.end() - 3, iterators.end());
  if (vector::IteratorType::parallel != innerMostIterators[0] ||
      vector::IteratorType::parallel != innerMostIterators[1] ||
      vector::IteratorType::reduction != innerMostIterators[2])
    return false;

  // Get indexing maps of iterators for operands, and keep only their two
  // innermost results.
  SmallVector<AffineMap, 4> indexingMaps(op.getIndexingMapsArray());
  const int64_t numOuterMostResults =
      static_cast<int64_t>(indexingMaps[0].getNumResults()) - 2;
  SmallVector<int64_t> outerMostResults;
  for (int64_t i = 0; i < numOuterMostResults; i++)
    outerMostResults.push_back(i);

  auto innerLhsMap = indexingMaps[0].dropResults(outerMostResults);
  auto innerRhsMap = indexingMaps[1].dropResults(outerMostResults);
  auto innerAccMap = indexingMaps[2].dropResults(outerMostResults);

  // Check whether they conform to a "transposed B" gemm. Each reference map
  // is a 3-d permutation with its first result dropped, which yields:
  //   A: (d0, d2)    B: (d1, d2)    C: (d0, d1)
  auto ctx = op.getContext();
  auto mmAidxMap =
      AffineMap::getPermutationMap(ArrayRef<unsigned>{1, 0, 2}, ctx)
          .dropResults(0);
  auto mmBidxMap =
      AffineMap::getPermutationMap(ArrayRef<unsigned>{0, 1, 2}, ctx)
          .dropResults(0);
  auto mmCidxMap =
      AffineMap::getPermutationMap(ArrayRef<unsigned>{2, 0, 1}, ctx)
          .dropResults(0);

  // Shift the reference maps so that d0..d2 line up with the three innermost
  // iterators of the contraction.
  int64_t numOuterMostDims = indexingMaps[0].getNumDims() - 3;
  return innerLhsMap == mmAidxMap.shiftDims(numOuterMostDims) &&
         innerRhsMap == mmBidxMap.shiftDims(numOuterMostDims) &&
         innerAccMap == mmCidxMap.shiftDims(numOuterMostDims);
}

//============================================================================//
//============ Common AIE canonicalization conversion patterns ===============//
//============================================================================//
Expand Down Expand Up @@ -411,6 +460,107 @@ struct FlattenMultDimTransferWritePattern
}
};

// Conversion pattern that makes the implicit transposition of the `rhs`
// operand of a gemm-like `vector.contract` explicit, by materializing a
// `vector.transpose` and adjusting the `rhs` indexing map accordingly.
// When `rhs` is produced by a widening op (`extf`/`extsi`/`extui`), the
// transpose is applied to the narrow input and the widening op is re-created
// on the transposed value.
struct ExtractTransposeFromContractionOp
    : public OpConversionPattern<vector::ContractionOp> {
  using OpConversionPattern<vector::ContractionOp>::OpConversionPattern;

  // Returns a vector type with the same element type as `vecTy` and with its
  // two innermost dimensions swapped.
  static VectorType getTransposedVectorType(VectorType vecTy) {
    SmallVector<int64_t> swappedShape{vecTy.getShape()};
    auto rank = swappedShape.size();
    std::swap(swappedShape[rank - 1], swappedShape[rank - 2]);
    return VectorType::get(swappedShape, vecTy.getElementType());
  }

  LogicalResult
  matchAndRewrite(vector::ContractionOp contractOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (!isGemmBTransposedContractionOp(contractOp))
      return failure();

    Location loc = contractOp.getLoc();
    auto ctx = rewriter.getContext();

    // Builds the permutation [0, 1, ..., n-3, n-1, n-2]: identity on the
    // outer positions, swap on the two innermost ones.
    auto makeInnerSwapPermutation = [](int64_t n) {
      SmallVector<int64_t> perm;
      for (int64_t pos = 0; pos < n - 2; pos++)
        perm.push_back(pos);
      perm.push_back(n - 1);
      perm.push_back(n - 2);
      return perm;
    };

    // Element type the contraction consumes; used to re-create a widening
    // op after the transpose, if there was one.
    Type wideElemTy = contractOp.getRhsType().getElementType();

    // Look through a widening op feeding `rhs`, remembering its kind.
    enum class Widening { None, ExtF, ExtSI, ExtUI };
    Widening widening = Widening::None;
    Value rhsVal = adaptor.getRhs();
    VectorType rhsVecTy = contractOp.getRhsType();
    if (auto extfOp = rhsVal.getDefiningOp<arith::ExtFOp>()) {
      rhsVal = extfOp.getIn();
      widening = Widening::ExtF;
    } else if (auto extsiOp = rhsVal.getDefiningOp<arith::ExtSIOp>()) {
      rhsVal = extsiOp.getIn();
      widening = Widening::ExtSI;
    } else if (auto extuiOp = rhsVal.getDefiningOp<arith::ExtUIOp>()) {
      rhsVal = extuiOp.getIn();
      widening = Widening::ExtUI;
    }
    if (widening != Widening::None)
      rhsVecTy = cast<VectorType>(rhsVal.getType());

    // Explicit transpose of the two innermost dimensions of `rhs`.
    int64_t rhsRank = rhsVecTy.getShape().size();
    SmallVector<int64_t> rhsPermutation = makeInnerSwapPermutation(rhsRank);
    auto transpRhsVecTy = getTransposedVectorType(rhsVecTy);
    auto transposeOp = rewriter.create<vector::TransposeOp>(
        loc, transpRhsVecTy, rhsVal, rhsPermutation);
    rhsVal = transposeOp.getResult();

    // Re-apply the widening op, now on the transposed value.
    if (widening != Widening::None) {
      auto wideVecTy = VectorType::get(transpRhsVecTy.getShape(), wideElemTy);
      switch (widening) {
      case Widening::ExtF:
        rhsVal = rewriter.create<arith::ExtFOp>(loc, wideVecTy, rhsVal).getOut();
        break;
      case Widening::ExtSI:
        rhsVal =
            rewriter.create<arith::ExtSIOp>(loc, wideVecTy, rhsVal).getOut();
        break;
      case Widening::ExtUI:
        rhsVal =
            rewriter.create<arith::ExtUIOp>(loc, wideVecTy, rhsVal).getOut();
        break;
      case Widening::None:
        break;
      }
    }

    // Compose the `rhs` indexing map with a permutation that swaps the two
    // innermost iteration dimensions; the other maps are kept as they are.
    SmallVector<AffineMap, 4> oldIdxMaps(contractOp.getIndexingMapsArray());
    int64_t numIterDims = oldIdxMaps[1].getNumDims();
    SmallVector<int64_t> innerDimPerm = makeInnerSwapPermutation(numIterDims);
    auto transpPermMap = AffineMap::getPermutationMap(innerDimPerm, ctx);
    AffineMap newRhsIdxMap = oldIdxMaps[1].compose(transpPermMap);

    auto newIdxMaps = rewriter.getAffineMapArrayAttr(
        {oldIdxMaps[0], newRhsIdxMap, oldIdxMaps[2]});

    rewriter.replaceOpWithNewOp<vector::ContractionOp>(
        contractOp, contractOp.getResult().getType(), adaptor.getLhs(), rhsVal,
        adaptor.getAcc(), newIdxMaps, contractOp.getIteratorTypes());

    return success();
  }
};

//============================================================================//
//============ AIEML canonicalization conversion patterns ===============//
//============================================================================//
Expand Down Expand Up @@ -470,15 +620,20 @@ static void configureAIEMLCanonicalizeLegalizations(ConversionTarget &target,
[](vector::TransferWriteOp op) {
return cast<VectorType>(op.getVector().getType()).getRank() < 2;
});
target.addDynamicallyLegalOp<vector::ContractionOp>(
[](vector::ContractionOp op) {
return !isGemmBTransposedContractionOp(op);
});
}

// Adds the AIE-ML canonicalization conversion patterns to `patterns`.
// `SplitUnalignedTransferReadPattern` is constructed with the extra arguments
// 1024 and 256; every other pattern only receives the pattern set's context.
// NOTE(review): `backend` is not referenced in the lines shown here.
static void
populateAIEMLCanonicalizeConversionPatterns(RewritePatternSet &patterns,
TargetBackend backend) {
patterns.add<SplitUnalignedTransferReadPattern>(patterns.getContext(), 1024,
256);
// NOTE(review): the two flatten patterns appear in both `add` calls below;
// this looks like the pre- and post-change forms of the same statement shown
// together - confirm which one is meant to remain.
patterns.add<FlattenMultDimTransferReadPattern,
FlattenMultDimTransferWritePattern>(patterns.getContext());
patterns
.add<ExtractTransposeFromContractionOp, FlattenMultDimTransferReadPattern,
FlattenMultDimTransferWritePattern>(patterns.getContext());
}

//============================================================================//
Expand Down
2 changes: 1 addition & 1 deletion programming_examples/ml/resnet/layers_conv2_x/aie.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//===----------------------------------------------------------------------===//

module {
aie.device(npu) {
aie.device(npu1_3col) {

//shim
%tile00 = aie.tile(0, 0)
Expand Down
Loading

0 comments on commit 1c2e38d

Please sign in to comment.