From 83f3b1cb480b41e3347035aff14fd4bc2ba21d24 Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli@google.com>
Date: Tue, 7 May 2024 20:28:39 -0400
Subject: [PATCH] [mlir][sparse] Add verification for explicit/implicit value
 (#90111)

1. Verify that the types of the explicit/implicit values match the
tensor element type.
2. Verify that the implicit value, when present, is zero.
3. Verify that the explicit/implicit values are numeric.
4. Fix the type change issue caused by SparseTensorType(enc), which
always built an f32 tensor type, by moving the COO queries onto the
encoding attribute and removing that constructor.
---
 .../Dialect/SparseTensor/IR/SparseTensor.h    | 13 +++
 .../SparseTensor/IR/SparseTensorAttrDefs.td   | 15 ++++
 .../SparseTensor/IR/SparseTensorType.h        | 25 +-----
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 78 +++++++++++------
 .../SparseTensor/invalid_encoding.mlir        | 85 +++++++++++++++++++
 5 files changed, 169 insertions(+), 47 deletions(-)
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index b182b4c72b9535..3cf81d2e58f21c 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -41,6 +41,19 @@ using Level = uint64_t;
 /// including the value `ShapedType::kDynamic` (for shapes).
 using Size = int64_t;
 
+/// A simple structure that encodes a range of levels in the sparse tensors
+/// that forms a COO segment.
+struct COOSegment {
+  std::pair<Level, Level> lvlRange; // [low, high)
+  bool isSoA;
+
+  bool isAoS() const { return !isSoA; }
+  bool isSegmentStart(Level l) const { return l == lvlRange.first; }
+  bool inSegment(Level l) const {
+    return l >= lvlRange.first && l < lvlRange.second;
+  }
+};
+
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index eefa4c71bbd2ca..53dd8e39438cc6 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -502,9 +502,24 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     //
     // Helper function to translate between level/dimension space.
     //
+
     SmallVector<int64_t> translateShape(::mlir::ArrayRef<int64_t> srcShape, ::mlir::sparse_tensor::CrdTransDirectionKind) const;
     ValueRange translateCrds(::mlir::OpBuilder &builder, ::mlir::Location loc, ::mlir::ValueRange crds, ::mlir::sparse_tensor::CrdTransDirectionKind) const;
 
+    //
+    // COO methods.
+    //
+
+    /// Returns the starting level of this sparse tensor type for a
+    /// trailing COO region that spans **at least** two levels. If
+    /// no such COO region is found, then returns the level-rank.
+    ///
+    /// DEPRECATED: use getCOOSegments instead.
+    Level getAoSCOOStart() const;
+
+    /// Returns a list of COO segments in the sparse tensor types.
+    SmallVector<COOSegment> getCOOSegments() const;
+
     //
     // Printing methods.
     //
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
index ea3d8013b45671..a154d7fa5fb6e5 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
@@ -18,19 +18,6 @@
 namespace mlir {
 namespace sparse_tensor {
 
-/// A simple structure that encodes a range of levels in the sparse tensors that
-/// forms a COO segment.
-struct COOSegment {
-  std::pair<Level, Level> lvlRange; // [low, high)
-  bool isSoA;
-
-  bool isAoS() const { return !isSoA; }
-  bool isSegmentStart(Level l) const { return l == lvlRange.first; }
-  bool inSegment(Level l) const {
-    return l >= lvlRange.first && l < lvlRange.second;
-  }
-};
-
 //===----------------------------------------------------------------------===//
 /// A wrapper around `RankedTensorType`, which has three goals:
 ///
@@ -73,12 +60,6 @@ class SparseTensorType {
       : SparseTensorType(
             RankedTensorType::get(stp.getShape(), stp.getElementType(), enc)) {}
 
-  // TODO: remove?
-  SparseTensorType(SparseTensorEncodingAttr enc)
-      : SparseTensorType(RankedTensorType::get(
-            SmallVector<Size>(enc.getDimRank(), ShapedType::kDynamic),
-            Float32Type::get(enc.getContext()), enc)) {}
-
   SparseTensorType &operator=(const SparseTensorType &) = delete;
   SparseTensorType(const SparseTensorType &) = default;
 
@@ -369,13 +350,15 @@ class SparseTensorType {
   /// no such COO region is found, then returns the level-rank.
   ///
   /// DEPRECATED: use getCOOSegment instead;
-  Level getAoSCOOStart() const;
+  Level getAoSCOOStart() const { return getEncoding().getAoSCOOStart(); }
 
   /// Returns [un]ordered COO type for this sparse tensor type.
   RankedTensorType getCOOType(bool ordered) const;
 
   /// Returns a list of COO segments in the sparse tensor types.
-  SmallVector<COOSegment> getCOOSegments() const;
+  SmallVector<COOSegment> getCOOSegments() const {
+    return getEncoding().getCOOSegments();
+  }
 
 private:
   // These two must be const, to ensure coherence of the memoized fields.
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index de3d3006ebaac5..4cc6ee971d4a3e 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -104,7 +104,7 @@ void StorageLayout::foreachField(
         callback) const {
   const auto lvlTypes = enc.getLvlTypes();
   const Level lvlRank = enc.getLvlRank();
-  SmallVector<COOSegment> cooSegs = SparseTensorType(enc).getCOOSegments();
+  SmallVector<COOSegment> cooSegs = enc.getCOOSegments();
   FieldIndex fieldIdx = kDataFieldStartingIdx;
 
   ArrayRef cooSegsRef = cooSegs;
@@ -211,7 +211,7 @@ StorageLayout::getFieldIndexAndStride(SparseTensorFieldKind kind,
   unsigned stride = 1;
   if (kind == SparseTensorFieldKind::CrdMemRef) {
     assert(lvl.has_value());
-    const Level cooStart = SparseTensorType(enc).getAoSCOOStart();
+    const Level cooStart = enc.getAoSCOOStart();
     const Level lvlRank = enc.getLvlRank();
     if (lvl.value() >= cooStart && lvl.value() < lvlRank) {
       lvl = cooStart;
@@ -912,46 +912,53 @@ LogicalResult SparseTensorEncodingAttr::verifyEncoding(
     return emitError()
            << "dimension-rank mismatch between encoding and tensor shape: "
            << getDimRank() << " != " << dimRank;
+  if (auto expVal = getExplicitVal()) {
+    Type attrType = llvm::cast<TypedAttr>(expVal).getType(); // must be typed
+    if (attrType != elementType) {
+      return emitError() << "explicit value type mismatch between encoding and "
+                         << "tensor element type: " << attrType
+                         << " != " << elementType;
+    }
+  }
+  if (auto impVal = getImplicitVal()) {
+    Type attrType = llvm::cast<TypedAttr>(impVal).getType(); // must be typed
+    if (attrType != elementType) {
+      return emitError() << "implicit value type mismatch between encoding and "
+                         << "tensor element type: " << attrType
+                         << " != " << elementType;
+    }
+    // Only zero is currently supported; check each numeric attribute kind.
+    auto impFVal = llvm::dyn_cast<FloatAttr>(impVal);
+    auto impIntVal = llvm::dyn_cast<IntegerAttr>(impVal);
+    auto impComplexVal = llvm::dyn_cast<complex::NumberAttr>(impVal);
+    if ((impFVal && impFVal.getValue().isNonZero()) ||
+        (impIntVal && !impIntVal.getValue().isZero()) ||
+        (impComplexVal && (impComplexVal.getImag().isNonZero() ||
+                           impComplexVal.getReal().isNonZero()))) {
+      return emitError() << "implicit value must be zero";
+    }
+  }
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// SparseTensorType Methods.
-//===----------------------------------------------------------------------===//
-
-bool mlir::sparse_tensor::SparseTensorType::isCOOType(Level startLvl,
-                                                      bool isUnique) const {
-  if (!hasEncoding())
-    return false;
-  if (!isCompressedLvl(startLvl) && !isLooseCompressedLvl(startLvl))
-    return false;
-  for (Level l = startLvl + 1; l < lvlRank; ++l)
-    if (!isSingletonLvl(l))
-      return false;
-  // If isUnique is true, then make sure that the last level is unique,
-  // that is, when lvlRank == 1, the only compressed level is unique,
-  // and when lvlRank > 1, the last singleton is unique.
-  return !isUnique || isUniqueLvl(lvlRank - 1);
-}
-
-Level mlir::sparse_tensor::SparseTensorType::getAoSCOOStart() const {
+Level mlir::sparse_tensor::SparseTensorEncodingAttr::getAoSCOOStart() const {
   SmallVector<COOSegment> coo = getCOOSegments();
   assert(coo.size() == 1 || coo.empty());
   if (!coo.empty() && coo.front().isAoS()) {
     return coo.front().lvlRange.first;
   }
-  return lvlRank;
+  return getLvlRank();
 }
 
 SmallVector<COOSegment>
-mlir::sparse_tensor::SparseTensorType::getCOOSegments() const {
+mlir::sparse_tensor::SparseTensorEncodingAttr::getCOOSegments() const {
   SmallVector<COOSegment> ret;
-  if (!hasEncoding() || lvlRank <= 1)
+  if (getLvlRank() <= 1)
     return ret;
 
   ArrayRef<LevelType> lts = getLvlTypes();
   Level l = 0;
-  while (l < lvlRank) {
+  while (l < getLvlRank()) {
     auto lt = lts[l];
     if (lt.isa<LevelFormat::Compressed, LevelFormat::LooseCompressed>()) {
       auto cur = lts.begin() + l;
@@ -975,6 +982,25 @@ mlir::sparse_tensor::SparseTensorType::getCOOSegments() const {
   return ret;
 }
 
+//===----------------------------------------------------------------------===//
+// SparseTensorType Methods.
+//===----------------------------------------------------------------------===//
+
+bool mlir::sparse_tensor::SparseTensorType::isCOOType(Level startLvl,
+                                                      bool isUnique) const {
+  if (!hasEncoding())
+    return false;
+  if (!isCompressedLvl(startLvl) && !isLooseCompressedLvl(startLvl))
+    return false;
+  for (Level l = startLvl + 1; l < lvlRank; ++l)
+    if (!isSingletonLvl(l))
+      return false;
+  // If isUnique is true, then make sure that the last level is unique,
+  // that is, when lvlRank == 1, the only compressed level is unique,
+  // and when lvlRank > 1, the last singleton is unique.
+  return !isUnique || isUniqueLvl(lvlRank - 1);
+}
+
 RankedTensorType
 mlir::sparse_tensor::SparseTensorType::getCOOType(bool ordered) const {
   SmallVector<LevelType> lvlTypes;
diff --git a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
index 8096c010ac935a..a3f72bd3ae971c 100644
--- a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
@@ -443,3 +443,88 @@ func.func private @NOutOfM(%arg0: tensor<?x?x?xf64, #NOutOfM>) {
 func.func private @NOutOfM(%arg0: tensor<?x?x?xf64, #NOutOfM>) {
   return
 }
+
+// -----
+
+#CSR_ExpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  explicitVal = 1 : i32,
+  implicitVal = 0.0 : f32
+}>
+
+// expected-error@+1 {{explicit value type mismatch between encoding and tensor element type: 'i32' != 'f32'}}
+func.func private @sparse_csr(tensor<?x?xf32, #CSR_ExpType>)
+
+// -----
+
+#CSR_ImpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  explicitVal = 1 : i32,
+  implicitVal = 0.0 : f32
+}>
+
+// expected-error@+1 {{implicit value type mismatch between encoding and tensor element type: 'f32' != 'i32'}}
+func.func private @sparse_csr(tensor<?x?xi32, #CSR_ImpType>)
+
+// -----
+
+// expected-error@+1 {{expected a numeric value for explicitVal}}
+#CSR_ExpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  explicitVal = "str"
+}>
+func.func private @sparse_csr(tensor<?x?xi32, #CSR_ExpType>)
+
+// -----
+
+// expected-error@+1 {{expected a numeric value for implicitVal}}
+#CSR_ImpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  implicitVal = "str"
+}>
+func.func private @sparse_csr(tensor<?x?xi32, #CSR_ImpType>)
+
+// -----
+
+#CSR_ImpVal = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  implicitVal = 1 : i32
+}>
+
+// expected-error@+1 {{implicit value must be zero}}
+func.func private @sparse_csr(tensor<?x?xi32, #CSR_ImpVal>)
+
+// -----
+
+#CSR_ImpVal = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  implicitVal = 1.0 : f32
+}>
+
+// expected-error@+1 {{implicit value must be zero}}
+func.func private @sparse_csr(tensor<?x?xf32, #CSR_ImpVal>)
+
+// -----
+
+#CSR_OnlyOnes = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 64,
+  crdWidth = 64,
+  explicitVal = #complex.number<:f32 1.0, 0.0>,
+  implicitVal = #complex.number<:f32 1.0, 0.0>
+}>
+
+// expected-error@+1 {{implicit value must be zero}}
+func.func private @sparse_csr(tensor<?x?xcomplex<f32>, #CSR_OnlyOnes>)