From 3dd1c4cd414b0d807c128f4df8daaa2f01d399aa Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:38:54 -0500 Subject: [PATCH 1/4] feat(entity-client): restli batchGetV2 batchSize fix and concurrency (#10630) --- datahub-frontend/app/auth/AuthModule.java | 4 +- datahub-frontend/conf/application.conf | 6 +- ...eConsumerApplicationTestConfiguration.java | 3 +- .../src/main/resources/application.yaml | 1 + .../RestliEntityClientFactory.java | 15 +- .../entity/client/RestliEntityClient.java | 180 +++++++++++------- .../client/SystemRestliEntityClient.java | 5 +- .../common/client/BaseClientTest.java | 6 +- .../client/SystemRestliEntityClientTest.java | 12 +- .../metadata/service/BaseService.java | 6 +- 10 files changed, 150 insertions(+), 88 deletions(-) diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 7db8f5689ead5..32dfba00d47db 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -63,6 +63,7 @@ public class AuthModule extends AbstractModule { private static final String ENTITY_CLIENT_RETRY_INTERVAL = "entityClient.retryInterval"; private static final String ENTITY_CLIENT_NUM_RETRIES = "entityClient.numRetries"; private static final String ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE = "entityClient.restli.get.batchSize"; + private static final String ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY = "entityClient.restli.get.batchConcurrency"; private static final String GET_SSO_SETTINGS_ENDPOINT = "auth/getSsoSettings"; private final com.typesafe.config.Config _configs; @@ -208,7 +209,8 @@ protected SystemEntityClient provideEntityClient( new ExponentialBackoff(_configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)), _configs.getInt(ENTITY_CLIENT_NUM_RETRIES), configurationProvider.getCache().getClient().getEntityClient(), - Math.max(1, _configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE))); + Math.max(1, _configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE)), + Math.max(1, _configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY))); } @Provides diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index 6aa58d5b13b2c..045175ba69f02 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -289,5 +289,7 @@ entityClient.retryInterval = 2 entityClient.retryInterval = ${?ENTITY_CLIENT_RETRY_INTERVAL} entityClient.numRetries = 3 entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES} -entityClient.restli.get.batchSize = 100 -entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE} \ No newline at end of file +entityClient.restli.get.batchSize = 50 +entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE} +entityClient.restli.get.batchConcurrency = 2 +entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY} \ No newline at end of file diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java index 08ff802c37e40..ba650c25a6117 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java @@ -47,7 +47,8 @@ public 
SystemEntityClient systemEntityClient( new ExponentialBackoff(1), 1, configurationProvider.getCache().getClient().getEntityClient(), - 1); + 1, + 2); } @MockBean public Database ebeanServer; diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 19621dce767c6..4d188bd5c6183 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -386,6 +386,7 @@ entityClient: restli: get: batchSize: ${ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE:100} # limited to prevent exceeding restli URI size limit + batchConcurrency: ${ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY:2} # parallel threads usageClient: retryInterval: ${USAGE_CLIENT_RETRY_INTERVAL:2} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java index 2d9f570e1b07d..9e7255bf43a34 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java @@ -30,7 +30,8 @@ public EntityClient entityClient( @Value("${datahub.gms.sslContext.protocol}") String gmsSslProtocol, @Value("${entityClient.retryInterval:2}") int retryInterval, @Value("${entityClient.numRetries:3}") int numRetries, - final @Value("${entityClient.restli.get.batchSize:150}") int batchGetV2Size) { + final @Value("${entityClient.restli.get.batchSize}") int batchGetV2Size, + final @Value("${entityClient.restli.get.batchConcurrency}") int batchGetV2Concurrency) { final Client restClient; if (gmsUri != null) { restClient = DefaultRestliClientFactory.getRestLiClient(URI.create(gmsUri), gmsSslProtocol); @@ -39,7 +40,11 @@ public EntityClient entityClient( DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); } return new RestliEntityClient( - restClient, new ExponentialBackoff(retryInterval), numRetries, batchGetV2Size); + restClient, + new ExponentialBackoff(retryInterval), + numRetries, + batchGetV2Size, + batchGetV2Concurrency); } @Bean("systemEntityClient") @@ -53,7 +58,8 @@ public SystemEntityClient systemEntityClient( @Value("${entityClient.retryInterval:2}") int retryInterval, @Value("${entityClient.numRetries:3}") int numRetries, final EntityClientCacheConfig entityClientCacheConfig, - final @Value("${entityClient.restli.get.batchSize:150}") int batchGetV2Size) { + final @Value("${entityClient.restli.get.batchSize}") int batchGetV2Size, + final @Value("${entityClient.restli.get.batchConcurrency}") int batchGetV2Concurrency) { final Client restClient; if (gmsUri != null) { @@ -67,6 +73,7 @@ public SystemEntityClient systemEntityClient( new ExponentialBackoff(retryInterval), numRetries, entityClientCacheConfig, - batchGetV2Size); + batchGetV2Size, + batchGetV2Concurrency); } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 70fae208ad77a..fe1ca571efea5 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ 
-85,8 +85,13 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.mail.MethodNotSupportedException; @@ -110,14 +115,17 @@ public class RestliEntityClient extends BaseClient implements EntityClient { private static final RunsRequestBuilders RUNS_REQUEST_BUILDERS = new RunsRequestBuilders(); private final int batchGetV2Size; + private final int batchGetV2Concurrency; public RestliEntityClient( @Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount, - int batchGetV2Size) { + int batchGetV2Size, + int batchGetV2Concurrency) { super(restliClient, backoffPolicy, retryCount); this.batchGetV2Size = Math.max(1, batchGetV2Size); + this.batchGetV2Concurrency = batchGetV2Concurrency; } @Override @@ -150,7 +158,6 @@ public Entity get(@Nonnull OperationContext opContext, @Nonnull final Urn urn) *

Batch get a set of {@link Entity} objects by urn. * * @param urns the urns of the entities to batch get - * @param authentication the authentication to include in the request to the Metadata Service * @throws RemoteInvocationException when unable to execute request */ @Override @@ -216,40 +223,54 @@ public Map batchGetV2( throws RemoteInvocationException, URISyntaxException { Map responseMap = new HashMap<>(); + ExecutorService executor = Executors.newFixedThreadPool(Math.max(1, batchGetV2Concurrency)); - Iterators.partition(urns.iterator(), batchGetV2Size) - .forEachRemaining( - batch -> { - try { - final EntitiesV2BatchGetRequestBuilder requestBuilder = - ENTITIES_V2_REQUEST_BUILDERS - .batchGet() - .aspectsParam(aspectNames) - .ids(batch.stream().map(Urn::toString).collect(Collectors.toList())); - - responseMap.putAll( - sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) - .getEntity() - .getResults() - .entrySet() - .stream() - .collect( - Collectors.toMap( - entry -> { - try { - return Urn.createFromString(entry.getKey()); - } catch (URISyntaxException e) { - throw new RuntimeException( - String.format( - "Failed to bind urn string with value %s into urn", - entry.getKey())); - } - }, - entry -> entry.getValue().getEntity()))); - } catch (RemoteInvocationException e) { - throw new RuntimeException(e); - } - }); + try { + Iterable> iterable = () -> Iterators.partition(urns.iterator(), batchGetV2Size); + List>> futures = + StreamSupport.stream(iterable.spliterator(), false) + .map( + batch -> + executor.submit( + () -> { + try { + log.debug("Executing batchGetV2 with batch size: {}", batch.size()); + final EntitiesV2BatchGetRequestBuilder requestBuilder = + ENTITIES_V2_REQUEST_BUILDERS + .batchGet() + .aspectsParam(aspectNames) + .ids( + batch.stream() + .map(Urn::toString) + .collect(Collectors.toList())); + + return sendClientRequest( + requestBuilder, opContext.getSessionAuthentication()) + .getEntity() + .getResults() + .entrySet() + .stream() + .collect( + Collectors.toMap( + entry -> UrnUtils.getUrn(entry.getKey()), + entry -> entry.getValue().getEntity())); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + })) + .collect(Collectors.toList()); + + futures.forEach( + result -> { + try { + responseMap.putAll(result.get()); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } finally { + executor.shutdown(); + } return responseMap; } @@ -260,7 +281,6 @@ public Map batchGetV2( * @param entityName the entity type to fetch * @param versionedUrns the urns of the entities to batch get * @param aspectNames the aspect names to batch get - * @param authentication the authentication to include in the request to the Metadata Service * @throws RemoteInvocationException when unable to execute request */ @Override @@ -272,39 +292,62 @@ public Map batchGetVersionedV2( @Nullable final Set aspectNames) { Map responseMap = new HashMap<>(); + ExecutorService executor = Executors.newFixedThreadPool(Math.max(1, batchGetV2Concurrency)); - Iterators.partition(versionedUrns.iterator(), batchGetV2Size) - .forEachRemaining( - batch -> { - final EntitiesVersionedV2BatchGetRequestBuilder requestBuilder = - ENTITIES_VERSIONED_V2_REQUEST_BUILDERS - .batchGet() - .aspectsParam(aspectNames) - .entityTypeParam(entityName) - .ids( - batch.stream() - .map( - versionedUrn -> - com.linkedin.common.urn.VersionedUrn.of( - versionedUrn.getUrn().toString(), - versionedUrn.getVersionStamp())) - 
.collect(Collectors.toSet())); - - try { - responseMap.putAll( - sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) - .getEntity() - .getResults() - .entrySet() - .stream() - .collect( - Collectors.toMap( - entry -> UrnUtils.getUrn(entry.getKey().getUrn()), - entry -> entry.getValue().getEntity()))); - } catch (RemoteInvocationException e) { - throw new RuntimeException(e); - } - }); + try { + Iterable> iterable = + () -> Iterators.partition(versionedUrns.iterator(), batchGetV2Size); + List>> futures = + StreamSupport.stream(iterable.spliterator(), false) + .map( + batch -> + executor.submit( + () -> { + try { + log.debug( + "Executing batchGetVersionedV2 with batch size: {}", + batch.size()); + final EntitiesVersionedV2BatchGetRequestBuilder requestBuilder = + ENTITIES_VERSIONED_V2_REQUEST_BUILDERS + .batchGet() + .aspectsParam(aspectNames) + .entityTypeParam(entityName) + .ids( + batch.stream() + .map( + versionedUrn -> + com.linkedin.common.urn.VersionedUrn.of( + versionedUrn.getUrn().toString(), + versionedUrn.getVersionStamp())) + .collect(Collectors.toSet())); + + return sendClientRequest( + requestBuilder, opContext.getSessionAuthentication()) + .getEntity() + .getResults() + .entrySet() + .stream() + .collect( + Collectors.toMap( + entry -> UrnUtils.getUrn(entry.getKey().getUrn()), + entry -> entry.getValue().getEntity())); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + })) + .collect(Collectors.toList()); + + futures.forEach( + result -> { + try { + responseMap.putAll(result.get()); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } finally { + executor.shutdown(); + } return responseMap; } @@ -955,7 +998,6 @@ public VersionedAspect getAspectOrNull( * @param startTimeMillis the earliest desired event time of the aspect value in milliseconds. * @param endTimeMillis the latest desired event time of the aspect value in milliseconds. * @param limit the maximum number of desired aspect values. - * @param authentication the actor associated with the request [internal] * @return the list of EnvelopedAspect values satisfying the input parameters. * @throws RemoteInvocationException on remote request error. 
*/ diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java index 364ee9b0519d2..7546d1f0a3b54 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java @@ -27,8 +27,9 @@ public SystemRestliEntityClient( @Nonnull final BackoffPolicy backoffPolicy, int retryCount, EntityClientCacheConfig cacheConfig, - int batchGetV2Size) { - super(restliClient, backoffPolicy, retryCount, batchGetV2Size); + int batchGetV2Size, + int batchGetV2Concurrency) { + super(restliClient, backoffPolicy, retryCount, batchGetV2Size, batchGetV2Concurrency); this.operationContextMap = CacheBuilder.newBuilder().maximumSize(500).build(); this.entityClientCache = buildEntityClientCache(SystemRestliEntityClient.class, cacheConfig); } diff --git a/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java b/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java index 474bb24f9e16b..797ead10c1a66 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java @@ -37,7 +37,7 @@ public void testZeroRetry() throws RemoteInvocationException { when(mockRestliClient.sendRequest(any(ActionRequest.class))).thenReturn(mockFuture); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 0, 10); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 0, 10, 2); testClient.sendClientRequest(testRequestBuilder, AUTH); // Expected 1 actual try and 0 retries verify(mockRestliClient).sendRequest(any(ActionRequest.class)); @@ -56,7 +56,7 @@ public void testMultipleRetries() throws RemoteInvocationException { .thenReturn(mockFuture); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10, 2); testClient.sendClientRequest(testRequestBuilder, AUTH); // Expected 1 actual try and 1 retries verify(mockRestliClient, times(2)).sendRequest(any(ActionRequest.class)); @@ -73,7 +73,7 @@ public void testNonRetry() { .thenThrow(new RuntimeException(new RequiredFieldNotPresentException("value"))); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10, 2); assertThrows( RuntimeException.class, () -> testClient.sendClientRequest(testRequestBuilder, AUTH)); } diff --git a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java index 75614ca998f6a..e6d53fc98e2e3 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java @@ -45,7 +45,8 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { noCacheConfig.setEnabled(true); SystemRestliEntityClient noCacheTest = - new 
SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1); + new SystemRestliEntityClient( + mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1, 2); com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); @@ -83,7 +84,8 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1); + new SystemRestliEntityClient( + mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1, 2); mockResponse(mockRestliClient, responseStatusTrue); assertEquals( @@ -117,7 +119,8 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio noCacheConfig.setEnabled(true); SystemRestliEntityClient noCacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1); + new SystemRestliEntityClient( + mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1, 2); com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); @@ -155,7 +158,8 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1); + new SystemRestliEntityClient( + mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1, 2); mockResponse(mockRestliClient, responseStatusTrue); assertEquals( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java index 3f9022b634c67..dc533e4aa5de5 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java @@ -19,6 +19,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; @@ -61,8 +62,9 @@ protected Map getTagsAspects( return finalResult; } catch (Exception e) { log.error( - "Error retrieving global tags for entities. Entities: {} aspect: {}", - entityUrns, + "Error retrieving global tags for {} entities. 
Sample Urns: {} aspect: {}", + entityUrns.size(), + entityUrns.stream().limit(10).collect(Collectors.toList()), Constants.GLOSSARY_TERMS_ASPECT_NAME, e); return Collections.emptyMap(); From eda1db081b972b705737811242fb530fdd233d2f Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 6 Jun 2024 14:05:44 -0700 Subject: [PATCH 2/4] docs(): Adding API docs for incidents, operations, and assertions (#10522) Co-authored-by: John Joyce Co-authored-by: John Joyce Co-authored-by: John Joyce Co-authored-by: John Joyce Co-authored-by: John Joyce --- docs-website/sidebars.js | 7 +- docs/api/tutorials/assertions.md | 1181 +++++++++++++++++ docs/api/tutorials/data-contracts.md | 217 +++ docs/api/tutorials/incidents.md | 164 +++ docs/api/tutorials/operations.md | 136 ++ .../library/dataset_read_operations.py | 19 + .../library/dataset_report_operation.py | 19 + .../examples/library/delete_assertion.py | 18 + .../examples/library/run_assertion.py | 20 + .../examples/library/run_assertions.py | 37 + .../library/run_assertions_for_asset.py | 38 + 11 files changed, 1855 insertions(+), 1 deletion(-) create mode 100644 docs/api/tutorials/assertions.md create mode 100644 docs/api/tutorials/data-contracts.md create mode 100644 docs/api/tutorials/incidents.md create mode 100644 docs/api/tutorials/operations.md create mode 100644 metadata-ingestion/examples/library/dataset_read_operations.py create mode 100644 metadata-ingestion/examples/library/dataset_report_operation.py create mode 100644 metadata-ingestion/examples/library/delete_assertion.py create mode 100644 metadata-ingestion/examples/library/run_assertion.py create mode 100644 metadata-ingestion/examples/library/run_assertions.py create mode 100644 metadata-ingestion/examples/library/run_assertions_for_asset.py diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 1f2208bc17978..2eb600eff74e8 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -754,7 +754,7 @@ module.exports = { }, { type: "category", - label: "Datahub Actions", + label: "DataHub Actions", link: { type: "doc", id: "docs/act-on-metadata" }, items: [ "docs/actions/README", @@ -800,6 +800,11 @@ module.exports = { "docs/api/tutorials/datasets", "docs/api/tutorials/deprecation", "docs/api/tutorials/descriptions", + "docs/api/tutorials/custom-properties", + "docs/api/tutorials/assertions", + "docs/api/tutorials/incidents", + "docs/api/tutorials/operations", + "docs/api/tutorials/data-contracts", "docs/api/tutorials/domains", "docs/api/tutorials/forms", "docs/api/tutorials/lineage", diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md new file mode 100644 index 0000000000000..08832ee19ff89 --- /dev/null +++ b/docs/api/tutorials/assertions.md @@ -0,0 +1,1181 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Assertions + + + +This guide specifically covers how to use the Assertion APIs for **Acryl Cloud** native assertions, including: + +- [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) +- [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) +- [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) +- [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) +- [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) + +## Why Would You Use Assertions APIs? + +The Assertions APIs allow you to create, schedule, run, and delete Assertions with Acryl Cloud. 
+ +### Goal Of This Guide + +This guide will show you how to create, schedule, run and delete Assertions for a Table. + +## Prerequisites + +The actor making API calls must have the `Edit Assertions` and `Edit Monitors` privileges for the Tables at hand. + +## Create Assertions + +You can create new dataset Assertions to DataHub using the following APIs. + + + + +### Freshness Assertion + +To create a new freshness assertion, use the `upsertDatasetFreshnessAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetFreshnessAssertionMonitor { + upsertDatasetFreshnessAssertionMonitor( + input: { + entityUrn: "", + schedule: { + type: FIXED_INTERVAL, + fixedInterval: { unit: HOUR, multiple: 8 } + } + evaluationSchedule: { + timezone: "America/Los_Angeles", + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: INFORMATION_SCHEMA + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetFreshnessAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) guide. + +### Volume Assertions + +To create a new volume assertion, use the `upsertDatasetVolumeAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetVolumeAssertionMonitor { + upsertDatasetVolumeAssertionMonitor( + input: { + entityUrn: "" + type: ROW_COUNT_TOTAL + rowCountTotal: { + operator: BETWEEN + parameters: { + minValue: { + value: "10" + type: NUMBER + } + maxValue: { + value: "20" + type: NUMBER + } + } + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: INFORMATION_SCHEMA + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetVolumeAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) guide. + +### Column Assertions + +To create a new column assertion, use the `upsertDatasetFieldAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetFieldAssertionMonitor { + upsertDatasetFieldAssertionMonitor( + input: { + entityUrn: "" + type: FIELD_VALUES, + fieldValuesAssertion: { + field: { + path: "", + type: "NUMBER", + nativeType: "NUMBER(38,0)" + }, + operator: GREATER_THAN, + parameters: { + value: { + type: NUMBER, + value: "10" + } + }, + failThreshold: { + type: COUNT, + value: 0 + }, + excludeNulls: true + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: ALL_ROWS_QUERY + } + mode: ACTIVE + } + ){ + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetFieldAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) guide. + +### Custom SQL Assertions + +To create a new column assertion, use the `upsertDatasetSqlAssertionMonitor` GraphQL Mutation. 
+ +```graphql +mutation upsertDatasetSqlAssertionMonitor { + upsertDatasetSqlAssertionMonitor( + assertionUrn: "" + input: { + entityUrn: "" + type: METRIC, + description: "", + statement: "", + operator: GREATER_THAN_OR_EQUAL_TO, + parameters: { + value: { + value: "100", + type: NUMBER + } + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */6 * * *" + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetSqlAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) guide. + +### Schema Assertions + +To create a new schema assertion, use the `upsertDatasetSchemaAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + assertionUrn: "urn:li:assertion:existing-assertion-id", + input: { + entityUrn: "", + assertion: { + compatibility: EXACT_MATCH, + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetSchemaAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) guide. + + + + + +## Run Assertions + +You can use the following APIs to trigger the assertions you've created to run on-demand. This is +particularly useful for running assertions on a custom schedule, for example from your production +data pipelines. + +> **Long-Running Assertions**: The timeout for synchronously running an assertion is currently limited to a maximum of 30 seconds. +> Each of the following APIs support an `async` parameter, which can be set to `true` to run the assertion asynchronously. +> When set to `true`, the API will kick off the assertion run and return null immediately. To view the result of the assertion, +> simply fetching the runEvents field of the `assertion(urn: String!)` GraphQL query. + + + + +### Run Assertion + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", saveResult: true) { + type + nativeResults { + key + value + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResult` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If the assertion is external (not natively executed by Acryl), this API will return an error. 
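+
+If you are not using a GraphQL client, the same mutation can be sent over plain HTTP. The snippet below is a minimal sketch, assuming a DataHub instance reachable at `localhost:8080` and a personal access token; the token and assertion URN are placeholders:
+
+```shell
+# POST the runAssertion mutation to DataHub's GraphQL endpoint
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+  --header 'Authorization: Bearer <personal-access-token>' \
+  --header 'Content-Type: application/json' \
+  --data-raw '{ "query": "mutation runAssertion { runAssertion(urn: \"urn:li:assertion:your-assertion-id\", saveResult: true) { type nativeResults { key value } } }", "variables": {} }'
+```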
+ +If running the assertion is successful, the result will be returned as follows: + +```json +{ + "data": { + "runAssertion": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + "extensions": {} +} +``` + +### Run Group of Assertions + +```graphql +mutation runAssertions { + runAssertions(urns: ["urn:li:assertion:your-assertion-id-1", "urn:li:assertion:your-assertion-id-2"], saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertion are external (not natively executed by Acryl), they will simply be omitted from the result set. + +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertions": { + "passingCount": 2, + "failingCount": 0, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "result": { + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion. + +### Run All Assertions for Table + +You can also run all assertions for a specific data asset using the `runAssertionsForAsset` mutation. + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +Where `type` will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertion are external (not natively executed by Acryl), they will simply be omitted from the result +set. + +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertionsForAsset": { + "passingCount": 2, + "failingCount": 0, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "result": { + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion. + +### Run Group of Assertions for Table + +If you don't always want to run _all_ assertions for a given table, you can also opt to run a subset of the +table's assertions using *Assertion Tags*. 
First, you'll add tags to your assertions to group and categorize them, +then you'll call the `runAssertionsForAsset` mutation with the `tagUrns` argument to filter for assertions having those tags. + +#### Step 1: Adding Tag to an Assertion + +Currently, you can add tags to an assertion only via the DataHub GraphQL API. You can do this using the following mutation: + +```graphql +mutation addTags { + addTag(input: { + resourceUrn: "urn:li:assertion:your-assertion", + tagUrn: "urn:li:tag:my-important-tag", + }) +} +``` + +#### Step 2: Run All Assertions for a Table with Tags + +Now, you can run all assertions for a table with a specific tag(s) using the `runAssertionsForAsset` mutation with the +`tagUrns` input parameter: + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", tagUrns: ["urn:li:tag:my-important-tag"]) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +**Coming Soon**: Support for adding tags to assertions through the DataHub UI. + + + + + +### Run Assertion + +```python +{{ inline /metadata-ingestion/examples/library/run_assertion.py show_path_as_comment }} +``` + +### Run Group of Assertions + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions.py show_path_as_comment }} +``` + +### Run All Assertions for Table + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions_for_asset.py show_path_as_comment }} +``` + + + + + +### Experimental: Providing Dynamic Parameters to Assertions + +You can provide **dynamic parameters** to your assertions to customize their behavior. This is particularly useful for +assertions that require dynamic parameters, such as a threshold value that changes based on the time of day. + +Dynamic parameters can be injected into the SQL fragment portion of any Assertion. For example, it can appear +in any part of the SQL statement in a [Custom SQL](/docs/managed-datahub/observe/custom-sql-assertions.md) Assertion, +or it can appear in the **Advanced > Filter** section of a [Column](/docs/managed-datahub/observe/column-assertions.md), +[Volume](/docs/managed-datahub/observe/volume-assertions.md), or [Freshness](/docs/managed-datahub/observe/freshness-assertions.md) Assertion. + +To do so, you'll first need to edit the SQL fragment to include the dynamic parameter. Dynamic parameters appear +as `${parameterName}` in the SQL fragment. + +Next, you'll call the `runAssertion`, `runAssertions`, or `runAssertionsForAsset` mutations with the `parameters` input argument. +This argument is a list of key-value tuples, where the key is the parameter name and the value is the parameter value: + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", parameters: [{key: "parameterName", value: "parameterValue"}]) { + type + nativeResults { + key + value + } + } +} +``` + +At runtime, the `${parameterName}` placeholder in the SQL fragment will be replaced with the provided `parameterValue` before the query +is sent to the database for execution. + +## Get Assertion Details + +You can use the following APIs to + +1. Fetch existing assertion definitions + run history +2. Fetch the assertions associated with a given table + their run history. + + + + +### Get Assertions for Table + +To retrieve all the assertions for a table, you can use the following GraphQL Query. 
+ +```graphql +query dataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + assertions(start: 0, count: 1000) { + start + count + total + assertions { + # Fetch the last run of each associated assertion. + runEvents(status: COMPLETE, limit: 1) { + total + failed + succeeded + runEvents { + timestampMillis + status + result { + type + nativeResults { + key + value + } + } + } + } + info { + type + description + lastUpdated { + time + actor + } + datasetAssertion { + datasetUrn + scope + aggregation + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + fields { + urn + path + } + nativeType + nativeParameters { + key + value + } + logic + } + freshnessAssertion { + type + entityUrn + schedule { + type + cron { + cron + timezone + } + fixedInterval { + unit + multiple + } + } + filter { + type + sql + } + } + sqlAssertion { + type + entityUrn + statement + changeType + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + fieldAssertion { + type + entityUrn + filter { + type + sql + } + fieldValuesAssertion { + field { + path + type + nativeType + } + transform { + type + } + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + failThreshold { + type + value + } + excludeNulls + } + fieldMetricAssertion { + field { + path + type + nativeType + } + metric + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + volumeAssertion { + type + entityUrn + filter { + type + sql + } + rowCountTotal { + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + rowCountChange { + type + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + schemaAssertion { + entityUrn + compatibility + fields { + path + type + nativeType + } + schema { + fields { + fieldPath + type + nativeDataType + } + } + } + source { + type + created { + time + actor + } + } + } + } + } + } +} +``` + +### Get Assertion Details + +You can use the following GraphQL query to fetch the details for an assertion along with its evaluation history by URN. + +```graphql +query getAssertion { + assertion(urn: "urn:li:assertion:assertion-id") { + # Fetch the last 10 runs for the assertion. 
+ runEvents(status: COMPLETE, limit: 10) { + total + failed + succeeded + runEvents { + timestampMillis + status + result { + type + nativeResults { + key + value + } + } + } + } + info { + type + description + lastUpdated { + time + actor + } + datasetAssertion { + datasetUrn + scope + aggregation + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + fields { + urn + path + } + nativeType + nativeParameters { + key + value + } + logic + } + freshnessAssertion { + type + entityUrn + schedule { + type + cron { + cron + timezone + } + fixedInterval { + unit + multiple + } + } + filter { + type + sql + } + } + sqlAssertion { + type + entityUrn + statement + changeType + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + fieldAssertion { + type + entityUrn + filter { + type + sql + } + fieldValuesAssertion { + field { + path + type + nativeType + } + transform { + type + } + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + failThreshold { + type + value + } + excludeNulls + } + fieldMetricAssertion { + field { + path + type + nativeType + } + metric + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + volumeAssertion { + type + entityUrn + filter { + type + sql + } + rowCountTotal { + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + rowCountChange { + type + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + schemaAssertion { + entityUrn + compatibility + fields { + path + type + nativeType + } + schema { + fields { + fieldPath + type + nativeDataType + } + } + } + source { + type + created { + time + actor + } + } + } + } +} +``` + + + + + +```python +Python support coming soon! +``` + + + + +## Add Tag to Assertion + +You can add tags to individual assertions to group and categorize them, for example by its priority or severity. +Note that the tag should already exist in DataHub, or the operation will fail. + + + + +```graphql +mutation addTags { + addTag(input: { + resourceUrn: "urn:li:assertion:your-assertion", + tagUrn: "urn:li:tag:my-important-tag", + }) +} +``` + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "addTag": true + }, + "extensions": {} +} +``` + +You can create new tags using the `createTag` mutation or via the UI. + + + + +## Delete Assertions + +You can use delete dataset operations to DataHub using the following APIs. + + + + +```graphql +mutation deleteAssertion { + deleteAssertion(urn: "urn:li:assertion:test") +} +``` + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "deleteAssertion": true + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/delete_assertion.py show_path_as_comment }} +``` + + + + +## (Advanced) Create and Report Results for Custom Assertions + +If you'd like to create and report results for your own custom assertions, e.g. those which are run and +evaluated outside of Acryl, you need to generate 2 important Assertion Entity aspects, and give the assertion a unique +URN of the following format: + + +1. 
Generate a unique URN for your assertion + +```plaintext +urn:li:assertion: +``` + +2. Generate the [**AssertionInfo**](/docs/generated/metamodel/entities/assertion.md#assertion-info) aspect for the assertion. You can do this using the Python SDK. Give your assertion a `type` and a `source` +with type `EXTERNAL` to mark it as an external assertion, not run by DataHub itself. + +3. Generate the [**AssertionRunEvent**](/docs/generated/metamodel/entities/assertion.md#assertionrunevent-timeseries) timeseries aspect using the Python SDK. This aspect should contain the result of the assertion +run at a given timestamp and will be shown on the results graph in DataHub's UI. + diff --git a/docs/api/tutorials/data-contracts.md b/docs/api/tutorials/data-contracts.md new file mode 100644 index 0000000000000..ac19920a5c4b7 --- /dev/null +++ b/docs/api/tutorials/data-contracts.md @@ -0,0 +1,217 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Data Contracts + + + +This guide specifically covers how to use the Data Contract APIs with **Acryl Cloud**. + +## Why Would You Use Data Contract APIs? + +The Assertions APIs allow you to create, update, and evaluate Data Contracts programmatically. This is particularly +useful to automate the monitoring of data quality and schema compliance for your data. + +### Goal Of This Guide + +This guide will show you how to create, update, and check the status of aData Contract. + +## Prerequisites + +### Privileges Required + +The actor making API calls must have the `Edit Data Contract` privileges for the Tables at hand. + +### Assertions + +Before creating a Data Contract, you should have already created the Assertions that you want to associate with the Data Contract. +Check out the [Assertions](/docs/api/tutorials/assertions.md) guide for details on how to create DataHub Assertions. + +## Create & Update Data Contract + +You can create a new Data Contract, which is simply bundle of "important" assertions, using the following APIs. + + + + +To create or update a Data Contract, simply use the `upsertDataContract` GraphQL Mutation. + +```graphql +mutation upsertDataContract { + upsertDataContract( + input: { + entityUrn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)", # Table to Create Contract for + freshness: [ + { + assertionUrn: "urn:li:assertion:your-freshness-assertion-id", + } + ], + schema: [ + { + assertionUrn: "urn:li:assertion:your-schema-assertion-id", + } + ], + dataQuality: [ + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-1", + }, + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-2", + } + ] + }) { + urn + } + ) +} +``` + +This API will return a unique identifier (URN) for the Data Contract if you were successful: + +```json +{ + "data": { + "upsertDataContract": { + "urn": "urn:li:dataContract:your-new-contract-id" + } + }, + "extensions": {} +} +``` + +If you want to update an existing Data Contract, you can use the same API, but also passing the `urn` parameter in the +`upsertDataContract` mutation. 
+ +```graphql +mutation upsertDataContract { + upsertDataContract( + urn: "urn:li:dataContract:your-existing-contract-id", + input: { + freshness: [ + { + assertionUrn: "urn:li:assertion:your-freshness-assertion-id", + } + ], + schema: [ + { + assertionUrn: "urn:li:assertion:your-schema-assertion-id", + } + ], + dataQuality: [ + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-1", + }, + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-2", + } + ] + }) { + urn + } + ) +} +``` + + + + +## Check Contract Status + +You can use the following APIs to check whether a Data Contract is passing or failing, which is determined +by the last status of the assertions associated with the contract. + + + + + +### Check Contract Status for Table + +```graphql +query getTableContractStatus { + dataset(urn: "urn:li:dataset(urn:li:dataPlatform:snowflake,purchases,PROD") { + contract { + result { + type # Passing or Failing. + assertionResults { # Results of each contract assertion. + assertion { + urn + } + result { + type + nativeResults { + key + value + } + } + } + } + } + } +} +``` + +You can also _force refresh_ all of the Contract Assertions by evaluating them on-demand by providing the `refresh` argument +in your query. + +```graphql +query getTableContractStatus { + dataset(urn: "urn:li:dataset(urn:li:dataPlatform:snowflake,purchases,PROD") { + contract(refresh: true) { + ... same + } + } +} +``` + +This will run any native Acryl assertions comprising the Data Contract. Be careful! This can take a while depending on how many native assertions are part of the contract. + +If you're successful, you'll get the latest status for the Table Contract: + +```json +{ + "data": { + "dataset": { + "contract": { + "result": { + "type": "PASSING", + "assertionResults": [ + { + "assertion": { + "urn": "urn:li:assertion:your-freshness-assertion-id" + }, + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "assertion": { + "urn": "urn:li:assertion:your-volume-assertion-id" + }, + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + } + } + }, + "extensions": {} +} +``` + + + + diff --git a/docs/api/tutorials/incidents.md b/docs/api/tutorials/incidents.md new file mode 100644 index 0000000000000..20a24d58a1db4 --- /dev/null +++ b/docs/api/tutorials/incidents.md @@ -0,0 +1,164 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Incidents + +## Why Would You Use Incidents APIs? + +The Incidents APIs allow you to raise, retrieve, update and resolve data incidents via API. This is +useful for raising or resolving data incidents programmatically, for example from Airflow, Prefect, or Dagster DAGs. +Incidents are also useful for conditional Circuit Breaking in these pipelines. + +### Goal Of This Guide + +This guide will show you how to raise, retrieve, update and resolve data incidents via API. + +## Prerequisites + +The actor making API calls must have the `Edit Incidents` privileges for the Tables at hand. + +## Raise Incident + +You can raise a new Data Incident for an existing asset using the following APIs. 
+ + + + +```graphql +mutation raiseIncident { + raiseIncident( + input: { + resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,public.prod.purchases,PROD)", + type: OPERATIONAL, + title: "Data is Delayed", + description: "Data is delayed on May 15, 2024 because of downtime in the Spark Cluster.", + } + ) +} +``` + +Where `resourceUrn` is the unique identifier for the data asset (dataset, dashboard, chart, data job, or data flow) you want to raise the incident on. + +Where supported Incident Types include + +- `OPERATIONAL` +- `FRESHNESS` +- `VOLUME` +- `COLUMN` +- `SQL` +- `DATA_SCHEMA` +- `CUSTOM` + +If you see the following response, a unique identifier for the new incident will be returned. + +```json +{ + "data": { + "raiseIncident": "urn:li:incident:new-incident-id" + }, + "extensions": {} +} +``` + + + + + +``` +Python SDK support coming soon! +``` + + + + + +## Get Incidents For Data Asset + +You can use retrieve the incidents and their statuses for a given Data Asset using the following APIs. + + + + +```graphql +query getAssetIncidents { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,public.prod.purchases,PROD)") { + incidents( + state: ACTIVE, start: 0, count: 20 + ) { + start + count + total + incidents { + urn + incidentType + title + description + status { + state + lastUpdated { + time + actor + } + } + } + } + } +} +``` + +Where you can filter for active incidents by passing the `ACTIVE` state and resolved incidents by passing the `RESOLVED` state. +This will return all relevant incidents for the dataset. + + + + + +``` +Python SDK support coming soon! +``` + + + + + +## Resolve Incidents + +You can update the status of an incident using the following APIs. + + + + +```graphql +mutation updateIncidentStatus { + updateIncidentStatus( + input: { + state: RESOLVED, + message: "The delayed data issue was resolved at 4:55pm on May 15." + } + ) +} +``` + +You can also reopen an incident by updating the state from `RESOLVED` to `ACTIVE`. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "updateIncidentStatus": true + }, + "extensions": {} +} +``` + + + + + +``` +Python SDK support coming soon! +``` + + + \ No newline at end of file diff --git a/docs/api/tutorials/operations.md b/docs/api/tutorials/operations.md new file mode 100644 index 0000000000000..70ede993ec95f --- /dev/null +++ b/docs/api/tutorials/operations.md @@ -0,0 +1,136 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Operations + +## Why Would You Use Operations APIs? + +The Operations APIs allow you to report operational changes that were made to a given Dataset or Table using the 'Operation' concept. +These operations may be viewed on the Dataset Profile (e.g. as last modified time), accessed via the DataHub GraphQL API, or +used to as inputs to Acryl Cloud [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md). + +### Goal Of This Guide + +This guide will show you how to report and query Operations for a Dataset. + +## Prerequisites + +For this tutorial, you need to deploy DataHub Quickstart and ingest sample data. +For detailed steps, please refer to [DataHub Quickstart Guide](/docs/quickstart.md). + +:::note +Before reporting operations for a dataset, you need to ensure the targeted dataset is already present in DataHub. +::: + +## Report Operations + +You can use report dataset operations to DataHub using the following APIs. 
+ + + + +```graphql +mutation reportOperation { + reportOperation( + input: { + urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", + operationType: INSERT, + sourceType: DATA_PROCESS + } + ) +} +``` + +Where supported operation types include + +- `INSERT` +- `UPDATE` +- `DELETE` +- `CREATE` +- `ALTER` +- `DROP` +- `CUSTOM` + +If you want to report an operation that happened at a specific time, you can also optionally provide +the `timestampMillis` field. If not provided, the current server time will be used as the operation time. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "reportOperation": true + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/dataset_report_operation.py show_path_as_comment }} +``` + + + + +## Read Operations + +You can use read dataset operations to DataHub using the following APIs. + + + + +```graphql +query dataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") { + operations( + limit: 10, filter: [], startTimeMillis: , endTimeMillis: + ) { + timestampMillis + operationType + sourceType + } + } +} +``` + +Where startTimeMillis and endTimeMillis are optional. By default, operations are sorted by time descending. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "dataset": { + "operations": [ + { + "timestampMillis": 1231232332, + "operationType": "INSERT", + "sourceType": "DATA_PROCESS" + } + ] + } + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/dataset_read_operations.py show_path_as_comment }} +``` + + + + +### Expected Outcomes of Reporting Operations + +Reported Operations will appear when displaying the Last Updated time for a Dataset on their DataHub Profile. +They will also be used when selecting the `DataHub Operation` source type under the **Advanced** settings of a Freshness +Assertion. 
\ No newline at end of file diff --git a/metadata-ingestion/examples/library/dataset_read_operations.py b/metadata-ingestion/examples/library/dataset_read_operations.py new file mode 100644 index 0000000000000..78c9a92141cef --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_read_operations.py @@ -0,0 +1,19 @@ +from datahub.api.graphql import Operation + +DATAHUB_HOST = "https//:org.acryl.io/gms" +DATAHUB_TOKEN = ", + # end_time_millis= +) diff --git a/metadata-ingestion/examples/library/dataset_report_operation.py b/metadata-ingestion/examples/library/dataset_report_operation.py new file mode 100644 index 0000000000000..15ebc43dba60a --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_report_operation.py @@ -0,0 +1,19 @@ +from datahub.api.graphql import Operation + +DATAHUB_HOST = "https//:org.acryl.io/gms" +DATAHUB_TOKEN = " Date: Thu, 6 Jun 2024 16:11:45 -0500 Subject: [PATCH 3/4] feat(ci): fix conditionals and consolidate change detection (#10649) --- .github/actions/ci-optimization/action.yml | 5 ++ .github/workflows/docker-unified.yml | 95 +++++++--------------- 2 files changed, 36 insertions(+), 64 deletions(-) diff --git a/.github/actions/ci-optimization/action.yml b/.github/actions/ci-optimization/action.yml index 0dcbdcecf34ad..2f677a0e552c2 100644 --- a/.github/actions/ci-optimization/action.yml +++ b/.github/actions/ci-optimization/action.yml @@ -17,6 +17,9 @@ outputs: ingestion-change: description: "Ingestion code has changed" value: ${{ steps.filter.outputs.ingestion == 'true' }} + ingestion-base-change: + description: "Ingestion base image docker image has changed" + value: ${{ steps.filter.outputs.ingestion-base == 'true' }} frontend-change: description: "Frontend code has changed" value: ${{ steps.filter.outputs.frontend == 'true' }} @@ -56,6 +59,8 @@ runs: - "metadata-models/**" - "smoke-test/**" - "docker/datahub-ingestion**" + ingestion-base: + - "docker/datahub-ingestion-base/**" docker: - "docker/**" backend: diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index fe50ad67c6492..53ecb762912b7 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -52,6 +52,7 @@ jobs: repository_name: ${{ steps.tag.outputs.repository_name }} frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' }} ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' }} + ingestion_base_change: ${{ steps.ci-optimize.outputs.ingestion-base-change == 'true' }} backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' }} frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }} ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }} @@ -528,14 +529,8 @@ jobs: steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - datahub-ingestion-base: - - 'docker/datahub-ingestion-base/**' - name: Build and push Base Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.ingestion_base_change == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: base @@ -550,7 +545,7 @@ jobs: platforms: linux/amd64,linux/arm64/v8 - name: Compute DataHub Ingestion (Base) Tag id: tag - run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' 
&& needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT datahub_ingestion_base_slim_build: name: Build and Push DataHub Ingestion (Base-Slim) Docker Image runs-on: ubuntu-latest @@ -561,25 +556,19 @@ jobs: steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - datahub-ingestion-base: - - 'docker/datahub-ingestion-base/**' - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 - if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && needs.setup.outputs.ingestion_base_change == 'true' }} with: - image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }} + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} - name: Login to DockerHub uses: docker/login-action@v3 - if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && steps.filter.outputs.datahub-ingestion-base == 'false' }} + if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && needs.setup.outputs.ingestion_base_change == 'false' }} with: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Base-Slim Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.ingestion_base_change == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: slim-install @@ -590,14 +579,14 @@ jobs: password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} build-args: | APP_ENV=slim - BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }} + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} context: . 
file: ./docker/datahub-ingestion-base/Dockerfile platforms: linux/amd64,linux/arm64/v8 - name: Compute DataHub Ingestion (Base-Slim) Tag id: tag - run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT datahub_ingestion_base_full_build: name: Build and Push DataHub Ingestion (Base-Full) Docker Image runs-on: ubuntu-latest @@ -608,25 +597,19 @@ jobs: steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - datahub-ingestion-base: - - 'docker/datahub-ingestion-base/**' - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 - if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && needs.setup.outputs.ingestion_base_change == 'true' }} with: - image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }} + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} - name: Login to DockerHub uses: docker/login-action@v3 - if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && steps.filter.outputs.datahub-ingestion-base == 'false' }} + if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && needs.setup.outputs.ingestion_base_change == 'false' }} with: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push (Base-Full) Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.ingestion_base_change == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: full-install @@ -637,21 +620,21 @@ jobs: password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} build-args: | APP_ENV=full - BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }} + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} context: . 
file: ./docker/datahub-ingestion-base/Dockerfile platforms: linux/amd64,linux/arm64/v8 - name: Compute DataHub Ingestion (Base-Full) Tag id: tag - run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT datahub_ingestion_slim_build: name: Build and Push DataHub Ingestion Docker Images runs-on: ubuntu-latest outputs: tag: ${{ steps.tag.outputs.tag }} - needs_artifact_download: ${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true') }} + needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 'true' && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true') }} needs: [setup, datahub_ingestion_base_slim_build] if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} steps: @@ -663,30 +646,22 @@ jobs: - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - datahub-ingestion-base: - - 'docker/datahub-ingestion-base/**' - datahub-ingestion: - - 'docker/datahub-ingestion/**' - name: Build codegen - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 - if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && needs.setup.outputs.ingestion_base_change == 'true' }} with: - image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} - name: Login to DockerHub uses: docker/login-action@v3 - if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && steps.filter.outputs.datahub-ingestion-base == 'false' }} + if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && needs.setup.outputs.ingestion_base_change == 'false' }} with: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Slim Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -694,7 +669,7 @@ jobs: ${{ env.DATAHUB_INGESTION_IMAGE }} build-args: | BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - DOCKER_VERSION=${{ 
steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} + DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} APP_ENV=slim tags: ${{ needs.setup.outputs.slim_tag }} @@ -706,7 +681,7 @@ jobs: platforms: linux/amd64,linux/arm64/v8 - name: Compute Tag id: tag - run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT datahub_ingestion_slim_scan: permissions: contents: read # for actions/checkout to fetch code @@ -746,7 +721,7 @@ jobs: runs-on: ubuntu-latest outputs: tag: ${{ steps.tag.outputs.tag }} - needs_artifact_download: ${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' ) }} + needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 'true' && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' ) }} needs: [setup, datahub_ingestion_base_full_build] if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} steps: @@ -758,30 +733,22 @@ jobs: - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - datahub-ingestion-base: - - 'docker/datahub-ingestion-base/**' - datahub-ingestion: - - 'docker/datahub-ingestion/**' - name: Build codegen - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 - if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' && needs.setup.outputs.ingestion_base_change == 'true' }} with: - image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }} + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} - name: Login to DockerHub uses: docker/login-action@v3 - if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && steps.filter.outputs.datahub-ingestion-base == 'false' }} + if: ${{ needs.setup.outputs.docker-login == 'true' && needs.setup.outputs.publish == 'false' && needs.setup.outputs.pr-publish == 'false' && needs.setup.outputs.ingestion_base_change == 'false' }} with: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Full Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || 
steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -789,7 +756,7 @@ jobs: ${{ env.DATAHUB_INGESTION_IMAGE }} build-args: | BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }} + DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} tags: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} @@ -800,7 +767,7 @@ jobs: platforms: linux/amd64,linux/arm64/v8 - name: Compute Tag (Full) id: tag - run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT datahub_ingestion_full_scan: permissions: contents: read # for actions/checkout to fetch code From e62e4b18c0b18d382d879d0a1e594dc3111d194c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 6 Jun 2024 15:54:32 -0700 Subject: [PATCH 4/4] fix(ingest/snowflake): avoid overfetching schemas from datahub (#10527) --- .../ingestion/source/snowflake/snowflake_v2.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 9bb6226b4947a..f155ac24fea3f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -254,7 +254,18 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): platform=self.platform, platform_instance=self.config.platform_instance, env=self.config.env, - graph=self.ctx.graph, + graph=( + # If we're ingestion schema metadata for tables/views, then we will populate + # schemas into the resolver as we go. We only need to do a bulk fetch + # if we're not ingesting schema metadata as part of ingestion. + self.ctx.graph + if not ( + self.config.include_technical_schema + and self.config.include_tables + and self.config.include_views + ) + else None + ), generate_usage_statistics=False, generate_operations=False, format_queries=self.config.format_sql_queries, @@ -1252,7 +1263,7 @@ def gen_schema_metadata( foreignKeys=foreign_keys, ) - if self.aggregator and self.config.parse_view_ddl: + if self.aggregator: self.aggregator.register_schema(urn=dataset_urn, schema=schema_metadata) return schema_metadata
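
The snowflake_v2.py change above boils down to one decision: hand the SQL-parsing aggregator a DataHub graph client only when this run is not already emitting schema metadata for tables and views; when schemas are being ingested anyway, the aggregator's schema resolver gets populated incrementally as schemas are generated, so a bulk fetch from DataHub would be redundant. A minimal, self-contained sketch of that decision follows; the SourceConfig dataclass and choose_graph_for_aggregator helper are illustrative names for this sketch, not DataHub APIs, while the three include_* flags mirror the config fields shown in the diff.

from dataclasses import dataclass
from typing import Optional


@dataclass
class SourceConfig:
    # Mirrors the three flags checked in the patch above.
    include_technical_schema: bool = True
    include_tables: bool = True
    include_views: bool = True


def choose_graph_for_aggregator(
    config: SourceConfig, graph: Optional[object]
) -> Optional[object]:
    """Return the graph client to pass to the SQL-parsing aggregator.

    If this run already ingests schema metadata for tables and views, the
    aggregator's resolver is filled as schemas are emitted, so no bulk
    fetch is needed and we pass None. Otherwise the aggregator keeps the
    graph so it can resolve schemas from DataHub on demand.
    """
    ingests_schemas_locally = (
        config.include_technical_schema
        and config.include_tables
        and config.include_views
    )
    return None if ingests_schemas_locally else graph


# A run that emits schema metadata itself avoids the bulk fetch entirely.
assert choose_graph_for_aggregator(SourceConfig(), graph="graph-client") is None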
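
The dataset_read_operations.py and dataset_report_operation.py examples added earlier in this series use the Operation client from datahub.api.graphql. A rough usage sketch is below; it assumes the client is constructed with host and token keyword arguments and exposes report_operation and query_operations methods, as the file names and the commented end_time_millis parameter suggest, and every value shown (host, token, URN, operation type) is a placeholder rather than text taken from the patch.

from datahub.api.graphql import Operation

# Placeholder connection details; substitute your own deployment and token.
DATAHUB_HOST = "https://your-datahub-host/gms"
DATAHUB_TOKEN = "your-access-token"

# Assumption: the client accepts the host and token as keyword arguments.
operation_client = Operation(
    datahub_host=DATAHUB_HOST,
    datahub_token=DATAHUB_TOKEN,
)

# Placeholder dataset URN for illustration.
dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"

# Report that an operation (for example an INSERT) ran against the dataset
# (assumed method name, mirroring dataset_report_operation.py).
operation_client.report_operation(urn=dataset_urn, operation_type="INSERT")

# Read back recorded operations, optionally bounded by a time window
# (assumed method name, mirroring dataset_read_operations.py).
operations = operation_client.query_operations(
    urn=dataset_urn,
    # start_time_millis=...,
    # end_time_millis=...,
)
print(operations)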