
Commit

Merge branch 'datahub-project:master' into master
hsheth2 authored Sep 10, 2024
2 parents bf1e915 + fc92d23 commit 215a50d
Showing 22 changed files with 1,008 additions and 145 deletions.
46 changes: 46 additions & 0 deletions datahub-frontend/app/controllers/Application.java
@@ -9,12 +9,15 @@
import akka.util.ByteString;
import auth.Authenticator;
import com.datahub.authentication.AuthenticationConstants;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.util.Pair;
import com.typesafe.config.Config;
import java.io.InputStream;
import java.net.URI;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -33,6 +36,7 @@
import play.libs.ws.StandaloneWSClient;
import play.libs.ws.ahc.StandaloneAhcWSClient;
import play.mvc.Controller;
import play.mvc.Http.Cookie;
import play.mvc.Http;
import play.mvc.ResponseHeader;
import play.mvc.Result;
@@ -132,6 +136,9 @@ public CompletableFuture<Result> proxy(String path, Http.Request request)
headers.put(Http.HeaderNames.X_FORWARDED_PROTO, List.of(schema));
}

// Get the current time to measure the duration of the request
Instant start = Instant.now();

return _ws.url(
String.format(
"%s://%s:%s%s", protocol, metadataServiceHost, metadataServicePort, resolvedUri))
@@ -160,6 +167,15 @@ AuthenticationConstants.LEGACY_X_DATAHUB_ACTOR_HEADER, getDataHubActorHeader(req
.execute()
.thenApply(
apiResponse -> {
// Log the query if it takes longer than the configured threshold and verbose logging is enabled
boolean verboseGraphQLLogging = _config.getBoolean("graphql.verbose.logging");
int verboseGraphQLLongQueryMillis = _config.getInt("graphql.verbose.slowQueryMillis");
Instant finish = Instant.now();
long timeElapsed = Duration.between(start, finish).toMillis();
if (verboseGraphQLLogging && timeElapsed >= verboseGraphQLLongQueryMillis) {
logSlowQuery(request, resolvedUri, timeElapsed);
}

final ResponseHeader header =
new ResponseHeader(
apiResponse.getStatus(),
@@ -359,4 +375,34 @@ private String mapPath(@Nonnull final String path) {
// Otherwise, return original path
return path;
}


/**
* Called if verbose logging is enabled and the request takes longer than the slow query milliseconds defined in the config
* @param request GraphQL request that was made
* @param resolvedUri URI that was requested
* @param duration How long the query took to complete
*/
private void logSlowQuery(Http.Request request, String resolvedUri, float duration) {
StringBuilder jsonBody = new StringBuilder();
Optional<Cookie> actorCookie = request.getCookie("actor");
String actorValue = actorCookie.isPresent() ? actorCookie.get().value() : "N/A";

try {
ObjectMapper mapper = new ObjectMapper();
JsonNode jsonNode = request.body().asJson();
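// Drop the potentially large GraphQL query text from the body so that only the remaining fields (e.g. variables) are logged below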
((ObjectNode) jsonNode).remove("query");
jsonBody.append(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonNode));
}
catch (Exception e) {
_logger.info("GraphQL Request Received: {}, Unable to parse JSON body", resolvedUri);
}
String jsonBodyStr = jsonBody.toString();
_logger.info("Slow GraphQL Request Received: {}, Request query string: {}, Request actor: {}, Request JSON: {}, Request completed in {} ms",
resolvedUri,
request.queryString(),
actorValue,
jsonBodyStr,
duration);
}
}
8 changes: 7 additions & 1 deletion datahub-frontend/conf/application.conf
@@ -298,4 +298,10 @@ entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES}
entityClient.restli.get.batchSize = 50
entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE}
entityClient.restli.get.batchConcurrency = 2
entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY}

# Enable verbose GraphQL logging for slow queries
graphql.verbose.logging = false
graphql.verbose.logging = ${?GRAPHQL_VERBOSE_LOGGING}
graphql.verbose.slowQueryMillis = 2500
graphql.verbose.slowQueryMillis = ${?GRAPHQL_VERBOSE_LONG_QUERY_MILLIS}
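
A minimal sketch of how these overrides might be supplied in a containerized deployment, assuming a docker-compose setup where the frontend service reads these environment variables (the service name `datahub-frontend-react` and the values are placeholders; only the variable names come from the config above):

```yaml
# Hypothetical compose override -- variable names taken from application.conf above,
# service name and values are assumptions.
services:
  datahub-frontend-react:
    environment:
      GRAPHQL_VERBOSE_LOGGING: "true"            # maps to graphql.verbose.logging
      GRAPHQL_VERBOSE_LONG_QUERY_MILLIS: "1500"  # maps to graphql.verbose.slowQueryMillis
```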
7 changes: 6 additions & 1 deletion docs/authentication/guides/sso/configure-oidc-react.md
@@ -79,7 +79,12 @@ At this point, your app registration should look like the following. Finally, cl

:::note Optional
Once registration is done, you will land on the app registration **Overview** tab.
On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if needed (for example, if you want to support both local testing and Azure deployments).

For the logout URI:
- **Front-channel logout URL**: `https://your-datahub-domain.com/login`

Finally, click **Save**.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/sso/azure-setup-authentication.png"/>
Expand Down
15 changes: 9 additions & 6 deletions metadata-ingestion/docs/sources/dbt/dbt.md
@@ -273,16 +273,19 @@ source:
# ... other configs
```

### Reducing "composed of" sprawl by hiding sources

When many dbt projects use a single table as a source, the "Composed Of" relationships can become very large and difficult to navigate,
and extra source nodes can clutter the lineage graph.

This is particularly useful for multi-project setups, but can be useful in single-project setups as well.

The benefit is that your entire dbt estate becomes much easier to navigate, and the borders between projects less noticeable.
The downside is that we will not pick up any documentation or meta mappings applied to dbt sources.

To enable this, set `entities_enabled.sources: No` and `skip_sources_in_lineage: true` in your dbt source config:

```yaml
source:
@@ -298,4 +301,4 @@ source:
skip_sources_in_lineage: true
```

[Experimental] It's also possible to use `skip_sources_in_lineage: true` without disabling sources entirely. If you do this, sources will not participate in the lineage graph - they'll have upstreams but no downstreams. However, they will still contribute to docs, tags, etc to the warehouse entity.
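
For illustration, a minimal recipe sketch for this experimental mode, with sources kept enabled but excluded from lineage (the file paths and `target_platform` are placeholders, not part of this change):

```yaml
# Hypothetical dbt ingestion recipe: source entities stay enabled (their docs/tags
# still flow to the warehouse tables), but they are skipped when building lineage edges.
source:
  type: dbt
  config:
    manifest_path: ./target/manifest.json   # placeholder path
    catalog_path: ./target/catalog.json     # placeholder path
    target_platform: snowflake              # placeholder platform
    skip_sources_in_lineage: true
```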
@@ -72,6 +72,7 @@ class BIAssetSubTypes(StrEnum):

# Mode
MODE_REPORT = "Report"
MODE_DATASET = "Dataset"
MODE_QUERY = "Query"
MODE_CHART = "Chart"
