From 87241d5bb59618182d9f6810859a0341f0a8ee53 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Fri, 14 Sep 2018 20:31:29 -0700 Subject: [PATCH 01/11] refactor(graphql): update to graphql-java version 10.0 --- pom.xml | 2 +- src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index ea617eef1..5f47cb913 100644 --- a/pom.xml +++ b/pom.xml @@ -353,7 +353,7 @@ com.graphql-java graphql-java - 4.2 + 10.0 diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java index d74180a82..5002e5ccc 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java @@ -1,9 +1,9 @@ package com.conveyal.gtfs.graphql; -import graphql.schema.FieldDataFetcher; import graphql.schema.GraphQLArgument; import graphql.schema.GraphQLFieldDefinition; import graphql.schema.GraphQLList; +import graphql.schema.PropertyDataFetcher; import static graphql.Scalars.GraphQLFloat; import static graphql.Scalars.GraphQLInt; @@ -17,7 +17,7 @@ public static GraphQLFieldDefinition string (String name) { return newFieldDefinition() .name(name) .type(GraphQLString) - .dataFetcher(new FieldDataFetcher(name)) + .dataFetcher(new PropertyDataFetcher(name)) .build(); } @@ -25,7 +25,7 @@ public static GraphQLFieldDefinition intt (String name) { return newFieldDefinition() .name(name) .type(GraphQLInt) - .dataFetcher(new FieldDataFetcher(name)) + .dataFetcher(new PropertyDataFetcher(name)) .build(); } From 3dbd66e0bb42b02d9fc4fb7691ee90bc21739ec5 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Wed, 26 Sep 2018 18:08:20 -0400 Subject: [PATCH 02/11] perf(graphql): make nested JDBC fetchers query with joins --- .../gtfs/graphql/fetchers/JDBCFetcher.java | 125 ++++++++------- .../graphql/fetchers/NestedJDBCFetcher.java | 143 +++++++++++++----- .../graphql/fetchers/RowCountFetcher.java | 22 ++- 3 
files changed, 196 insertions(+), 94 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 6ab6c90e9..7eb5e2fe6 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -146,73 +146,94 @@ public List> get (DataFetchingEnvironment environment) { // If we are fetching an item nested within a GTFS entity in the GraphQL query, we want to add an SQL "where" // clause using the values found here. Note, these are used in the below getResults call. - List parentJoinValues = new ArrayList<>(); + List inClauseValues = new ArrayList<>(); if (parentJoinField != null) { Map enclosingEntity = environment.getSource(); // FIXME: THIS IS BROKEN if parentJoinValue is null!!!! - Object parentJoinValue = enclosingEntity.get(parentJoinField); + Object inClauseValue = enclosingEntity.get(parentJoinField); // Check for null parentJoinValue to protect against NPE. - String parentJoinString = parentJoinValue == null ? null : parentJoinValue.toString(); - parentJoinValues.add(parentJoinString); - if (parentJoinValue == null) { + String inClauseString = inClauseValue == null ? null : inClauseValue.toString(); + inClauseValues.add(inClauseString); + if (inClauseValue == null) { return new ArrayList<>(); } } Map arguments = environment.getArguments(); - return getResults(namespace, parentJoinValues, arguments); + return getResults(namespace, inClauseValues, arguments); + } + + List> getResults ( + String namespace, + List inClauseValues, + Map arguments + ) { + // We could select only the requested fields by examining environment.getFields(), but we just get them all. + // The advantage of selecting * is that we don't need to validate the field names. + // All the columns will be loaded into the Map, + // but only the requested fields will be fetched from that Map using a MapFetcher. 
+ StringBuilder sqlBuilder = new StringBuilder(); + sqlBuilder.append("select *"); + return getResults( + namespace, + inClauseValues, + arguments, + // No additional prepared statement parameters, where conditions or tables are needed beyond those added by + // default in the next method. + new ArrayList<>(), + new ArrayList<>(), + new HashSet<>(), + sqlBuilder + ); } /** * Handle fetching functionality for a given namespace, set of join values, and arguments. This is broken out from * the standard get function so that it can be reused in other fetchers (i.e., NestedJdbcFetcher) */ - List> getResults (String namespace, List parentJoinValues, Map arguments) { - // Track the parameters for setting prepared statement parameters - List parameters = new ArrayList<>(); + List> getResults ( + String namespace, + List inClauseValues, + Map graphQLQueryArguments, + List preparedStatementParameters, + List whereConditions, + Set fromTables, + StringBuilder sqlStatementStringBuilder + ) { // This will contain one Map for each row fetched from the database table. List> results = new ArrayList<>(); - if (arguments == null) arguments = new HashMap<>(); + if (graphQLQueryArguments == null) graphQLQueryArguments = new HashMap<>(); // Ensure namespace exists and is clean. Note: FeedFetcher will have executed before this and validated that an // entry exists in the feeds table and the schema actually exists in the database. validateNamespace(namespace); - StringBuilder sqlBuilder = new StringBuilder(); - // We could select only the requested fields by examining environment.getFields(), but we just get them all. - // The advantage of selecting * is that we don't need to validate the field names. - // All the columns will be loaded into the Map, - // but only the requested fields will be fetched from that Map using a MapFetcher. - Set fromTables = new HashSet<>(); - // By default, select only from the primary table. Other tables may be added to this list to handle joins. 
+ // The primary table being selected from is added here. Other tables may have been / will be added to this list to handle joins. fromTables.add(String.join(".", namespace, tableName)); - sqlBuilder.append("select *"); - // We will build up additional sql clauses in this List (note: must be a List so that the order is preserved). - List conditions = new ArrayList<>(); // The order by clause will go here. String sortBy = ""; // If we are fetching an item nested within a GTFS entity in the GraphQL query, we want to add an SQL "where" // clause. This could conceivably be done automatically, but it's clearer to just express the intent. // Note, this is assuming the type of the field in the parent is a string. - if (parentJoinField != null && parentJoinValues != null && !parentJoinValues.isEmpty()) { - conditions.add(makeInClause(parentJoinField, parentJoinValues, parameters)); + if (parentJoinField != null && inClauseValues != null && !inClauseValues.isEmpty()) { + whereConditions.add(makeInClause(parentJoinField, inClauseValues, preparedStatementParameters)); } if (sortField != null) { // Sort field is not provided by user input, so it's ok to add here (i.e., it's not prone to SQL injection). sortBy = String.format(" order by %s", sortField); } - Set argumentKeys = arguments.keySet(); + Set argumentKeys = graphQLQueryArguments.keySet(); for (String key : argumentKeys) { // The pagination, bounding box, and date/time args should all be skipped here because they are handled // separately below from standard args (pagination becomes limit/offset clauses, bounding box applies to // stops table, and date/time args filter stop times. All other args become "where X in A, B, C" clauses. 
if (argsToSkip.contains(key)) continue; if (ID_ARG.equals(key)) { - Integer value = (Integer) arguments.get(key); - conditions.add(String.join(" = ", "id", value.toString())); + Integer value = (Integer) graphQLQueryArguments.get(key); + whereConditions.add(String.join(" = ", "id", value.toString())); } else { - List values = (List) arguments.get(key); - if (values != null && !values.isEmpty()) conditions.add(makeInClause(key, values, parameters)); + List values = (List) graphQLQueryArguments.get(key); + if (values != null && !values.isEmpty()) whereConditions.add(makeInClause(key, values, preparedStatementParameters)); } } if (argumentKeys.containsAll(boundingBoxArgs)) { @@ -222,7 +243,7 @@ List> getResults (String namespace, List parentJoinV // operating on the patterns table, a SELECT DISTINCT patterns query will be constructed with a join to // stops and pattern stops. for (String bound : boundingBoxArgs) { - Double value = (Double) arguments.get(bound); + Double value = (Double) graphQLQueryArguments.get(bound); // Determine delimiter/equality operator based on min/max String delimiter = bound.startsWith("max") ? 
" <= " : " >= "; // Determine field based on lat/lon @@ -232,7 +253,7 @@ List> getResults (String namespace, List parentJoinV boundsConditions.add(String.join(delimiter, fieldWithNamespace, value.toString())); } if ("stops".equals(tableName)) { - conditions.addAll(boundsConditions); + whereConditions.addAll(boundsConditions); } else if ("patterns".equals(tableName)) { // Add from table as unique_pattern_ids_in_bounds to match patterns table -> pattern stops -> stops fromTables.add( @@ -243,7 +264,7 @@ List> getResults (String namespace, List parentJoinV String.join(" and ", boundsConditions), namespace )); - conditions.add(String.format("%s.patterns.pattern_id = unique_pattern_ids_in_bounds.pattern_id", namespace)); + whereConditions.add(String.format("%s.patterns.pattern_id = unique_pattern_ids_in_bounds.pattern_id", namespace)); } } if (argumentKeys.contains(DATE_ARG)) { @@ -253,7 +274,7 @@ List> getResults (String namespace, List parentJoinV // service_dates table. In other words, feeds generated by the editor cannot be queried with the date/time args. String tripsTable = String.format("%s.trips", namespace); fromTables.add(tripsTable); - String date = getDateArgument(arguments); + String date = getDateArgument(graphQLQueryArguments); // Gather all service IDs that run on the provided date. fromTables.add(String.format( "(select distinct service_id from %s.service_dates where service_date = ?) as unique_service_ids_in_operation", @@ -261,11 +282,11 @@ List> getResults (String namespace, List parentJoinV ); // Add date to beginning of parameters list (it is used to pre-select a table in the from clause before any // other conditions or parameters are appended). - parameters.add(0, date); + preparedStatementParameters.add(0, date); if (argumentKeys.contains(FROM_ARG) && argumentKeys.contains(TO_ARG)) { // Determine which trips start in the specified time window by joining to filtered stop times. 
String timeFilteredTrips = "trips_beginning_in_time_period"; - conditions.add(String.format("%s.trip_id = %s.trip_id", timeFilteredTrips, tripsTable)); + whereConditions.add(String.format("%s.trip_id = %s.trip_id", timeFilteredTrips, tripsTable)); // Select all trip IDs that start during the specified time window. Note: the departure and arrival times // are divided by 86399 to account for trips that begin after midnight. FIXME: Should this be 86400? fromTables.add(String.format( @@ -273,16 +294,16 @@ List> getResults (String namespace, List parentJoinV "from (select distinct on (trip_id) * from %s.stop_times order by trip_id, stop_sequence) as first_stop_times " + "where departure_time %% 86399 >= %d and departure_time %% 86399 <= %d) as %s", namespace, - (int) arguments.get(FROM_ARG), - (int) arguments.get(TO_ARG), + (int) graphQLQueryArguments.get(FROM_ARG), + (int) graphQLQueryArguments.get(TO_ARG), timeFilteredTrips)); } // Join trips to service_dates (unique_service_ids_in_operation). - conditions.add(String.format("%s.service_id = unique_service_ids_in_operation.service_id", tripsTable)); + whereConditions.add(String.format("%s.service_id = unique_service_ids_in_operation.service_id", tripsTable)); } if (argumentKeys.contains(SEARCH_ARG)) { // Handle string search argument - String value = (String) arguments.get(SEARCH_ARG); + String value = (String) graphQLQueryArguments.get(SEARCH_ARG); if (!value.isEmpty()) { // Only apply string search if string is not empty. Set searchFields = getSearchFields(namespace); @@ -292,23 +313,23 @@ List> getResults (String namespace, List parentJoinV // FIXME: is ILIKE compatible with non-Postgres? LIKE doesn't work well enough (even when setting // the strings to lower case). 
searchClauses.add(String.format("%s ILIKE ?", field)); - parameters.add(String.format("%%%s%%", value)); + preparedStatementParameters.add(String.format("%%%s%%", value)); } if (!searchClauses.isEmpty()) { // Wrap string search in parentheses to isolate from other conditions. - conditions.add(String.format(("(%s)"), String.join(" OR ", searchClauses))); + whereConditions.add(String.format(("(%s)"), String.join(" OR ", searchClauses))); } } } - sqlBuilder.append(String.format(" from %s", String.join(", ", fromTables))); - if (!conditions.isEmpty()) { - sqlBuilder.append(" where "); - sqlBuilder.append(String.join(" and ", conditions)); + sqlStatementStringBuilder.append(String.format(" from %s", String.join(", ", fromTables))); + if (!whereConditions.isEmpty()) { + sqlStatementStringBuilder.append(" where "); + sqlStatementStringBuilder.append(String.join(" and ", whereConditions)); } // The default value for sortBy is an empty string, so it's safe to always append it here. Also, there is no // threat of SQL injection because the sort field value is not user input. - sqlBuilder.append(sortBy); - Integer limit = (Integer) arguments.get(LIMIT_ARG); + sqlStatementStringBuilder.append(sortBy); + Integer limit = (Integer) graphQLQueryArguments.get(LIMIT_ARG); if (limit == null) { limit = autoLimit ? DEFAULT_ROWS_TO_FETCH : -1; } @@ -320,18 +341,18 @@ List> getResults (String namespace, List parentJoinV // empty simply because it is clearer to define the condition in this way (vs. if limit > 0). // FIXME: Skipping limit is not scalable in many cases and should possibly be removed/limited. 
} else { - sqlBuilder.append(" limit ").append(limit); + sqlStatementStringBuilder.append(" limit ").append(limit); } - Integer offset = (Integer) arguments.get(OFFSET_ARG); + Integer offset = (Integer) graphQLQueryArguments.get(OFFSET_ARG); if (offset != null && offset >= 0) { - sqlBuilder.append(" offset ").append(offset); + sqlStatementStringBuilder.append(" offset ").append(offset); } Connection connection = null; try { connection = GTFSGraphQL.getConnection(); - PreparedStatement preparedStatement = connection.prepareStatement(sqlBuilder.toString()); + PreparedStatement preparedStatement = connection.prepareStatement(sqlStatementStringBuilder.toString()); int oneBasedIndex = 1; - for (String parameter : parameters) { + for (String parameter : preparedStatementParameters) { preparedStatement.setString(oneBasedIndex++, parameter); } // This logging produces a lot of noise during testing due to large numbers of joined sub-queries @@ -437,7 +458,7 @@ private Set getSearchFields(String namespace) { /** * Construct filter clause with '=' (single string) and add values to list of parameters. * */ - static String makeInClause(String filterField, String string, List parameters) { + static String filterEquals(String filterField, String string, List parameters) { // Add string to list of parameters (to be later used to set parameters for prepared statement). parameters.add(string); return String.format("%s = ?", filterField); @@ -448,7 +469,7 @@ static String makeInClause(String filterField, String string, List param * */ static String makeInClause(String filterField, List strings, List parameters) { if (strings.size() == 1) { - return makeInClause(filterField, strings.get(0), parameters); + return filterEquals(filterField, strings.get(0), parameters); } else { // Add strings to list of parameters (to be later used to set parameters for prepared statement). 
parameters.addAll(strings); diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index 7478a6304..e2fa71159 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -11,11 +11,14 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import static com.conveyal.gtfs.graphql.GraphQLUtil.multiStringArg; import static com.conveyal.gtfs.graphql.GraphQLUtil.stringArg; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.makeInClause; import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; /** @@ -75,54 +78,124 @@ public List> get (DataFetchingEnvironment environment) { // FIXME: NestedJDBCFetcher may need to be refactored so that it avoids conventions of JDBCFetcher (like the // implied limit of 50 records). For now, the autoLimit field has been added to JDBCFetcher, so that certain // fetchers (like the nested ones used solely for joins here) will not apply the limit by default. - Map arguments = null; + Map graphQLQueryArguemnts = environment.getArguments();; - // Store each iteration's fetch results here. - List> fetchResults = null; +// // Store each iteration's fetch results here. +// List> fetchResults = null; +// for (int i = 0; i < jdbcFetchers.length; i++) { +// JDBCFetcher fetcher = jdbcFetchers[i]; +// List joinValues; +// if (i == 0) { +// // For first iteration of fetching, use parent entity to get join values. +// Map enclosingEntity = environment.getSource(); +// // FIXME SQL injection: enclosing entity's ID could contain malicious character sequences; quote and sanitize the string. +// // FIXME: THIS IS BROKEN if parentJoinValue is null!!!! 
+// Object parentJoinValue = enclosingEntity.get(fetcher.parentJoinField); +// // Check for null parentJoinValue to protect against NPE. +// joinValues = new ArrayList<>(); +// String parentJoinString = parentJoinValue == null ? null : parentJoinValue.toString(); +// joinValues.add(parentJoinString); +// if (parentJoinValue == null) { +// return new ArrayList<>(); +// } +// } else { +// // Otherwise, get join values stored by previous fetcher. +// joinValues = joinValuesForJoinField.get(fetcher.parentJoinField); +// if (i == jdbcFetchers.length - 1) { +// // Apply arguments only to the final fetched table +// arguments = environment.getArguments(); +// LOG.info("{} args: {}", fetcher.tableName, arguments.keySet().toString()); +// } +// } +// LOG.info("Join values: {}", joinValues.toString()); +// fetchResults = fetcher.getResults(namespace, joinValues, arguments); +// if (fetchResults.size() == 0) { +// // If there are no results, the following queries will have no results to join to, so we can simply +// // return the empty list. +// return fetchResults; +// } else if (i < jdbcFetchers.length - 1) { +// // Otherwise, iterate over results from current fetcher to store for next iteration for all but the last +// // iteration (the last iteration's fetchResults will contain the final results). +// JDBCFetcher nextFetcher = jdbcFetchers[i + 1]; +// for (Map entity : fetchResults) { +// Object joinValue = entity.get(nextFetcher.parentJoinField); +// // Store join values in multimap for +// if (joinValue != null) +// joinValuesForJoinField.put(nextFetcher.parentJoinField, joinValue.toString()); +// } +// } +// } +// // Last iteration should finally return results. 
+// return null; + + JDBCFetcher lastFetcher = null; + List preparedStatementParameters = new ArrayList<>(); + Set fromTables = new HashSet<>(); + List whereConditions = new ArrayList<>(); for (int i = 0; i < jdbcFetchers.length; i++) { JDBCFetcher fetcher = jdbcFetchers[i]; - List joinValues; + String tableName = nameSpacedTableName(namespace, fetcher.tableName); if (i == 0) { - // For first iteration of fetching, use parent entity to get join values. + // For first iteration of fetching, use parent entity to get in clause values. + // Also, we add this directly to conditions since the table and field will be different than in the + // final fetcher. + List inClauseValues = new ArrayList<>(); Map enclosingEntity = environment.getSource(); - // FIXME SQL injection: enclosing entity's ID could contain malicious character sequences; quote and sanitize the string. - // FIXME: THIS IS BROKEN if parentJoinValue is null!!!! - Object parentJoinValue = enclosingEntity.get(fetcher.parentJoinField); + String inClauseValue = (String) enclosingEntity.get(fetcher.parentJoinField); // Check for null parentJoinValue to protect against NPE. - joinValues = new ArrayList<>(); - String parentJoinString = parentJoinValue == null ? null : parentJoinValue.toString(); - joinValues.add(parentJoinString); - if (parentJoinValue == null) { + if (inClauseValue == null) { return new ArrayList<>(); + } else { + inClauseValues.add(inClauseValue); } + // add the base table and in clause that this whole query is based off of. We specify the namespaced table field + // name to avoid conflicts resulting from selecting from other similarly named table fields + fromTables.add(tableName); + whereConditions.add(makeInClause( + nameSpacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField), + inClauseValues, + preparedStatementParameters + )); } else { - // Otherwise, get join values stored by previous fetcher.
- joinValues = joinValuesForJoinField.get(fetcher.parentJoinField); + // add nested table join by using a where condition + fromTables.add(tableName); + whereConditions.add(String.format( + "%s = %s", + nameSpacedTableFieldName(namespace, lastFetcher.tableName, fetcher.parentJoinField), + nameSpacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField) + )); + // check if we have reached the final nested table if (i == jdbcFetchers.length - 1) { - // Apply arguments only to the final fetched table - arguments = environment.getArguments(); - LOG.info("{} args: {}", fetcher.tableName, arguments.keySet().toString()); - } - } - LOG.info("Join values: {}", joinValues.toString()); - fetchResults = fetcher.getResults(namespace, joinValues, arguments); - if (fetchResults.size() == 0) { - // If there are no results, the following queries will have no results to join to, so we can simply - // return the empty list. - return fetchResults; - } else if (i < jdbcFetchers.length - 1) { - // Otherwise, iterate over results from current fetcher to store for next iteration for all but the last - // iteration (the last iteration's fetchResults will contain the final results). - JDBCFetcher nextFetcher = jdbcFetchers[i + 1]; - for (Map entity : fetchResults) { - Object joinValue = entity.get(nextFetcher.parentJoinField); - // Store join values in multimap for - if (joinValue != null) - joinValuesForJoinField.put(nextFetcher.parentJoinField, joinValue.toString()); + // final nested table reached! + // create a sqlBuilder to select all values from the final table + StringBuilder sqlStatementStringBuilder = new StringBuilder(); + sqlStatementStringBuilder.append("select "); + sqlStatementStringBuilder.append( + nameSpacedTableFieldName(namespace, fetcher.tableName, "*") + ); + // Make the query and return the results! 
+ return fetcher.getResults( + namespace, + new ArrayList<>(), + graphQLQueryArguemnts, + preparedStatementParameters, + whereConditions, + fromTables, + sqlStatementStringBuilder + ); } } + lastFetcher = fetcher; } // Last iteration should finally return results. - return fetchResults; + return new ArrayList<>(); + } + + private String nameSpacedTableName (String namespace, String tableName) { + return String.format("%s.%s", namespace, tableName); + } + + private String nameSpacedTableFieldName (String namespace, String tableName, String tableField) { + return String.format("%s.%s.%s", namespace, tableName, tableField); } } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java index e1a38a1aa..a6e12e973 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java @@ -15,7 +15,6 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -23,7 +22,7 @@ import static com.conveyal.gtfs.graphql.GraphQLUtil.intt; import static com.conveyal.gtfs.graphql.GraphQLUtil.string; import static com.conveyal.gtfs.graphql.GraphQLUtil.stringArg; -import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.makeInClause; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.filterEquals; import static graphql.Scalars.GraphQLInt; import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; import static graphql.schema.GraphQLObjectType.newObject; @@ -70,9 +69,13 @@ public Object get(DataFetchingEnvironment environment) { if (filterField != null) { // FIXME Does this handle null cases? // Add where clause to filter out non-matching results. 
- String filterValue = (String) parentFeedMap.get(filterField); - String inClause = makeInClause(filterField, filterValue, parameters); - clauses.add(String.join(" ", "where", inClause)); + clauses.add( + String.join( + " ", + "where", + filterEquals(filterField, (String) parentFeedMap.get(filterField), parameters) + ) + ); } else if (groupByField != null) { // Handle group by field and optionally handle any filter arguments passed in. if (!argKeys.isEmpty()) { @@ -86,8 +89,13 @@ public Object get(DataFetchingEnvironment environment) { } String filterValue = (String) arguments.get(groupedFilterField); if (filterValue != null) { - String inClause = makeInClause(groupedFilterField, filterValue, parameters); - clauses.add(String.join(" ", "where", inClause)); + clauses.add( + String.join( + " ", + "where", + filterEquals(groupedFilterField, filterValue, parameters) + ) + ); } } // Finally, add group by clause. From 1bab6581883d54085dc5e278691ab85ea29d65e0 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Wed, 26 Sep 2018 18:15:03 -0400 Subject: [PATCH 03/11] refactor(graphql): remove and reword some comments --- .../graphql/fetchers/NestedJDBCFetcher.java | 56 +------------------ 1 file changed, 2 insertions(+), 54 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index e2fa71159..4d51c356c 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -1,8 +1,6 @@ package com.conveyal.gtfs.graphql.fetchers; import com.conveyal.gtfs.graphql.GraphQLGtfsSchema; -import com.google.common.collect.ListMultimap; -import com.google.common.collect.MultimapBuilder; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import graphql.schema.GraphQLFieldDefinition; @@ -63,9 +61,6 @@ public static GraphQLFieldDefinition field (String 
tableName) { @Override public List> get (DataFetchingEnvironment environment) { - // Store the join values here. - ListMultimap joinValuesForJoinField = MultimapBuilder.treeKeys().arrayListValues().build(); - // GetSource is the context in which this this DataFetcher has been created, in this case a map representing // the parent feed (FeedFetcher). Map parentEntityMap = environment.getSource(); @@ -80,54 +75,6 @@ public List> get (DataFetchingEnvironment environment) { // fetchers (like the nested ones used solely for joins here) will not apply the limit by default. Map graphQLQueryArguemnts = environment.getArguments();; -// // Store each iteration's fetch results here. -// List> fetchResults = null; -// for (int i = 0; i < jdbcFetchers.length; i++) { -// JDBCFetcher fetcher = jdbcFetchers[i]; -// List joinValues; -// if (i == 0) { -// // For first iteration of fetching, use parent entity to get join values. -// Map enclosingEntity = environment.getSource(); -// // FIXME SQL injection: enclosing entity's ID could contain malicious character sequences; quote and sanitize the string. -// // FIXME: THIS IS BROKEN if parentJoinValue is null!!!! -// Object parentJoinValue = enclosingEntity.get(fetcher.parentJoinField); -// // Check for null parentJoinValue to protect against NPE. -// joinValues = new ArrayList<>(); -// String parentJoinString = parentJoinValue == null ? null : parentJoinValue.toString(); -// joinValues.add(parentJoinString); -// if (parentJoinValue == null) { -// return new ArrayList<>(); -// } -// } else { -// // Otherwise, get join values stored by previous fetcher. 
-// joinValues = joinValuesForJoinField.get(fetcher.parentJoinField); -// if (i == jdbcFetchers.length - 1) { -// // Apply arguments only to the final fetched table -// arguments = environment.getArguments(); -// LOG.info("{} args: {}", fetcher.tableName, arguments.keySet().toString()); -// } -// } -// LOG.info("Join values: {}", joinValues.toString()); -// fetchResults = fetcher.getResults(namespace, joinValues, arguments); -// if (fetchResults.size() == 0) { -// // If there are no results, the following queries will have no results to join to, so we can simply -// // return the empty list. -// return fetchResults; -// } else if (i < jdbcFetchers.length - 1) { -// // Otherwise, iterate over results from current fetcher to store for next iteration for all but the last -// // iteration (the last iteration's fetchResults will contain the final results). -// JDBCFetcher nextFetcher = jdbcFetchers[i + 1]; -// for (Map entity : fetchResults) { -// Object joinValue = entity.get(nextFetcher.parentJoinField); -// // Store join values in multimap for -// if (joinValue != null) -// joinValuesForJoinField.put(nextFetcher.parentJoinField, joinValue.toString()); -// } -// } -// } -// // Last iteration should finally return results. -// return null; - JDBCFetcher lastFetcher = null; List preparedStatementParameters = new ArrayList<>(); Set fromTables = new HashSet<>(); @@ -187,7 +134,8 @@ public List> get (DataFetchingEnvironment environment) { } lastFetcher = fetcher; } - // Last iteration should finally return results. + // This piece of code will never be reached because of how things get returned above. + // But it's here to make java happy. 
return new ArrayList<>(); } From 1eecbb51f38232bd11d044632591cc7101b363d1 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Thu, 27 Sep 2018 18:15:57 -0400 Subject: [PATCH 04/11] refactor(graphql): use dataloader for batching and caching of sql queries --- .../conveyal/gtfs/graphql/GTFSGraphQL.java | 12 +- .../gtfs/graphql/GraphQLGtfsSchema.java | 20 ++- .../conveyal/gtfs/graphql/GraphQLUtil.java | 7 + .../gtfs/graphql/fetchers/FeedFetcher.java | 3 + .../gtfs/graphql/fetchers/JDBCFetcher.java | 152 ++++++++++++------ .../graphql/fetchers/NestedJDBCFetcher.java | 24 ++- .../graphql/fetchers/SQLColumnFetcher.java | 55 ++++++- .../gtfs/graphql/GTFSGraphQLTest.java | 36 ++--- 8 files changed, 219 insertions(+), 90 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java index b7d9babf3..5c3aa3ee0 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java @@ -1,12 +1,14 @@ package com.conveyal.gtfs.graphql; -import com.conveyal.gtfs.GTFS; import graphql.GraphQL; +import graphql.execution.instrumentation.dataloader.DataLoaderDispatcherInstrumentation; import javax.sql.DataSource; import java.sql.Connection; import java.sql.SQLException; +import static com.conveyal.gtfs.graphql.GraphQLGtfsSchema.makeRegistry; + /** * This provides a GraphQL API around the gtfs-lib JDBC storage. * This just makes a Java API with the right schema available, which uses String requests and responses. @@ -23,7 +25,12 @@ public class GTFSGraphQL { /** Username and password can be null if connecting to a local instance with host-based authentication. 
*/ public static void initialize (DataSource dataSource) { GTFSGraphQL.dataSource = dataSource; - GRAPHQL = new GraphQL(GraphQLGtfsSchema.feedBasedSchema); + GRAPHQL = GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) + // this instrumentation implementation will dispatch all the dataloaders + // as each level fo the graphql query is executed and hence make batched objects + // available to the query and the associated DataFetchers + .instrumentation(new DataLoaderDispatcherInstrumentation(makeRegistry())) + .build(); } public static Connection getConnection() { @@ -37,5 +44,4 @@ public static Connection getConnection() { public static GraphQL getGraphQl () { return GRAPHQL; } - } diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index 23ff013a7..44a79d8f0 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -14,6 +14,7 @@ import graphql.schema.GraphQLScalarType; import graphql.schema.GraphQLSchema; import graphql.schema.GraphQLTypeReference; +import org.dataloader.DataLoaderRegistry; import java.sql.Array; import java.sql.SQLException; @@ -24,7 +25,18 @@ import static com.conveyal.gtfs.graphql.GraphQLUtil.multiStringArg; import static com.conveyal.gtfs.graphql.GraphQLUtil.string; import static com.conveyal.gtfs.graphql.GraphQLUtil.stringArg; -import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.*; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.DATE_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.FROM_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.ID_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.LIMIT_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.MAX_LAT; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.MAX_LON; +import static 
com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.MIN_LAT; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.MIN_LON; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.OFFSET_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.SEARCH_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.TO_ARG; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.jdbcDataLoader; import static graphql.Scalars.GraphQLFloat; import static graphql.Scalars.GraphQLInt; import static graphql.Scalars.GraphQLString; @@ -783,6 +795,12 @@ public class GraphQLGtfsSchema { // .mutation(someMutation) .build(); + public static DataLoaderRegistry makeRegistry() { + // DataLoaderRegistry is a place to register all data loaders in that needs to be dispatched together + DataLoaderRegistry registry = new DataLoaderRegistry(); + registry.register("jdbcfetcher", jdbcDataLoader); + return registry; + } private static class StringCoercing implements Coercing { @Override diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java index 5002e5ccc..fb342fcfb 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java @@ -57,4 +57,11 @@ public static GraphQLArgument floatArg (String name) { .build(); } + public static String namespacedTableName(String namespace, String tableName) { + return String.format("%s.%s", namespace, tableName); + } + + public static String namespacedTableFieldName(String namespace, String tableName, String tableField) { + return String.format("%s.%s.%s", namespace, tableName, tableField); + } } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java index 4e505315b..5b5386529 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java +++ 
b/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java @@ -14,6 +14,8 @@ import java.util.HashMap; import java.util.Map; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.validateNamespace; + /** * Fetch the summary row for a particular loaded feed, based on its namespace. * This essentially gets the row from the top-level summary table of all feeds that have been loaded into the database. @@ -25,6 +27,7 @@ public class FeedFetcher implements DataFetcher { @Override public Map get (DataFetchingEnvironment environment) { String namespace = environment.getArgument("namespace"); // This is the unique table prefix (the "schema"). + validateNamespace(namespace); StringBuilder sqlBuilder = new StringBuilder(); sqlBuilder.append(String.format("select * from feeds where namespace = '%s'", namespace)); Connection connection = null; diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 7eb5e2fe6..923c14609 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -7,6 +7,9 @@ import graphql.schema.GraphQLFieldDefinition; import graphql.schema.GraphQLList; import org.apache.commons.dbutils.DbUtils; +import org.dataloader.BatchLoaderEnvironment; +import org.dataloader.DataLoader; +import org.dataloader.MappedBatchLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,7 +26,9 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -34,7 +39,7 @@ /** * A generic fetcher to get fields out of an SQL database table. 
*/ -public class JDBCFetcher implements DataFetcher>> { +public class JDBCFetcher implements DataFetcher { public static final Logger LOG = LoggerFactory.getLogger(JDBCFetcher.class); @@ -70,6 +75,61 @@ public class JDBCFetcher implements DataFetcher>> { private final String sortField; private final boolean autoLimit; + public static final MappedBatchLoader>> sqlBatchLoader = + (jdbcQueries) -> CompletableFuture.supplyAsync(() -> getSqlQueryResults(jdbcQueries)); + + private static Map>> getSqlQueryResults(Set jdbcQueries) { + Map>> allResults = new HashMap<>(); + Connection connection = null; + try { + connection = GTFSGraphQL.getConnection(); + for (JdbcQuery jdbcQuery : jdbcQueries) { + allResults.put(jdbcQuery, getSqlQueryResult(jdbcQuery, connection)); + } + } finally { + DbUtils.closeQuietly(connection); + } + return allResults; + } + + public static List> getSqlQueryResult (JdbcQuery jdbcQuery, Connection connection) { + try { + List> results = new ArrayList<>(); + PreparedStatement preparedStatement = connection.prepareStatement(jdbcQuery.sqlStatement); + int oneBasedIndex = 1; + for (String parameter : jdbcQuery.preparedStatementParameters) { + preparedStatement.setString(oneBasedIndex++, parameter); + } + // This logging produces a lot of noise during testing due to large numbers of joined sub-queries + // LOG.info("table name={}", tableName); + LOG.info("SQL: {}", preparedStatement.toString()); + if (preparedStatement.execute()) { + ResultSet resultSet = preparedStatement.getResultSet(); + ResultSetMetaData meta = resultSet.getMetaData(); + int nColumns = meta.getColumnCount(); + // Iterate over result rows + while (resultSet.next()) { + // Create a Map to hold the contents of this row, injecting the sql schema namespace into every map + Map resultMap = new HashMap<>(); + resultMap.put("namespace", jdbcQuery.namespace); + // One-based iteration: start at one and use <=. 
+ for (int i = 1; i <= nColumns; i++) { + resultMap.put(meta.getColumnName(i), resultSet.getObject(i)); + } + results.add(resultMap); + } + } + LOG.info("Result size: {}", results.size()); + // Return a List of Maps, one Map for each row in the result. + return results; + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + public static final DataLoader>> jdbcDataLoader = + DataLoader.newMappedDataLoader(sqlBatchLoader); + /** * Constructor for tables that need neither restriction by a where clause nor sorting based on the enclosing entity. * These would typically be at the topmost level, directly inside a feed rather than nested in some GTFS entity type. @@ -130,7 +190,7 @@ public static GraphQLFieldDefinition field (String tableName) { // But what are the internal GraphQL objects, i.e. what does an ExecutionResult return? Are they Map? @Override - public List> get (DataFetchingEnvironment environment) { + public Object get (DataFetchingEnvironment environment) { // GetSource is the context in which this this DataFetcher has been created, in this case a map representing // the parent feed (FeedFetcher). Map parentEntityMap = environment.getSource(); @@ -163,7 +223,7 @@ public List> get (DataFetchingEnvironment environment) { return getResults(namespace, inClauseValues, arguments); } - List> getResults ( + Object getResults ( String namespace, List inClauseValues, Map arguments @@ -178,7 +238,7 @@ List> getResults ( namespace, inClauseValues, arguments, - // No additional prepared statement parameters, where conditions or tables are needed beyond those added by + // No additional prepared sqlStatement preparedStatementParameters, where conditions or tables are needed beyond those added by // default in the next method. new ArrayList<>(), new ArrayList<>(), @@ -191,7 +251,7 @@ List> getResults ( * Handle fetching functionality for a given namespace, set of join values, and arguments. 
This is broken out from * the standard get function so that it can be reused in other fetchers (i.e., NestedJdbcFetcher) */ - List> getResults ( + Object getResults ( String namespace, List inClauseValues, Map graphQLQueryArguments, @@ -280,8 +340,8 @@ List> getResults ( "(select distinct service_id from %s.service_dates where service_date = ?) as unique_service_ids_in_operation", namespace) ); - // Add date to beginning of parameters list (it is used to pre-select a table in the from clause before any - // other conditions or parameters are appended). + // Add date to beginning of preparedStatementParameters list (it is used to pre-select a table in the from clause before any + // other conditions or preparedStatementParameters are appended). preparedStatementParameters.add(0, date); if (argumentKeys.contains(FROM_ARG) && argumentKeys.contains(TO_ARG)) { // Determine which trips start in the specified time window by joining to filtered stop times. @@ -347,41 +407,11 @@ List> getResults ( if (offset != null && offset >= 0) { sqlStatementStringBuilder.append(" offset ").append(offset); } - Connection connection = null; - try { - connection = GTFSGraphQL.getConnection(); - PreparedStatement preparedStatement = connection.prepareStatement(sqlStatementStringBuilder.toString()); - int oneBasedIndex = 1; - for (String parameter : preparedStatementParameters) { - preparedStatement.setString(oneBasedIndex++, parameter); - } - // This logging produces a lot of noise during testing due to large numbers of joined sub-queries -// LOG.info("table name={}", tableName); - LOG.info("SQL: {}", preparedStatement.toString()); - if (preparedStatement.execute()) { - ResultSet resultSet = preparedStatement.getResultSet(); - ResultSetMetaData meta = resultSet.getMetaData(); - int nColumns = meta.getColumnCount(); - // Iterate over result rows - while (resultSet.next()) { - // Create a Map to hold the contents of this row, injecting the sql schema namespace into every map - Map resultMap 
= new HashMap<>(); - resultMap.put("namespace", namespace); - // One-based iteration: start at one and use <=. - for (int i = 1; i <= nColumns; i++) { - resultMap.put(meta.getColumnName(i), resultSet.getObject(i)); - } - results.add(resultMap); - } - } - } catch (SQLException e) { - throw new RuntimeException(e); - } finally { - DbUtils.closeQuietly(connection); - } - LOG.info("Result size: {}", results.size()); - // Return a List of Maps, one Map for each row in the result. - return results; + return jdbcDataLoader.load(new JdbcQuery( + namespace, + preparedStatementParameters, + sqlStatementStringBuilder.toString() + )); } private static String getDateArgument(Map arguments) { @@ -402,7 +432,7 @@ private static String getDateArgument(Map arguments) { * @param namespace database schema namespace/table prefix * @return */ - private static void validateNamespace(String namespace) { + public static void validateNamespace(String namespace) { if (namespace == null) { // If namespace is null, do no attempt a query on a namespace that does not exist. throw new IllegalArgumentException("Namespace prefix must be provided."); @@ -456,26 +486,54 @@ private Set getSearchFields(String namespace) { } /** - * Construct filter clause with '=' (single string) and add values to list of parameters. + * Construct filter clause with '=' (single string) and add values to list of preparedStatementParameters. * */ static String filterEquals(String filterField, String string, List parameters) { - // Add string to list of parameters (to be later used to set parameters for prepared statement). + // Add string to list of preparedStatementParameters (to be later used to set preparedStatementParameters for prepared sqlStatement). parameters.add(string); return String.format("%s = ?", filterField); } /** - * Construct filter clause with '=' (single string) or 'in' (multiple strings) and add values to list of parameters. 
+ * Construct filter clause with '=' (single string) or 'in' (multiple strings) and add values to list of preparedStatementParameters. * */ static String makeInClause(String filterField, List strings, List parameters) { if (strings.size() == 1) { return filterEquals(filterField, strings.get(0), parameters); } else { - // Add strings to list of parameters (to be later used to set parameters for prepared statement). + // Add strings to list of preparedStatementParameters (to be later used to set preparedStatementParameters for prepared sqlStatement). parameters.addAll(strings); String questionMarks = String.join(", ", Collections.nCopies(strings.size(), "?")); return String.format("%s in (%s)", filterField, questionMarks); } } + public static class JdbcQuery { + public String namespace; + public List preparedStatementParameters; + public String sqlStatement; + + + public JdbcQuery(String namespace, List preparedStatementParameters, String sqlStatement) { + this.namespace = namespace; + this.preparedStatementParameters = preparedStatementParameters; + this.sqlStatement = sqlStatement; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + JdbcQuery jdbcQuery = (JdbcQuery) o; + return Objects.equals(namespace, jdbcQuery.namespace) && + Objects.equals(preparedStatementParameters, jdbcQuery.preparedStatementParameters) && + Objects.equals(sqlStatement, jdbcQuery.sqlStatement); + } + + @Override + public int hashCode() { + return Objects.hash(namespace, preparedStatementParameters, sqlStatement); + } + } + } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index 4d51c356c..71b9f6c16 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -15,6 +15,8 @@ import 
java.util.Set; import static com.conveyal.gtfs.graphql.GraphQLUtil.multiStringArg; +import static com.conveyal.gtfs.graphql.GraphQLUtil.namespacedTableFieldName; +import static com.conveyal.gtfs.graphql.GraphQLUtil.namespacedTableName; import static com.conveyal.gtfs.graphql.GraphQLUtil.stringArg; import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.makeInClause; import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; @@ -25,7 +27,7 @@ * routes that serve a specific stop, starting with a top-level stop type, we can nest joins from stop ABC -> pattern * stops -> patterns -> routes (see below example implementation for more details). */ -public class NestedJDBCFetcher implements DataFetcher>> { +public class NestedJDBCFetcher implements DataFetcher { private static final Logger LOG = LoggerFactory.getLogger(NestedJDBCFetcher.class); private final JDBCFetcher[] jdbcFetchers; @@ -60,7 +62,7 @@ public static GraphQLFieldDefinition field (String tableName) { } @Override - public List> get (DataFetchingEnvironment environment) { + public Object get (DataFetchingEnvironment environment) { // GetSource is the context in which this this DataFetcher has been created, in this case a map representing // the parent feed (FeedFetcher). Map parentEntityMap = environment.getSource(); @@ -81,7 +83,7 @@ public List> get (DataFetchingEnvironment environment) { List whereConditions = new ArrayList<>(); for (int i = 0; i < jdbcFetchers.length; i++) { JDBCFetcher fetcher = jdbcFetchers[i]; - String tableName = nameSpacedTableName(namespace, fetcher.tableName); + String tableName = namespacedTableName(namespace, fetcher.tableName); if (i == 0) { // For first iteration of fetching, use parent entity to get in clause values. 
// Also, we add this directly to conditions since the table and field will be different than in the @@ -99,7 +101,7 @@ public List> get (DataFetchingEnvironment environment) { // name to avoid conflicts resulting from selecting from other similarly named table fields fromTables.add(tableName); whereConditions.add(makeInClause( - nameSpacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField), + namespacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField), inClauseValues, preparedStatementParameters )); @@ -108,8 +110,8 @@ public List> get (DataFetchingEnvironment environment) { fromTables.add(tableName); whereConditions.add(String.format( "%s = %s", - nameSpacedTableFieldName(namespace, lastFetcher.tableName, fetcher.parentJoinField), - nameSpacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField) + namespacedTableFieldName(namespace, lastFetcher.tableName, fetcher.parentJoinField), + namespacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField) )); // check if we have reached the final nested table if (i == jdbcFetchers.length - 1) { @@ -118,7 +120,7 @@ public List> get (DataFetchingEnvironment environment) { StringBuilder sqlStatementStringBuilder = new StringBuilder(); sqlStatementStringBuilder.append("select "); sqlStatementStringBuilder.append( - nameSpacedTableFieldName(namespace, fetcher.tableName, "*") + namespacedTableFieldName(namespace, fetcher.tableName, "*") ); // Make the query and return the results! return fetcher.getResults( @@ -138,12 +140,4 @@ public List> get (DataFetchingEnvironment environment) { // But it's here to make java happy. 
return new ArrayList<>(); } - - private String nameSpacedTableName (String namespace, String tableName) { - return String.format("%s.%s", namespace, tableName); - } - - private String nameSpacedTableFieldName (String namespace, String tableName, String tableField) { - return String.format("%s.%s.%s", namespace, tableName, tableField); - } } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java index 417061487..cfab0f775 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java @@ -1,14 +1,22 @@ package com.conveyal.gtfs.graphql.fetchers; +import com.conveyal.gtfs.graphql.GTFSGraphQL; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import org.apache.commons.dbutils.DbUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.sql.Connection; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import static com.conveyal.gtfs.graphql.GraphQLUtil.namespacedTableFieldName; +import static com.conveyal.gtfs.graphql.GraphQLUtil.namespacedTableName; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.validateNamespace; + /** * This wraps an SQL row fetcher, extracting only a single column of the specified type. * Because there's only one column, it collapses the result down into a list of elements of that column's type, @@ -29,17 +37,52 @@ public class SQLColumnFetcher implements DataFetcher> { public SQLColumnFetcher(String tableName, String parentJoinField, String columnName) { this.columnName = columnName; this.jdbcFetcher = new JDBCFetcher(tableName, parentJoinField); - } + /** + * This get method does not use the JdbcFetcher get method because the query ultimately returns a list of values + * instead of a List of a Map of Objects. 
+ */ @Override public List get (DataFetchingEnvironment environment) { - List result = new ArrayList<>(); - // Ideally we'd only fetch one column in the wrapped row fetcher. - for (Map row : jdbcFetcher.get(environment)) { - result.add((T)row.get(columnName)); + // GetSource is the context in which this this DataFetcher has been created, in this case a map representing + // the parent feed (FeedFetcher). + Map parentEntityMap = environment.getSource(); + String namespace = (String) parentEntityMap.get("namespace"); + validateNamespace(namespace); + + // get id to filter by + String filterValue = (String) parentEntityMap.get(jdbcFetcher.parentJoinField); + if (filterValue == null) { + return new ArrayList<>(); + } + + StringBuilder sqlStatementBuilder = new StringBuilder(); + sqlStatementBuilder.append(String.format( + "select %s from %s where %s = ?", + namespacedTableFieldName(namespace, jdbcFetcher.tableName, columnName), + namespacedTableName(namespace, jdbcFetcher.tableName), + namespacedTableFieldName(namespace, jdbcFetcher.tableName, jdbcFetcher.parentJoinField) + )); + + List results = new ArrayList<>(); + Connection connection = null; + try { + connection = GTFSGraphQL.getConnection(); + for (Map row : jdbcFetcher.getSqlQueryResult( + new JDBCFetcher.JdbcQuery( + namespace, + Arrays.asList(filterValue), + sqlStatementBuilder.toString() + ), + connection + )) { + results.add((T) row.get(columnName)); + } + } finally { + DbUtils.closeQuietly(connection); } - return result; + return results; } } diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index 3f294f1d9..c5c7cc87a 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -75,74 +75,74 @@ public static void tearDownClass() { } // tests that the graphQL schema can initialize - @Test + @Test(timeout=5000) public void canInitialize() { 
GTFSGraphQL.initialize(testDataSource); GraphQL graphQL = GTFSGraphQL.getGraphQl(); } // tests that the root element of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFeed() throws IOException { assertThat(queryGraphQL("feed.txt"), matchesSnapshot()); } // tests that the row counts of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFeedRowCounts() throws IOException { assertThat(queryGraphQL("feedRowCounts.txt"), matchesSnapshot()); } // tests that the errors of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchErrors() throws IOException { assertThat(queryGraphQL("feedErrors.txt"), matchesSnapshot()); } // tests that the feed_info of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFeedInfo() throws IOException { assertThat(queryGraphQL("feedFeedInfo.txt"), matchesSnapshot()); } // tests that the patterns of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchPatterns() throws IOException { assertThat(queryGraphQL("feedPatterns.txt"), matchesSnapshot()); } // tests that the agencies of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchAgencies() throws IOException { assertThat(queryGraphQL("feedAgencies.txt"), matchesSnapshot()); } // tests that the calendars of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchCalendars() throws IOException { assertThat(queryGraphQL("feedCalendars.txt"), matchesSnapshot()); } // tests that the fares of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFares() throws IOException { assertThat(queryGraphQL("feedFares.txt"), matchesSnapshot()); } // tests that the routes of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchRoutes() throws IOException { assertThat(queryGraphQL("feedRoutes.txt"), matchesSnapshot()); } // tests that the stops of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchStops() throws 
IOException { assertThat(queryGraphQL("feedStops.txt"), matchesSnapshot()); } // tests that the trips of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchTrips() throws IOException { assertThat(queryGraphQL("feedTrips.txt"), matchesSnapshot()); } @@ -150,19 +150,19 @@ public void canFetchTrips() throws IOException { // TODO: make tests for schedule_exceptions / calendar_dates // tests that the stop times of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchStopTimes() throws IOException { assertThat(queryGraphQL("feedStopTimes.txt"), matchesSnapshot()); } // tests that the stop times of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchServices() throws IOException { assertThat(queryGraphQL("feedServices.txt"), matchesSnapshot()); } // tests that the stop times of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchRoutesAndFilterTripsByDateAndTime() throws IOException { Map variables = new HashMap(); variables.put("namespace", testNamespace); @@ -176,7 +176,7 @@ public void canFetchRoutesAndFilterTripsByDateAndTime() throws IOException { } // tests that the limit argument applies properly to a fetcher defined with autolimit set to false - @Test + @Test(timeout=5000) public void canFetchNestedEntityWithLimit() throws IOException { assertThat(queryGraphQL("feedStopsStopTimeLimit.txt"), matchesSnapshot()); } @@ -185,7 +185,7 @@ public void canFetchNestedEntityWithLimit() throws IOException { * attempt to fetch more than one record with SQL injection as inputs * the graphql library should properly escape the string and return 0 results for stops */ - @Test + @Test(timeout=5000) public void canSanitizeSQLInjectionSentAsInput() throws IOException { Map variables = new HashMap(); variables.put("namespace", testInjectionNamespace); @@ -204,7 +204,7 @@ public void canSanitizeSQLInjectionSentAsInput() throws IOException { * attempt run a graphql query when one of the pieces of data 
contains a SQL injection * the graphql library should properly escape the string and complete the queries */ - @Test + @Test(timeout=5000) public void canSanitizeSQLInjectionSentAsKeyValue() throws IOException, SQLException { // manually update the route_id key in routes and patterns String injection = "'' OR 1=1; Select ''99"; From 485a0163b2b54f430d838f646a230eae7b6c7d10 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Mon, 1 Oct 2018 13:46:37 -0700 Subject: [PATCH 05/11] refactor(graphql): refactor graphql and dataloader initialization - initialize dataloader so that it only caches during a single request - also add test for graphql queries that are redundantly nested --- .../conveyal/gtfs/graphql/GTFSGraphQL.java | 66 ++++++++++------ .../gtfs/graphql/GraphQLGtfsSchema.java | 9 --- .../graphql/fetchers/ErrorCountFetcher.java | 4 +- .../gtfs/graphql/fetchers/FeedFetcher.java | 4 +- .../gtfs/graphql/fetchers/JDBCFetcher.java | 78 ++++++++++++------- .../graphql/fetchers/NestedJDBCFetcher.java | 4 + .../graphql/fetchers/RowCountFetcher.java | 4 +- .../graphql/fetchers/SQLColumnFetcher.java | 7 +- .../gtfs/graphql/GTFSGraphQLTest.java | 34 ++++---- src/test/resources/graphql/superNested.txt | 23 ++++++ .../canFetchMultiNestedEntities.json | 63 +++++++++++++++ 11 files changed, 214 insertions(+), 82 deletions(-) create mode 100644 src/test/resources/graphql/superNested.txt create mode 100644 src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json diff --git a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java index 5c3aa3ee0..32ff084b6 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java @@ -1,13 +1,18 @@ package com.conveyal.gtfs.graphql; +import com.conveyal.gtfs.graphql.fetchers.JDBCFetcher; +import graphql.ExecutionInput; import graphql.GraphQL; import 
graphql.execution.instrumentation.dataloader.DataLoaderDispatcherInstrumentation; +import graphql.schema.DataFetchingEnvironment; +import org.dataloader.DataLoader; +import org.dataloader.DataLoaderRegistry; import javax.sql.DataSource; -import java.sql.Connection; -import java.sql.SQLException; +import java.util.List; +import java.util.Map; -import static com.conveyal.gtfs.graphql.GraphQLGtfsSchema.makeRegistry; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.newJdbcDataLoader; /** * This provides a GraphQL API around the gtfs-lib JDBC storage. @@ -15,33 +20,50 @@ * To make this into a web API you need to wrap it in an HTTP framework / server. */ public class GTFSGraphQL { + public static Map run (DataSource dataSource, String query, Map variables) { + // the registry and dataLoaders must be created upon each request to avoid caching loaded data after each request + DataLoaderRegistry registry = new DataLoaderRegistry(); + DataLoader>> jdbcQueryDataLoader = newJdbcDataLoader(); + registry.register("jdbcfetcher", jdbcQueryDataLoader); - private static DataSource dataSource; - - // TODO Is it correct to share one of these objects between many instances? Is it supposed to be long-lived or threadsafe? - // Analysis-backend creates a new GraphQL object on every request. - private static GraphQL GRAPHQL; - - /** Username and password can be null if connecting to a local instance with host-based authentication. 
*/ - public static void initialize (DataSource dataSource) { - GTFSGraphQL.dataSource = dataSource; - GRAPHQL = GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) + return GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) // this instrumentation implementation will dispatch all the dataloaders // as each level fo the graphql query is executed and hence make batched objects // available to the query and the associated DataFetchers - .instrumentation(new DataLoaderDispatcherInstrumentation(makeRegistry())) - .build(); + .instrumentation(new DataLoaderDispatcherInstrumentation(registry)) + .build() + // we now have a graphQL schema with dataloaders instrumented. Time to make a query + .execute( + ExecutionInput.newExecutionInput() + .context(new GraphQLExecutionContext(jdbcQueryDataLoader, dataSource)) + .query(query) + .variables(variables) + .build() + ) + .toSpecification(); + } + + public static DataLoader>> getJdbcQueryDataLoaderFromContext ( + DataFetchingEnvironment environment + ) { + return ((GraphQLExecutionContext)environment.getContext()).jdbcQueryDataLoader; } - public static Connection getConnection() { - try { - return dataSource.getConnection(); - } catch (SQLException ex) { - throw new RuntimeException(ex); + public static DataSource getDataSourceFromContext (DataFetchingEnvironment environment) { + DataSource dataSource = ((GraphQLExecutionContext)environment.getContext()).dataSource; + if (dataSource == null) { + throw new RuntimeException("DataSource is not defined, unable to make sql query!"); } + return dataSource; } - public static GraphQL getGraphQl () { - return GRAPHQL; + private static class GraphQLExecutionContext { + private final DataLoader>> jdbcQueryDataLoader; + private final DataSource dataSource; + + private GraphQLExecutionContext(DataLoader>> jdbcQueryDataLoader, DataSource dataSource) { + this.jdbcQueryDataLoader = jdbcQueryDataLoader; + this.dataSource = dataSource; + } } } diff --git 
a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index 44a79d8f0..9744de83b 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -14,7 +14,6 @@ import graphql.schema.GraphQLScalarType; import graphql.schema.GraphQLSchema; import graphql.schema.GraphQLTypeReference; -import org.dataloader.DataLoaderRegistry; import java.sql.Array; import java.sql.SQLException; @@ -36,7 +35,6 @@ import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.OFFSET_ARG; import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.SEARCH_ARG; import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.TO_ARG; -import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.jdbcDataLoader; import static graphql.Scalars.GraphQLFloat; import static graphql.Scalars.GraphQLInt; import static graphql.Scalars.GraphQLString; @@ -795,13 +793,6 @@ public class GraphQLGtfsSchema { // .mutation(someMutation) .build(); - public static DataLoaderRegistry makeRegistry() { - // DataLoaderRegistry is a place to register all data loaders in that needs to be dispatched together - DataLoaderRegistry registry = new DataLoaderRegistry(); - registry.register("jdbcfetcher", jdbcDataLoader); - return registry; - } - private static class StringCoercing implements Coercing { @Override public Object serialize(Object input) { diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/ErrorCountFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/ErrorCountFetcher.java index 98f43551f..fe3c519e3 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/ErrorCountFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/ErrorCountFetcher.java @@ -8,6 +8,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.sql.DataSource; import java.sql.Connection; import java.sql.ResultSet; import 
java.sql.SQLException; @@ -37,9 +38,10 @@ public Object get(DataFetchingEnvironment environment) { List errorCounts = new ArrayList(); Map parentFeedMap = environment.getSource(); String namespace = (String) parentFeedMap.get("namespace"); + DataSource dataSource = GTFSGraphQL.getDataSourceFromContext(environment); Connection connection = null; try { - connection = GTFSGraphQL.getConnection(); + connection = dataSource.getConnection(); Statement statement = connection.createStatement(); String sql = String.format("select error_type, count(*) from %s.errors group by error_type", namespace); LOG.info("SQL: {}", sql); diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java index 5b5386529..19743bd44 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java @@ -6,6 +6,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.sql.DataSource; import java.sql.Connection; import java.sql.ResultSet; import java.sql.ResultSetMetaData; @@ -27,12 +28,13 @@ public class FeedFetcher implements DataFetcher { @Override public Map get (DataFetchingEnvironment environment) { String namespace = environment.getArgument("namespace"); // This is the unique table prefix (the "schema"). 
+ DataSource dataSource = GTFSGraphQL.getDataSourceFromContext(environment); validateNamespace(namespace); StringBuilder sqlBuilder = new StringBuilder(); sqlBuilder.append(String.format("select * from feeds where namespace = '%s'", namespace)); Connection connection = null; try { - connection = GTFSGraphQL.getConnection(); + connection = dataSource.getConnection(); Statement statement = connection.createStatement(); LOG.info("SQL: {}", sqlBuilder.toString()); if (statement.execute(sqlBuilder.toString())) { diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 923c14609..7cfd8d3d9 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -1,6 +1,5 @@ package com.conveyal.gtfs.graphql.fetchers; -import com.conveyal.gtfs.graphql.GTFSGraphQL; import com.conveyal.gtfs.graphql.GraphQLGtfsSchema; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -9,10 +8,11 @@ import org.apache.commons.dbutils.DbUtils; import org.dataloader.BatchLoaderEnvironment; import org.dataloader.DataLoader; -import org.dataloader.MappedBatchLoader; +import org.dataloader.MappedBatchLoaderWithContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.sql.DataSource; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -32,6 +32,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static com.conveyal.gtfs.graphql.GTFSGraphQL.getDataSourceFromContext; +import static com.conveyal.gtfs.graphql.GTFSGraphQL.getJdbcQueryDataLoaderFromContext; import static com.conveyal.gtfs.graphql.GraphQLUtil.multiStringArg; import static com.conveyal.gtfs.graphql.GraphQLUtil.stringArg; import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; @@ -75,19 +77,35 @@ public class JDBCFetcher 
implements DataFetcher { private final String sortField; private final boolean autoLimit; - public static final MappedBatchLoader>> sqlBatchLoader = - (jdbcQueries) -> CompletableFuture.supplyAsync(() -> getSqlQueryResults(jdbcQueries)); + /** + * This batched + */ + public static final MappedBatchLoaderWithContext>> sqlBatchLoader = + (jdbcQueries, context) -> CompletableFuture.supplyAsync(() -> getSqlQueryResults(jdbcQueries, context)); - private static Map>> getSqlQueryResults(Set jdbcQueries) { + private static Map>> getSqlQueryResults( + Set jdbcQueries, + BatchLoaderEnvironment context + ) { Map>> allResults = new HashMap<>(); - Connection connection = null; - try { - connection = GTFSGraphQL.getConnection(); - for (JdbcQuery jdbcQuery : jdbcQueries) { - allResults.put(jdbcQuery, getSqlQueryResult(jdbcQuery, connection)); + Map keyContexts = context.getKeyContexts(); + for (JdbcQuery jdbcQuery : jdbcQueries) { + Connection connection = null; + try { + DataSource dataSource = ((DataSource) keyContexts.get(jdbcQuery)); + connection = dataSource.getConnection(); + allResults.put( + jdbcQuery, + getSqlQueryResult( + jdbcQuery, + connection + ) + ); + } catch (SQLException e) { + throw new RuntimeException(e); + } finally { + DbUtils.closeQuietly(connection); } - } finally { - DbUtils.closeQuietly(connection); } return allResults; } @@ -127,8 +145,9 @@ public static List> getSqlQueryResult (JdbcQuery jdbcQuery, } } - public static final DataLoader>> jdbcDataLoader = - DataLoader.newMappedDataLoader(sqlBatchLoader); + public static DataLoader>> newJdbcDataLoader() { + return DataLoader.newMappedDataLoader(sqlBatchLoader); + } /** * Constructor for tables that need neither restriction by a where clause nor sorting based on the enclosing entity. 
@@ -220,14 +239,6 @@ public Object get (DataFetchingEnvironment environment) { } Map arguments = environment.getArguments(); - return getResults(namespace, inClauseValues, arguments); - } - - Object getResults ( - String namespace, - List inClauseValues, - Map arguments - ) { // We could select only the requested fields by examining environment.getFields(), but we just get them all. // The advantage of selecting * is that we don't need to validate the field names. // All the columns will be loaded into the Map, @@ -235,6 +246,8 @@ Object getResults ( StringBuilder sqlBuilder = new StringBuilder(); sqlBuilder.append("select *"); return getResults( + getJdbcQueryDataLoaderFromContext(environment), + getDataSourceFromContext(environment), namespace, inClauseValues, arguments, @@ -252,6 +265,8 @@ Object getResults ( * the standard get function so that it can be reused in other fetchers (i.e., NestedJdbcFetcher) */ Object getResults ( + DataLoader>> jdbcDataLoader, + DataSource dataSource, String namespace, List inClauseValues, Map graphQLQueryArguments, @@ -366,7 +381,7 @@ Object getResults ( String value = (String) graphQLQueryArguments.get(SEARCH_ARG); if (!value.isEmpty()) { // Only apply string search if string is not empty. - Set searchFields = getSearchFields(namespace); + Set searchFields = getSearchFields(dataSource, namespace); List searchClauses = new ArrayList<>(); for (String field : searchFields) { // Double percent signs format as single percents, which are used for the string matching. 
@@ -407,11 +422,14 @@ Object getResults ( if (offset != null && offset >= 0) { sqlStatementStringBuilder.append(" offset ").append(offset); } - return jdbcDataLoader.load(new JdbcQuery( - namespace, - preparedStatementParameters, - sqlStatementStringBuilder.toString() - )); + return jdbcDataLoader.load( + new JdbcQuery( + namespace, + preparedStatementParameters, + sqlStatementStringBuilder.toString() + ), + dataSource + ); } private static String getDateArgument(Map arguments) { @@ -447,14 +465,14 @@ public static void validateNamespace(String namespace) { * Get the list of fields to perform a string search on for the provided table. Each table included here must have * the {@link #SEARCH_ARG} argument applied in the query definition. */ - private Set getSearchFields(String namespace) { + private Set getSearchFields(DataSource dataSource, String namespace) { // Check table metadata for presence of columns. // long startTime = System.currentTimeMillis(); Set columnsForTable = new HashSet<>(); List searchColumns = new ArrayList<>(); Connection connection = null; try { - connection = GTFSGraphQL.getConnection(); + connection = dataSource.getConnection(); ResultSet columns = connection.getMetaData().getColumns(null, namespace, tableName, null); while (columns.next()) { String column = columns.getString(4); diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index 71b9f6c16..a1aad880d 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -14,6 +14,8 @@ import java.util.Map; import java.util.Set; +import static com.conveyal.gtfs.graphql.GTFSGraphQL.getDataSourceFromContext; +import static com.conveyal.gtfs.graphql.GTFSGraphQL.getJdbcQueryDataLoaderFromContext; import static com.conveyal.gtfs.graphql.GraphQLUtil.multiStringArg; import static 
com.conveyal.gtfs.graphql.GraphQLUtil.namespacedTableFieldName; import static com.conveyal.gtfs.graphql.GraphQLUtil.namespacedTableName; @@ -124,6 +126,8 @@ public Object get (DataFetchingEnvironment environment) { ); // Make the query and return the results! return fetcher.getResults( + getJdbcQueryDataLoaderFromContext(environment), + getDataSourceFromContext(environment), namespace, new ArrayList<>(), graphQLQueryArguemnts, diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java index a6e12e973..2434405d9 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java @@ -11,6 +11,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.sql.DataSource; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -57,12 +58,13 @@ public RowCountFetcher(String tableName, String filterField, String groupByField public Object get(DataFetchingEnvironment environment) { Map parentFeedMap = environment.getSource(); Map arguments = environment.getArguments(); + DataSource dataSource = GTFSGraphQL.getDataSourceFromContext(environment); List argKeys = new ArrayList<>(arguments.keySet()); List parameters = new ArrayList<>(); String namespace = (String) parentFeedMap.get("namespace"); Connection connection = null; try { - connection = GTFSGraphQL.getConnection(); + connection = dataSource.getConnection(); List fields = new ArrayList<>(); fields.add("count(*)"); List clauses = new ArrayList<>(); diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java index cfab0f775..1c1ebdd62 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java 
@@ -7,7 +7,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.sql.DataSource; import java.sql.Connection; +import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -50,6 +52,7 @@ public List get (DataFetchingEnvironment environment) { Map parentEntityMap = environment.getSource(); String namespace = (String) parentEntityMap.get("namespace"); validateNamespace(namespace); + DataSource dataSource = GTFSGraphQL.getDataSourceFromContext(environment); // get id to filter by String filterValue = (String) parentEntityMap.get(jdbcFetcher.parentJoinField); @@ -68,7 +71,7 @@ public List get (DataFetchingEnvironment environment) { List results = new ArrayList<>(); Connection connection = null; try { - connection = GTFSGraphQL.getConnection(); + connection = dataSource.getConnection(); for (Map row : jdbcFetcher.getSqlQueryResult( new JDBCFetcher.JdbcQuery( namespace, @@ -79,6 +82,8 @@ public List get (DataFetchingEnvironment environment) { )) { results.add((T) row.get(columnName)); } + } catch (SQLException e) { + throw new RuntimeException(e); } finally { DbUtils.closeQuietly(connection); } diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index c5c7cc87a..34dfab8b1 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -3,11 +3,12 @@ import com.conveyal.gtfs.TestUtils; import com.conveyal.gtfs.loader.FeedLoadResult; import com.fasterxml.jackson.databind.ObjectMapper; -import graphql.GraphQL; import org.apache.commons.io.IOUtils; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import javax.sql.DataSource; import java.io.FileInputStream; @@ -32,6 +33,8 @@ * in cases where the order of items in a list is not important. 
*/ public class GTFSGraphQLTest { + public static final Logger LOG = LoggerFactory.getLogger(GTFSGraphQLTest.class); + private static String testDBName; private static DataSource testDataSource; private static String testNamespace; @@ -74,13 +77,6 @@ public static void tearDownClass() { TestUtils.dropDB(testInjectionDBName); } - // tests that the graphQL schema can initialize - @Test(timeout=5000) - public void canInitialize() { - GTFSGraphQL.initialize(testDataSource); - GraphQL graphQL = GTFSGraphQL.getGraphQl(); - } - // tests that the root element of a feed can be fetched @Test(timeout=5000) public void canFetchFeed() throws IOException { @@ -181,6 +177,12 @@ public void canFetchNestedEntityWithLimit() throws IOException { assertThat(queryGraphQL("feedStopsStopTimeLimit.txt"), matchesSnapshot()); } + // tests that multiple levels of nested entities can be fetched in a single query + @Test(timeout=5000) + public void canFetchMultiNestedEntities() throws IOException { + assertThat(queryGraphQL("superNested.txt"), matchesSnapshot()); + } + /** * attempt to fetch more than one record with SQL injection as inputs * the graphql library should properly escape the string and return 0 results for stops @@ -248,15 +250,13 @@ private Map queryGraphQL( Map variables, DataSource dataSource ) throws IOException { - GTFSGraphQL.initialize(dataSource); - FileInputStream inputStream = new FileInputStream( - getResourceFileName(String.format("graphql/%s", queryFilename)) - ); - return GTFSGraphQL.getGraphQl().execute( - IOUtils.toString(inputStream), - null, - null, + LOG.info("Query GraphQL with query: " + queryFilename); + return GTFSGraphQL.run( + dataSource, + IOUtils.toString( + new FileInputStream(getResourceFileName(String.format("graphql/%s", queryFilename))) + ), variables - ).toSpecification(); + ); } } diff --git a/src/test/resources/graphql/superNested.txt b/src/test/resources/graphql/superNested.txt new file mode 100644 index 000000000..3d2ea056b
--- /dev/null +++ b/src/test/resources/graphql/superNested.txt @@ -0,0 +1,23 @@ +query ($namespace: String) { + feed(namespace: $namespace) { + feed_version + routes { + route_id + stops { + routes { + route_id + stops { + routes { + route_id + stops { + stop_id + } + } + stop_id + } + } + stop_id + } + } + } +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json new file mode 100644 index 000000000..97f3b64aa --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json @@ -0,0 +1,63 @@ +{ + "data" : { + "feed" : { + "feed_version" : "1.0", + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ] + } + } +} \ No newline at end of file From 2262cd105eaa3b829d1c9b83d4812b4e0a489efc Mon Sep 17 00:00:00 2001 From: evansiroky Date: Mon, 1 Oct 2018 16:38:47 -0700 Subject: [PATCH 06/11] refactor(graphql): improve some code comments --- .../conveyal/gtfs/graphql/GTFSGraphQL.java | 34 ++++++++++--- .../conveyal/gtfs/graphql/GraphQLUtil.java | 6 +++ 
.../gtfs/graphql/fetchers/JDBCFetcher.java | 50 ++++++++++--------- .../graphql/fetchers/NestedJDBCFetcher.java | 13 +++-- 4 files changed, 68 insertions(+), 35 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java index 32ff084b6..62330b8b5 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java @@ -14,12 +14,17 @@ import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.newJdbcDataLoader; -/** - * This provides a GraphQL API around the gtfs-lib JDBC storage. - * This just makes a Java API with the right schema available, which uses String requests and responses. - * To make this into a web API you need to wrap it in an HTTP framework / server. - */ public class GTFSGraphQL { + + /** + * Execute a graphQL request. This method creates a new graphQL runner that is able to cache and batch sql queries + * during the course of obtaining the data according to the graphQL query. + * + * @param dataSource The dataSource to use in connecting to a database to make sql queries. 
+ * @param query The graphQL query + * @param variables The variables to apply to the graphQL query + * @return The graphQL response, as a map suitable for JSON serialization + */ public static Map run (DataSource dataSource, String query, Map variables) { // the registry and dataLoaders must be created upon each request to avoid caching loaded data after each request DataLoaderRegistry registry = new DataLoaderRegistry(); @@ -27,13 +32,16 @@ public static Map run (DataSource dataSource, String query, Map< registry.register("jdbcfetcher", jdbcQueryDataLoader); return GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) - // this instrumentation implementation will dispatch all the dataloaders - // as each level fo the graphql query is executed and hence make batched objects - // available to the query and the associated DataFetchers + // this instrumentation implementation will dispatch all the dataloaders as each level of the graphql query + // is executed and hence make batched objects available to the query and the associated DataFetchers. This + // must be created new each time to avoid caching load results in future requests. .instrumentation(new DataLoaderDispatcherInstrumentation(registry)) .build() // we now have a graphQL schema with dataloaders instrumented. Time to make a query .execute( + // Build the execution input giving an environment context, query and variables. The environment + // context is used primarily in the JDBCFetcher to get the current loader and dataSource from which + // the sql queries can be executed. ExecutionInput.newExecutionInput() .context(new GraphQLExecutionContext(jdbcQueryDataLoader, dataSource)) .query(query) @@ -43,12 +51,19 @@ public static Map run (DataSource dataSource, String query, Map< .toSpecification(); } + /** + * Helper to obtain the jdbcQueryLoader. It is critical that the same dataloader in the DataloaderRegistry / + * DataLoaderDispatcherInstrumentation be used otherwise the queries will never be able to be dispatched. 
+ */ public static DataLoader>> getJdbcQueryDataLoaderFromContext ( DataFetchingEnvironment environment ) { return ((GraphQLExecutionContext)environment.getContext()).jdbcQueryDataLoader; } + /** + * Helper method to return the DataSource from the DataFetchingEnvironment + */ public static DataSource getDataSourceFromContext (DataFetchingEnvironment environment) { DataSource dataSource = ((GraphQLExecutionContext)environment.getContext()).dataSource; if (dataSource == null) { @@ -57,6 +72,9 @@ public static DataSource getDataSourceFromContext (DataFetchingEnvironment envir return dataSource; } + /** + * The object used as the DataFetchingEnvironment context in graphQL queries. + */ private static class GraphQLExecutionContext { private final DataLoader>> jdbcQueryDataLoader; private final DataSource dataSource; diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java index fb342fcfb..9d45fa3b7 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java @@ -57,10 +57,16 @@ public static GraphQLArgument floatArg (String name) { .build(); } + /** + * Helper method to get the namespaced table name + */ public static String namespacedTableName(String namespace, String tableName) { return String.format("%s.%s", namespace, tableName); } + /** + * Helper method to get the string of the column name with the namespace and table name + */ public static String namespacedTableFieldName(String namespace, String tableName, String tableField) { return String.format("%s.%s.%s", namespace, tableName, tableField); } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 7cfd8d3d9..c58a7ce49 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ 
-8,7 +8,6 @@ import org.apache.commons.dbutils.DbUtils; import org.dataloader.BatchLoaderEnvironment; import org.dataloader.DataLoader; -import org.dataloader.MappedBatchLoaderWithContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,11 +77,17 @@ public class JDBCFetcher implements DataFetcher { private final boolean autoLimit; /** - * This batched + * Get a new MappedBatchLoaderWithContext batch loader that is able to batch multiple queries and cache the results. */ - public static final MappedBatchLoaderWithContext>> sqlBatchLoader = - (jdbcQueries, context) -> CompletableFuture.supplyAsync(() -> getSqlQueryResults(jdbcQueries, context)); + public static DataLoader>> newJdbcDataLoader() { + return DataLoader.newMappedDataLoader( + (jdbcQueries, context) -> CompletableFuture.supplyAsync(() -> getSqlQueryResults(jdbcQueries, context)) + ); + } + /** + * Method where batched sql queries are all ran at once. + */ private static Map>> getSqlQueryResults( Set jdbcQueries, BatchLoaderEnvironment context @@ -92,6 +97,7 @@ private static Map>> getSqlQueryResults( for (JdbcQuery jdbcQuery : jdbcQueries) { Connection connection = null; try { + // get the dataSource from the context of this specific jdbcQuery DataSource dataSource = ((DataSource) keyContexts.get(jdbcQuery)); connection = dataSource.getConnection(); allResults.put( @@ -110,6 +116,12 @@ private static Map>> getSqlQueryResults( return allResults; } + /** + * Helper method to actually execute some sql. + * + * @param jdbcQuery The query to execute. 
Object includes namespace, statement, and preparedStatementParameters + * @param connection The database connection to use + */ public static List> getSqlQueryResult (JdbcQuery jdbcQuery, Connection connection) { try { List> results = new ArrayList<>(); @@ -145,10 +157,6 @@ public static List> getSqlQueryResult (JdbcQuery jdbcQuery, } } - public static DataLoader>> newJdbcDataLoader() { - return DataLoader.newMappedDataLoader(sqlBatchLoader); - } - /** * Constructor for tables that need neither restriction by a where clause nor sorting based on the enclosing entity. * These would typically be at the topmost level, directly inside a feed rather than nested in some GTFS entity type. @@ -194,20 +202,9 @@ public static GraphQLFieldDefinition field (String tableName) { .build(); } - // Horrifically, we're going from SQL response to Gtfs-lib Java model object to GraphQL Java object to JSON. - // What if we did direct SQL->JSON? - // Could we transform JDBC ResultSets directly to JSON? - // With Jackson streaming API we can make a ResultSet serializer: https://stackoverflow.com/a/8120442 - - // We could apply a transformation from ResultSet to Gtfs-lib model object, but then more DataFetchers - // need to be defined to pull the fields out of those model objects. I'll try to skip those intermediate objects. - - // Unfortunately we can't just apply DataFetchers directly to the ResultSets because they're cursors, and we'd - // have to somehow advance them at the right moment. So we need to transform the SQL results into fully materialized - // Java objects, then transform those into GraphQL fields. Fortunately the final transformation is trivial fetching - // from a Map. - // But what are the internal GraphQL objects, i.e. what does an ExecutionResult return? Are they Map? - + /** + * get some data by fetching data from the database. 
+ */ @Override public Object get (DataFetchingEnvironment environment) { // GetSource is the context in which this this DataFetcher has been created, in this case a map representing @@ -262,7 +259,11 @@ public Object get (DataFetchingEnvironment environment) { /** * Handle fetching functionality for a given namespace, set of join values, and arguments. This is broken out from - * the standard get function so that it can be reused in other fetchers (i.e., NestedJdbcFetcher) + * the standard get function so that it can be reused in other fetchers (i.e., NestedJdbcFetcher). This method + * ultimately returns a CompletableFuture result by calling the load method on the jdbcDataLoader argument. + * Therefore, this can't be directly used to get the results of a sql query. Instead it is intended to be used only + * by DataFetchers that get their data by making a sql query and get their results in the format of + * List>. */ Object getResults ( DataLoader>> jdbcDataLoader, @@ -526,6 +527,9 @@ static String makeInClause(String filterField, List strings, List preparedStatementParameters; diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index a1aad880d..44c65b61c 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -24,10 +24,10 @@ import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; /** - * This fetcher uses nested calls to the general JDBCFetcher for SQL data. It uses the parent entity and a series of - * joins to leap frog from one entity to another more distantly-related entity. For example, if we want to know the - * routes that serve a specific stop, starting with a top-level stop type, we can nest joins from stop ABC -> pattern - * stops -> patterns -> routes (see below example implementation for more details). 
+ * This fetcher creates one big query by joining tables in the order specified in the initial definition. The data + * returned will only consist of the columns from the final table definition. All other tables are assumed to be + * intermediate tables used to create joins to obtain filtered data from the final table. Also, any grqphQL arguments + * are applied only to the final table. */ public class NestedJDBCFetcher implements DataFetcher { @@ -63,6 +63,11 @@ public static GraphQLFieldDefinition field (String tableName) { .build(); } + /** + * get the data for the nested query by making a sql query. The sql query is constructed by adding tables to be + * queried and joining them via where clauses. This ultimately returns a CompleteableFuture value that is + * executed by graphQL at some strategic point in time. + */ @Override public Object get (DataFetchingEnvironment environment) { // GetSource is the context in which this this DataFetcher has been created, in this case a map representing From 5974df05b1f05816d52f4d6c445acae6ab623c23 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Tue, 2 Oct 2018 11:18:46 -0700 Subject: [PATCH 07/11] refactor(graphql): add a static variable of the introspectedSchema --- src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java index 62330b8b5..e062217d3 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java @@ -4,6 +4,7 @@ import graphql.ExecutionInput; import graphql.GraphQL; import graphql.execution.instrumentation.dataloader.DataLoaderDispatcherInstrumentation; +import graphql.introspection.IntrospectionQuery; import graphql.schema.DataFetchingEnvironment; import org.dataloader.DataLoader; import org.dataloader.DataLoaderRegistry; @@ -51,6 +52,11 @@ public static Map run 
(DataSource dataSource, String query, Map< .toSpecification(); } + public static final Map introspectedSchema = GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) + .build() + .execute(IntrospectionQuery.INTROSPECTION_QUERY) + .toSpecification(); + /** * Helper to obtain the jdbcQueryLoader. It is critical that the same dataloader in the DataloaderRegistry / * DataLoaderDispatcherInstrumentation be used otherwise the queries will never be able to be dispatched. From 92c34f88812fa2ce1ec8ae481656940e173b0ab4 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Mon, 8 Oct 2018 11:44:41 -0700 Subject: [PATCH 08/11] refactor(graphql): use exact return types for jdbqueries --- .../com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java | 6 +++--- .../conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index c58a7ce49..eeae1ad25 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -206,7 +206,7 @@ public static GraphQLFieldDefinition field (String tableName) { * get some data by fetching data from the database. */ @Override - public Object get (DataFetchingEnvironment environment) { + public CompletableFuture>> get (DataFetchingEnvironment environment) { // GetSource is the context in which this this DataFetcher has been created, in this case a map representing // the parent feed (FeedFetcher). Map parentEntityMap = environment.getSource(); @@ -231,7 +231,7 @@ public Object get (DataFetchingEnvironment environment) { String inClauseString = inClauseValue == null ? 
null : inClauseValue.toString(); inClauseValues.add(inClauseString); if (inClauseValue == null) { - return new ArrayList<>(); + return new CompletableFuture<>(); } } Map arguments = environment.getArguments(); @@ -265,7 +265,7 @@ public Object get (DataFetchingEnvironment environment) { * by DataFetchers that get their data by making a sql query and get their results in the format of * List>. */ - Object getResults ( + CompletableFuture>> getResults ( DataLoader>> jdbcDataLoader, DataSource dataSource, String namespace, diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index 44c65b61c..3ad40fcc5 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -13,6 +13,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CompletableFuture; import static com.conveyal.gtfs.graphql.GTFSGraphQL.getDataSourceFromContext; import static com.conveyal.gtfs.graphql.GTFSGraphQL.getJdbcQueryDataLoaderFromContext; @@ -69,7 +70,7 @@ public static GraphQLFieldDefinition field (String tableName) { * executed by graphQL at some strategic point in time. */ @Override - public Object get (DataFetchingEnvironment environment) { + public CompletableFuture>> get (DataFetchingEnvironment environment) { // GetSource is the context in which this this DataFetcher has been created, in this case a map representing // the parent feed (FeedFetcher). Map parentEntityMap = environment.getSource(); @@ -100,7 +101,7 @@ public Object get (DataFetchingEnvironment environment) { String inClauseValue = (String) enclosingEntity.get(fetcher.parentJoinField); // Check for null parentJoinValue to protect against NPE. 
if (inClauseValue == null) { - return new ArrayList<>(); + return new CompletableFuture<>(); } else { inClauseValues.add(inClauseValue); } @@ -147,6 +148,6 @@ public Object get (DataFetchingEnvironment environment) { } // This piece of code will never be reached because of how things get returned above. // But it's here to make java happy. - return new ArrayList<>(); + return new CompletableFuture<>(); } } From dd3767f5d027c980b491c0629c62d5ba24ba6e77 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Mon, 15 Oct 2018 11:08:36 -0700 Subject: [PATCH 09/11] refactor(jdbcFetchers): simplify getResults method and add comments Addresses some PR review comments in https://github.com/conveyal/gtfs-lib/pull/144 --- .../gtfs/graphql/fetchers/JDBCFetcher.java | 81 ++++++++++--------- .../graphql/fetchers/NestedJDBCFetcher.java | 43 ++++------ 2 files changed, 58 insertions(+), 66 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index eeae1ad25..174d67f6c 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -207,22 +207,11 @@ public static GraphQLFieldDefinition field (String tableName) { */ @Override public CompletableFuture>> get (DataFetchingEnvironment environment) { - // GetSource is the context in which this this DataFetcher has been created, in this case a map representing - // the parent feed (FeedFetcher). - Map parentEntityMap = environment.getSource(); - - // Apparently you can't get the arguments from the parent - how do you have arguments on sub-fields? - // It looks like you have to redefine the argument on each subfield and pass it explicitly in the GraphQL request. 
- - // String namespace = environment.getArgument("namespace"); // This is going to be the unique prefix, not the feedId in the usual sense - // This DataFetcher only makes sense when the enclosing parent object is a feed or something in a feed. - // So it should always be represented as a map with a namespace key. - - String namespace = (String) parentEntityMap.get("namespace"); - // If we are fetching an item nested within a GTFS entity in the GraphQL query, we want to add an SQL "where" // clause using the values found here. Note, these are used in the below getResults call. List inClauseValues = new ArrayList<>(); + List whereConditions = new ArrayList<>(); + List preparedStatementParameters = new ArrayList<>(); if (parentJoinField != null) { Map enclosingEntity = environment.getSource(); // FIXME: THIS IS BROKEN if parentJoinValue is null!!!! @@ -233,8 +222,8 @@ public CompletableFuture>> get (DataFetchingEnvironment if (inClauseValue == null) { return new CompletableFuture<>(); } + whereConditions.add(makeInClause(parentJoinField, inClauseValues, preparedStatementParameters)); } - Map arguments = environment.getArguments(); // We could select only the requested fields by examining environment.getFields(), but we just get them all. // The advantage of selecting * is that we don't need to validate the field names. @@ -243,17 +232,11 @@ public CompletableFuture>> get (DataFetchingEnvironment StringBuilder sqlBuilder = new StringBuilder(); sqlBuilder.append("select *"); return getResults( - getJdbcQueryDataLoaderFromContext(environment), - getDataSourceFromContext(environment), - namespace, - inClauseValues, - arguments, - // No additional prepared sqlStatement preparedStatementParameters, where conditions or tables are needed beyond those added by - // default in the next method. 
- new ArrayList<>(), - new ArrayList<>(), - new HashSet<>(), - sqlBuilder + environment, + sqlBuilder, + new HashSet<>(), // No additional tables are needed beyond those added by default in the next method. + whereConditions, + preparedStatementParameters ); } @@ -264,18 +247,39 @@ public CompletableFuture>> get (DataFetchingEnvironment * Therefore, this can't be directly used to get the results of a sql query. Instead it is intended to be used only * by DataFetchers that get their data by making a sql query and get their results in the format of * List>. + * + * @param environment The data fetching environment of the current GraphQL entity being fetched. + * @param sqlStatementStringBuilder The beginnings of the sql statement string. The NestedJdbcFetcher must select + * only from the final table, so this argument is needed. + * @param fromTables The tables to select from. + * @param whereConditions A list of where conditions to apply to the query. + * @param preparedStatementParameters A list of prepared statement parameters to be sent to the prepared statement + * once the query is run. */ CompletableFuture>> getResults ( - DataLoader>> jdbcDataLoader, - DataSource dataSource, - String namespace, - List inClauseValues, - Map graphQLQueryArguments, - List preparedStatementParameters, - List whereConditions, + DataFetchingEnvironment environment, + StringBuilder sqlStatementStringBuilder, Set fromTables, - StringBuilder sqlStatementStringBuilder + List whereConditions, + List preparedStatementParameters ) { + // GetSource is the context in which this DataFetcher has been created, in this case a map representing + // the parent feed (FeedFetcher). + Map parentEntityMap = environment.getSource(); + + // This DataFetcher only makes sense when the enclosing parent object is a feed or something in a feed. + // So it should always be represented as a map with a namespace key.
+ String namespace = (String) parentEntityMap.get("namespace"); + + // these can be additional arguments to apply to the query such as a limit on the amount of results + Map graphQLQueryArguments = environment.getArguments(); + + // Use the datasource defined in theDataFetching environment to query the database. + DataSource dataSource = getDataSourceFromContext(environment); + + // Make sure to get the dataloader defined in the environment to avoid caching sql results in future requests. + DataLoader>> jdbcDataLoader = getJdbcQueryDataLoaderFromContext(environment); + // This will contain one Map for each row fetched from the database table. List> results = new ArrayList<>(); if (graphQLQueryArguments == null) graphQLQueryArguments = new HashMap<>(); @@ -288,12 +292,6 @@ CompletableFuture>> getResults ( // The order by clause will go here. String sortBy = ""; - // If we are fetching an item nested within a GTFS entity in the GraphQL query, we want to add an SQL "where" - // clause. This could conceivably be done automatically, but it's clearer to just express the intent. - // Note, this is assuming the type of the field in the parent is a string. - if (parentJoinField != null && inClauseValues != null && !inClauseValues.isEmpty()) { - whereConditions.add(makeInClause(parentJoinField, inClauseValues, preparedStatementParameters)); - } if (sortField != null) { // Sort field is not provided by user input, so it's ok to add here (i.e., it's not prone to SQL injection). sortBy = String.format(" order by %s", sortField); @@ -528,7 +526,10 @@ static String makeInClause(String filterField, List strings, List>> get (DataFetchingEnvironment // So it should always be represented as a map with a namespace key. String namespace = (String) parentEntityMap.get("namespace"); - // Arguments are only applied for the final fetcher iteration. 
- // FIXME: NestedJDBCFetcher may need to be refactored so that it avoids conventions of JDBCFetcher (like the - // implied limit of 50 records). For now, the autoLimit field has been added to JDBCFetcher, so that certain - // fetchers (like the nested ones used solely for joins here) will not apply the limit by default. - Map graphQLQueryArguemnts = environment.getArguments();; - + // the lastFetcher is used to keep track of the last JdbcFetcher seen. This is used to create join conditions + // and eventually execute the query. JDBCFetcher lastFetcher = null; - List preparedStatementParameters = new ArrayList<>(); + + // The statement needs to be built to specific a namespaced table name in order to select only the fields from + // the last fetcher + StringBuilder sqlStatementStringBuilder = new StringBuilder(); Set fromTables = new HashSet<>(); List whereConditions = new ArrayList<>(); + List preparedStatementParameters = new ArrayList<>(); for (int i = 0; i < jdbcFetchers.length; i++) { JDBCFetcher fetcher = jdbcFetchers[i]; String tableName = namespacedTableName(namespace, fetcher.tableName); @@ -124,30 +122,23 @@ public CompletableFuture>> get (DataFetchingEnvironment // check if we have reached the final nested table if (i == jdbcFetchers.length - 1) { // final nested table reached! - // create a sqlBuilder to select all values from the final table - StringBuilder sqlStatementStringBuilder = new StringBuilder(); + // Add the namespaced select * to the sqlStatementStringBuilder to select all values from only the + // final table. sqlStatementStringBuilder.append("select "); sqlStatementStringBuilder.append( namespacedTableFieldName(namespace, fetcher.tableName, "*") ); - // Make the query and return the results! 
- return fetcher.getResults( - getJdbcQueryDataLoaderFromContext(environment), - getDataSourceFromContext(environment), - namespace, - new ArrayList<>(), - graphQLQueryArguemnts, - preparedStatementParameters, - whereConditions, - fromTables, - sqlStatementStringBuilder - ); } } lastFetcher = fetcher; } - // This piece of code will never be reached because of how things get returned above. - // But it's here to make java happy. - return new CompletableFuture<>(); + // Make the query and return the results! + return lastFetcher.getResults( + environment, + sqlStatementStringBuilder, + fromTables, + whereConditions, + preparedStatementParameters + ); } } From 89651f7e0bf224d29de706a9fe38d0670041a997 Mon Sep 17 00:00:00 2001 From: evansiroky Date: Tue, 16 Oct 2018 22:47:57 -0700 Subject: [PATCH 10/11] docs(graphql): add more comments --- .../conveyal/gtfs/graphql/GTFSGraphQL.java | 25 +++++++++++++------ .../gtfs/graphql/fetchers/JDBCFetcher.java | 4 +-- .../graphql/fetchers/NestedJDBCFetcher.java | 18 ++++++------- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java index e062217d3..bacd6951c 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java @@ -18,13 +18,19 @@ public class GTFSGraphQL { /** - * Execute a graphQL request. This method creates a new graphQL runner that is able to cache and batch sql queries - * during the course of obtaining the data according to the graphQL query. + * Execute a graphQL request. This method creates and executes a new graphQL runner that is able to cache and batch + * sql queries during the course of obtaining the data according to the graphQL query. The resulting output is a + * map of objects. This map is typically converted to JSON or some other kind of output via a downstream + * application using this method. 
+ * + * A DataLoader (https://github.com/graphql-java/java-dataloader) handles The batching and caching of requests. + * java-dataloader requires manual dispatching in order to actually execute the code to fetch data. However, there + * is a helper class called DataLoaderDispatcherInstrumentation that automatically dispatches dataloaders. This was + * setup according to this guide: https://www.graphql-java.com/documentation/v10/batching/ * * @param dataSource The dataSource to use in connecting to a database to make sql queries. * @param query The graphQL query * @param variables The variables to apply to the graphQL query - * @return */ public static Map run (DataSource dataSource, String query, Map variables) { // the registry and dataLoaders must be created upon each request to avoid caching loaded data after each request @@ -33,9 +39,12 @@ public static Map run (DataSource dataSource, String query, Map< registry.register("jdbcfetcher", jdbcQueryDataLoader); return GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) - // this instrumentation implementation will dispatch all the dataloaders as each level fo the graphql query - // is executed and hence make batched objects available to the query and the associated DataFetchers. This - // must be created new each time to avoid caching load results in future requests. + // This instrumentation implementation will dispatch all the dataloaders as each level of the graphql query + // is executed and hence make batched objects available to the query and the associated DataFetchers. If + // the dataloader was not recreated on each request and reused between different graphql queries, the + // dataloader could end up returning cached data for the result of a sql query even if the actual data in + // the database had changed. Therefore, the dataloader and registry must be created new each time to avoid + // caching sql query results in future requests. 
.instrumentation(new DataLoaderDispatcherInstrumentation(registry)) .build() // we now have a graphQL schema with dataloaders instrumented. Time to make a query @@ -68,7 +77,9 @@ public static DataLoader>> getJd } /** - * Helper method to return the DataSource from the DataFetchingEnvironment + * Helper method to extract the DataSource from the DataFetchingEnvironment. Whenever we a sql query needs to be + * made, this method will have to be called to make sure the same dataSource is being used within the same graphQL + * query. */ public static DataSource getDataSourceFromContext (DataFetchingEnvironment environment) { DataSource dataSource = ((GraphQLExecutionContext)environment.getContext()).dataSource; diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 174d67f6c..9d3fe0274 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -40,7 +40,7 @@ /** * A generic fetcher to get fields out of an SQL database table. */ -public class JDBCFetcher implements DataFetcher { +public class JDBCFetcher implements DataFetcher>>> { public static final Logger LOG = LoggerFactory.getLogger(JDBCFetcher.class); @@ -529,7 +529,7 @@ static String makeInClause(String filterField, List strings, List { +public class NestedJDBCFetcher implements DataFetcher>>> { private static final Logger LOG = LoggerFactory.getLogger(NestedJDBCFetcher.class); private final JDBCFetcher[] jdbcFetchers; @@ -77,9 +77,9 @@ public CompletableFuture>> get (DataFetchingEnvironment // So it should always be represented as a map with a namespace key. String namespace = (String) parentEntityMap.get("namespace"); - // the lastFetcher is used to keep track of the last JdbcFetcher seen. This is used to create join conditions + // the previousFetcher is used to keep track of the last JdbcFetcher seen. 
This is used to create join conditions // and eventually execute the query. - JDBCFetcher lastFetcher = null; + JDBCFetcher previousFetcher = null; // The statement needs to be built to specific a namespaced table name in order to select only the fields from // the last fetcher @@ -116,7 +116,7 @@ public CompletableFuture>> get (DataFetchingEnvironment fromTables.add(tableName); whereConditions.add(String.format( "%s = %s", - namespacedTableFieldName(namespace, lastFetcher.tableName, fetcher.parentJoinField), + namespacedTableFieldName(namespace, previousFetcher.tableName, fetcher.parentJoinField), namespacedTableFieldName(namespace, fetcher.tableName, fetcher.parentJoinField) )); // check if we have reached the final nested table @@ -130,10 +130,10 @@ public CompletableFuture>> get (DataFetchingEnvironment ); } } - lastFetcher = fetcher; + previousFetcher = fetcher; } // Make the query and return the results! - return lastFetcher.getResults( + return previousFetcher.getResults( environment, sqlStatementStringBuilder, fromTables, From 5a7c52953c69bbd83221b0347e9c34a711b3bf8b Mon Sep 17 00:00:00 2001 From: evansiroky Date: Thu, 18 Oct 2018 00:47:55 -0700 Subject: [PATCH 11/11] perf(graphql): combine similar sql queries into single queries --- .../gtfs/graphql/GraphQLGtfsSchema.java | 1 + .../gtfs/graphql/fetchers/JDBCFetcher.java | 463 ++++++++++++++---- .../graphql/fetchers/NestedJDBCFetcher.java | 2 +- .../graphql/fetchers/SQLColumnFetcher.java | 13 +- .../gtfs/graphql/GTFSGraphQLTest.java | 11 +- .../resources/graphql/superNestedNoLimits.txt | 23 + ...FetchMultiNestedEntitiesWithoutLimits.json | 63 +++ 7 files changed, 472 insertions(+), 104 deletions(-) create mode 100644 src/test/resources/graphql/superNestedNoLimits.txt create mode 100644 src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java 
b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index 9744de83b..0986ae469 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -357,6 +357,7 @@ public class GraphQLGtfsSchema { .type(new GraphQLList(routeType)) .argument(stringArg("namespace")) .argument(stringArg(SEARCH_ARG)) + .argument(intArg(LIMIT_ARG)) .dataFetcher(new NestedJDBCFetcher( new JDBCFetcher("pattern_stops", "stop_id", null, false), new JDBCFetcher("patterns", "pattern_id", null, false), diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 9d3fe0274..e85b5275f 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -28,6 +28,8 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -86,75 +88,103 @@ public static DataLoader>> newJdbcDataLoader } /** - * Method where batched sql queries are all ran at once. + * Method where batched sql queries are to be run. This method first iterates through all queries to see if there + * are any possibilities for combining multiple similar queries into one query where the results can be + * extracted and returned to the appropriate jdbcQuery. + * + * For now, only certain kinds of queries can be combined. See the CombinedQuery class for more details.
*/ private static Map>> getSqlQueryResults( Set jdbcQueries, BatchLoaderEnvironment context ) { + // create output map for all results Map>> allResults = new HashMap<>(); + + // get all key contexts Map keyContexts = context.getKeyContexts(); + + // prepare a list of sql queries that will actually get executed + List combinedQueries = new ArrayList<>(); + + // iterate through all batched jdbcQueries and combine combineable queries for (JdbcQuery jdbcQuery : jdbcQueries) { - Connection connection = null; - try { - // get the dataSource from the context of this specific jdbcQuery - DataSource dataSource = ((DataSource) keyContexts.get(jdbcQuery)); - connection = dataSource.getConnection(); - allResults.put( - jdbcQuery, - getSqlQueryResult( - jdbcQuery, - connection - ) - ); - } catch (SQLException e) { - throw new RuntimeException(e); - } finally { - DbUtils.closeQuietly(connection); + // get the dataSource from the context of this specific jdbcQuery + DataSource dataSource = ((DataSource) keyContexts.get(jdbcQuery)); + + // add first query to list + if (combinedQueries.isEmpty()) { + combinedQueries.add(new CombinedQuery(jdbcQuery, dataSource)); + continue; + } + + // check if subsequent queries can be merged with other queries + boolean foundOtherQueryToCombineWith = false; + for (CombinedQuery combinedQuery : combinedQueries) { + if (combinedQuery.combineWith(jdbcQuery)) { + foundOtherQueryToCombineWith = true; + break; + } + } + + // wasn't able to merge with another query, so add as another query to execute + if (!foundOtherQueryToCombineWith) { + combinedQueries.add(new CombinedQuery(jdbcQuery, dataSource)); } } + + // execute each combineable query and add to the results + for (CombinedQuery combinedQuery : combinedQueries) { + combinedQuery.executeAndAddToResults(allResults); + } + return allResults; } /** * Helper method to actually execute some sql. * - * @param jdbcQuery The query to execute. 
Object includes namespace, statement, and preparedStatementParameters * @param connection The database connection to use + * @param namespace The database namespace that will be included in the output + * @param statement The sql statement to be executed + * @param preparedStatementParameters A list of parameters to apply to the prepared statement. Assumes that all + * parameters are strings. + * @throws SQLException */ - public static List> getSqlQueryResult (JdbcQuery jdbcQuery, Connection connection) { - try { - List> results = new ArrayList<>(); - PreparedStatement preparedStatement = connection.prepareStatement(jdbcQuery.sqlStatement); - int oneBasedIndex = 1; - for (String parameter : jdbcQuery.preparedStatementParameters) { - preparedStatement.setString(oneBasedIndex++, parameter); - } - // This logging produces a lot of noise during testing due to large numbers of joined sub-queries - // LOG.info("table name={}", tableName); - LOG.info("SQL: {}", preparedStatement.toString()); - if (preparedStatement.execute()) { - ResultSet resultSet = preparedStatement.getResultSet(); - ResultSetMetaData meta = resultSet.getMetaData(); - int nColumns = meta.getColumnCount(); - // Iterate over result rows - while (resultSet.next()) { - // Create a Map to hold the contents of this row, injecting the sql schema namespace into every map - Map resultMap = new HashMap<>(); - resultMap.put("namespace", jdbcQuery.namespace); - // One-based iteration: start at one and use <=. 
- for (int i = 1; i <= nColumns; i++) { - resultMap.put(meta.getColumnName(i), resultSet.getObject(i)); - } - results.add(resultMap); + public static List> getSqlQueryResult ( + Connection connection, + String namespace, + String statement, + List preparedStatementParameters + ) throws SQLException { + List> results = new ArrayList<>(); + PreparedStatement preparedStatement = connection.prepareStatement(statement); + int oneBasedIndex = 1; + for (String parameter : preparedStatementParameters) { + preparedStatement.setString(oneBasedIndex++, parameter); + } + // This logging produces a lot of noise during testing due to large numbers of joined sub-queries + // LOG.info("table name={}", tableName); + LOG.info("SQL: {}", preparedStatement.toString()); + if (preparedStatement.execute()) { + ResultSet resultSet = preparedStatement.getResultSet(); + ResultSetMetaData meta = resultSet.getMetaData(); + int nColumns = meta.getColumnCount(); + // Iterate over result rows + while (resultSet.next()) { + // Create a Map to hold the contents of this row, injecting the sql schema namespace into every map + Map resultMap = new HashMap<>(); + resultMap.put("namespace", namespace); + // One-based iteration: start at one and use <=. + for (int i = 1; i <= nColumns; i++) { + resultMap.put(meta.getColumnName(i), resultSet.getObject(i)); } + results.add(resultMap); } - LOG.info("Result size: {}", results.size()); - // Return a List of Maps, one Map for each row in the result. - return results; - } catch (SQLException e) { - throw new RuntimeException(e); } + LOG.info("Result size: {}", results.size()); + // Return a List of Maps, one Map for each row in the result. + return results; } /** @@ -225,15 +255,13 @@ public CompletableFuture>> get (DataFetchingEnvironment whereConditions.add(makeInClause(parentJoinField, inClauseValues, preparedStatementParameters)); } - // We could select only the requested fields by examining environment.getFields(), but we just get them all. 
- // The advantage of selecting * is that we don't need to validate the field names. - // All the columns will be loaded into the Map, - // but only the requested fields will be fetched from that Map using a MapFetcher. - StringBuilder sqlBuilder = new StringBuilder(); - sqlBuilder.append("select *"); return getResults( environment, - sqlBuilder, + // We could select only the requested fields by examining environment.getFields(), but we just get them all. + // The advantage of selecting * is that we don't need to validate the field names. + // All the columns will be loaded into the Map, + // but only the requested fields will be fetched from that Map using a MapFetcher. + "select *", new HashSet<>(), // No additional tables are needed beyond those added by default in the next method. whereConditions, preparedStatementParameters @@ -249,8 +277,8 @@ public CompletableFuture>> get (DataFetchingEnvironment * List>. * * @param environment The datafetching environement of the current GraphQL entity being fetched. - * @param sqlStatementStringBuilder The beginnings of the sql statement string. The NestedJdbcFetcher must select - * only from the final table, so this argument is needed. + * @param selectStatement The beginnings of the sql statement string. The NestedJdbcFetcher must select only from + * the final table, so this argument is needed. * @param fromTables The tables to select from. * @param whereConditions A list of where conditions to apply to the query. 
* @param preparedStatementParameters A list of prepared statement parameters to be sent to the prepared statement @@ -258,7 +286,7 @@ public CompletableFuture>> get (DataFetchingEnvironment */ CompletableFuture>> getResults ( DataFetchingEnvironment environment, - StringBuilder sqlStatementStringBuilder, + String selectStatement, Set fromTables, List whereConditions, List preparedStatementParameters @@ -277,9 +305,6 @@ CompletableFuture>> getResults ( // Use the datasource defined in theDataFetching environment to query the database. DataSource dataSource = getDataSourceFromContext(environment); - // Make sure to get the dataloader defined in the environment to avoid caching sql results in future requests. - DataLoader>> jdbcDataLoader = getJdbcQueryDataLoaderFromContext(environment); - // This will contain one Map for each row fetched from the database table. List> results = new ArrayList<>(); if (graphQLQueryArguments == null) graphQLQueryArguments = new HashMap<>(); @@ -290,12 +315,6 @@ CompletableFuture>> getResults ( // The primary table being selected from is added here. Other tables may have been / will be added to this list to handle joins. fromTables.add(String.join(".", namespace, tableName)); - // The order by clause will go here. - String sortBy = ""; - if (sortField != null) { - // Sort field is not provided by user input, so it's ok to add here (i.e., it's not prone to SQL injection). 
- sortBy = String.format(" order by %s", sortField); - } Set argumentKeys = graphQLQueryArguments.keySet(); for (String key : argumentKeys) { // The pagination, bounding box, and date/time args should all be skipped here because they are handled @@ -395,14 +414,14 @@ CompletableFuture>> getResults ( } } } - sqlStatementStringBuilder.append(String.format(" from %s", String.join(", ", fromTables))); - if (!whereConditions.isEmpty()) { - sqlStatementStringBuilder.append(" where "); - sqlStatementStringBuilder.append(String.join(" and ", whereConditions)); - } + // The default value for sortBy is an empty string, so it's safe to always append it here. Also, there is no // threat of SQL injection because the sort field value is not user input. - sqlStatementStringBuilder.append(sortBy); + String sortBy = ""; + if (sortField != null) { + // Sort field is not provided by user input, so it's ok to add here (i.e., it's not prone to SQL injection). + sortBy = String.format(" order by %s", sortField); + } Integer limit = (Integer) graphQLQueryArguments.get(LIMIT_ARG); if (limit == null) { limit = autoLimit ? DEFAULT_ROWS_TO_FETCH : -1; @@ -410,22 +429,23 @@ CompletableFuture>> getResults ( if (limit > MAX_ROWS_TO_FETCH) { limit = MAX_ROWS_TO_FETCH; } - if (limit == -1) { - // Do not append limit if explicitly set to -1 or autoLimit is disabled. NOTE: this conditional block is - // empty simply because it is clearer to define the condition in this way (vs. if limit > 0). - // FIXME: Skipping limit is not scalable in many cases and should possibly be removed/limited. - } else { - sqlStatementStringBuilder.append(" limit ").append(limit); - } Integer offset = (Integer) graphQLQueryArguments.get(OFFSET_ARG); - if (offset != null && offset >= 0) { - sqlStatementStringBuilder.append(" offset ").append(offset); - } + + // Make sure to get the dataloader defined in the environment to avoid caching sql results in future requests. 
+ DataLoader>> jdbcDataLoader = getJdbcQueryDataLoaderFromContext(environment); + + // add a new JdbcQuery to the DataLoader. The DataLoader returns a completeable future and will obtain the + // result of the query either from a cache or by executing some sql. return jdbcDataLoader.load( new JdbcQuery( namespace, - preparedStatementParameters, - sqlStatementStringBuilder.toString() + selectStatement, + fromTables, + whereConditions, + sortBy, + limit, + offset, + preparedStatementParameters ), dataSource ); @@ -532,15 +552,34 @@ static String makeInClause(String filterField, List strings, List preparedStatementParameters; - public String sqlStatement; - - - public JdbcQuery(String namespace, List preparedStatementParameters, String sqlStatement) { + private final String namespace; + private final String selectStatement; + private final Set fromTables; + private List whereConditions; + public String sortBy; + private final Integer limit; + private final Integer offset; + private final List preparedStatementParameters; + + + public JdbcQuery( + String namespace, + String selectStatement, + Set fromTables, + List whereConditions, + String sortBy, + Integer limit, + Integer offset, + List preparedStatementParameters + ) { this.namespace = namespace; + this.selectStatement = selectStatement; + this.fromTables = fromTables; + this.whereConditions = whereConditions; + this.sortBy = sortBy; + this.limit = limit; + this.offset = offset; this.preparedStatementParameters = preparedStatementParameters; - this.sqlStatement = sqlStatement; } @Override @@ -549,14 +588,248 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; JdbcQuery jdbcQuery = (JdbcQuery) o; return Objects.equals(namespace, jdbcQuery.namespace) && - Objects.equals(preparedStatementParameters, jdbcQuery.preparedStatementParameters) && - Objects.equals(sqlStatement, jdbcQuery.sqlStatement); + Objects.equals(selectStatement, jdbcQuery.selectStatement) && + 
Objects.equals(fromTables, jdbcQuery.fromTables) && + Objects.equals(whereConditions, jdbcQuery.whereConditions) && + Objects.equals(sortBy, jdbcQuery.sortBy) && + Objects.equals(limit, jdbcQuery.limit) && + Objects.equals(offset, jdbcQuery.offset) && + Objects.equals(preparedStatementParameters, jdbcQuery.preparedStatementParameters); } @Override public int hashCode() { - return Objects.hash(namespace, preparedStatementParameters, sqlStatement); + return Objects.hash( + namespace, + selectStatement, + fromTables, + whereConditions, + sortBy, + limit, + offset, + preparedStatementParameters + ); } } + /** + * A helper class that keeps track of queries that are combined from various individual JdbcQueries. In a lot of + * cases there will be numerous JdbcQueries that look very similar yet differ only in that the value that is being + * selected. For example, two queries could be + * `select id from trip where route_id = 1` and + * `select id from trip where route_id = 2` + * The above 2 queries can be combined into one by using an in clause: + * `select id from trip where route_id in (1, 2)` + */ + private static class CombinedQuery { + // the field used as the in clause in the combined query + private String inClauseField; + + // a StringBuilder is used to build up the inCondition + private StringBuilder inConditionClause = new StringBuilder(); + + // whether or not this query can be combined with other queries + private final boolean isCombinable; + + // whether or not at least 2 queries are being combined + private boolean isCombination = false; + private List combinedPreparedStatementParameters = new ArrayList<>(); + + // a Map of the prepared statement parameter to the JdbcQuery it came from. This is used when mapping + private Map combinedQueries = new HashMap<>(); + + // a list of where conditions excluding the original where condition with a parameter. That is instead stored + // in the inConditionClause. 
+ private List combinedWhereConditions = new ArrayList<>(); + + // Assume that the same DataSource can be used to make a combined query. This should be OK because GraphQL + // requests in this library take in a DataSource for each GraphQL request. + private final DataSource underlyingDataSource; + private JdbcQuery underlyingQuery; + + + /** + * Instantiate the combined query. It could be the case that the query is not combinable, so do a few checks + * and prepare some other variables for later use. + */ + public CombinedQuery(JdbcQuery jdbcQuery, DataSource dataSource) { + underlyingDataSource = dataSource; + underlyingQuery = jdbcQuery; + isCombinable = underlyingQuery.limit == -1 && + underlyingQuery.offset == null && + underlyingQuery.preparedStatementParameters.size() == 1 && + parseWhereQueries(); + + // do some extra prep work if it's possible to combine the underlying query + if (isCombinable) { + String preparedStatementParameter = underlyingQuery.preparedStatementParameters.get(0); + combinedQueries.put(preparedStatementParameter, jdbcQuery); + combinedPreparedStatementParameters.add(preparedStatementParameter); + } + } + + /** + * Helper function to parse the where conditions. This method returns true if it finds that only one of + * the where conditions has a question mark. + */ + private boolean parseWhereQueries() { + // FIXME: this won't match a where statement where the question mark is first. 
+ Pattern parameretizedWherePattern = Pattern.compile("(.*?)\\s*=\\s*\\?"); + boolean foundCombinableWhereClause = false; + for (String whereCondition : underlyingQuery.whereConditions) { + Matcher matcher = parameretizedWherePattern.matcher(whereCondition); + if (matcher.find()) { + // if a combinable where clause has already been found, return false because this current + // implementation only supports 1 parameratized where clause + if (foundCombinableWhereClause) return false; + + // begin creating the inConditionClause + inClauseField = matcher.group(1); + inConditionClause.append(inClauseField); + inConditionClause.append(" IN (?"); + + foundCombinableWhereClause = true; + } else { + combinedWhereConditions.add(whereCondition); + } + } + // return the result. If no combinable where clauses were found then this returns false. If just one is + // found this returns true. Otherwise the return in the above for loop returns false. + return foundCombinableWhereClause; + } + + /** + * Combine a given JdbcQueries if possible to this combined query which may already have other queries that have + * been combined. 
+ * + * For 2 JdbcQueries to be combinable, the queries must not meet the following criteria: + * - be from the same namespace + * - not have a limit or an offset + * - have the exact same select string, from tables and ordering + * - have at least 1 where clause + * - have only 1 prepared statement parameter + */ + public boolean combineWith(JdbcQuery other) { + boolean canCombineQueries = isCombinable && + underlyingQuery.namespace.equals(other.namespace) && + other.limit == -1 && + other.offset == null && + underlyingQuery.selectStatement.equals(other.selectStatement) && + underlyingQuery.fromTables.equals(other.fromTables) && + // technically, the ordering shouldn't change the actual records that are returned, it's just more work + // to do postprocessing to resort the fields + underlyingQuery.sortBy.equals(other.sortBy) && + // FIXME + // the where conditions are strings in the form %s = %s, so it's possible that there will be a false + // negative where one where condition is a = b and the other is b = a. 
+ underlyingQuery.whereConditions.equals(other.whereConditions) && + other.preparedStatementParameters.size() == 1; + + // if it's not possible to combine, return false immediately + if (!canCombineQueries) return canCombineQueries; + + // queries can be combined, proceed with adding the query + isCombination = true; + String otherFirstParameter = other.preparedStatementParameters.get(0); + combinedQueries.put(otherFirstParameter, other); + combinedPreparedStatementParameters.add(otherFirstParameter); + inConditionClause.append(", ?"); + + return true; + } + + /** + * execute the combined (or underlying query if it wasn't combine-able) and then add the results to the passed + * map + */ + public void executeAndAddToResults(Map>> allResults) { + // build up the sql statement to execute + StringBuilder sqlStatement = new StringBuilder(); + + // SELECT + sqlStatement.append(underlyingQuery.selectStatement); + String inClauseFieldAlias = null; + if (isCombination) { + // also add the field that is used in the IN clause. We need this data to match the row to the original + // JdbcQuery. 
Replace the field's .'s with _ to ensure an exact match in the output + inClauseFieldAlias = String.format("%s_alias", inClauseField.replace(".", "_")); + sqlStatement.append(String.format( + ", %s as %s", + inClauseField, + inClauseFieldAlias + )); + } + + // FROM + sqlStatement.append(String.format(" from %s", String.join(", ", underlyingQuery.fromTables))); + + // WHERE + if (isCombination) { + sqlStatement.append(" where "); + // complete the in clause and add it to the where conditions + inConditionClause.append(")"); + combinedWhereConditions.add(inConditionClause.toString()); + sqlStatement.append(String.join(" and ", combinedWhereConditions)); + } else { + if (!underlyingQuery.whereConditions.isEmpty()) { + sqlStatement + .append(" where ") + .append(String.join(" and ", underlyingQuery.whereConditions)); + } + } + + // ORDER BY + sqlStatement.append(underlyingQuery.sortBy); + + // limit and offset only apply if the combined query is not a combination + if (!isCombination) { + if (underlyingQuery.limit == -1) { + // Do not append limit if explicitly set to -1 or autoLimit is disabled. NOTE: this conditional block is + // empty simply because it is clearer to define the condition in this way (vs. if limit > 0). + // FIXME: Skipping limit is not scalable in many cases and should possibly be removed/limited. + } else { + sqlStatement.append(" limit ").append(underlyingQuery.limit); + } + if (underlyingQuery.offset != null && underlyingQuery.offset >= 0) { + sqlStatement.append(" offset ").append(underlyingQuery.offset); + } + } + + // Execute the query + Connection connection = null; + try { + connection = underlyingDataSource.getConnection(); + List> results = getSqlQueryResult( + connection, + underlyingQuery.namespace, + sqlStatement.toString(), + isCombination ? 
combinedPreparedStatementParameters : underlyingQuery.preparedStatementParameters + ); + + if (isCombination) { + // iterate throught the results and assign each Map to the corresponding entry in allResults + for (Map result : results) { + JdbcQuery query = combinedQueries.get(result.get(inClauseFieldAlias)); + List> queryResults = allResults.containsKey(query) + ? allResults.get(query) + : new ArrayList<>(); + + // remove the alias field + result.remove(inClauseFieldAlias); + queryResults.add(result); + + allResults.put(query, queryResults); + } + } else { + // query was not combined, so we can add all of these results with the underlying query as the key + allResults.put(underlyingQuery, results); + } + } catch (SQLException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } finally { + DbUtils.closeQuietly(connection); + } + } + } } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java index fc7247bb1..80ca59484 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/NestedJDBCFetcher.java @@ -135,7 +135,7 @@ public CompletableFuture>> get (DataFetchingEnvironment // Make the query and return the results! 
return previousFetcher.getResults( environment, - sqlStatementStringBuilder, + sqlStatementStringBuilder.toString(), fromTables, whereConditions, preparedStatementParameters diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java index 1c1ebdd62..600748330 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/SQLColumnFetcher.java @@ -73,13 +73,12 @@ public List get (DataFetchingEnvironment environment) { try { connection = dataSource.getConnection(); for (Map row : jdbcFetcher.getSqlQueryResult( - new JDBCFetcher.JdbcQuery( - namespace, - Arrays.asList(filterValue), - sqlStatementBuilder.toString() - ), - connection - )) { + connection, + namespace, + sqlStatementBuilder.toString(), + Arrays.asList(filterValue) + ) + ) { results.add((T) row.get(columnName)); } } catch (SQLException e) { diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index cffc6f7ed..31287512f 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -177,12 +177,21 @@ public void canFetchNestedEntityWithLimit() throws IOException { assertThat(queryGraphQL("feedStopsStopTimeLimit.txt"), matchesSnapshot()); } - // tests that the limit argument applies properly to a fetcher defined with autolimit set to false + // tests whether a graphQL query that has superflous and redundant nesting can find the right result + // if the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs @Test(timeout=5000) public void canFetchMultiNestedEntities() throws IOException { assertThat(queryGraphQL("superNested.txt"), matchesSnapshot()); } + // tests whether a graphQL query that has superflous and redundant nesting can 
find the right result + // if the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs + // furthermore, some queries should have been combined together + @Test(timeout=5000) + public void canFetchMultiNestedEntitiesWithoutLimits() throws IOException { + assertThat(queryGraphQL("superNestedNoLimits.txt"), matchesSnapshot()); + } + /** * attempt to fetch more than one record with SQL injection as inputs * the graphql library should properly escape the string and return 0 results for stops diff --git a/src/test/resources/graphql/superNestedNoLimits.txt b/src/test/resources/graphql/superNestedNoLimits.txt new file mode 100644 index 000000000..497e45b8b --- /dev/null +++ b/src/test/resources/graphql/superNestedNoLimits.txt @@ -0,0 +1,23 @@ +query ($namespace: String) { + feed(namespace: $namespace) { + feed_version + routes(limit: -1) { + route_id + stops(limit: -1) { + routes(limit: -1) { + route_id + stops(limit: -1) { + routes(limit: -1) { + route_id + stops(limit: -1) { + stop_id + } + } + stop_id + } + } + stop_id + } + } + } +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json new file mode 100644 index 000000000..97f3b64aa --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json @@ -0,0 +1,63 @@ +{ + "data" : { + "feed" : { + "feed_version" : "1.0", + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" 
: "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ] + } + } +} \ No newline at end of file