Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fetch OSM for any region from Protomaps extract API #842

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import com.conveyal.analysis.components.TaskScheduler;
import com.conveyal.analysis.datasource.DataSourcePreviewGenerator;
import com.conveyal.analysis.datasource.DataSourceUploadAction;
import com.conveyal.analysis.datasource.ProtomapsDownloader;
import com.conveyal.analysis.grids.SeamlessCensusGridExtractor;
import com.conveyal.analysis.models.DataSource;
import com.conveyal.analysis.models.GtfsDataSource;
import com.conveyal.analysis.models.OsmDataSource;
import com.conveyal.analysis.models.Region;
import com.conveyal.analysis.models.SpatialDataSource;
import com.conveyal.analysis.persistence.AnalysisCollection;
import com.conveyal.analysis.persistence.AnalysisDB;
import com.conveyal.analysis.persistence.Persistence;
import com.conveyal.analysis.util.HttpUtils;
import com.conveyal.file.FileStorage;
import com.conveyal.file.FileStorageFormat;
Expand Down Expand Up @@ -177,6 +180,30 @@ private String handleUpload (Request req, Response res) {
return backgroundTask.id.toString();
}

/**
* Hit this endpoint like curl -v -X POST 'http://localhost:7070/api/dataSource/pmdl/6053738bc5469ddf9e69efe9'
* Progress should be visible in the web UI, as long as it's actively polling the activity endpoint.
*
* @return the ID of the background task tracking the progress of extraction and download from Protomaps
*/
private String protomapsDownload (Request req, Response res) {
// Do some initial synchronous work setting up, in order to fail fast if the request is bad.
// final UserPermissions userPermissions = new UserPermissions("local", true, "local");
final UserPermissions userPermissions = UserPermissions.from(req);
final String regionId = req.params("regionId");
final Region region = Persistence.regions.findByIdIfPermitted(regionId, userPermissions);

// Kick off a background task so the HTTP request can return immediately.
// This should ideally be a subtask of an overall bundle creation task.
Task backgroundTask = Task.create("Downloading OSM for " + region.name)
.forUser(userPermissions)
.setHeavy(true)
.withAction(new ProtomapsDownloader(region, userPermissions, fileStorage, dataSourceCollection));

taskScheduler.enqueue(backgroundTask);
return backgroundTask.id.toString();
}

@Override
public void registerEndpoints (spark.Service sparkService) {
sparkService.path("/api/dataSource", () -> {
Expand All @@ -185,8 +212,8 @@ public void registerEndpoints (spark.Service sparkService) {
sparkService.delete("/:_id", this::deleteOneDataSourceById, toJson);
sparkService.get("/:_id/preview", this::getDataSourcePreview, toJson);
sparkService.post("", this::handleUpload, toJson);
// regionId will be in query parameter
sparkService.post("/addLodesDataSource", this::downloadLODES, toJson);
sparkService.post("/pmdl/:regionId", this::protomapsDownload, toJson);
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@

import java.io.File;

import static com.conveyal.file.FileStorageFormat.GEOJSON;
import static com.conveyal.file.FileStorageFormat.GEOPACKAGE;
import static com.conveyal.file.FileStorageFormat.SHP;
import static com.conveyal.file.FileStorageFormat.GEOTIFF;
import static com.conveyal.file.FileStorageFormat.*;

/**
* Logic for loading and validating a specific kind of input file, yielding a specific subclass of DataSource.
Expand Down Expand Up @@ -70,6 +67,8 @@ public static DataSourceIngester forFormat (FileStorageFormat format) {
return new GeoTiffDataSourceIngester();
} else if (format == GEOPACKAGE) {
return new GeoPackageDataSourceIngester();
} else if (format == OSMPBF) {
return new OsmDataSourceIngester();
}
throw new UnsupportedOperationException("Unknown file format: " + format.name());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package com.conveyal.analysis.datasource;

import com.conveyal.analysis.models.DataSource;
import com.conveyal.analysis.models.OsmDataSource;
import com.conveyal.r5.analyst.progress.ProgressListener;

import java.io.File;

/**
*
*/
public class OsmDataSourceIngester extends DataSourceIngester {

private OsmDataSource dataSource;

@Override
protected DataSource dataSource() {
return dataSource;
}

public OsmDataSourceIngester () {
this.dataSource = new OsmDataSource();
}

@Override
public void ingest(File file, ProgressListener progressListener) {
// TODO IMPLEMENT
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package com.conveyal.analysis.datasource;

import com.conveyal.analysis.UserPermissions;
import com.conveyal.analysis.components.TaskScheduler;
import com.conveyal.analysis.datasource.DataSourceIngester;
import com.conveyal.analysis.datasource.OsmDataSourceIngester;
import com.conveyal.analysis.models.Bounds;
import com.conveyal.analysis.models.DataSource;
import com.conveyal.analysis.models.OsmDataSource;
import com.conveyal.analysis.models.Region;
import com.conveyal.analysis.persistence.AnalysisCollection;
import com.conveyal.analysis.util.JsonUtil;
import com.conveyal.file.FileCategory;
import com.conveyal.file.FileStorage;
import com.conveyal.file.FileStorageKey;
import com.conveyal.r5.analyst.progress.ProgressInputStream;
import com.conveyal.r5.analyst.progress.ProgressListener;
import com.conveyal.r5.analyst.progress.TaskAction;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.bson.types.ObjectId;
import org.hsqldb.rights.User;

import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

import static com.conveyal.r5.analyst.cluster.AnalysisWorker.sleepSeconds;


/**
*
*/
public class ProtomapsDownloader implements TaskAction {

// Protomaps API token from https://app.protomaps.com/dashboard
private static final String PROTOMAPS_TOKEN = "YOUR TOKEN HERE";
private static final String PROTOMAPS_EXTRACT_URL = "https://app.protomaps.com/api/v1/extracts";

private final Region region;

private final UserPermissions userPermissions;
private final FileStorage fileStorage;
private final AnalysisCollection<DataSource> dataSourceCollection;


// CONSTRUCTOR

// Note, it seems like ingester/downloader actions that create datasources should not themselves need
// access to the DataSource instance, FileStorage etc. but handling this correctly might require sub-tasks.
// DataSourceIngester has the beginnings of such a system, where it creates the data source itself.
// See also insertion into database at com.conveyal.analysis.datasource.DataSourceUploadAction.action().
// Perhaps we should download the OSMPBF and then just feed it to a standard OSM file ingester.
// But we also don't want to be passing user permissions, database collections etc. into here.

public ProtomapsDownloader(Region region, UserPermissions userPermissions, FileStorage fileStorage, AnalysisCollection<DataSource> dataSourceCollection) {
this.region = region;
this.userPermissions = userPermissions;
this.fileStorage = fileStorage;
this.dataSourceCollection = dataSourceCollection;
}


// Model object for JSON result of polling for extract progress
// While extract is happening, response will look like this:
// {
// "Timestamp" : "2022-11-25T06:47:54Z",
// "CellsTotal" : 712,
// "CellsProg" : 712,
// "NodesTotal" : 801359,
// "NodesProg" : 801359,
// "ElemsTotal" : 890865,
// "ElemsProg" : 881991
// }
// The cells will count up first, then nodes, then elems.
// When it's finished the response will look like this:
// {
// "Uuid" : "d25e08bf-bc46-475c-aac1-397cddfd523f",
// "Timestamp" : "2022-11-25T06:47:54Z",
// "ElemsTotal" : 890865,
// "Complete" : true,
// "SizeBytes" : 6721639,
// "Elapsed" : 2.053800663
// }
public static class Progress {
// Field capitalization is nonstandard - can objectmapper do case insensitive mapping?
public String Uuid, Timestamp;
public Double Elapsed; // in seconds
public int CellsTotal, CellsProg, NodesTotal, NodesProg, ElemsTotal, ElemsProg;
public boolean Complete;
public long SizeBytes;
}

private ObjectNode newRequestBodyWithToken () {
return JsonUtil.objectNode().put("token", PROTOMAPS_TOKEN);
}

@Override
public void action(ProgressListener progressListener) throws Exception {
progressListener.beginTask("Contacting Protomaps", 2);

HttpClient httpClient = HttpClient.newBuilder().build();

ObjectNode initialRequestBody = newRequestBodyWithToken();
final Bounds bounds = region.bounds;
initialRequestBody.putObject("region")
.put("type", "bbox")
.putArray("data")
.add(bounds.south)
.add(bounds.west)
.add(bounds.north)
.add(bounds.east);

HttpRequest initialRequest = HttpRequest.newBuilder()
.POST(BodyPublishers.ofString(initialRequestBody.toString()))
.uri(URI.create(PROTOMAPS_EXTRACT_URL))
.build();

HttpResponse<byte[]> initialResponse = httpClient.send(initialRequest, HttpResponse.BodyHandlers.ofByteArray());
JsonNode initialJson = JsonUtil.objectMapper.readTree(initialResponse.body());
final String uuid = initialJson.get("uuid").asText(); // ID of this extract
URI progressUri = URI.create(initialJson.get("url").asText()); // for polling extract progress

progressListener.beginTask("Extracting OSM data", 100);
HttpRequest progressRequest = HttpRequest.newBuilder().GET().uri(progressUri).build();
while (true) {
HttpResponse<byte[]> response = httpClient.send(progressRequest, HttpResponse.BodyHandlers.ofByteArray());
System.out.println(new String(response.body()));
// JsonNode progressJson = JsonUtil.objectMapper.readTree(response.body());
// System.out.println(progressJson.toPrettyString());
Progress progress = JsonUtil.objectMapper.readValue(response.body(), Progress.class);
if (progress.Complete) {
break;
}
// Extraction proceeds in three phases: spatial index cells, then nodes, then ways and relations.
// This is abusing our ProgressListener programming API (new task each iteration) but it works for now.
if (progress.CellsProg < progress.CellsTotal) {
progressListener.beginTask("Scanning cells", progress.CellsTotal);
progressListener.increment(progress.CellsProg);
} else if (progress.NodesProg < progress.NodesTotal) {
progressListener.beginTask("Scanning nodes", progress.NodesTotal);
progressListener.increment(progress.NodesProg);
} else if (progress.ElemsProg < progress.ElemsTotal) {
progressListener.beginTask("Scanning elements", progress.ElemsTotal);
progressListener.increment(progress.ElemsProg);
}
sleepSeconds(1);
// TODO timeout and stall detection (on all background tasks?)
}

// Get the download URL for the finished extract
progressListener.beginTask("Getting download URL", 1);
HttpRequest downloadRequest = HttpRequest.newBuilder()
.POST(BodyPublishers.ofString(newRequestBodyWithToken().toString()))
.uri(URI.create(PROTOMAPS_EXTRACT_URL + "/" + uuid))
.build();

HttpResponse<byte[]> downloadResponse = httpClient.send(downloadRequest, HttpResponse.BodyHandlers.ofByteArray());
JsonNode downloadJson = JsonUtil.objectMapper.readTree(downloadResponse.body());
System.out.println(downloadJson.toPrettyString());
URI downloadUri = URI.create(downloadJson.get("url").asText());

// Finally, actually download the PBF data and save it to a temp file.
// Don't use simple downloadUri.toURL().openStream() because we want the content length header
progressListener.beginTask("Retrieving OSM data", 1);
HttpRequest fileRequest = HttpRequest.newBuilder()
.GET()
.uri(downloadUri)
.build();

HttpResponse<InputStream> fileResponse = httpClient.send(fileRequest, HttpResponse.BodyHandlers.ofInputStream());

Path tempFile = Files.createTempFile("protomaps_extract", ".osm.pbf");

// Sizes over 2GB are entirely possible, should really use longs
long contentLength = fileResponse.headers().firstValueAsLong("Content-Length").orElse(100_000_000);
progressListener.beginTask("Retrieving OSM data", (int) contentLength);
try (InputStream inputStream = new ProgressInputStream(progressListener, fileResponse.body())) {
Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
}

// The rest of this is replicating some logic from DataSourceUploadAction and DataSourceIngester,
// which need to be generalized to files that are downloaded by the backend rather than just uploaded to it.

DataSourceIngester ingester = new OsmDataSourceIngester();
ingester.initializeDataSource(region.name, tempFile.toString(), region._id, userPermissions);
DataSource dataSource = ingester.dataSource();
dataSource.wgsBounds = region.bounds;
FileStorageKey key = new FileStorageKey(FileCategory.DATASOURCES, dataSource._id.toString());
fileStorage.moveIntoStorage(key, tempFile.toFile());
ingester.ingest(fileStorage.getFile(key), progressListener);
progressListener.setWorkProduct(ingester.dataSource().toWorkProduct());
dataSourceCollection.insert(dataSource);

}

}
9 changes: 9 additions & 0 deletions src/main/java/com/conveyal/analysis/models/OsmDataSource.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.conveyal.analysis.models;

import com.conveyal.file.FileStorageFormat;
import org.bson.codecs.pojo.annotations.BsonDiscriminator;

/**
Expand All @@ -8,4 +9,12 @@
@BsonDiscriminator(key="type", value="osm")
public class OsmDataSource extends DataSource {

int nodes;
int ways;
int relations;

public OsmDataSource() {
this.fileFormat = FileStorageFormat.OSMPBF;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ public T create(Request req, Response res) throws IOException {
public T findPermittedByRequestParamId (Request req) {
UserPermissions user = UserPermissions.from(req);
T value = findById(req.params("_id"));
if (value == null) {
throw AnalysisServerException.notFound("No item exists with the specified ID.");
}
// Throw if or does not have permission
if (!value.accessGroup.equals(user.accessGroup)) {
throw invalidAccessGroup();
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/conveyal/r5/analyst/progress/Task.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public void run () {
markComplete();
} catch (Throwable t) {
// TODO Store error in work product and write product to Mongo uniformly
System.out.println(ExceptionUtils.stackTraceString(t));
markError(t);
}
}
Expand Down