Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Column-based storage for GTFS entities #1747

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ public class Arguments {
description = "Skips check for new validator version")
private boolean skipValidatorUpdate = false;

/**
 * Command-line switch {@code -cbs} / {@code --column_based_storage}. Initialised to {@code false}
 * and forwarded to the runner configuration builder via {@code setUseColumnBasedStorage}.
 */
@Parameter(
names = {"-cbs", "--column_based_storage"},
description = "Use column-based storage")
private boolean useColumnBasedStorage = false;

/**
 * Command-line switch {@code -par} / {@code --pause_after_reading}. Initialised to {@code false}
 * and forwarded to the runner configuration builder via {@code setPauseAfterReading}.
 */
@Parameter(
names = {"-par", "--pause_after_reading"},
description = "Pause after initially reading feed")
private boolean pauseAfterReading = false;

ValidationRunnerConfig toConfig() throws URISyntaxException {
ValidationRunnerConfig.Builder builder = ValidationRunnerConfig.builder();
if (input != null) {
Expand Down Expand Up @@ -141,6 +151,8 @@ ValidationRunnerConfig toConfig() throws URISyntaxException {
builder.setNumThreads(numThreads);
builder.setPrettyJson(pretty);
builder.setSkipValidatorUpdate(skipValidatorUpdate);
builder.setUseColumnBasedStorage(useColumnBasedStorage);
builder.setPauseAfterReading(pauseAfterReading);
return builder.build();
}

Expand Down
2 changes: 2 additions & 0 deletions core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ plugins {
dependencies {
implementation project(':model')
annotationProcessor project(':processor:notices')
annotationProcessor project(':processor:columns')
annotationProcessor 'com.google.auto.value:auto-value:1.7.4'
compileOnly 'com.google.auto.value:auto-value-annotations:1.7.4'
implementation 'org.apache.commons:commons-compress:1.20'
Expand All @@ -42,6 +43,7 @@ dependencies {
implementation 'com.googlecode.libphonenumber:libphonenumber:8.12.13'
implementation 'com.google.flogger:flogger:0.6'
implementation 'io.github.classgraph:classgraph:4.8.146'
testImplementation project(':core:testing')
testImplementation 'com.google.flogger:flogger-system-backend:0.6'
testImplementation group: 'junit', name: 'junit', version: '4.13'
testImplementation "com.google.truth:truth:1.0.1"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package org.mobilitydata.gtfsvalidator.columns;

import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.List;

/**
* Base classes for column store implementation classes, as produced from {@link
* org.mobilitydata.gtfsvalidator.annotation.ColumnStoreTypes} annotations.
*/
/**
 * Base class for column store implementation classes, as produced from {@link
 * org.mobilitydata.gtfsvalidator.annotation.ColumnStoreTypes} annotations.
 *
 * <p>Object columns are stored as one growable array per column, indexed by row. Presence of
 * primitive values is tracked in bit sets: the presence list holds one {@code byte[]} per group of
 * eight columns, each array is indexed by row, and bit {@code columnIndex % 8} of a row's byte
 * marks the column as set for that row.
 *
 * <p>A column index of {@code -1} is treated by the read accessors as "column not assigned".
 */
public class ColumnStoreBase {
  /** Number of column presence flags packed into each byte of a presence array. */
  private static final int PRESENCE_BITS_PER_BYTE = 8;

  /**
   * Appends a new, empty column to {@code valuesByColumnIndex}.
   *
   * @param valuesByColumnIndex the per-column value arrays
   * @param type the element type of the new column
   * @param initialCapacity the initial number of rows the column can hold; may be zero
   * @return the index of the newly added column
   */
  static <T> int reserveColumn(List<T[]> valuesByColumnIndex, Class<T> type, int initialCapacity) {
    // Array.newInstance creates an array whose runtime component type is `type`, so the cast to
    // T[] holds for every reference type T.
    @SuppressWarnings("unchecked")
    T[] values = (T[]) Array.newInstance(type, initialCapacity);
    valuesByColumnIndex.add(values);
    return valuesByColumnIndex.size() - 1;
  }

  /**
   * Ensures that a presence array exists for the group of columns containing {@code columnIndex}.
   *
   * @param primitivePresenceByColumnIndex the presence arrays, one per group of eight columns
   * @param columnIndex the column whose presence bits must be addressable
   * @param initialCapacity the initial number of rows of any presence array that gets created
   */
  static void reservePrimitivePresence(
      List<byte[]> primitivePresenceByColumnIndex, int columnIndex, int initialCapacity) {
    int highOrderIndex = columnIndex / PRESENCE_BITS_PER_BYTE;
    // Loop rather than add a single array so that a column index that skips a whole group of
    // eight still leaves the list long enough to be indexed by highOrderIndex.
    while (highOrderIndex >= primitivePresenceByColumnIndex.size()) {
      primitivePresenceByColumnIndex.add(new byte[initialCapacity]);
    }
  }

  /**
   * Returns whether a primitive value has been marked present for the given column and row.
   *
   * @return {@code false} if {@code columnIndex} is {@code -1}, if {@code row} lies beyond the
   *     current capacity of the presence array, or if the presence bit is not set
   */
  static boolean hasPrimitive(
      List<byte[]> primitivePresenceByColumnIndex, int columnIndex, int row) {
    if (columnIndex == -1) {
      return false;
    }
    int highOrderIndex = columnIndex / PRESENCE_BITS_PER_BYTE;
    int lowOrderIndex = columnIndex % PRESENCE_BITS_PER_BYTE;
    byte[] values = primitivePresenceByColumnIndex.get(highOrderIndex);
    // A row that was never written may be past the end of the array; treat it as absent, the
    // same way hasValue and getValue do.
    return row < values.length && (values[row] & (1 << lowOrderIndex)) != 0;
  }

  /**
   * Returns whether a non-null value is stored for the given column and row.
   *
   * @return {@code false} if {@code columnIndex} is {@code -1}, if {@code row} lies beyond the
   *     current capacity of the column, or if the stored value is {@code null}
   */
  static <T> boolean hasValue(List<T[]> valuesByColumnIndex, int columnIndex, int row) {
    if (columnIndex == -1) {
      return false;
    }
    T[] values = valuesByColumnIndex.get(columnIndex);
    return row < values.length && values[row] != null;
  }

  /**
   * Returns the value stored for the given column and row.
   *
   * @return the stored value, or {@code defaultValue} if {@code columnIndex} is {@code -1}, if
   *     {@code row} lies beyond the current capacity of the column, or if the stored value is
   *     {@code null}
   */
  static <T> T getValue(List<T[]> valuesByColumnIndex, int columnIndex, int row, T defaultValue) {
    if (columnIndex == -1) {
      return defaultValue;
    }
    T[] values = valuesByColumnIndex.get(columnIndex);
    if (row >= values.length) {
      return defaultValue;
    }
    T value = values[row];
    return value != null ? value : defaultValue;
  }

  /**
   * Marks a primitive value as present for the given column and row, growing the presence array
   * if {@code row} does not fit yet.
   */
  static void setPrimitivePresence(
      List<byte[]> primitivePresenceByColumnIndex, int columnIndex, int row) {
    int highOrderIndex = columnIndex / PRESENCE_BITS_PER_BYTE;
    int lowOrderIndex = columnIndex % PRESENCE_BITS_PER_BYTE;
    byte[] presenceValues = primitivePresenceByColumnIndex.get(highOrderIndex);
    if (presenceValues.length <= row) {
      int newSize = calculateNewCapacity(presenceValues.length, row + 1);
      presenceValues = Arrays.copyOf(presenceValues, newSize);
      // The grown copy replaces the old array so later reads see the new capacity.
      primitivePresenceByColumnIndex.set(highOrderIndex, presenceValues);
    }
    presenceValues[row] |= (1 << lowOrderIndex);
  }

  /**
   * Stores {@code value} for the given column and row, growing the column array if {@code row}
   * does not fit yet.
   */
  static <T> void setValue(List<T[]> valuesByColumnIndex, int columnIndex, int row, T value) {
    T[] values = valuesByColumnIndex.get(columnIndex);
    if (values.length <= row) {
      int newSize = calculateNewCapacity(values.length, row + 1);
      values = Arrays.copyOf(values, newSize);
      // The grown copy replaces the old array so later reads see the new capacity.
      valuesByColumnIndex.set(columnIndex, values);
    }
    values[row] = value;
  }

  /**
   * Computes the capacity to grow to by repeatedly doubling {@code currentSize} until it reaches
   * {@code minSize}.
   *
   * @param currentSize the current capacity; may be zero
   * @param minSize the minimum capacity required
   * @return {@code currentSize} if it already satisfies {@code minSize}; otherwise the first
   *     doubling of {@code currentSize} (starting from 1 when it is zero) that is at least {@code
   *     minSize}, capped at {@link Integer#MAX_VALUE}
   */
  static int calculateNewCapacity(int currentSize, int minSize) {
    if (currentSize >= minSize) {
      return currentSize;
    }
    // Start from at least 1: doubling zero never makes progress. Use a long so that the doubling
    // cannot overflow into a negative value and spin forever.
    long newSize = Math.max(currentSize, 1);
    while (newSize < minSize) {
      newSize <<= 1;
    }
    return (int) Math.min(newSize, Integer.MAX_VALUE);
  }

  /**
   * Shrinks every column array that is longer than {@code size} down to exactly {@code size}
   * elements. Arrays that are already at or below {@code size} are left untouched.
   */
  static <T> void trimToSize(List<T[]> valuesByColumnIndex, int size) {
    for (int i = 0; i < valuesByColumnIndex.size(); ++i) {
      T[] values = valuesByColumnIndex.get(i);
      if (values.length > size) {
        valuesByColumnIndex.set(i, Arrays.copyOf(values, size));
      }
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.mobilitydata.gtfsvalidator.columns;

/**
 * Marker interface for generated ColumnAssignment classes, which track the mapping from GTFS
 * fields to column indices within a {@link GtfsColumnStore}.
 *
 * <p>The interface declares no methods; it only serves as a common type for those classes.
 */
public interface GtfsColumnAssignments {}
Loading
Loading