Skip to content

Commit

Permalink
feat: Do not call validators for missing columns (#1875)
Browse files Browse the repository at this point in the history
Now the validation will not be done for some validators if the column does not exist.
  • Loading branch information
jcpitre authored Oct 15, 2024
1 parent 1256b0d commit 01b7539
Show file tree
Hide file tree
Showing 22 changed files with 417 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
import com.google.common.base.Strings;
import java.util.HashMap;
import javax.annotation.Nullable;
import org.mobilitydata.gtfsvalidator.validator.ColumnInspector;

/** Read access to a header row in a CSV file. */
public class CsvHeader {
public class CsvHeader implements ColumnInspector {
private final HashMap<String, Integer> columnIndices = new HashMap<>();
private final String[] columnNames;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public static CsvFileLoader getInstance() {
final List<GtfsEntity> entities = new ArrayList<>();
boolean hasUnparsableRows = false;
final List<SingleEntityValidator<GtfsEntity>> singleEntityValidators =
createSingleEntityValidators(tableDescriptor.getEntityClass(), validatorProvider);
createSingleEntityValidators(tableDescriptor.getEntityClass(), header, validatorProvider);

try {
for (CsvRow row : csvFile) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

package org.mobilitydata.gtfsvalidator.table;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.flogger.FluentLogger;
import java.io.InputStream;
import java.lang.reflect.Modifier;
Expand Down Expand Up @@ -59,6 +62,24 @@ public class GtfsFeedLoader {
private final List<Class<? extends FileValidator>> multiFileValidatorsWithParsingErrors =
new ArrayList<>();

public enum SkippedValidatorReason {
SINGLE_ENTITY_VALIDATORS_WITH_ERROR,
SINGLE_FILE_VALIDATORS_WITH_ERROR,
MULTI_FILE_VALIDATORS_WITH_ERROR,
VALIDATORS_NO_NEED_TO_RUN
}

/**
* After running, will contain all validators that were skipped, grouped by
* SkippedValidatorReason. Get the synchronized version because validation is multithreaded.
*/
private final Multimap<SkippedValidatorReason, Class<?>> skippedValidators =
Multimaps.synchronizedListMultimap(ArrayListMultimap.create());

public Multimap<SkippedValidatorReason, Class<?>> getSkippedValidators() {
return skippedValidators;
}

public GtfsFeedLoader(
ImmutableList<Class<? extends GtfsFileDescriptor<?>>> tableDescriptorClasses) {
for (Class<? extends GtfsFileDescriptor<?>> clazz : tableDescriptorClasses) {
Expand Down Expand Up @@ -92,18 +113,14 @@ public void setNumThreads(int numThreads) {
this.numThreads = numThreads;
}

public List<Class<? extends FileValidator>> getMultiFileValidatorsWithParsingErrors() {
return Collections.unmodifiableList(multiFileValidatorsWithParsingErrors);
}

@SuppressWarnings("unchecked")
@MemoryMonitor()
public GtfsFeedContainer loadAndValidate(
GtfsInput gtfsInput, ValidatorProvider validatorProvider, NoticeContainer noticeContainer)
throws InterruptedException {
logger.atInfo().log("Loading in %d threads", numThreads);
ExecutorService exec = Executors.newFixedThreadPool(numThreads);

skippedValidators.clear();
List<Callable<TableAndNoticeContainers>> loaderCallables = new ArrayList<>();
Map<String, GtfsTableDescriptor<?>> remainingDescriptors =
(Map<String, GtfsTableDescriptor<?>>) tableDescriptors.clone();
Expand All @@ -118,6 +135,7 @@ public GtfsFeedContainer loadAndValidate(
GtfsEntityContainer<?, ?> tableContainer;
// The descriptor knows what loader to use to load the file
TableLoader tableLoader = tableDescriptor.getTableLoader();
tableLoader.setSkippedValidators(skippedValidators);
try (InputStream inputStream = gtfsInput.getFile(filename)) {
try {
tableContainer =
Expand Down Expand Up @@ -171,7 +189,7 @@ public GtfsFeedContainer loadAndValidate(
}
}

private static void loadTables(
private void loadTables(
NoticeContainer noticeContainer,
ExecutorService exec,
List<Callable<TableAndNoticeContainers>> loaderCallables,
Expand Down Expand Up @@ -200,8 +218,7 @@ private void executeMultiFileValidators(
// Validators with parser-error dependencies will not be returned here, but instead added to
// the skippedValidators list.
for (FileValidator validator :
validatorProvider.createMultiFileValidators(
feed, multiFileValidatorsWithParsingErrors::add)) {
validatorProvider.createMultiFileValidators(feed, skippedValidators)) {
validatorCallables.add(
() -> {
NoticeContainer validatorNotices = new NoticeContainer();
Expand All @@ -212,7 +229,7 @@ private void executeMultiFileValidators(
collectMultiFileValidationNotices(noticeContainer, exec, validatorCallables);
}

private static void collectMultiFileValidationNotices(
private void collectMultiFileValidationNotices(
NoticeContainer noticeContainer,
ExecutorService exec,
List<Callable<NoticeContainer>> validatorCallables)
Expand All @@ -229,8 +246,7 @@ private static void collectMultiFileValidationNotices(
}

/** Adds a ThreadExecutionError to the notice container. */
private static void addThreadExecutionError(
ExecutionException e, NoticeContainer noticeContainer) {
private void addThreadExecutionError(ExecutionException e, NoticeContainer noticeContainer) {
logger.atSevere().withCause(e).log("Execution exception");
noticeContainer.addSystemError(new ThreadExecutionError(e));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ public abstract <C extends GtfsEntityContainer> C createContainerForInvalidStatu
// True if the specified file is required in a feed.
private boolean required;

private TableStatus tableStatus;

public abstract boolean isRecommended();

public abstract Class<T> getEntityClass();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package org.mobilitydata.gtfsvalidator.table;

import static org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.*;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.mobilitydata.gtfsvalidator.notice.MissingRecommendedFileNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFileNotice;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.validator.ColumnInspector;
import org.mobilitydata.gtfsvalidator.validator.FileValidator;
import org.mobilitydata.gtfsvalidator.validator.SingleEntityValidator;
import org.mobilitydata.gtfsvalidator.validator.ValidatorProvider;
Expand All @@ -15,21 +18,8 @@
/** Parent class for the different file loaders. */
public abstract class TableLoader {

private static final List<Class<? extends SingleEntityValidator>>
singleEntityValidatorsWithParsingErrors = new ArrayList<>();

private static final List<Class<? extends FileValidator>> singleFileValidatorsWithParsingErrors =
new ArrayList<>();

public static List<Class<? extends FileValidator>> getValidatorsWithParsingErrors() {
return Collections.unmodifiableList(singleFileValidatorsWithParsingErrors);
}

public static List<Class<? extends SingleEntityValidator>>
getSingleEntityValidatorsWithParsingErrors() {
return Collections.unmodifiableList(singleEntityValidatorsWithParsingErrors);
}

protected Multimap<SkippedValidatorReason, Class<?>> skippedValidators =
ArrayListMultimap.create();
/**
* Load the file
*
Expand All @@ -45,18 +35,20 @@ abstract GtfsEntityContainer load(
InputStream csvInputStream,
NoticeContainer noticeContainer);

public void setSkippedValidators(Multimap<SkippedValidatorReason, Class<?>> skippedValidators) {
this.skippedValidators = skippedValidators;
}

protected <T extends GtfsEntity> List<SingleEntityValidator<T>> createSingleEntityValidators(
Class<T> entityClass, ValidatorProvider validatorProvider) {
return validatorProvider.createSingleEntityValidators(
entityClass, singleEntityValidatorsWithParsingErrors::add);
Class<T> entityClass, ColumnInspector header, ValidatorProvider validatorProvider) {
return validatorProvider.createSingleEntityValidators(entityClass, header, skippedValidators);
}

protected <T extends GtfsEntity, D extends GtfsTableDescriptor>
List<FileValidator> createSingleFileValidators(
GtfsEntityContainer<T, D> table, ValidatorProvider validatorProvider) {

return validatorProvider.createSingleFileValidators(
table, singleFileValidatorsWithParsingErrors::add);
return validatorProvider.createSingleFileValidators(table, skippedValidators);
}

public GtfsEntityContainer loadMissingFile(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.mobilitydata.gtfsvalidator.validator;

public interface ColumnInspector {
boolean hasColumn(String columnName);
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@

package org.mobilitydata.gtfsvalidator.validator;

import static org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.SkippedValidatorReason.*;

import com.google.common.collect.ListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.flogger.FluentLogger;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import org.mobilitydata.gtfsvalidator.table.GtfsEntity;
import org.mobilitydata.gtfsvalidator.table.GtfsEntityContainer;
import org.mobilitydata.gtfsvalidator.table.GtfsFeedContainer;
import org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader;
import org.mobilitydata.gtfsvalidator.table.GtfsTableContainer;
import org.mobilitydata.gtfsvalidator.table.GtfsTableDescriptor;
import org.mobilitydata.gtfsvalidator.validator.ValidatorLoader.ValidatorWithDependencyStatus;
Expand Down Expand Up @@ -80,7 +83,8 @@ public TableHeaderValidator getTableHeaderValidator() {
@SuppressWarnings("unchecked")
public <T extends GtfsEntity> List<SingleEntityValidator<T>> createSingleEntityValidators(
Class<T> clazz,
Consumer<Class<? extends SingleEntityValidator<T>>> singleEntityValidatorsWithParsingErrors) {
ColumnInspector header,
Multimap<GtfsFeedLoader.SkippedValidatorReason, Class<?>> skippedValidators) {
List<SingleEntityValidator<T>> validators = new ArrayList<>();
for (Class<? extends SingleEntityValidator<?>> validatorClass :
singleEntityValidators.get(clazz)) {
Expand All @@ -91,10 +95,14 @@ public <T extends GtfsEntity> List<SingleEntityValidator<T>> createSingleEntityV
((Class<? extends SingleEntityValidator<?>>) validatorClass),
validationContext);
if (validatorWithDependencyStatus.dependenciesHaveErrors()) {
singleEntityValidatorsWithParsingErrors.accept(
(Class<? extends SingleEntityValidator<T>>) validatorClass);
skippedValidators.put(SINGLE_ENTITY_VALIDATORS_WITH_ERROR, validatorClass);
} else {
validators.add((SingleEntityValidator<T>) validatorWithDependencyStatus.validator());
var validator = validatorWithDependencyStatus.validator();
if (validator.shouldCallValidate(header)) {
validators.add((SingleEntityValidator<T>) validator);
} else {
skippedValidators.put(VALIDATORS_NO_NEED_TO_RUN, validatorClass);
}
}
} catch (ReflectiveOperationException | ValidatorLoaderException e) {
logger.atSevere().withCause(e).log(
Expand All @@ -109,17 +117,19 @@ public <T extends GtfsEntity> List<SingleEntityValidator<T>> createSingleEntityV
public <T extends GtfsEntity, D extends GtfsTableDescriptor>
List<FileValidator> createSingleFileValidators(
GtfsEntityContainer<T, D> table,
Consumer<Class<? extends FileValidator>> validatorsWithParsingErrors) {
Multimap<GtfsFeedLoader.SkippedValidatorReason, Class<?>> skippedValidators) {
List<FileValidator> validators = new ArrayList<>();
for (Class<? extends FileValidator> validatorClass :
singleFileValidators.get((Class<? extends GtfsTableContainer<?, ?>>) table.getClass())) {
try {
ValidatorWithDependencyStatus<? extends FileValidator> validatorWithStatus =
ValidatorLoader.createSingleFileValidator(validatorClass, table, validationContext);
if (validatorWithStatus.dependenciesHaveErrors()) {
validatorsWithParsingErrors.accept(validatorClass);
} else {
skippedValidators.put(SINGLE_FILE_VALIDATORS_WITH_ERROR, validatorClass);
} else if (validatorWithStatus.validator().shouldCallValidate()) {
validators.add(validatorWithStatus.validator());
} else {
skippedValidators.put(VALIDATORS_NO_NEED_TO_RUN, validatorClass);
}
} catch (ReflectiveOperationException | ValidatorLoaderException e) {
logger.atSevere().withCause(e).log(
Expand All @@ -131,17 +141,22 @@ List<FileValidator> createSingleFileValidators(

@Override
public List<FileValidator> createMultiFileValidators(
GtfsFeedContainer feed, Consumer<Class<? extends FileValidator>> skippedValidators) {
GtfsFeedContainer feed,
Multimap<GtfsFeedLoader.SkippedValidatorReason, Class<?>> skippedValidators) {
ArrayList<FileValidator> validators = new ArrayList<>();
validators.ensureCapacity(multiFileValidators.size());
for (Class<? extends FileValidator> validatorClass : multiFileValidators) {
try {
ValidatorWithDependencyStatus<? extends FileValidator> validatorWithStatus =
ValidatorLoader.createMultiFileValidator(validatorClass, feed, validationContext);
if (validatorWithStatus.dependenciesHaveErrors()) {
skippedValidators.accept(validatorClass);
skippedValidators.put(MULTI_FILE_VALIDATORS_WITH_ERROR, validatorClass);
} else {
validators.add(validatorWithStatus.validator());
if (validatorWithStatus.validator().shouldCallValidate()) {
validators.add(validatorWithStatus.validator());
} else {
skippedValidators.put(VALIDATORS_NO_NEED_TO_RUN, validatorClass);
}
}
} catch (ReflectiveOperationException | ValidatorLoaderException e) {
logger.atSevere().withCause(e).log(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,14 @@
/** Interface for validators that handle one as a whole or several files. */
public abstract class FileValidator {
public abstract void validate(NoticeContainer noticeContainer);

/**
* Check if the validate method should be called. For example if the child field is a ForeignKey,
* there's no point to validate if the child field column does not exist.
*
* @return true if the validate method should be called.
*/
public boolean shouldCallValidate() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,15 @@
/** Base class for validators that handle a single entity and do not need more information. */
public abstract class SingleEntityValidator<T extends GtfsEntity> {
public abstract void validate(T entity, NoticeContainer noticeContainer);

/**
* Check if the column used by this validator exists at all. If not, no need to call the validate
* method.
*
* @param header A ColumnInspector that can tell if a column exists in the GTFS file.
* @return true if the validate method should be called.
*/
public boolean shouldCallValidate(ColumnInspector header) {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@

package org.mobilitydata.gtfsvalidator.validator;

import com.google.common.collect.Multimap;
import java.util.List;
import java.util.function.Consumer;
import org.mobilitydata.gtfsvalidator.table.GtfsEntity;
import org.mobilitydata.gtfsvalidator.table.GtfsEntityContainer;
import org.mobilitydata.gtfsvalidator.table.GtfsFeedContainer;
import org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.SkippedValidatorReason;
import org.mobilitydata.gtfsvalidator.table.GtfsTableDescriptor;

/**
Expand Down Expand Up @@ -48,20 +49,22 @@ public interface ValidatorProvider {
*/
<T extends GtfsEntity> List<SingleEntityValidator<T>> createSingleEntityValidators(
Class<T> clazz,
Consumer<Class<? extends SingleEntityValidator<T>>> singleEntityValidatorsWithParsingErrors);
ColumnInspector header,
Multimap<SkippedValidatorReason, Class<?>> skippedValidators);

/**
* Creates a list of validators for the given table.
*
* <p>Use {@link ValidatorUtil#invokeSingleFileValidators} to invoke the created validators.
*
* @param table GTFS table to validate
* @param <T> type of the GTFS entity
* @param table GTFS table to validate
* @param skippedValidators A map where to put the validators classes that did not run.
*/
<T extends GtfsEntity, D extends GtfsTableDescriptor>
List<FileValidator> createSingleFileValidators(
GtfsEntityContainer<T, D> table,
Consumer<Class<? extends FileValidator>> validatorsWithParsingErrors);
Multimap<SkippedValidatorReason, Class<?>> skippedValidators);

/**
* Creates a list of cross-table validators. Any validator that has a dependency with parse errors
Expand All @@ -74,5 +77,5 @@ List<FileValidator> createSingleFileValidators(
* @param skippedValidators
*/
List<FileValidator> createMultiFileValidators(
GtfsFeedContainer feed, Consumer<Class<? extends FileValidator>> skippedValidators);
GtfsFeedContainer feed, Multimap<SkippedValidatorReason, Class<?>> skippedValidators);
}
Loading

0 comments on commit 01b7539

Please sign in to comment.