diff --git a/benchmarks/experiment_configs/datasets/core50.json b/benchmarks/experiment_configs/datasets/core50.json new file mode 100644 index 00000000..27bfa2fb --- /dev/null +++ b/benchmarks/experiment_configs/datasets/core50.json @@ -0,0 +1,4 @@ +{ + "dataset_name": "Core50", + "num_outputs": 50 +} \ No newline at end of file diff --git a/benchmarks/experiment_configs/fine-tuning-core50-vitb16.json b/benchmarks/experiment_configs/fine-tuning-core50-vitb16.json new file mode 100644 index 00000000..4ef461ef --- /dev/null +++ b/benchmarks/experiment_configs/fine-tuning-core50-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "core50-vit-b16-8updates.json", + "model": "vit-b16.json", + "updater": "fine-tuning-core50.json", + "dataset": "core50.json" +} diff --git a/benchmarks/experiment_configs/fine-tuning-core50.json b/benchmarks/experiment_configs/fine-tuning-core50.json new file mode 100644 index 00000000..ecd9e768 --- /dev/null +++ b/benchmarks/experiment_configs/fine-tuning-core50.json @@ -0,0 +1,6 @@ +{ + "scenario": "core50-8updates.json", + "model": "resnet18.json", + "updater": "fine-tuning-core50.json", + "dataset": "core50.json" +} diff --git a/benchmarks/experiment_configs/joint-core50-vitb16.json b/benchmarks/experiment_configs/joint-core50-vitb16.json new file mode 100644 index 00000000..308df1ca --- /dev/null +++ b/benchmarks/experiment_configs/joint-core50-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "core50-vit-b16-8updates.json", + "model": "vit-b16.json", + "updater": "joint-core50.json", + "dataset": "core50.json" +} diff --git a/benchmarks/experiment_configs/joint-core50.json b/benchmarks/experiment_configs/joint-core50.json new file mode 100644 index 00000000..1d6dbc8d --- /dev/null +++ b/benchmarks/experiment_configs/joint-core50.json @@ -0,0 +1,6 @@ +{ + "scenario": "core50-8updates.json", + "model": "resnet18.json", + "updater": "joint-core50.json", + "dataset": "core50.json" +} diff --git 
a/benchmarks/experiment_configs/scenarios/arxiv-1update.json b/benchmarks/experiment_configs/scenarios/arxiv-1update.json new file mode 100644 index 00000000..c680521e --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/arxiv-1update.json @@ -0,0 +1,10 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]], + "num_tasks": 1, + "max_epochs": 2, + "optimizer": "AdamW", + "learning_rate": 0.00002, + "weight_decay": 0.01 +} diff --git a/benchmarks/experiment_configs/scenarios/clear10-1update.json b/benchmarks/experiment_configs/scenarios/clear10-1update.json new file mode 100644 index 00000000..fcb24eb7 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/clear10-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], + "num_tasks": 1, + "max_epochs": 100, + "optimizer": "SGD", + "learning_rate": 0.01, + "momentum": 0.9, + "weight_decay": 1e-5, + "learning_rate_scheduler": "StepLR", + "learning_rate_scheduler_step_size": 30, + "learning_rate_scheduler_gamma": 0.1, + "learning_rate_scheduler_interval": "epoch" +} diff --git a/benchmarks/experiment_configs/scenarios/clear10-vit-b16-10updates.json b/benchmarks/experiment_configs/scenarios/clear10-vit-b16-10updates.json index 7efc9114..1aba6408 100644 --- a/benchmarks/experiment_configs/scenarios/clear10-vit-b16-10updates.json +++ b/benchmarks/experiment_configs/scenarios/clear10-vit-b16-10updates.json @@ -7,8 +7,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 10, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", "momentum": 0.0, "weight_decay": 0.0 } diff --git a/benchmarks/experiment_configs/scenarios/clear10-vit-b16-1update.json 
b/benchmarks/experiment_configs/scenarios/clear10-vit-b16-1update.json new file mode 100644 index 00000000..a0b07f1d --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/clear10-vit-b16-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], + "num_tasks": 1, + "max_epochs": 10, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 10, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/clear100-1update.json b/benchmarks/experiment_configs/scenarios/clear100-1update.json new file mode 100644 index 00000000..c59694ba --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/clear100-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], + "num_tasks": 1, + "max_epochs": 100, + "optimizer": "SGD", + "learning_rate": 0.01, + "momentum": 0.9, + "weight_decay": 1e-5, + "learning_rate_scheduler": "StepLR", + "learning_rate_scheduler_step_size": 30, + "learning_rate_scheduler_gamma": 0.1, + "learning_rate_scheduler_interval": "epoch" +} diff --git a/benchmarks/experiment_configs/scenarios/clear100-vit-b16-11updates.json b/benchmarks/experiment_configs/scenarios/clear100-vit-b16-11updates.json index 813fa760..7cafbb84 100644 --- a/benchmarks/experiment_configs/scenarios/clear100-vit-b16-11updates.json +++ b/benchmarks/experiment_configs/scenarios/clear100-vit-b16-11updates.json @@ -7,8 +7,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 10, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", 
"momentum": 0.0, "weight_decay": 0.0 } diff --git a/benchmarks/experiment_configs/scenarios/clear100-vit-b16-1update.json b/benchmarks/experiment_configs/scenarios/clear100-vit-b16-1update.json new file mode 100644 index 00000000..b1854625 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/clear100-vit-b16-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], + "num_tasks": 1, + "max_epochs": 10, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 10, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/core50-1update.json b/benchmarks/experiment_configs/scenarios/core50-1update.json new file mode 100644 index 00000000..f0dcf090 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/core50-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.3, + "scenario_name": "DataIncrementalScenario", + "num_tasks": 1, + "max_epochs": 50, + "optimizer": "SGD", + "learning_rate": 0.01, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 50, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.9, + "weight_decay": 2e-4, + "data_ids": [[0, 1, 2, 3, 4, 5, 6, 7]] + } \ No newline at end of file diff --git a/benchmarks/experiment_configs/scenarios/core50-8updates.json b/benchmarks/experiment_configs/scenarios/core50-8updates.json new file mode 100644 index 00000000..f2e2d156 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/core50-8updates.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.3, + "scenario_name": "DataIncrementalScenario", + "num_tasks": 8, + "max_epochs": 50, + "optimizer": "SGD", + "learning_rate": 0.01, + "learning_rate_scheduler": "CosineAnnealingLR", + 
"learning_rate_scheduler_t_max": 50, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.9, + "weight_decay": 2e-4, + "data_ids": [0, 1, 2, 3, 4, 5, 6, 7] + } \ No newline at end of file diff --git a/benchmarks/experiment_configs/scenarios/core50-vit-b16-1update.json b/benchmarks/experiment_configs/scenarios/core50-vit-b16-1update.json new file mode 100644 index 00000000..559e2ae0 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/core50-vit-b16-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.3, + "scenario_name": "DataIncrementalScenario", + "num_tasks": 1, + "max_epochs": 10, + "optimizer": "SGD", + "learning_rate": 0.01, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 10, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.9, + "weight_decay": 2e-4, + "data_ids": [[0, 1, 2, 3, 4, 5, 6, 7]] + } \ No newline at end of file diff --git a/benchmarks/experiment_configs/scenarios/core50-vit-b16-8updates.json b/benchmarks/experiment_configs/scenarios/core50-vit-b16-8updates.json new file mode 100644 index 00000000..0accce5e --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/core50-vit-b16-8updates.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.3, + "scenario_name": "DataIncrementalScenario", + "num_tasks": 8, + "max_epochs": 10, + "optimizer": "SGD", + "learning_rate": 0.01, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 10, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.9, + "weight_decay": 2e-4, + "data_ids": [0, 1, 2, 3, 4, 5, 6, 7] + } \ No newline at end of file diff --git a/benchmarks/experiment_configs/scenarios/domainnet-1update.json b/benchmarks/experiment_configs/scenarios/domainnet-1update.json new file mode 100644 index 00000000..64d028e9 --- /dev/null +++ 
b/benchmarks/experiment_configs/scenarios/domainnet-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.3, + "scenario_name": "DataIncrementalScenario", + "groupings": [["clipart", "infograph", "painting", "quickdraw", "real", "sketch"]], + "num_tasks": 1, + "max_epochs": 50, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 50, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/domainnet-6updates.json b/benchmarks/experiment_configs/scenarios/domainnet-6updates.json index 411e7224..157fd3f3 100644 --- a/benchmarks/experiment_configs/scenarios/domainnet-6updates.json +++ b/benchmarks/experiment_configs/scenarios/domainnet-6updates.json @@ -7,8 +7,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 50, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", "momentum": 0.0, "weight_decay": 0.0, "data_ids": ["clipart", "infograph", "painting", "quickdraw", "real", "sketch"] diff --git a/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-1update.json b/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-1update.json new file mode 100644 index 00000000..d0411693 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.3, + "scenario_name": "DataIncrementalScenario", + "groupings": [["clipart", "infograph", "painting", "quickdraw", "real", "sketch"]], + "num_tasks": 1, + "max_epochs": 10, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 10, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", 
+ "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-6updates.json b/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-6updates.json index b7a596b4..40199524 100644 --- a/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-6updates.json +++ b/benchmarks/experiment_configs/scenarios/domainnet-vit-b16-6updates.json @@ -7,8 +7,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 10, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", "momentum": 0.0, "weight_decay": 0.0, "data_ids": ["clipart", "infograph", "painting", "quickdraw", "real", "sketch"] diff --git a/benchmarks/experiment_configs/scenarios/fmow-16updates.json b/benchmarks/experiment_configs/scenarios/fmow-16updates.json index e9bbfabf..706a827c 100644 --- a/benchmarks/experiment_configs/scenarios/fmow-16updates.json +++ b/benchmarks/experiment_configs/scenarios/fmow-16updates.json @@ -7,8 +7,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 50, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", "momentum": 0.0, "weight_decay": 0.0 } diff --git a/benchmarks/experiment_configs/scenarios/fmow-1update.json b/benchmarks/experiment_configs/scenarios/fmow-1update.json new file mode 100644 index 00000000..39e903e0 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/fmow-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]], + "num_tasks": 1, + "max_epochs": 50, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + 
"learning_rate_scheduler_t_max": 50, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/fmow-vit-b16-16updates.json b/benchmarks/experiment_configs/scenarios/fmow-vit-b16-16updates.json index 8daf30b7..24d45c8e 100644 --- a/benchmarks/experiment_configs/scenarios/fmow-vit-b16-16updates.json +++ b/benchmarks/experiment_configs/scenarios/fmow-vit-b16-16updates.json @@ -7,8 +7,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 10, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", "momentum": 0.0, "weight_decay": 0.0 } diff --git a/benchmarks/experiment_configs/scenarios/fmow-vit-b16-1update.json b/benchmarks/experiment_configs/scenarios/fmow-vit-b16-1update.json new file mode 100644 index 00000000..e0fde9ea --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/fmow-vit-b16-1update.json @@ -0,0 +1,15 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]], + "num_tasks": 1, + "max_epochs": 10, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 10, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/huffpost-1update.json b/benchmarks/experiment_configs/scenarios/huffpost-1update.json new file mode 100644 index 00000000..aa6713ff --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/huffpost-1update.json @@ -0,0 +1,10 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [[0, 1, 2, 3, 4, 5, 6]], + "num_tasks": 1, + 
"max_epochs": 4, + "optimizer": "AdamW", + "learning_rate": 0.00002, + "weight_decay": 0.01 +} diff --git a/benchmarks/experiment_configs/scenarios/multitext-1update.json b/benchmarks/experiment_configs/scenarios/multitext-1update.json new file mode 100644 index 00000000..26f2f6e2 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/multitext-1update.json @@ -0,0 +1,10 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [["ag_news", "yelp_review_full", "dbpedia_14", "yahoo_answers_topics"]], + "num_tasks": 1, + "max_epochs": 2, + "optimizer": "AdamW", + "learning_rate": 0.00002, + "weight_decay": 0.01 +} diff --git a/benchmarks/experiment_configs/scenarios/yearbook-17updates.json b/benchmarks/experiment_configs/scenarios/yearbook-17updates.json index 719acb1e..979e4d60 100644 --- a/benchmarks/experiment_configs/scenarios/yearbook-17updates.json +++ b/benchmarks/experiment_configs/scenarios/yearbook-17updates.json @@ -26,8 +26,8 @@ "learning_rate": 0.1, "learning_rate_scheduler": "CosineAnnealingLR", "learning_rate_scheduler_t_max": 50, - "learning_rate_scheduler_eta_min": 0.0001, - "learning_rate_scheduler_interval": "step", + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", "momentum": 0.0, "weight_decay": 0.0 } diff --git a/benchmarks/experiment_configs/scenarios/yearbook-1update.json b/benchmarks/experiment_configs/scenarios/yearbook-1update.json new file mode 100644 index 00000000..8f2d0fa8 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/yearbook-1update.json @@ -0,0 +1,33 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [ + [0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 
+ 75, 76, 77, 78, 79, + 80, 81, 82, 83] + ], + "num_tasks": 1, + "max_epochs": 50, + "optimizer": "SGD", + "learning_rate": 0.1, + "learning_rate_scheduler": "CosineAnnealingLR", + "learning_rate_scheduler_t_max": 50, + "learning_rate_scheduler_eta_min": 0, + "learning_rate_scheduler_interval": "epoch", + "momentum": 0.0, + "weight_decay": 0.0 +} diff --git a/benchmarks/experiment_configs/scenarios/yearbook-vit-b16-1update.json b/benchmarks/experiment_configs/scenarios/yearbook-vit-b16-1update.json new file mode 100644 index 00000000..d6801910 --- /dev/null +++ b/benchmarks/experiment_configs/scenarios/yearbook-vit-b16-1update.json @@ -0,0 +1,28 @@ +{ + "val_size": 0.1, + "scenario_name": "DataIncrementalScenario", + "groupings": [ + [0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, + 80, 81, 82, 83] + ], + "num_tasks": 1, + "max_epochs": 4, + "optimizer": "AdamW", + "learning_rate": 0.00002, + "weight_decay": 0.01 +} diff --git a/benchmarks/experiment_configs/updaters/fine-tuning-core50.json b/benchmarks/experiment_configs/updaters/fine-tuning-core50.json new file mode 100644 index 00000000..b650accf --- /dev/null +++ b/benchmarks/experiment_configs/updaters/fine-tuning-core50.json @@ -0,0 +1,4 @@ +{ + "updater": "FineTuning", + "batch_size": 64 +} \ No newline at end of file diff --git a/benchmarks/experiment_configs/updaters/joint-core50.json b/benchmarks/experiment_configs/updaters/joint-core50.json new file mode 100644 index 00000000..dbd3d97c --- /dev/null +++ b/benchmarks/experiment_configs/updaters/joint-core50.json @@ -0,0 +1,4 @@ +{ + "updater": "Joint", + "batch_size": 64 +} diff --git a/benchmarks/experiment_configs/updaters/offline-er-arxiv.json 
b/benchmarks/experiment_configs/updaters/offline-er-arxiv.json new file mode 100644 index 00000000..1c91980b --- /dev/null +++ b/benchmarks/experiment_configs/updaters/offline-er-arxiv.json @@ -0,0 +1,6 @@ +{ + "updater": "Offline-ER", + "batch_size": 64, + "batch_memory_frac": 0.5, + "memory_size": 11875 +} diff --git a/benchmarks/experiment_configs/updaters/offline-er-clear10-vit-b16.json b/benchmarks/experiment_configs/updaters/offline-er-clear10-vit-b16.json index 8140d002..32246679 100644 --- a/benchmarks/experiment_configs/updaters/offline-er-clear10-vit-b16.json +++ b/benchmarks/experiment_configs/updaters/offline-er-clear10-vit-b16.json @@ -2,5 +2,5 @@ "updater": "Offline-ER", "batch_size": 64, "batch_memory_frac": 0.5, - "memory_size": 10000 + "memory_size": 3300 } diff --git a/benchmarks/experiment_configs/updaters/offline-er-clear100-vit-b16.json b/benchmarks/experiment_configs/updaters/offline-er-clear100-vit-b16.json index 957a0c57..8140d002 100644 --- a/benchmarks/experiment_configs/updaters/offline-er-clear100-vit-b16.json +++ b/benchmarks/experiment_configs/updaters/offline-er-clear100-vit-b16.json @@ -2,5 +2,5 @@ "updater": "Offline-ER", "batch_size": 64, "batch_memory_frac": 0.5, - "memory_size": 99999999 + "memory_size": 10000 } diff --git a/benchmarks/experiment_configs/upperbound-arxiv.json b/benchmarks/experiment_configs/upperbound-arxiv.json new file mode 100644 index 00000000..af918eb7 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-arxiv.json @@ -0,0 +1,6 @@ +{ + "scenario": "arxiv-1update.json", + "model": "bert.json", + "updater": "joint-arxiv.json", + "dataset": "arxiv.json" +} diff --git a/benchmarks/experiment_configs/upperbound-clear10-vitb16.json b/benchmarks/experiment_configs/upperbound-clear10-vitb16.json new file mode 100644 index 00000000..bfa70a80 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-clear10-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "clear10-vit-b16-1update.json", + "model": 
"vit-b16.json", + "updater": "joint-clear-vit-b16.json", + "dataset": "clear10.json" +} diff --git a/benchmarks/experiment_configs/upperbound-clear10.json b/benchmarks/experiment_configs/upperbound-clear10.json new file mode 100644 index 00000000..f038f414 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-clear10.json @@ -0,0 +1,6 @@ +{ + "scenario": "clear10-1update.json", + "model": "resnet18.json", + "updater": "joint-clear.json", + "dataset": "clear10.json" +} diff --git a/benchmarks/experiment_configs/upperbound-clear100-vitb16.json b/benchmarks/experiment_configs/upperbound-clear100-vitb16.json new file mode 100644 index 00000000..94454ef1 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-clear100-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "clear100-vit-b16-1update.json", + "model": "vit-b16.json", + "updater": "joint-clear-vit-b16.json", + "dataset": "clear100.json" +} diff --git a/benchmarks/experiment_configs/upperbound-clear100.json b/benchmarks/experiment_configs/upperbound-clear100.json new file mode 100644 index 00000000..c90a5951 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-clear100.json @@ -0,0 +1,6 @@ +{ + "scenario": "clear100-1update.json", + "model": "resnet18.json", + "updater": "joint-clear.json", + "dataset": "clear100.json" +} diff --git a/benchmarks/experiment_configs/upperbound-core50-vitb16.json b/benchmarks/experiment_configs/upperbound-core50-vitb16.json new file mode 100644 index 00000000..a67f4d9a --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-core50-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "core50-vit-b16-1update.json", + "model": "vit-b16.json", + "updater": "joint-core50.json", + "dataset": "core50.json" +} diff --git a/benchmarks/experiment_configs/upperbound-core50.json b/benchmarks/experiment_configs/upperbound-core50.json new file mode 100644 index 00000000..cea9bfcd --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-core50.json @@ -0,0 +1,6 @@ +{ + "scenario": 
"core50-1update.json", + "model": "resnet18.json", + "updater": "joint-core50.json", + "dataset": "core50.json" +} diff --git a/benchmarks/experiment_configs/upperbound-domainnet-vitb16.json b/benchmarks/experiment_configs/upperbound-domainnet-vitb16.json new file mode 100644 index 00000000..adcaaee0 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-domainnet-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "domainnet-vit-b16-1update.json", + "model": "vit-b16.json", + "updater": "joint-domainnet.json", + "dataset": "domainnet.json" +} diff --git a/benchmarks/experiment_configs/upperbound-domainnet.json b/benchmarks/experiment_configs/upperbound-domainnet.json new file mode 100644 index 00000000..1065f936 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-domainnet.json @@ -0,0 +1,6 @@ +{ + "scenario": "domainnet-1update.json", + "model": "resnet18.json", + "updater": "joint-domainnet.json", + "dataset": "domainnet.json" +} diff --git a/benchmarks/experiment_configs/upperbound-fmow-vitb16.json b/benchmarks/experiment_configs/upperbound-fmow-vitb16.json new file mode 100644 index 00000000..f362ecd0 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-fmow-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "fmow-vit-b16-1update.json", + "model": "vit-b16.json", + "updater": "joint-fmow.json", + "dataset": "fmow.json" +} diff --git a/benchmarks/experiment_configs/upperbound-fmow.json b/benchmarks/experiment_configs/upperbound-fmow.json new file mode 100644 index 00000000..6725a2b4 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-fmow.json @@ -0,0 +1,6 @@ +{ + "scenario": "fmow-1update.json", + "model": "resnet18.json", + "updater": "joint-fmow.json", + "dataset": "fmow.json" +} diff --git a/benchmarks/experiment_configs/upperbound-huffpost.json b/benchmarks/experiment_configs/upperbound-huffpost.json new file mode 100644 index 00000000..d5e02fca --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-huffpost.json @@ -0,0 +1,6 @@ +{ 
+ "scenario": "huffpost-1update.json", + "model": "bert.json", + "updater": "joint-huffpost.json", + "dataset": "huffpost.json" +} diff --git a/benchmarks/experiment_configs/upperbound-multitext.json b/benchmarks/experiment_configs/upperbound-multitext.json new file mode 100644 index 00000000..a87f7a40 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-multitext.json @@ -0,0 +1,6 @@ +{ + "scenario": "multitext-1update.json", + "model": "bert.json", + "updater": "joint-multitext.json", + "dataset": "multitext.json" +} diff --git a/benchmarks/experiment_configs/upperbound-yearbook-vitb16.json b/benchmarks/experiment_configs/upperbound-yearbook-vitb16.json new file mode 100644 index 00000000..6adc4d53 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-yearbook-vitb16.json @@ -0,0 +1,6 @@ +{ + "scenario": "yearbook-vit-b16-1update.json", + "model": "vit-b16.json", + "updater": "joint-yearbook.json", + "dataset": "yearbook.json" +} diff --git a/benchmarks/experiment_configs/upperbound-yearbook.json b/benchmarks/experiment_configs/upperbound-yearbook.json new file mode 100644 index 00000000..1198e632 --- /dev/null +++ b/benchmarks/experiment_configs/upperbound-yearbook.json @@ -0,0 +1,6 @@ +{ + "scenario": "yearbook-1update.json", + "model": "resnet18-cifar.json", + "updater": "joint-yearbook.json", + "dataset": "yearbook.json" +} diff --git a/doc/benchmarking/renate_benchmarks.rst b/doc/benchmarking/renate_benchmarks.rst index f914eb84..21579398 100644 --- a/doc/benchmarking/renate_benchmarks.rst +++ b/doc/benchmarking/renate_benchmarks.rst @@ -97,7 +97,21 @@ The full list of models and model names including a short description is provide - Wrapper around Hugging Face transformers. - * ``pretrained_model_name_or_path``: Hugging Face `transformer ID `__. * ``num_outputs``: The number of classes. - + * - `~renate.benchmark.models.l2p.LearningToPromptTransformer` + - `Learning to Prompt Transformer `_. Supports both text and vision transformers. 
+ - * ``pretrained_model_name_or_path``: Hugging Face `transformer ID `__. + * ``num_outputs``: The number of classes. + * ``pool_size``: Total number of prompts in the prompt pool. + * ``pool_selection_size``: Number of prompts to select for each input from the pool. + * ``prompt_size``: Number of input tokens each prompt is equivalent to. + * ``prompt_key_dim``: Dimensionality of the features used for prompt matching. + * - `~renate.benchmark.models.spromptmodel.SPromptTransformer` + - `S-Prompt Transformer `_. + - * ``pretrained_model_name_or_path``: Hugging Face `transformer ID `__. + * ``num_outputs``: The number of classes. + * ``prompt_size``: Number of input tokens each prompt is equivalent to. + * ``clusters_per_task``: Number of clusters for K-Means in task identification. + * ``per_task_classifier``: Flag to share or use individual classifier per task. .. _benchmarking-renate-benchmarks-datasets: @@ -173,6 +187,14 @@ The following table contains the list of supported datasets. - multiple - Any `Hugging Face dataset `__ can be used. Just prepend the prefix ``hfd-``, e.g., ``hfd-rotten_tomatoes``. Select input and target columns via ``config_space``, e.g., add ``"input_column": "text", "target_column": "label"`` for the `rotten_tomatoes `__ example. - Please refer to `the official documentation `__. + * - CDDB + - Image Classification: deepfake detection + - 2 classes, 5 domains, each generated using image generation techniques: GauGAN, BigGAN, WildDeepfake, WhichFaceReal, SAN respectively from HARD evaluation scenario. Numbers vary across domains. + - Li, Chuqiao, et al. A continual deepfake detection benchmark: Dataset, methods, and essentials. IEEE/CVF Winter Conference on Applications of Computer Vision. 2023. + * - Core50 + - Image Classification + - 50 classes, 8 (0-7) domains for training, a single test set for evaluation. + - Vincenzo Lomonaco and Davide Maltoni: CORe50: a new Dataset and Benchmark for continual Object Recognition. 
1st Annual Conference on Robot Learning, PMLR 78:17-26, 2017. .. _benchmarking-renate-benchmarks-scenarios: diff --git a/doc/getting_started/supported_algorithms.rst b/doc/getting_started/supported_algorithms.rst index 02c0ce98..9c89c7a9 100644 --- a/doc/getting_started/supported_algorithms.rst +++ b/doc/getting_started/supported_algorithms.rst @@ -41,7 +41,10 @@ using Renate (e.g., using :py:func:`~renate.training.training.run_training_job`; - A class that implements a Learning to Prompt method for ViTs. The methods trains only the input prompts that are sampled from a prompt pool in an input dependent fashion. * - ``"LearningToPromptReplay"`` - :py:class:`LearningToPromptLearner ` - - A class that extends the Learning to Prompt method to use a memory replay method like "Offline-ER" + - A class that extends the Learning to Prompt method to use a memory replay method like "Offline-ER". + * - ``"S-Prompts"`` + - :py:class:`SPeft ` + - A class that (currently) implements S-Prompts method for memory-free continual learning when used with `SPromptTransformer` model. The method trains a set of input prompts in an update dependent fashion. * - ``"Avalanche-ER"`` - :py:class:`AvalancheReplayLearner ` - A wrapper which gives access to Experience Replay as implemented in the Avalanche library. This method is the equivalent to our Offline-ER. 
diff --git a/doc/requirements.txt b/doc/requirements.txt index feb0cd8e..63217394 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,9 +2,9 @@ docutils==0.20.1 Sphinx==7.2.6 sphinx-copybutton==0.5.2 sphinx-hoverxref==1.3.0 -sphinxext-opengraph==0.8.2 -pydata-sphinx-theme==0.14.0 -sphinx-autodoc-typehints==1.24.0 +sphinxext-opengraph==0.9.0 +pydata-sphinx-theme==0.14.4 +sphinx-autodoc-typehints==1.25.2 sphinx-paramlinks==0.6.0 # Temporarily added diff --git a/pyproject.toml b/pyproject.toml index 5cf9e031..64b0dad0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,12 +25,12 @@ benchmark = [ "wild-time-data==0.1.1", ] dev = [ - "black==23.9.1", + "black==23.11.0", "avalanche_lib==0.3.1", "wild-time-data==0.1.1", "torch>=1.10.0, <1.12.2", # PyTest Dependencies - "pytest==7.4.2", + "pytest==7.4.3", "pytest-cov==4.1.0", "pytest-helpers-namespace==2021.12.29", ] diff --git a/requirements.txt b/requirements.txt index 241adf93..9ff1955f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,17 @@ -numpy>=1.17.2, <1.26.1 +numpy>=1.17.2, <1.26.3 torch>=1.10.0, <1.13.2 -pandas>=1.4.0, <2.1.1 -boto3>=1.26.0, <1.28.51 +pandas>=1.4.0, <2.1.4 +boto3>=1.26.0, <1.33.8 requests>=2.31.0, <2.31.1 -sagemaker>=2.112.0, <2.186.1 -syne-tune[aws,gpsearchers]>=0.6.0, <0.9.2 +sagemaker>=2.112.0, <2.199.1 +syne-tune[aws,gpsearchers]>=0.6.0, <0.10.1 pytorch-lightning>=1.8.0, <1.9.5 -Pillow>=9.0, <10.0.2 +Pillow>=9.0, <10.1.1 tabulate>=0.9.0, <0.9.1 tensorboardX>=2.5.0, <2.6.3 torchmetrics>=0.11.0, <0.11.5 -torchvision>=0.13.0, <0.15.3 -deepspeed>=0.9.0, <0.10.4 -datasets>=2.9.0, <2.14.6 -transformers>=4.31.0, <4.33.3 -scipy>=1.9.0, <1.11.3 +torchvision>=0.13.0, <0.16.2 +deepspeed>=0.9.0, <0.12.5 +datasets>=2.9.0, <2.15.1 +transformers>=4.31.0, <4.35.3 +scipy>=1.9.0, <1.11.5 diff --git a/src/renate/__init__.py b/src/renate/__init__.py index 0a7410a7..fd9b1d24 100644 --- a/src/renate/__init__.py +++ b/src/renate/__init__.py @@ -14,4 +14,4 @@ 
_renate_logger.addHandler(_handler) _renate_logger.propagate = False -__version__ = "0.4.0" +__version__ = "0.5.0" diff --git a/src/renate/benchmark/datasets/vision_datasets.py b/src/renate/benchmark/datasets/vision_datasets.py index 7368ae33..c57031f9 100644 --- a/src/renate/benchmark/datasets/vision_datasets.py +++ b/src/renate/benchmark/datasets/vision_datasets.py @@ -3,8 +3,10 @@ import json import os from pathlib import Path -from typing import List, Optional, Tuple, Union +import pickle +from typing import List, Literal, Optional, Tuple, Union +import gdown import pandas as pd import torch import torchvision @@ -14,7 +16,13 @@ from renate.benchmark.datasets.base import DataIncrementalDataModule from renate.data import ImageDataset from renate.data.data_module import RenateDataModule -from renate.utils.file import download_and_unzip_file, download_file, download_folder_from_s3 +from renate.utils.file import ( + download_and_unzip_file, + download_file, + download_file_from_s3, + download_folder_from_s3, + extract_file, +) class TinyImageNetDataModule(RenateDataModule): @@ -359,6 +367,8 @@ def __init__( def prepare_data(self) -> None: """Download DomainNet dataset for given domain.""" file_name = f"{self.data_id}.zip" + # update dataset name: + self._dataset_name = self.data_id url = "http://csr.bu.edu/ftp/visda/2019/multi-source/" if self.data_id in ["clipart", "painting"]: url = os.path.join(url, "groundtruth") @@ -402,3 +412,169 @@ def _get_filepaths_and_labels(self, split: str) -> Tuple[List[str], List[int]]: data = list(df.path.apply(lambda x: os.path.join(path, x))) labels = list(df.label) return data, labels + + +class CDDBDataModule(DataIncrementalDataModule): + md5s = { + "CDDB.tar.zip": "823b6496270ba03019dbd6af60cbcb6b", + } + + domains = ["gaugan", "biggan", "wild", "whichfaceisreal", "san"] + dataset_stats = { + "CDDB": dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + } + google_drive_id = "1NgB8ytBMFBFwyXJQvdVT_yek1EaaEHrg" + + def 
__init__( + self, + data_path: Union[Path, str], + src_bucket: Optional[str] = None, + src_object_name: Optional[str] = None, + domain: str = "gaugan", + val_size: float = defaults.VALIDATION_SIZE, + seed: int = defaults.SEED, + ): + assert domain in self.domains + super().__init__( + data_path=data_path, + data_id=domain.lower(), + src_bucket=src_bucket, + src_object_name=src_object_name, + val_size=val_size, + seed=seed, + ) + + def prepare_data(self) -> None: + """Download and extract the CDDB dataset archive.""" + file_name = "CDDB.tar.zip" + self._dataset_name = "" + if not self._verify_file(file_name): + if self._src_bucket is None: + gdown.download( + output=self._data_path, + quiet=False, + url=f"https://drive.google.com/u/0/uc?id={self.google_drive_id}&export=download&confirm=pbef", # noqa: E501 + ) + else: + download_file_from_s3( + dst=os.path.join(self._data_path, file_name), + src_bucket=self._src_bucket, + src_object_name=self._src_object_name, + ) + extract_file(data_path=self._data_path, file_name="CDDB.tar.zip", dataset_name="") + extract_file(data_path=self._data_path, file_name="CDDB.tar", dataset_name="") + + def setup(self) -> None: + self._dataset_name = "CDDB" # we need this because zip+tar + train_path = self._get_filepaths_and_labels("train") + train_data = torchvision.datasets.ImageFolder(train_path) + self._train_data, self._val_data = self._split_train_val_data(train_data) + test_path = self._get_filepaths_and_labels("val") + self._test_data = torchvision.datasets.ImageFolder(test_path) + + def _get_filepaths_and_labels(self, split: str) -> Tuple[List[str], List[int]]: + return os.path.join(self._data_path, self._dataset_name, self.data_id, split) + + +class CORE50DataModule(DataIncrementalDataModule): + """Datamodule that processes the CORe50 dataset. + + It enables downloading all the scenarios with respect to the 0th run (as per S-Prompts), + set by `scenario` and `data_id` respectively. + + Source: https://vlomonaco.github.io/core50/.
+ Adapted from: https://github.com/vlomonaco/core50/blob/master/scripts/python/data_loader.py + + Args: + data_path: The path to the folder containing the dataset files. + src_bucket: The name of the s3 bucket. If not provided, downloads the data from + original source. + src_object_name: The folder path in the s3 bucket. + scenario: One of ``ni``, ``nc``, ``nic``, ``nicv2_79``, ``nicv2_196`` and ``nicv2_391``. + This is different from the usage of scenario elsewhere in Renate. + data_id: One of the several data batches dependent on scenario + """ + + md5s = { + "core50_128x128.zip": "745f3373fed08d69343f1058ee559e13", + "paths.pkl": "b568f86998849184df3ec3465290f1b0", + "LUP.pkl": "33afc26faa460aca98739137fdfa606e", + "labels.pkl": "281c95774306a2196f4505f22fd60ab1", + } + dataset_stats = { + "Core50": dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + } + + def __init__( + self, + data_path: Union[Path, str], + src_bucket: Optional[str] = None, + src_object_name: Optional[str] = None, + scenario: Literal["ni", "nc", "nic", "nicv2_79", "nicv2_196", "nicv2_391"] = "ni", + data_id: int = 0, + val_size: float = defaults.VALIDATION_SIZE, + seed: int = defaults.SEED, + ) -> None: + super().__init__( + data_path=data_path, + src_bucket=src_bucket, + src_object_name=src_object_name, + data_id=data_id, + val_size=val_size, + seed=seed, + ) + self._dataset_name = "core50" + self._image_source = "core50_128x128" + self._scenario = scenario + self._complete_data_path = os.path.join( + self._data_path, self._dataset_name, self._image_source + ) + + def prepare_data(self) -> None: + """Download the CORE50 dataset and supporting files and set paths.""" + if not self._verify_file(f"{self._image_source}.zip"): + download_and_unzip_file( + self._dataset_name, + self._data_path, + self._src_bucket, + self._src_object_name, + "http://bias.csr.unibo.it/maltoni/download/core50/", + f"{self._image_source}.zip", + ) + for file_name in [ + "paths.pkl", + "LUP.pkl", + 
"labels.pkl", + ]: + if not self._verify_file(file_name): + download_file( + self._dataset_name, + self._data_path, + self._src_bucket, + self._src_object_name, + "https://vlomonaco.github.io/core50/data/", + file_name, + ) + + def setup(self) -> None: + """Make assignments: train/test splits (CORe50 dataset only has train and test splits).""" + with open(os.path.join(self._data_path, self._dataset_name, "paths.pkl"), "rb") as f: + self._paths = pickle.load(f) + + with open(os.path.join(self._data_path, self._dataset_name, "LUP.pkl"), "rb") as f: + self._LUP = pickle.load(f) + + with open(os.path.join(self._data_path, self._dataset_name, "labels.pkl"), "rb") as f: + self._labels = pickle.load(f) + + train_data = ImageDataset(*self._parse_file_lists("train")) + self._train_data, self._val_data = self._split_train_val_data(train_data) + self._test_data = ImageDataset(*self._parse_file_lists("test")) + + def _parse_file_lists(self, stage: str) -> Tuple[List[str], List[int]]: + data_id = self.data_id if stage == "train" else -1 + idx_list = self._LUP[self._scenario][0][data_id] + X = [os.path.join(self._complete_data_path, self._paths[idx]) for idx in idx_list] + y = self._labels[self._scenario][0][data_id] + + return X, y diff --git a/src/renate/benchmark/experiment_config.py b/src/renate/benchmark/experiment_config.py index 16ba10b0..00549b49 100644 --- a/src/renate/benchmark/experiment_config.py +++ b/src/renate/benchmark/experiment_config.py @@ -14,7 +14,9 @@ from renate.benchmark.datasets.nlp_datasets import HuggingFaceTextDataModule, MultiTextDataModule from renate.benchmark.datasets.vision_datasets import ( + CDDBDataModule, CLEARDataModule, + CORE50DataModule, DomainNetDataModule, TorchVisionDataModule, ) @@ -50,6 +52,8 @@ from renate.models import RenateModule from renate.models.prediction_strategies import ICaRLClassificationStrategy +from renate.benchmark.models.spromptmodel import SPromptTransformer + models = { "MultiLayerPerceptron": MultiLayerPerceptron, 
"ResNet18CIFAR": ResNet18CIFAR, @@ -66,6 +70,7 @@ "VisionTransformerH14": VisionTransformerH14, "HuggingFaceTransformer": HuggingFaceSequenceClassificationTransformer, "LearningToPromptTransformer": LearningToPromptTransformer, + "SPromptTransformer": SPromptTransformer, } @@ -79,6 +84,9 @@ def model_fn( hidden_size: Optional[Tuple[int]] = None, dataset_name: Optional[str] = None, pretrained_model_name_or_path: Optional[str] = None, + prompt_size: int = 10, + clusters_per_task: int = 5, + per_task_classifier: bool = True, ) -> RenateModule: """Returns a model instance.""" if model_name not in models: @@ -108,6 +116,16 @@ def model_fn( f"LearningToPromptTransformer, but model name specified is {model_name}." ) model_kwargs["pretrained_model_name_or_path"] = pretrained_model_name_or_path + elif (updater is not None) and ("SPeft" in updater): + if not model_name.startswith("SPrompt"): + raise ValueError( + "SPrompt model updater is designed to work only with " + f"SPromptTransformer, but model name specified is {model_name}." 
+ ) + model_kwargs["pretrained_model_name_or_path"] = pretrained_model_name_or_path + model_kwargs["prompt_size"] = prompt_size + model_kwargs["clusters_per_task"] = clusters_per_task + model_kwargs["per_task_classifier"] = per_task_classifier if model_state_url is None: model = model_class(**model_kwargs) else: @@ -174,6 +192,23 @@ def get_data_module( val_size=val_size, seed=seed, ) + if dataset_name == "CDDB": + return CDDBDataModule( + data_path=data_path, + src_bucket=src_bucket, + src_object_name=src_object_name, + val_size=val_size, + seed=seed, + ) + if dataset_name == "Core50": + return CORE50DataModule( + data_path=data_path, + src_bucket=src_bucket, + src_object_name=src_object_name, + val_size=val_size, + seed=seed, + ) + raise ValueError(f"Unknown dataset `{dataset_name}`.") @@ -335,6 +370,16 @@ def _get_normalize_transform(dataset_name): DomainNetDataModule.dataset_stats["all"]["mean"], DomainNetDataModule.dataset_stats["all"]["std"], ) + if dataset_name == "CDDB": + return transforms.Normalize( + CDDBDataModule.dataset_stats["CDDB"]["mean"], + CDDBDataModule.dataset_stats["CDDB"]["std"], + ) + if dataset_name == "Core50": + return transforms.Normalize( + CORE50DataModule.dataset_stats["Core50"]["mean"], + CORE50DataModule.dataset_stats["Core50"]["std"], + ) def train_transform(dataset_name: str, model_name: Optional[str] = None) -> Optional[Callable]: @@ -391,6 +436,25 @@ def train_transform(dataset_name: str, model_name: Optional[str] = None) -> Opti _get_normalize_transform(dataset_name), ] ) + if dataset_name == "CDDB": + return transforms.Compose( + [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ColorJitter(brightness=63 / 255), + transforms.ToTensor(), + _get_normalize_transform(dataset_name), + ] + ) + if dataset_name == "Core50": + return transforms.Compose( + [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + _get_normalize_transform(dataset_name), + 
] + ) raise ValueError(f"Unknown dataset `{dataset_name}`.") @@ -442,6 +506,24 @@ def test_transform( _get_normalize_transform(dataset_name), ] ) + if dataset_name == "CDDB": + return transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + _get_normalize_transform(dataset_name), + ] + ) + if dataset_name == "Core50": + return transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + _get_normalize_transform(dataset_name), + ] + ) raise ValueError(f"Unknown dataset `{dataset_name}`.") diff --git a/src/renate/benchmark/models/__init__.py b/src/renate/benchmark/models/__init__.py index edfdf6bb..f343432d 100644 --- a/src/renate/benchmark/models/__init__.py +++ b/src/renate/benchmark/models/__init__.py @@ -9,7 +9,8 @@ ResNet50, ResNet50CIFAR, ) -from renate.benchmark.models.l2p import LearningToPromptTransformer +from renate.benchmark.models.l2p import LearningToPromptTransformer, PromptedTransformer +from renate.benchmark.models.spromptmodel import SPromptTransformer from renate.benchmark.models.vision_transformer import ( VisionTransformerB16, VisionTransformerB32, @@ -28,6 +29,8 @@ "ResNet50", "ResNet50CIFAR", "LearningToPromptTransformer", + "PromptedTransformer", + "SPromptTransformer", "VisionTransformerB16", "VisionTransformerB32", "VisionTransformerCIFAR", diff --git a/src/renate/benchmark/models/spromptmodel.py b/src/renate/benchmark/models/spromptmodel.py new file mode 100644 index 00000000..dac81b81 --- /dev/null +++ b/src/renate/benchmark/models/spromptmodel.py @@ -0,0 +1,222 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 +import logging +import math +from typing import Any, Dict, List, Optional, Union + +import torch +import torch.nn as nn + +from renate.models.layers.shared_linear import SharedMultipleLinear +from renate.models.prediction_strategies import PredictionStrategy +from renate.models.task_identification_strategies import TaskPrototypes + +from .l2p import PromptedTransformer +from .base import RenateBenchmarkingModule + +logger = logging.getLogger(__name__) + + +class PromptPool(nn.Module): + """Implements a pool of prompts to be used for S-Prompts. + + Args: + prompt_size: Equivalent to the number of input tokens used per update. Defaults to 10. + embedding_size: Hidden size of the transformer used. Defaults to 768. + current_update_id: Current update id. Used to init number of prompts. Defaults to 0. + """ + + def __init__( + self, prompt_size: int = 10, embedding_size: int = 768, current_update_id: int = 0 + ) -> None: + super().__init__() + self._M = prompt_size + self._embedding_size = embedding_size + self._curr_task = current_update_id + + self._pool = nn.ParameterDict() + for id in range(self._curr_task): + # This always needs to be initialized as torch's state dict only restores for an + # existing Parameter.
+ self._pool[f"{id}"] = nn.Parameter( + torch.nn.init.kaiming_uniform_( + torch.empty((self._M, self._embedding_size)), a=math.sqrt(5) + ) + ) + + self._pool.requires_grad_(True) + + def forward(self, id: int) -> torch.nn.Parameter: + return self._pool[f"{id}"] + + def get_params(self, id: int) -> List[torch.nn.Parameter]: + return [self._pool[f"{id}"]] + + def increment_task(self) -> None: + self._pool[f"{len(self._pool)}"] = nn.Parameter( + torch.empty((self._M, self._embedding_size)).uniform_(-1, 1) + ) + self._pool.requires_grad_(True) + + +class SPromptTransformer(RenateBenchmarkingModule): + """Implements Transformer Model for S-Prompts as described in Wang, Yabin, et.al ."S-prompts + learning with pre-trained transformers: An occam’s razor for domain incremental learning." + Advances in Neural Information Processing Systems 35 (2022): 5682-5695. + + Args: + pretrained_model_name_or_path: A string that denotes which pretrained model from the HF hub + to use. + image_size: Image size. Used if `pretrained_model_name_or_path` is not set . + patch_size: Patch size to be extracted. Used if `pretrained_model_name_or_path` is not set . + num_layers: Num of transformer layers. Used only if `pretrained_model_name_or_path` is not + set . + num_heads: Num heads in MHSA. Used only if `pretrained_model_name_or_path` is not set . + hidden_dim: Hidden dimension of transformers. Used only if `pretrained_model_name_or_path` + is not set . + mlp_dim: _description_. Used only if `pretrained_model_name_or_path` is not set . + dropout: _description_. Used only if `pretrained_model_name_or_path` is not set . + attention_dropout: _description_. Used only if `pretrained_model_name_or_path` is not set . + num_outputs: Number of output classes of the output. Defaults to 10. + prediction_strategy: Continual learning strategies may alter the prediction at train or test + time. Defaults to None. 
+ add_icarl_class_means: If ``True``, additional parameters used only by the + ``ICaRLModelUpdater`` are added. Only required when using that updater. + prompt_size: Equivalent to the number of input tokens used per update. Defaults to 10. + task_id: Internal variable used to increment update id. Shouldn't be set by user. + Defaults to 0. + clusters_per_task: Number of k-means clusters used for task identification. Defaults to 5. + per_task_classifier: If ``True``, use one classifier head per task instead of a shared one. + Defaults to False. + """ + + def __init__( + self, + pretrained_model_name_or_path="google/vit-base-patch16-224", + image_size: int = 32, + patch_size: int = 4, + num_layers: int = 12, + num_heads: int = 12, + hidden_dim: int = 768, + mlp_dim: int = 3072, + dropout: float = 0.1, + attention_dropout: float = 0.1, + num_outputs: int = 10, + prediction_strategy: Optional[PredictionStrategy] = None, + add_icarl_class_means: bool = True, + prompt_size: int = 10, + task_id: int = 0, + clusters_per_task: int = 5, + per_task_classifier: bool = False, + ): + transformer = PromptedTransformer( + pretrained_model_name_or_path=pretrained_model_name_or_path, + image_size=image_size, + patch_size=patch_size, + num_layers=num_layers, + num_heads=num_heads, + hidden_dim=hidden_dim, + mlp_dim=mlp_dim, + dropout=dropout, + attention_dropout=attention_dropout, + num_outputs=num_outputs, + prediction_strategy=prediction_strategy, + add_icarl_class_means=add_icarl_class_means, + ) + super().__init__( + embedding_size=transformer.transformer._embedding_size, + num_outputs=num_outputs, + constructor_arguments=dict( + **transformer.transformer._constructor_arguments, + prompt_size=prompt_size, + task_id=task_id, + clusters_per_task=clusters_per_task, + per_task_classifier=per_task_classifier, + ), + prediction_strategy=prediction_strategy, + add_icarl_class_means=add_icarl_class_means, + ) + self._M = prompt_size + self._task_id = task_id + self._per_task_classifier =
per_task_classifier + + prompt_pool = PromptPool( + prompt_size=self._M, + embedding_size=self._embedding_size, + current_update_id=self._task_id, + ) + + self._backbone = nn.ModuleDict({"transformer": transformer, "prompt_pool": prompt_pool}) + self._task_id_method = TaskPrototypes( + task_id=task_id, + clusters_per_task=clusters_per_task, + embedding_size=self._embedding_size, + ) + self._backbone["transformer"].requires_grad_(False) + self._backbone["prompt_pool"].requires_grad_(True) + + self._backbone["classifier"] = SharedMultipleLinear( + self._embedding_size, + self._num_outputs, + share_parameters=not self._per_task_classifier, + num_updates=self._task_id + 1, + ) + + self._tasks_params = nn.ModuleDict( + {k: nn.Identity() for k, _ in self._tasks_params.items()} + ) + + self._backbone.forward = self._forward_for_monkey_patching + self.task_ids = None + + def increment_task(self) -> None: + # This cannot be a part of add_task_params as the super.__init__ function calls + # add_task_params, and thus we would be trying parameters to the non-existent + # self.s_prompts + self._backbone["prompt_pool"].increment_task() + + def _forward_for_monkey_patching( + self, x: Union[torch.Tensor, Dict[str, Any]], task_id: str = None + ) -> torch.Tensor: + prompt = None + task_ids = None + if self.training: + prompt = self._backbone["prompt_pool"](self._task_id) + else: + task_ids = self._task_id_method.infer_task(self._backbone["transformer"](x)) + if task_ids is not None: + prompt = torch.stack([self._backbone["prompt_pool"](i) for i in task_ids]) + self.task_ids = task_ids.detach().cpu().numpy() + + features = self._backbone["transformer"](x, prompt) + + # additional logic for separate classifiers + # a. This forward returns logits directly, and the RenateBenchmarkingModule's _task_params + # now are identities. Thus, the overall operation is still the network forward pass. + # b. 
Additional handling of params is not needed as backbone's params will return all the + # necessary elements. + + if self.training: + logits = self._backbone["classifier"][f"{self._task_id}"](features) + elif task_ids is not None: + logits = torch.cat( + [ + self._backbone["classifier"][f"{t}"](feat.unsqueeze(0)) + for t, feat in zip(task_ids, features) + ] + ) + else: + logits = self._backbone["classifier"]["0"](features) + + return logits + + def update_task_identifier(self, features: torch.Tensor, labels: torch.Tensor) -> None: + self._task_id_method.update_task_prototypes(features, labels) + + def set_extra_state(self, state: Any, decode=True): + super().set_extra_state(state, decode) + # once this is set (after loading. increase that by one.) + self._constructor_arguments["task_id"] = self._task_id + 1 + + def features(self, x: torch.Tensor) -> torch.Tensor: + return self._backbone["transformer"](x) diff --git a/src/renate/cli/parsing_functions.py b/src/renate/cli/parsing_functions.py index 0d94173e..f14a5c10 100644 --- a/src/renate/cli/parsing_functions.py +++ b/src/renate/cli/parsing_functions.py @@ -28,6 +28,7 @@ ) from renate.updaters.experimental.offline_er import OfflineExperienceReplayModelUpdater from renate.updaters.experimental.repeated_distill import RepeatedDistillationModelUpdater +from renate.updaters.experimental.speft import SPeftModelUpdater from renate.updaters.model_updater import ModelUpdater REQUIRED_ARGS_GROUP = "Required Arguments" @@ -70,6 +71,8 @@ def get_updater_and_learner_kwargs( elif args.updater == "LearningToPromptReplay": learner_args = learner_args + ["prompt_sim_loss_weight", "memory_size", "memory_batch_size"] updater_class = LearningToPromptReplayModelUpdater + elif args.updater == "SPeft": + updater_class = SPeftModelUpdater elif args.updater == "DER": learner_args = base_er_args + ["alpha", "beta"] updater_class = DarkExperienceReplayModelUpdater @@ -488,6 +491,10 @@ def _add_l2preplay_arguments(arguments: Dict[str, 
Dict[str, Any]]) -> None: _add_offline_er_arguments(arguments) +def _add_speft_arguments(arguments: Dict[str, Dict[str, Any]]) -> None: + pass + + def _add_gdumb_arguments(arguments: Dict[str, Dict[str, Any]]) -> None: """A helper function that adds GDumb arguments.""" _add_replay_learner_arguments(arguments) @@ -973,6 +980,7 @@ def get_scheduler_kwargs( "ER": _add_experience_replay_arguments, "LearningToPrompt": _add_l2p_arguments, "LearningToPromptReplay": _add_l2preplay_arguments, + "SPeft": _add_speft_arguments, "DER": _add_dark_experience_replay_arguments, "POD-ER": _add_pod_experience_replay_arguments, "CLS-ER": _add_cls_experience_replay_arguments, diff --git a/src/renate/defaults.py b/src/renate/defaults.py index e0854022..a92f7b34 100644 --- a/src/renate/defaults.py +++ b/src/renate/defaults.py @@ -112,6 +112,9 @@ # L2p PROMPT_SIM_LOSS_WEIGHT = 0.5 +# S-prompt +CLUSTERS_PER_TASK = 5 + def scheduler(config_space: Dict[str, Any], mode: str, metric: str): return FIFOScheduler( diff --git a/src/renate/models/layers/shared_linear.py b/src/renate/models/layers/shared_linear.py new file mode 100644 index 00000000..bef9d8ed --- /dev/null +++ b/src/renate/models/layers/shared_linear.py @@ -0,0 +1,50 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +import torch.nn as nn + + +class SharedMultipleLinear(nn.ModuleDict): + """This implements a linear classification layer for multiple tasks (updates). + This linear layer can be shared across all tasks or can have a separate layer per task. + This follows the `_task_params` in the `RenateBenchmarkingModule` that is a `nn.ModuleDict` + that holds a classifier per task (as in TIL). + + Args: + in_features: size of each input sample + out_features: size of each output sample + bias: If set to ``False``, the layer will not learn an additive bias. + Default: ``True`` + share_parameters: Flag whether to share parameters or use individual linears per task. 
+ The interface remains identical, and the underlying linear layer is shared (or not). + num_updates: Number of updates that have happened/is happening. + """ + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + share_parameters: bool = True, + num_updates: int = 0, + ) -> None: + self._share_parameters = share_parameters + self.in_features = in_features + self.out_features = out_features + self.bias = bias + + super().__init__() + for _ in range(num_updates): + self.increment_task() + + def increment_task(self) -> None: + currlen = len(self) + if self._share_parameters: + self[f"{currlen}"] = ( + self[list(self.keys())[0]] + if currlen > 0 + else nn.Linear(in_features=self.in_features, out_features=self.out_features) + ) + else: + self[f"{currlen}"] = nn.Linear( + in_features=self.in_features, out_features=self.out_features + ) diff --git a/src/renate/models/task_identification_strategies.py b/src/renate/models/task_identification_strategies.py new file mode 100644 index 00000000..8cf0dbee --- /dev/null +++ b/src/renate/models/task_identification_strategies.py @@ -0,0 +1,97 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from abc import ABC, abstractmethod +from typing import Union + +import numpy as np +import numpy.typing as npt +import torch +import torch.nn as nn +from sklearn.cluster import KMeans + + +class TaskEstimator(nn.Module, ABC): + """An ABC that all task estimator methods inherit. + + They implement two methods `update_task_prototypes` and `infer_task`. + """ + + @abstractmethod + def update_task_prototypes(self): + return + + @abstractmethod + def infer_task(self): + return + + +class TaskPrototypes(TaskEstimator): + """Task identification method proposed in S-Prompts. + + Args: + task_id: The current update id of the method. Required to deserialize. + clusters_per_task: Number of clusters to use in K-means. 
+ embedding_size: Embedding size of the transformer features. + """ + + def __init__(self, task_id, clusters_per_task, embedding_size) -> None: + super().__init__() + self.register_buffer( + "_training_feat_centroids", + torch.empty(task_id * clusters_per_task, embedding_size), + ) + self.register_buffer( + "_training_feat_task_ids", + torch.full( + (self._training_feat_centroids.size(0),), fill_value=task_id, dtype=torch.long + ), + ) + self._clusters_per_task = clusters_per_task + self._task_id = task_id + self._embedding_size = embedding_size + + @torch.no_grad() + def update_task_prototypes( + self, + features: Union[torch.Tensor, npt.ArrayLike], + labels: Union[torch.Tensor, npt.ArrayLike], + ) -> None: + # At training. + if isinstance(features, torch.Tensor): + features = features.cpu().numpy() + + # l2 normalize features: + features = features / np.power(np.einsum("ij, ij -> i", features, features), 0.5)[:, None] + + centroids = torch.from_numpy( + KMeans(n_clusters=self._clusters_per_task, random_state=0) + .fit(features) + .cluster_centers_ + ).to(self._training_feat_centroids.device) + + self._training_feat_centroids = torch.cat( + [ + self._training_feat_centroids, + centroids, + ] + ) + self._training_feat_task_ids = torch.cat( + [ + self._training_feat_task_ids, + torch.full( + (centroids.size(0),), + fill_value=self._task_id, + dtype=torch.int8, + device=self._training_feat_task_ids.device, + ), + ] + ) + + def infer_task(self, features: torch.Tensor) -> torch.Tensor: + # At inference. 
+ if self._training_feat_centroids.numel() > 0: + features = torch.nn.functional.normalize(features) + nearest_p_inds = torch.cdist(features, self._training_feat_centroids, p=2).argmin(1) + return self._training_feat_task_ids[nearest_p_inds] + else: + return None diff --git a/src/renate/updaters/avalanche/model_updater.py b/src/renate/updaters/avalanche/model_updater.py index 10e1d2f4..d1692b7f 100644 --- a/src/renate/updaters/avalanche/model_updater.py +++ b/src/renate/updaters/avalanche/model_updater.py @@ -211,7 +211,7 @@ def _load_benchmark_if_exists( train_dataset_collate_fn: Optional[Callable] = None, val_dataset_collate_fn: Optional[Callable] = None, ) -> AvalancheBenchmarkWrapper: - train_dataset = to_avalanche_dataset(train_dataset, train_dataset_collate_fn) + avalanche_train_dataset = to_avalanche_dataset(train_dataset, train_dataset_collate_fn) avalanche_state = None if self._input_state_folder is not None: @@ -232,7 +232,7 @@ def _load_benchmark_if_exists( val_memory_dataset = to_avalanche_dataset(train_dataset, val_dataset_collate_fn) benchmark = AvalancheBenchmarkWrapper( - train_dataset=train_dataset, + train_dataset=avalanche_train_dataset, val_dataset=val_memory_dataset, train_transform=self._train_transform, train_target_transform=self._train_target_transform, diff --git a/src/renate/updaters/experimental/speft.py b/src/renate/updaters/experimental/speft.py new file mode 100644 index 00000000..e6c0a095 --- /dev/null +++ b/src/renate/updaters/experimental/speft.py @@ -0,0 +1,198 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Callable, Dict, List, Optional + +import torch +import torch.nn as nn +import torchmetrics +from pytorch_lightning.loggers.logger import Logger +from torch.nn import Parameter +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler +from torch.utils.data import Dataset + +from renate import defaults +from renate.benchmark.models.spromptmodel import SPromptTransformer +from renate.models import RenateModule +from renate.updaters.learner import Learner +from renate.updaters.model_updater import SingleTrainingLoopUpdater + + +class SPeftLearner(Learner): + """Learner to implement S-Prompts from + ```Wang, Yabin, et al. + "S-prompts learning with pre-trained transformers: An occam’s razor for domain incremental learning." # noqa: E501 + Advances in Neural Information Processing Systems 35 (2022): 5682-5695.``` + + + Args: + model: The SPromptTransformer model to be trained. + loss_fn: Loss function to be trained with. + optimizer: Partial optimizer used to create an optimizer by passing the model parameters. + learning_rate_scheduler: Partial object of learning rate scheduler that will be created by + passing the optimizer. + learning_rate_scheduler_interval: When to update the learning rate scheduler. + Options: `epoch` and `step`. + batch_size: Training batch size. + train_transform: The transformation applied during training. + train_target_transform: The target transformation applied during training. + test_transform: The transformation at test time. + test_target_transform: The target transformation at test time. + logged_metrics: Metrics logged additional to the default ones. + seed: See :func:`renate.models.utils.get_generator`. + mask_unused_classes: Masking logits corresponding to unused classes. Useful only for class + incremental problems. Defaults to defaults.MASK_UNUSED_CLASSES.
+ """ + + def __init__( + self, + model: RenateModule, + loss_fn: torch.nn.Module, + optimizer: Callable[[List[Parameter]], Optimizer], + learning_rate_scheduler: Optional[Optional[Callable[[Optimizer], _LRScheduler]]] = None, + learning_rate_scheduler_interval: defaults.SUPPORTED_LR_SCHEDULER_INTERVAL_TYPE = defaults.LR_SCHEDULER_INTERVAL, # noqa: E501 + batch_size: int = defaults.BATCH_SIZE, + train_transform: Optional[Callable] = None, + train_target_transform: Optional[Callable] = None, + test_transform: Optional[Callable] = None, + test_target_transform: Optional[Callable] = None, + logged_metrics: Optional[Dict[str, torchmetrics.Metric]] = None, + seed: int = defaults.SEED, + mask_unused_classes: bool = defaults.MASK_UNUSED_CLASSES, + ) -> None: + if not isinstance(model, SPromptTransformer): + raise ValueError( + "SPrompt Learner can only be used with a SPromptTransformer model." + f"But got {type(model)}" + ) + super().__init__( + model, + loss_fn, + optimizer, + learning_rate_scheduler, + learning_rate_scheduler_interval, + batch_size, + train_transform, + train_target_transform, + test_transform, + test_target_transform, + logged_metrics, + seed, + mask_unused_classes, + ) + + def on_model_update_start( + self, + train_dataset: Dataset, + val_dataset: Dataset, + train_dataset_collate_fn: Optional[Callable] = None, + val_dataset_collate_fn: Optional[Callable] = None, + task_id: Optional[str] = None, + ) -> None: + """A custom on_model_update_start hook for S-Peft methods. + + Here, we iterate over the train data set and extract features, which are used to compute + the task prototypes by the `update_task_identifier` call. Having this function in the model + update start instead of end results in val metrics being reflective of test accuracy.
+ """ + super().on_model_update_start( + train_dataset, val_dataset, train_dataset_collate_fn, val_dataset_collate_fn, task_id + ) + ## k-means + device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + self._model.to(device) + features, labels = [], [] + with torch.inference_mode(): + for x, y in self.train_dataloader(): + features.append(self._model.features(x.to(device)).cpu()) + labels.append(y) + features = torch.cat(features) + labels = torch.cat(labels) + self._model.update_task_identifier(features=features, labels=labels) + + def setup(self, stage: str) -> None: + # We dont support distributed + assert ( + self.trainer.world_size == 1 + ), "SPrompt learner does not support Multi-GPU training yet." + if stage == "fit": + # This needs to run before configure optimizers is called. The only hook is setup("fit") + self._model.increment_task() + + def optimizer_zero_grad( + self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int + ) -> None: + """Explicitly setting grads to None instead of zero.""" + optimizer.zero_grad(set_to_none=True) + + +class SPeftModelUpdater(SingleTrainingLoopUpdater): + def __init__( + self, + model: RenateModule, + loss_fn: torch.nn.Module, + optimizer: Callable[[List[nn.Parameter]], Optimizer], + batch_size: int = defaults.BATCH_SIZE, + seed: int = defaults.SEED, + learner_kwargs: Optional[Dict[str, Any]] = None, + input_state_folder: Optional[str] = None, + output_state_folder: Optional[str] = None, + max_epochs: int = defaults.MAX_EPOCHS, + learning_rate_scheduler: Optional[Optional[Callable[[Optimizer], _LRScheduler]]] = None, + learning_rate_scheduler_interval: defaults.SUPPORTED_LR_SCHEDULER_INTERVAL_TYPE = defaults.LR_SCHEDULER_INTERVAL, # noqa: E501 + train_transform: Optional[Callable] = None, + train_target_transform: Optional[Callable] = None, + test_transform: Optional[Callable] = None, + test_target_transform: Optional[Callable] = None, + buffer_transform: Optional[Callable] 
= None, + buffer_target_transform: Optional[Callable] = None, + metric: Optional[str] = None, + mode: defaults.SUPPORTED_TUNING_MODE_TYPE = "min", + logged_metrics: Optional[Dict[str, torchmetrics.Metric]] = None, + early_stopping_enabled: bool = False, + logger: Logger = defaults.LOGGER(**defaults.LOGGER_KWARGS), + accelerator: defaults.SUPPORTED_ACCELERATORS_TYPE = defaults.ACCELERATOR, + devices: Optional[int] = None, + strategy: Optional[str] = defaults.DISTRIBUTED_STRATEGY, + precision: str = defaults.PRECISION, + deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER, + gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL, + gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM, + mask_unused_classes: bool = defaults.MASK_UNUSED_CLASSES, + ): + learner_kwargs = { + "batch_size": batch_size, + "seed": seed, + "loss_fn": loss_fn, + } + super().__init__( + model=model, + loss_fn=loss_fn, + optimizer=optimizer, + learner_class=SPeftLearner, + learner_kwargs=learner_kwargs, + input_state_folder=input_state_folder, + output_state_folder=output_state_folder, + max_epochs=max_epochs, + learning_rate_scheduler=learning_rate_scheduler, + learning_rate_scheduler_interval=learning_rate_scheduler_interval, + train_transform=train_transform, + train_target_transform=train_target_transform, + test_transform=test_transform, + test_target_transform=test_target_transform, + buffer_transform=buffer_transform, + buffer_target_transform=buffer_target_transform, + metric=metric, + mode=mode, + logged_metrics=logged_metrics, + early_stopping_enabled=early_stopping_enabled, + logger=logger, + accelerator=accelerator, + devices=devices, + strategy=strategy, + precision=precision, + deterministic_trainer=deterministic_trainer, + gradient_clip_algorithm=gradient_clip_algorithm, + gradient_clip_val=gradient_clip_val, + mask_unused_classes=mask_unused_classes, + ) diff --git a/src/renate/updaters/model_updater.py b/src/renate/updaters/model_updater.py 
index 066f9aef..734e6229 100644 --- a/src/renate/updaters/model_updater.py +++ b/src/renate/updaters/model_updater.py @@ -22,10 +22,11 @@ from renate.utils.deepspeed import convert_zero_checkpoint_to_fp32_state_dict from renate.utils.distributed_strategies import create_strategy from renate.utils.file import unlink_file_or_folder -from renate.utils.misc import int_or_str +from renate.utils.misc import AdditionalTrainingMetrics, int_or_str from .learner import Learner, ReplayLearner from ..models import RenateModule + logging_logger = logging.getLogger(__name__) @@ -40,28 +41,36 @@ def __init__(self, val_enabled: bool): super().__init__() self._report = Reporter() self._val_enabled = val_enabled + self._additional_metrics = AdditionalTrainingMetrics() @rank_zero_only - def _log(self, trainer: Trainer, training: bool) -> None: + def _log(self, trainer: Trainer, pl_module: LightningModule) -> None: """Report the current epoch's results to Syne Tune. If validation was run `_val_enabled` is True, the results are reported at the end of the validation epoch. Otherwise, they are reported at the end of the training epoch. 
""" + training = pl_module.training if trainer.sanity_checking or (training and self._val_enabled): return + to_report = {k: v.item() for k, v in trainer.logged_metrics.items()} + to_report.update(self._additional_metrics(pl_module)) self._report( - **{k: v.item() for k, v in trainer.logged_metrics.items()}, + **to_report, step=trainer.current_epoch, epoch=trainer.current_epoch + 1, ) def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None: - self._log(trainer=trainer, training=pl_module.training) + self._additional_metrics.on_train_epoch_end() + self._log(trainer=trainer, pl_module=pl_module) def on_validation_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None: - self._log(trainer=trainer, training=pl_module.training) + self._log(trainer=trainer, pl_module=pl_module) + + def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: + self._additional_metrics.on_train_start() class RenateModelCheckpoint(ModelCheckpoint): diff --git a/src/renate/utils/avalanche.py b/src/renate/utils/avalanche.py index 42fb5984..857aad4b 100644 --- a/src/renate/utils/avalanche.py +++ b/src/renate/utils/avalanche.py @@ -11,16 +11,16 @@ from renate.data.datasets import _TransformedDataset from renate.memory import DataBuffer -from renate.types import NestedTensors -class AvalancheDataset(Dataset): - """A Dataset consumable by Avalanche updaters.""" +class BaseAvalancheDataset(Dataset): + """Base class for all datasets consumable by Avalanche updaters.""" def __init__( - self, inputs: NestedTensors, targets: List[int], collate_fn: Optional[Callable] = None + self, + targets: List[int], + collate_fn: Optional[Callable] = None, ): - self._inputs = inputs self._targets = targets self.targets = torch.tensor(targets, dtype=torch.long) if collate_fn is not None: @@ -29,25 +29,55 @@ def __init__( def __len__(self) -> int: return len(self._targets) - def __getitem__(self, idx) -> Tuple[Tensor, Tensor]: - return self._inputs[idx], 
self._targets[idx] + +class AvalancheDataset(BaseAvalancheDataset): + """A wrapper around a Dataset consumable by Avalanche updaters.""" + + def __init__( + self, + dataset: Union[Dataset, DataBuffer], + targets: List[int], + collate_fn: Optional[Callable] = None, + ): + super().__init__(targets, collate_fn) + self._dataset = dataset + + def __getitem__(self, idx) -> Tuple[Tensor, int]: + return self._dataset[idx][0], self._targets[idx] + + +class AvalancheDatasetForBuffer(BaseAvalancheDataset): + """A wrapper around a DataBuffer consumable by Avalanche updaters.""" + + def __init__( + self, buffer: DataBuffer, targets: List[int], collate_fn: Optional[Callable] = None + ): + super().__init__(targets, collate_fn) + self._indices = buffer._indices + self._datasets = buffer._datasets + + def __getitem__(self, idx) -> Tuple[Tensor, int]: + i, j = self._indices[idx] + return self._datasets[i][j][0], self._targets[idx] def to_avalanche_dataset( dataset: Union[Dataset, DataBuffer], collate_fn: Optional[Callable] = None -) -> AvalancheDataset: +) -> BaseAvalancheDataset: """Converts a DataBuffer or Dataset into an Avalanche-compatible Dataset.""" - x_data, y_data = [], [] + y_data = [] + is_buffer = isinstance(dataset, DataBuffer) for i in range(len(dataset)): - if isinstance(dataset, DataBuffer): - (x, y), _ = dataset[i] + if is_buffer: + (_, y), _ = dataset[i] else: - x, y = dataset[i] - x_data.append(x) + _, y = dataset[i] if not isinstance(y, int): y = y.item() y_data.append(y) - return AvalancheDataset(x_data, y_data, collate_fn) + if is_buffer: + return AvalancheDatasetForBuffer(dataset, y_data, collate_fn) + return AvalancheDataset(dataset, y_data, collate_fn) class AvalancheBenchmarkWrapper: @@ -64,6 +94,7 @@ def __init__( self._classes_order = None self._n_classes = 0 self._train_dataset = train_dataset + self._test_transform = test_transform self._train_target_transform = train_target_transform self._benchmark = dataset_benchmark( [train_dataset], @@ -78,7 +109,9 
@@ def __init__( def update_benchmark_properties(self): dataset = _TransformedDataset( - dataset=self._train_dataset, target_transform=self._train_target_transform + dataset=self._train_dataset, + transform=self._test_transform, + target_transform=self._train_target_transform, ) dataloader = DataLoader(dataset) unique_classes = set() diff --git a/src/renate/utils/file.py b/src/renate/utils/file.py index f7db7a68..98e595df 100644 --- a/src/renate/utils/file.py +++ b/src/renate/utils/file.py @@ -4,6 +4,7 @@ import os import shutil from pathlib import Path +from tarfile import TarFile from typing import List, Optional, Tuple, Union from urllib.parse import urlparse from zipfile import ZipFile @@ -287,9 +288,15 @@ def delete_file_from_s3(bucket: str, object_name: str) -> None: s3_client.delete_object(Bucket=bucket, Key=str(object_name)) -def unzip_file(dataset_name: str, data_path: Union[str, Path], file_name: str) -> None: - """Extract .zip files into folder named with dataset name.""" - with ZipFile(os.path.join(data_path, dataset_name, file_name)) as f: +def extract_file(dataset_name: str, data_path: Union[str, Path], file_name: str) -> None: + """Extract a .zip or .tar file, chosen by extension, into folder named with dataset name.""" + if file_name.endswith(".zip"): + Extractor = ZipFile + elif file_name.endswith(".tar"): + Extractor = TarFile + else: + raise ValueError("Unknown compressed format extension.
Only Zip/Tar supported.") + with Extractor(os.path.join(data_path, dataset_name, file_name)) as f: f.extractall(os.path.join(data_path, dataset_name)) @@ -328,7 +335,7 @@ def download_and_unzip_file( ) -> None: """A helper function to download data .zips and uncompress them.""" download_file(dataset_name, data_path, src_bucket, src_object_name, url, file_name) - unzip_file(dataset_name, data_path, file_name) + extract_file(dataset_name, data_path, file_name) def save_pandas_df_to_csv(df: pd.DataFrame, file_path: Union[str, Path]) -> pd.DataFrame: diff --git a/src/renate/utils/misc.py b/src/renate/utils/misc.py index e2554453..4576cef9 100644 --- a/src/renate/utils/misc.py +++ b/src/renate/utils/misc.py @@ -1,6 +1,8 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -from typing import Optional, Set, Union +import time +from typing import Dict, Optional, Set, Tuple, Union +from pytorch_lightning import Callback import torch @@ -37,3 +39,46 @@ def maybe_populate_mask_and_ignore_logits( logits.index_fill_(1, class_mask.to(logits.device), -float("inf")) return logits, class_mask + + +class AdditionalTrainingMetrics(Callback): + def __init__(self) -> None: + self._training_start_time = None + self._curr_epoch_end_time = None + + def on_train_start(self) -> None: + self._training_start_time = time.time() + + def on_train_epoch_end(self) -> None: + self._curr_epoch_end_time = time.time() + + def __call__(self, model: torch.nn.Module) -> Dict[str, Union[float, int]]: + if all([self._training_start_time, self._curr_epoch_end_time]): + total_training_time = self._curr_epoch_end_time - self._training_start_time + else: + total_training_time = 0.0 + # maximum amount of memory used in training. This might + # not be the best choice, but the most convenient. 
+ peak_memory_usage = ( + torch.cuda.memory_stats()["allocated_bytes.all.peak"] + if torch.cuda.is_available() + else 0 + ) + trainable_params, total_params = self.parameters_count(model) + + return dict( + total_training_time=total_training_time, + peak_memory_usage=peak_memory_usage, + trainable_params=trainable_params, + total_params=total_params, + ) + + def parameters_count(self, model: torch.nn.Module) -> Tuple[int, int]: + trainable_params, total_params = 0, 0 + for param in model.parameters(): + num_params = param.numel() + total_params += num_params + if param.requires_grad: + trainable_params += num_params + + return trainable_params, total_params diff --git a/test/integration_tests/configs/suites/quick/avalanche-er.json b/test/integration_tests/configs/suites/quick/avalanche-er.json index 5badc4df..730aa024 100644 --- a/test/integration_tests/configs/suites/quick/avalanche-er.json +++ b/test/integration_tests/configs/suites/quick/avalanche-er.json @@ -5,6 +5,6 @@ "dataset": "cifar10.json", "backend": "local", "job_name": "class-incremental-mlp-avalanche-er", - "expected_accuracy_linux": [[0.14100000262260437, 0.6794999837875366], [0.41449999809265137, 0.5740000009536743]], + "expected_accuracy_linux": [[0.24050000309944153, 0.6399999856948853]], "expected_accuracy_darwin": [[0.07649999856948853, 0.7114999890327454]] } diff --git a/test/integration_tests/configs/suites/quick/avalanche-ewc.json b/test/integration_tests/configs/suites/quick/avalanche-ewc.json index 63f762d8..6a95aedc 100644 --- a/test/integration_tests/configs/suites/quick/avalanche-ewc.json +++ b/test/integration_tests/configs/suites/quick/avalanche-ewc.json @@ -5,6 +5,6 @@ "dataset": "mnist.json", "backend": "local", "job_name": "rotation-mlp-avalanche-ewc", - "expected_accuracy_linux": [[0.7580999732017517, 0.9627000093460083], [0.7551000118255615, 0.9664999842643738]], - "expected_accuracy_darwin": [[0.7497000098228455, 0.9664999842643738]] + "expected_accuracy_linux": 
[[0.7360000014305115, 0.9606999754905701]], + "expected_accuracy_darwin": [[0.7275999784469604, 0.9585000276565552]] } diff --git a/test/integration_tests/configs/suites/quick/avalanche-icarl.json b/test/integration_tests/configs/suites/quick/avalanche-icarl.json index a0991a93..28926d10 100644 --- a/test/integration_tests/configs/suites/quick/avalanche-icarl.json +++ b/test/integration_tests/configs/suites/quick/avalanche-icarl.json @@ -5,6 +5,6 @@ "dataset": "mnist.json", "backend": "local", "job_name": "class-incremental-mlp-avalanche-icarl", - "expected_accuracy_linux": [[0.993380606174469, 0.8330068588256836], [0.9947990775108337, 0.8222330808639526]], + "expected_accuracy_linux": [[0.9938534498214722, 0.7737511992454529]], "expected_accuracy_darwin": [[0.9947990775108337, 0.7845249772071838]] } diff --git a/test/integration_tests/configs/suites/quick/avalanche-lwf.json b/test/integration_tests/configs/suites/quick/avalanche-lwf.json index cab2ab47..980ce4dd 100644 --- a/test/integration_tests/configs/suites/quick/avalanche-lwf.json +++ b/test/integration_tests/configs/suites/quick/avalanche-lwf.json @@ -5,6 +5,6 @@ "dataset": "mnist.json", "backend": "local", "job_name": "permutation-mlp-avalanche-lwf", - "expected_accuracy_linux": [[0.7526999711990356, 0.9607999920845032], [0.6541000008583069, 0.9617999792098999]], + "expected_accuracy_linux": [[0.7662000060081482, 0.9642999768257141]], "expected_accuracy_darwin": [[0.7202000021934509, 0.9646999835968018]] } diff --git a/test/integration_tests/configs/suites/quick/der.json b/test/integration_tests/configs/suites/quick/der.json index 942c1743..43ae07b9 100644 --- a/test/integration_tests/configs/suites/quick/der.json +++ b/test/integration_tests/configs/suites/quick/der.json @@ -5,6 +5,6 @@ "dataset": "cifar10.json", "backend": "local", "job_name": "feature-sorting-mlp-der", - "expected_accuracy_linux": [[0.35339999198913574, 0.3086000084877014], [0.35179999470710754, 0.3208000063896179]], + 
"expected_accuracy_linux": [[0.3497999906539917, 0.28839999437332153]], "expected_accuracy_darwin": [[0.3400000035762787, 0.3253999948501587]] } diff --git a/test/integration_tests/configs/suites/quick/er.json b/test/integration_tests/configs/suites/quick/er.json index 4abe300f..e04f7f3d 100644 --- a/test/integration_tests/configs/suites/quick/er.json +++ b/test/integration_tests/configs/suites/quick/er.json @@ -5,6 +5,6 @@ "dataset": "cifar10.json", "backend": "local", "job_name": "class-incremental-mlp-er", - "expected_accuracy_linux": [[0.5799999833106995, 0.367000013589859], [0.6100000143051147, 0.5975000262260437]], + "expected_accuracy_linux": [[0.44699999690055847, 0.6830000281333923]], "expected_accuracy_darwin": [[0.49050000309944153, 0.671500027179718]] } diff --git a/test/integration_tests/configs/suites/quick/fine-tuning.json b/test/integration_tests/configs/suites/quick/fine-tuning.json index 003d81a7..aa5472ed 100644 --- a/test/integration_tests/configs/suites/quick/fine-tuning.json +++ b/test/integration_tests/configs/suites/quick/fine-tuning.json @@ -5,7 +5,7 @@ "dataset": "fashionmnist.json", "backend": "local", "job_name": "iid-mlp-fine-tuning", - "expected_accuracy_linux": [[0.8458999991416931, 0.8458999991416931], [0.8574000000953674, 0.8574000000953674]], + "expected_accuracy_linux": [[0.854200005531311, 0.854200005531311]], "expected_accuracy_darwin": [[0.8521000146865845, 0.8521000146865845]] } diff --git a/test/integration_tests/configs/suites/quick/joint.json b/test/integration_tests/configs/suites/quick/joint.json index b9cea200..d3cf1e5a 100644 --- a/test/integration_tests/configs/suites/quick/joint.json +++ b/test/integration_tests/configs/suites/quick/joint.json @@ -5,6 +5,6 @@ "dataset": "fashionmnist.json", "backend": "local", "job_name": "iid-mlp-joint", - "expected_accuracy_linux": [[0.8425999879837036, 0.8425999879837036], [0.8446999788284302, 0.8446999788284302]], + "expected_accuracy_linux": [[0.8514999747276306, 
0.8514999747276306]], "expected_accuracy_darwin": [[0.8432000279426575, 0.8432000279426575]] } diff --git a/test/integration_tests/configs/suites/quick/offline-er.json b/test/integration_tests/configs/suites/quick/offline-er.json index 3ab5cfc4..9076ea47 100644 --- a/test/integration_tests/configs/suites/quick/offline-er.json +++ b/test/integration_tests/configs/suites/quick/offline-er.json @@ -5,6 +5,6 @@ "dataset": "cifar10.json", "backend": "local", "job_name": "class-incremental-mlp-offline-er", - "expected_accuracy_linux": [[0.7634999752044678, 0.40299999713897705], [0.6234999895095825, 0.3779999911785126]], + "expected_accuracy_linux": [[0.7415000200271606, 0.48100000619888306]], "expected_accuracy_darwin": [[0.7279999852180481, 0.4650000035762787]] } diff --git a/test/integration_tests/configs/suites/quick/pod-er.json b/test/integration_tests/configs/suites/quick/pod-er.json index 915acb6c..03836373 100644 --- a/test/integration_tests/configs/suites/quick/pod-er.json +++ b/test/integration_tests/configs/suites/quick/pod-er.json @@ -5,6 +5,6 @@ "dataset": "cifar10.json", "backend": "local", "job_name": "hue-shift-mlp-pod-er", - "expected_accuracy_linux": [[0.17059999704360962, 0.31299999356269836], [0.19140000641345978, 0.3246000111103058]], + "expected_accuracy_linux": [[0.2134000062942505, 0.3061999976634979]], "expected_accuracy_darwin": [[0.20000000298023224, 0.2637999951839447]] } diff --git a/test/integration_tests/configs/suites/quick/super-er.json b/test/integration_tests/configs/suites/quick/super-er.json index 9148ce49..0a9ae527 100644 --- a/test/integration_tests/configs/suites/quick/super-er.json +++ b/test/integration_tests/configs/suites/quick/super-er.json @@ -5,6 +5,6 @@ "dataset": "fashionmnist.json", "backend": "local", "job_name": "class-incremental-mlp-super-er", - "expected_accuracy_linux": [[0.9394999742507935, 0.9125000238418579], [0.9384999871253967, 0.9135000109672546]], + "expected_accuracy_linux": [[0.9415000081062317, 
0.9104999899864197]], "expected_accuracy_darwin": [[0.9424999952316284, 0.9100000262260437]] } diff --git a/test/renate/benchmark/models/test_l2p.py b/test/renate/benchmark/models/test_l2p.py index 44e0be0b..0703f2f8 100644 --- a/test/renate/benchmark/models/test_l2p.py +++ b/test/renate/benchmark/models/test_l2p.py @@ -22,18 +22,21 @@ def test_prompt_pool(): assert out.shape == (B, Lp * N, feat_dim) +@pytest.mark.slow def test_prompted_vision_transformer(): combined = LearningToPromptTransformer() inp = torch.rand(1, 3, 224, 224) assert combined(inp).shape == torch.Size((1, 10)) +@pytest.mark.slow def test_prompted_text_transformer(): model = LearningToPromptTransformer(pretrained_model_name_or_path="bert-base-uncased") inp = {"input_ids": torch.randint(0, 3000, (10, 128))} assert model(inp).shape == torch.Size((10, 10)) +@pytest.mark.slow @pytest.mark.parametrize( "cls,arg,argval,error", [ @@ -47,6 +50,7 @@ def test_pool_vision_transformer_raises_errors(cls, arg, argval, error): cls(**{arg: argval}) +@pytest.mark.slow @pytest.mark.parametrize( "backbone,num_trainable_params", [ @@ -64,6 +68,7 @@ def test_prompt_vision_transformer_trainable_parameters(backbone, num_trainable_ assert n == num_trainable_params +@pytest.mark.slow @pytest.mark.parametrize("backbone", ["google/vit-base-patch16-224", "bert-base-uncased"]) @pytest.mark.parametrize("prompt", [None, torch.rand(3, 10, 768)]) @pytest.mark.parametrize("cls_feat", [True, False]) diff --git a/test/renate/benchmark/models/test_sprompt.py b/test/renate/benchmark/models/test_sprompt.py new file mode 100644 index 00000000..03eefbf2 --- /dev/null +++ b/test/renate/benchmark/models/test_sprompt.py @@ -0,0 +1,20 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from renate.benchmark.models.spromptmodel import PromptPool + + +def test_prompt_pool(): + prompt_size = 7 + embedding_size = 12 + curr_update_id = 3 + pool = PromptPool( + prompt_size=prompt_size, embedding_size=embedding_size, current_update_id=curr_update_id + ) + + for i in range(curr_update_id): + assert pool(i).shape == (prompt_size, embedding_size) + assert pool.get_params(i)[0].shape == (prompt_size, embedding_size) + + pool.increment_task() + assert len(pool._pool) == curr_update_id + 1 diff --git a/test/renate/benchmark/models/test_text_transformer.py b/test/renate/benchmark/models/test_text_transformer.py index 56c21699..64868223 100644 --- a/test/renate/benchmark/models/test_text_transformer.py +++ b/test/renate/benchmark/models/test_text_transformer.py @@ -6,6 +6,7 @@ from renate.benchmark.models.transformer import HuggingFaceSequenceClassificationTransformer +@pytest.mark.slow @pytest.mark.parametrize("model_name", ["distilbert-base-uncased", "bert-base-uncased"]) def test_init(model_name): HuggingFaceSequenceClassificationTransformer( @@ -13,6 +14,7 @@ def test_init(model_name): ) +@pytest.mark.slow @pytest.mark.parametrize( "model_name,input_dim", [ diff --git a/test/renate/benchmark/models/test_vision_transformer.py b/test/renate/benchmark/models/test_vision_transformer.py index 5efba785..ba078a1a 100644 --- a/test/renate/benchmark/models/test_vision_transformer.py +++ b/test/renate/benchmark/models/test_vision_transformer.py @@ -6,6 +6,7 @@ from renate.defaults import TASK_ID +@pytest.mark.slow def test_renate_vision_transformer_init(): pytest.helpers.get_renate_module_vision_transformer(sub_class="visiontransformercifar") pytest.helpers.get_renate_module_vision_transformer(sub_class="visiontransformerb16") @@ -41,6 +42,7 @@ def test_renate_vision_transformer_fwd(sub_class, input_dim): # for m in [VisionTransformerB16, VisionTransformerB32, VisionTransformerCIFAR, # VisionTransformerH14, 
VisionTransformerL16, VisionTransformerL32]: # print(len(list(m()._backbone.parameters()))) +@pytest.mark.slow @pytest.mark.parametrize( "sub_class, expected_num_params", [ diff --git a/test/renate/benchmark/test_experimentation_config.py b/test/renate/benchmark/test_experimentation_config.py index 1da7b27c..3cd076d1 100644 --- a/test/renate/benchmark/test_experimentation_config.py +++ b/test/renate/benchmark/test_experimentation_config.py @@ -9,7 +9,11 @@ from renate.benchmark import experiment_config from renate.benchmark.datasets.nlp_datasets import HuggingFaceTextDataModule, MultiTextDataModule -from renate.benchmark.datasets.vision_datasets import CLEARDataModule, TorchVisionDataModule +from renate.benchmark.datasets.vision_datasets import ( + CDDBDataModule, + CLEARDataModule, + TorchVisionDataModule, +) from renate.benchmark.experiment_config import ( data_module_fn, get_data_module, @@ -34,6 +38,7 @@ from renate.models.prediction_strategies import ICaRLClassificationStrategy +@pytest.mark.slow @pytest.mark.parametrize( "model_name,expected_model_class", [(model_name, model_class) for model_name, model_class in models.items()], @@ -94,6 +99,7 @@ def test_model_fn_fails_for_unknown_model(): "label", ), ("MultiText", MultiTextDataModule, "distilbert-base-uncased", None, None), + ("CDDB", CDDBDataModule, None, None, None), ), ) def test_get_data_module( @@ -236,6 +242,20 @@ def test_get_scenario_fails_for_unknown_scenario(tmpdir): DataIncrementalScenario, 2, ), + ( + "DataIncrementalScenario", + "CDDB", + {"data_ids": ("biggan", "wild")}, + DataIncrementalScenario, + 2, + ), + ( + "DataIncrementalScenario", + "Core50", + {"data_ids": (2, 3)}, + DataIncrementalScenario, + 2, + ), ), ids=[ "class_incremental", @@ -249,6 +269,8 @@ def test_get_scenario_fails_for_unknown_scenario(tmpdir): "wild_time_image_all_tasks", "domainnet_by data_id", "domainnet by groupings", + "cddb by dataid", + "core50 by dataid", ], ) @pytest.mark.parametrize("val_size", (0, 0.5), 
ids=["no_val", "val"]) @@ -281,7 +303,7 @@ def test_data_module_fn( elif expected_scenario_class == DataIncrementalScenario: if "pretrained_model_name_or_path" in scenario_kwargs: assert scenario._data_module._tokenizer is not None - elif dataset_name not in ["CLEAR10", "CLEAR100", "DomainNet"]: + elif dataset_name not in ["CLEAR10", "CLEAR100", "DomainNet", "CDDB", "Core50"]: assert scenario._data_module._tokenizer is None assert scenario._num_tasks == expected_num_tasks @@ -294,6 +316,8 @@ def test_data_module_fn( ("CIFAR10", Compose, Normalize, "ResNet18CIFAR"), ("CIFAR100", Compose, Normalize, "ResNet18CIFAR"), ("CLEAR10", Compose, Compose, "ResNet18"), + ("CDDB", Compose, Compose, None), + ("Core50", Compose, Compose, None), ("DomainNet", Compose, Compose, "VisionTransformerB16"), ("hfd-rotten_tomatoes", type(None), type(None), "HuggingFaceTransformer"), ("fmow", Compose, Compose, "ResNet18"), @@ -344,6 +368,7 @@ def test_lr_scheduler_fn_fails_for_unknown_scheduler(): lr_scheduler_fn(unknown_lr_scheduler) +@pytest.mark.slow @pytest.mark.parametrize("model_name", [model_name for model_name in models]) @pytest.mark.parametrize("updater", ("ER", "Avalanche-iCaRL")) def test_prediction_strategy_is_correctly_set(model_name, updater): diff --git a/test/renate/data/test_data_module.py b/test/renate/data/test_data_module.py index 60b8eaa0..0b76bc1e 100644 --- a/test/renate/data/test_data_module.py +++ b/test/renate/data/test_data_module.py @@ -91,15 +91,15 @@ def test_torchvision_data_module(tmpdir, dataset_name, num_tr, num_te, x_shape): @pytest.mark.parametrize( "dataset_name,chunk_id,num_tr,num_te", [ - ("CLEAR10", 0, 2986, 500), - ("CLEAR100", 0, 9945, 4984), + ("CLEAR10", 0, 3300, 550), + ("CLEAR100", 0, 9964, 5000), ], ) def test_clear_data_module(tmpdir, dataset_name, chunk_id, num_tr, num_te): """Test loading of CLEAR data.""" val_size = 0.2 data_module = CLEARDataModule( - tmpdir, dataset_name=dataset_name, chunk_id=chunk_id, val_size=val_size + tmpdir, 
dataset_name=dataset_name, time_step=chunk_id, val_size=val_size ) data_module.prepare_data() data_module.setup() @@ -131,9 +131,10 @@ def test_tiny_imagenet_data_module(tmpdir): assert isinstance(val_data, Dataset) assert len(test_data) == num_te assert isinstance(test_data, Dataset) - assert train_data[0][0].size() == test_data[0][0].size() == (3, 64, 64) + assert train_data[0][0].size == test_data[0][0].size == (64, 64) +@pytest.mark.slow @pytest.mark.parametrize( "dataset_name,input_column,target_column", [ @@ -179,6 +180,7 @@ def test_hugging_face_data_module( assert isinstance(inputs["attention_mask"], torch.Tensor) +@pytest.mark.slow @pytest.mark.parametrize("column", ("input", "target"), ids=("input", "target")) def test_hugging_face_exception_raised_with_wrong_column(tmpdir, column): input_column = "text" diff --git a/test/renate/models/test_shared_linear.py b/test/renate/models/test_shared_linear.py new file mode 100644 index 00000000..4960d2d6 --- /dev/null +++ b/test/renate/models/test_shared_linear.py @@ -0,0 +1,17 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from renate.models.layers.shared_linear import SharedMultipleLinear + + +@pytest.mark.parametrize("share_parameters", [True, False]) +def test_shared_multiple_classifier(share_parameters): + model = SharedMultipleLinear(5, 3, share_parameters=share_parameters, num_updates=10) + num_elems = sum(x.numel() for x in model.parameters()) + assert num_elems == [10 * 5 * 3 + 10 * 3, 5 * 3 + 3][share_parameters] + + model.increment_task() + num_elems = sum(x.numel() for x in model.parameters()) + assert num_elems == [(10 + 1) * 5 * 3 + (10 + 1) * 3, 5 * 3 + 3][share_parameters] diff --git a/test/renate/models/test_task_identification_strat.py b/test/renate/models/test_task_identification_strat.py new file mode 100644 index 00000000..a575d0c0 --- /dev/null +++ b/test/renate/models/test_task_identification_strat.py @@ -0,0 +1,27 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +import torch +from sklearn.cluster import KMeans + +from renate.models.task_identification_strategies import TaskPrototypes + + +def test_task_prototypes(): + data = torch.nn.functional.normalize(torch.rand(10, 3)) + labels = torch.arange(start=0, end=data.size(0)) + task_proto = TaskPrototypes(0, 0, data.size(1)) + # lets attach + task_proto._training_feat_centroids = data + task_proto._training_feat_task_ids = labels + + test_data = torch.nn.functional.normalize(torch.rand(5, 3)) + predictions = task_proto.infer_task(test_data) + + kmeans = KMeans(n_clusters=data.size(0)) + kmeans.cluster_centers_ = data.numpy() + kmeans.labels_ = labels.numpy() + kmeans._n_threads = 1 + + gnd_truth = kmeans.predict(test_data.numpy()) + + assert (predictions.numpy() == gnd_truth).all() diff --git a/test/renate/utils/test_avalanche.py b/test/renate/utils/test_avalanche.py index cc66094d..5d318f68 100644 --- a/test/renate/utils/test_avalanche.py +++ b/test/renate/utils/test_avalanche.py @@ -1,11 
+1,14 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +import pickle + import pytest import torch from avalanche.training.plugins import EWCPlugin, ReplayPlugin from torch import Tensor from torch.utils.data import Subset, TensorDataset +from renate.memory import ReservoirBuffer from renate.utils.avalanche import ( AvalancheBenchmarkWrapper, _plugin_index, @@ -16,24 +19,30 @@ ) -def test_to_avalanche_dataset(): +@pytest.mark.parametrize("use_buffer", [True, False]) +def test_to_avalanche_dataset(use_buffer): expected_x = 6 expected_y = 1 tensor_dataset = TensorDataset( torch.tensor([5, expected_x, 7]), torch.tensor([0, expected_y, 2]) ) - dataset = to_avalanche_dataset(Subset(tensor_dataset, [1])) - assert dataset._inputs[0].item() == expected_x + if use_buffer: + buffer = ReservoirBuffer(10) + buffer.update(tensor_dataset) + dataset = to_avalanche_dataset(buffer) + else: + dataset = to_avalanche_dataset(Subset(tensor_dataset, [0, 1, 2])) assert type(dataset._targets) == list - assert len(dataset._targets) == 1 - assert dataset._targets[0] == expected_y + assert len(dataset._targets) == 3 + assert dataset._targets[1] == expected_y assert type(dataset._targets[0]) == int - assert dataset.targets.item() == dataset._targets[0] - assert dataset.targets == expected_y + assert dataset.targets[0].item() == dataset._targets[0] + assert dataset.targets[1] == expected_y assert type(dataset.targets) == Tensor - x, y = dataset[0] + x, y = dataset[1] assert x == expected_x and y == expected_y - assert len(dataset) == 1 + assert len(dataset) == 3 + pickle.dumps(dataset) def test_avalanche_benchmark_wrapper_correctly_tracks_and_saves_state(): diff --git a/test/renate/utils/test_misc.py b/test/renate/utils/test_misc.py index 601c9524..84011532 100644 --- a/test/renate/utils/test_misc.py +++ b/test/renate/utils/test_misc.py @@ -3,7 +3,11 @@ import pytest import torch -from renate.utils.misc import int_or_str, 
maybe_populate_mask_and_ignore_logits +from renate.utils.misc import ( + AdditionalTrainingMetrics, + int_or_str, + maybe_populate_mask_and_ignore_logits, +) @pytest.mark.parametrize( @@ -69,3 +73,23 @@ def test_possibly_populate_mask_and_ignore_logits( assert out_logits.data_ptr() == logits.data_ptr() if class_mask is not None: assert class_mask.data_ptr() == out_cm.data_ptr() + + +@pytest.mark.parametrize( + "model,gnd", + [ + ( + torch.nn.Linear(2, 2), + {"peak_memory_usage": 0, "trainable_params": 6, "total_params": 6}, + ), + ( + torch.nn.Linear(2, 2).requires_grad_(False), + {"peak_memory_usage": 0, "trainable_params": 0, "total_params": 6}, + ), + ], +) +def test_addition_metrics(model, gnd): + metrics = AdditionalTrainingMetrics() + out = metrics(model) + for k in gnd: + assert gnd[k] == out[k] diff --git a/test/renate/utils/test_pytorch.py b/test/renate/utils/test_pytorch.py index ab1cfa5b..ea0bae91 100644 --- a/test/renate/utils/test_pytorch.py +++ b/test/renate/utils/test_pytorch.py @@ -1,13 +1,10 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 -import numpy as np import pytest import torch import torchvision from torch.utils.data import Sampler, TensorDataset -from renate.benchmark.datasets.vision_datasets import TorchVisionDataModule -from renate.benchmark.scenarios import ClassIncrementalScenario from renate.memory.buffer import ReservoirBuffer from renate.utils import pytorch from renate.utils.pytorch import ( @@ -19,6 +16,8 @@ unique_classes, ) +from dummy_datasets import DummyDataIncrementalDataModule + @pytest.mark.parametrize("model", [torchvision.models.resnet18(pretrained=True)]) def test_reinitialize_model_parameters(model: torch.nn.Module): @@ -130,18 +129,15 @@ def test_complementary_indices(num_outputs, indices, expected_output): @pytest.mark.parametrize("test_dataset", [True, False]) def test_unique_classes(tmpdir, test_dataset): if test_dataset: - class_groupings = np.arange(0, 100).reshape(10, 10).tolist() - data_module = TorchVisionDataModule(tmpdir, dataset_name="CIFAR100", val_size=0.2) - data_module.prepare_data() - for chunk_id in range(len(class_groupings)): - scenario = ClassIncrementalScenario( - data_module=data_module, groupings=class_groupings, chunk_id=chunk_id + for data_id in range(5): + data_module = DummyDataIncrementalDataModule( + data_id, (10, 10), transform=None, val_size=0 ) - scenario.setup() - ds = scenario.val_data() - predicted_unique = unique_classes(ds) - - assert predicted_unique == set(class_groupings[chunk_id]) + data_module.prepare_data() + data_module.setup() + train_data = data_module.train_data() + predicted_unique = set(int(x) for x in unique_classes(train_data)) + assert predicted_unique == {data_id} else: X = torch.randn(10, 3) y = torch.arange(0, 10)