From c120338b977bce857f8ab364304c5455726d4f04 Mon Sep 17 00:00:00 2001 From: Giles Hall Date: Wed, 16 Feb 2022 01:26:37 -0500 Subject: [PATCH 01/28] realign with current version of transformers library --- .../model/tabular_modeling_auto.py | 2 +- .../model/tabular_transformers.py | 30 +++++++++---------- setup.py | 4 +-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/multimodal_transformers/model/tabular_modeling_auto.py b/multimodal_transformers/model/tabular_modeling_auto.py index c95420c..20d44ac 100644 --- a/multimodal_transformers/model/tabular_modeling_auto.py +++ b/multimodal_transformers/model/tabular_modeling_auto.py @@ -1,7 +1,7 @@ from collections import OrderedDict from transformers.configuration_utils import PretrainedConfig -from transformers.configuration_auto import ( +from transformers import ( AutoConfig, AlbertConfig, BertConfig, diff --git a/multimodal_transformers/model/tabular_transformers.py b/multimodal_transformers/model/tabular_transformers.py index 1d2a934..d9a9aed 100644 --- a/multimodal_transformers/model/tabular_transformers.py +++ b/multimodal_transformers/model/tabular_transformers.py @@ -7,14 +7,14 @@ XLNetForSequenceClassification, XLMForSequenceClassification ) -from transformers.modeling_bert import BERT_INPUTS_DOCSTRING -from transformers.modeling_roberta import ROBERTA_INPUTS_DOCSTRING -from transformers.modeling_distilbert import DISTILBERT_INPUTS_DOCSTRING -from transformers.modeling_albert import ALBERT_INPUTS_DOCSTRING -from transformers.modeling_xlnet import XLNET_INPUTS_DOCSTRING -from transformers.modeling_xlm import XLM_INPUTS_DOCSTRING -from transformers.configuration_xlm_roberta import XLMRobertaConfig -from transformers.file_utils import add_start_docstrings_to_callable +from transformers.models.bert.modeling_bert import BERT_INPUTS_DOCSTRING +from transformers.models.roberta.modeling_roberta import ROBERTA_INPUTS_DOCSTRING +from transformers.models.distilbert.modeling_distilbert import DISTILBERT_INPUTS_DOCSTRING +from transformers.models.albert.modeling_albert import ALBERT_INPUTS_DOCSTRING +from transformers.models.xlnet.modeling_xlnet import XLNET_INPUTS_DOCSTRING +from transformers.models.xlm.modeling_xlm import XLM_INPUTS_DOCSTRING +from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig +from transformers.file_utils import add_start_docstrings_to_model_forward from .tabular_combiner import TabularFeatCombiner from .tabular_config import TabularConfig @@ -61,7 +61,7 @@ def __init__(self, hf_model_config): hidden_channels=dims, bn=True) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, input_ids=None, @@ -162,7 +162,7 @@ def __init__(self, hf_model_config): hidden_channels=dims, bn=True) - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, input_ids=None, @@ -274,7 +274,7 @@ def __init__(self, hf_model_config): hidden_channels=dims, bn=True) - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, input_ids=None, @@ -375,7 +375,7 @@ def __init__(self, hf_model_config): 
hidden_channels=dims, bn=True) - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -465,7 +465,7 @@ def __init__(self, hf_model_config): hidden_channels=dims, bn=True) - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, input_ids=None, @@ -563,7 +563,7 @@ def __init__(self, hf_model_config): hidden_channels=dims, bn=True) - @ add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING) + @ add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -617,4 +617,4 @@ def forward( labels, self.num_labels, class_weights) - return loss, logits, classifier_layer_outputs \ No newline at end of file + return loss, logits, classifier_layer_outputs diff --git a/setup.py b/setup.py index 6aea872..7036be9 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ install_requires = [ 'torch', - 'transformers==3.1', + 'transformers>=4.16.2', 'numpy', 'tqdm', 'scipy', @@ -36,4 +36,4 @@ 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ], -) \ No newline at end of file +) From 129c44138e53b3d576e16e588e6fd0d9ceec243a Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 1 Mar 2023 19:06:18 +0000 Subject: [PATCH 02/28] Fix: multimodal_exp_args to support latest transformers. --- multimodal_exp_args.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/multimodal_exp_args.py b/multimodal_exp_args.py index c140cda..e7f27f5 100644 --- a/multimodal_exp_args.py +++ b/multimodal_exp_args.py @@ -1,10 +1,10 @@ from dataclasses import dataclass, field import json import logging -from typing import Optional, Tuple +from typing import Optional, Tuple, List import torch -from transformers.training_args import TrainingArguments, torch_required, cached_property +from transformers.training_args import TrainingArguments, requires_backends, cached_property logger = logging.getLogger(__name__) @@ -178,6 +178,10 @@ class OurTrainingArguments(TrainingArguments): learning_rate: float = field(default=5e-5, metadata={"help": "The initial learning rate for Adam."}) + report_to: Optional[List[str]] = field( + default_factory=list, metadata={"help": "The list of integrations to report the results and logs to."} + ) + def __post_init__(self): if self.debug_dataset: self.max_token_length = 16 @@ -186,12 +190,12 @@ def __post_init__(self): @cached_property - @torch_required def _setup_devices(self) -> Tuple["torch.device", int]: + requires_backends(self, ["torch"]) logger.info("PyTorch: setting up devices") if self.no_cuda: device = torch.device("cpu") - n_gpu = 0 + self._n_gpu = 0 elif self.local_rank == -1: # if n_gpu is > 1 we'll use nn.DataParallel. # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0` @@ -200,15 +204,16 @@ def _setup_devices(self) -> Tuple["torch.device", int]: # GPUs available in the environment, so `CUDA_VISIBLE_DEVICES=1,2` with `cuda:0` # will use the first GPU in that env, i.e. GPU#1 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - n_gpu = torch.cuda.device_count() + self._n_gpu = torch.cuda.device_count() else: # Here, we'll use torch.distributed. 
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs - torch.distributed.init_process_group(backend="nccl") + if not torch.distributed.is_initialized(): + torch.distributed.init_process_group(backend="nccl", timeout=self.ddp_timeout_delta) device = torch.device("cuda", self.local_rank) - n_gpu = 1 + self._n_gpu = 1 if device.type == "cuda": torch.cuda.set_device(device) - return device, n_gpu \ No newline at end of file + return device \ No newline at end of file From 227fd72070dc775d01c10a1f3e3489a4057ed0b1 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 1 Mar 2023 19:43:59 +0000 Subject: [PATCH 03/28] Fix: Update to current stable transformers version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7036be9..fed9c9b 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ install_requires = [ 'torch', - 'transformers>=4.16.2', + 'transformers>=4.26.1', 'numpy', 'tqdm', 'scipy', From 9b21861bb2a884571fa29adc4132e80c85de9e3f Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 1 Mar 2023 20:54:15 +0000 Subject: [PATCH 04/28] Fix: Add fix from issue #9. --- multimodal_transformers/model/tabular_combiner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal_transformers/model/tabular_combiner.py b/multimodal_transformers/model/tabular_combiner.py index 33b82cf..ecf7369 100644 --- a/multimodal_transformers/model/tabular_combiner.py +++ b/multimodal_transformers/model/tabular_combiner.py @@ -406,7 +406,7 @@ def forward(self, text_feats, cat_feats=None, numerical_feats=None): if self.numerical_feat_dim > self.text_out_dim: numerical_feats = self.num_mlp(numerical_feats) w_num = torch.mm(numerical_feats, self.weight_num) - g_num = (torch.cat([w_text, w_cat], dim=-1) * self.weight_a).sum(dim=1).unsqueeze(0).T + g_num = (torch.cat([w_text, w_num], dim=-1) * self.weight_a).sum(dim=1).unsqueeze(0).T else: w_num = None g_num = torch.zeros(0, device=g_text.device) From 3294ae24aa6084dcb3080d50d2c4fdb5fdfe7b27 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 1 Mar 2023 21:00:00 +0000 Subject: [PATCH 05/28] Fix: Add fix from issue #14. --- multimodal_transformers/model/tabular_combiner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal_transformers/model/tabular_combiner.py b/multimodal_transformers/model/tabular_combiner.py index ecf7369..f60d9b9 100644 --- a/multimodal_transformers/model/tabular_combiner.py +++ b/multimodal_transformers/model/tabular_combiner.py @@ -265,7 +265,7 @@ def __init__(self, tabular_config): self.numerical_feat_dim, division=self.mlp_division, output_dim=output_dim_num) - self.cat_mlp = MLP( + self.num_mlp = MLP( self.numerical_feat_dim, output_dim_num, num_hidden_lyr=len(dims), From 48048f6ee18057377bf709879cef2be0e84d146f Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Thu, 2 Mar 2023 23:31:31 +0000 Subject: [PATCH 06/28] Fix: Evaluation issues. 
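With the tabular models returning the tuple (loss, logits, classifier_layer_outputs),
recent Trainer versions hand compute_metrics an EvalPrediction whose .predictions
field is a tuple of arrays rather than a single array, so the logits have to be
unpacked before computing metrics. A minimal sketch of the unpacking pattern this
patch adopts (the helper name is illustrative, not part of the patch):

    import numpy as np
    from transformers import EvalPrediction

    def extract_logits(p: EvalPrediction) -> np.ndarray:
        # The first entry holds the logits; the remaining entries are
        # auxiliary model outputs such as the classifier layer activations.
        if isinstance(p.predictions, (tuple, list)):
            return p.predictions[0]
        return p.predictions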
--- main.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index fd167a5..798839e 100644 --- a/main.py +++ b/main.py @@ -116,16 +116,17 @@ def main(): def build_compute_metrics_fn(task_name: str) -> Callable[[EvalPrediction], Dict]: def compute_metrics_fn(p: EvalPrediction): + predictions = p.predictions[0] if task_name == "classification": - preds_labels = np.argmax(p.predictions, axis=1) - if p.predictions.shape[-1] == 2: - pred_scores = softmax(p.predictions, axis=1)[:, 1] + preds_labels = np.argmax(predictions, axis=1) + if predictions.shape[-1] == 2: + pred_scores = softmax(predictions, axis=1)[:, 1] else: - pred_scores = softmax(p.predictions, axis=1) + pred_scores = softmax(predictions, axis=1) return calc_classification_metrics(pred_scores, preds_labels, p.label_ids) elif task_name == "regression": - preds = np.squeeze(p.predictions) + preds = np.squeeze(predictions) return calc_regression_metrics(preds, p.label_ids) else: return {} @@ -178,7 +179,7 @@ def compute_metrics_fn(p: EvalPrediction): output_eval_file = os.path.join( training_args.output_dir, f"eval_metric_results_{task}_fold_{i+1}.txt" ) - if trainer.is_world_master(): + if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results {} *****".format(task)) for key, value in eval_result.items(): @@ -190,13 +191,13 @@ def compute_metrics_fn(p: EvalPrediction): if training_args.do_predict: logging.info("*** Test ***") - predictions = trainer.predict(test_dataset=test_dataset).predictions + predictions = trainer.predict(test_dataset=test_dataset).predictions[0] output_test_file = os.path.join( training_args.output_dir, f"test_results_{task}_fold_{i+1}.txt" ) eval_result = trainer.evaluate(eval_dataset=test_dataset) logger.info(pformat(eval_result, indent=4)) - if trainer.is_world_master(): + if trainer.is_world_process_zero(): with open(output_test_file, "w") as writer: logger.info("***** Test results {} *****".format(task)) writer.write("index\tprediction\n") From f67972f6077e4fb895dad32e4383f389b50dd5ae Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Thu, 2 Mar 2023 23:32:02 +0000 Subject: [PATCH 07/28] Fix: Standardize train configs. 
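These JSON files are parsed straight into dataclasses by HfArgumentParser, so every
key must match a declared argument field. A sketch of how a standardized config is
consumed (mirroring main.py and the test setup added later in this series):

    import os
    from transformers import HfArgumentParser
    from multimodal_exp_args import (ModelArguments,
                                     MultimodalDataTrainingArguments,
                                     OurTrainingArguments)

    parser = HfArgumentParser(
        (ModelArguments, MultimodalDataTrainingArguments, OurTrainingArguments))
    model_args, data_args, training_args = parser.parse_json_file(
        json_file=os.path.abspath(
            "datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json"))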
--- .../train_config.json | 4 ++-- .../train_config.json | 21 ++++++++++--------- .../train_config.json | 14 +++++++++---- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/datasets/Melbourne_Airbnb_Open_Data/train_config.json b/datasets/Melbourne_Airbnb_Open_Data/train_config.json index c9cbf46..180b18b 100644 --- a/datasets/Melbourne_Airbnb_Open_Data/train_config.json +++ b/datasets/Melbourne_Airbnb_Open_Data/train_config.json @@ -14,8 +14,8 @@ "num_train_epochs": 5, "overwrite_output_dir": true, "learning_rate": 3e-3, - "per_device_train_batch_size": 12, - "per_device_eval_batch_size": 12, + "per_device_train_batch_size": 16, + "per_device_eval_batch_size": 16, "logging_steps": 50, "eval_steps": 500, "save_steps": 3000, diff --git a/datasets/PetFindermy_Adoption_Prediction/train_config.json b/datasets/PetFindermy_Adoption_Prediction/train_config.json index 14718de..5baaec0 100644 --- a/datasets/PetFindermy_Adoption_Prediction/train_config.json +++ b/datasets/PetFindermy_Adoption_Prediction/train_config.json @@ -1,24 +1,25 @@ { - "output_dir": "./logs_petfinder/", + "output_dir": "./logs_petfinder/gating_on_cat_and_num_feats_then_sum_full_model", "debug_dataset": false, "task": "classification", - "num_labels": 5, "combine_feat_method": "text_only", "experiment_name": "bert-base-multilingual-uncased", "model_name_or_path": "bert-base-multilingual-uncased", "do_train": true, + "categorical_encode_type": "ohe", + "numerical_transformer_method": "quantile_normal", "tokenizer_name": "bert-base-multilingual-uncased", - "per_device_train_batch_size": 12, - "gpu_num": 0, + "use_simple_classifier": false, + "logging_dir": "./logs_petfinder/bertmultilingual_gating_on_cat_and_num_feats_then_sum_full_model_lr_3e-3/", "num_train_epochs": 5, - "categorical_encode_type": "ohe", - "use_class_weights": false, + "overwrite_output_dir": true, + "learning_rate": 1e-4, + "per_device_train_batch_size": 16, + "per_device_eval_batch_size": 16, "logging_steps": 50, "eval_steps": 750, "save_steps": 3000, - "learning_rate": 1e-4, - "data_path": "./datasets/PetFindermy_Adoption_Prediction/", - "column_info_path": "./datasets/PetFindermy_Adoption_Prediction/column_info_all_text.json", - "overwrite_output_dir": true + "data_path": "./datasets/PetFindermy_Adoption_Prediction", + "column_info_path": "./datasets/PetFindermy_Adoption_Prediction/column_info_all_text.json" } diff --git a/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json b/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json index 9cafa79..86a6954 100644 --- a/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json +++ b/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json @@ -5,15 +5,21 @@ "combine_feat_method": "text_only", "experiment_name": "Unimodal Bert Base Uncased", "model_name_or_path": "bert-base-uncased", - "gpu_num": 0, "do_train": true, + "categorical_encode_type": "binary", + "numerical_transformer_method": "quantile_normal", "tokenizer_name": "bert-base-uncased", - "per_device_train_batch_size": 12, + "use_simple_classifier": false, + "logging_dir": "./logs_clothing_review/bertbase_gating_on_cat_and_num_feats_then_sum_full_model_lr_3e-3/", + "num_train_epochs": 5, + "overwrite_output_dir": true, + "learning_rate": 3e-3, + "per_device_train_batch_size": 16, + "per_device_eval_batch_size": 16, "logging_steps": 50, "eval_steps": 750, "save_steps": 3000, "data_path": "./datasets/Womens_Clothing_E-Commerce_Reviews", - "column_info_path": 
"./datasets/Womens_Clothing_E-Commerce_Reviews/column_info_all_text.json", - "overwrite_output_dir": true + "column_info_path": "./datasets/Womens_Clothing_E-Commerce_Reviews/column_info_all_text.json" } From 4fd17c27f9b8f10aeb77e70d5fa282d8d09cd081 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Fri, 3 Mar 2023 18:30:32 +0000 Subject: [PATCH 08/28] Docs: Add comment documenting change to p.predictions. --- main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.py b/main.py index 798839e..a9b5f18 100644 --- a/main.py +++ b/main.py @@ -116,6 +116,8 @@ def main(): def build_compute_metrics_fn(task_name: str) -> Callable[[EvalPrediction], Dict]: def compute_metrics_fn(p: EvalPrediction): + # p.predictions is now a list of objects + # The first entry is the actual predictions predictions = p.predictions[0] if task_name == "classification": preds_labels = np.argmax(predictions, axis=1) From 1465eed1896662676ae4b12a443ea4e8a441c814 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Fri, 3 Mar 2023 21:12:20 +0000 Subject: [PATCH 09/28] Docs: Resolve #19 --- docs/source/notes/introduction.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/notes/introduction.rst b/docs/source/notes/introduction.rst index 4088cbc..f786320 100644 --- a/docs/source/notes/introduction.rst +++ b/docs/source/notes/introduction.rst @@ -95,7 +95,7 @@ The following example shows a forward pass on two data examples labels = torch.tensor([1, 0]) model_inputs['cat_feats'] = categorical_feat - model_inputs['num_feats'] = numerical_feat + model_inputs['numerical_feats'] = numerical_feat model_inputs['labels'] = labels loss, logits, layer_outs = model(**model_inputs) From c518857bb4dbdac5701c96268ffdc1e2690973b4 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Mon, 6 Mar 2023 17:00:00 +0000 Subject: [PATCH 10/28] Fix: Resolve #32. --- multimodal_transformers/data/data_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal_transformers/data/data_utils.py b/multimodal_transformers/data/data_utils.py index 3f73db4..c833982 100644 --- a/multimodal_transformers/data/data_utils.py +++ b/multimodal_transformers/data/data_utils.py @@ -66,7 +66,7 @@ def change_name_func(x): def _one_hot(self): ohe = preprocessing.OneHotEncoder(sparse=False) ohe.fit(self.df[self.cat_feats].values) - self.feat_names = list(ohe.get_feature_names(self.cat_feats)) + self.feat_names = list(ohe.get_feature_names_out(self.cat_feats)) return ohe.transform(self.df[self.cat_feats].values) def fit_transform(self): From 242180a31a0793e686197b5ccbd90368cdcfbb15 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Mon, 6 Mar 2023 17:05:38 +0000 Subject: [PATCH 11/28] Fix: Use only 100 samples in debug mode. 
--- multimodal_transformers/data/load_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal_transformers/data/load_data.py b/multimodal_transformers/data/load_data.py index e04bdab..c66d256 100644 --- a/multimodal_transformers/data/load_data.py +++ b/multimodal_transformers/data/load_data.py @@ -370,7 +370,7 @@ def load_data(data_df, :obj:`tabular_torch_dataset.TorchTextDataset`: The converted dataset """ if debug: - data_df = data_df[:500] + data_df = data_df[:100] if empty_text_values is None: empty_text_values = ['nan', 'None'] From 3117e15e62143f84aa3dc53fe3a8c8d6c3228cf2 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Tue, 7 Mar 2023 18:49:21 +0000 Subject: [PATCH 12/28] Fix: XLNet Config has no use_cache attribute. --- multimodal_transformers/model/tabular_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal_transformers/model/tabular_transformers.py b/multimodal_transformers/model/tabular_transformers.py index d9a9aed..d55b21d 100644 --- a/multimodal_transformers/model/tabular_transformers.py +++ b/multimodal_transformers/model/tabular_transformers.py @@ -494,7 +494,7 @@ def forward( If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy). """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict - use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache) + use_cache = self.training or (use_cache if use_cache is not None else False) transformer_outputs = self.transformer( input_ids, From 8bba5e52d1e2005b9e59ec5dfec8528188403a2a Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Tue, 7 Mar 2023 19:29:39 +0000 Subject: [PATCH 13/28] Fix: use_cache -> use_mems for future proofing. --- multimodal_transformers/model/tabular_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal_transformers/model/tabular_transformers.py b/multimodal_transformers/model/tabular_transformers.py index d55b21d..fbd2287 100644 --- a/multimodal_transformers/model/tabular_transformers.py +++ b/multimodal_transformers/model/tabular_transformers.py @@ -506,7 +506,7 @@ def forward( input_mask=input_mask, head_mask=head_mask, inputs_embeds=inputs_embeds, - use_cache=use_cache, + use_mems=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, From 8d8202a5bce08f840c349125b204c77b6de48064 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 18:22:05 +0000 Subject: [PATCH 14/28] Fix: Force summary_proj_to_labels=False for XLNet and XLM. 
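XLNet and XLM pool the encoder output through a SequenceSummary module whose
projection maps to num_labels whenever summary_proj_to_labels is True; the tabular
combiner instead needs the pooled vector to stay at hidden_size. A sketch of the
behavior being overridden (assumes SequenceSummary is importable from
transformers.modeling_utils, as in transformers 4.26):

    from transformers import XLMConfig
    from transformers.modeling_utils import SequenceSummary

    config = XLMConfig(num_labels=5)  # summary_proj_to_labels defaults to True
    print(SequenceSummary(config).summary.out_features)  # 5, i.e. num_labels

    config.summary_proj_to_labels = False
    print(SequenceSummary(config).summary.out_features)  # 2048, i.e. hidden size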
---
 multimodal_transformers/model/tabular_transformers.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/multimodal_transformers/model/tabular_transformers.py b/multimodal_transformers/model/tabular_transformers.py
index fbd2287..ab5eec0 100644
--- a/multimodal_transformers/model/tabular_transformers.py
+++ b/multimodal_transformers/model/tabular_transformers.py
@@ -440,6 +440,9 @@ class XLNetWithTabular(XLNetForSequenceClassification):
             :obj:`TabularConfig` instance specifying the configs for :obj:`TabularFeatCombiner`
     """
     def __init__(self, hf_model_config):
+        # When set to True, the sequence summary layer is hidden_size -> num_labels
+        # We expect the output to be hidden_size -> hidden_size
+        hf_model_config.summary_proj_to_labels = False
         super().__init__(hf_model_config)
         tabular_config = hf_model_config.tabular_config
         if type(tabular_config) is dict:  # when loading from saved model
@@ -538,6 +541,9 @@ class XLMWithTabular(XLMForSequenceClassification):
             :obj:`TabularConfig` instance specifying the configs for :obj:`TabularFeatCombiner`
     """
     def __init__(self, hf_model_config):
+        # When set to True, the sequence summary layer is hidden_size -> num_labels
+        # We expect the output to be hidden_size -> hidden_size
+        hf_model_config.summary_proj_to_labels = False
         super().__init__(hf_model_config)
         tabular_config = hf_model_config.tabular_config
         if type(tabular_config) is dict:  # when loading from saved model

From 00897e92faeaa5728585e1d1685cbf85fc6d88dd Mon Sep 17 00:00:00 2001
From: akashsaravanan-georgian
Date: Wed, 8 Mar 2023 18:28:05 +0000
Subject: [PATCH 15/28] Fix: Set new library version.

---
 multimodal_transformers/__init__.py | 2 +-
 setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/multimodal_transformers/__init__.py b/multimodal_transformers/__init__.py
index b9ed1ad..dbcaa83 100644
--- a/multimodal_transformers/__init__.py
+++ b/multimodal_transformers/__init__.py
@@ -1,6 +1,6 @@
 import multimodal_transformers.data
 import multimodal_transformers.model

-__version__ = '0.1.2-alpha'
+__version__ = '0.2-alpha'

 __all__ = ['multimodal_transformers', '__version__']
\ No newline at end of file
diff --git a/setup.py b/setup.py
index fed9c9b..8380fbe 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages

-__version__ = '0.1.4-alpha'
+__version__ = '0.2-alpha'

 url = 'https://github.com/georgianpartners/Multimodal-Toolkit'
 install_requires = [

From 265478e8b7ce629b02119ca9a073109008412daa Mon Sep 17 00:00:00 2001
From: akashsaravanan-georgian
Date: Wed, 8 Mar 2023 18:30:46 +0000
Subject: [PATCH 16/28] Fix: Include sacremoses for certain models and pytest in setup.py

---
 setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index 8380fbe..67868dd 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,8 @@
     'networkx',
     'scikit-learn',
     'pandas',
+    'sacremoses',
+    'pytest'
 ]

 setup(

From 2c613622e3e868a20dd2ad3d428568dd9ccb326c Mon Sep 17 00:00:00 2001
From: akashsaravanan-georgian
Date: Wed, 8 Mar 2023 18:32:22 +0000
Subject: [PATCH 17/28] Tests: Add basic model tests.
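The suite parametrizes each dataset config in CONFIGS against each backbone in
MODELS and trains on a small debug slice of each dataset, so it can be run
directly with "pytest tests/test_model.py" once the bundled datasets are present.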
--- tests/test_airbnb.json | 20 ++++++ tests/test_clothing.json | 19 +++++ tests/test_model.py | 145 ++++++++++++++++++++++++++++++++++++++ tests/test_petfinder.json | 22 ++++++ 4 files changed, 206 insertions(+) create mode 100644 tests/test_airbnb.json create mode 100644 tests/test_clothing.json create mode 100644 tests/test_model.py create mode 100644 tests/test_petfinder.json diff --git a/tests/test_airbnb.json b/tests/test_airbnb.json new file mode 100644 index 0000000..b96da73 --- /dev/null +++ b/tests/test_airbnb.json @@ -0,0 +1,20 @@ +{ + "output_dir": "./tests/test_airbnb/", + "task": "regression", + "combine_feat_method": "gating_on_cat_and_num_feats_then_sum", + "experiment_name": "bert-base-multilingual-uncased", + "model_name_or_path": "bert-base-multilingual-uncased", + "categorical_encode_type": "binary", + "numerical_transformer_method": "quantile_normal", + "tokenizer_name": "bert-base-multilingual-uncased", + "use_simple_classifier": false, + "num_train_epochs": 5, + "overwrite_output_dir": true, + "learning_rate": 1e-4, + "per_device_train_batch_size": 2, + "per_device_eval_batch_size": 2, + "eval_steps": 500, + "data_path": "./datasets/Melbourne_Airbnb_Open_Data", + "column_info_path": "./datasets/Melbourne_Airbnb_Open_Data/column_info.json" + } + \ No newline at end of file diff --git a/tests/test_clothing.json b/tests/test_clothing.json new file mode 100644 index 0000000..6291743 --- /dev/null +++ b/tests/test_clothing.json @@ -0,0 +1,19 @@ +{ + "output_dir": "./tests/test_clothing/", + "task": "classification", + "combine_feat_method": "text_only", + "experiment_name": "bert-base-uncased", + "model_name_or_path": "bert-base-uncased", + "categorical_encode_type": "binary", + "numerical_transformer_method": "quantile_normal", + "tokenizer_name": "bert-base-uncased", + "use_simple_classifier": false, + "num_train_epochs": 5, + "overwrite_output_dir": true, + "learning_rate": 1e-4, + "per_device_train_batch_size": 2, + "per_device_eval_batch_size": 2, + "eval_steps": 500, + "data_path": "./datasets/Womens_Clothing_E-Commerce_Reviews", + "column_info_path": "./datasets/Womens_Clothing_E-Commerce_Reviews/column_info_all_text.json" + } \ No newline at end of file diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 0000000..24443a7 --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,145 @@ +import os +import sys; sys.path.append("./") +from typing import Callable, Dict + +import numpy as np +from scipy.special import softmax +from transformers import ( + AutoTokenizer, + AutoConfig, + HfArgumentParser, + Trainer, + EvalPrediction, + set_seed +) + +from multimodal_exp_args import MultimodalDataTrainingArguments, ModelArguments, OurTrainingArguments +from evaluation import calc_classification_metrics, calc_regression_metrics +from multimodal_transformers.data import load_data_from_folder +from multimodal_transformers.model import TabularConfig +from multimodal_transformers.model import AutoModelWithTabular + +import pytest + +os.environ['COMET_MODE'] = 'DISABLED' + + +CONFIGS = [ + "./tests/test_airbnb.json", + "./tests/test_clothing.json", + "./tests/test_petfinder.json" +] + +MODELS = [ + "albert-base-v2", + "bert-base-multilingual-uncased", + "distilbert-base-uncased", + "roberta-base", + "xlm-mlm-100-1280", + "xlm-roberta-base", + "xlnet-base-cased" +] + +def build_compute_metrics_fn(task_name: str) -> Callable[[EvalPrediction], Dict]: + def compute_metrics_fn(p: EvalPrediction): + # p.predictions is now a list of objects + # The first entry is 
the actual predictions + predictions = p.predictions[0] + if task_name == "classification": + preds_labels = np.argmax(predictions, axis=1) + if predictions.shape[-1] == 2: + pred_scores = softmax(predictions, axis=1)[:, 1] + else: + pred_scores = softmax(predictions, axis=1) + return calc_classification_metrics(pred_scores, preds_labels, + p.label_ids) + elif task_name == "regression": + preds = np.squeeze(predictions) + return calc_regression_metrics(preds, p.label_ids) + else: + return {} + return compute_metrics_fn + +@pytest.mark.parametrize("json_file", CONFIGS) +@pytest.mark.parametrize("model_string", MODELS) +def test_model(json_file: str, model_string: str): + # Parse our input json files + parser = HfArgumentParser(( + ModelArguments, MultimodalDataTrainingArguments,OurTrainingArguments + )) + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(json_file)) + + # Set model string + # We don't use the value from the config here since we test multiple models + training_args.experiment_name = model_string + model_args.model_name_or_path = model_string + model_args.tokenizer_name = model_string + + # Create a tokenizer + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + ) + + # Load and preprocess datasets + # We force debug=True so we load only 100 entries + train_dataset, val_dataset, test_dataset = load_data_from_folder( + data_args.data_path, + data_args.column_info['text_cols'], + tokenizer, + label_col=data_args.column_info['label_col'], + label_list=data_args.column_info['label_list'], + categorical_cols=data_args.column_info['cat_cols'], + numerical_cols=data_args.column_info['num_cols'], + categorical_encode_type=data_args.categorical_encode_type, + numerical_transformer_method=data_args.numerical_transformer_method, + sep_text_token_str=tokenizer.sep_token if not data_args.column_info['text_col_sep_token'] else data_args.column_info['text_col_sep_token'], + max_token_length=training_args.max_token_length, + debug=True, + ) + + set_seed(training_args.seed) + task = data_args.task + + # Regression tasks have only one "label" + if task == 'regression': + num_labels = 1 + else: + num_labels = len(np.unique(train_dataset.labels)) if data_args.num_classes == -1 else data_args.num_classes + + # Setup configs + config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + ) + tabular_config = TabularConfig( + num_labels=num_labels, + cat_feat_dim=train_dataset.cat_feats.shape[1] if train_dataset.cat_feats is not None else 0, + numerical_feat_dim=train_dataset.numerical_feats.shape[1] if train_dataset.numerical_feats is not None else 0, + **vars(data_args) + ) + config.tabular_config = tabular_config + + # Make model + model = AutoModelWithTabular.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + config=config, + cache_dir=model_args.cache_dir + ) + model_path = model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None + + # Make trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=val_dataset, + compute_metrics=build_compute_metrics_fn(task), + ) + + # Train + trainer.train(model_path=model_path) + + # Get predictions + test_results = trainer.predict(test_dataset=test_dataset) + assert 
test_results.predictions[0].shape == (100, num_labels) diff --git a/tests/test_petfinder.json b/tests/test_petfinder.json new file mode 100644 index 0000000..3d627bc --- /dev/null +++ b/tests/test_petfinder.json @@ -0,0 +1,22 @@ +{ + "output_dir": "./tests/test_petfinder/", + "task": "classification", + "combine_feat_method": "text_only", + "experiment_name": "bert-base-multilingual-uncased", + "model_name_or_path": "bert-base-multilingual-uncased", + "categorical_encode_type": "ohe", + "numerical_transformer_method": "quantile_normal", + "tokenizer_name": "bert-base-multilingual-uncased", + "use_simple_classifier": false, + "num_train_epochs": 5, + "overwrite_output_dir": true, + "learning_rate": 1e-4, + "per_device_train_batch_size": 2, + "per_device_eval_batch_size": 2, + "eval_steps": 500, + "data_path": "./datasets/PetFindermy_Adoption_Prediction", + "column_info_path": "./datasets/PetFindermy_Adoption_Prediction/column_info_all_text.json" + } + + + \ No newline at end of file From e6d09aeff440a49f8f58506d37bc58a04feb0b17 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 19:19:13 +0000 Subject: [PATCH 18/28] Docs: Add maintainer info to setup.py --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 67868dd..82a8b82 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,8 @@ description='Multimodal Extension Library for PyTorch HuggingFace Transformers', author='Ken Gu', author_email='kgu@georgianpartners.com', + maintainer='Akash Saravanan, Kyryl Truskovskyi', + maintainer_email='akash.saravanan@georgian.io, kyryl@georgian.io' url=url, download_url='{}/archive/v_{}.tar.gz'.format(url, __version__), keywords=['pytorch', 'multimodal', 'transformers', 'huggingface'], # Keywords that define your package best From 54640f0f30fcc51a9a3d566eed3028a5f68bb8ee Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 20:13:41 +0000 Subject: [PATCH 19/28] Chore: Include versions for all libraries in setup.py --- setup.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 82a8b82..0418f9b 100644 --- a/setup.py +++ b/setup.py @@ -4,16 +4,16 @@ url = 'https://github.com/georgianpartners/Multimodal-Toolkit' install_requires = [ - 'torch', + 'torch>=1.13.1', 'transformers>=4.26.1', - 'numpy', - 'tqdm', - 'scipy', - 'networkx', - 'scikit-learn', - 'pandas', - 'sacremoses', - 'pytest' + 'numpy>=1.21.6', + 'tqdm>=4.64.1', + 'scipy>=1.7.3', + 'networkx>=2.6.3', + 'scikit-learn>=1.0.2', + 'pandas>=1.3.5', + 'sacremoses>=0.0.53', + 'pytest>=7.2.2' ] setup( From a63a1bf12e404f6e2afd0cbb83611e45f7830148 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 20:27:38 +0000 Subject: [PATCH 20/28] Fix: Single source of truth for project version number --- setup.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0418f9b..bbb573b 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,23 @@ +import os from setuptools import setup, find_packages -__version__ = '0.2-alpha' +# See: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ +def read(rel_path: str) -> str: + here = os.path.abspath(os.path.dirname(__file__)) + # intentionally *not* adding an encoding option to open, See: + # https://github.com/pypa/virtualenv/issues/201#issuecomment-3145690 + with open(os.path.join(here, rel_path)) as fp: + return fp.read() + +def get_version(rel_path: str) -> str: + for line in 
read(rel_path).splitlines(): + if line.startswith("__version__"): + # __version__ = "0.9" + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + raise RuntimeError("Unable to find version string.") + +__version__ = get_version("multimodal_transformers/__init__.py") url = 'https://github.com/georgianpartners/Multimodal-Toolkit' install_requires = [ @@ -26,7 +43,7 @@ author='Ken Gu', author_email='kgu@georgianpartners.com', maintainer='Akash Saravanan, Kyryl Truskovskyi', - maintainer_email='akash.saravanan@georgian.io, kyryl@georgian.io' + maintainer_email='akash.saravanan@georgian.io, kyryl@georgian.io', url=url, download_url='{}/archive/v_{}.tar.gz'.format(url, __version__), keywords=['pytorch', 'multimodal', 'transformers', 'huggingface'], # Keywords that define your package best From 6685f4d9c0cad3deaafbcec7ae2e3d5093a62535 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 20:53:26 +0000 Subject: [PATCH 21/28] Feat: Make debug dataset size a training argument. --- multimodal_exp_args.py | 5 +++++ multimodal_transformers/data/load_data.py | 25 +++++++++++++++-------- tests/test_model.py | 4 +++- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/multimodal_exp_args.py b/multimodal_exp_args.py index e7f27f5..0d28968 100644 --- a/multimodal_exp_args.py +++ b/multimodal_exp_args.py @@ -159,6 +159,11 @@ class OurTrainingArguments(TrainingArguments): metadata={'help': 'Whether we are training in debug mode (smaller model)'} ) + debug_dataset_size: int = field( + default=100, + metadata={'help': 'Size of the dataset in debug mode. Only used when debug_dataset = True.'} + ) + do_eval: bool = field(default=True, metadata={"help": "Whether to run eval on the dev set."}) do_predict: bool = field(default=True, metadata={"help": "Whether to run predictions on the test set."}) diff --git a/multimodal_transformers/data/load_data.py b/multimodal_transformers/data/load_data.py index c66d256..6ee03f0 100644 --- a/multimodal_transformers/data/load_data.py +++ b/multimodal_transformers/data/load_data.py @@ -35,7 +35,8 @@ def load_data_into_folds(data_csv_path, empty_text_values=None, replace_empty_text=None, max_token_length=None, - debug=False + debug=False, + debug_dataset_size=100 ): """ Function to load tabular and text data from a specified folder into folds @@ -114,7 +115,8 @@ def load_data_into_folds(data_csv_path, empty_text_values, replace_empty_text, max_token_length, - debug) + debug, + debug_dataset_size) train_splits.append(train) val_splits.append(val) test_splits.append(test) @@ -136,6 +138,7 @@ def load_data_from_folder(folder_path, replace_empty_text=None, max_token_length=None, debug=False, + debug_dataset_size=100 ): """ Function to load tabular and text data from a specified folder @@ -205,7 +208,8 @@ def load_data_from_folder(folder_path, empty_text_values, replace_empty_text, max_token_length, - debug) + debug, + debug_dataset_size) def load_train_val_test_helper(train_df, @@ -223,7 +227,8 @@ def load_train_val_test_helper(train_df, empty_text_values=None, replace_empty_text=None, max_token_length=None, - debug=False): + debug=False, + debug_dataset_size=100): if categorical_encode_type == 'ohe' or categorical_encode_type == 'binary': dfs = [df for df in [train_df, val_df, test_df] if df is not None] data_df = pd.concat(dfs, axis=0) @@ -272,7 +277,8 @@ def load_train_val_test_helper(train_df, empty_text_values, replace_empty_text, max_token_length, - debug + debug, + debug_dataset_size ) test_dataset = load_data(test_df, text_cols, 
@@ -287,7 +293,8 @@ def load_train_val_test_helper(train_df, empty_text_values, replace_empty_text, max_token_length, - debug + debug, + debug_dataset_size ) if val_df is not None: @@ -304,7 +311,8 @@ def load_train_val_test_helper(train_df, empty_text_values, replace_empty_text, max_token_length, - debug + debug, + debug_dataset_size ) else: val_dataset = None @@ -326,6 +334,7 @@ def load_data(data_df, replace_empty_text=None, max_token_length=None, debug=False, + debug_dataset_size=100 ): """Function to load a single dataset given a pandas DataFrame @@ -370,7 +379,7 @@ def load_data(data_df, :obj:`tabular_torch_dataset.TorchTextDataset`: The converted dataset """ if debug: - data_df = data_df[:100] + data_df = data_df[:debug_dataset_size] if empty_text_values is None: empty_text_values = ['nan', 'None'] diff --git a/tests/test_model.py b/tests/test_model.py index 24443a7..f9f620b 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -23,6 +23,7 @@ os.environ['COMET_MODE'] = 'DISABLED' +DEBUG_DATASET_SIZE = 50 CONFIGS = [ "./tests/test_airbnb.json", @@ -96,6 +97,7 @@ def test_model(json_file: str, model_string: str): sep_text_token_str=tokenizer.sep_token if not data_args.column_info['text_col_sep_token'] else data_args.column_info['text_col_sep_token'], max_token_length=training_args.max_token_length, debug=True, + debug_dataset_size=DEBUG_DATASET_SIZE ) set_seed(training_args.seed) @@ -142,4 +144,4 @@ def test_model(json_file: str, model_string: str): # Get predictions test_results = trainer.predict(test_dataset=test_dataset) - assert test_results.predictions[0].shape == (100, num_labels) + assert test_results.predictions[0].shape == (DEBUG_DATASET_SIZE, num_labels) From 517fa0d73fe30722938eef4dcb2407ec1de335c1 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 20:54:09 +0000 Subject: [PATCH 22/28] Tests: Run models for only 1 epoch. 
--- tests/test_airbnb.json | 2 +- tests/test_clothing.json | 2 +- tests/test_petfinder.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_airbnb.json b/tests/test_airbnb.json index b96da73..be7d6a0 100644 --- a/tests/test_airbnb.json +++ b/tests/test_airbnb.json @@ -8,7 +8,7 @@ "numerical_transformer_method": "quantile_normal", "tokenizer_name": "bert-base-multilingual-uncased", "use_simple_classifier": false, - "num_train_epochs": 5, + "num_train_epochs": 1, "overwrite_output_dir": true, "learning_rate": 1e-4, "per_device_train_batch_size": 2, diff --git a/tests/test_clothing.json b/tests/test_clothing.json index 6291743..0693070 100644 --- a/tests/test_clothing.json +++ b/tests/test_clothing.json @@ -8,7 +8,7 @@ "numerical_transformer_method": "quantile_normal", "tokenizer_name": "bert-base-uncased", "use_simple_classifier": false, - "num_train_epochs": 5, + "num_train_epochs": 1, "overwrite_output_dir": true, "learning_rate": 1e-4, "per_device_train_batch_size": 2, diff --git a/tests/test_petfinder.json b/tests/test_petfinder.json index 3d627bc..84204b3 100644 --- a/tests/test_petfinder.json +++ b/tests/test_petfinder.json @@ -8,7 +8,7 @@ "numerical_transformer_method": "quantile_normal", "tokenizer_name": "bert-base-multilingual-uncased", "use_simple_classifier": false, - "num_train_epochs": 5, + "num_train_epochs": 1, "overwrite_output_dir": true, "learning_rate": 1e-4, "per_device_train_batch_size": 2, From 507e756de0ffabbce8ddaf4dbddba7267124a89b Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 20:54:52 +0000 Subject: [PATCH 23/28] Docs: Update note in test_model.py --- tests/test_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index f9f620b..0c6a7de 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -83,7 +83,7 @@ def test_model(json_file: str, model_string: str): ) # Load and preprocess datasets - # We force debug=True so we load only 100 entries + # We force debug=True so we load only DEBUG_DATASET_SIZE entries train_dataset, val_dataset, test_dataset = load_data_from_folder( data_args.data_path, data_args.column_info['text_cols'], From 52c59ce0a678dd77e35445b066fa4d4d3719c7dd Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 21:02:40 +0000 Subject: [PATCH 24/28] Fix: Update main.py to use newly added debug_dataset_size arg --- main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.py b/main.py index a9b5f18..a892e38 100644 --- a/main.py +++ b/main.py @@ -83,6 +83,7 @@ def main(): sep_text_token_str=tokenizer.sep_token if not data_args.column_info['text_col_sep_token'] else data_args.column_info['text_col_sep_token'], max_token_length=training_args.max_token_length, debug=training_args.debug_dataset, + debug_dataset_size=training_args.debug_dataset_size ) train_datasets = [train_dataset] val_datasets = [val_dataset] @@ -104,6 +105,7 @@ def main(): data_args.column_info['text_col_sep_token'], max_token_length=training_args.max_token_length, debug=training_args.debug_dataset, + debug_dataset_size=training_args.debug_dataset_size ) train_dataset = train_datasets[0] From b84351c757b36b0e9c239ddeadb9bad1eef007ab Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 21:09:17 +0000 Subject: [PATCH 25/28] Chore: Standardize output_dir naming format across datasets. 
--- datasets/PetFindermy_Adoption_Prediction/train_config.json | 2 +- datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datasets/PetFindermy_Adoption_Prediction/train_config.json b/datasets/PetFindermy_Adoption_Prediction/train_config.json index 5baaec0..0b2e5ed 100644 --- a/datasets/PetFindermy_Adoption_Prediction/train_config.json +++ b/datasets/PetFindermy_Adoption_Prediction/train_config.json @@ -1,5 +1,5 @@ { - "output_dir": "./logs_petfinder/gating_on_cat_and_num_feats_then_sum_full_model", + "output_dir": "./logs_petfinder/bertmultilingual_gating_on_cat_and_num_feats_then_sum_full_model_lr_3e-3", "debug_dataset": false, "task": "classification", "combine_feat_method": "text_only", diff --git a/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json b/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json index 86a6954..7ec5615 100644 --- a/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json +++ b/datasets/Womens_Clothing_E-Commerce_Reviews/train_config.json @@ -1,5 +1,5 @@ { - "output_dir": "./logs_clothing_review/gating_on_cat_and_num_feats_then_sum/", + "output_dir": "./logs_clothing_review/bertbase_gating_on_cat_and_num_feats_then_sum_full_model_lr_3e-3/", "debug_dataset": false, "task": "classification", "combine_feat_method": "text_only", From e201ed945043a94e5916877fe9cee9466b9c0ea1 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Wed, 8 Mar 2023 23:00:12 +0000 Subject: [PATCH 26/28] Docs: Add Python 3.10 to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index bbb573b..6242881 100644 --- a/setup.py +++ b/setup.py @@ -56,5 +56,6 @@ def get_version(rel_path: str) -> str: 'License :: OSI Approved :: MIT License', # Again, pick a license 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.10', ], ) From 8faabd592929cd318705209d3240100a0868c914 Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Thu, 9 Mar 2023 19:55:08 +0000 Subject: [PATCH 27/28] Fix: Set library versions to support Python >=3.7 --- setup.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 6242881..fb9e31c 100644 --- a/setup.py +++ b/setup.py @@ -21,16 +21,16 @@ def get_version(rel_path: str) -> str: url = 'https://github.com/georgianpartners/Multimodal-Toolkit' install_requires = [ - 'torch>=1.13.1', 'transformers>=4.26.1', - 'numpy>=1.21.6', - 'tqdm>=4.64.1', - 'scipy>=1.7.3', - 'networkx>=2.6.3', - 'scikit-learn>=1.0.2', - 'pandas>=1.3.5', - 'sacremoses>=0.0.53', - 'pytest>=7.2.2' + 'torch>=1.13.1', + 'sacremoses~=0.0.53', + 'networkx~=2.6.3', + 'scikit-learn~=1.0.2', + 'scipy~=1.7.3', + 'pandas~=1.3.5', + 'numpy~=1.21.6', + 'tqdm~=4.64.1', + 'pytest~=7.2.2', ] setup( @@ -54,8 +54,9 @@ def get_version(rel_path: str) -> str: 'Intended Audience :: Developers', # Define that your audience are developers 'Topic :: Software Development :: Build Tools', 'License :: OSI Approved :: MIT License', # Again, pick a license - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', ], ) From d839824833e18dc2e12855f4d6f369f8dc3f2b8f Mon Sep 17 00:00:00 2001 From: akashsaravanan-georgian Date: Thu, 9 Mar 2023 19:56:09 +0000 Subject: [PATCH 28/28] Fix: Update 
requirements.txt --- docs/requirements.txt | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 79834f1..21e82ab 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,10 +1,12 @@ joblib==0.15.1 -numpy==1.18.5 -pandas==1.0.4 +networkx~=2.6.3 +numpy~=1.21.6 +pandas~=1.3.5 +pytest~=7.2.2 +sacremoses~=0.0.53 scikit-image==0.17.2 -scikit-learn==0.23.1 -scipy==1.4.1 -sklearn==0.0 +scikit-learn~=1.0.2 +scipy~=1.7.3 Sphinx==3.2.1 sphinx-markdown-tables==0.0.15 sphinx-rtd-theme==0.5.0 @@ -16,6 +18,6 @@ sphinxcontrib-napoleon==0.7 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.4 threadpoolctl==2.1.0 -torch==1.5.0 -torchvision==0.6.0 -transformers==3.0.0 +torch>=1.13.1 +tqdm~=4.64.1 +transformers>=4.26.1
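
Taken together, the series moves the toolkit onto transformers>=4.26.1. A minimal
end-to-end sketch of the updated API (adapted from docs/source/notes/introduction.rst
as amended in PATCH 09; the feature dimensions and combine method here are illustrative):

    import torch
    from transformers import AutoConfig, AutoTokenizer
    from multimodal_transformers.model import AutoModelWithTabular, TabularConfig

    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    config = AutoConfig.from_pretrained('bert-base-uncased')
    config.tabular_config = TabularConfig(
        num_labels=2,
        cat_feat_dim=3,        # dimensions of the example tabular features
        numerical_feat_dim=2,
        combine_feat_method='gating_on_cat_and_num_feats_then_sum',
    )
    model = AutoModelWithTabular.from_pretrained('bert-base-uncased', config=config)

    model_inputs = tokenizer(['hello world', 'how are you'],
                             padding=True, return_tensors='pt')
    model_inputs['cat_feats'] = torch.rand(2, 3)
    model_inputs['numerical_feats'] = torch.rand(2, 2)  # key renamed from 'num_feats' (PATCH 09)
    model_inputs['labels'] = torch.tensor([1, 0])

    loss, logits, layer_outs = model(**model_inputs)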