Commit 3e5076d
added multigpu evaluation and prediction
Thilina Rajapakse committed Sep 24, 2020
1 parent 0710cf3 commit 3e5076d
Showing 8 changed files with 160 additions and 36 deletions.
14 changes: 12 additions & 2 deletions CHANGELOG.md
@@ -16,14 +16,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
- ClassificationModel
- NERModel
- QuestionAnsweringModel
- Seq2Seq
- T5Model
- ConvAI
- MultiModalClassificationModel
- Added multigpu prediction/eval in
- ClassificationModel
- MultiModalClassificationModel
- ConvAI
- NERModel
- QuestionAnsweringModel
- Seq2Seq
- T5Model

### Fixed

- Thread count can now be specified for MultiLabelClassificationModel.

### TODO

- Add multigpu prediction/eval
## [0.48.4] - 2020-09-23

### Fixed
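For context, a minimal usage sketch of what this changelog entry enables, assuming a two-GPU machine; the checkpoint, column names, and example texts are illustrative, not taken from the repo:

```python
# Minimal multi-GPU eval/predict sketch (illustrative data and checkpoint).
import pandas as pd

from simpletransformers.classification import ClassificationModel

eval_df = pd.DataFrame(
    [["best movie ever", 1], ["terrible acting", 0]],
    columns=["text", "labels"],
)

model = ClassificationModel(
    "roberta",
    "roberta-base",
    args={"n_gpu": 2},  # eval/predict now also fan batches out across both GPUs
)

result, model_outputs, wrong_predictions = model.eval_model(eval_df)
predictions, raw_outputs = model.predict(["an unseen example"])
```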
48 changes: 36 additions & 12 deletions simpletransformers/classification/classification_model.py
@@ -93,7 +93,16 @@

class ClassificationModel:
def __init__(
self, model_type, model_name, num_labels=None, weight=None, args=None, use_cuda=True, cuda_device=-1, onnx_execution_provider=None, **kwargs,
self,
model_type,
model_name,
num_labels=None,
weight=None,
args=None,
use_cuda=True,
cuda_device=-1,
onnx_execution_provider=None,
**kwargs,
):

"""
@@ -818,11 +827,15 @@ def evaluate(
eval_sampler = SequentialSampler(eval_dataset)
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

eval_loss = 0.0
nb_eval_steps = 0
preds = None
out_label_ids = None
model.eval()

if self.args.fp16:
from torch.cuda import amp

@@ -1096,8 +1109,10 @@ def predict(self, to_predict, multi_label=False):
preds = None
out_label_ids = None

if self.args.onnx:
model_inputs = self.tokenizer.batch_encode_plus(to_predict, return_tensors="pt", padding=True, truncation=True)
if not multi_label and self.args.onnx:
model_inputs = self.tokenizer.batch_encode_plus(
to_predict, return_tensors="pt", padding=True, truncation=True
)

for input_ids, attention_mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"]):
input_ids = input_ids.unsqueeze(0).detach().cpu().numpy()
@@ -1119,6 +1134,9 @@ def predict(self, to_predict, multi_label=False):
self._move_model_to_device()
dummy_label = 0 if not self.args.labels_map else next(iter(self.args.labels_map.keys()))

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if multi_label:
if isinstance(to_predict[0], list):
eval_examples = [
@@ -1132,7 +1150,9 @@ def predict(self, to_predict, multi_label=False):
]
else:
if isinstance(to_predict[0], list):
eval_examples = [InputExample(i, text[0], text[1], dummy_label) for i, text in enumerate(to_predict)]
eval_examples = [
InputExample(i, text[0], text[1], dummy_label) for i, text in enumerate(to_predict)
]
else:
eval_examples = [InputExample(i, text, None, dummy_label) for i, text in enumerate(to_predict)]
if args.sliding_window:
@@ -1175,7 +1195,9 @@ def predict(self, to_predict, multi_label=False):
if preds is None:
preds = logits.detach().cpu().numpy()
out_label_ids = inputs["labels"].detach().cpu().numpy()
all_layer_hidden_states = np.array([state.detach().cpu().numpy() for state in layer_hidden_states])
all_layer_hidden_states = np.array(
[state.detach().cpu().numpy() for state in layer_hidden_states]
)
all_embedding_outputs = embedding_outputs.detach().cpu().numpy()
else:
preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
@@ -1272,7 +1294,7 @@ def convert_to_onnx(self, output_dir=None, set_onnx_arg=True):
Args:
output_dir (str, optional): If specified, ONNX model will be saved to output_dir (else args.output_dir will be used). Defaults to None.
set_onnx_arg (bool, optional): Updates the model args to set onnx=True. Defaults to True.
""" # noqa
""" # noqa
if not output_dir:
output_dir = os.path.join(self.args.output_dir, "onnx")
os.makedirs(output_dir, exist_ok=True)
@@ -1288,12 +1310,14 @@ def convert_to_onnx(self, output_dir=None, set_onnx_arg=True):
with tempfile.TemporaryDirectory() as temp_dir:
self.save_model(output_dir=temp_dir, model=self.model)

convert(framework="pt",
model=temp_dir,
tokenizer=self.tokenizer,
output=Path(onnx_model_name),
pipeline_name="sentiment-analysis",
opset=11)
convert(
framework="pt",
model=temp_dir,
tokenizer=self.tokenizer,
output=Path(onnx_model_name),
pipeline_name="sentiment-analysis",
opset=11,
)

self.args.onnx = True
self.tokenizer.save_pretrained(output_dir)
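Distilled from the hunks above: evaluation and prediction now conditionally wrap the model in torch.nn.DataParallel. A sketch of the pattern, and of why the eval loops reduce the loss with .mean(); the isinstance guard is an addition of this sketch, not part of the diff:

```python
# Sketch of the multi-GPU pattern added above: wrap only when more than
# one GPU is configured. DataParallel replicates the module per device
# and splits each batch along dim 0, so a scalar loss comes back as one
# value per GPU and must be reduced -- which is why the eval loops call
# tmp_eval_loss.mean().item().
import torch


def wrap_for_eval(model: torch.nn.Module, n_gpu: int) -> torch.nn.Module:
    # isinstance guard (not in the diff) avoids double-wrapping on repeated calls
    if n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)
    return model
```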
simpletransformers/classification/multi_modal_classification_model.py
@@ -819,6 +819,12 @@ def evaluate(
out_label_ids = None
model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if args.fp16:
from torch.cuda import amp

for batch in tqdm(eval_dataloader, disable=args.silent or silent, desc="Running Evaluation"):
batch = tuple(t.to(device) for t in batch)
labels = batch[5]
@@ -1018,14 +1024,24 @@ def predict(self, to_predict, image_path, image_type_extension=None):
preds = None
out_label_ids = None

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if args.fp16:
from torch.cuda import amp

for batch in tqdm(eval_dataloader, disable=args.silent, desc="Running Prediction"):
batch = tuple(t.to(device) for t in batch)
labels = batch[5]
with torch.no_grad():
inputs = self._get_inputs_dict(batch)

outputs = model(**inputs)
logits = outputs[0] # Different from default behaviour
if self.args.fp16:
with amp.autocast():
outputs = model(**inputs)
logits = outputs[0] # Different from default behaviour
else:
outputs = model(**inputs)
logits = outputs[0] # Different from default behaviour
tmp_eval_loss = self.criterion(logits, labels)

eval_loss += tmp_eval_loss.mean().item()
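The fp16 branches added to the multi-modal loops reduce to this shape; a self-contained sketch in which model and inputs stand in for the real batch objects:

```python
# Mixed-precision inference sketch matching the branches above:
# torch.cuda.amp.autocast (torch >= 1.6) runs the forward pass in half
# precision where safe; outside the context, tensors are fp32 again.
import torch
from torch.cuda import amp


def forward_logits(model, inputs, fp16: bool) -> torch.Tensor:
    with torch.no_grad():
        if fp16:
            with amp.autocast():
                outputs = model(**inputs)
        else:
            outputs = model(**inputs)
    return outputs[0]  # logits, per the "different from default behaviour" note
```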
34 changes: 31 additions & 3 deletions simpletransformers/conv_ai/conv_ai_model.py
@@ -587,13 +587,27 @@ def evaluate(self, eval_file, output_dir, verbose=True, silent=False, **kwargs):
}
model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if args.fp16:
from torch.cuda import amp

for batch in tqdm(eval_dataloader, disable=args.silent or silent, desc="Running Evaluation"):
batch = tuple(t.to(device) for t in batch)

with torch.no_grad():
input_ids, mc_token_ids, lm_labels, mc_labels, token_type_ids = batch

lm_logits, mc_logits, *_ = model(input_ids, token_type_ids=token_type_ids, mc_token_ids=mc_token_ids,)
if args.fp16:
with amp.autocast():
lm_logits, mc_logits, *_ = model(
input_ids, token_type_ids=token_type_ids, mc_token_ids=mc_token_ids,
)
else:
lm_logits, mc_logits, *_ = model(
input_ids, token_type_ids=token_type_ids, mc_token_ids=mc_token_ids,
)
# model outputs are always tuple in pytorch-transformers (see doc)

lm_logits_flat_shifted = lm_logits[..., :-1, :].contiguous().view(-1, lm_logits.size(-1))
@@ -739,6 +753,9 @@ def interact(self, personality=None):
tokenizer = self.tokenizer
process_count = self.args.process_count

if self.args.fp16:
from torch.cuda import amp

self._move_model_to_device()

if not personality:
@@ -764,7 +781,11 @@
raw_text = input(">>> ")
history.append(tokenizer.encode(raw_text))
with torch.no_grad():
out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
if args.fp16:
with amp.autocast():
out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
else:
out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
history.append(out_ids)
history = history[-(2 * args.max_history + 1) :]
out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
@@ -791,6 +812,9 @@ def interact_single(self, message, history, personality=None, encode_history=Tru
tokenizer = self.tokenizer
process_count = self.args.process_count

if self.args.fp16:
from torch.cuda import amp

self._move_model_to_device()

if not personality:
@@ -813,7 +837,11 @@
history = [tokenizer.encode(sentence) for sentence in history]
history.append(tokenizer.encode(message))
with torch.no_grad():
out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
if args.fp16:
with amp.autocast():
out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
else:
out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
out_text = tokenizer.decode(out_ids, skip_special_tokens=True)

if encode_history:
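A possible consolidation, not what the diff does: the repeated fp16 if/else branches and local amp imports above could collapse into one helper. torch.cuda.amp requires torch >= 1.6, which is presumably why the import stays deferred behind the fp16 flag:

```python
# Not what the diff does -- a hypothetical consolidation of the repeated
# fp16 branches. The deferred amp import keeps torch < 1.6 installs
# working as long as fp16 stays off.
import contextlib


def autocast_if(fp16: bool):
    if fp16:
        from torch.cuda import amp  # torch >= 1.6 only
        return amp.autocast()
    return contextlib.nullcontext()


# usage sketch:
# with torch.no_grad(), autocast_if(args.fp16):
#     out_ids = self.sample_sequence(personality, history, tokenizer, model, args)
```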
41 changes: 29 additions & 12 deletions simpletransformers/ner/ner_model.py
@@ -78,7 +78,15 @@

class NERModel:
def __init__(
self, model_type, model_name, labels=None, args=None, use_cuda=True, cuda_device=-1, onnx_execution_provider=None, **kwargs,
self,
model_type,
model_name,
labels=None,
args=None,
use_cuda=True,
cuda_device=-1,
onnx_execution_provider=None,
**kwargs,
):
"""
Initializes a NERModel
@@ -717,6 +725,9 @@ def evaluate(self, eval_dataset, output_dir, verbose=True, silent=False, wandb_l
out_label_ids = None
model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if self.args.fp16:
from torch.cuda import amp

@@ -869,7 +880,9 @@ def predict(self, to_predict, split_on_space=True):
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

if self.args.onnx:
model_inputs = self.tokenizer.batch_encode_plus(to_predict, return_tensors="pt", padding=True, truncation=True)
model_inputs = self.tokenizer.batch_encode_plus(
to_predict, return_tensors="pt", padding=True, truncation=True
)

for input_ids, attention_mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"]):
input_ids = input_ids.unsqueeze(0).detach().cpu().numpy()
@@ -886,9 +899,7 @@ def predict(self, to_predict, split_on_space=True):
else:
preds = np.append(preds, output[0], axis=0)
out_input_ids = np.append(out_input_ids, inputs_onnx["input_ids"], axis=0)
out_attention_mask = np.append(
out_attention_mask, inputs_onnx["attention_mask"], axis=0,
)
out_attention_mask = np.append(out_attention_mask, inputs_onnx["attention_mask"], axis=0,)
out_label_ids = np.zeros_like(out_input_ids)
else:
self._move_model_to_device()
@@ -898,6 +909,10 @@ def predict(self, to_predict, split_on_space=True):
preds = None
out_label_ids = None
model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if self.args.fp16:
from torch.cuda import amp

@@ -1114,7 +1129,7 @@ def convert_to_onnx(self, output_dir=None, set_onnx_arg=True):
Args:
output_dir (str, optional): If specified, ONNX model will be saved to output_dir (else args.output_dir will be used). Defaults to None.
set_onnx_arg (bool, optional): Updates the model args to set onnx=True. Defaults to True.
""" # noqa
""" # noqa
if not output_dir:
output_dir = os.path.join(self.args.output_dir, "onnx")
os.makedirs(output_dir, exist_ok=True)
@@ -1130,12 +1145,14 @@ def convert_to_onnx(self, output_dir=None, set_onnx_arg=True):
with tempfile.TemporaryDirectory() as temp_dir:
self.save_model(output_dir=temp_dir, model=self.model)

convert(framework="pt",
model=temp_dir,
tokenizer=self.tokenizer,
output=Path(onnx_model_name),
pipeline_name="ner",
opset=11)
convert(
framework="pt",
model=temp_dir,
tokenizer=self.tokenizer,
output=Path(onnx_model_name),
pipeline_name="ner",
opset=11,
)

self.args.onnx = True
self.tokenizer.save_pretrained(output_dir)
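For reference, a hedged sketch of the ONNX round-trip that convert_to_onnx() enables. Reloading the export with args={"onnx": True} and a CPU execution provider is an assumption inferred from the self.args.onnx branch in predict() above, not from documented API:

```python
# Hedged ONNX round-trip sketch. "outputs/onnx" mirrors the default
# os.path.join(args.output_dir, "onnx"); loading the export back via
# args={"onnx": True} is an assumption based on the onnx branch in
# predict() above.
from simpletransformers.ner import NERModel

model = NERModel("bert", "bert-base-cased")
model.convert_to_onnx("outputs/onnx")  # exports graph + tokenizer, sets onnx=True

onnx_model = NERModel("bert", "outputs/onnx", args={"onnx": True}, use_cuda=False)
predictions, raw_outputs = onnx_model.predict(["Ada Lovelace was born in London"])
```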
simpletransformers/question_answering/question_answering_model.py
@@ -753,6 +753,9 @@ def evaluate(self, eval_data, output_dir, verbose_logging=False):
nb_eval_steps = 0
model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if self.args.fp16:
from torch.cuda import amp

@@ -899,6 +902,10 @@ def predict(self, to_predict, n_best_size=None):
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if self.args.fp16:
from torch.cuda import amp

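Since DataParallel splits each step's batch across the replicas, the configured eval_batch_size can usually scale with the device count; an illustrative sizing sketch (the per-GPU baseline of 8 is arbitrary):

```python
# Illustrative sizing only: DataParallel gives each of the n replicas
# roughly eval_batch_size / n examples per step, so the configured
# batch size can typically grow with the GPU count.
import torch

n = max(1, torch.cuda.device_count())
args = {"n_gpu": n, "eval_batch_size": 8 * n}  # 8 per GPU: arbitrary baseline
```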
15 changes: 13 additions & 2 deletions simpletransformers/seq2seq/seq2seq_model.py
@@ -751,13 +751,24 @@ def evaluate(self, eval_dataset, output_dir, verbose=True, silent=False, **kwarg
nb_eval_steps = 0
model.eval()

if args.n_gpu > 1:
model = torch.nn.DataParallel(model)

if self.args.fp16:
from torch.cuda import amp

for batch in tqdm(eval_dataloader, disable=args.silent or silent, desc="Running Evaluation"):
# batch = tuple(t.to(device) for t in batch)

inputs = self._get_inputs_dict(batch)
with torch.no_grad():
outputs = model(**inputs)
loss = outputs[0]
if self.args.fp16:
with amp.autocast():
outputs = model(**inputs)
loss = outputs[0]
else:
outputs = model(**inputs)
loss = outputs[0]
eval_loss += loss.mean().item()
nb_eval_steps += 1

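The seq2seq hunk combines all three additions in one loop. A self-contained sketch of the resulting shape, with eval_dataloader and inputs_fn standing in for the real dataloader and _get_inputs_dict:

```python
# Self-contained sketch of the evaluation-loop shape after this commit:
# optional DataParallel wrap, optional autocast, mean-reduced loss.
import torch
from torch.cuda import amp


def evaluate_loss(model, eval_dataloader, inputs_fn, n_gpu=1, fp16=False):
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    model.eval()
    eval_loss, nb_eval_steps = 0.0, 0
    for batch in eval_dataloader:
        inputs = inputs_fn(batch)
        with torch.no_grad():
            if fp16:
                with amp.autocast():
                    loss = model(**inputs)[0]
            else:
                loss = model(**inputs)[0]
        eval_loss += loss.mean().item()  # per-GPU losses under DataParallel
        nb_eval_steps += 1
    return eval_loss / nb_eval_steps
```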
(Diffs for the remaining changed files did not load.)
