diff --git a/feat/FastDetector.py b/feat/FastDetector.py deleted file mode 100644 index 95be7069..00000000 --- a/feat/FastDetector.py +++ /dev/null @@ -1,669 +0,0 @@ -import json -from tqdm import tqdm -import numpy as np -import pandas as pd -from huggingface_hub import hf_hub_download, PyTorchModelHubMixin -from collections import OrderedDict - -from feat.emo_detectors.ResMaskNet.resmasknet_test import ( - ResMasking, -) -from feat.identity_detectors.facenet.facenet_model import InceptionResnetV1 -from feat.facepose_detectors.img2pose.deps.models import ( - FasterDoFRCNN, - postprocess_img2pose, -) -from feat.au_detectors.StatLearning.SL_test import XGBClassifier, SVMClassifier -from feat.emo_detectors.StatLearning.EmoSL_test import EmoSVMClassifier -from feat.landmark_detectors.mobilefacenet_test import MobileFaceNet -from feat.landmark_detectors.basenet_test import MobileNet_GDConv -from feat.landmark_detectors.pfld_compressed_test import PFLDInference -from feat.pretrained import load_model_weights, AU_LANDMARK_MAP -from feat.utils import ( - set_torch_device, - openface_2d_landmark_columns, - FEAT_EMOTION_COLUMNS, - FEAT_FACEBOX_COLUMNS, - FEAT_FACEPOSE_COLUMNS_6D, - FEAT_IDENTITY_COLUMNS, -) -from feat.utils.io import get_resource_path -from feat.utils.image_operations import ( - convert_image_to_tensor, - extract_face_from_bbox_torch, - inverse_transform_landmarks_torch, - extract_hog_features, - convert_bbox_output, - compute_original_image_size, -) -from feat.data import Fex, ImageDataset, TensorDataset, VideoDataset -from skops.io import load, get_untrusted_types -from safetensors.torch import load_file -import torch -import torch.nn as nn -from torch.utils.data import DataLoader -from torchvision.models.detection.backbone_utils import resnet_fpn_backbone -from torchvision.transforms import Compose, Normalize -import sys - -sys.modules["__main__"].__dict__["XGBClassifier"] = XGBClassifier -sys.modules["__main__"].__dict__["SVMClassifier"] = SVMClassifier -sys.modules["__main__"].__dict__["EmoSVMClassifier"] = EmoSVMClassifier - - -class FastDetector(nn.Module, PyTorchModelHubMixin): - def __init__( - self, - landmark_model="mobilefacenet", - au_model="xgb", - emotion_model="resmasknet", - identity_model="facenet", - device="cpu", - ): - super(FastDetector, self).__init__() - - self.info = dict( - face_model="img2pose", - landmark_model=None, - emotion_model=None, - facepose_model="img2pose", - au_model=None, - identity_model=None, - ) - self.device = set_torch_device(device) - - # Load Model Configurations - facepose_config_file = hf_hub_download( - repo_id="py-feat/img2pose", - filename="config.json", - cache_dir=get_resource_path(), - ) - with open(facepose_config_file, "r") as f: - facepose_config = json.load(f) - - # Initialize img2pose - backbone = resnet_fpn_backbone(backbone_name="resnet18", weights=None) - backbone.eval() - backbone.to(self.device) - self.facepose_detector = FasterDoFRCNN( - backbone=backbone, - num_classes=2, - min_size=facepose_config["min_size"], - max_size=facepose_config["max_size"], - pose_mean=torch.tensor(facepose_config["pose_mean"]), - pose_stddev=torch.tensor(facepose_config["pose_stddev"]), - threed_68_points=torch.tensor(facepose_config["threed_points"]), - rpn_pre_nms_top_n_test=facepose_config["rpn_pre_nms_top_n_test"], - rpn_post_nms_top_n_test=facepose_config["rpn_post_nms_top_n_test"], - bbox_x_factor=facepose_config["bbox_x_factor"], - bbox_y_factor=facepose_config["bbox_y_factor"], - expand_forehead=facepose_config["expand_forehead"], 
- ) - facepose_model_file = hf_hub_download( - repo_id="py-feat/img2pose", - filename="model.safetensors", - cache_dir=get_resource_path(), - ) - facepose_checkpoint = load_file(facepose_model_file) - self.facepose_detector.load_state_dict(facepose_checkpoint, load_model_weights) - self.facepose_detector.eval() - self.facepose_detector.to(self.device) - # self.facepose_detector = torch.compile(self.facepose_detector) - - # Initialize Landmark Detector - self.info["landmark_model"] = landmark_model - if landmark_model is not None: - if landmark_model == "mobilefacenet": - self.face_size = 112 - self.landmark_detector = MobileFaceNet( - [self.face_size, self.face_size], 136, device=self.device - ) - landmark_model_file = hf_hub_download( - repo_id="py-feat/mobilefacenet", - filename="mobilefacenet_model_best.pth.tar", - cache_dir=get_resource_path(), - ) - landmark_state_dict = torch.load( - landmark_model_file, map_location=self.device, weights_only=True - )["state_dict"] # Ensure Model weights are Float32 for MPS - elif landmark_model == "mobilenet": - self.face_size = 224 - self.landmark_detector = MobileNet_GDConv(136) - landmark_model_file = hf_hub_download( - repo_id="py-feat/mobilenet", - filename="mobilenet_224_model_best_gdconv_external.pth.tar", - cache_dir=get_resource_path(), - ) - mobilenet_state_dict = torch.load( - landmark_model_file, map_location=self.device, weights_only=True - )["state_dict"] # Ensure Model weights are Float32 for MPS - landmark_state_dict = OrderedDict() - for k, v in mobilenet_state_dict.items(): - if "module." in k: - k = k.replace("module.", "") - landmark_state_dict[k] = v - elif landmark_model == "pfld": - self.face_size = 112 - self.landmark_detector = PFLDInference() - landmark_model_file = hf_hub_download( - repo_id="py-feat/pfld", - filename="pfld_model_best.pth.tar", - cache_dir=get_resource_path(), - ) - landmark_state_dict = torch.load( - landmark_model_file, map_location=self.device, weights_only=True - )["state_dict"] # Ensure Model weights are Float32 for MPS - else: - raise ValueError("{landmark_model} is not currently supported.") - self.landmark_detector.load_state_dict(landmark_state_dict) - self.landmark_detector.eval() - self.landmark_detector.to(self.device) - # self.landmark_detector = torch.compile(self.landmark_detector) - else: - self.landmark_detector = None - - # Initialize AU Detector - self.info["au_model"] = au_model - if au_model is not None: - if self.landmark_detector is not None: - if au_model == "xgb": - self.au_detector = XGBClassifier() - au_model_path = hf_hub_download( - repo_id="py-feat/xgb_au", - filename="xgb_au_classifier.skops", - cache_dir=get_resource_path(), - ) - - elif au_model == "svm": - self.au_detector = SVMClassifier() - au_model_path = hf_hub_download( - repo_id="py-feat/svm_au", - filename="svm_au_classifier.skops", - cache_dir=get_resource_path(), - ) - else: - raise ValueError("{au_model} is not currently supported.") - - au_unknown_types = get_untrusted_types(file=au_model_path) - loaded_au_model = load(au_model_path, trusted=au_unknown_types) - self.au_detector.load_weights( - scaler_upper=loaded_au_model.scaler_upper, - pca_model_upper=loaded_au_model.pca_model_upper, - scaler_lower=loaded_au_model.scaler_lower, - pca_model_lower=loaded_au_model.pca_model_lower, - scaler_full=loaded_au_model.scaler_full, - pca_model_full=loaded_au_model.pca_model_full, - classifiers=loaded_au_model.classifiers, - ) - else: - raise ValueError( - "Landmark Detector is required for AU Detection with {au_model}." 
- ) - else: - self.au_detector = None - - # Initialize Emotion Detector - self.info["emotion_model"] = emotion_model - if emotion_model is not None: - if emotion_model == "resmasknet": - emotion_config_file = hf_hub_download( - repo_id="py-feat/resmasknet", - filename="config.json", - cache_dir=get_resource_path(), - ) - with open(emotion_config_file, "r") as f: - emotion_config = json.load(f) - - self.emotion_detector = ResMasking( - "", in_channels=emotion_config["in_channels"] - ) - self.emotion_detector.fc = nn.Sequential( - nn.Dropout(0.4), nn.Linear(512, emotion_config["num_classes"]) - ) - emotion_model_file = hf_hub_download( - repo_id="py-feat/resmasknet", - filename="ResMaskNet_Z_resmasking_dropout1_rot30.pth", - cache_dir=get_resource_path(), - ) - emotion_checkpoint = torch.load( - emotion_model_file, map_location=device, weights_only=True - )["net"] - self.emotion_detector.load_state_dict(emotion_checkpoint) - self.emotion_detector.eval() - self.emotion_detector.to(self.device) - # self.emotion_detector = torch.compile(self.emotion_detector) - elif emotion_model == "svm": - if self.landmark_detector is not None: - self.emotion_detector = EmoSVMClassifier() - emotion_model_path = hf_hub_download( - repo_id="py-feat/svm_emo", - filename="svm_emo_classifier.skops", - cache_dir=get_resource_path(), - ) - emotion_unknown_types = get_untrusted_types(file=emotion_model_path) - loaded_emotion_model = load( - emotion_model_path, trusted=emotion_unknown_types - ) - self.emotion_detector.load_weights( - scaler_full=loaded_emotion_model.scaler_full, - pca_model_full=loaded_emotion_model.pca_model_full, - classifiers=loaded_emotion_model.classifiers, - ) - else: - raise ValueError( - "Landmark Detector is required for Emotion Detection with {emotion_model}." - ) - - else: - raise ValueError("{emotion_model} is not currently supported.") - else: - self.emotion_detector = None - - # Initialize Identity Detecctor - facenet - self.info["identity_model"] = identity_model - if identity_model is not None: - if identity_model == "facenet": - self.identity_detector = InceptionResnetV1( - pretrained=None, - classify=False, - num_classes=None, - dropout_prob=0.6, - device=self.device, - ) - self.identity_detector.logits = nn.Linear(512, 8631) - identity_model_file = hf_hub_download( - repo_id="py-feat/facenet", - filename="facenet_20180402_114759_vggface2.pth", - cache_dir=get_resource_path(), - ) - self.identity_detector.load_state_dict( - torch.load( - identity_model_file, map_location=device, weights_only=True - ) - ) - self.identity_detector.eval() - self.identity_detector.to(self.device) - # self.identity_detector = torch.compile(self.identity_detector) - else: - raise ValueError("{identity_model} is not currently supported.") - else: - self.identity_detector = None - - @torch.inference_mode() - def detect_faces(self, images, face_size=112, face_detection_threshold=0.5): - """ - detect faces and poses in a batch of images using img2pose - - Args: - img (torch.Tensor): Tensor of shape (B, C, H, W) representing the images - face_size (int): Output size to resize face after cropping. 
- - Returns: - Fex: Prediction results dataframe - """ - - # img2pose - frames = convert_image_to_tensor(images, img_type="float32") / 255.0 - frames.to(self.device) - - batch_results = [] - for i in range(frames.size(0)): - single_frame = frames[i, ...].unsqueeze(0) # Extract single image from batch - img2pose_output = self.facepose_detector(single_frame.to(self.device)) - img2pose_output = postprocess_img2pose( - img2pose_output[0], detection_threshold=face_detection_threshold - ) - bbox = img2pose_output["boxes"] - poses = img2pose_output["dofs"] - facescores = img2pose_output["scores"] - - # Extract faces from bbox - if bbox.numel() != 0: - extracted_faces, new_bbox = extract_face_from_bbox_torch( - single_frame, bbox, face_size=face_size - ) - else: # No Face Detected - let's test of nans will work - extracted_faces = torch.zeros((1, 3, face_size, face_size)) - # bbox = torch.zeros((1,4)) - # new_bbox = torch.zeros((1,4)) - # facescores = torch.zeros((1)) - # poses = torch.zeros((1,6)) - # extracted_faces = torch.full((1, 3, face_size, face_size), float('nan')) - bbox = torch.full((1, 4), float("nan")) - new_bbox = torch.full((1, 4), float("nan")) - facescores = torch.zeros((1)) - poses = torch.full((1, 6), float("nan")) - - frame_results = { - "face_id": i, - "faces": extracted_faces, - "boxes": bbox, - "new_boxes": new_bbox, - "poses": poses, - "scores": facescores, - } - - # Extract Faces separately for Resmasknet - if self.info["emotion_model"] == "resmasknet": - if torch.all(torch.isnan(bbox)): # No Face Detected - frame_results["resmasknet_faces"] = torch.full( - (1, 3, 224, 224), float("nan") - ) - # frame_results["resmasknet_faces"] = torch.zeros((1, 3, 224, 224)) - else: - resmasknet_faces, _ = extract_face_from_bbox_torch( - single_frame, bbox, expand_bbox=1.1, face_size=224 - ) - frame_results["resmasknet_faces"] = resmasknet_faces - - batch_results.append(frame_results) - - return batch_results - - @torch.inference_mode() - def forward(self, faces_data): - """ - Run Model Inference on detected faces. - - Args: - faces_data (list of dict): Detected faces and associated data from `detect_faces`. 
- - Returns: - Fex: Prediction results dataframe - """ - - extracted_faces = torch.cat([face["faces"] for face in faces_data], dim=0) - new_bboxes = torch.cat([face["new_boxes"] for face in faces_data], dim=0) - n_faces = extracted_faces.shape[0] - - if self.landmark_detector is not None: - if self.info["landmark_model"].lower() == "mobilenet": - extracted_faces = Compose( - [Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] - )(extracted_faces) - landmarks = self.landmark_detector.forward( - extracted_faces.to(self.device) - ) - if self.info["landmark_model"].lower() == "mobilefacenet": - landmarks = self.landmark_detector.forward( - extracted_faces.to(self.device) - )[0] - else: - landmarks = self.landmark_detector.forward( - extracted_faces.to(self.device) - ) - new_landmarks = inverse_transform_landmarks_torch(landmarks, new_bboxes) - else: - new_landmarks = torch.full((n_faces, 136), float("nan")) - - if self.emotion_detector is not None: - if self.info["emotion_model"] == "resmasknet": - resmasknet_faces = torch.cat( - [face["resmasknet_faces"] for face in faces_data], dim=0 - ) - emotions = self.emotion_detector.forward(resmasknet_faces.to(self.device)) - emotions = torch.softmax(emotions, 1) - elif self.info["emotion_model"] == "svm": - hog_features, emo_new_landmarks = extract_hog_features( - extracted_faces, landmarks - ) - emotions = self.emotion_detector.detect_emo( - frame=hog_features, landmarks=[emo_new_landmarks] - ) - emotions = torch.tensor(emotions) - else: - emotions = torch.full((n_faces, 7), float("nan")) - - if self.identity_detector is not None: - identity_embeddings = self.identity_detector.forward( - extracted_faces.to(self.device) - ) - else: - identity_embeddings = torch.full((n_faces, 512), float("nan")) - - if self.au_detector is not None: - hog_features, au_new_landmarks = extract_hog_features( - extracted_faces, landmarks - ) - aus = self.au_detector.detect_au( - frame=hog_features, landmarks=[au_new_landmarks] - ) - else: - aus = torch.full((n_faces, 20), float("nan")) - - # Create Fex Output Representation - bboxes = torch.cat( - [ - convert_bbox_output( - face_output["new_boxes"].to(self.device), - face_output["scores"].to(self.device), - ) - for face_output in faces_data - ], - dim=0, - ) - feat_faceboxes = pd.DataFrame( - bboxes.cpu().detach().numpy(), - columns=FEAT_FACEBOX_COLUMNS, - ) - - poses = torch.cat( - [face_output["poses"].to(self.device) for face_output in faces_data], dim=0 - ) - feat_poses = pd.DataFrame( - poses.cpu().detach().numpy(), columns=FEAT_FACEPOSE_COLUMNS_6D - ) - - reshape_landmarks = new_landmarks.reshape(new_landmarks.shape[0], 68, 2) - reordered_landmarks = torch.cat( - [reshape_landmarks[:, :, 0], reshape_landmarks[:, :, 1]], dim=1 - ) - feat_landmarks = pd.DataFrame( - reordered_landmarks.cpu().detach().numpy(), - columns=openface_2d_landmark_columns, - ) - - feat_aus = pd.DataFrame(aus, columns=AU_LANDMARK_MAP["Feat"]) - - feat_emotions = pd.DataFrame( - emotions.cpu().detach().numpy(), columns=FEAT_EMOTION_COLUMNS - ) - - feat_identities = pd.DataFrame( - identity_embeddings.cpu().detach().numpy(), columns=FEAT_IDENTITY_COLUMNS[1:] - ) - - return Fex( - pd.concat( - [ - feat_faceboxes, - feat_landmarks, - feat_poses, - feat_aus, - feat_emotions, - feat_identities, - ], - axis=1, - ), - au_columns=AU_LANDMARK_MAP["Feat"], - emotion_columns=FEAT_EMOTION_COLUMNS, - facebox_columns=FEAT_FACEBOX_COLUMNS, - landmark_columns=openface_2d_landmark_columns, - facepose_columns=FEAT_FACEPOSE_COLUMNS_6D, - 
identity_columns=FEAT_IDENTITY_COLUMNS[1:], - detector="Feat", - face_model=self.info["face_model"], - landmark_model=self.info["landmark_model"], - au_model=self.info["au_model"], - emotion_model=self.info["emotion_model"], - facepose_model=self.info["facepose_model"], - identity_model=self.info["identity_model"], - ) - - def detect( - self, - inputs, - data_type="image", - output_size=None, - batch_size=1, - num_workers=0, - pin_memory=False, - face_identity_threshold=0.8, - face_detection_threshold=0.5, - skip_frames=None, - progress_bar=True, - **kwargs, - ): - """ - Detects FEX from one or more image files. - - Args: - inputs (list of str, torch.Tensor): Path to a list of paths to image files or torch.Tensor of images (B, C, H, W) - data_type (str): type of data to be processed; Default 'image' ['image', 'tensor', 'video'] - output_size (int): image size to rescale all image preserving aspect ratio. - batch_size (int): how many batches of images you want to run at one shot. - num_workers (int): how many subprocesses to use for data loading. - pin_memory (bool): If ``True``, the data loader will copy Tensors into CUDA pinned memory before returning them. - face_identity_threshold (float): value between 0-1 to determine similarity of person using face identity embeddings; Default >= 0.8 - face_detection_threshold (float): value between 0-1 to determine if a face was detected; Default >= 0.5 - skip_frames (int or None): number of frames to skip to speed up inference (video only); Default None - progress_bar (bool): Whether to show the tqdm progress bar. Default is True. - **kwargs: additional detector-specific kwargs - - Returns: - pd.DataFrame: Concatenated results for all images in the batch - """ - - if data_type.lower() == "image": - data_loader = DataLoader( - ImageDataset( - inputs, - output_size=output_size, - preserve_aspect_ratio=True, - padding=True, - ), - num_workers=num_workers, - batch_size=batch_size, - pin_memory=pin_memory, - shuffle=False, - ) - elif data_type.lower() == "tensor": - data_loader = DataLoader( - TensorDataset(inputs), - batch_size=batch_size, - shuffle=False, - num_workers=num_workers, - pin_memory=pin_memory, - ) - elif data_type.lower() == "video": - dataset = VideoDataset( - inputs, skip_frames=skip_frames, output_size=output_size - ) - data_loader = DataLoader( - dataset, - num_workers=num_workers, - batch_size=batch_size, - pin_memory=pin_memory, - shuffle=False, - ) - - data_iterator = tqdm(data_loader) if progress_bar else data_loader - - batch_output = [] - frame_counter = 0 - - try: - _ = next(enumerate(tqdm(data_loader))) - except RuntimeError as e: - raise ValueError( - f"When using `batch_size > 1`, all images must either have the same dimension or `output_size` should be something other than `None` to pad images prior to processing\n{e}" - ) - - for batch_id, batch_data in enumerate(data_iterator): - faces_data = self.detect_faces( - batch_data["Image"], - face_size=self.face_size if hasattr(self, "face_size") else 112, - face_detection_threshold=face_detection_threshold, - ) - batch_results = self.forward(faces_data) - - # Create metadata for each frame - file_names = [] - frame_ids = [] - for i, face in enumerate(faces_data): - n_faces = len(face["scores"]) - if data_type.lower() == "video": - current_frame_id = batch_data["Frame"].detach().numpy()[i] - else: - current_frame_id = frame_counter + i - frame_ids.append(np.repeat(current_frame_id, n_faces)) - file_names.append(np.repeat(batch_data["FileName"][i], n_faces)) - 
batch_results["input"] = np.concatenate(file_names) - batch_results["frame"] = np.concatenate(frame_ids) - - # Invert the face boxes and landmarks based on the padded output size - for j, frame_idx in enumerate(batch_results["frame"].unique()): - batch_results.loc[ - batch_results["frame"] == frame_idx, ["FrameHeight", "FrameWidth"] - ] = ( - compute_original_image_size(batch_data)[j, :] - .repeat( - len( - batch_results.loc[ - batch_results["frame"] == frame_idx, "frame" - ] - ), - 1, - ) - .numpy() - ) - batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectX"] = ( - batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectX"] - - batch_data["Padding"]["Left"].detach().numpy()[j] - ) / batch_data["Scale"].detach().numpy()[j] - batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectY"] = ( - batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectY"] - - batch_data["Padding"]["Top"].detach().numpy()[j] - ) / batch_data["Scale"].detach().numpy()[j] - batch_results.loc[ - batch_results["frame"] == frame_idx, "FaceRectWidth" - ] = ( - ( - batch_results.loc[ - batch_results["frame"] == frame_idx, "FaceRectWidth" - ] - ) - / batch_data["Scale"].detach().numpy()[j] - ) - batch_results.loc[ - batch_results["frame"] == frame_idx, "FaceRectHeight" - ] = ( - ( - batch_results.loc[ - batch_results["frame"] == frame_idx, "FaceRectHeight" - ] - ) - / batch_data["Scale"].detach().numpy()[j] - ) - - for i in range(68): - batch_results.loc[batch_results["frame"] == frame_idx, f"x_{i}"] = ( - batch_results.loc[batch_results["frame"] == frame_idx, f"x_{i}"] - - batch_data["Padding"]["Left"].detach().numpy()[j] - ) / batch_data["Scale"].detach().numpy()[j] - batch_results.loc[batch_results["frame"] == frame_idx, f"y_{i}"] = ( - batch_results.loc[batch_results["frame"] == frame_idx, f"y_{i}"] - - batch_data["Padding"]["Top"].detach().numpy()[j] - ) / batch_data["Scale"].detach().numpy()[j] - - batch_output.append(batch_results) - frame_counter += 1 * batch_size - batch_output = pd.concat(batch_output) - batch_output.reset_index(drop=True, inplace=True) - if data_type.lower() == "video": - batch_output["approx_time"] = [ - dataset.calc_approx_frame_time(x) - for x in batch_output["frame"].to_numpy() - ] - batch_output.compute_identities(threshold=face_identity_threshold, inplace=True) - return batch_output diff --git a/feat/MPDetector.py b/feat/MPDetector.py index c73d7c3e..aac1d06f 100644 --- a/feat/MPDetector.py +++ b/feat/MPDetector.py @@ -50,10 +50,8 @@ def get_camera_intrinsics(batch_hw_tensor, focal_length=None): Computes the camera intrinsic matrix for a batch of images. Args: - batch_hw_tensor (torch.Tensor): A tensor of shape [B, 2] where B is the batch size, - and each entry contains [H, W] for the height and width of the images. - focal_length (torch.Tensor, optional): A tensor of shape [B] representing the focal length for each image in the batch. - If None, the focal length will default to the image width for each image. + batch_hw_tensor (torch.Tensor): A tensor of shape [B, 2] where B is the batch size, and each entry contains [H, W] for the height and width of the images. + focal_length (torch.Tensor, optional): A tensor of shape [B] representing the focal length for each image in the batch. If None, the focal length will default to the image width for each image. Returns: K (torch.Tensor): A tensor of shape [B, 3, 3] containing the camera intrinsic matrices for each image in the batch. 
@@ -310,6 +308,157 @@ def estimate_face_pose(pts_3d, K, max_iter=100, lr=1e-3, return_euler_angles=Tru return R, t +def plot_face_landmarks( + fex, + frame_idx, + ax=None, + oval_color="white", + oval_linestyle="-", + oval_linewidth=3, + tesselation_color="gray", + tesselation_linestyle="-", + tesselation_linewidth=1, + mouth_color="white", + mouth_linestyle="-", + mouth_linewidth=3, + eye_color="navy", + eye_linestyle="-", + eye_linewidth=2, + iris_color="skyblue", + iris_linestyle="-", + iris_linewidth=2, +): + """Plots face landmarks on the given frame using specified styles for each part. + + Args: + fex: DataFrame containing face landmarks (x, y coordinates). + frame_idx: Index of the frame to plot. + ax: Matplotlib axis to draw on. If None, a new axis is created. + oval_color, tesselation_color, mouth_color, eye_color, iris_color: Colors for each face part. + oval_linestyle, tesselation_linestyle, mouth_linestyle, eye_linestyle, iris_linestyle: Linestyle for each face part. + oval_linewidth, tesselation_linewidth, mouth_linewidth, eye_linewidth, iris_linewidth: Linewidth for each face part. + n_faces: Number of faces in the frame. If None, will be determined from fex. + """ + if ax is None: + fig, ax = plt.subplots(figsize=(10, 10)) + + # Get frame data + fex_frame = fex.query("frame == @frame_idx") + n_faces_frame = fex_frame.shape[0] + + # Add the frame image + ax.imshow(Image.open(fex_frame["input"].unique()[0])) + + # Helper function to draw lines for a set of connections + def draw_connections(face_idx, connections, color, linestyle, linewidth): + for connection in connections: + start = connection.start + end = connection.end + line = plt.Line2D( + [fex.loc[face_idx, f"x_{start}"], fex.loc[face_idx, f"x_{end}"]], + [fex.loc[face_idx, f"y_{start}"], fex.loc[face_idx, f"y_{end}"]], + color=color, + linestyle=linestyle, + linewidth=linewidth, + ) + ax.add_line(line) + + # Face tessellation + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION, + tesselation_color, + tesselation_linestyle, + tesselation_linewidth, + ) + + # Mouth + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_LIPS, + mouth_color, + mouth_linestyle, + mouth_linewidth, + ) + + # Left iris + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_LEFT_IRIS, + iris_color, + iris_linestyle, + iris_linewidth, + ) + + # Left eye + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_LEFT_EYE, + eye_color, + eye_linestyle, + eye_linewidth, + ) + + # Left eyebrow + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_LEFT_EYEBROW, + eye_color, + eye_linestyle, + eye_linewidth, + ) + + # Right iris + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_IRIS, + iris_color, + iris_linestyle, + iris_linewidth, + ) + + # Right eye + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_EYE, + eye_color, + eye_linestyle, + eye_linewidth, + ) + + # Right eyebrow + for face in range(n_faces_frame): + draw_connections( + face, + FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_EYEBROW, + eye_color, + eye_linestyle, + eye_linewidth, + ) + + # Face oval + for face in range(n_faces_frame): + draw_connections( + face, + 
FaceLandmarksConnections.FACE_LANDMARKS_FACE_OVAL, + oval_color, + oval_linestyle, + oval_linewidth, + ) + + # Optionally turn off axis for a clean plot + ax.axis("off") + + return ax + + class MPDetector(nn.Module, PyTorchModelHubMixin): def __init__( self, diff --git a/feat/detector.py b/feat/detector.py index 4a450183..24d7f697 100644 --- a/feat/detector.py +++ b/feat/detector.py @@ -1,1404 +1,673 @@ -""" -Main Detector class. The Detector class wraps other pre-trained models -(e.g. face detector, au detector) and provides a high-level API to make it easier to -perform detection -""" - -import os +import json +from tqdm import tqdm import numpy as np import pandas as pd -from skimage.feature import hog +from huggingface_hub import hf_hub_download, PyTorchModelHubMixin +from collections import OrderedDict + +from feat.emo_detectors.ResMaskNet.resmasknet_test import ( + ResMasking, +) +from feat.identity_detectors.facenet.facenet_model import InceptionResnetV1 +from feat.facepose_detectors.img2pose.deps.models import ( + FasterDoFRCNN, + postprocess_img2pose, +) +from feat.au_detectors.StatLearning.SL_test import XGBClassifier, SVMClassifier +from feat.emo_detectors.StatLearning.EmoSL_test import EmoSVMClassifier +from feat.landmark_detectors.mobilefacenet_test import MobileFaceNet +from feat.landmark_detectors.basenet_test import MobileNet_GDConv +from feat.landmark_detectors.pfld_compressed_test import PFLDInference +from feat.pretrained import load_model_weights, AU_LANDMARK_MAP from feat.utils import ( + set_torch_device, openface_2d_landmark_columns, FEAT_EMOTION_COLUMNS, FEAT_FACEBOX_COLUMNS, - FEAT_FACEPOSE_COLUMNS_3D, FEAT_FACEPOSE_COLUMNS_6D, - FEAT_TIME_COLUMNS, FEAT_IDENTITY_COLUMNS, - set_torch_device, - is_list_of_lists_empty, ) from feat.utils.io import get_resource_path from feat.utils.image_operations import ( - extract_face_from_landmarks, - extract_face_from_bbox, convert_image_to_tensor, - BBox, -) -from feat.pretrained import ( - get_pretrained_models, - fetch_model, - AU_LANDMARK_MAP, - load_model_weights, -) -from feat.data import ( - Fex, - ImageDataset, - VideoDataset, - _inverse_face_transform, - _inverse_landmark_transform, + extract_face_from_bbox_torch, + inverse_transform_landmarks_torch, + extract_hog_features, + convert_bbox_output, + compute_original_image_size, ) +from feat.data import Fex, ImageDataset, TensorDataset, VideoDataset +from skops.io import load, get_untrusted_types +from safetensors.torch import load_file import torch +import torch.nn as nn from torch.utils.data import DataLoader -from torchvision.transforms import Compose, Normalize -import torchvision.transforms as transforms from torchvision.models.detection.backbone_utils import resnet_fpn_backbone -from feat.facepose_detectors.img2pose.deps.models import postprocess_img2pose -import logging +from torchvision.transforms import Compose, Normalize +import sys import warnings -from tqdm import tqdm -from huggingface_hub import hf_hub_download -from safetensors.torch import load_file -import json + +sys.modules["__main__"].__dict__["XGBClassifier"] = XGBClassifier +sys.modules["__main__"].__dict__["SVMClassifier"] = SVMClassifier +sys.modules["__main__"].__dict__["EmoSVMClassifier"] = EmoSVMClassifier # Supress sklearn warning about pickled estimators and diff sklearn versions warnings.filterwarnings("ignore", category=UserWarning, module="sklearn") -class Detector(object): +class Detector(nn.Module, PyTorchModelHubMixin): def __init__( self, - face_model="retinaface", 
landmark_model="mobilefacenet", au_model="xgb", emotion_model="resmasknet", - facepose_model="img2pose", identity_model="facenet", device="cpu", - n_jobs=1, - verbose=False, - **kwargs, ): - """Detector class to detect FEX from images or videos. - - Detector is a class used to detect faces, facial landmarks, emotions, and action units from images and videos. - - Args: - n_jobs (int, default=1): Number of processes to use for extraction. - device (str): specify device to process data (default='cpu'), can be - ['auto', 'cpu', 'cuda', 'mps'] - verbose (bool): print logging and debug messages during operation - **kwargs: you can pass each detector specific kwargs using a dictionary - like: `face_model_kwargs = {...}, au_model_kwargs={...}, ...` - - Attributes: - info (dict): - n_jobs (int): Number of jobs to be used in parallel. - face_model (str, default=retinaface): Name of face detection model - landmark_model (str, default=mobilenet): Nam eof landmark model - au_model (str, default=svm): Name of Action Unit detection model - emotion_model (str, default=resmasknet): Path to emotion detection model. - facepose_model (str, default=img2pose): Name of headpose detection model. - identity_model (str, default=facenet): Name of identity detection model. - face_detection_columns (list): Column names for face detection ouput (x, y, w, h) - face_landmark_columns (list): Column names for face landmark output (x0, y0, x1, y1, ...) - emotion_model_columns (list): Column names for emotion model output - emotion_model_columns (list): Column names for emotion model output - mapper (dict): Class names for emotion model output by index. - input_shape (dict) - - face_detector: face detector object - face_landmark: face_landmark object - emotion_model: emotion_model object + super(Detector, self).__init__() - Examples: - >> detector = Detector(n_jobs=1) - >> detector.detect_image(["input.jpg"]) - >> detector.detect_video("input.mp4") - """ - - # Initial info dict with model names only self.info = dict( - face_model=None, + face_model="img2pose", landmark_model=None, emotion_model=None, - facepose_model=None, + facepose_model="img2pose", au_model=None, identity_model=None, - n_jobs=n_jobs, ) - self.verbose = verbose - # Setup verbosity - if self.verbose: - logging.basicConfig(level=logging.INFO) - logging.info("Verbose logging enabled") - - # Setup device self.device = set_torch_device(device) - # Load Model Configs - with open(os.path.join(get_resource_path(), "model_config.json"), "r") as file: - self.model_configs = json.load(file) - # Verify model names and download if necessary - face, landmark, au, emotion, facepose, identity = get_pretrained_models( - face_model, - landmark_model, - au_model, - emotion_model, - facepose_model, - identity_model, - verbose, + # Load Model Configurations + facepose_config_file = hf_hub_download( + repo_id="py-feat/img2pose", + filename="config.json", + cache_dir=get_resource_path(), ) - - self._init_detectors( - face, - landmark, - au, - emotion, - facepose, - identity, - openface_2d_landmark_columns, - **kwargs, + with open(facepose_config_file, "r") as f: + facepose_config = json.load(f) + + # Initialize img2pose + backbone = resnet_fpn_backbone(backbone_name="resnet18", weights=None) + backbone.eval() + backbone.to(self.device) + self.facepose_detector = FasterDoFRCNN( + backbone=backbone, + num_classes=2, + min_size=facepose_config["min_size"], + max_size=facepose_config["max_size"], + pose_mean=torch.tensor(facepose_config["pose_mean"]), + 
pose_stddev=torch.tensor(facepose_config["pose_stddev"]), + threed_68_points=torch.tensor(facepose_config["threed_points"]), + rpn_pre_nms_top_n_test=facepose_config["rpn_pre_nms_top_n_test"], + rpn_post_nms_top_n_test=facepose_config["rpn_post_nms_top_n_test"], + bbox_x_factor=facepose_config["bbox_x_factor"], + bbox_y_factor=facepose_config["bbox_y_factor"], + expand_forehead=facepose_config["expand_forehead"], ) - - def __repr__(self): - return f"{self.__class__.__module__}.{self.__class__.__name__}(device={self.device}; face_model={self.info['face_model']}, landmark_model={self.info['landmark_model']}, au_model={self.info['au_model']}, emotion_model={self.info['emotion_model']}, facepose_model={self.info['facepose_model']}, identity_model={self.info['identity_model']})" - - def __getitem__(self, i): - return self.info[i] - - def _init_detectors( - self, - face, - landmark, - au, - emotion, - facepose, - identity, - openface_2d_landmark_columns, - **kwargs, - ): - """Helper function called by __init__ and change_model to (re)initialize one of - the supported detectors""" - - # Keyword arguments than can be passed to the underlying models - face_model_kwargs = kwargs.pop("face_model_kwargs", dict()) - landmark_model_kwargs = kwargs.pop("landmark_model_kwargs", dict()) - au_model_kwargs = kwargs.pop("au_model_kwargs", dict()) - emotion_model_kwargs = kwargs.pop("emotion_model_kwargs", dict()) - facepose_model_kwargs = kwargs.pop("facepose_model_kwargs", dict()) - identity_model_kwargs = kwargs.pop("identity_model_kwargs", dict()) - - # Initialize model instances and any additional post init setup - # Only initialize a model if the currently initialized model is diff than the - # requested one. Lets us re-use this with .change_model - - # FACE MODEL - if self.info["face_model"] != face: - logging.info(f"Loading Face model: {face}") - self.face_detector = fetch_model("face_model", face) - self.info["face_model"] = face - self.info["face_detection_columns"] = FEAT_FACEBOX_COLUMNS - predictions = np.full_like(np.atleast_2d(FEAT_FACEBOX_COLUMNS), np.nan) - empty_facebox = pd.DataFrame(predictions, columns=FEAT_FACEBOX_COLUMNS) - self._empty_facebox = empty_facebox - if self.face_detector is not None: - if "img2pose" in face: - self.face_detector = self.face_detector( - constrained="img2pose-c" == face, - device=self.device, - **face_model_kwargs, - ) - else: - self.face_detector = self.face_detector( - device=self.device, **face_model_kwargs - ) - - # LANDMARK MODEL - if self.info["landmark_model"] != landmark: - logging.info(f"Loading Facial Landmark model: {landmark}") - self.landmark_detector = fetch_model("landmark_model", landmark) - if self.landmark_detector is not None: - if landmark == "mobilenet": - self.landmark_detector = self.landmark_detector( - 136, **landmark_model_kwargs - ) - self.landmark_detector.from_pretrained( - f"py-feat/{landmark}", cache_dir=get_resource_path() - ) - - # checkpoint = torch.load( - # os.path.join( - # get_resource_path(), - # "mobilenet_224_model_best_gdconv_external.pth.tar", - # ), - # map_location=self.device, - # ) - # ################################## - # state_dict = checkpoint["state_dict"] - # from collections import OrderedDict - - # new_state_dict = OrderedDict() - # for k, v in state_dict.items(): - # if "module." 
in k: - # k = k.replace("module.", "") - # new_state_dict[k] = v - # self.landmark_detector.load_state_dict(new_state_dict) - # ##################################### - - elif landmark == "pfld": - self.landmark_detector = self.landmark_detector( - **landmark_model_kwargs - ) - self.landmark_detector.from_pretrained( - f"py-feat/{landmark}", cache_dir=get_resource_path() - ) - - # checkpoint = torch.load( - # os.path.join(get_resource_path(), "pfld_model_best.pth.tar"), - # map_location=self.device, - # ) - # self.landmark_detector.load_state_dict(checkpoint["state_dict"]) - elif landmark == "mobilefacenet": - self.landmark_detector = self.landmark_detector( - [112, 112], 136, **landmark_model_kwargs - ) - self.landmark_detector.from_pretrained( - f"py-feat/{landmark}", cache_dir=get_resource_path() - ) - - # checkpoint = torch.load( - # os.path.join( - # get_resource_path(), "mobilefacenet_model_best.pth.tar" - # ), - # map_location=self.device, - # ) - # self.landmark_detector.load_state_dict(checkpoint["state_dict"]) - self.landmark_detector.eval() - self.landmark_detector.to(self.device) - - self.info["landmark_model"] = landmark - self.info["mapper"] = openface_2d_landmark_columns - self.info["face_landmark_columns"] = openface_2d_landmark_columns - predictions = np.full_like( - np.atleast_2d(openface_2d_landmark_columns), np.nan - ) - empty_landmarks = pd.DataFrame( - predictions, columns=openface_2d_landmark_columns - ) - self._empty_landmark = empty_landmarks - - # FACEPOSE MODEL - if self.info["facepose_model"] != facepose: - logging.info(f"Loading facepose model: {facepose}") - self.facepose_detector = fetch_model("facepose_model", facepose) - if "img2pose" in facepose: - backbone = resnet_fpn_backbone( - backbone_name=f"resnet{self.model_configs['img2pose']['depth']}", - weights=None, + facepose_model_file = hf_hub_download( + repo_id="py-feat/img2pose", + filename="model.safetensors", + cache_dir=get_resource_path(), + ) + facepose_checkpoint = load_file(facepose_model_file) + self.facepose_detector.load_state_dict(facepose_checkpoint, load_model_weights) + self.facepose_detector.eval() + self.facepose_detector.to(self.device) + # self.facepose_detector = torch.compile(self.facepose_detector) + + # Initialize Landmark Detector + self.info["landmark_model"] = landmark_model + if landmark_model is not None: + if landmark_model == "mobilefacenet": + self.face_size = 112 + self.landmark_detector = MobileFaceNet( + [self.face_size, self.face_size], 136, device=self.device + ) + landmark_model_file = hf_hub_download( + repo_id="py-feat/mobilefacenet", + filename="mobilefacenet_model_best.pth.tar", + cache_dir=get_resource_path(), ) - self.facepose_detector = self.facepose_detector( - backbone=backbone, - num_classes=2, - min_size=self.model_configs["img2pose"]["min_size"], - max_size=self.model_configs["img2pose"]["max_size"], - pose_mean=torch.tensor(self.model_configs["img2pose"]["pose_mean"]), - pose_stddev=torch.tensor( - self.model_configs["img2pose"]["pose_stddev"] - ), - threed_68_points=torch.tensor( - self.model_configs["img2pose"]["threed_points"] - ), - rpn_pre_nms_top_n_test=self.model_configs["img2pose"][ - "rpn_pre_nms_top_n_test" - ], - rpn_post_nms_top_n_test=self.model_configs["img2pose"][ - "rpn_post_nms_top_n_test" - ], - bbox_x_factor=self.model_configs["img2pose"]["bbox_x_factor"], - bbox_y_factor=self.model_configs["img2pose"]["bbox_y_factor"], - expand_forehead=self.model_configs["img2pose"]["expand_forehead"], - **facepose_model_kwargs, + landmark_state_dict = 
torch.load( + landmark_model_file, map_location=self.device, weights_only=True + )["state_dict"] # Ensure Model weights are Float32 for MPS + elif landmark_model == "mobilenet": + self.face_size = 224 + self.landmark_detector = MobileNet_GDConv(136) + landmark_model_file = hf_hub_download( + repo_id="py-feat/mobilenet", + filename="mobilenet_224_model_best_gdconv_external.pth.tar", + cache_dir=get_resource_path(), ) - - # self.facepose_detector = self.facepose_detector( - # constrained="img2pose-c" == face, - # device=self.device, - # **facepose_model_kwargs, - # ) - facepose_model_file = hf_hub_download( - repo_id="py-feat/img2pose", - filename="model.safetensors", + mobilenet_state_dict = torch.load( + landmark_model_file, map_location=self.device, weights_only=True + )["state_dict"] # Ensure Model weights are Float32 for MPS + landmark_state_dict = OrderedDict() + for k, v in mobilenet_state_dict.items(): + if "module." in k: + k = k.replace("module.", "") + landmark_state_dict[k] = v + elif landmark_model == "pfld": + self.face_size = 112 + self.landmark_detector = PFLDInference() + landmark_model_file = hf_hub_download( + repo_id="py-feat/pfld", + filename="pfld_model_best.pth.tar", cache_dir=get_resource_path(), ) - facepose_checkpoint = load_file(facepose_model_file) - self.facepose_detector.load_state_dict(facepose_checkpoint) - self.facepose_detector.eval() - self.facepose_detector.to(self.device) + landmark_state_dict = torch.load( + landmark_model_file, map_location=self.device, weights_only=True + )["state_dict"] # Ensure Model weights are Float32 for MPS else: - self.facepose_detector = self.facepose_detector(**facepose_model_kwargs) - self.info["facepose_model"] = facepose + raise ValueError("{landmark_model} is not currently supported.") + self.landmark_detector.load_state_dict(landmark_state_dict) + self.landmark_detector.eval() + self.landmark_detector.to(self.device) + # self.landmark_detector = torch.compile(self.landmark_detector) + else: + self.landmark_detector = None - pose_dof = facepose_model_kwargs.get("RETURN_DIM", 3) - self.info["facepose_model_columns"] = ( - FEAT_FACEPOSE_COLUMNS_3D if pose_dof == 3 else FEAT_FACEPOSE_COLUMNS_6D - ) - predictions = np.full_like( - np.atleast_2d(self.info["facepose_model_columns"]), np.nan - ) - empty_facepose = pd.DataFrame( - predictions, columns=self.info["facepose_model_columns"] - ) - self._empty_facepose = empty_facepose + # Initialize AU Detector + self.info["au_model"] = au_model + if au_model is not None: + if self.landmark_detector is not None: + if au_model == "xgb": + self.au_detector = XGBClassifier() + au_model_path = hf_hub_download( + repo_id="py-feat/xgb_au", + filename="xgb_au_classifier.skops", + cache_dir=get_resource_path(), + ) - # AU MODEL - if self.info["au_model"] != au: - logging.info(f"Loading AU model: {au}") - self.au_model = fetch_model("au_model", au) - self.info["au_model"] = au - if self.info["au_model"] in ["svm", "xgb"]: - self.info["au_presence_columns"] = AU_LANDMARK_MAP["Feat"] + elif au_model == "svm": + self.au_detector = SVMClassifier() + au_model_path = hf_hub_download( + repo_id="py-feat/svm_au", + filename="svm_au_classifier.skops", + cache_dir=get_resource_path(), + ) + else: + raise ValueError("{au_model} is not currently supported.") + + au_unknown_types = get_untrusted_types(file=au_model_path) + loaded_au_model = load(au_model_path, trusted=au_unknown_types) + self.au_detector.load_weights( + scaler_upper=loaded_au_model.scaler_upper, + 
pca_model_upper=loaded_au_model.pca_model_upper, + scaler_lower=loaded_au_model.scaler_lower, + pca_model_lower=loaded_au_model.pca_model_lower, + scaler_full=loaded_au_model.scaler_full, + pca_model_full=loaded_au_model.pca_model_full, + classifiers=loaded_au_model.classifiers, + ) else: - self.info["au_presence_columns"] = AU_LANDMARK_MAP[self.info["au_model"]] - if self.au_model is not None: - self.au_model = self.au_model(**au_model_kwargs) - au_weights = load_model_weights( - model_type="au", model=au, location="huggingface" + raise ValueError( + "Landmark Detector is required for AU Detection with {au_model}." ) - self.au_model.load_weights( - au_weights["scaler_upper"], - au_weights["pca_model_upper"], - au_weights["scaler_lower"], - au_weights["pca_model_lower"], - au_weights["scaler_full"], - au_weights["pca_model_full"], - au_weights["au_classifiers"], + else: + self.au_detector = None + + # Initialize Emotion Detector + self.info["emotion_model"] = emotion_model + if emotion_model is not None: + if emotion_model == "resmasknet": + emotion_config_file = hf_hub_download( + repo_id="py-feat/resmasknet", + filename="config.json", + cache_dir=get_resource_path(), ) + with open(emotion_config_file, "r") as f: + emotion_config = json.load(f) - predictions = np.full_like( - np.atleast_2d(self.info["au_presence_columns"]), np.nan + self.emotion_detector = ResMasking( + "", in_channels=emotion_config["in_channels"] ) - empty_au_occurs = pd.DataFrame( - predictions, columns=self.info["au_presence_columns"] + self.emotion_detector.fc = nn.Sequential( + nn.Dropout(0.4), nn.Linear(512, emotion_config["num_classes"]) ) - self._empty_auoccurence = empty_au_occurs - - # EMOTION MODEL - if self.info["emotion_model"] != emotion: - logging.info(f"Loading emotion model: {emotion}") - self.emotion_model = fetch_model("emotion_model", emotion) - self.info["emotion_model"] = emotion - if self.emotion_model is not None: - if emotion == "resmasknet": - self.emotion_model = self.emotion_model( - device=self.device, **emotion_model_kwargs + emotion_model_file = hf_hub_download( + repo_id="py-feat/resmasknet", + filename="ResMaskNet_Z_resmasking_dropout1_rot30.pth", + cache_dir=get_resource_path(), + ) + emotion_checkpoint = torch.load( + emotion_model_file, map_location=device, weights_only=True + )["net"] + self.emotion_detector.load_state_dict(emotion_checkpoint) + self.emotion_detector.eval() + self.emotion_detector.to(self.device) + # self.emotion_detector = torch.compile(self.emotion_detector) + elif emotion_model == "svm": + if self.landmark_detector is not None: + self.emotion_detector = EmoSVMClassifier() + emotion_model_path = hf_hub_download( + repo_id="py-feat/svm_emo", + filename="svm_emo_classifier.skops", + cache_dir=get_resource_path(), ) - elif emotion == "svm": - self.emotion_model = self.emotion_model(**emotion_model_kwargs) - emo_weights = load_model_weights( - model_type="emotion", model=emotion, location="huggingface" + emotion_unknown_types = get_untrusted_types(file=emotion_model_path) + loaded_emotion_model = load( + emotion_model_path, trusted=emotion_unknown_types ) - self.emotion_model.load_weights( - emo_weights["scaler_full"], - emo_weights["pca_model_full"], - emo_weights["emo_classifiers"], + self.emotion_detector.load_weights( + scaler_full=loaded_emotion_model.scaler_full, + pca_model_full=loaded_emotion_model.pca_model_full, + classifiers=loaded_emotion_model.classifiers, + ) + else: + raise ValueError( + "Landmark Detector is required for Emotion Detection with 
{emotion_model}." ) - self.info["emotion_model_columns"] = FEAT_EMOTION_COLUMNS - predictions = np.full_like(np.atleast_2d(FEAT_EMOTION_COLUMNS), np.nan) - empty_emotion = pd.DataFrame(predictions, columns=FEAT_EMOTION_COLUMNS) - self._empty_emotion = empty_emotion - - # IDENTITY MODEL - if self.info["identity_model"] != identity: - logging.info(f"Loading Identity model: {identity}") - self.identity_model = fetch_model("identity_model", identity) - self.info["identity_model"] = identity - self.info["identity_model_columns"] = FEAT_IDENTITY_COLUMNS - predictions = np.full_like(np.atleast_2d(FEAT_IDENTITY_COLUMNS), np.nan) - empty_identity = pd.DataFrame(predictions, columns=FEAT_IDENTITY_COLUMNS) - self._empty_identity = empty_identity - if self.identity_model is not None: - self.identity_model = self.identity_model( - device=self.device, **identity_model_kwargs + else: + raise ValueError("{emotion_model} is not currently supported.") + else: + self.emotion_detector = None + + # Initialize Identity Detecctor - facenet + self.info["identity_model"] = identity_model + if identity_model is not None: + if identity_model == "facenet": + self.identity_detector = InceptionResnetV1( + pretrained=None, + classify=False, + num_classes=None, + dropout_prob=0.6, + device=self.device, + ) + self.identity_detector.logits = nn.Linear(512, 8631) + identity_model_file = hf_hub_download( + repo_id="py-feat/facenet", + filename="facenet_20180402_114759_vggface2.pth", + cache_dir=get_resource_path(), ) - - self.info["output_columns"] = ( - FEAT_TIME_COLUMNS - + self.info["face_detection_columns"] - + self.info["face_landmark_columns"] - + self.info["au_presence_columns"] - + self.info["facepose_model_columns"] - + self.info["emotion_model_columns"] - + self.info["identity_model_columns"] - + ["input"] - ) - - def change_model(self, **kwargs): - """Swap one or more pre-trained detector models for another one. Just pass in - the the new models to use as kwargs, e.g. 
emotion_model='svm'""" - - face_model = kwargs.get("face_model", self.info["face_model"]) - landmark_model = kwargs.get("landmark_model", self.info["landmark_model"]) - au_model = kwargs.get("au_model", self.info["au_model"]) - emotion_model = kwargs.get("emotion_model", self.info["emotion_model"]) - facepose_model = kwargs.get("facepose_model", self.info["facepose_model"]) - identity_model = kwargs.get("identity_model", self.info["identity_model"]) - - # Verify model names and download if necessary - face, landmark, au, emotion, facepose, identity = get_pretrained_models( - face_model, - landmark_model, - au_model, - emotion_model, - facepose_model, - identity_model, - self.verbose, - ) - for requested, current_name in zip( - [face, landmark, au, emotion, facepose, identity], - [ - "face_model", - "landmark_model", - "au_model", - "emotion_model", - "facepose_model", - "identity_model", - ], - ): - if requested != self.info[current_name]: - print( - f"Changing {current_name} from {self.info[current_name]} -> {requested}" + self.identity_detector.load_state_dict( + torch.load( + identity_model_file, map_location=device, weights_only=True + ) ) + self.identity_detector.eval() + self.identity_detector.to(self.device) + # self.identity_detector = torch.compile(self.identity_detector) + else: + raise ValueError("{identity_model} is not currently supported.") + else: + self.identity_detector = None - self._init_detectors( - face, - landmark, - au, - emotion, - facepose, - identity, - openface_2d_landmark_columns, - ) - - def detect_faces(self, frame, threshold=0.5, **face_model_kwargs): - """Detect faces from image or video frame + @torch.inference_mode() + def detect_faces(self, images, face_size=112, face_detection_threshold=0.5): + """ + detect faces and poses in a batch of images using img2pose Args: - frame (np.ndarray): 3d (single) or 4d (multiple) image array - threshold (float): threshold for detectiong faces (default=0.5) + img (torch.Tensor): Tensor of shape (B, C, H, W) representing the images + face_size (int): Output size to resize face after cropping. Returns: - list: list of lists with the same length as the number of frames. Each list - item is a list containing the (x1, y1, x2, y2) coordinates of each detected - face in that frame. 
- + Fex: Prediction results dataframe """ - logging.info("detecting faces...") + # img2pose + frames = convert_image_to_tensor(images, img_type="float32") / 255.0 + frames.to(self.device) - frame = convert_image_to_tensor(frame, img_type="float32") - - if "img2pose" in self.info["face_model"]: - frame = frame / 255 - # faces, poses = self.face_detector(frame, **face_model_kwargs) + batch_results = [] + for i in range(frames.size(0)): + single_frame = frames[i, ...].unsqueeze(0) # Extract single image from batch + img2pose_output = self.facepose_detector(single_frame.to(self.device)) img2pose_output = postprocess_img2pose( - self.facepose_detector(frame, **face_model_kwargs) + img2pose_output[0], detection_threshold=face_detection_threshold ) - faces = img2pose_output["boxes"] - _ = img2pose_output["dofs"][ - :, :3 - ] # Only returning xyz for now not translation - else: - faces = self.face_detector(frame, **face_model_kwargs) - - if is_list_of_lists_empty(faces): - logging.warning("Warning: NO FACE is detected") + bbox = img2pose_output["boxes"] + poses = img2pose_output["dofs"] + facescores = img2pose_output["scores"] + + # Extract faces from bbox + if bbox.numel() != 0: + extracted_faces, new_bbox = extract_face_from_bbox_torch( + single_frame, bbox, face_size=face_size + ) + else: # No Face Detected - let's test of nans will work + extracted_faces = torch.zeros((1, 3, face_size, face_size)) + # bbox = torch.zeros((1,4)) + # new_bbox = torch.zeros((1,4)) + # facescores = torch.zeros((1)) + # poses = torch.zeros((1,6)) + # extracted_faces = torch.full((1, 3, face_size, face_size), float('nan')) + bbox = torch.full((1, 4), float("nan")) + new_bbox = torch.full((1, 4), float("nan")) + facescores = torch.zeros((1)) + poses = torch.full((1, 6), float("nan")) + + frame_results = { + "face_id": i, + "faces": extracted_faces, + "boxes": bbox, + "new_boxes": new_bbox, + "poses": poses, + "scores": facescores, + } + + # Extract Faces separately for Resmasknet + if self.info["emotion_model"] == "resmasknet": + if torch.all(torch.isnan(bbox)): # No Face Detected + frame_results["resmasknet_faces"] = torch.full( + (1, 3, 224, 224), float("nan") + ) + # frame_results["resmasknet_faces"] = torch.zeros((1, 3, 224, 224)) + else: + resmasknet_faces, _ = extract_face_from_bbox_torch( + single_frame, bbox, expand_bbox=1.1, face_size=224 + ) + frame_results["resmasknet_faces"] = resmasknet_faces - thresholded_face = [] - for fframe in faces: # first level is each frame - fframe_x = [] - for fface in fframe: # second level is each face within a frame - if fface[4] >= threshold: # set thresholds - fframe_x.append(fface) - thresholded_face.append(fframe_x) + batch_results.append(frame_results) - return thresholded_face + return batch_results - def detect_landmarks(self, frame, detected_faces, **landmark_model_kwargs): - """Detect landmarks from image or video frame + @torch.inference_mode() + def forward(self, faces_data): + """ + Run Model Inference on detected faces. Args: - frame (np.ndarray): 3d (single) or 4d (multiple) image array - detected_faces (array): + faces_data (list of dict): Detected faces and associated data from `detect_faces`. 
Returns: - list: x and y landmark coordinates (1,68,2) - - Examples: - >>> from feat import Detector - >>> from feat.utils import read_pictures - >>> img_data = read_pictures(['my_image.jpg']) - >>> detector = Detector() - >>> detected_faces = detector.detect_faces(frame) - >>> detector.detect_landmarks(frame, detected_faces) + Fex: Prediction results dataframe """ - logging.info("detecting landmarks...") - frame = convert_image_to_tensor(frame) - - if is_list_of_lists_empty(detected_faces): - list_concat = detected_faces - else: - if self.info["landmark_model"]: - if self.info["landmark_model"].lower() == "mobilenet": - out_size = 224 - else: - out_size = 112 - - extracted_faces, new_bbox = extract_face_from_bbox( - frame, detected_faces, face_size=out_size - ) - - extracted_faces = extracted_faces / 255.0 + extracted_faces = torch.cat([face["faces"] for face in faces_data], dim=0) + new_bboxes = torch.cat([face["new_boxes"] for face in faces_data], dim=0) + n_faces = extracted_faces.shape[0] + if self.landmark_detector is not None: if self.info["landmark_model"].lower() == "mobilenet": extracted_faces = Compose( [Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] )(extracted_faces) - - # Run Landmark Model - if self.info["landmark_model"].lower() == "mobilefacenet": - landmark = ( - self.landmark_detector(extracted_faces, **landmark_model_kwargs)[0] - .cpu() - .data.numpy() + landmarks = self.landmark_detector.forward( + extracted_faces.to(self.device) ) + if self.info["landmark_model"].lower() == "mobilefacenet": + landmarks = self.landmark_detector.forward( + extracted_faces.to(self.device) + )[0] else: - landmark = ( - self.landmark_detector(extracted_faces, **landmark_model_kwargs) - .cpu() - .data.numpy() + landmarks = self.landmark_detector.forward( + extracted_faces.to(self.device) ) - - landmark = landmark.reshape(landmark.shape[0], -1, 2) - - landmark_results = [] - for ik in range(landmark.shape[0]): - landmark_results.append( - new_bbox[ik].inverse_transform_landmark(landmark[ik, :, :]) - ) - - length_index = [len(x) for x in detected_faces] - new_lens = np.insert(np.cumsum(length_index), 0, 0) - list_concat = [] - for ij in range(len(length_index)): - list_concat.append(landmark_results[new_lens[ij] : new_lens[ij + 1]]) - - return list_concat - - def detect_facepose(self, frame, landmarks=None, **facepose_model_kwargs): - """Detect facepose from image or video frame. - - When used with img2pose, returns *all* detected poses, and facebox and landmarks - are ignored. Use `detect_face` method in order to obtain bounding boxes - corresponding to the detected poses returned by this method. 
- - Args: - frame (np.ndarray): list of images - landmarks (np.ndarray | None, optional): (num_images, num_faces, 68, 2) - landmarks for the faces contained in list of images; Default None and - ignored for img2pose and img2pose-c detectors - - Returns: - list: poses (num_images, num_faces, [pitch, roll, yaw]) - Euler angles (in - degrees) for each face within in each image} - - """ - - logging.info("detecting poses...") - # Normalize Data - frame = convert_image_to_tensor(frame, img_type="float32") / 255 - - output = {} - if "img2pose" in self.info["facepose_model"]: - img2pose_output = self.facepose_detector(frame, **facepose_model_kwargs) - img2pose_output = postprocess_img2pose(img2pose_output[0]) - output["faces"] = img2pose_output["boxes"] - output["poses"] = img2pose_output[ - "dofs" - ] # Only returning xyz for now not translation + new_landmarks = inverse_transform_landmarks_torch(landmarks, new_bboxes) else: - output["poses"] = self.facepose_detector( - frame, landmarks, **facepose_model_kwargs - ) - - return output - - def detect_aus(self, frame, landmarks, **au_model_kwargs): - """Detect Action Units from image or video frame + new_landmarks = torch.full((n_faces, 136), float("nan")) - Args: - frame (np.ndarray): image loaded in array format (n, m, 3) - landmarks (array): 68 landmarks used to localize face. - - Returns: - array: Action Unit predictions - - Examples: - >>> from feat import Detector - >>> from feat.utils import read_pictures - >>> frame = read_pictures(['my_image.jpg']) - >>> detector = Detector() - >>> detector.detect_aus(frame) - """ - - logging.info("detecting aus...") - frame = convert_image_to_tensor(frame, img_type="float32") - - if is_list_of_lists_empty(landmarks): - return landmarks - else: - if self["au_model"].lower() in ["svm", "xgb"]: - # transform = Grayscale(3) - # frame = transform(frame) - hog_features, new_landmarks = self._batch_hog( - frames=frame, landmarks=landmarks + if self.emotion_detector is not None: + if self.info["emotion_model"] == "resmasknet": + resmasknet_faces = torch.cat( + [face["resmasknet_faces"] for face in faces_data], dim=0 ) - au_predictions = self.au_model.detect_au( - frame=hog_features, landmarks=new_landmarks, **au_model_kwargs + emotions = self.emotion_detector.forward(resmasknet_faces.to(self.device)) + emotions = torch.softmax(emotions, 1) + elif self.info["emotion_model"] == "svm": + hog_features, emo_new_landmarks = extract_hog_features( + extracted_faces, landmarks ) - else: - au_predictions = self.au_model.detect_au( - frame, landmarks=landmarks, **au_model_kwargs + emotions = self.emotion_detector.detect_emo( + frame=hog_features, landmarks=[emo_new_landmarks] ) - - return self._convert_detector_output(landmarks, au_predictions) - - def _batch_hog(self, frames, landmarks): - """ - Helper function used in batch processing hog features - - Args: - frames: a batch of frames - landmarks: a list of list of detected landmarks - - Returns: - hog_features: a numpy array of hog features for each detected landmark - landmarks: updated landmarks - """ - - hog_features = [] - new_landmark_frames = [] - for i, frame_landmark in enumerate(landmarks): - if len(frame_landmark) != 0: - new_landmarks_faces = [] - for j in range(len(frame_landmark)): - convex_hull, new_landmark = extract_face_from_landmarks( - frame=frames[i], - landmarks=frame_landmark[j], - face_size=112, - ) - - hog_features.append( - hog( - transforms.ToPILImage()(convex_hull[0] / 255.0), - orientations=8, - pixels_per_cell=(8, 8), - cells_per_block=(2, 2), 
- visualize=False, - channel_axis=-1, - ).reshape(1, -1) - ) - - new_landmarks_faces.append(new_landmark) - new_landmark_frames.append(new_landmarks_faces) - else: - hog_features.append( - np.zeros((1, 5408)) - ) # LC: Need to confirm this size is fixed. - new_landmark_frames.append([np.zeros((68, 2))]) - - hog_features = np.concatenate(hog_features) - - return (hog_features, new_landmark_frames) - - def detect_emotions(self, frame, facebox, landmarks, **emotion_model_kwargs): - """Detect emotions from image or video frame - - Args: - frame ([type]): [description] - facebox ([type]): [description] - landmarks ([type]): [description] - - Returns: - array: Action Unit predictions - - Examples: - >>> from feat import Detector - >>> from feat.utils import read_pictures - >>> img_data = read_pictures(['my_image.jpg']) - >>> detector = Detector() - >>> detected_faces = detector.detect_faces(frame) - >>> detected_landmarks = detector.detect_landmarks(frame, detected_faces) - >>> detector.detect_emotions(frame, detected_faces, detected_landmarks) - """ - - logging.info("detecting emotions...") - frame = convert_image_to_tensor(frame, img_type="float32") - - if is_list_of_lists_empty(facebox): - return facebox + emotions = torch.tensor(emotions) else: - if self.info["emotion_model"].lower() == "resmasknet": - return self._convert_detector_output( - facebox, - self.emotion_model.detect_emo(frame, facebox, **emotion_model_kwargs), - ) - - elif self.info["emotion_model"].lower() == "svm": - hog_features, new_landmarks = self._batch_hog( - frames=frame, landmarks=landmarks - ) - return self._convert_detector_output( - landmarks, - self.emotion_model.detect_emo( - frame=hog_features, - landmarks=new_landmarks, - **emotion_model_kwargs, - ), - ) - - else: - raise ValueError( - "Cannot recognize input emo model! Please try to re-type emotion model" - ) - - def detect_identity(self, frame, facebox, **identity_model_kwargs): - """Detects identity of faces from image or video frame using face representation embeddings - - Args: - frame (np.ndarray): 3d (single) or 4d (multiple) image array - threshold (float): threshold for matching identity (default=0.8) - - Returns: - list: list of lists with the same length as the number of frames. Each list - item is a list containing the (x1, y1, x2, y2) coordinates of each detected - face in that frame. + emotions = torch.full((n_faces, 7), float("nan")) - """ - - logging.info("detecting identity...") - - frame = convert_image_to_tensor(frame, img_type="float32") / 255 - - if is_list_of_lists_empty(facebox): - return facebox - else: - extracted_faces, new_bbox = extract_face_from_bbox(frame, facebox) - face_embeddings = self.identity_model( - extracted_faces, **identity_model_kwargs + if self.identity_detector is not None: + identity_embeddings = self.identity_detector.forward( + extracted_faces.to(self.device) ) - return self._convert_detector_output(facebox, face_embeddings.numpy()) - - def _run_detection_waterfall( - self, - batch_data, - face_detection_threshold, - face_model_kwargs, - landmark_model_kwargs, - facepose_model_kwargs, - emotion_model_kwargs, - au_model_kwargs, - identity_model_kwargs, - suppress_torchvision_warnings=True, - ): - """ - Main detection "waterfall." Calls each individual detector in the sequence - required to support any interactions between detections. 
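Editor's note: both the AU detector and the SVM emotion path operate on HOG descriptors of aligned 112x112 face crops. The sketch below reproduces the skimage call from the removed `_batch_hog` helper and confirms why the empty-frame placeholder was sized 5408 (the new `extract_hog_features` helper is assumed to compute the equivalent descriptor):

    # Sketch verifying the 5408-dim HOG placeholder: a 112x112 crop with 8x8
    # cells and 2x2 blocks gives 13*13 blocks, each 2*2 cells * 8 orientations
    # = 32 values, so 13*13*32 = 5408 features.
    import numpy as np
    from skimage.feature import hog

    crop = np.random.rand(112, 112, 3)  # stand-in for an aligned face crop
    features = hog(
        crop,
        orientations=8,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=False,
        channel_axis=-1,
    )
    print(features.shape)  # (5408,)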
Called - behind-the-scenes by .detect_image() and .detect_video() - - Args: - batch_data (dict): singleton item from iterating over the output of a DataLoader - face_detection_threshold (float): value between 0-1 - face_model_kwargs (dict): face model kwargs - landmark_model_kwargs (dict): landmark model kwargs - facepose_model_kwargs (dict): facepose model kwargs - emotion_model_kwargs (dict): emotion model kwargs - au_model_kwargs (dict): au model kwargs - identity_model_kwargs (dict): identity model kwargs - - Returns: - tuple: faces, landmarks, poses, aus, emotions, identities - """ - - # Reset warnings - warnings.filterwarnings("default", category=UserWarning, module="torchvision") + else: + identity_embeddings = torch.full((n_faces, 512), float("nan")) - if suppress_torchvision_warnings: - warnings.filterwarnings("ignore", category=UserWarning, module="torchvision") + if self.au_detector is not None: + hog_features, au_new_landmarks = extract_hog_features( + extracted_faces, landmarks + ) + aus = self.au_detector.detect_au( + frame=hog_features, landmarks=[au_new_landmarks] + ) + else: + aus = torch.full((n_faces, 20), float("nan")) - faces = self.detect_faces( - batch_data["Image"], - threshold=face_detection_threshold, - **face_model_kwargs, + # Create Fex Output Representation + bboxes = torch.cat( + [ + convert_bbox_output( + face_output["new_boxes"].to(self.device), + face_output["scores"].to(self.device), + ) + for face_output in faces_data + ], + dim=0, + ) + feat_faceboxes = pd.DataFrame( + bboxes.cpu().detach().numpy(), + columns=FEAT_FACEBOX_COLUMNS, ) - landmarks = self.detect_landmarks( - batch_data["Image"], - detected_faces=faces, - **landmark_model_kwargs, + poses = torch.cat( + [face_output["poses"].to(self.device) for face_output in faces_data], dim=0 + ) + feat_poses = pd.DataFrame( + poses.cpu().detach().numpy(), columns=FEAT_FACEPOSE_COLUMNS_6D ) - poses_dict = self.detect_facepose( - batch_data["Image"], landmarks, **facepose_model_kwargs + reshape_landmarks = new_landmarks.reshape(new_landmarks.shape[0], 68, 2) + reordered_landmarks = torch.cat( + [reshape_landmarks[:, :, 0], reshape_landmarks[:, :, 1]], dim=1 + ) + feat_landmarks = pd.DataFrame( + reordered_landmarks.cpu().detach().numpy(), + columns=openface_2d_landmark_columns, ) - aus = self.detect_aus(batch_data["Image"], landmarks, **au_model_kwargs) + feat_aus = pd.DataFrame(aus, columns=AU_LANDMARK_MAP["Feat"]) - emotions = self.detect_emotions( - batch_data["Image"], faces, landmarks, **emotion_model_kwargs + feat_emotions = pd.DataFrame( + emotions.cpu().detach().numpy(), columns=FEAT_EMOTION_COLUMNS ) - identities = self.detect_identity( - batch_data["Image"], - faces, - **identity_model_kwargs, + feat_identities = pd.DataFrame( + identity_embeddings.cpu().detach().numpy(), columns=FEAT_IDENTITY_COLUMNS[1:] ) - faces = _inverse_face_transform(faces, batch_data) - landmarks = _inverse_landmark_transform(landmarks, batch_data) - - # match faces to poses - sometimes face detector finds different faces than pose detector. 
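Editor's note: the landmark reordering above converts each flattened (n_faces, 136) prediction into the x-first column layout expected by `openface_2d_landmark_columns` (x_0..x_67 followed by y_0..y_67, as used later when un-padding the coordinates). A small sketch of that layout, assuming `[..., 0]` holds x and `[..., 1]` holds y:

    # Sketch of the landmark column layout built above: reshape (n, 136) to
    # (n, 68, 2), then re-flatten as all x's followed by all y's.
    import torch
    import pandas as pd

    n_faces = 2
    landmarks = torch.arange(n_faces * 136, dtype=torch.float32).reshape(n_faces, 136)
    lm = landmarks.reshape(n_faces, 68, 2)                    # [..., 0] = x, [..., 1] = y
    reordered = torch.cat([lm[:, :, 0], lm[:, :, 1]], dim=1)  # (n_faces, 136)
    columns = [f"x_{i}" for i in range(68)] + [f"y_{i}" for i in range(68)]
    df = pd.DataFrame(reordered.numpy(), columns=columns)
    print(df.loc[0, ["x_0", "y_0"]])  # first landmark point of the first face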
- faces, poses = self._match_faces_to_poses( - faces, poses_dict["faces"], poses_dict["poses"] + return Fex( + pd.concat( + [ + feat_faceboxes, + feat_landmarks, + feat_poses, + feat_aus, + feat_emotions, + feat_identities, + ], + axis=1, + ), + au_columns=AU_LANDMARK_MAP["Feat"], + emotion_columns=FEAT_EMOTION_COLUMNS, + facebox_columns=FEAT_FACEBOX_COLUMNS, + landmark_columns=openface_2d_landmark_columns, + facepose_columns=FEAT_FACEPOSE_COLUMNS_6D, + identity_columns=FEAT_IDENTITY_COLUMNS[1:], + detector="Feat", + face_model=self.info["face_model"], + landmark_model=self.info["landmark_model"], + au_model=self.info["au_model"], + emotion_model=self.info["emotion_model"], + facepose_model=self.info["facepose_model"], + identity_model=self.info["identity_model"], ) - return faces, landmarks, poses, aus, emotions, identities - - def detect_image( + def detect( self, - input_file_list, + inputs, + data_type="image", output_size=None, batch_size=1, num_workers=0, pin_memory=False, - frame_counter=0, - face_detection_threshold=0.5, face_identity_threshold=0.8, + face_detection_threshold=0.5, + skip_frames=None, + progress_bar=True, **kwargs, ): """ - Detects FEX from one or more image files. If you want to speed up detection you - can process multiple images in batches by setting `batch_size > 1`. However, all - images must have **the same dimensions** to be processed in batches. Py-feat can - automatically adjust image sizes by using the `output_size=int`. Common - output-sizes include 256 and 512. - - **NOTE: Currently batch processing images gives slightly different AU detection results due to the way that py-feat integrates the underlying models. You can examine the degree of tolerance by checking out the results of `test_detection_and_batching_with_diff_img_sizes` in our test-suite** + Detects FEX from one or more image files. Args: - input_file_list (list of str): Path to a list of paths to image files. + inputs (list of str, torch.Tensor): Path to a list of paths to image files or torch.Tensor of images (B, C, H, W) + data_type (str): type of data to be processed; Default 'image' ['image', 'tensor', 'video'] output_size (int): image size to rescale all image preserving aspect ratio. - Will raise an error if not set and batch_size > 1 but images are not the same size batch_size (int): how many batches of images you want to run at one shot. - Larger gives faster speed but is more memory-consuming. Images must be the - same size to be run in batches! - num_workers (int): how many subprocesses to use for data loading. ``0`` means that the data will be loaded in the main process. - pin_memory (bool): If ``True``, the data loader will copy Tensors into CUDA pinned memory before returning them. If your data elements are a custom type, or your :attr:`collate_fn` returns a batch that is a custom type - frame_counter (int): starting value to count frames - face_detection_threshold (float): value between 0-1 to report a detection based on the - confidence of the face detector; Default >= 0.5 + num_workers (int): how many subprocesses to use for data loading. + pin_memory (bool): If ``True``, the data loader will copy Tensors into CUDA pinned memory before returning them. 
face_identity_threshold (float): value between 0-1 to determine similarity of person using face identity embeddings; Default >= 0.8 - **kwargs: you can pass each detector specific kwargs using a dictionary - like: `face_model_kwargs = {...}, au_model_kwargs={...}, ...` + face_detection_threshold (float): value between 0-1 to determine if a face was detected; Default >= 0.5 + skip_frames (int or None): number of frames to skip to speed up inference (video only); Default None + progress_bar (bool): Whether to show the tqdm progress bar. Default is True. + **kwargs: additional detector-specific kwargs Returns: - Fex: Prediction results dataframe + pd.DataFrame: Concatenated results for all images in the batch """ - # Keyword arguments than can be passed to the underlying models - face_model_kwargs = kwargs.pop("face_model_kwargs", dict()) - landmark_model_kwargs = kwargs.pop("landmark_model_kwargs", dict()) - au_model_kwargs = kwargs.pop("au_model_kwargs", dict()) - emotion_model_kwargs = kwargs.pop("emotion_model_kwargs", dict()) - facepose_model_kwargs = kwargs.pop("facepose_model_kwargs", dict()) - identity_model_kwargs = kwargs.pop("identity_model_kwargs", dict()) - - data_loader = DataLoader( - ImageDataset( - input_file_list, - output_size=output_size, - preserve_aspect_ratio=True, - padding=True, - ), - num_workers=num_workers, - batch_size=batch_size, - pin_memory=pin_memory, - shuffle=False, - ) - - if self.info["landmark_model"] == "mobilenet" and batch_size > 1: - warnings.warn( - "Currently using mobilenet for landmark detection with batch_size > 1 may lead to erroneous detections. We recommend either setting batch_size=1 or using mobilefacenet as the landmark detection model. You can follow this issue for more: https://github.com/cosanlab/py-feat/issues/151" + if data_type.lower() == "image": + data_loader = DataLoader( + ImageDataset( + inputs, + output_size=output_size, + preserve_aspect_ratio=True, + padding=True, + ), + num_workers=num_workers, + batch_size=batch_size, + pin_memory=pin_memory, + shuffle=False, ) - - try: - batch_output = [] - - for batch_id, batch_data in enumerate(tqdm(data_loader)): - ( - faces, - landmarks, - poses, - aus, - emotions, - identities, - ) = self._run_detection_waterfall( - batch_data, - face_detection_threshold, - face_model_kwargs, - landmark_model_kwargs, - facepose_model_kwargs, - emotion_model_kwargs, - au_model_kwargs, - identity_model_kwargs, - ) - - output = self._create_fex( - faces, - landmarks, - poses, - aus, - emotions, - identities, - batch_data["FileNames"], - frame_counter, - ) - batch_output.append(output) - frame_counter += 1 * batch_size - - batch_output = pd.concat(batch_output) - batch_output.reset_index(drop=True, inplace=True) - batch_output.compute_identities( - threshold=face_identity_threshold, inplace=True + elif data_type.lower() == "tensor": + data_loader = DataLoader( + TensorDataset(inputs), + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=pin_memory, ) - return batch_output - except RuntimeError as e: - raise ValueError( - f"when using a batch_size > 1 all images must have the same dimensions or output_size must not be None so py-feat can rescale images to output_size. 
See pytorch error: \n{e}" + elif data_type.lower() == "video": + dataset = VideoDataset( + inputs, skip_frames=skip_frames, output_size=output_size + ) + data_loader = DataLoader( + dataset, + num_workers=num_workers, + batch_size=batch_size, + pin_memory=pin_memory, + shuffle=False, ) - def detect_video( - self, - video_path, - skip_frames=None, - output_size=700, - batch_size=1, - num_workers=0, - pin_memory=False, - face_detection_threshold=0.5, - face_identity_threshold=0.8, - **kwargs, - ): - """Detects FEX from a video file. - - Args: - video_path (str): Path to a video file. - skip_frames (int or None): number of frames to skip (speeds up inference, - but less temporal information); Default None - output_size (int): image size to rescale all imagee preserving aspect ratio - batch_size (int): how many batches of images you want to run at one shot. Larger gives faster speed but is more memory-consuming - num_workers (int): how many subprocesses to use for data loading. ``0`` means that the data will be loaded in the main process. - pin_memory (bool): If ``True``, the data loader will copy Tensors - into CUDA pinned memory before returning them. If your data elements - are a custom type, or your :attr:`collate_fn` returns a batch that is a custom type - face_detection_threshold (float): value between 0-1 to report a detection based on the - confidence of the face detector; Default >= 0.5 - face_identity_threshold (float): value between 0-1 to determine similarity of person using face identity embeddings; Default >= 0.8 - - Returns: - Fex: Prediction results dataframe - """ - - # Keyword arguments than can be passed to the underlying models - face_model_kwargs = kwargs.pop("face_model_kwargs", dict()) - landmark_model_kwargs = kwargs.pop("landmark_model_kwargs", dict()) - au_model_kwargs = kwargs.pop("au_model_kwargs", dict()) - emotion_model_kwargs = kwargs.pop("emotion_model_kwargs", dict()) - facepose_model_kwargs = kwargs.pop("facepose_model_kwargs", dict()) - identity_model_kwargs = kwargs.pop("identity_model_kwargs", dict()) - - dataset = VideoDataset( - video_path, skip_frames=skip_frames, output_size=output_size - ) - - data_loader = DataLoader( - dataset, - num_workers=num_workers, - batch_size=batch_size, - pin_memory=pin_memory, - shuffle=False, - ) + data_iterator = tqdm(data_loader) if progress_bar else data_loader batch_output = [] + frame_counter = 0 - for batch_data in tqdm(data_loader): - ( - faces, - landmarks, - poses, - aus, - emotions, - identities, - ) = self._run_detection_waterfall( - batch_data, - face_detection_threshold, - face_model_kwargs, - landmark_model_kwargs, - facepose_model_kwargs, - emotion_model_kwargs, - au_model_kwargs, - identity_model_kwargs, + try: + _ = next(enumerate(tqdm(data_loader))) + except RuntimeError as e: + raise ValueError( + f"When using `batch_size > 1`, all images must either have the same dimension or `output_size` should be something other than `None` to pad images prior to processing\n{e}" ) - frames = list(batch_data["Frame"].numpy()) - - output = self._create_fex( - faces, - landmarks, - poses, - aus, - emotions, - identities, - batch_data["FileName"], - frames, + for batch_id, batch_data in enumerate(data_iterator): + faces_data = self.detect_faces( + batch_data["Image"], + face_size=self.face_size if hasattr(self, "face_size") else 112, + face_detection_threshold=face_detection_threshold, ) - - batch_output.append(output) - - batch_output = pd.concat(batch_output) - batch_output.reset_index(drop=True, inplace=True) - 
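Editor's note: the unified `detect()` entry point above replaces the separate `detect_image()`/`detect_video()` methods and dispatches on `data_type` to the matching dataset/DataLoader. A hypothetical usage sketch (file names are made up; keyword names are those defined in this diff):

    # Hypothetical usage of the unified detect() entry point.
    from feat.detector import Detector

    detector = Detector(device="cpu")

    # Images: same-sized files, or set output_size so they can be padded/rescaled.
    fex_imgs = detector.detect(
        ["face1.jpg", "face2.jpg"], data_type="image", batch_size=2, output_size=512
    )

    # Video: optionally skip frames to speed up inference.
    fex_vid = detector.detect("clip.mp4", data_type="video", skip_frames=24)
    print(fex_vid["approx_time"].head())  # approximate timestamp per processed frame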
batch_output["approx_time"] = [ - dataset.calc_approx_frame_time(x) for x in batch_output["frame"].to_numpy() - ] - batch_output.compute_identities(threshold=face_identity_threshold, inplace=True) - - return batch_output.set_index("frame", drop=False) - - def _create_fex( - self, - faces, - landmarks, - poses, - aus, - emotions, - identities, - file_names, - frame_counter, - ): - """Helper function to create a Fex instance using detector output - - Args: - faces: output of detect_faces() - landmarks: output of detect_landmarks() - poses: output of dectect_facepose() - aus: output of detect_aus() - emotions: output of detect_emotions() - identities: output of detect_identities() - file_names: file name of input image - frame_counter: starting value for frame counter, useful for integrating batches - - Returns: - Fex object - """ - - logging.info("creating fex output...") - - out = [] - for i, frame in enumerate(faces): - if not frame: - facebox_df = pd.DataFrame( - {x: np.nan for x in self.info["face_detection_columns"]}, - columns=self.info["face_detection_columns"], - index=[i], - ) - facepose_df = pd.DataFrame( - {x: np.nan for x in self.info["facepose_model_columns"]}, - columns=self.info["facepose_model_columns"], - index=[i], - ) - landmarks_df = pd.DataFrame( - {x: np.nan for x in self.info["face_landmark_columns"]}, - columns=self.info["face_landmark_columns"], - index=[i], - ) - aus_df = pd.DataFrame( - {x: np.nan for x in self.info["au_presence_columns"]}, - columns=self.info["au_presence_columns"], - index=[i], - ) - emotions_df = pd.DataFrame( - {x: np.nan for x in self.info["emotion_model_columns"]}, - columns=self.info["emotion_model_columns"], - index=[i], - ) - identity_df = pd.DataFrame( - {x: np.nan for x in self.info["identity_model_columns"]}, - columns=self.info["identity_model_columns"], - index=[i], - ) - input_df = pd.DataFrame(file_names[i], columns=["input"], index=[i]) - tmp_df = pd.concat( - [ - facebox_df, - landmarks_df, - facepose_df, - aus_df, - emotions_df, - identity_df, - input_df, - ], - axis=1, - ) - if isinstance(frame_counter, (list)): - tmp_df[FEAT_TIME_COLUMNS] = frame_counter[i] + batch_results = self.forward(faces_data) + + # Create metadata for each frame + file_names = [] + frame_ids = [] + for i, face in enumerate(faces_data): + n_faces = len(face["scores"]) + if data_type.lower() == "video": + current_frame_id = batch_data["Frame"].detach().numpy()[i] else: - tmp_df[FEAT_TIME_COLUMNS] = frame_counter + i - out.append(tmp_df) - - for j, face_in_frame in enumerate(frame): - facebox_df = pd.DataFrame( - [ - [ - face_in_frame[0], - face_in_frame[1], - face_in_frame[2] - face_in_frame[0], - face_in_frame[3] - face_in_frame[1], - face_in_frame[4], + current_frame_id = frame_counter + i + frame_ids.append(np.repeat(current_frame_id, n_faces)) + file_names.append(np.repeat(batch_data["FileName"][i], n_faces)) + batch_results["input"] = np.concatenate(file_names) + batch_results["frame"] = np.concatenate(frame_ids) + + # Invert the face boxes and landmarks based on the padded output size + for j, frame_idx in enumerate(batch_results["frame"].unique()): + batch_results.loc[ + batch_results["frame"] == frame_idx, ["FrameHeight", "FrameWidth"] + ] = ( + compute_original_image_size(batch_data)[j, :] + .repeat( + len( + batch_results.loc[ + batch_results["frame"] == frame_idx, "frame" + ] + ), + 1, + ) + .numpy() + ) + batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectX"] = ( + batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectX"] 
+ - batch_data["Padding"]["Left"].detach().numpy()[j] + ) / batch_data["Scale"].detach().numpy()[j] + batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectY"] = ( + batch_results.loc[batch_results["frame"] == frame_idx, "FaceRectY"] + - batch_data["Padding"]["Top"].detach().numpy()[j] + ) / batch_data["Scale"].detach().numpy()[j] + batch_results.loc[ + batch_results["frame"] == frame_idx, "FaceRectWidth" + ] = ( + ( + batch_results.loc[ + batch_results["frame"] == frame_idx, "FaceRectWidth" ] - ], - columns=self.info["face_detection_columns"], - index=[j], - ) - - facepose_df = pd.DataFrame( - [poses[i][j]], - columns=self.info["facepose_model_columns"], - index=[j], - ) - - landmarks_df = pd.DataFrame( - [landmarks[i][j].flatten(order="F")], - columns=self.info["face_landmark_columns"], - index=[j], - ) - - aus_df = pd.DataFrame( - aus[i][j, :].reshape(1, len(self["au_presence_columns"])), - columns=self.info["au_presence_columns"], - index=[j], - ) - - emotions_df = pd.DataFrame( - emotions[i][j, :].reshape(1, len(self.info["emotion_model_columns"])), - columns=self.info["emotion_model_columns"], - index=[j], - ) - - identity_df = pd.DataFrame( - np.hstack([np.nan, identities[i][j]]).reshape(-1, 1).T, - columns=self.info["identity_model_columns"], - index=[j], - ) - - input_df = pd.DataFrame( - file_names[i], - columns=["input"], - index=[j], - ) - - tmp_df = pd.concat( - [ - facebox_df, - landmarks_df, - facepose_df, - aus_df, - emotions_df, - identity_df, - input_df, - ], - axis=1, + ) + / batch_data["Scale"].detach().numpy()[j] + ) + batch_results.loc[ + batch_results["frame"] == frame_idx, "FaceRectHeight" + ] = ( + ( + batch_results.loc[ + batch_results["frame"] == frame_idx, "FaceRectHeight" + ] + ) + / batch_data["Scale"].detach().numpy()[j] ) - if isinstance(frame_counter, (list)): - tmp_df[FEAT_TIME_COLUMNS] = frame_counter[i] - else: - tmp_df[FEAT_TIME_COLUMNS] = frame_counter + i - out.append(tmp_df) + for i in range(68): + batch_results.loc[batch_results["frame"] == frame_idx, f"x_{i}"] = ( + batch_results.loc[batch_results["frame"] == frame_idx, f"x_{i}"] + - batch_data["Padding"]["Left"].detach().numpy()[j] + ) / batch_data["Scale"].detach().numpy()[j] + batch_results.loc[batch_results["frame"] == frame_idx, f"y_{i}"] = ( + batch_results.loc[batch_results["frame"] == frame_idx, f"y_{i}"] + - batch_data["Padding"]["Top"].detach().numpy()[j] + ) / batch_data["Scale"].detach().numpy()[j] - out = pd.concat(out) - out.reset_index(drop=True, inplace=True) - - # TODO: Add in support for gaze_columns - return Fex( - out, - au_columns=self.info["au_presence_columns"], - emotion_columns=self.info["emotion_model_columns"], - facebox_columns=self.info["face_detection_columns"], - landmark_columns=self.info["face_landmark_columns"], - facepose_columns=self.info["facepose_model_columns"], - identity_columns=self.info["identity_model_columns"], - detector="Feat", - face_model=self.info["face_model"], - landmark_model=self.info["landmark_model"], - au_model=self.info["au_model"], - emotion_model=self.info["emotion_model"], - facepose_model=self.info["facepose_model"], - identity_model=self.info["identity_model"], - ) - - @staticmethod - def _convert_detector_output(detected_faces, detector_results): - """ - Helper function to convert AU/Emotion detector output into frame by face list of lists. - Either face or landmark detector list of list outputs can be used. 
- - Args: - detected_faces (list): list of lists output from face/landmark detector - au_results (np.array):, results from au/emotion detectors - - Returns: - list_concat: (list of list). The list which contains the number of faces. for example - if you process 2 frames and each frame contains 4 faces, it will return: - [[xxx,xxx,xxx,xxx],[xxx,xxx,xxx,xxx]] - """ - - length_index = [len(x) for x in detected_faces] - - list_concat = [] - new_lens = np.insert(np.cumsum(length_index), 0, 0) - for ij in range(len(length_index)): - list_concat.append(detector_results[new_lens[ij] : new_lens[ij + 1], :]) - return list_concat - - @staticmethod - def _match_faces_to_poses(faces, faces_pose, poses): - """Helper function to match list of lists of faces and poses based on overlap in bounding boxes. - - Sometimes the face detector finds different faces than the pose detector unless the user - is using the same detector (i.e., img2pose). - - This function will match the faces and poses and will return nans if more faces are detected then poses. - Will only return poses that match faces even if more faces are detected by pose detector. - - Args: - faces (list): list of lists of face bounding boxes from face detector - faces_pose (list): list of lists of face bounding boxes from pose detector - poses (list): list of lists of poses from pose detector - - Returns: - faces (list): list of list of faces that have been matched to poses - poses (list): list of list of poses that have been matched to faces - """ - - if len(faces) != len(faces_pose): - raise ValueError( - "Make sure the number of batches in faces and poses is the same." - ) - - if is_list_of_lists_empty(faces): - # Currently assuming no faces if no face is detected. Not running pose - return (faces, poses) - - else: - overlap_faces = [] - overlap_poses = [] - for frame_face, frame_face_pose, frame_pose in zip(faces, faces_pose, poses): - if not frame_face: - n_faces = 0 - elif isinstance(frame_face[0], list): - n_faces = len(frame_face) - else: - n_faces = 1 - - if not frame_face_pose: - n_poses = 0 - elif isinstance(frame_face_pose[0], list): - n_poses = len(frame_face_pose) - else: - n_poses = 1 - - frame_overlap = np.zeros([n_faces, n_poses]) - - if n_faces == 0: - overlap_faces.append([]) - overlap_poses.append([]) - - elif (n_faces == 1) & (n_poses > 1): - b1 = BBox(frame_face[0][:-1]) - - for pose_idx in range(n_poses): - b2 = BBox(frame_face_pose[pose_idx][:-1]) - frame_overlap[0, pose_idx] = b1.overlap(b2) - matched_pose_index = np.where( - frame_overlap[0, :] == frame_overlap[0, :].max() - )[0][0] - overlap_faces.append(frame_face) - overlap_poses.append([frame_pose[matched_pose_index]]) - - elif (n_faces > 1) & (n_poses == 1): - b2 = BBox(frame_face_pose[0][:-1]) - for face_idx in range(n_faces): - b1 = BBox(frame_face[face_idx][:-1]) - frame_overlap[face_idx, 0] = b1.overlap(b2) - matched_face_index = np.where( - frame_overlap[:, 0] == frame_overlap[:, 0].max() - )[0][0] - new_poses = [] - for f_idx in range(n_faces): - if f_idx == matched_face_index: - new_poses.append(frame_pose[0]) - else: - new_poses.append(np.ones(3) * np.nan) - overlap_faces.append(frame_face) - overlap_poses.append(new_poses) - - else: - for face_idx in range(n_faces): - b1 = BBox(frame_face[face_idx][:-1]) - for pose_idx in range(n_poses): - b2 = BBox(frame_face_pose[pose_idx][:-1]) - frame_overlap[face_idx, pose_idx] = b1.overlap(b2) - - overlap_faces_frame = [] - overlap_poses_frame = [] - if n_faces < n_poses: - for face_idx in range(n_faces): - pose_idx = 
np.where( - frame_overlap[face_idx, :] - == frame_overlap[face_idx, :].max() - )[0][0] - overlap_faces_frame.append(frame_face[face_idx]) - overlap_poses_frame.append(frame_pose[pose_idx]) - elif n_faces > n_poses: - matched_pose_index = [] - for pose_idx in range(n_poses): - matched_pose_index.append( - np.where( - frame_overlap[:, pose_idx] - == frame_overlap[:, pose_idx].max() - )[0][0] - ) - for face_idx in range(n_faces): - overlap_faces_frame.append(frame_face[face_idx]) - if face_idx in matched_pose_index: - overlap_poses_frame.append( - frame_pose[ - np.where( - frame_overlap[face_idx, :] - == frame_overlap[face_idx, :].max() - )[0][0] - ] - ) - else: - overlap_poses_frame.append(np.ones(3) * np.nan) - elif n_faces == n_poses: - overlap_faces_frame = frame_face - overlap_poses_frame = frame_pose - - overlap_faces.append(overlap_faces_frame) - overlap_poses.append(overlap_poses_frame) - - return (overlap_faces, overlap_poses) + batch_output.append(batch_results) + frame_counter += 1 * batch_size + batch_output = pd.concat(batch_output) + batch_output.reset_index(drop=True, inplace=True) + if data_type.lower() == "video": + batch_output["approx_time"] = [ + dataset.calc_approx_frame_time(x) + for x in batch_output["frame"].to_numpy() + ] + batch_output.compute_identities(threshold=face_identity_threshold, inplace=True) + return batch_output diff --git a/feat/plotting.py b/feat/plotting.py index cc8fba6c..5b873dd1 100644 --- a/feat/plotting.py +++ b/feat/plotting.py @@ -27,7 +27,6 @@ from scipy.spatial import ConvexHull import torchvision.transforms as transforms from torchvision.utils import draw_keypoints, draw_bounding_boxes, make_grid -from feat.utils.mp_plotting import FaceLandmarksConnections __all__ = [ "draw_lineface", @@ -1560,154 +1559,3 @@ def extract_face_from_landmarks(frame, landmarks, face_size=112): masked_image = mask_image(aligned_img, mask) return (masked_image, new_landmarks) - - -def plot_face_landmarks( - fex, - frame_idx, - ax=None, - oval_color="white", - oval_linestyle="-", - oval_linewidth=3, - tesselation_color="gray", - tesselation_linestyle="-", - tesselation_linewidth=1, - mouth_color="white", - mouth_linestyle="-", - mouth_linewidth=3, - eye_color="navy", - eye_linestyle="-", - eye_linewidth=2, - iris_color="skyblue", - iris_linestyle="-", - iris_linewidth=2, -): - """Plots face landmarks on the given frame using specified styles for each part. - - Args: - fex: DataFrame containing face landmarks (x, y coordinates). - frame_idx: Index of the frame to plot. - ax: Matplotlib axis to draw on. If None, a new axis is created. - oval_color, tesselation_color, mouth_color, eye_color, iris_color: Colors for each face part. - oval_linestyle, tesselation_linestyle, mouth_linestyle, eye_linestyle, iris_linestyle: Linestyle for each face part. - oval_linewidth, tesselation_linewidth, mouth_linewidth, eye_linewidth, iris_linewidth: Linewidth for each face part. - n_faces: Number of faces in the frame. If None, will be determined from fex. 
- """ - if ax is None: - fig, ax = plt.subplots(figsize=(10, 10)) - - # Get frame data - fex_frame = fex.query("frame == @frame_idx") - n_faces_frame = fex_frame.shape[0] - - # Add the frame image - ax.imshow(Image.open(fex_frame["input"].unique()[0])) - - # Helper function to draw lines for a set of connections - def draw_connections(face_idx, connections, color, linestyle, linewidth): - for connection in connections: - start = connection.start - end = connection.end - line = plt.Line2D( - [fex.loc[face_idx, f"x_{start}"], fex.loc[face_idx, f"x_{end}"]], - [fex.loc[face_idx, f"y_{start}"], fex.loc[face_idx, f"y_{end}"]], - color=color, - linestyle=linestyle, - linewidth=linewidth, - ) - ax.add_line(line) - - # Face tessellation - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION, - tesselation_color, - tesselation_linestyle, - tesselation_linewidth, - ) - - # Mouth - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_LIPS, - mouth_color, - mouth_linestyle, - mouth_linewidth, - ) - - # Left iris - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_LEFT_IRIS, - iris_color, - iris_linestyle, - iris_linewidth, - ) - - # Left eye - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_LEFT_EYE, - eye_color, - eye_linestyle, - eye_linewidth, - ) - - # Left eyebrow - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_LEFT_EYEBROW, - eye_color, - eye_linestyle, - eye_linewidth, - ) - - # Right iris - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_IRIS, - iris_color, - iris_linestyle, - iris_linewidth, - ) - - # Right eye - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_EYE, - eye_color, - eye_linestyle, - eye_linewidth, - ) - - # Right eyebrow - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_EYEBROW, - eye_color, - eye_linestyle, - eye_linewidth, - ) - - # Face oval - for face in range(n_faces_frame): - draw_connections( - face, - FaceLandmarksConnections.FACE_LANDMARKS_FACE_OVAL, - oval_color, - oval_linestyle, - oval_linewidth, - ) - - # Optionally turn off axis for a clean plot - ax.axis("off") - - return ax diff --git a/feat/tests/performance_testing.py b/feat/tests/performance_testing.py index ecc41e17..d5cde0c1 100644 --- a/feat/tests/performance_testing.py +++ b/feat/tests/performance_testing.py @@ -1,5 +1,5 @@ # %% -from feat.FastDetector import MPDetector +from feat.MPDetector import MPDetector import os from feat.utils.io import get_test_data_path import cProfile @@ -7,7 +7,6 @@ multi_face = os.path.join(get_test_data_path(), "multi_face.jpg") -# detector = FastDetector() detector = MPDetector(device="mps", emotion_model="resmasknet", identity_model="facenet") # detector.detect(multi_face, data_type='image') diff --git a/feat/tests/test_fast_detector.py b/feat/tests/test_detector.py similarity index 98% rename from feat/tests/test_fast_detector.py rename to feat/tests/test_detector.py index 0c4aece7..e79c41ae 100644 --- a/feat/tests/test_fast_detector.py +++ b/feat/tests/test_detector.py @@ -1,5 +1,5 @@ import pytest -from feat.FastDetector import FastDetector +from feat.detector import Detector from feat.data import Fex from 
huggingface_hub import PyTorchModelHubMixin import numpy as np @@ -21,10 +21,10 @@ "face_noface_mov", "noface_face_mov", ) -class Test_Fast_Detector: +class Test_Detector: """Test new single model detector""" - detector = FastDetector(device="cpu") + detector = Detector(device="cpu") def test_init(self): assert isinstance(self.detector, PyTorchModelHubMixin) @@ -122,7 +122,7 @@ def test_fast_detection_and_batching_with_diff_img_sizes( def test_fast_init_with_wrongmodelname(self): """Should fail with unsupported model name""" with pytest.raises(ValueError): - _ = FastDetector(emotion_model="badmodelname") + _ = Detector(emotion_model="badmodelname") def test_fast_nofile(self): """Should fail with missing data"""