From aa1e5ea6677aa436217fe5b7100989e8a9e6a702 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Mon, 5 Nov 2018 14:37:15 +0000
Subject: [PATCH 1/8] Cleaned up extract_landmarks_method.py

Fixed estimate_gaze.launch
---
 rt_gene/launch/estimate_gaze.launch           |   4 +-
 .../src/rt_gene/extract_landmarks_method.py   | 231 +++---------------
 2 files changed, 41 insertions(+), 194 deletions(-)

diff --git a/rt_gene/launch/estimate_gaze.launch b/rt_gene/launch/estimate_gaze.launch
index 90fdbc8..93073ce 100644
--- a/rt_gene/launch/estimate_gaze.launch
+++ b/rt_gene/launch/estimate_gaze.launch
@@ -1,5 +1,5 @@
-
+
@@ -24,7 +24,7 @@
         args="0.0 0.0 0.0 0.0 0.0 0.0 1.0 $(arg rgb_frame_id_ros) $(arg rgb_frame_id) 100" unless="$(arg is_frame_rotated)" />
-
+
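The second file in this patch is where the substantive change happens: the MTCNN/TensorFlow face detector, the TensorFlow landmark CNN and the Hopenet head-pose network are replaced by the face-alignment package (an S3FD face detector plus a 2D 68-landmark network), with head pose left to the solvePnP fit. A minimal standalone sketch of that API as the new code below uses it; the image path is a placeholder and a CUDA device is assumed:

```python
import cv2
import face_alignment
from face_alignment.detection.sfd import FaceDetector

image = cv2.imread("face.jpg")  # hypothetical input frame

# S3FD detector: each detection is [x_min, y_min, x_max, y_max, confidence].
detector = FaceDetector(device="cuda:0")
detections = detector.detect_from_image(image)
faceboxes = [d[:4] for d in detections if d[4] > 0.8]

# 68-point 2D landmark network; one (68, 2) array per supplied face box.
landmark_net = face_alignment.FaceAlignment(landmarks_type=face_alignment.LandmarksType._2D,
                                            device="cuda:0", flip_input=True)
landmarks_per_face = landmark_net.get_landmarks(image, detected_faces=faceboxes)
```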
diff --git a/rt_gene/src/rt_gene/extract_landmarks_method.py b/rt_gene/src/rt_gene/extract_landmarks_method.py
index 827fcb7..354dc99 100644
--- a/rt_gene/src/rt_gene/extract_landmarks_method.py
+++ b/rt_gene/src/rt_gene/extract_landmarks_method.py
@@ -48,6 +48,9 @@ from rt_gene.msg import MSG_SubjectImagesList
 from rt_gene.subject_ros_bridge import SubjectListBridge
 
+import face_alignment
+from face_alignment.detection.sfd import FaceDetector
+
 
 class SubjectDetected(object):
     def __init__(self, face_bb):
@@ -102,71 +105,15 @@ def __init__(self):
         # multiple person faces publication for visualisation
         self.subject_faces_pub = rospy.Publisher("/subjects/faces", Image, queue_size=1)
 
-        self.CNN_INPUT_SIZE = 128
-
         self.model_points = self._get_full_model_points()
 
         self.sess_bb = None
-        self.use_mtcnn = rospy.get_param("~use_mtcnn", True)
-        if self.use_mtcnn:
-            self.minsize = 40  # minimum size of face
-            self.threshold = [0.6, 0.7, 0.7]  # three steps's threshold
-            self.factor = 0.709  # scale factor
-            self.pnet, self.rnet, self.onet = self.load_mtcnn_network()
-        else:
-            rospy.logwarn(
-                "This doesn't work with the currently shipped version of ROS_OpenCV (3.3.1-dev) but requires OpenCV-3.3.1")
-            dnn_proto_txt = os.path.join(rospkg.RosPack().get_path('rt_gene'), 'model_nets/dnn_deploy.prototxt')
-            dnn_model = os.path.join(rospkg.RosPack().get_path('rt_gene'),
-                                     'model_nets/res10_300x300_ssd_iter_140000.caffemodel')
-            self.face_net = cv2.dnn.readNetFromCaffe(dnn_proto_txt, dnn_model)
-
-        mark_model = os.path.join(rospkg.RosPack().get_path('rt_gene'), 'model_nets/frozen_inference_graph.pb')
-
-        detection_graph = tensorflow.Graph()
-        with detection_graph.as_default():
-            od_graph_def = tensorflow.GraphDef()
-            with tensorflow.gfile.GFile(mark_model, 'rb') as fid:
-                serialized_graph = fid.read()
-                od_graph_def.ParseFromString(serialized_graph)
-                tensorflow.import_graph_def(od_graph_def, name='')
-        self.graph = detection_graph
-
-        gpu_options = tensorflow.GPUOptions(allow_growth=True, visible_device_list="0",
-                                            per_process_gpu_memory_fraction=0.3)
-        self.sess = tensorflow.Session(graph=detection_graph, config=tensorflow.ConfigProto(gpu_options=gpu_options,
-                                                                                            log_device_placement=False,
-                                                                                            inter_op_parallelism_threads=1,
-                                                                                            intra_op_parallelism_threads=1
-                                                                                            ))
-        self.color_sub = rospy.Subscriber("/image", Image, self.callback, buff_size=2 ** 24, queue_size=1)
-
-        hopenet_snapshot_path = os.path.join(rospkg.RosPack().get_path('rt_gene'),
-                                             'model_nets/hopenet_robust_alpha1.pkl')
-
-        # ResNet50 structure
-        self.hopenet = Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
-
-        print('Loading Hopenet Snapshot.')
-        # Load snapshot
-        saved_state_dict = torch.load(hopenet_snapshot_path)
-        self.hopenet.load_state_dict(saved_state_dict)
+        self.face_net = FaceDetector(device="cuda:0")
 
-        print('Loading data.')
-        idx_tensor = [idx for idx in range(66)]
-        self.idx_tensor = torch.FloatTensor(idx_tensor).cuda(0)
-
-        self.transformations = transforms.Compose([transforms.Resize(224),
-                                                   transforms.CenterCrop(224), transforms.ToTensor(),
-                                                   transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                                                        std=[0.229, 0.224, 0.225])])
-
-        self.hopenet.cuda(0)
-        self.hopenet.eval()
+        self.color_sub = rospy.Subscriber("/image", Image, self.callback, buff_size=2 ** 24, queue_size=1)
 
-    def __del__(self):
-        if self.sess is not None:
-            self.sess.close()
+        self.facial_landmark_nn = face_alignment.FaceAlignment(landmarks_type=face_alignment.LandmarksType._2D,
+                                                               device="cuda:0", flip_input=True)
 
     def _get_full_model_points(self):
         """Get all 68 3D model points from file"""
@@ -186,48 +133,18 @@ def _get_full_model_points(self):
 
         return model_points
 
-    def get_face_bb_mtcnn(self, image, _):
-        import rt_gene.detect_face as detect_face
-        bounding_boxes, landmark_points = detect_face.detect_face(image, self.minsize, self.pnet, self.rnet, self.onet,
-                                                                  self.threshold, self.factor)
-
-        faceboxes = []
-        for box in bounding_boxes[:, 0:4].astype(np.uint32):
-            # Move box down.
-            diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
-            offset_y = int(abs(diff_height_width / 2))
-            box_moved = self.move_box(box, [0, offset_y])
-
-            # Make box square.
-            facebox = self.get_square_box(box_moved)
-
-            if self.box_in_image(facebox, image):
-                faceboxes.append(facebox)
-        return faceboxes
-
-    # noinspection PyUnusedLocal
-    def get_face_bb(self, image, timestamp):
-        rows, cols, _ = image.shape
-        threshold = 0.5
+    def get_face_bb(self, image):
+        threshold = 0.8
         faceboxes = []
 
-        resized = cv2.resize(image, (300, 300))
-        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
-
-        self.face_net.setInput(cv2.dnn.blobFromImage(
-            resized, 1.0, (300, 300), (104.0, 177.0, 123.0), False, False))
-        detections = self.face_net.forward()
+        detections = self.face_net.detect_from_image(image)
 
-        for result in detections[0, 0, :, :]:
-            confidence = result[2]
+        for result in detections:
+            x_left_top, y_left_top, x_right_bottom, y_right_bottom, confidence = result
             if confidence > threshold:
-                x_left_bottom = int(result[3] * cols)
-                y_left_bottom = int(result[4] * rows)
-                x_right_top = int(result[5] * cols)
-                y_right_top = int(result[6] * rows)
-                box = [x_left_bottom, y_left_bottom, x_right_top, y_right_top]
+                box = [x_left_top, y_left_top, x_right_bottom, y_right_bottom]
                 diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
                 offset_y = int(abs(diff_height_width / 2))
                 box_moved = self.move_box(box, [0, offset_y])
@@ -284,9 +201,6 @@ def get_square_box(box):
         if diff % 2 == 1:
             bottom_y += 1
 
-        # Make sure box is always square.
-        assert ((right_x - left_x) == (bottom_y - top_y)), 'Box is not square.'
-
         return [left_x, top_y, right_x, bottom_y]
 
     def process_image(self, color_msg):
@@ -307,7 +221,7 @@ def process_image(self, color_msg):
             tqdm.write("No face found")
             return
 
-        self.get_eye_image() # update self.subjects
+        self.get_eye_image()  # update self.subjects
 
         final_head_pose_images = None
         for subject_id, subject in self.subjects.items():
@@ -324,19 +238,21 @@ def process_image(self, color_msg):
                                                           cov_process=0.1,
                                                           cov_measure=0.1) for _ in range(6)]
 
-            success, _, translation_vector = self.get_head_pose(color_img, subject.marks, subject_id)
-            rotation_vector = self.get_head_pose_deep_net(color_img, subject)
+            success, rotation_vector, translation_vector = self.get_head_pose(color_img, subject.marks, subject_id)
 
             # Publish all the data
             translation_headpose_tf = self.get_head_translation(timestamp, subject_id)
 
             if success:
                 if translation_headpose_tf is not None:
-                    euler_angles_head = self.publish_pose(timestamp, translation_headpose_tf, rotation_vector, subject_id)
+                    euler_angles_head = self.publish_pose(timestamp, translation_headpose_tf, rotation_vector,
+                                                          subject_id)
 
                     if euler_angles_head is not None:
-                        headpose_image = self.visualize_headpose_result(subject.face_color, gaze_tools.get_phi_theta_from_euler(euler_angles_head))
-
+                        headpose_image = self.visualize_headpose_result(subject.face_color,
+                                                                        gaze_tools.get_phi_theta_from_euler(
+                                                                            euler_angles_head))
+
                         if final_head_pose_images is None:
                             final_head_pose_images = headpose_image
                         else:
@@ -350,38 +266,8 @@ def process_image(self, color_msg):
             headpose_image_ros = self.bridge.cv2_to_imgmsg(final_head_pose_images, "bgr8")
             headpose_image_ros.header.stamp = timestamp
             self.subject_faces_pub.publish(headpose_image_ros)
-
-        tqdm.write('Elapsed total: ' + str(time.time() - start_time) + '\n\n')
-
-    def get_head_pose_deep_net(self, frame, subject):
-
-        img = frame[subject.face_bb[1]: subject.face_bb[3], subject.face_bb[0]: subject.face_bb[2]]
-
-        img = pilImage.fromarray(img)
-        # Transform
-        img = self.transformations(img)
-        img_shape = img.size()
-        img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
-        img = Variable(img).cuda(0)
-
-        yaw, pitch, roll = self.hopenet(img)
-
-        yaw_predicted = F.softmax(yaw, dim=1)
-        pitch_predicted = F.softmax(pitch, dim=1)
-        roll_predicted = F.softmax(roll, dim=1)
-        # Get continuous predictions in degrees.
-        yaw_predicted = torch.sum(yaw_predicted.data[0] * self.idx_tensor) * 3 - 99
-        pitch_predicted = torch.sum(pitch_predicted.data[0] * self.idx_tensor) * 3 - 99
-        roll_predicted = torch.sum(roll_predicted.data[0] * self.idx_tensor) * 3 - 99
-
-        p = pitch_predicted * np.pi / 180
-        y = -(yaw_predicted * np.pi / 180)
-        r = roll_predicted * np.pi / 180
-
-        quat = tf_transformations.quaternion_from_euler(r + np.pi, p + np.pi, y)
-
-        return quat
+
+        tqdm.write('Elapsed total: ' + str(time.time() - start_time) + '\n\n')
 
     def get_head_pose(self, color_img, landmarks, subject_id):
         """
@@ -406,7 +292,8 @@ def get_head_pose(self, color_img, landmarks, subject_id):
         # tqdm.write("Camera Matrix :\n {0}".format(camera_matrix))
 
         try:
-            if self.last_rvec[subject_id] is not None and self.last_tvec[subject_id] is not None and self.use_previous_headpose_estimate:
+            if self.last_rvec[subject_id] is not None and self.last_tvec[
+                subject_id] is not None and self.use_previous_headpose_estimate:
                 (success, rotation_vector_unstable, translation_vector_unstable) = \
                     cv2.solvePnP(self.model_points, image_points_headpose, camera_matrix, dist_coeffs,
                                  flags=cv2.SOLVEPNP_ITERATIVE, useExtrinsicGuess=True,
@@ -443,10 +330,10 @@ def get_head_pose(self, color_img, landmarks, subject_id):
         return success, rot_head, translation_vector
 
     def publish_subject_list(self, timestamp, subjects):
-        assert(subjects is not None)
-
+        assert (subjects is not None)
+
         subject_list_message = self.__subject_bridge.images_to_msg(subjects, timestamp)
-
+
         self.subject_pub.publish(subject_list_message)
 
     @staticmethod
@@ -510,20 +397,6 @@ def callback(self, color_msg):
             else:
                 raise e
 
-    def detect_marks(self, image_np):
-        """Detect marks from image"""
-        # Get result tensor by its name.
-        logits_tensor = self.graph.get_tensor_by_name('logits/BiasAdd:0')
-
-        # Actual detection.
-        predictions = self.sess.run(logits_tensor, feed_dict={'input_image_tensor:0': image_np})
-
-        # Convert predictions to landmarks.
-        marks = np.array(predictions).flatten()
-        marks = np.reshape(marks, (-1, 2))
-
-        return marks
-
     def __update_subjects(self, new_faceboxes):
         """
         Assign the new faces to the existing subjects (id tracking)
@@ -556,33 +429,24 @@ def __update_subjects(self, new_faceboxes):
 
     def __detect_landmarks_one_box(self, facebox, color_img):
         try:
-            face_img = color_img[facebox[1]: facebox[3], facebox[0]: facebox[2]]
-            face_img = cv2.resize(face_img, (self.CNN_INPUT_SIZE, self.CNN_INPUT_SIZE))
-            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
-
-            marks = self.detect_marks(face_img)
-
-            marks_orig = np.array(marks, copy=True)
-            marks_orig *= self.CNN_INPUT_SIZE
-
-            # Convert the marks locations from local CNN to global image.
-            marks *= (facebox[2] - facebox[0])
-            marks[:, 0] += facebox[0]
-            marks[:, 1] += facebox[1]
+            _bb = map(int, facebox)
+            face_img = color_img[_bb[1]: _bb[3], _bb[0]: _bb[2]]
+            marks_orig = np.array(self.__detect_facial_landmarks(color_img, facebox)[0])
 
             eye_indices = np.array([36, 39, 42, 45])
 
             transformed_landmarks = marks_orig[eye_indices]
 
-            return face_img, transformed_landmarks, marks
+            return face_img, transformed_landmarks, marks_orig
         except Exception:
            return None, None, None
 
+    def __detect_facial_landmarks(self, color_img, facebox):
+        marks = self.facial_landmark_nn.get_landmarks(color_img, detected_faces=[facebox])
+        return marks
+
     def detect_landmarks(self, color_img, timestamp):
-        if self.use_mtcnn:
-            faceboxes = self.get_face_bb_mtcnn(color_img, timestamp)
-        else:
-            faceboxes = self.get_face_bb(color_img, timestamp)
+        faceboxes = self.get_face_bb(color_img)
 
         self.__update_subjects(faceboxes)
 
@@ -594,6 +458,7 @@ def detect_landmarks(self, color_img, timestamp):
 
     def __get_eye_image_one(self, transformed_landmarks, face_aligned_color):
         margin_ratio = 1.0
+        print(transformed_landmarks)
 
         try:
             # Get the width of the eye, and compute how big the margin should be according to the width
@@ -633,7 +498,8 @@ def __get_eye_image_one(self, transformed_landmarks, face_aligned_color):
 
             # So far, we have only ensured that the ratio is correct. Now, resize it to the desired size.
             left_eye_color_resized = scipy.misc.imresize(left_eye_color, self.eye_image_size, interp='bilinear')
             right_eye_color_resized = scipy.misc.imresize(right_eye_color, self.eye_image_size, interp='bilinear')
-        except (ValueError, TypeError):
+        except (ValueError, TypeError) as e:
+            print(e)
             return None, None, None, None
         return left_eye_color_resized, right_eye_color_resized, left_bb, right_bb
 
@@ -670,22 +536,3 @@ def get_image_points_eyes_nose(landmarks):
         return (nose_center_x, nose_center_y), \
                (left_eye_center_x, left_eye_center_y), (right_eye_center_x, right_eye_center_y)
-
-    def load_mtcnn_network(self):
-        import rt_gene.detect_face as detect_face
-        import tensorflow
-
-        """Load the MTCNN network."""
-        tqdm.write('Creating networks and loading parameters')
-
-        with tensorflow.Graph().as_default():
-            gpu_memory_fraction = rospy.get_param("~gpu_memory_fraction", 0.05)
-            gpu_options = tensorflow.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
-            # gpu_options = tensorflow.GPUOptions(allow_growth=True, visible_device_list="0")
-            self.sess_bb = tensorflow.Session(config=tensorflow.ConfigProto(gpu_options=gpu_options,
-                                                                            log_device_placement=False))
-            with self.sess_bb.as_default():
-                model_path = rospkg.RosPack().get_path('rt_gene') + '/model_nets'
-                pnet, rnet, onet = detect_face.create_mtcnn(self.sess_bb, model_path)
-                return pnet, rnet, onet
-
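Both before and after this patch, every detection goes through the same post-processing seen above: the box is shifted down by half the height/width difference and then squared, so the landmark network always sees the same aspect ratio. A self-contained sketch of that geometry with made-up numbers (the helper names mirror the methods in the diff, but this is an illustration rather than the exact implementation):

```python
def move_box(box, offset):
    """Shift [x_min, y_min, x_max, y_max] by (dx, dy)."""
    return [box[0] + offset[0], box[1] + offset[1],
            box[2] + offset[0], box[3] + offset[1]]

def get_square_box(box):
    """Grow the shorter side so that width == height."""
    left_x, top_y, right_x, bottom_y = box
    diff = (bottom_y - top_y) - (right_x - left_x)
    if diff > 0:          # taller than wide: widen symmetrically
        left_x -= diff // 2
        right_x += diff - diff // 2
    elif diff < 0:        # wider than tall: heighten symmetrically
        top_y -= -diff // 2
        bottom_y += -diff - (-diff // 2)
    return [left_x, top_y, right_x, bottom_y]

box = [100, 50, 180, 170]  # 80 wide, 120 tall (invented example)
offset_y = abs((box[3] - box[1]) - (box[2] - box[0])) // 2
box = move_box(box, [0, offset_y])   # move down by 20
print(get_square_box(box))           # -> [80, 70, 200, 190], a 120x120 square
```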
From 78871a7a91a7f3c1fe4a9e54747068443e53bc40 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Mon, 5 Nov 2018 15:07:21 +0000
Subject: [PATCH 2/8] moved the eye_coordinates to reference top left upper...

---
 rt_gene/src/rt_gene/extract_landmarks_method.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rt_gene/src/rt_gene/extract_landmarks_method.py b/rt_gene/src/rt_gene/extract_landmarks_method.py
index 354dc99..d37ebc8 100644
--- a/rt_gene/src/rt_gene/extract_landmarks_method.py
+++ b/rt_gene/src/rt_gene/extract_landmarks_method.py
@@ -436,6 +436,8 @@ def __detect_landmarks_one_box(self, facebox, color_img):
             eye_indices = np.array([36, 39, 42, 45])
 
             transformed_landmarks = marks_orig[eye_indices]
+            transformed_landmarks[:, 0] -= facebox[0]
+            transformed_landmarks[:, 1] -= facebox[1]
 
             return face_img, transformed_landmarks, marks_orig
         except Exception:
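The two added lines re-express the eye landmarks relative to the top-left corner of the face box, so they can be used directly as indices into the face crop rather than into the full camera image. A toy illustration with invented numbers:

```python
import numpy as np

facebox = [200, 100, 400, 300]             # x_min, y_min, x_max, y_max in the full image
eye_landmarks = np.array([[250.0, 180.0],  # hypothetical image-frame eye corners
                          [290.0, 182.0],
                          [330.0, 181.0],
                          [370.0, 179.0]])

eye_landmarks[:, 0] -= facebox[0]          # x now relative to the crop
eye_landmarks[:, 1] -= facebox[1]          # y now relative to the crop
print(eye_landmarks)                       # valid coordinates inside the face crop
```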
From 7fe3bc57f5831dd022a219f6c784772f6e3dbf37 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Mon, 5 Nov 2018 15:28:46 +0000
Subject: [PATCH 3/8] a little cleaning

---
 .../src/rt_gene/extract_landmarks_method.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/rt_gene/src/rt_gene/extract_landmarks_method.py b/rt_gene/src/rt_gene/extract_landmarks_method.py
index d37ebc8..d41a147 100644
--- a/rt_gene/src/rt_gene/extract_landmarks_method.py
+++ b/rt_gene/src/rt_gene/extract_landmarks_method.py
@@ -113,7 +113,7 @@ def __init__(self):
         self.color_sub = rospy.Subscriber("/image", Image, self.callback, buff_size=2 ** 24, queue_size=1)
 
         self.facial_landmark_nn = face_alignment.FaceAlignment(landmarks_type=face_alignment.LandmarksType._2D,
-                                                               device="cuda:0", flip_input=True)
+                                                               device="cuda:0", flip_input=False)
 
     def _get_full_model_points(self):
         """Get all 68 3D model points from file"""
@@ -134,15 +134,15 @@ def _get_full_model_points(self):
         return model_points
 
     def get_face_bb(self, image):
-        threshold = 0.8
-
         faceboxes = []
 
+        start_time = time.time()
         detections = self.face_net.detect_from_image(image)
+        print("Time: {}".format(1/(time.time() - start_time)))
 
         for result in detections:
             x_left_top, y_left_top, x_right_bottom, y_right_bottom, confidence = result
-            if confidence > threshold:
+            if confidence > 0.8:
                 box = [x_left_top, y_left_top, x_right_bottom, y_right_bottom]
                 diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
                 offset_y = int(abs(diff_height_width / 2))
                 box_moved = self.move_box(box, [0, offset_y])
@@ -284,7 +284,7 @@ def get_head_pose(self, color_img, landmarks, subject_id):
         translation_vector - see rotation_vector
         """
 
-        image_points_headpose = self.get_image_points_headpose(landmarks)
+        image_points_headpose = landmarks
 
         camera_matrix = self.img_proc.intrinsicMatrix()
         dist_coeffs = self.img_proc.distortionCoeffs()
@@ -460,7 +460,6 @@ def detect_landmarks(self, color_img, timestamp):
 
     def __get_eye_image_one(self, transformed_landmarks, face_aligned_color):
         margin_ratio = 1.0
-        print(transformed_landmarks)
 
         try:
             # Get the width of the eye, and compute how big the margin should be according to the width
@@ -498,8 +497,8 @@ def __get_eye_image_one(self, transformed_landmarks, face_aligned_color):
             # cv2.circle(face_aligned_color, (int(p[0]), int(p[1])), 3, (0, 0, 255), -1)
 
             # So far, we have only ensured that the ratio is correct. Now, resize it to the desired size.
-            left_eye_color_resized = scipy.misc.imresize(left_eye_color, self.eye_image_size, interp='bilinear')
-            right_eye_color_resized = scipy.misc.imresize(right_eye_color, self.eye_image_size, interp='bilinear')
+            left_eye_color_resized = scipy.misc.imresize(left_eye_color, self.eye_image_size, interp='lanczos')
+            right_eye_color_resized = scipy.misc.imresize(right_eye_color, self.eye_image_size, interp='lanczos')
         except (ValueError, TypeError) as e:
             print(e)
             return None, None, None, None
@@ -521,10 +520,6 @@ def get_eye_image(self):
 
         tqdm.write('New get_eye_image time: ' + str(time.time() - start_time))
 
-    @staticmethod
-    def get_image_points_headpose(landmarks):
-        return landmarks
-
     @staticmethod
     def get_image_points_eyes_nose(landmarks):
         landmarks_x, landmarks_y = landmarks.T[0], landmarks.T[1]
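With the `get_image_points_headpose` pass-through gone, the 68 detected landmarks now go straight into the solvePnP fit described in `get_head_pose`'s docstring: match the 2D points against a generic 3D head model and recover the head rotation and translation. A minimal sketch of that call with an assumed pinhole camera; the point arrays are random placeholders standing in for the real model and landmark data:

```python
import cv2
import numpy as np

# Placeholder correspondences: in rt_gene these are the 68 generic 3D head-model
# points and the 68 detected 2D landmarks.
model_points = (np.random.rand(68, 3) * 100).astype(np.float32)
image_points = (np.random.rand(68, 2) * 640).astype(np.float32)

# Assumed pinhole intrinsics; the ROS node reads these from the CameraInfo topic.
camera_matrix = np.array([[640.0, 0.0, 320.0],
                          [0.0, 640.0, 240.0],
                          [0.0, 0.0, 1.0]])
dist_coeffs = np.zeros((4, 1))  # assume no lens distortion

success, rvec, tvec = cv2.solvePnP(model_points, image_points, camera_matrix,
                                   dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
print(success, rvec.ravel(), tvec.ravel())
```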
From 20254e2fcc17111ef815f56e482b85f69c6f3154 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Mon, 5 Nov 2018 15:59:38 +0000
Subject: [PATCH 4/8] More Cleaning

---
 .../src/rt_gene/extract_landmarks_method.py | 34 +++++--------------
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/rt_gene/src/rt_gene/extract_landmarks_method.py b/rt_gene/src/rt_gene/extract_landmarks_method.py
index d41a147..b9fbe76 100644
--- a/rt_gene/src/rt_gene/extract_landmarks_method.py
+++ b/rt_gene/src/rt_gene/extract_landmarks_method.py
@@ -9,8 +9,6 @@
 
 from __future__ import print_function, division, absolute_import
 
-import rospy
-import numpy as np
 import cv2
 from cv_bridge import CvBridge, CvBridgeError
 from sensor_msgs.msg import Image, CameraInfo
@@ -19,25 +17,11 @@
 import tf.transformations as tf_transformations
 from geometry_msgs.msg import PointStamped, Point
 from tf import TransformBroadcaster, TransformListener, ExtrapolationException
-import time
-import os
-import tensorflow
-import rospkg
 
 import scipy
 import time
 import rospkg
 import rospy
-import os
-import torch
-from torch.autograd import Variable
-from torchvision import transforms
-import torch.backends.cudnn as cudnn
-import torchvision
-import torch.nn.functional as F
-from PIL import Image as pilImage
-
-from rt_gene.Hopenet import Hopenet
 
 import numpy as np
 
@@ -137,12 +121,15 @@ def get_face_bb(self, image):
         faceboxes = []
 
         start_time = time.time()
+        fraction = 4
+        image = scipy.misc.imresize(image, 1.0/fraction)
         detections = self.face_net.detect_from_image(image)
-        print("Time: {}".format(1/(time.time() - start_time)))
+        tqdm.write("Face Detector Frequency: {:.2f}Hz".format(1/(time.time() - start_time)))
 
         for result in detections:
-            x_left_top, y_left_top, x_right_bottom, y_right_bottom, confidence = result
+            x_left_top, y_left_top, x_right_bottom, y_right_bottom, confidence = result*fraction
+
-            if confidence > 0.8:
+            if confidence > 0.8*fraction:
                 box = [x_left_top, y_left_top, x_right_bottom, y_right_bottom]
                 diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
@@ -210,7 +197,7 @@ def process_image(self, color_msg):
 
         start_time = time.time()
 
-        color_img = gaze_tools.convert_image(color_msg, "rgb8")
+        color_img = gaze_tools.convert_image(color_msg, "bgr8")
         timestamp = color_msg.header.stamp
 
         self.detect_landmarks(color_img, timestamp)  # update self.subjects
@@ -238,7 +225,7 @@ def process_image(self, color_msg):
                                                           cov_process=0.1,
                                                           cov_measure=0.1) for _ in range(6)]
 
-            success, rotation_vector, translation_vector = self.get_head_pose(color_img, subject.marks, subject_id)
+            success, rotation_vector, translation_vector = self.get_head_pose(subject.marks, subject_id)
 
             # Publish all the data
             translation_headpose_tf = self.get_head_translation(timestamp, subject_id)
@@ -269,14 +256,13 @@ def process_image(self, color_msg):
 
         tqdm.write('Elapsed total: ' + str(time.time() - start_time) + '\n\n')
 
-    def get_head_pose(self, color_img, landmarks, subject_id):
+    def get_head_pose(self, landmarks, subject_id):
         """
         We are given a set of 2D points in the form of landmarks. The basic idea is that we assume a generic 3D head
         model, and try to fit the 2D points to the 3D model based on the Levenberg-Marquardt optimization. We can use
         OpenCV's implementation of SolvePnP for this.
         This method is inspired by http://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/
        We are planning to replace this with our own head pose estimator.
-        :param color_img: RGB Image
         :param landmarks: Landmark positions to be used to determine head pose
         :return: success - Whether the pose was successfully extracted
                  rotation_vector - rotation vector that along with translation vector brings points from the model
@@ -289,8 +275,6 @@ def get_head_pose(self, landmarks, subject_id):
         camera_matrix = self.img_proc.intrinsicMatrix()
         dist_coeffs = self.img_proc.distortionCoeffs()
 
-        # tqdm.write("Camera Matrix :\n {0}".format(camera_matrix))
-
         try:
             if self.last_rvec[subject_id] is not None and self.last_tvec[
                 subject_id] is not None and self.use_previous_headpose_estimate:
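The detector-frequency printout introduced in patch 3 motivates the main change here: the S3FD detector now runs on a 4x-downscaled copy of the frame, and the detections are scaled back up. Note that the diff multiplies the whole detection row (confidence included) by `fraction` and then tests against `0.8*fraction`, which is arithmetically the same threshold as before. A sketch of the equivalent logic with the scaling applied to the coordinates only, using cv2.resize instead of the deprecated scipy.misc.imresize:

```python
import cv2

def detect_downscaled(detector, image, fraction=4):
    """Run an S3FD-style detector on a downscaled copy, then rescale the boxes."""
    small = cv2.resize(image, None, fx=1.0 / fraction, fy=1.0 / fraction)
    faceboxes = []
    for x_min, y_min, x_max, y_max, confidence in detector.detect_from_image(small):
        if confidence > 0.8:  # confidence is scale-free, so compare it unscaled
            faceboxes.append([v * fraction for v in (x_min, y_min, x_max, y_max)])
    return faceboxes
```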
From d03d63b1fed470238bd4a77402a1a0e18f38aca2 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Mon, 5 Nov 2018 16:17:27 +0000
Subject: [PATCH 5/8] added correcting factor

---
 rt_gene/scripts/estimate_gaze.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rt_gene/scripts/estimate_gaze.py b/rt_gene/scripts/estimate_gaze.py
index f4a3988..6ed6fd8 100755
--- a/rt_gene/scripts/estimate_gaze.py
+++ b/rt_gene/scripts/estimate_gaze.py
@@ -101,7 +101,9 @@ def estimate_gaze_twoeyes(self, test_input_left, test_input_right, headpose):
             predictions.append(model.predict({'img_input_L': test_input_left,
                                               'img_input_R': test_input_right,
                                               'headpose_input': test_headpose})[0])
-        return np.mean(np.array(predictions), axis=0)
+        mean_prediction = np.mean(np.array(predictions), axis=0)
+        mean_prediction[1] += 0.11
+        return mean_prediction
 
     def visualize_eye_result(self, eye_image, est_gaze):
         """Here, we take the original eye eye_image and overlay the estimated gaze."""
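The correction is applied after averaging the per-model gaze predictions: the published estimate is the ensemble mean with a constant offset added to the second component. Schematically, with invented prediction vectors (the next patch reverts this offset, since it was tuned against a single model and biases the ensemble):

```python
import numpy as np

# Hypothetical [phi, theta] gaze predictions from three ensemble members.
predictions = np.array([[0.10, -0.32],
                        [0.12, -0.30],
                        [0.09, -0.35]])

mean_prediction = np.mean(predictions, axis=0)
mean_prediction[1] += 0.11  # the correcting factor added by this patch
print(mean_prediction)      # -> approximately [0.1033, -0.2133]
```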
From a85089128f66da2b24e25cb5252d2979d1ed6f85 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Mon, 5 Nov 2018 18:45:07 +0000
Subject: [PATCH 6/8] Updated README.md to reflect new face-alignment requirement

Removed +0.11 to mean_prediction as that affects ensemble models
---
 rt_gene/README.md                | 8 +-------
 rt_gene/scripts/estimate_gaze.py | 2 +-
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/rt_gene/README.md b/rt_gene/README.md
index 4250c93..bca628c 100644
--- a/rt_gene/README.md
+++ b/rt_gene/README.md
@@ -20,7 +20,7 @@ More information can be found on the Personal Robotic Lab's website:
 ['model_nets/Model_allsubjects1.h5']` and uncomment ``
 
-## Improved head pose estimation
-This is currently disabled by default as the DNN module in the OpenCV shipped by ROS is broken (see https://github.com/ros-gbp/opencv3-release/issues/20). To enable, build OpenCV3 from sources and link the ROS OpenCV to the one compiled from sources. Then download the following files:
-- `wget https://github.com/yinguobing/head-pose-estimation/raw/master/assets/res10_300x300_ssd_iter_140000.caffemodel -P $(rospack find rt_gene)/model_nets`
-- `wget https://raw.githubusercontent.com/yinguobing/head-pose-estimation/master/assets/deploy.prototxt -P $(rospack find rt_gene)/model_nets`
-
-Finally, set `use_mtcnn` to `False` in `$(rospack find rt_gene)/launch/estimate_gaze.launch`
 
 ## Requirements for live gaze estimation (Kinect One)
 - Follow instructions for https://github.com/code-iai/iai_kinect2
diff --git a/rt_gene/scripts/estimate_gaze.py b/rt_gene/scripts/estimate_gaze.py
index 6ed6fd8..0162e36 100755
--- a/rt_gene/scripts/estimate_gaze.py
+++ b/rt_gene/scripts/estimate_gaze.py
@@ -102,7 +102,7 @@ def estimate_gaze_twoeyes(self, test_input_left, test_input_right, headpose):
                                               'img_input_R': test_input_right,
                                               'headpose_input': test_headpose})[0])
         mean_prediction = np.mean(np.array(predictions), axis=0)
-        mean_prediction[1] += 0.11
+        # mean_prediction[1] += 0.11 # when using the ensemble model - they're all off then!
         return mean_prediction
 
     def visualize_eye_result(self, eye_image, est_gaze):
From 59645e4e38fd33a1e2122ebfc1cb06b86d4713d5 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Tue, 6 Nov 2018 11:39:52 +0000
Subject: [PATCH 7/8] Added reference to face-alignment package

---
 rt_gene/src/rt_gene/extract_landmarks_method.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rt_gene/src/rt_gene/extract_landmarks_method.py b/rt_gene/src/rt_gene/extract_landmarks_method.py
index b9fbe76..a0ad23c 100644
--- a/rt_gene/src/rt_gene/extract_landmarks_method.py
+++ b/rt_gene/src/rt_gene/extract_landmarks_method.py
@@ -5,6 +5,8 @@
 @Kevin Cortacero
 @Ahmed Al-Hindawi
 Licensed under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode)
+
+Uses face-alignment package (https://github.com/1adrianb/face-alignment)
 """
 
 from __future__ import print_function, division, absolute_import
From d7555f59def9b1042c8779404109694872c81263 Mon Sep 17 00:00:00 2001
From: Ahmed Al-Hindawi
Date: Tue, 6 Nov 2018 11:43:30 +0000
Subject: [PATCH 8/8] removed MTCNN parameters as no longer used within ros node

---
 rt_gene/launch/estimate_gaze.launch | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/rt_gene/launch/estimate_gaze.launch b/rt_gene/launch/estimate_gaze.launch
index 93073ce..2855021 100644
--- a/rt_gene/launch/estimate_gaze.launch
+++ b/rt_gene/launch/estimate_gaze.launch
@@ -1,6 +1,4 @@
-
-
@@ -24,7 +22,6 @@
         args="0.0 0.0 0.0 0.0 0.0 0.0 1.0 $(arg rgb_frame_id_ros) $(arg rgb_frame_id) 100" unless="$(arg is_frame_rotated)" />
-
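Taken together, the series leaves the landmark node with a much shorter pipeline: S3FD face detection, 68-point landmarks from face-alignment, and a solvePnP fit against the generic head model. An end-to-end sketch of that flow outside ROS; the file name, device and camera intrinsics are assumptions, and the head-model points are random stand-ins:

```python
import cv2
import numpy as np
import face_alignment
from face_alignment.detection.sfd import FaceDetector

image = cv2.imread("frame.jpg")  # hypothetical camera frame
detector = FaceDetector(device="cuda:0")
landmark_net = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device="cuda:0")

faceboxes = [d[:4] for d in detector.detect_from_image(image) if d[4] > 0.8]
landmark_sets = landmark_net.get_landmarks(image, detected_faces=faceboxes) or []

model_points = (np.random.rand(68, 3) * 100).astype(np.float32)  # stand-in head model
camera_matrix = np.array([[640.0, 0.0, 320.0],
                          [0.0, 640.0, 240.0],
                          [0.0, 0.0, 1.0]])

for marks in landmark_sets:
    ok, rvec, tvec = cv2.solvePnP(model_points, np.asarray(marks, dtype=np.float32),
                                  camera_matrix, np.zeros((4, 1)), flags=cv2.SOLVEPNP_ITERATIVE)
    if ok:
        print("head pose:", rvec.ravel(), tvec.ravel())
```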