diff --git a/lib/BackgroundJob/Tasks/CreateClustersTask.php b/lib/BackgroundJob/Tasks/CreateClustersTask.php index e173701c..67f2064d 100644 --- a/lib/BackgroundJob/Tasks/CreateClustersTask.php +++ b/lib/BackgroundJob/Tasks/CreateClustersTask.php @@ -148,13 +148,11 @@ private function createClusterIfNeeded(string $userId) { $min_face_size = $this->settingsService->getMinimumFaceSize(); $min_confidence = $this->settingsService->getMinimumConfidence(); - $faces = array_merge( - $this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence), - $this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence) - ); + $faces = $this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence); + $nonGroupables = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence); $facesCount = count($faces); - $this->logInfo('There are ' . $facesCount . ' faces for clustering'); + $this->logInfo('There are ' . $facesCount . ' faces for clustering and '. count($nonGroupables) . ' that cannot be grouped.'); $noSlices = 1; $sliceSize = $facesCount; @@ -169,19 +167,30 @@ private function createClusterIfNeeded(string $userId) { $sliceSize = ceil($facesCount / $noSlices); } - $this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces'); + $this->logDebug('We will cluster these with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces.'); $newClusters = []; + // Obtain the clusters in batches and append them. for ($i = 0; $i < $noSlices ; $i++) { + // Get the batches. $facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize); - $newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced)); + // Get the indices, obtain the partial clusters and incorporate them. + $faceIds = array_map(function ($face) { return $face['id']; }, $facesSliced); + $facesDescripted = $this->faceMapper->findDescriptorsBathed($faceIds); + $newClusters = array_merge($newClusters, $this->getNewClusters($facesDescripted)); + // Discard variables aggressively to improve memory consumption. + unset($facesDescripted); + unset($facesSliced); } + // Append non groupable faces on a single step. + $newClusters = array_merge($newClusters, $this->getFakeClusters($nonGroupables)); + // Cluster is associative array where key is person ID. // Value is array of face IDs. For old clusters, person IDs are some existing person IDs, // and for new clusters is whatever chinese whispers decides to identify them. // - $currentClusters = $this->getCurrentClusters($faces); + $currentClusters = $this->getCurrentClusters(array_merge($faces, $nonGroupables)); $this->logInfo(count($newClusters) . ' clusters found after clustering'); @@ -279,16 +288,26 @@ private function needCreateFirstTime(string $userId, int $modelId): bool { private function getCurrentClusters(array $faces): array { $chineseClusters = array(); foreach($faces as $face) { - if ($face->person !== null) { - if (!isset($chineseClusters[$face->person])) { - $chineseClusters[$face->person] = array(); + if ($face['person'] !== null) { + if (!isset($chineseClusters[$face['person']])) { + $chineseClusters[$face['person']] = array(); } - $chineseClusters[$face->person][] = $face->id; + $chineseClusters[$face['person']][] = $face['id']; } } return $chineseClusters; } + private function getFakeClusters(array $faces): array { + $newClusters = array(); + for ($i = 0, $c = count($faces); $i < $c; $i++) { + $fakeCluster = []; + $fakeCluster[] = $faces[$i]['id']; + $newClusters[] = $fakeCluster; + } + return $newClusters; + } + private function getNewClusters(array $faces): array { // Clustering parameters $sensitivity = $this->settingsService->getSensitivity(); @@ -299,16 +318,9 @@ private function getNewClusters(array $faces): array { $faces_count = count($faces); for ($i = 0; $i < $faces_count; $i++) { $face1 = $faces[$i]; - if (!isset($face1->descriptor)) { - $edges[] = array($i, $i); - continue; - } for ($j = $i; $j < $faces_count; $j++) { $face2 = $faces[$j]; - if (!isset($face2->descriptor)) { - continue; - } - $distance = dlib_vector_length($face1->descriptor, $face2->descriptor); + $distance = dlib_vector_length($face1['descriptor'], $face2['descriptor']); if ($distance < $sensitivity) { $edges[] = array($i, $j); } @@ -324,16 +336,9 @@ private function getNewClusters(array $faces): array { for ($i = 0; $i < $faces_count; $i++) { $face1 = $faces[$i]; - if (!isset($face1->descriptor)) { - $edges[] = array($i, $i); - continue; - } for ($j = $i; $j < $faces_count; $j++) { $face2 = $faces[$j]; - if (!isset($face2->descriptor)) { - continue; - } - $distance = Euclidean::distance($face1->descriptor, $face2->descriptor); + $distance = Euclidean::distance($face1['descriptor'], $face2['descriptor']); if ($distance < $sensitivity) { $edges[] = array($i, $j); } @@ -358,7 +363,7 @@ private function getNewClusters(array $faces): array { if (!isset($newClusters[$newChineseClustersByIndex[$i]])) { $newClusters[$newChineseClustersByIndex[$i]] = array(); } - $newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id; + $newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]['id']; } return $newClusters; } diff --git a/lib/Db/FaceMapper.php b/lib/Db/FaceMapper.php index 0e31ea8c..74ecbd29 100644 --- a/lib/Db/FaceMapper.php +++ b/lib/Db/FaceMapper.php @@ -41,7 +41,7 @@ public function find (int $faceId): ?Face { $qb = $this->db->getQueryBuilder(); $qb->select('id', 'image', 'person', 'x', 'y', 'width', 'height', 'landmarks', 'descriptor', 'confidence') ->from($this->getTableName(), 'f') - ->andWhere($qb->expr()->eq('id', $qb->createNamedParameter($faceId))); + ->where($qb->expr()->eq('id', $qb->createNamedParameter($faceId))); try { return $this->findEntity($qb); } catch (DoesNotExistException $e) { @@ -49,6 +49,27 @@ public function find (int $faceId): ?Face { } } + public function findDescriptorsBathed (array $faceIds): array { + $qb = $this->db->getQueryBuilder(); + $qb->select('id', 'descriptor') + ->from($this->getTableName(), 'f') + ->where($qb->expr()->in('id', $qb->createParameter('face_ids'))); + + $qb->setParameter('face_ids', $faceIds, IQueryBuilder::PARAM_INT_ARRAY); + + $descriptors = []; + $result = $qb->executeQuery(); + while ($row = $result->fetch()) { + $descriptors[] = [ + 'id' => $row['id'], + 'descriptor' => json_decode($row['descriptor']) + ]; + } + $result->closeCursor(); + + return $descriptors; + } + /** * Based on a given fileId, takes all faces that belong to that file * and return an array with that. @@ -147,7 +168,7 @@ public function getFaces(string $userId, int $model): array { public function getGroupableFaces(string $userId, int $model, int $minSize, float $minConfidence): array { $qb = $this->db->getQueryBuilder(); - $qb->select('f.id', 'f.person', 'f.descriptor') + $qb->select('f.id', 'f.person') ->from($this->getTableName(), 'f') ->innerJoin('f', 'facerecog_images' ,'i', $qb->expr()->eq('f.image', 'i.id')) ->where($qb->expr()->eq('user', $qb->createParameter('user'))) @@ -161,7 +182,12 @@ public function getGroupableFaces(string $userId, int $model, int $minSize, floa ->setParameter('min_size', $minSize) ->setParameter('min_confidence', $minConfidence) ->setParameter('is_groupable', true, IQueryBuilder::PARAM_BOOL); - return $this->findEntities($qb); + + $result = $qb->executeQuery(); + $rows = $result->fetchAll(); + $result->closeCursor(); + + return $rows; } public function getNonGroupableFaces(string $userId, int $model, int $minSize, float $minConfidence): array { @@ -182,7 +208,12 @@ public function getNonGroupableFaces(string $userId, int $model, int $minSize, f ->setParameter('min_size', $minSize) ->setParameter('min_confidence', $minConfidence) ->setParameter('is_groupable', false, IQueryBuilder::PARAM_BOOL); - return $this->findEntities($qb); + + $result = $qb->executeQuery(); + $rows = $result->fetchAll(); + $result->closeCursor(); + + return $rows; } /**