Change fileState.txt file path format (#82)

* Change fileState.txt file path format * fix bug and remove useless natural_sort * add args img_list_natural_sort * fix code style --------- Co-authored-by: aboutibm@163.com <7p=e763wN3A6k+[C> Co-authored-by: Wang Xin <xinwang614@gmail.com>
PFCCLab · Sep 16, 2024 · ae56a99 · ae56a99
1 parent 9c69d7e
commit ae56a99
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 11 deletions.
diff --git a/PPOCRLabel.py b/PPOCRLabel.py
@@ -129,6 +129,7 @@ def __init__(
         self,
         lang="ch",
         gpu=False,
+        img_list_natural_sort=True,
         kie_mode=False,
         default_filename=None,
         default_predefined_class_file=None,
@@ -145,6 +146,7 @@ def __init__(
         settings = self.settings
         self.lang = lang
         self.gpu = gpu
+        self.img_list_natural_sort = img_list_natural_sort
 
         # Load string bundle for i18n
         if lang not in ["ch", "en"]:
@@ -2148,9 +2150,12 @@ def showBoundingBoxFromPPlabel(self, filePath):
             self.canvas.verified = False
 
     def validFilestate(self, filePath):
-        if filePath not in self.fileStatedict.keys():
-            return None
-        elif self.fileStatedict[filePath] == 1:
+        if filePath in self.fileStatedict.keys() and self.fileStatedict[filePath] == 1:
+            return True
+        elif (
+            self.getImglabelidx(filePath) in self.fileStatedict.keys()
+            and self.fileStatedict[self.getImglabelidx(filePath)] == 1
+        ):
             return True
         else:
             return False
@@ -2246,7 +2251,10 @@ def scanAllImages(self, folderPath):
                 relativePath = os.path.join(folderPath, file)
                 path = ustr(os.path.abspath(relativePath))
                 images.append(path)
-        natural_sort(images, key=lambda x: x.lower())
+        if self.img_list_natural_sort:
+            natural_sort(images, key=lambda x: x.lower())
+        else:
+            images.sort()
         return images
 
     def openDirDialog(self, _value=False, dirpath=None, silent=False):
@@ -2473,7 +2481,7 @@ def _saveFile(self, annotationFilePath, mode="Manual"):
                 item = self.fileListWidget.item(currIndex)
                 item.setIcon(newIcon("done"))
 
-                self.fileStatedict[self.filePath] = 1
+                self.fileStatedict[self.getImglabelidx(self.filePath)] = 1
                 if len(self.fileStatedict) % self.autoSaveNum == 0:
                     self.saveFilestate()
                     self.savePPlabel(mode="Auto")
@@ -2753,6 +2761,8 @@ def getImglabelidx(self, filePath):
         else:
             spliter = "/"
         filepathsplit = filePath.split(spliter)[-2:]
+        if len(filepathsplit) == 1:
+            return filePath
         return filepathsplit[0] + "/" + filepathsplit[1]
 
     def autoRecognition(self):
@@ -3241,7 +3251,7 @@ def loadFilestate(self, saveDir):
                 states = f.readlines()
                 for each in states:
                     file, state = each.split("\t")
-                    self.fileStatedict[file] = 1
+                    self.fileStatedict[self.getImglabelidx(file)] = 1
                 self.actions.saveLabel.setEnabled(True)
                 self.actions.saveRec.setEnabled(True)
                 self.actions.exportJSON.setEnabled(True)
@@ -3301,8 +3311,9 @@ def saveRecResult(self):
             QMessageBox.information(self, "Information", "Check the image first")
             return
 
-        rec_gt_dir = os.path.dirname(self.PPlabelpath) + "/rec_gt.txt"
-        crop_img_dir = os.path.dirname(self.PPlabelpath) + "/crop_img/"
+        base_dir = os.path.dirname(self.PPlabelpath)
+        rec_gt_dir = base_dir + "/rec_gt.txt"
+        crop_img_dir = base_dir + "/crop_img/"
         ques_img = []
         if not os.path.exists(crop_img_dir):
             os.mkdir(crop_img_dir)
@@ -3311,7 +3322,8 @@ def saveRecResult(self):
             for key in self.fileStatedict:
                 idx = self.getImglabelidx(key)
                 try:
-                    img = cv2.imdecode(np.fromfile(key, dtype=np.uint8), -1)
+                    img_path = os.path.dirname(base_dir) + "/" + key
+                    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
                     for i, label in enumerate(self.PPlabel[idx]):
                         if label["difficult"]:
                             continue
@@ -3505,6 +3517,9 @@ def get_main_app(argv=[]):
     arg_parser = argparse.ArgumentParser()
     arg_parser.add_argument("--lang", type=str, default="ch", nargs="?")
     arg_parser.add_argument("--gpu", type=str2bool, default=True, nargs="?")
+    arg_parser.add_argument(
+        "--img_list_natural_sort", type=str2bool, default=True, nargs="?"
+    )
     arg_parser.add_argument("--kie", type=str2bool, default=False, nargs="?")
     arg_parser.add_argument(
         "--predefined_classes_file",
@@ -3518,6 +3533,7 @@ def get_main_app(argv=[]):
     win = MainWindow(
         lang=args.lang,
         gpu=args.gpu,
+        img_list_natural_sort=args.img_list_natural_sort,
         kie_mode=args.kie,
         default_predefined_class_file=args.predefined_classes_file,
     )

diff --git a/README.md b/README.md
@@ -16,7 +16,9 @@ PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field,
 
 ### Recent Update
 
-- 2024.09: Added `Re-recognition` and `Auto Save Unsaved changes` features. For usage details, please refer to the "11. Additional Feature Description" in the "2.1 Operational Steps" section below.
+- 2024.09:
+  - Added `Re-recognition` and `Auto Save Unsaved changes` features. For usage details, please refer to the "11. Additional Feature Description" in the "2.1 Operational Steps" section below.
+  - Added the `--img_list_natural_sort` parameter. By default (`True`) the image list on the left is sorted in natural order; pass `--img_list_natural_sort False` to sort it in plain character order instead, which makes it easier to locate images by the character order of their file names.
 - 2022.05: Add table annotations, follow `2.2 Table Annotations` for more information (by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
 - 2022.02: (by [PeterH0323](https://github.com/peterh0323))
   - Add KIE Mode by using `--kie`, for [detection + identification + keyword extraction] labeling.

diff --git a/README_ch.md b/README_ch.md
@@ -16,7 +16,9 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具，内置P
 
 #### 近期更新
 
-- 2024.09: 新增`自动重新识别`和`自动保存未提交变更`功能，使用方法详见下方`2.1 操作步骤`的`11. 补充功能说明`
+- 2024.09:
+  - 新增`自动重新识别`和`自动保存未提交变更`功能，使用方法详见下方`2.1 操作步骤`的`11. 补充功能说明`。
+  - 新增`--img_list_natural_sort`参数，默认左侧图片列表使用自然排序，配置该参数后，将使用字符排序，方便根据字符顺序定位图片。
 - 2022.05：**新增表格标注**，使用方法见下方`2.2 表格标注`（by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest)）
 - 2022.02：**新增关键信息标注**、优化标注体验（by [PeterH0323](https://github.com/peterh0323) ）
   - 新增：使用 `--kie` 进入 KIE 功能，用于打【检测+识别+关键字提取】的标签