add bbox auto zoom center and vertex moves independently (#87)

* add bbox auto zoom center and vertex moves independently * code optimization * fix code style --------- Co-authored-by: aboutibm@163.com <7p=e763wN3A6k+[C> Co-authored-by: Wang Xin <xinwang614@gmail.com>
PFCCLab · Sep 22, 2024 · 476b4db · 476b4db
1 parent 3201d08
commit 476b4db
Show file tree

Hide file tree

Showing 5 changed files with 139 additions and 23 deletions.
diff --git a/PPOCRLabel.py b/PPOCRLabel.py
@@ -100,6 +100,8 @@
     newIcon,
     rebuild_html_from_ppstructure_label,
     stepsInfo,
+    polygon_bounding_box_center_and_area,
+    map_value,
     struct,
 )
 from libs.labelColor import label_colormap
@@ -130,6 +132,7 @@ def __init__(
         lang="ch",
         gpu=False,
         img_list_natural_sort=True,
+        bbox_auto_zoom_center=False,
         kie_mode=False,
         default_filename=None,
         default_predefined_class_file=None,
@@ -151,6 +154,7 @@ def __init__(
         self.lang = lang
         self.gpu = gpu
         self.img_list_natural_sort = img_list_natural_sort
+        self.bbox_auto_zoom_center = bbox_auto_zoom_center
 
         # Load string bundle for i18n
         if lang not in ["ch", "en"]:
@@ -1943,7 +1947,7 @@ def addZoom(self, increment=10):
             int(self.zoomWidget.value() + increment)
         )  # set zoom slider value
 
-    def zoomRequest(self, delta):
+    def zoomRequest(self, delta, pos: QPoint = None):
         # get the current scrollbar positions
         # calculate the percentages ~ coordinates
         h_bar = self.scrollBars[Qt.Horizontal]
@@ -1958,8 +1962,10 @@ def zoomRequest(self, delta):
         # where 0 = move left
         #       1 = move right
         # up and down analogous
-        cursor = QCursor()
-        pos = cursor.pos()
+        if pos is None:
+            cursor = QCursor()
+            pos = cursor.pos()
+
         relative_pos = QWidget.mapFromGlobal(self, pos)
 
         cursor_x = relative_pos.x()
@@ -2013,6 +2019,7 @@ def togglePolygons(self, value):
 
     def loadFile(self, filePath=None, isAdjustScale=True):
         """Load the specified file, or the last opened file if None."""
+        self.canvas.shape_move_index = None
         if self.dirty:
             self.mayContinue()
         self.resetState()
@@ -2117,6 +2124,20 @@ def loadFile(self, filePath=None, isAdjustScale=True):
             )
 
             self.canvas.setFocus(True)
+
+            if self.bbox_auto_zoom_center:
+                if len(self.canvas.shapes) > 0:
+                    (
+                        center_x,
+                        center_y,
+                        shape_area,
+                    ) = polygon_bounding_box_center_and_area(
+                        self.canvas.shapes[0].points
+                    )
+                    if shape_area < 30000:
+                        zoom_value = 120 * map_value(shape_area, 100, 30000, 20, 0)
+                        self.zoomRequest(zoom_value, QPoint(center_x, center_y))
+                        # print(" =========> ", shape_area, " ==> ", zoom_value)
             return True
         return False
 
@@ -3548,6 +3569,9 @@ def get_main_app(argv=[]):
     arg_parser.add_argument("--rec_model_dir", type=str, default=None, nargs="?")
     arg_parser.add_argument("--rec_char_dict_path", type=str, default=None, nargs="?")
     arg_parser.add_argument("--cls_model_dir", type=str, default=None, nargs="?")
+    arg_parser.add_argument(
+        "--bbox_auto_zoom_center", type=str2bool, default=False, nargs="?"
+    )
 
     args = arg_parser.parse_args(argv[1:])
 
@@ -3561,6 +3585,7 @@ def get_main_app(argv=[]):
         rec_model_dir=args.rec_model_dir,
         rec_char_dict_path=args.rec_char_dict_path,
         cls_model_dir=args.cls_model_dir,
+        bbox_auto_zoom_center=args.bbox_auto_zoom_center,
     )
     win.show()
     return app, win

diff --git a/README.md b/README.md
@@ -24,6 +24,8 @@ PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field,
     - `rec_model_dir`: Path to the recognition model directory
     - `rec_char_dict_path`: Path to the recognition model dictionary file
     - `cls_model_dir`: Path to the classification model directory
+  - Added the `--bbox_auto_zoom_center` parameter, which can be enabled when there is only one bounding box in the image, automatically centering and zooming in on the bounding box.
+  - Added 5 shortcut keys `z`, `x`, `c`, `v`, `b`: the first four each select one of the 4 vertices of the bounding box so that it can be moved on its own, and `b` restores whole-box movement. For usage details, see '11. Additional Feature Description' in "2.1 Operating Procedures" below.
 - 2022.05: Add table annotations, follow `2.2 Table Annotations` for more information (by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
 - 2022.02: (by [PeterH0323](https://github.com/peterh0323))
   - Add KIE Mode by using `--kie`, for [detection + identification + keyword extraction] labeling.
@@ -166,6 +168,12 @@ PPOCRLabel.exe --lang ch
 11. Additional Feature Description
     - `File` -> `Re-recognition`: After checking, the newly annotated box content will automatically trigger the `Re-recognition` function of the current annotation box, eliminating the need to click the Re-identify button. This is suitable for scenarios where you do not want to use Automatic Annotation but prefer manual annotation, such as license plate recognition. In a single image with only one license plate, using Automatic Annotation would require deleting many additional recognized text boxes, which is less efficient than directly re-annotating.
     - `File` -> `Auto Save Unsaved changes`: By default, you need to press the `Check` button to complete the marking confirmation for the current box, which can be cumbersome. After checking, when switching to the next image (by pressing the shortcut key `D`), a prompt box asking to confirm whether to save unconfirmed markings will no longer appear. The current markings will be automatically saved and the next image will be switched, making it convenient for quick marking.
+    - After selecting the bounding box, there are 5 shortcut keys available to individually control the movement of the four vertices of the bounding box, suitable for scenarios that require precise control over the positions of the bounding box vertices:
+      - `z`: After pressing, the up, down, left, and right arrow keys will move the 1st vertex individually.
+      - `x`: After pressing, the up, down, left, and right arrow keys will move the 2nd vertex individually.
+      - `c`: After pressing, the up, down, left, and right arrow keys will move the 3rd vertex individually.
+      - `v`: After pressing, the up, down, left, and right arrow keys will move the 4th vertex individually.
+      - `b`: After pressing, the up, down, left, and right arrow keys will revert to the default action of moving the entire bounding box.
 
 ### 2.2 Table Annotation
 
@@ -217,8 +225,6 @@ labeling in the Excel file, the recommended steps are:
 | Ctrl + Shift + R         | Re-recognize all the labels of the current image |
 | W                        | Create a rect box                                |
 | Q  or  Home              | Create a multi-points box                         |
-| X                        | Rotate the box anti-clockwise                    |
-| C                        | Rotate the box clockwise                         |
 | Ctrl + E                 | Edit label of the selected box                   |
 | Ctrl + X                 | Change key class of the box when enable `--kie`  |
 | Ctrl + R                 | Re-recognize the selected box                    |
@@ -232,6 +238,7 @@ labeling in the Excel file, the recommended steps are:
 | Ctrl++                   | Zoom in                                          |
 | Ctrl--                   | Zoom out                                         |
 | ↑→↓←                     | Move selected box                                |
+| Z, X, C, V, B            | Move a single vertex of the selected box with the arrow keys (Z/X/C/V pick the 1st–4th vertex; B restores whole-box movement) |
 
 ### 3.2 Built-in Model
 

diff --git a/README_ch.md b/README_ch.md
@@ -24,6 +24,8 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具，内置P
     - `rec_model_dir` ：识别模型目录路径
     - `rec_char_dict_path` ：识别模型字典文件路径
     - `cls_model_dir` ：分类模型目录路径
+  - 新增`--bbox_auto_zoom_center`参数，当图片只有一个标记框的时候，可以开启，会自动将标记框居中放大
+  - 新增5个控制标记框4个顶点的快捷键`z`、`x`、`c`、`v`、`b`，使用方法详见下方`2.1 操作步骤`的`11. 补充功能说明`。
 - 2022.05：**新增表格标注**，使用方法见下方`2.2 表格标注`（by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest)）
 - 2022.02：**新增关键信息标注**、优化标注体验（by [PeterH0323](https://github.com/peterh0323) ）
   - 新增：使用 `--kie` 进入 KIE 功能，用于打【检测+识别+关键字提取】的标签
@@ -152,6 +154,12 @@ PPOCRLabel.exe --lang ch
 11. 补充功能说明
     - `文件` -> `自动重新识别` : 勾选后，对于新标注的框内容会自动触发当前标注框的重新识别功能，不需要再去点击`重新识别`按钮，适合各种原因不想使用`自动标注`只想手动标注的场景，例如车牌识别，一张图里只有一个车牌，如果使用`自动标注`,需要删除很多额外识别出来的文字框，不如直接重新标注
     - `文件` -> `自动保存未提交变更` : 默认是按`确认`按钮完成当前框的标记确认，有点繁琐，勾选后，切换下一张图（按快捷键`D`）的时候，不再弹出提示框确认是否保存未确认的标记，自动保存当前标记并切换下一张图，方便快速标记
+    - 选中标记框后，5个可以控制标记框四个顶点单独移动的快捷键，适合需要精确控制标记框四个顶点位置的场景
+      - `z` ：按下后，此时使用键盘的上下左右按键将单独移动第1个顶点
+      - `x` ：按下后，此时使用键盘的上下左右按键将单独移动第2个顶点
+      - `c` ：按下后，此时使用键盘的上下左右按键将单独移动第3个顶点
+      - `v` ：按下后，此时使用键盘的上下左右按键将单独移动第4个顶点
+      - `b` ：按下后，此时使用键盘的上下左右按键将恢复默认的整体移动整个标记框
 
 ### 2.2 表格标注（[视频演示](https://www.bilibili.com/video/BV1wR4y1v7JE/?share_source=copy_web&vd_source=cf1f9d24648d49636e3d109c9f9a377d&t=1998)）
 
@@ -196,8 +204,6 @@ PPOCRLabel.exe --lang ch
 | Ctrl + shift + R | 对当前图片的所有标记重新识别                  |
 | W                | 新建矩形框                           |
 | Q  或 Home       | 新建多点框                           |
-| X                | 框逆时针旋转                          |
-| C                | 框顺时针旋转                          |
 | Ctrl + E         | 编辑所选框标签                         |
 | Ctrl + X         |  `--kie` 模式下，修改 Box 的关键字种类 |
 | Ctrl + R         | 重新识别所选标记                        |
@@ -211,6 +217,7 @@ PPOCRLabel.exe --lang ch
 | Ctrl++           | 放大                              |
 | Ctrl--           | 缩小                              |
 | ↑→↓←             | 移动标记框                           |
+| Z、X、C、V、B     | 对选中的标记框，单独移动四个顶点     |
 
 ### 3.2 内置模型
 

diff --git a/libs/canvas.py b/libs/canvas.py
@@ -40,6 +40,8 @@ class Canvas(QWidget):
 
     epsilon = 5.0
 
+    shape_move_index = None
+
     def __init__(self, *args, **kwargs):
         super(Canvas, self).__init__(*args, **kwargs)
         # Initialise local state.
@@ -754,6 +756,39 @@ def keyPressEvent(self, ev):
             self.moveOnePixel("Up")
         elif key == Qt.Key_Down and self.selectedShapes:
             self.moveOnePixel("Down")
+        elif key == Qt.Key_Z and self.selectedShapes:
+            self.shape_move_index = 0
+            select_shape = self.selectedShapes[0]
+            select_shape.highlightVertex(
+                self.shape_move_index, select_shape.MOVE_VERTEX
+            )
+            self.update()
+        elif key == Qt.Key_X and self.selectedShapes:
+            self.shape_move_index = 1
+            select_shape = self.selectedShapes[0]
+            select_shape.highlightVertex(
+                self.shape_move_index, select_shape.MOVE_VERTEX
+            )
+            self.update()
+        elif key == Qt.Key_C and self.selectedShapes:
+            self.shape_move_index = 2
+            select_shape = self.selectedShapes[0]
+            select_shape.highlightVertex(
+                self.shape_move_index, select_shape.MOVE_VERTEX
+            )
+            self.update()
+        elif key == Qt.Key_V and self.selectedShapes:
+            self.shape_move_index = 3
+            select_shape = self.selectedShapes[0]
+            select_shape.highlightVertex(
+                self.shape_move_index, select_shape.MOVE_VERTEX
+            )
+            self.update()
+        elif key == Qt.Key_B and self.selectedShapes:
+            self.shape_move_index = None
+            select_shape = self.selectedShapes[0]
+            select_shape.highlightClear()
+            self.update()
         elif key == Qt.Key_X and self.selectedShapes:
             for i in range(len(self.selectedShapes)):
                 self.selectedShape = self.selectedShapes[i]
@@ -788,34 +823,31 @@ def moveOnePixel(self, direction):
             self.selectedShape = self.selectedShapes[i]
             if direction == "Left" and not self.moveOutOfBound(QPointF(-1.0, 0)):
                 # print("move Left one pixel")
-                self.selectedShape.points[0] += QPointF(-1.0, 0)
-                self.selectedShape.points[1] += QPointF(-1.0, 0)
-                self.selectedShape.points[2] += QPointF(-1.0, 0)
-                self.selectedShape.points[3] += QPointF(-1.0, 0)
+                self.move_points(QPointF(-1.0, 0))
             elif direction == "Right" and not self.moveOutOfBound(QPointF(1.0, 0)):
                 # print("move Right one pixel")
-                self.selectedShape.points[0] += QPointF(1.0, 0)
-                self.selectedShape.points[1] += QPointF(1.0, 0)
-                self.selectedShape.points[2] += QPointF(1.0, 0)
-                self.selectedShape.points[3] += QPointF(1.0, 0)
+                self.move_points(QPointF(1.0, 0))
             elif direction == "Up" and not self.moveOutOfBound(QPointF(0, -1.0)):
                 # print("move Up one pixel")
-                self.selectedShape.points[0] += QPointF(0, -1.0)
-                self.selectedShape.points[1] += QPointF(0, -1.0)
-                self.selectedShape.points[2] += QPointF(0, -1.0)
-                self.selectedShape.points[3] += QPointF(0, -1.0)
+                self.move_points(QPointF(0, -1.0))
             elif direction == "Down" and not self.moveOutOfBound(QPointF(0, 1.0)):
                 # print("move Down one pixel")
-                self.selectedShape.points[0] += QPointF(0, 1.0)
-                self.selectedShape.points[1] += QPointF(0, 1.0)
-                self.selectedShape.points[2] += QPointF(0, 1.0)
-                self.selectedShape.points[3] += QPointF(0, 1.0)
+                self.move_points(QPointF(0, 1.0))
         shapesBackup = []
         shapesBackup = copy.deepcopy(self.shapes)
         self.shapesBackups.append(shapesBackup)
         self.shapeMoved.emit()
         self.repaint()
 
+    def move_points(self, p: QPointF):
+        """
+        Shift the currently selected shape, or one of its vertices, by p.
+
+        When ``shape_move_index`` is None the offset is added to the first
+        four points of ``self.selectedShape``; otherwise it is added only to
+        the point at index ``shape_move_index``.
+
+        Args:
+            p: Offset added in place to the affected point(s).
+        """
+        if self.shape_move_index is None:
+            # Whole-shape move.
+            # NOTE(review): indices 0-3 are hard-coded, so this presumably
+            # expects four-point shapes -- confirm against how shapes are built.
+            self.selectedShape.points[0] += p
+            self.selectedShape.points[1] += p
+            self.selectedShape.points[2] += p
+            self.selectedShape.points[3] += p
+        else:
+            # Single-vertex move: only the chosen vertex is offset.
+            self.selectedShape.points[self.shape_move_index] += p
+
     def moveOutOfBound(self, step):
         # Return True if shifting the selected shape's points by `step` would
         # put any of them outside the pixmap (as judged by self.outOfPixmap).
         # zip() against a four-element list means at most the first four
         # points are examined.
         # NOTE(review): every examined point is shifted for this test, even
         # when shape_move_index selects a single vertex to move; a
         # single-vertex move may therefore be rejected because of a different
         # vertex -- confirm whether that is intended.
         points = [p1 + p2 for p1, p2 in zip(self.selectedShape.points, [step] * 4)]
         return True in map(self.outOfPixmap, points)

diff --git a/libs/utils.py b/libs/utils.py
@@ -322,6 +322,7 @@ def keysInfo(lang="en"):
             "Ctrl++\t\t\t缩小\n"
             "Ctrl--\t\t\t放大\n"
             "↑→↓←\t\t\t移动标记框\n"
+            "Z、X、C、V、B\t\t\t对选中的标记框，单独移动四个顶点\n"
             "———————————————————————\n"
             "注：Mac用户Command键替换上述Ctrl键"
         )
@@ -351,8 +352,52 @@ def keysInfo(lang="en"):
             "Ctrl++\t\t\tZoom in\n"
             "Ctrl--\t\t\tZoom out\n"
             "↑→↓←\t\t\tMove selected box"
+            "Z, X, C, V, B\t\tMove the four vertices of \n"
+            and "\t\t\tthe selected bounding box individually"
             "———————————————————————\n"
             "Notice:For Mac users, use the 'Command' key instead of the 'Ctrl' key"
         )
 
     return msg
+
+
+def polygon_bounding_box_center_and_area(points):
+    """
+    Return the bounding-box center and the enclosed area of a polygon.
+
+    The center is the midpoint of the axis-aligned bounding box of the
+    vertices. The area is that of the polygon itself, computed with the
+    shoelace formula; it is not the area of the bounding box.
+
+    Args:
+        points: Sequence of polygon vertices in order; each item must
+            provide ``x()`` and ``y()`` accessor methods.
+
+    Returns:
+        A ``(center_x, center_y, area)`` tuple.
+
+    Raises:
+        ValueError: If fewer than three points are given.
+    """
+    if len(points) < 3:
+        raise ValueError("At least three points are required to form a polygon")
+
+    area = 0
+    min_x = float("inf")
+    max_x = float("-inf")
+    min_y = float("inf")
+    max_y = float("-inf")
+
+    n = len(points)
+    for i in range(n):
+        # Current vertex and the next one, wrapping around to close the polygon.
+        x1 = points[i].x()
+        y1 = points[i].y()
+        x2 = points[(i + 1) % n].x()
+        y2 = points[(i + 1) % n].y()
+        # Shoelace term: accumulates twice the signed area.
+        area += x1 * y2 - x2 * y1
+
+        # Track the extents of the axis-aligned bounding box.
+        min_x = min(min_x, x1)
+        max_x = max(max_x, x1)
+        min_y = min(min_y, y1)
+        max_y = max(max_y, y1)
+
+    # abs() makes the result independent of the vertex winding direction.
+    area = abs(area) / 2.0
+    center_x = (min_x + max_x) / 2
+    center_y = (min_y + max_y) / 2
+
+    return center_x, center_y, area
+
+
+def map_value(x, in_min, in_max, out_min, out_max):
+    """
+    Linearly map the value x from the range [in_min, in_max] to the range
+    [out_min, out_max].
+
+    The result is not clamped: an x outside [in_min, in_max] yields a value
+    outside the output range. The output range may be descending
+    (out_min > out_max), which inverts the mapping.
+
+    Raises:
+        ValueError: If in_max equals in_min (the input range has zero width).
+    """
+    if in_max == in_min:
+        raise ValueError("in_max and in_min cannot be equal")
+    return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min