From e6d36f3a3a0f24419c750a29b9ceee81a224e9bf Mon Sep 17 00:00:00 2001 From: cwr31 <59834457+cwr31@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:20:30 +0800 Subject: [PATCH] Improve image rotation logic for text recognition (#8167) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Enhanced the image rotation handling by evaluating the original orientation, clockwise 90°, and counter-clockwise 90° rotations. The image with the highest text recognition score is now selected, improving accuracy for text detection in images with aspect ratios >= 1.5. #8166 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Co-authored-by: wenrui.cao --- deepdoc/vision/ocr.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py index 90b11038f..e9e594274 100644 --- a/deepdoc/vision/ocr.py +++ b/deepdoc/vision/ocr.py @@ -588,7 +588,29 @@ class OCR: flags=cv2.INTER_CUBIC) dst_img_height, dst_img_width = dst_img.shape[0:2] if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) + # Try original orientation + rec_result = self.text_recognizer[0]([dst_img]) + text, score = rec_result[0][0] + best_score = score + best_img = dst_img + + # Try clockwise 90° rotation + rotated_cw = np.rot90(dst_img, k=3) + rec_result = self.text_recognizer[0]([rotated_cw]) + rotated_cw_text, rotated_cw_score = rec_result[0][0] + if rotated_cw_score > best_score: + best_score = rotated_cw_score + best_img = rotated_cw + + # Try counter-clockwise 90° rotation + rotated_ccw = np.rot90(dst_img, k=1) + rec_result = self.text_recognizer[0]([rotated_ccw]) + rotated_ccw_text, rotated_ccw_score = rec_result[0][0] + if rotated_ccw_score > best_score: + best_img = rotated_ccw + + # Use the best image + dst_img = best_img return dst_img def sorted_boxes(self, dt_boxes):