Testing

bigbruno · bigbruno · commit 4567f198199a · 2026-01-09T13:16:39.000-03:00
diff --git a/src/bigocrpdf/main.py b/src/bigocrpdf/main.py
@@ -74,6 +74,12 @@ def main() -> int:
     # Setup locale first, before other initialization
     setup_locale()
 
+    # Check if we should run in image mode (must be done BEFORE argument parsing)
+    # 1. Explicit flag (from wrapper script)
+    image_mode = "--image-mode" in sys.argv
+    if image_mode:
+        sys.argv.remove("--image-mode")
+
     # Setup environment and parse command line arguments
     setup_environment()
     args = parse_command_line()
@@ -94,15 +100,6 @@ def main() -> int:
     except Exception as e:
         logger.error(f"{_('Error clearing file queue')}: {e}")
 
-    # Determine Application ID based on context
-    app_id = APP_ID
-
-    # Check if we should run in image mode
-    # 1. Explicit flag (from wrapper script)
-    image_mode = "--image-mode" in sys.argv
-    if image_mode:
-        sys.argv.remove("--image-mode")
-
     # 2. Heuristic: Check if arguments contain images and no PDFs
     if not image_mode and len(sys.argv) > 1:
         image_exts = {".png", ".jpg", ".jpeg", ".webp", ".bmp"}
@@ -121,6 +118,8 @@ def main() -> int:
         if has_image and not has_pdf:
             image_mode = True
 
+    # Determine Application ID based on context
+    app_id = APP_ID
     if image_mode:
         from bigocrpdf.config import IMAGE_APP_ID
 
diff --git a/src/bigocrpdf/services/screen_capture.py b/src/bigocrpdf/services/screen_capture.py
@@ -47,6 +47,8 @@ def process_image_file(
         callback: Callable[[str | None, str | None], None],
         on_processing: Callable[[], None] | None = None,
         lang: str = "eng",
+        psm: int = 3,
+        oem: int = 3,
     ) -> None:
         """Process an existing image file and extract text.
 
@@ -55,42 +57,50 @@ def process_image_file(
             callback: Callback function to receive the result
             on_processing: Optional callback invoked when processing starts
             lang: Language code for OCR (default: "eng")
+            psm: Tesseract page segmentation mode (default: 3 - fully automatic, no OSD)
+            oem: Tesseract OCR engine mode (default: 3 - default, based on what is available)
         """
         self._pending_callback = callback
         self._pending_processing_callback = on_processing
 
-        thread = threading.Thread(target=self._run_image_process, args=(image_path, lang))
+        thread = threading.Thread(target=self._run_image_process, args=(image_path, lang, psm, oem))
         thread.daemon = True
         thread.start()
 
-    def _run_image_process(self, image_path: str, lang: str = "eng") -> None:
+    def _run_image_process(
+        self, image_path: str, lang: str = "eng", psm: int = 3, oem: int = 3
+    ) -> None:
         """Execute the image processing in a thread."""
         self._invoke_processing_callback()
-        text = self.extract_text_from_image(image_path, lang)
+        text = self.extract_text_from_image(image_path, lang, psm, oem)
         self._invoke_callback(text, None)
 
     def capture_screen_region(
         self,
         callback: Callable[[str | None, str | None], None],
         on_processing: Callable[[], None] | None = None,
         lang: str = "eng",
+        psm: int = 3,
+        oem: int = 3,
     ) -> None:
         """Capture a region of the screen and extract text from it.
 
         Args:
             callback: Callback function to receive the result (text, error)
             on_processing: Optional callback invoked when processing starts
             lang: Language code for OCR (default: "eng")
+            psm: Tesseract page segmentation mode (default: 3 - fully automatic, no OSD)
+            oem: Tesseract OCR engine mode (default: 3 - default, based on what is available)
         """
         self._pending_callback = callback
         self._pending_processing_callback = on_processing
 
         # Run capture in a separate thread to avoid freezing the UI
-        thread = threading.Thread(target=self._run_capture_thread, args=(lang,))
+        thread = threading.Thread(target=self._run_capture_thread, args=(lang, psm, oem))
         thread.daemon = True
         thread.start()
 
-    def _run_capture_thread(self, lang: str) -> None:
+    def _run_capture_thread(self, lang: str, psm: int = 3, oem: int = 3) -> None:
         """Execute the capture and OCR process in a thread."""
         try:
             # Generate a temporary file path
@@ -116,7 +126,7 @@ def _run_capture_thread(self, lang: str) -> None:
                 self._invoke_processing_callback()
 
                 # Extract text
-                text = self.extract_text_from_image(temp_path, lang)
+                text = self.extract_text_from_image(temp_path, lang, psm, oem)
                 self._cleanup_temp_file(temp_path)
                 self._invoke_callback(text, None)
             else:
@@ -214,12 +224,16 @@ def _capture_with_cli_tools(self, temp_path: str) -> bool:
             logger.error(f"Screenshot capture error: {e}")
             return False
 
-    def extract_text_from_image(self, image_path: str, lang: str = "eng") -> str | None:
+    def extract_text_from_image(
+        self, image_path: str, lang: str = "eng", psm: int = 3, oem: int = 3
+    ) -> str | None:
         """Extract text from an image using Tesseract OCR.
 
         Args:
             image_path: Path to the image file
             lang: Language to use for OCR (default: "eng")
+            psm: Tesseract page segmentation mode (default: 3 - fully automatic, no OSD)
+            oem: Tesseract OCR engine mode (default: 3 - default, based on what is available)
 
         Returns:
             Extracted text or None on error
@@ -231,8 +245,18 @@ def extract_text_from_image(self, image_path: str, lang: str = "eng") -> str | N
                 self._invoke_callback(None, _("Tesseract OCR engine not found. Please install it."))
                 return None
 
-            # Direct tesseract execution
-            args = ["tesseract", image_path, "stdout", "-l", lang]
+            # Direct tesseract execution with psm and oem
+            args = [
+                "tesseract",
+                image_path,
+                "stdout",
+                "-l",
+                lang,
+                "--psm",
+                str(psm),
+                "--oem",
+                str(oem),
+            ]
 
             logger.info(f"Executing OCR: {' '.join(args)}")
             result = subprocess.run(args, capture_output=True, text=True, timeout=30)
diff --git a/src/bigocrpdf/ui/image_ocr_window.py b/src/bigocrpdf/ui/image_ocr_window.py