Skip to content

Commit 4567f19

Browse files
committed
Testing
1 parent b34dec4 commit 4567f19

File tree

3 files changed

+260
-23
lines changed

3 files changed

+260
-23
lines changed

src/bigocrpdf/main.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ def main() -> int:
7474
# Setup locale first, before other initialization
7575
setup_locale()
7676

77+
# Check if we should run in image mode (must be done BEFORE argument parsing)
78+
# 1. Explicit flag (from wrapper script)
79+
image_mode = "--image-mode" in sys.argv
80+
if image_mode:
81+
sys.argv.remove("--image-mode")
82+
7783
# Setup environment and parse command line arguments
7884
setup_environment()
7985
args = parse_command_line()
@@ -94,15 +100,6 @@ def main() -> int:
94100
except Exception as e:
95101
logger.error(f"{_('Error clearing file queue')}: {e}")
96102

97-
# Determine Application ID based on context
98-
app_id = APP_ID
99-
100-
# Check if we should run in image mode
101-
# 1. Explicit flag (from wrapper script)
102-
image_mode = "--image-mode" in sys.argv
103-
if image_mode:
104-
sys.argv.remove("--image-mode")
105-
106103
# 2. Heuristic: Check if arguments contain images and no PDFs
107104
if not image_mode and len(sys.argv) > 1:
108105
image_exts = {".png", ".jpg", ".jpeg", ".webp", ".bmp"}
@@ -121,6 +118,8 @@ def main() -> int:
121118
if has_image and not has_pdf:
122119
image_mode = True
123120

121+
# Determine Application ID based on context
122+
app_id = APP_ID
124123
if image_mode:
125124
from bigocrpdf.config import IMAGE_APP_ID
126125

src/bigocrpdf/services/screen_capture.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ def process_image_file(
4747
callback: Callable[[str | None, str | None], None],
4848
on_processing: Callable[[], None] | None = None,
4949
lang: str = "eng",
50+
psm: int = 3,
51+
oem: int = 3,
5052
) -> None:
5153
"""Process an existing image file and extract text.
5254
@@ -55,42 +57,50 @@ def process_image_file(
5557
callback: Callback function to receive the result
5658
on_processing: Optional callback invoked when processing starts
5759
lang: Language code for OCR (default: "eng")
60+
psm: Page segmentation mode (default: 3 - fully automatic)
61+
oem: OCR engine mode (default: 3 - default engine, based on what is available)
5862
"""
5963
self._pending_callback = callback
6064
self._pending_processing_callback = on_processing
6165

62-
thread = threading.Thread(target=self._run_image_process, args=(image_path, lang))
66+
thread = threading.Thread(target=self._run_image_process, args=(image_path, lang, psm, oem))
6367
thread.daemon = True
6468
thread.start()
6569

66-
def _run_image_process(self, image_path: str, lang: str = "eng") -> None:
70+
def _run_image_process(
71+
self, image_path: str, lang: str = "eng", psm: int = 3, oem: int = 3
72+
) -> None:
6773
"""Execute the image processing in a thread."""
6874
self._invoke_processing_callback()
69-
text = self.extract_text_from_image(image_path, lang)
75+
text = self.extract_text_from_image(image_path, lang, psm, oem)
7076
self._invoke_callback(text, None)
7177

7278
def capture_screen_region(
7379
self,
7480
callback: Callable[[str | None, str | None], None],
7581
on_processing: Callable[[], None] | None = None,
7682
lang: str = "eng",
83+
psm: int = 3,
84+
oem: int = 3,
7785
) -> None:
7886
"""Capture a region of the screen and extract text from it.
7987
8088
Args:
8189
callback: Callback function to receive the result (text, error)
8290
on_processing: Optional callback invoked when processing starts
8391
lang: Language code for OCR (default: "eng")
92+
psm: Page segmentation mode (default: 3 - fully automatic)
93+
oem: OCR engine mode (default: 3 - default engine, based on what is available)
8494
"""
8595
self._pending_callback = callback
8696
self._pending_processing_callback = on_processing
8797

8898
# Run capture in a separate thread to avoid freezing the UI
89-
thread = threading.Thread(target=self._run_capture_thread, args=(lang,))
99+
thread = threading.Thread(target=self._run_capture_thread, args=(lang, psm, oem))
90100
thread.daemon = True
91101
thread.start()
92102

93-
def _run_capture_thread(self, lang: str) -> None:
103+
def _run_capture_thread(self, lang: str, psm: int = 3, oem: int = 3) -> None:
94104
"""Execute the capture and OCR process in a thread."""
95105
try:
96106
# Generate a temporary file path
@@ -116,7 +126,7 @@ def _run_capture_thread(self, lang: str) -> None:
116126
self._invoke_processing_callback()
117127

118128
# Extract text
119-
text = self.extract_text_from_image(temp_path, lang)
129+
text = self.extract_text_from_image(temp_path, lang, psm, oem)
120130
self._cleanup_temp_file(temp_path)
121131
self._invoke_callback(text, None)
122132
else:
@@ -214,12 +224,16 @@ def _capture_with_cli_tools(self, temp_path: str) -> bool:
214224
logger.error(f"Screenshot capture error: {e}")
215225
return False
216226

217-
def extract_text_from_image(self, image_path: str, lang: str = "eng") -> str | None:
227+
def extract_text_from_image(
228+
self, image_path: str, lang: str = "eng", psm: int = 3, oem: int = 3
229+
) -> str | None:
218230
"""Extract text from an image using Tesseract OCR.
219231
220232
Args:
221233
image_path: Path to the image file
222234
lang: Language to use for OCR (default: "eng")
235+
psm: Page segmentation mode (default: 3 - fully automatic)
236+
oem: OCR engine mode (default: 3 - default engine, based on what is available)
223237
224238
Returns:
225239
Extracted text or None on error
@@ -231,8 +245,18 @@ def extract_text_from_image(self, image_path: str, lang: str = "eng") -> str | N
231245
self._invoke_callback(None, _("Tesseract OCR engine not found. Please install it."))
232246
return None
233247

234-
# Direct tesseract execution
235-
args = ["tesseract", image_path, "stdout", "-l", lang]
248+
# Direct tesseract execution with psm and oem
249+
args = [
250+
"tesseract",
251+
image_path,
252+
"stdout",
253+
"-l",
254+
lang,
255+
"--psm",
256+
str(psm),
257+
"--oem",
258+
str(oem),
259+
]
236260

237261
logger.info(f"Executing OCR: {' '.join(args)}")
238262
result = subprocess.run(args, capture_output=True, text=True, timeout=30)

0 commit comments

Comments
 (0)