@@ -47,6 +47,8 @@ def process_image_file(
4747 callback : Callable [[str | None , str | None ], None ],
4848 on_processing : Callable [[], None ] | None = None ,
4949 lang : str = "eng" ,
50+ psm : int = 3 ,
51+ oem : int = 3 ,
5052 ) -> None :
5153 """Process an existing image file and extract text.
5254
@@ -55,42 +57,50 @@ def process_image_file(
5557 callback: Callback function to receive the result
5658 on_processing: Optional callback invoked when processing starts
5759 lang: Language code for OCR (default: "eng")
60+ psm: Page segmentation mode (default: 3 - fully automatic)
61+ oem: OCR engine mode (default: 3 - default based on available)
5862 """
5963 self ._pending_callback = callback
6064 self ._pending_processing_callback = on_processing
6165
62- thread = threading .Thread (target = self ._run_image_process , args = (image_path , lang ))
66+ thread = threading .Thread (target = self ._run_image_process , args = (image_path , lang , psm , oem ))
6367 thread .daemon = True
6468 thread .start ()
6569
def _run_image_process(
    self, image_path: str, lang: str = "eng", psm: int = 3, oem: int = 3
) -> None:
    """Run OCR on an existing image file; used as the worker-thread target.

    Args:
        image_path: Path of the image handed to the OCR step
        lang: Language code for OCR (default: "eng")
        psm: Page segmentation mode (default: 3 - fully automatic)
        oem: OCR engine mode (default: 3 - default based on available)
    """
    # Signal "processing started" before the OCR call is made.
    self._invoke_processing_callback()

    extracted = self.extract_text_from_image(image_path, lang=lang, psm=psm, oem=oem)

    # NOTE(review): extract_text_from_image already reports a missing
    # Tesseract binary through the callback and then returns None, so this
    # call may deliver a second (None, None) result in that case - confirm
    # that _invoke_callback tolerates being invoked twice.
    self._invoke_callback(extracted, None)
7177
def capture_screen_region(
    self,
    callback: Callable[[str | None, str | None], None],
    on_processing: Callable[[], None] | None = None,
    lang: str = "eng",
    psm: int = 3,
    oem: int = 3,
) -> None:
    """Start a background capture of a screen region followed by OCR.

    The method only stores the callbacks and launches the worker thread;
    the capture and text extraction happen in ``_run_capture_thread``.

    Args:
        callback: Callback function to receive the result (text, error)
        on_processing: Optional callback invoked when processing starts
        lang: Language code for OCR (default: "eng")
        psm: Page segmentation mode (default: 3 - fully automatic)
        oem: OCR engine mode (default: 3 - default based on available)
    """
    # Remember who to notify; the worker reads these via the _invoke_* helpers.
    self._pending_processing_callback = on_processing
    self._pending_callback = callback

    # The capture runs off the calling thread so the UI does not freeze.
    worker = threading.Thread(
        target=self._run_capture_thread,
        args=(lang, psm, oem),
        daemon=True,
    )
    worker.start()
92102
93- def _run_capture_thread (self , lang : str ) -> None :
103+ def _run_capture_thread (self , lang : str , psm : int = 3 , oem : int = 3 ) -> None :
94104 """Execute the capture and OCR process in a thread."""
95105 try :
96106 # Generate a temporary file path
@@ -116,7 +126,7 @@ def _run_capture_thread(self, lang: str) -> None:
116126 self ._invoke_processing_callback ()
117127
118128 # Extract text
119- text = self .extract_text_from_image (temp_path , lang )
129+ text = self .extract_text_from_image (temp_path , lang , psm , oem )
120130 self ._cleanup_temp_file (temp_path )
121131 self ._invoke_callback (text , None )
122132 else :
@@ -214,12 +224,16 @@ def _capture_with_cli_tools(self, temp_path: str) -> bool:
214224 logger .error (f"Screenshot capture error: { e } " )
215225 return False
216226
217- def extract_text_from_image (self , image_path : str , lang : str = "eng" ) -> str | None :
227+ def extract_text_from_image (
228+ self , image_path : str , lang : str = "eng" , psm : int = 3 , oem : int = 3
229+ ) -> str | None :
218230 """Extract text from an image using Tesseract OCR.
219231
220232 Args:
221233 image_path: Path to the image file
222234 lang: Language to use for OCR (default: "eng")
235+ psm: Page segmentation mode (default: 3 - fully automatic)
236+ oem: OCR engine mode (default: 3 - default based on available)
223237
224238 Returns:
225239 Extracted text or None on error
@@ -231,8 +245,18 @@ def extract_text_from_image(self, image_path: str, lang: str = "eng") -> str | N
231245 self ._invoke_callback (None , _ ("Tesseract OCR engine not found. Please install it." ))
232246 return None
233247
234- # Direct tesseract execution
235- args = ["tesseract" , image_path , "stdout" , "-l" , lang ]
248+ # Direct tesseract execution with psm and oem
249+ args = [
250+ "tesseract" ,
251+ image_path ,
252+ "stdout" ,
253+ "-l" ,
254+ lang ,
255+ "--psm" ,
256+ str (psm ),
257+ "--oem" ,
258+ str (oem ),
259+ ]
236260
237261 logger .info (f"Executing OCR: { ' ' .join (args )} " )
238262 result = subprocess .run (args , capture_output = True , text = True , timeout = 30 )
0 commit comments