Abbyy Finereader Python May 2026
def ocr_document(self, input_path, output_path, output_format="docx", language="English"): """OCR a single document with full control.""" # Create document object doc = self.app.CreateDocument() # Add image page page = doc.AddImageFile(input_path, 0) # 0 = auto orientation # Analyze layout doc.AnalyzeLayout() # Recognize with specific language doc.Recognize(language) # Export if output_format == "docx": doc.Export(output_path, "DOCX") elif output_format == "txt": doc.Export(output_path, "TEXT") elif output_format == "pdf": doc.Export(output_path, "PDF") # Cleanup doc.Close() return output_path
return result.returncode fine_read_cli("scan.jpg", "output/result", "docx") Batch Processing with CLI from concurrent.futures import ThreadPoolExecutor from tqdm import tqdm def batch_ocr_cli(input_folder, output_folder, max_workers=4): """Process all images in a folder.""" input_folder = Path(input_folder) output_folder = Path(output_folder) output_folder.mkdir(exist_ok=True) abbyy finereader python
doc.Recognize("English") doc.Export(output_pdf_path, "PDF", export_params) doc.Close() "DOCX") elif output_format == "txt": doc.Export(output_path
def process_one(img_path): out_name = output_folder / f"img_path.stem_ocr" fine_read_cli(str(img_path), str(out_name), "txt") "TEXT") elif output_format == "pdf": doc.Export(output_path