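Tesseract OCR 的 Windows 安装程序(exe)下载地址:
UB-Mannheim/tesseract: Tesseract Open Source OCR Engine (main repository) — https://github.com/UB-Mannheim/tesseract
Index of /tesseract (uni-mannheim.de) — https://digi.bib.uni-mannheim.de/tesseract/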
import os

import fitz
import pytesseract
from PIL import Image


def pdf():
    """Dump the text of every page of ``./pdfs/focus1.pdf`` to a text file.

    The text of each page is written to ``./pdfs/focus1.txt`` (UTF-8),
    followed by a form feed character (``'\\f'``) as a page separator.
    The UTF-8 encoded bytes of each page are also echoed to stdout.
    """
    doc = fitz.open('./pdfs/focus1.pdf')
    try:
        # The file is opened in text mode, so it must receive ``str``.
        # Writing the encoded bytes would raise TypeError; the UTF-8
        # encoding is instead applied by the file object itself.
        with open('./pdfs/focus1.txt', 'w', encoding='utf-8') as out:
            for page in doc:
                text = page.get_text()
                # Echo the encoded bytes: their repr is pure ASCII and
                # cannot fail on consoles with a non-UTF-8 code page.
                print(text.encode('utf-8'))
                out.write(text)
                out.write('\f')  # form feed as the page separator
    finally:
        doc.close()


def OCR_demo():
    """Run Tesseract OCR on ``./imgs/1.png`` .. ``./imgs/11.png``.

    The recognized text of ``./imgs/<i>.png`` is written to
    ``./imgs/<i>.txt`` (UTF-8). Processing is best-effort: a failure on
    one image is reported on stdout and the remaining images are still
    processed.
    """
    for i in range(1, 12):
        # Build the paths outside the ``try`` so ``image_path`` is always
        # bound when the error message below is formatted.
        image_path = f'./imgs/{i}.png'
        text_path = f'./imgs/{i}.txt'
        try:
            # The context manager releases the image file handle promptly.
            with Image.open(image_path) as image:
                text = pytesseract.image_to_string(image)
            with open(text_path, 'w', encoding='utf-8') as out:
                out.write(text)
        except Exception as e:
            # Deliberately broad: one bad or missing image must not stop
            # the rest of the batch.
            print(f"发生错误处理文件 {image_path}: {e}")


if __name__ == '__main__':
    OCR_demo()