import json import os.path
from fontTools.ttLib import TTFont from PIL import Image, ImageDraw, ImageFont from paddleocr import PaddleOCR
def get_character_image(
    unicode_char,
    size=120,
    color=(0, 0, 0),
    background=(255, 255, 255),
    font_path='exam_font_75f40cd3f2514aae89c62ff49d571f02.ttf',
    font_size=100,
) -> Image.Image:
    """Render one character centred on a square RGB canvas.

    Args:
        unicode_char: The text (normally a single character) to draw.
        size: Width and height of the square image, in pixels.
        color: RGB fill colour used for the glyph and its stroke.
        background: RGB colour the canvas is filled with.
        font_path: TrueType font file used for rendering. The default is a
            relative path, so it is resolved against the current working
            directory.
        font_size: Size passed to ``ImageFont.truetype``.

    Returns:
        A new ``PIL.Image.Image`` of ``size`` x ``size`` pixels.
    """
    # Pillow documents the stroke width as an integer number of pixels.
    stroke_width = 1

    img = Image.new('RGB', (size, size), background)
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype(font_path, font_size)

    # getbbox() returns (left, top, right, bottom) relative to the drawing
    # origin, not (0, 0, width, height).  The left/top bearing has to be
    # subtracted, otherwise the glyph ends up shifted right/down of centre.
    left, top, right, bottom = font.getbbox(
        unicode_char, stroke_width=stroke_width
    )
    x = (size - (right - left)) / 2 - left
    y = (size - (bottom - top)) / 2 - top

    draw.text(
        (x, y),
        unicode_char,
        font=font,
        fill=color,
        stroke_width=stroke_width,
    )
    return img
def main():
    """Build a glyph-to-text map for the exam font and save it as JSON.

    For every code point in the font's Unicode cmap tables, an image of the
    character is rendered to ``./images/<codepoint>.png`` (existing images
    are reused), the image is passed to PaddleOCR with ``det=False`` and
    ``cls=False``, and the returned text is stored under the original
    character. The resulting mapping is written to ``rec_result.json``.
    """
    image_dir = './images'

    exam_font = TTFont('./exam_font_75f40cd3f2514aae89c62ff49d571f02.ttf')
    ocr = PaddleOCR(lang='ch')

    # Image.save() does not create parent directories; without this the
    # first save fails on a fresh checkout.
    os.makedirs(image_dir, exist_ok=True)

    rec_result = {}
    for table in exam_font['cmap'].tables:
        if not table.isUnicode():
            continue
        for k in table.cmap:
            char = chr(k)
            # The same code point can appear in several cmap tables;
            # recognise each character only once.
            if char in rec_result:
                continue

            img_path = f'{image_dir}/{k}.png'
            if not os.path.exists(img_path):
                get_character_image(char).save(img_path)

            # The first entry of the first result is unpacked as a
            # (text, second value) pair; the second value is discarded.
            text, _ = ocr.ocr(img_path, det=False, cls=False)[0][0]
            print(f'{img_path}: {char} -> {text}')
            rec_result[char] = text

    # Explicit encoding keeps the output independent of the platform locale.
    with open('rec_result.json', 'w', encoding='utf-8') as f:
        json.dump(rec_result, f)
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|