还是基于医院检查单的需求:后期会有大量的样本图片,需要先分别归类,再去识别验证。通过程序自动分类可以减少人工分类的成本,下面用简单的代码实现一下。
思路
先通过百度普通OCR识别图片中的文字信息,再遍历识别出的文字,看是否包含相关的关键字。如果百度OCR识别不出来,再用本地的tesseract识别一次;仍然识别不出的,统一放到一个目录中,由人工处理。
分类前:
分类后:
代码实现
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022-7-8 上午 9:09
# @Author : damon.guo
# @File : picToClass.py
# @Software: PyCharm
import cv2
from PIL import Image
import pytesseract
import os,sys
import shutil
from aip import AipOcr
# Keywords searched for in recognised text lines; getTagIndex and
# baiduocrreslut iterate over this list in order to find the line (or the
# part of a line) that is used as the classification tag.
gettaglist = ["报告单", "报告", "医院", "卫生院", "中心"]
def picToClass(picturePath):
    """Classify an image with the local Tesseract engine (fallback path).

    The image is converted to grayscale, blurred, and reduced to its Canny
    edge map; the edge map is then passed to Tesseract with the ``chi_sim``
    model. The first recognised line for which ``getTagIndex`` finds a
    keyword supplies the tag: the line is cut at the index it returns.

    Args:
        picturePath: Path of the image file to classify.

    Returns:
        The cut line with surrounding spaces, ":" and "%" stripped, or
        "other" when the image cannot be read or no keyword is recognised.
    """
    image = cv2.imread(picturePath)
    if image is None:
        # cv2.imread reports an unreadable/unsupported file by returning
        # None instead of raising; send such files to the manual bucket.
        print("二次识别", picturePath)
        return "other"

    # Image enhancement: grayscale -> blur -> edge detection.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # Pass the edge map to Tesseract in memory. The previous version wrote
    # it to a hard-coded temp JPEG whose directory might not exist and
    # re-opened it without closing the handle.
    text = pytesseract.image_to_string(Image.fromarray(edged), lang='chi_sim')
    print("二次识别", picturePath)

    for line in text.split("\n"):
        index = getTagIndex(line)
        if index != -1:
            news = line[:index]
            print("nes", news)
            return news.strip(" ").strip(":").strip("%")

    # Still not recognised: everything unknown goes into one directory.
    return "other"
def getclient(APP_ID, API_KEY, SECRET_KEY):
    """Create a Baidu OCR client from the caller's credentials.

    Args:
        APP_ID: Your App ID.
        API_KEY: Your Api Key.
        SECRET_KEY: Your Secret Key.

    Returns:
        The ``AipOcr`` instance built from the three values.
    """
    return AipOcr(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(imagepath):
    """Return the complete contents of the file at ``imagepath`` as bytes."""
    with open(imagepath, mode='rb') as handle:
        payload = handle.read()
    return payload
def ocrsdk(imagepath):
    """Determine the classification tag for one image.

    The image is first sent to the Baidu OCR SDK (``basicAccurate``) and the
    response is parsed by ``baiduocrreslut``. When that yields "other", the
    image is run through the local OCR in ``picToClass`` once more.

    Args:
        imagepath: Path of the image file to classify.

    Returns:
        The tag from the Baidu result, otherwise whatever ``picToClass``
        returns (which is "other" when it also finds nothing).
    """
    # Try the Baidu SDK first. The "xx" values are placeholders for the
    # real App ID / Api Key / Secret Key.
    client = getclient("xx", "xx", "xx")
    image = get_file_content(imagepath)

    # Request options; only the recognition language is set.
    options = {"language_type": "CHN_ENG"}
    res_image = client.basicAccurate(image, options)

    # Log the path being processed. This used to print the raw image
    # bytes, flooding the console with binary data.
    print("识别图片:", imagepath)

    tag = baiduocrreslut(res_image)
    if tag == "other":
        # Baidu could not classify it: try the local OCR once.
        tag = picToClass(imagepath)
    return tag
def getTagIndex(strs):
    """Return the cut position just past the first keyword found in ``strs``.

    Keywords are tried in ``gettaglist`` order. Slicing ``strs[:result]``
    keeps everything up to and including the matched keyword.

    The offset is the keyword's own length. A fixed offset of 2 chopped the
    last character off three-character keywords such as "报告单" and
    "卫生院"; for two-character keywords the result is unchanged.

    Args:
        strs: One line of recognised text.

    Returns:
        The end index of the matched keyword, or -1 when no keyword occurs.
    """
    for keyword in gettaglist:
        position = strs.find(keyword)
        if position != -1:
            return position + len(keyword)
    return -1
def copyFile(src, dst):
    """Copy the file ``src`` to ``dst`` using ``shutil.copy``.

    The value returned by ``shutil.copy`` is discarded; this helper
    returns None.
    """
    shutil.copy(src, dst)
def baiduocrreslut(res):
    """Extract the classification tag from a Baidu OCR response.

    Every recognised line that contains a ``gettaglist`` keyword is printed;
    the first such line that is at least 4 characters long is returned
    unchanged as the tag.

    Args:
        res: Response dict from the Baidu OCR call. A successful response
            carries a "words_result" list of ``{"words": ...}`` entries.

    Returns:
        The matching line of text, or "other" when nothing matches or the
        response carries no "words_result" (e.g. an API error payload).
    """
    # A failed request has no "words_result" key. Treat it as "nothing
    # recognised" so the caller can fall back to local OCR instead of
    # crashing with KeyError.
    wordlist = (res or {}).get("words_result") or []
    for entry in wordlist:
        words = entry["words"]
        for keyword in gettaglist:
            if keyword in words:
                print(words)
                # Very short hits (under 4 characters) are too generic to
                # serve as a category name; keep looking.
                if len(words) >= 4:
                    return words
    return "other"
def _safe_dirname(tag):
    """Make an OCR-derived tag usable as a single directory name."""
    # Characters that are path separators or are rejected in Windows file
    # names; OCR text such as "报告日期:2022/7/8" would otherwise create
    # nested directories or make os.makedirs fail.
    forbidden = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
    cleaned = tag.translate(forbidden).strip(" .")
    return cleaned or "other"


def main():
    """Sort every image in the input directory into per-tag subdirectories.

    For each regular file in ``path`` the tag is obtained from ``ocrsdk``,
    a directory of that name is created under ``classpath`` if needed, and
    the file is copied there unless a file of the same name already exists.
    """
    path = r"F:\xbl\preclass"  # directory with the images to classify
    classpath = r"F:\xbl\class"  # root directory for the classified images

    for filename in os.listdir(path):
        imagepath = os.path.join(path, filename)
        if not os.path.isfile(imagepath):
            # os.listdir also returns subdirectories; they cannot be OCR'd.
            continue

        tag = _safe_dirname(ocrsdk(imagepath))
        tagpath = os.path.join(classpath, tag)
        os.makedirs(tagpath, exist_ok=True)

        newimagepath = os.path.join(tagpath, filename)
        if not os.path.exists(newimagepath):
            copyFile(imagepath, newimagepath)
# Run the classification only when this file is executed as a script.
if __name__ == '__main__':
    main()