12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- """
- 图像预处理,提高OCR识别率
- 1. 图像光照均匀性分析,获取图像光照分布map;
- 2. 图像边缘分析、得到图像的边缘分布模型,得到总体的边缘度量权值w1;
- 3. 图像模糊度计算,得到图像模糊权值w2.
- 4. 对图像进行局部分块处理,利用图像对应分块的map特征、w1、w2的权值,得到图像每个分块的局部二值结果。
- 5. 对整幅图像局部二值化结果进行空白填充处理防止字符断裂,对分割的游离点进行分析剔除异常噪点
- 6. 直线剔除。
- Scaling To The Right Size
- Ensure that the images are scaled to the right size which usually is of at least 300 DPI (Dots Per Inch). Keeping DPI
- lower than 200 will give unclear and incomprehensible results while keeping the DPI above 600 will unnecessarily
- increase the size of the output file without improving the quality of the file. Thus, a DPI of 300 works best for this
- purpose.
- Increase Contrast
- Low contrast can result in poor OCR. Increase the contrast and density before carrying out the OCR process. This can be
- done in the scanning software itself or in any other image processing software. Increasing the contrast between the
- text/image and its background brings out more clarity in the output.
- Binarize Image
- This step converts a multicolored image (RGB) to a black and white image. There are several algorithms to convert a
- color image to a monochrome image, ranging from simple thresholding to more sophisticated zonal analysis.
- Remove Noise and Scanning Artefacts
- Noise can drastically reduce the overall quality of the OCR process. It can be present in the background or foreground
- and can result from poor scanning or the poor original quality of the data.
- Deskew
- This may also be referred to as rotation. This means de-skewing the image to bring it into the right format and right
- shape. The text should appear horizontal and not tilted at any angle. If the image is skewed to either side, deskew it by
- rotating it in a clockwise or anti-clockwise direction.
- Layout Analysis (or Zone Analysis)
- In order to detect words correctly, it is important to first recognize the zones or the layout (which are also the areas
- of interest). This step detects the paragraphs, tables, columns, captions of the images etc. If the software misses out
- on any zone or layout, words might be cut in half or not detected at all.
- """
- import cv2
- import numpy as np
- from segment.image_operation import utils
- # 读取图片,生成预处理的图像
def preprocess(picture, scale, dilate, blur, show=False):
    """Load an image and return a binarized version of it for OCR.

    The stages run in a fixed order: optional rescale, grayscale
    conversion, optional dilate-then-erode pass, optional Gaussian blur,
    and finally Otsu thresholding.

    Args:
        picture: Image reference handed unchanged to ``utils.read_img``
            (presumably a file path -- confirm against that helper).
        scale: Factor applied to both axes with bicubic interpolation.
            ``0`` skips the resize.
        dilate: Side length of the square all-ones kernel used for the
            dilate/erode pass. ``0`` skips the pass.
        blur: Side length of the square Gaussian kernel. ``0`` skips the
            blur.
            NOTE(review): OpenCV expects an odd kernel size here --
            confirm callers only pass odd values.
        show: When true, display the result in a window titled
            ``'image'`` and wait for a key press before returning.

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    source = utils.read_img(picture)

    # Optional uniform rescale along both axes.
    if scale != 0:
        source = cv2.resize(
            source,
            None,
            fx=scale,
            fy=scale,
            interpolation=cv2.INTER_CUBIC,
        )

    # NOTE(review): BGR2GRAY assumes read_img yields a 3-channel BGR
    # image -- confirm against utils.read_img.
    gray = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)

    # Dilation followed by erosion with the same kernel, to suppress
    # small noise.
    if dilate != 0:
        structuring = np.ones((dilate, dilate), dtype=np.uint8)
        gray = cv2.erode(
            cv2.dilate(gray, structuring, iterations=1),
            structuring,
            iterations=1,
        )

    # Smooth the edges before thresholding.
    if blur != 0:
        gray = cv2.GaussianBlur(gray, (blur, blur), 0)

    # With THRESH_OTSU set, the threshold value 0 passed here is a
    # placeholder; only the image half of the returned pair is kept.
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    if show:
        cv2.imshow('image', binary)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return binary
|