# @Author : lightXu
# @File : views.py
# @Time : 2018/7/19 0019 PM 14:28
import json
import os
import time
import uuid

import cv2
import numpy as np
import requests
from PIL import Image
from django.conf import settings
from django.http import HttpResponse
from django.shortcuts import render
from django.views.decorators.csrf import csrf_exempt

import segment.logging_config as logging
from segment.form import UploadImageForm, FormulaUrlForm, UploadFileForm
from segment.formula import formula_segment, formula_segment_and_show
from segment.image_operation.utils import png_read
from segment.image_operation.utils import write_single_img, resize_by_percent
from segment.sheet_resolve.tools.utils import NpEncoder
from segment.models import ExamImage
from segment.server import (
    get_exam_bbox_by_tesseract,
    get_exam_ocr,
    opencv2base64,
    get_exam_box,
    get_exam_ocr_by_penguin,
    get_segment_by_ocr_once,
    get_exam_ocr_once,
    save_pdf_image,
    save_raw_image,
    save_raw_image_without_segment,
    ocr_login,
    save_raw_image_in_jpeg,
)

# Module-wide logger; the logger name comes from the Django settings.
logger = logging.getLogger(settings.LOGGING_TYPE)

# Maps the numeric subject id submitted with a form to the subject name
# that the views pass on to the storage / OCR helpers.
subject_id_dict = {
    0: 'unknown_subject',
    3: 'math',
    6: 'math_zxhx',
    8: 'english',
    9: 'chinese',
    12: 'physics',
    13: 'chemistry',
    14: 'biology',
    15: 'politics',
    16: 'history',
    17: 'geography',
    18: 'science_comprehensive',
    19: 'arts_comprehensive',
    98: 'english_B',
    99: 'english_T',
}


# Create your views here.
def _today_str():
    """Return the current local date formatted as ``YYYY-MM-DD``."""
    return time.strftime('%Y-%m-%d', time.localtime(time.time()))


def _resolve_subject(form):
    """Return ``(subject_id, subject_name)`` from a validated form.

    Ids missing from ``subject_id_dict`` map to ``'unknown_subject'``.
    """
    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    return subject_id, subject


def _form_error_response(form, status_key):
    """Build the response returned when form validation fails.

    ``status_key`` is the name of the status field (``'isSuccess'`` or
    ``'is_success'``, depending on the view); its value is always 99.
    """
    res = {status_key: 99, 'error': form.errors.as_json()}
    # NOTE(review): the body is the Python repr of the dict, not JSON, unlike
    # the success responses. Kept as-is so existing clients see no change.
    return HttpResponse('{}'.format(res))


def _store_exam_image(time_str, raw_name, save_path, subject_id, subject):
    """Persist one ``ExamImage`` row describing a stored upload."""
    ExamImage(upload_date=time_str, raw_name=raw_name, save_path=save_path,
              subject_id=subject_id, subject=subject).save()


def _elapsed(start_time):
    """Return the seconds elapsed since ``start_time`` as e.g. ``'1.23s'``."""
    return '{:.2f}s'.format(float(time.time() - start_time))


def _json_result_response(is_success, res_info_list, log_template, error_info=''):
    """Serialize the per-image results to JSON, log them and return them.

    ``log_template`` is the ``str.format`` template used for the log line;
    ``error_info`` is added under ``'error'`` only when non-empty.
    """
    res = {'isSuccess': is_success, 'imgs_info': res_info_list}
    if error_info:
        res['error'] = error_info
    res_json = json.dumps(res, ensure_ascii=False, cls=NpEncoder)
    logger.info(log_template.format(res_json))
    return HttpResponse(res_json)


def _render_formula_page(request, raw_name, img_url_path, text_info,
                         raw_text_info, img_height):
    """Render ``showimg.html`` for the two formula "show" views.

    The displayed height is 300, or ``img_height + 50`` when the image is
    taller than 300.
    """
    height = 300
    if img_height > height:
        height = img_height + 50
    res_dict = {
        'url': img_url_path,
        'txt_url': os.path.join('/segment/', raw_name.replace('.jpg', '.html')),
        'texts': ''.join(text_info),
        'raw_texts': ''.join(raw_text_info),
        'name': raw_name.replace('.jpg', ''),
        'img_height': height,
        'text_height': 1.5 * height,
    }
    return render(request, 'showimg.html', res_dict)


def _render_formula_upload_page(request):
    """Render ``uploadimg.html`` with empty image and formula-URL forms."""
    return render(request, 'uploadimg.html',
                  {'img_form': UploadImageForm(), 'formula_form': FormulaUrlForm()})


def index(request):
    """Render the exam bounding-box upload page."""
    return render(request, 'exam_bbox.html')


@csrf_exempt
def upload_img(request):
    """Store uploaded exam images and return their tesseract bounding boxes.

    GET renders the upload form. POST validates ``UploadImageForm``, stores
    every file under ``img_data`` and returns a JSON document with
    ``isSuccess`` (status of the last processed image) and ``imgs_info``.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = _today_str()
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'isSuccess')

    subject_id, subject = _resolve_subject(form)
    res_info_list = []
    is_success = 1
    for img in request.FILES.getlist('img_data'):
        start_time = time.time()
        raw_name = img.name
        # exam_analysis_show unpacks three values from save_raw_image, while
        # this view used to unpack two; star-unpacking works with either.
        save_path, *_ = save_raw_image(subject, time_str, img, 'segment')
        try:
            _store_exam_image(time_str, raw_name, save_path, subject_id, subject)
            logger.info('{}试卷 {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            # A failed DB write is logged but does not stop the analysis.
            logger.info('{}试卷 {} 存储失败: {}'.format(subject, raw_name, e))

        status, bbox_info = get_exam_bbox_by_tesseract(raw_name, save_path, subject)
        is_success = status
        bbox_info.update({'cost_time': _elapsed(start_time)})
        res_info_list.append(bbox_info)

    return _json_result_response(is_success, res_info_list, 'segment_info: {}')


@csrf_exempt
# Split an exam paper into questions.
def analysis_exam_view(request):
    """Store uploaded exam images and segment each one via a single OCR pass.

    GET renders the upload form. POST returns JSON with ``isSuccess``,
    ``imgs_info`` and, when OCR login or processing raised, ``error``.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = _today_str()
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'isSuccess')

    subject_id, subject = _resolve_subject(form)
    res_info_list = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        for img in request.FILES.getlist('img_data'):
            start_time = time.time()
            raw_name = img.name
            save_path = ''
            opencv_img = ''
            try:
                save_path, opencv_img, _ = save_raw_image_without_segment(
                    subject, time_str, img, 'segment')
                _store_exam_image(time_str, raw_name, save_path, subject_id, subject)
                logger.info('{}试卷: {} 存储成功: {}'.format(subject, raw_name, save_path))
            except Exception as e:
                logger.info('{}试卷: {} 存储失败: {}'.format(subject, raw_name, e))

            status, bbox_info = get_segment_by_ocr_once(
                opencv_img, access_token, subject, save_path, raw_name)
            is_success = status
            bbox_info.update({'cost_time': _elapsed(start_time)})
            res_info_list.append(bbox_info)
    except Exception as e:
        # Any failure aborts the remaining images; results gathered so far
        # are still returned.
        logger.info('ocr error: {}'.format(e))
        is_success = 0
        error_info = 'ocr error'

    return _json_result_response(is_success, res_info_list, 'segment_info: {}\n', error_info)


@csrf_exempt
# Recognize the text of an exam paper.
def ocr_exam_view(request):
    """Store uploaded exam images and return their recognized text.

    The OCR helper is chosen by subject: ``english`` / ``english_B`` are
    recognized as one part, ``english_T`` uses ``get_exam_ocr_by_penguin``,
    every other subject uses ``get_exam_ocr_once``.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = _today_str()
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'isSuccess')

    subject_id, subject = _resolve_subject(form)
    res_info_list = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        for img in request.FILES.getlist('img_data'):
            start_time = time.time()
            raw_name = img.name
            img_mem_size = img.size
            save_path = ''
            # Reset per image: previously this was initialised once before
            # the loop, so a failed save reused the previous image's pixels.
            opencv_img = ''
            try:
                save_path, opencv_img, _ = save_raw_image_without_segment(
                    subject, time_str, img, 'ocr')
                _store_exam_image(time_str, raw_name, save_path, subject_id, subject)
                logger.info('{}试卷: {} 存储成功: {}'.format(subject, raw_name, save_path))
            except Exception as e:
                logger.info('{}试卷: {} 存储失败: {}'.format(subject, raw_name, e))

            if subject in ('english', 'english_B'):
                # English papers are not split into columns; recognize directly.
                bin_parts_img_list = [{'img_part': opencv2base64(opencv_img)}]
                status, text_info = get_exam_ocr(
                    raw_name, bin_parts_img_list, save_path, subject, access_token)
            elif subject == 'english_T':
                status, text_info = get_exam_ocr_by_penguin(
                    raw_name, opencv_img, img_mem_size, save_path, subject)
            else:
                # Recognize and split into columns.
                status, text_info = get_exam_ocr_once(
                    opencv_img, access_token, subject, save_path, raw_name)

            is_success = status
            text_info.update({'cost_time': _elapsed(start_time)})
            res_info_list.append(text_info)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))
        is_success = 0
        error_info = 'ocr error'

    return _json_result_response(is_success, res_info_list, 'text_info: {}\n', error_info)


@csrf_exempt
def ocr_exam_view_of_pdf(request):
    """Convert an uploaded PDF to page images and return their OCR text.

    A file whose name does not end in ``.pdf`` (case-insensitive) yields a
    JSON error payload with ``isSuccess`` 0 instead of an unhandled
    ``ValueError``.
    """
    if request.method != 'POST':
        # NOTE(review): the GET branch builds UploadImageForm although POST
        # validates UploadFileForm - confirm which one the template expects.
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = _today_str()
    form = UploadFileForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'isSuccess')

    subject_id, subject = _resolve_subject(form)
    pdf_file = request.FILES.get('img_data')
    pdf_name = pdf_file.name
    if not pdf_name.lower().endswith('.pdf'):
        return _json_result_response(
            0, [], 'text_info: {}\n', '{} is not a pdf file'.format(pdf_name))

    upload_img_list, images_list = save_pdf_image(pdf_file, subject, time_str)
    res_info_list = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        # NOTE(review): paths are sorted but images_list is indexed in its
        # original order; this presumes save_pdf_image already returns both
        # lists in matching sorted order - confirm.
        for pdf_img_index, img_path in enumerate(sorted(upload_img_list)):
            start_time = time.time()
            save_name = ''
            try:
                save_name = '{}_{}_{:04d}'.format(pdf_name[:-4], 'pdf', pdf_img_index + 1)
                _store_exam_image(time_str, save_name, img_path, subject_id, subject)
                logger.info('{}试卷: {} 存储成功: {}'.format(subject, save_name, img_path))
            except Exception as e:
                logger.info('{}试卷: {} 存储失败: {}'.format(subject, save_name, e))

            status, text_info = get_exam_ocr_once(
                images_list[pdf_img_index], access_token, subject, img_path, save_name)
            is_success = status
            text_info.update({'cost_time': _elapsed(start_time)})
            res_info_list.append(text_info)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))
        is_success = 0
        error_info = 'ocr error'

    return _json_result_response(is_success, res_info_list, 'text_info: {}\n', error_info)


@csrf_exempt
def formula_analysis_show(request):
    """Segment the formulas of one uploaded image and render the result page.

    Failures are logged and the page is rendered with whatever was
    obtained before the failure (empty texts by default).
    """
    if request.method != 'POST':
        return _render_formula_upload_page(request)

    time_str = _today_str()
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'is_success')

    subject_id, subject = _resolve_subject(form)
    img = request.FILES.get('img_data')
    raw_name = img.name
    text_info = ''
    raw_text_info = ''
    img_url_path = ''
    img_height = 0
    try:
        save_path = ''
        opencv_img = ''
        try:
            save_path, opencv_img, img_url_path = save_raw_image_without_segment(
                subject, time_str, img, 'formula')
            _store_exam_image(time_str, raw_name, save_path, subject_id, subject)
            logger.info('{}试卷: {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            logger.info('{}试卷: {} 存储失败: {}'.format(subject, raw_name, e))

        access_token = ocr_login()
        text_info, raw_text_info, img_height = formula_segment_and_show.segment(
            opencv_img, save_path, access_token)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))

    return _render_formula_page(request, raw_name, img_url_path, text_info,
                                raw_text_info, img_height)


@csrf_exempt
def ai_formula_analysis_show(request):
    """Run the AI formula recognizer on one uploaded image and render it.

    The image is stored as JPEG, its file name is written to a sibling
    ``.txt`` file, and that file plus the image directory are handed to
    ``formula_segment_and_show.get_latex_by_ai_formula``.
    """
    if request.method != 'POST':
        return _render_formula_upload_page(request)

    time_str = _today_str()
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'is_success')

    subject_id, subject = _resolve_subject(form)
    img = request.FILES.get('img_data')
    raw_name = img.name
    text_info = ''
    raw_text_info = ''
    img_url_path = ''
    img_height = 0
    try:
        save_path = ''
        opencv_img = ''
        try:
            save_path, img_url_path, opencv_img = save_raw_image_in_jpeg(
                subject, time_str, img, 'formula')
            _store_exam_image(time_str, raw_name, save_path, subject_id, subject)
            logger.info('{}试卷: {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            logger.info('{}试卷: {} 存储失败: {}'.format(subject, raw_name, e))

        # Height of the stored image, used to size the rendered page. If the
        # save failed, opencv_img is '' and this raises into the outer except.
        img_height = opencv_img.shape[0]

        save_path = save_path.replace('\\', '/')
        formula_img_name = os.path.basename(save_path)
        formula_name_txt_path = save_path.replace('.jpg', '.txt')
        with open(formula_name_txt_path, 'w', encoding='utf-8') as writer:
            writer.write(formula_img_name + '\n')

        # Directory that holds the image; dirname avoids the former
        # str.replace on the file name, which also hit matching directory parts.
        save_dir = os.path.dirname(save_path)
        text_info, raw_text_info = formula_segment_and_show.get_latex_by_ai_formula(
            save_dir, formula_name_txt_path)
    except Exception as e:
        logger.info('ocr login error: {}'.format(e))

    return _render_formula_page(request, raw_name, img_url_path, text_info,
                                raw_text_info, img_height)


@csrf_exempt
def formula_analysis(request):
    """Download an image by URL, segment its formulas and return JSON.

    The JSON contains ``image_url``, ``texts``, ``is_success`` and, on
    failure, ``error``. The recognized text is also written to a ``.txt``
    file next to the downloaded image.
    """
    if request.method != 'POST':
        return _render_formula_upload_page(request)

    time_str = _today_str()
    form = FormulaUrlForm(request.POST, )
    if not form.is_valid():
        return _form_error_response(form, 'is_success')

    image_url = form.cleaned_data['img_url']
    error_info = ''
    text_info = ''
    try:
        # NOTE(security): image_url is client-supplied and fetched server-side
        # with no host restriction visible here - review for SSRF exposure.
        r = requests.get(image_url, timeout=3)
        # Fail early on HTTP errors instead of saving an error page as an image.
        r.raise_for_status()

        save_dir = os.path.join(settings.MEDIA_ROOT, 'formula', time_str)
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(save_dir, exist_ok=True)

        is_png = '.png' in image_url
        ext = 'png' if is_png else 'jpg'
        file_name = '{}.{}'.format(uuid.uuid4().hex[:10], ext)
        save_path = os.path.join(save_dir, file_name)
        with open(save_path, 'wb') as f:
            f.write(r.content)
        # PNG files go through the project's png_read helper, others cv2.imread.
        open_cv_image = png_read(save_path) if is_png else cv2.imread(save_path)

        access_token = ocr_login()
        text_info, _ = formula_segment.segment(open_cv_image, save_path, access_token)

        txt_path = save_path[:-3] + 'txt'
        # Explicit encoding so non-ASCII text cannot fail on the locale default.
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.writelines(text_info)
        is_success = 1
    except Exception as e:
        logger.info('analysis error: {}'.format(e))
        is_success = 0
        error_info = 'analysis error: {}'.format(e)

    res_dict = {'image_url': image_url,
                'texts': ''.join(text_info),
                'is_success': is_success}
    if error_info:
        res_dict['error'] = error_info
    res_json = json.dumps(res_dict, ensure_ascii=False, cls=NpEncoder)
    return HttpResponse(res_json)


@csrf_exempt
def exam_analysis_show(request):
    """Segment one uploaded exam image and render it with boxes drawn.

    The upload is scaled to 20%, every entry of ``bbox_info['coordinate']``
    is drawn as a green rectangle, and the result is written to the saved
    image path before ``showimg.html`` is rendered.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = _today_str()
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        return _form_error_response(form, 'is_success')

    subject_id, subject = _resolve_subject(form)
    img = request.FILES.get('img_data')
    raw_name = img.name
    img_url_path = ''
    bbox_info = ''
    save_path = ''
    try:
        save_path, bin_parts_img_list, img_url_path = save_raw_image(
            subject, time_str, img, 'segment')
        _store_exam_image(time_str, raw_name, save_path, subject_id, subject)
        access_token = ocr_login()
        _status, bbox_info = get_exam_box(
            raw_name, bin_parts_img_list, save_path, subject, access_token)
    except Exception as e:
        logger.info('ocr login error: {}'.format(e))

    raw_img = Image.open(img)  # read the uploaded image
    show_ratio = 0.2
    open_cv_image = resize_by_percent(np.array(raw_img), show_ratio)

    # After a failure bbox_info is still '' and has no coordinates; indexing
    # it used to raise TypeError and turn the request into a 500.
    coordinates = bbox_info.get('coordinate', []) if isinstance(bbox_info, dict) else []
    for ele in coordinates:
        xmin, ymin, xmax, ymax = (int(int(ele[i]) * show_ratio) for i in range(4))
        cv2.rectangle(open_cv_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)

    # Without a save path (storing failed) there is nowhere to write to.
    if save_path:
        write_single_img(open_cv_image, save_path)

    height = open_cv_image.shape[0]
    res_dict = {'url': img_url_path,
                'texts': bbox_info,
                'raw_texts': bbox_info,
                'name': raw_name.replace('.jpg', ''),
                'img_height': height,
                'text_height': 1.5 * height}
    return render(request, 'showimg.html', res_dict)