123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618 |
- # @Author : lightXu
- # @File : views.py
- # @Time : 2018/7/19 0019 下午 14:28
- import json
- import os
- import time
- import uuid
- import cv2
- import numpy as np
- import requests
- from PIL import Image
- from django.conf import settings
- from django.http import HttpResponse
- from django.shortcuts import render
- from django.views.decorators.csrf import csrf_exempt
- import segment.logging_config as logging
- from segment.form import UploadImageForm, FormulaUrlForm, UploadFileForm
- from segment.formula import formula_segment, formula_segment_and_show
- from segment.image_operation.utils import png_read
- from segment.image_operation.utils import write_single_img, resize_by_percent
- from segment.sheet_resolve.tools.utils import NpEncoder
- from segment.models import ExamImage
- from segment.server import get_exam_bbox_by_tesseract, get_exam_ocr, opencv2base64
- from segment.server import get_exam_box
- from segment.server import get_exam_ocr_by_penguin
- from segment.server import get_segment_by_ocr_once, get_exam_ocr_once
- from segment.server import save_pdf_image
- from segment.server import save_raw_image, save_raw_image_without_segment, ocr_login
- from segment.server import save_raw_image_in_jpeg
# Module-level logger shared by every view in this file. Note that `logging`
# is the project's `segment.logging_config` module (see the imports), and the
# logger name is read from the Django setting `LOGGING_TYPE`.
logger = logging.getLogger(settings.LOGGING_TYPE)
# Lookup table from the numeric subject id posted with an upload form to the
# subject name handed to the storage / OCR helpers. The views fall back to
# 'unknown_subject' for ids that are not listed here.
subject_id_dict = {
    0: 'unknown_subject',
    3: 'math',
    6: 'math_zxhx',
    8: 'english',
    9: 'chinese',
    12: 'physics',
    13: 'chemistry',
    14: 'biology',
    15: 'politics',
    16: 'history',
    17: 'geography',
    18: 'science_comprehensive',
    19: 'arts_comprehensive',
    98: 'english_B',
    99: 'english_T',
}
- # Create your views here.
def index(request):
    """Render the ``exam_bbox.html`` template for the incoming request."""
    template_name = 'exam_bbox.html'
    return render(request, template_name)
@csrf_exempt
def upload_img(request):
    """Store uploaded exam images and return their tesseract bounding boxes.

    Any non-POST request renders ``exam_bbox.html`` with a blank
    ``UploadImageForm``. A POST validates the form and then, for every file
    posted under ``img_data``: stores it with ``save_raw_image``, records an
    ``ExamImage`` row (a failure there is only logged) and calls
    ``get_exam_bbox_by_tesseract``.

    Returns:
        An ``HttpResponse`` whose body is the JSON document
        ``{'isSuccess': ..., 'imgs_info': [...]}``. ``isSuccess`` carries the
        status of the last processed image (1 when nothing was uploaded).
        For an invalid form the body is the ``str()`` of a dict holding
        ``isSuccess`` 99 and the form errors.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    upload_date = time.strftime('%Y-%m-%d', time.localtime())
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        # NOTE(review): this body is the repr of a Python dict, not JSON.
        failure = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(failure))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    overall_status = 1
    results = []
    for uploaded in request.FILES.getlist('img_data'):
        began = time.time()
        original_name = uploaded.name
        # NOTE(review): two values are unpacked here while exam_analysis_show
        # unpacks three from the same helper - confirm the real return shape.
        stored_path, _ = save_raw_image(subject, upload_date, uploaded, 'segment')
        try:
            ExamImage(
                upload_date=upload_date,
                raw_name=original_name,
                save_path=stored_path,
                subject_id=subject_id,
                subject=subject,
            ).save()
            logger.info('{}试卷 {} 存储成功: {}'.format(subject, original_name, stored_path))
        except Exception as exc:
            # A failed database insert does not stop the analysis of the image.
            logger.info('{}试卷 {} 存储失败: {}'.format(subject, original_name, exc))

        overall_status, box_info = get_exam_bbox_by_tesseract(original_name, stored_path, subject)
        elapsed = '{:.2f}s'.format(float(time.time() - began))
        box_info.update({'cost_time': elapsed})
        results.append(box_info)

    payload = json.dumps(
        {'isSuccess': overall_status, 'imgs_info': results},
        ensure_ascii=False,
        cls=NpEncoder,
    )
    logger.info('segment_info: {}'.format(payload))
    return HttpResponse(payload)
@csrf_exempt  # split an exam paper into questions
def analysis_exam_view(request):
    """Segment uploaded exam images through the OCR service.

    Any non-POST request renders ``exam_bbox.html`` with a blank
    ``UploadImageForm``. A POST validates the form, logs in once with
    ``ocr_login`` and then, for every file posted under ``img_data``: stores
    it with ``save_raw_image_without_segment``, records an ``ExamImage`` row
    (failures of these two steps are only logged) and calls
    ``get_segment_by_ocr_once``.

    Returns:
        An ``HttpResponse`` whose body is the JSON document
        ``{'isSuccess': ..., 'imgs_info': [...]}``. If the login or any
        segmentation call raises, processing stops, ``isSuccess`` becomes 0
        and an ``'error': 'ocr error'`` entry is added. For an invalid form
        the body is the ``str()`` of a dict holding ``isSuccess`` 99 and the
        form errors.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    upload_date = time.strftime('%Y-%m-%d', time.localtime())
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        failure = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(failure))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    uploads = request.FILES.getlist('img_data')

    results = []
    failure_reason = ''
    overall_status = 1
    try:
        token = ocr_login()
        for uploaded in uploads:
            began = time.time()
            original_name = uploaded.name
            # Defaults used when storing the image fails below.
            stored_path, cv_image = '', ''
            try:
                stored_path, cv_image, _ = save_raw_image_without_segment(
                    subject, upload_date, uploaded, 'segment')
                ExamImage(
                    upload_date=upload_date,
                    raw_name=original_name,
                    save_path=stored_path,
                    subject_id=subject_id,
                    subject=subject,
                ).save()
                logger.info('{}试卷: {} 存储成功: {}'.format(subject, original_name, stored_path))
            except Exception as exc:
                logger.info('{}试卷: {} 存储失败: {}'.format(subject, original_name, exc))

            overall_status, box_info = get_segment_by_ocr_once(
                cv_image, token, subject, stored_path, original_name)
            elapsed = '{:.2f}s'.format(float(time.time() - began))
            box_info.update({'cost_time': elapsed})
            results.append(box_info)
    except Exception as exc:
        logger.info('ocr error: {}'.format(exc))
        overall_status = 0
        failure_reason = 'ocr error'

    response = {'isSuccess': overall_status, 'imgs_info': results}
    if failure_reason:
        response['error'] = failure_reason
    payload = json.dumps(response, ensure_ascii=False, cls=NpEncoder)
    logger.info('segment_info: {}\n'.format(payload))
    return HttpResponse(payload)
@csrf_exempt  # recognise the text on an exam paper
def ocr_exam_view(request):
    """Run OCR on uploaded exam images and return the recognised text.

    Any non-POST request renders ``exam_bbox.html`` with a blank
    ``UploadImageForm``. A POST validates the form, logs in once with
    ``ocr_login`` and then, for every file posted under ``img_data``: stores
    it with ``save_raw_image_without_segment``, records an ``ExamImage`` row
    (failures of these two steps are only logged) and dispatches on the
    subject name:

    * ``english`` / ``english_B`` -> ``get_exam_ocr`` on the whole image,
    * ``english_T`` -> ``get_exam_ocr_by_penguin``,
    * anything else -> ``get_exam_ocr_once``.

    The per-image state (``save_path`` and ``opencv_img``) is reset at the
    start of every iteration. Previously ``opencv_img`` was initialised only
    once, so when storing a later upload failed the pixels of the previous
    upload were recognised and reported under the new file name.

    Returns:
        An ``HttpResponse`` whose body is the JSON document
        ``{'isSuccess': ..., 'imgs_info': [...]}``. If the login or any OCR
        call raises, processing stops, ``isSuccess`` becomes 0 and an
        ``'error': 'ocr error'`` entry is added. For an invalid form the body
        is the ``str()`` of a dict holding ``isSuccess`` 99 and the form
        errors.
    """
    if request.method != 'POST':
        form = UploadImageForm()
        return render(request, 'exam_bbox.html', {'form': form})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        error_json = form.errors.as_json()
        res = {'isSuccess': 99, 'error': error_json}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    upload_img_list = request.FILES.getlist('img_data')

    res_info_list = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        for img in upload_img_list:
            start_time = time.time()
            raw_name = img.name
            img_mem_size = img.size
            # Reset per image so a failed save can never reuse the previous
            # upload's path or pixel data.
            save_path = ''
            opencv_img = ''
            try:
                save_path, opencv_img, _ = save_raw_image_without_segment(
                    subject, time_str, img, 'ocr')
                img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                         save_path=save_path, subject_id=subject_id,
                                         subject=subject)
                img_instance.save()
                logger.info('{}试卷: {} 存储成功: {}'.format(subject, raw_name, save_path))
            except Exception as e:
                logger.info('{}试卷: {} 存储失败: {}'.format(subject, raw_name, e))

            if subject in ('english', 'english_B'):
                # English papers are not split into columns; recognise the
                # whole page directly.
                bin_parts_img_list = [{'img_part': opencv2base64(opencv_img)}]
                status, text_info = get_exam_ocr(
                    raw_name, bin_parts_img_list, save_path, subject, access_token)
            elif subject == 'english_T':
                status, text_info = get_exam_ocr_by_penguin(
                    raw_name, opencv_img, img_mem_size, save_path, subject)
            else:
                # Recognise and split into columns in one pass.
                status, text_info = get_exam_ocr_once(
                    opencv_img, access_token, subject, save_path, raw_name)

            is_success = status
            cost_time = '{:.2f}s'.format(float(time.time() - start_time))
            text_info.update({'cost_time': cost_time})
            res_info_list.append(text_info)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))
        is_success = 0
        error_info = 'ocr error'

    res = {'isSuccess': is_success, 'imgs_info': res_info_list}
    if error_info:
        res['error'] = error_info
    res_json = json.dumps(res, ensure_ascii=False, cls=NpEncoder)
    logger.info('text_info: {}\n'.format(res_json))
    return HttpResponse(res_json)
@csrf_exempt
def ocr_exam_view_of_pdf(request):
    """Run OCR on every page image extracted from an uploaded PDF.

    Any non-POST request renders ``exam_bbox.html`` with a blank form. A POST
    validates an ``UploadFileForm``, requires the name of the file posted
    under ``img_data`` to end in ``.pdf``, converts it with
    ``save_pdf_image`` and then, per page: records an ``ExamImage`` row (a
    failure there is only logged) and calls ``get_exam_ocr_once``.

    Returns:
        An ``HttpResponse`` whose body is the JSON document
        ``{'isSuccess': ..., 'imgs_info': [...]}``. If the login or any OCR
        call raises, processing stops, ``isSuccess`` becomes 0 and an
        ``'error': 'ocr error'`` entry is added. For an invalid form the body
        is the ``str()`` of a dict holding ``isSuccess`` 99 and the form
        errors.

    Raises:
        ValueError: if the uploaded file name does not end in ``.pdf``
            (the comparison is case-sensitive).
    """
    if request.method != 'POST':
        # NOTE(review): the POST branch validates with UploadFileForm but the
        # blank form rendered here is an UploadImageForm - confirm intended.
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    upload_date = time.strftime('%Y-%m-%d', time.localtime())
    form = UploadFileForm(request.POST, request.FILES)
    if not form.is_valid():
        failure = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(failure))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    pdf_file = request.FILES.get('img_data')
    pdf_name = pdf_file.name
    if not pdf_name.endswith('.pdf'):
        raise ValueError('{} is not a pdf file'.format(pdf_name))

    page_paths, page_images = save_pdf_image(pdf_file, subject, upload_date)

    results = []
    failure_reason = ''
    overall_status = 1
    try:
        token = ocr_login()
        # NOTE(review): the paths are sorted but `page_images` is indexed by
        # the sorted position; this pairs path and image correctly only if
        # save_pdf_image already returns the paths in sorted order - verify.
        for page_index, page_path in enumerate(sorted(page_paths)):
            began = time.time()
            page_name = ''
            try:
                page_name = '{}_{}_{:04d}'.format(pdf_name[:-4], 'pdf', page_index + 1)
                ExamImage(
                    upload_date=upload_date,
                    raw_name=page_name,
                    save_path=page_path,
                    subject_id=subject_id,
                    subject=subject,
                ).save()
                logger.info('{}试卷: {} 存储成功: {}'.format(subject, page_name, page_path))
            except Exception as exc:
                logger.info('{}试卷: {} 存储失败: {}'.format(subject, page_name, exc))

            overall_status, text_info = get_exam_ocr_once(
                page_images[page_index], token, subject, page_path, page_name)
            elapsed = '{:.2f}s'.format(float(time.time() - began))
            text_info.update({'cost_time': elapsed})
            results.append(text_info)
    except Exception as exc:
        logger.info('ocr error: {}'.format(exc))
        overall_status = 0
        failure_reason = 'ocr error'

    response = {'isSuccess': overall_status, 'imgs_info': results}
    if failure_reason:
        response['error'] = failure_reason
    payload = json.dumps(response, ensure_ascii=False, cls=NpEncoder)
    logger.info('text_info: {}\n'.format(payload))
    return HttpResponse(payload)
@csrf_exempt
def formula_analysis_show(request):
    """Recognise the formulas in one uploaded image and render the result page.

    Any non-POST request renders ``uploadimg.html`` with blank image and
    formula-URL forms. A POST validates an ``UploadImageForm``, stores the
    file posted under ``img_data`` with ``save_raw_image_without_segment``,
    records an ``ExamImage`` row (failures of these two steps are only
    logged), logs in with ``ocr_login`` and calls
    ``formula_segment_and_show.segment``.

    Returns:
        ``showimg.html`` rendered with the image url, the recognised and raw
        texts and display heights. When the login or the recognition raises,
        the error is logged and the page is rendered with empty texts. For an
        invalid form the response body is the ``str()`` of a dict holding
        ``is_success`` 99 and the form errors.
    """
    if request.method != 'POST':
        blank_forms = {'img_form': UploadImageForm(), 'formula_form': FormulaUrlForm()}
        return render(request, 'uploadimg.html', blank_forms)

    upload_date = time.strftime('%Y-%m-%d', time.localtime())
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        failure = {'is_success': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(failure))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    uploaded = request.FILES.get('img_data')
    recognized, recognized_raw = '', ''
    image_url = ''
    original_name = uploaded.name
    image_height = 0
    try:
        # Defaults used when storing the image fails below.
        stored_path, cv_image = '', ''
        try:
            stored_path, cv_image, image_url = save_raw_image_without_segment(
                subject, upload_date, uploaded, 'formula')
            ExamImage(
                upload_date=upload_date,
                raw_name=original_name,
                save_path=stored_path,
                subject_id=subject_id,
                subject=subject,
            ).save()
            logger.info('{}试卷: {} 存储成功: {}'.format(subject, original_name, stored_path))
        except Exception as exc:
            logger.info('{}试卷: {} 存储失败: {}'.format(subject, original_name, exc))

        token = ocr_login()
        recognized, recognized_raw, image_height = formula_segment_and_show.segment(
            cv_image, stored_path, token)
    except Exception as exc:
        logger.info('ocr error: {}'.format(exc))

    # The page is at least 300 high; taller images get 50 extra.
    display_height = image_height + 50 if image_height > 300 else 300

    context = {
        'url': image_url,
        'txt_url': os.path.join('/segment/', original_name.replace('.jpg', '.html')),
        'texts': ''.join(recognized),
        'raw_texts': ''.join(recognized_raw),
        'name': original_name.replace('.jpg', ''),
        'img_height': display_height,
        'text_height': 1.5 * display_height,
    }
    return render(request, 'showimg.html', context)
@csrf_exempt
def ai_formula_analysis_show(request):
    """Recognise formulas with the AI formula model and render the result page.

    Any non-POST request renders ``uploadimg.html`` with blank image and
    formula-URL forms. A POST validates an ``UploadImageForm``, stores the
    file posted under ``img_data`` with ``save_raw_image_in_jpeg``, records
    an ``ExamImage`` row (failures of these two steps are only logged),
    writes the stored image's file name into a ``.txt`` file next to it and
    passes the directory and that file to
    ``formula_segment_and_show.get_latex_by_ai_formula``.

    Path handling differs from the earlier string-replace version:

    * the ``.txt`` path is built from the path's extension, so a stored path
      that does not end in ``.jpg`` can no longer make the text file
      overwrite the image, and a ``.jpg`` inside a directory name is left
      alone;
    * the directory is taken as everything before the last ``/`` instead of
      deleting every occurrence of the file name from the path.

    Returns:
        ``showimg.html`` rendered with the image url, the recognised and raw
        texts and display heights. When any analysis step raises, the error
        is logged and the page is rendered with empty texts. For an invalid
        form the response body is the ``str()`` of a dict holding
        ``is_success`` 99 and the form errors.
    """
    if request.method != 'POST':
        img_form = UploadImageForm()
        formula_form = FormulaUrlForm()
        return render(request, 'uploadimg.html', {'img_form': img_form,
                                                  'formula_form': formula_form})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        error_json = form.errors.as_json()
        res = {'is_success': 99, 'error': error_json}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    img = request.FILES.get('img_data')
    text_info = ''
    raw_text_info = ''
    img_url_path = ''
    raw_name = img.name
    img_height = 0
    try:
        # Defaults used when storing the image fails below.
        save_path = ''
        opencv_img = ''
        try:
            save_path, img_url_path, opencv_img = \
                save_raw_image_in_jpeg(subject, time_str, img, 'formula')
            img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                     save_path=save_path, subject_id=subject_id,
                                     subject=subject)
            img_instance.save()
            logger.info('{}试卷: {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            logger.info('{}试卷: {} 存储失败: {}'.format(subject, raw_name, e))

        # Height of the stored image, used below to size the result page.
        # If storing failed `opencv_img` is still '' and this raises, which
        # the outer handler turns into an empty result page.
        img_height = opencv_img.shape[0]

        save_path = save_path.replace('\\', '/')
        # Split on the LAST separator only: directory part and file name.
        save_dir, _, formula_img_name = save_path.rpartition('/')
        # Swap the real extension for '.txt' so the image is never clobbered.
        formula_name_txt_path = os.path.splitext(save_path)[0] + '.txt'
        with open(formula_name_txt_path, 'w', encoding='utf-8') as writer:
            writer.writelines(formula_img_name + '\n')

        text_info, raw_text_info = formula_segment_and_show \
            .get_latex_by_ai_formula(save_dir, formula_name_txt_path)
    except Exception as e:
        # This view never calls ocr_login, so the old "ocr login error" text
        # was misleading; report what actually failed.
        logger.info('analysis error: {}'.format(e))

    txt_url = os.path.join('/segment/', raw_name.replace('.jpg', '.html'))
    chars_str = ''.join(text_info)
    raw_chars_str = ''.join(raw_text_info)

    # The page is at least 300 high; taller images get 50 extra.
    height = 300
    if img_height > height:
        height = img_height + 50

    res_dict = {'url': img_url_path, 'txt_url': txt_url,
                'texts': chars_str,
                'raw_texts': raw_chars_str,
                'name': raw_name.replace('.jpg', ''),
                'img_height': height,
                'text_height': 1.5 * height
                }
    return render(request, 'showimg.html', res_dict)
@csrf_exempt
def formula_analysis(request):
    """Download an image by URL, recognise its formulas and return JSON.

    Any non-POST request renders ``uploadimg.html`` with blank image and
    formula-URL forms. A POST validates a ``FormulaUrlForm``, downloads
    ``img_url`` (3 second timeout), saves it under
    ``MEDIA_ROOT/formula/<date>/`` with a random 10-character name, reads it
    back (``png_read`` when the URL contains ``.png``, ``cv2.imread``
    otherwise), logs in with ``ocr_login`` and calls
    ``formula_segment.segment``. The recognised text is also written to a
    ``.txt`` file next to the image.

    Differences from the earlier version:

    * an HTTP error status now aborts the analysis instead of saving the
      error page as an image;
    * the text file is written as UTF-8 rather than in the platform default
      encoding, matching the text file written by ``ai_formula_analysis_show``;
    * the target directory is created with ``exist_ok=True`` so concurrent
      requests cannot fail on the exists/create race.

    Returns:
        An ``HttpResponse`` whose body is the JSON document
        ``{'image_url': ..., 'texts': ..., 'is_success': 0 | 1}`` plus an
        ``error`` entry containing the exception text on failure. For an
        invalid form the body is the ``str()`` of a dict holding
        ``is_success`` 99 and the form errors.
    """
    if request.method != 'POST':
        formula_form = FormulaUrlForm()
        img_form = UploadImageForm()
        return render(request, 'uploadimg.html', {'img_form': img_form,
                                                  'formula_form': formula_form})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = FormulaUrlForm(request.POST, )
    if not form.is_valid():
        error_json = form.errors.as_json()
        res = {'is_success': 99, 'error': error_json}
        return HttpResponse('{}'.format(res))

    image_url = form.cleaned_data['img_url']
    error_info = ''
    text_info = ''
    try:
        # NOTE(security): the URL comes from the client and is fetched from
        # the server (possible SSRF). Consider restricting allowed hosts.
        r = requests.get(image_url, timeout=3)
        # Fail early on 4xx/5xx instead of storing an error page as an image.
        r.raise_for_status()

        save_dir = os.path.join(settings.MEDIA_ROOT, 'formula', time_str)
        os.makedirs(save_dir, exist_ok=True)

        # Both formats are saved the same way; only the reader differs.
        is_png = '.png' in image_url
        ext = 'png' if is_png else 'jpg'
        file_name = '{}.{}'.format(uuid.uuid4().hex[:10], ext)
        save_path = os.path.join(save_dir, file_name)
        with open(save_path, 'wb') as f:
            f.write(r.content)
        open_cv_image = png_read(save_path) if is_png else cv2.imread(save_path)

        access_token = ocr_login()
        text_info, _ = formula_segment.segment(open_cv_image, save_path, access_token)

        # Both extensions are three characters long, so this swaps in 'txt'.
        txt_path = save_path[:-3] + 'txt'
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.writelines(text_info)
        is_success = 1
    except Exception as e:
        logger.info('analysis error: {}'.format(e))
        is_success = 0
        error_info = 'analysis error: {}'.format(e)

    res_dict = {'image_url': image_url,
                'texts': ''.join(text_info),
                'is_success': is_success
                }
    if error_info:
        res_dict['error'] = error_info
    res_json = json.dumps(res_dict, ensure_ascii=False, cls=NpEncoder)
    return HttpResponse(res_json)
@csrf_exempt
def exam_analysis_show(request):
    """Segment one uploaded exam image and render it with the boxes drawn.

    Any non-POST request renders ``exam_bbox.html`` with a blank
    ``UploadImageForm``. A POST validates the form, stores the file posted
    under ``img_data`` with ``save_raw_image``, records an ``ExamImage`` row,
    logs in with ``ocr_login`` and calls ``get_exam_box``. The upload is then
    reopened, shrunk to 20 %, the rectangles listed under
    ``bbox_info['coordinate']`` are drawn on it and the result is written to
    the stored path with ``write_single_img``.

    If any step of the storage / login / analysis chain raises, the error is
    logged and the page is rendered without boxes. Previously that path
    indexed the empty-string placeholder (``''['coordinate']``) and the
    request ended in an unhandled ``TypeError``. The preview is only written
    when a stored path is known.

    Returns:
        ``showimg.html`` rendered with the image url, the box information and
        display heights. For an invalid form the response body is the
        ``str()`` of a dict holding ``is_success`` 99 and the form errors.
    """
    if request.method != 'POST':
        img_form = UploadImageForm()
        return render(request, 'exam_bbox.html', {'form': img_form})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        error_json = form.errors.as_json()
        res = {'is_success': 99, 'error': error_json}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    img = request.FILES.get('img_data')
    img_url_path = ''
    raw_name = img.name
    bbox_info = ''
    save_path = ''
    try:
        # NOTE(review): three values are unpacked here while upload_img
        # unpacks two from the same helper - confirm the real return shape;
        # a mismatch would land in the handler below on every request.
        save_path, bin_parts_img_list, img_url_path = save_raw_image(
            subject, time_str, img, 'segment')
        img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                 save_path=save_path, subject_id=subject_id,
                                 subject=subject)
        img_instance.save()
        access_token = ocr_login()
        _, bbox_info = get_exam_box(raw_name, bin_parts_img_list, save_path,
                                    subject, access_token)
    except Exception as e:
        logger.info('ocr login error: {}'.format(e))

    raw_img = Image.open(img)  # read the uploaded image
    open_cv_image = np.array(raw_img)
    show_ratio = 0.2
    open_cv_image = resize_by_percent(open_cv_image, show_ratio)

    # After a failed analysis `bbox_info` is still the '' placeholder, so
    # there is nothing to draw.
    coordinates = bbox_info.get('coordinate', []) if isinstance(bbox_info, dict) else []
    for ele in coordinates:
        # Scale each corner to the shrunken preview, then truncate to pixels.
        xmin, ymin, xmax, ymax = [int(int(ele[i]) * show_ratio) for i in range(4)]
        cv2.rectangle(open_cv_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)

    if save_path:
        # Replaces the stored image with the annotated preview.
        write_single_img(open_cv_image, save_path)

    height = open_cv_image.shape[0]
    res_dict = {'url': img_url_path,
                'texts': bbox_info,
                'raw_texts': bbox_info,
                'name': raw_name.replace('.jpg', ''),
                'img_height': height,
                'text_height': 1.5 * height
                }
    return render(request, 'showimg.html', res_dict)
|