lightxu
/
exam-segment-django


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
							# @Author  : lightXu
# @File    : views.py
# @Time    : 2018/7/19 0019 下午 14:28
import json
import os
import time
import uuid

import cv2
import numpy as np
import requests
from PIL import Image
from django.conf import settings
from django.http import HttpResponse
from django.shortcuts import render
from django.views.decorators.csrf import csrf_exempt

import segment.logging_config as logging
from segment.form import UploadImageForm, FormulaUrlForm, UploadFileForm
from segment.formula import formula_segment, formula_segment_and_show
from segment.image_operation.utils import png_read
from segment.image_operation.utils import write_single_img, resize_by_percent
from segment.sheet_resolve.tools.utils import NpEncoder
from segment.models import ExamImage
from segment.server import get_exam_bbox_by_tesseract, get_exam_ocr, opencv2base64
from segment.server import get_exam_box
from segment.server import get_exam_ocr_by_penguin
from segment.server import get_segment_by_ocr_once, get_exam_ocr_once
from segment.server import save_pdf_image
from segment.server import save_raw_image, save_raw_image_without_segment, ocr_login
from segment.server import save_raw_image_in_jpeg

# Module-level logger shared by every view below; the logger name is taken
# from the Django setting ``LOGGING_TYPE``.
logger = logging.getLogger(settings.LOGGING_TYPE)

# Maps the numeric ``subject`` value submitted with an upload form to the
# subject name passed to the storage / OCR helpers.  Views fall back to
# 'unknown_subject' for ids that are not listed here.
subject_id_dict = {
    0: 'unknown_subject',
    3: 'math',
    6: 'math_zxhx',
    8: 'english',
    9: 'chinese',
    12: 'physics',
    13: 'chemistry',
    14: 'biology',
    15: 'politics',
    16: 'history',
    17: 'geography',
    18: 'science_comprehensive',
    19: 'arts_comprehensive',
    98: 'english_B',
    99: 'english_T',
}


# Create your views here.
def index(request):
    """Render the ``exam_bbox.html`` template without any extra context."""
    template_name = 'exam_bbox.html'
    return render(request, template_name)


@csrf_exempt
def upload_img(request):
    """Store uploaded exam images and return their bounding-box info as JSON.

    Non-POST requests render ``exam_bbox.html`` with an unbound
    ``UploadImageForm``.  A POST is validated with ``UploadImageForm``; every
    file posted under ``img_data`` is written with ``save_raw_image``, recorded
    as an ``ExamImage`` row (best effort: a database failure is only logged)
    and passed to ``get_exam_bbox_by_tesseract``.

    Returns:
        HttpResponse: for a valid POST, the JSON text of
        ``{'isSuccess': <status>, 'imgs_info': [<per-image info>, ...]}``;
        for an invalid form, ``str()`` of
        ``{'isSuccess': 99, 'error': <form errors as JSON>}``; otherwise the
        rendered upload page.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        res = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    # Ids missing from the table are handled as 'unknown_subject'.
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    res_info_list = []
    is_success = 1
    for img in request.FILES.getlist('img_data'):
        start_time = time.time()
        raw_name = img.name
        # Only the saved path is needed here.  Another view in this module
        # unpacks three values from the same helper, so star-unpacking is used
        # to stay valid whichever number of values it returns.
        save_path, *_ = save_raw_image(subject, time_str, img, 'segment')

        try:
            img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                     save_path=save_path, subject_id=subject_id, subject=subject)
            img_instance.save()
            logger.info('{}试卷 {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            # Recording the upload is best effort; segmentation still runs.
            logger.info('{}试卷 {} 存储失败: {}'.format(subject, raw_name, e))

        status, bbox_info = get_exam_bbox_by_tesseract(raw_name, save_path, subject)
        # NOTE(review): the reported status is that of the last image only.
        is_success = status
        cost_time = '{:.2f}s'.format(float(time.time() - start_time))
        bbox_info.update({'cost_time': cost_time})
        res_info_list.append(bbox_info)

    res = {'isSuccess': is_success, 'imgs_info': res_info_list}
    res_json = json.dumps(res, ensure_ascii=False, cls=NpEncoder)
    logger.info('segment_info: {}'.format(res_json))

    return HttpResponse(res_json)


@csrf_exempt  # exam paper: split into questions
def analysis_exam_view(request):
    """Segment uploaded exam images through ``get_segment_by_ocr_once``.

    Non-POST requests render ``exam_bbox.html`` with an unbound
    ``UploadImageForm``.  For a valid POST an OCR access token is obtained
    once with ``ocr_login`` and each file posted under ``img_data`` is saved,
    recorded as an ``ExamImage`` row (failures are only logged) and segmented.
    Any exception raised by the login or by the per-image processing stops
    the loop; the results collected so far are returned together with
    ``isSuccess`` 0 and an ``error`` entry.

    Returns:
        HttpResponse: JSON text with ``isSuccess`` and ``imgs_info`` for a
        valid POST; ``str()`` of ``{'isSuccess': 99, 'error': ...}`` for an
        invalid form; otherwise the rendered upload page.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        failure = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(failure))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    uploads = request.FILES.getlist('img_data')

    collected = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        for upload in uploads:
            began = time.time()
            raw_name = upload.name
            # Defaults used when saving the upload fails.
            save_path, opencv_img = '', ''
            try:
                save_path, opencv_img, _ = save_raw_image_without_segment(subject, today, upload, 'segment')
                ExamImage(upload_date=today, raw_name=raw_name, save_path=save_path,
                          subject_id=subject_id, subject=subject).save()
                logger.info('{}试卷： {} 存储成功: {}'.format(subject, raw_name, save_path))
            except Exception as e:
                logger.info('{}试卷： {} 存储失败: {}'.format(subject, raw_name, e))

            status, bbox_info = get_segment_by_ocr_once(opencv_img, access_token, subject, save_path, raw_name)
            is_success = status

            elapsed = float(time.time() - began)
            bbox_info.update({'cost_time': '{:.2f}s'.format(elapsed)})
            collected.append(bbox_info)
    except Exception as e:
        logger.info('ocr  error: {}'.format(e))
        is_success = 0
        error_info = 'ocr  error'

    payload = {'isSuccess': is_success, 'imgs_info': collected}
    if error_info:
        payload['error'] = error_info
    res_json = json.dumps(payload, ensure_ascii=False, cls=NpEncoder)
    logger.info('segment_info: {}\n'.format(res_json))

    return HttpResponse(res_json)


@csrf_exempt  # exam paper text recognition
def ocr_exam_view(request):
    """Run OCR on uploaded exam images and return the recognised text as JSON.

    Non-POST requests render ``exam_bbox.html`` with an unbound
    ``UploadImageForm``.  For a valid POST an OCR access token is obtained
    once with ``ocr_login``; each file posted under ``img_data`` is saved,
    recorded as an ``ExamImage`` row (failures are only logged) and sent to
    the OCR helper selected by subject:

    * ``english`` / ``english_B`` -> ``get_exam_ocr`` on the whole image,
    * ``english_T`` -> ``get_exam_ocr_by_penguin``,
    * anything else -> ``get_exam_ocr_once``.

    Any exception raised by the login or the per-image processing stops the
    loop; results collected so far are returned with ``isSuccess`` 0 and an
    ``error`` entry.

    Returns:
        HttpResponse: JSON text with ``isSuccess`` and ``imgs_info`` for a
        valid POST; ``str()`` of ``{'isSuccess': 99, 'error': ...}`` for an
        invalid form; otherwise the rendered upload page.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        res = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    upload_img_list = request.FILES.getlist('img_data')

    res_info_list = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        for img in upload_img_list:
            start_time = time.time()
            raw_name = img.name
            img_mem_size = img.size
            save_path = ''
            # Reset for every image: if saving this upload fails, the pixels
            # of the previous upload must not be recognised under this
            # upload's name.
            opencv_img = ''
            try:
                save_path, opencv_img, _ = save_raw_image_without_segment(subject, time_str, img, 'ocr')
                img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                         save_path=save_path, subject_id=subject_id, subject=subject)
                img_instance.save()
                logger.info('{}试卷： {} 存储成功: {}'.format(subject, raw_name, save_path))
            except Exception as e:
                logger.info('{}试卷： {} 存储失败: {}'.format(subject, raw_name, e))

            if subject in ('english', 'english_B'):
                # English papers are not split into columns; recognise the
                # whole image directly.
                bin_parts_img_list = [{'img_part': opencv2base64(opencv_img)}]
                status, text_info = get_exam_ocr(raw_name, bin_parts_img_list, save_path, subject, access_token)
            elif subject == 'english_T':
                status, text_info = get_exam_ocr_by_penguin(raw_name, opencv_img, img_mem_size, save_path, subject)
            else:
                # Recognise and split into columns.
                status, text_info = get_exam_ocr_once(opencv_img, access_token, subject, save_path, raw_name)

            # NOTE(review): the reported status is that of the last image only.
            is_success = status
            cost_time = '{:.2f}s'.format(float(time.time() - start_time))
            text_info.update({'cost_time': cost_time})
            res_info_list.append(text_info)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))
        is_success = 0
        error_info = 'ocr error'

    res = {'isSuccess': is_success, 'imgs_info': res_info_list}
    if error_info:
        res['error'] = error_info
    res_json = json.dumps(res, ensure_ascii=False, cls=NpEncoder)
    logger.info('text_info: {}\n'.format(res_json))

    return HttpResponse(res_json)


@csrf_exempt
def ocr_exam_view_of_pdf(request):
    """Run OCR on every page image produced from an uploaded PDF.

    Non-POST requests render ``exam_bbox.html``.  A POST is validated with
    ``UploadFileForm``; the file posted under ``img_data`` must have a
    ``.pdf`` extension (compared case-insensitively).  ``save_pdf_image``
    supplies the page image paths and the page images; each page is recorded
    as an ``ExamImage`` row (failures are only logged) and recognised with
    ``get_exam_ocr_once``.  Any exception raised by ``ocr_login`` or the
    per-page processing stops the loop; results collected so far are returned
    with ``isSuccess`` 0 and an ``error`` entry.

    Returns:
        HttpResponse: JSON text with ``isSuccess`` and ``imgs_info`` for a
        valid POST; ``str()`` of ``{'isSuccess': 99, 'error': ...}`` for an
        invalid form; otherwise the rendered upload page.

    Raises:
        ValueError: if the uploaded file name does not end with ``.pdf``.
    """
    if request.method != 'POST':
        # NOTE(review): the page is built with UploadImageForm although the
        # POST is validated with UploadFileForm -- confirm this is intended.
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadFileForm(request.POST, request.FILES)
    if not form.is_valid():
        res = {'isSuccess': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'

    pdf_file = request.FILES.get('img_data')
    pdf_name = pdf_file.name
    # Case-insensitive so that names such as 'scan.PDF' are accepted too.
    if pdf_name[-4:].lower() != '.pdf':
        raise ValueError('{} is not a pdf file'.format(pdf_name))
    pdf_stem = pdf_name[:-4]

    upload_img_list, images_list = save_pdf_image(pdf_file, subject, time_str)
    res_info_list = []
    error_info = ''
    is_success = 1
    try:
        access_token = ocr_login()
        # NOTE(review): paths are sorted but images_list is indexed in its
        # original order; this assumes both end up in page order -- confirm
        # against save_pdf_image.
        for pdf_img_index, img_path in enumerate(sorted(upload_img_list)):
            start_time = time.time()
            # Name recorded for the page: '<pdf name>_pdf_<4-digit page no>'.
            save_name = '{}_{}_{:04d}'.format(pdf_stem, 'pdf', pdf_img_index + 1)
            try:
                img_instance = ExamImage(upload_date=time_str, raw_name=save_name,
                                         save_path=img_path, subject_id=subject_id, subject=subject)
                img_instance.save()
                logger.info('{}试卷： {} 存储成功: {}'.format(subject, save_name, img_path))
            except Exception as e:
                logger.info('{}试卷： {} 存储失败: {}'.format(subject, save_name, e))

            status, text_info = get_exam_ocr_once(images_list[pdf_img_index], access_token, subject, img_path, save_name)

            # NOTE(review): the reported status is that of the last page only.
            is_success = status
            cost_time = '{:.2f}s'.format(float(time.time() - start_time))
            text_info.update({'cost_time': cost_time})
            res_info_list.append(text_info)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))
        is_success = 0
        error_info = 'ocr error'

    res = {'isSuccess': is_success, 'imgs_info': res_info_list}
    if error_info:
        res['error'] = error_info
    res_json = json.dumps(res, ensure_ascii=False, cls=NpEncoder)
    logger.info('text_info: {}\n'.format(res_json))

    return HttpResponse(res_json)


@csrf_exempt
def formula_analysis_show(request):
    """Analyse one uploaded formula image and render the result page.

    Non-POST requests render ``uploadimg.html`` with unbound
    ``UploadImageForm`` and ``FormulaUrlForm`` instances.  For a valid POST
    the file posted under ``img_data`` is saved and recorded as an
    ``ExamImage`` row (failures are only logged), then passed to
    ``formula_segment_and_show.segment`` together with an OCR access token.
    If the analysis raises, the error is logged and ``showimg.html`` is still
    rendered with empty texts.

    Returns:
        HttpResponse: the rendered ``showimg.html`` page for a valid POST;
        ``str()`` of ``{'is_success': 99, 'error': ...}`` for an invalid
        form; otherwise the rendered ``uploadimg.html`` page.
    """
    if request.method != 'POST':
        return render(request, 'uploadimg.html', {'img_form': UploadImageForm(),
                                                  'formula_form': FormulaUrlForm()})

    today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        failure = {'is_success': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(failure))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    upload = request.FILES.get('img_data')

    # Values rendered when the analysis below fails part-way.
    text_info, raw_text_info = '', ''
    img_url_path = ''
    raw_name = upload.name
    img_height = 0
    try:
        save_path, opencv_img = '', ''
        try:
            save_path, opencv_img, img_url_path = save_raw_image_without_segment(subject, today, upload, 'formula')
            ExamImage(upload_date=today, raw_name=raw_name, save_path=save_path,
                      subject_id=subject_id, subject=subject).save()
            logger.info('{}试卷： {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            logger.info('{}试卷： {} 存储失败: {}'.format(subject, raw_name, e))

        access_token = ocr_login()
        text_info, raw_text_info, img_height = formula_segment_and_show.segment(
            opencv_img, save_path, access_token)
    except Exception as e:
        logger.info('ocr error: {}'.format(e))

    # The display height is at least 300; taller images get 50 extra.
    display_height = img_height + 50 if img_height > 300 else 300
    context = {
        'url': img_url_path,
        'txt_url': os.path.join('/segment/', raw_name.replace('.jpg', '.html')),
        'texts': ''.join(text_info),
        'raw_texts': ''.join(raw_text_info),
        'name': raw_name.replace('.jpg', ''),
        'img_height': display_height,
        'text_height': 1.5 * display_height,
    }
    return render(request, 'showimg.html', context)


@csrf_exempt
def ai_formula_analysis_show(request):
    """Analyse one uploaded formula image with ``get_latex_by_ai_formula``.

    Non-POST requests render ``uploadimg.html`` with unbound
    ``UploadImageForm`` and ``FormulaUrlForm`` instances.  For a valid POST
    the file posted under ``img_data`` is saved with
    ``save_raw_image_in_jpeg`` and recorded as an ``ExamImage`` row (failures
    are only logged).  A text file holding the image file name is written
    next to the image, and the image directory plus that text file path are
    passed to ``formula_segment_and_show.get_latex_by_ai_formula``.  If any
    of this raises, the error is logged and ``showimg.html`` is still
    rendered with empty texts.

    Returns:
        HttpResponse: the rendered ``showimg.html`` page for a valid POST;
        ``str()`` of ``{'is_success': 99, 'error': ...}`` for an invalid
        form; otherwise the rendered ``uploadimg.html`` page.
    """
    if request.method != 'POST':
        return render(request, 'uploadimg.html', {'img_form': UploadImageForm(),
                                                  'formula_form': FormulaUrlForm()})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        res = {'is_success': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    img = request.FILES.get('img_data')

    # Values rendered when the analysis below fails part-way.
    text_info = ''
    raw_text_info = ''
    img_url_path = ''
    raw_name = img.name
    img_height = 0
    try:
        save_path = ''
        opencv_img = ''
        try:
            save_path, img_url_path, opencv_img = \
                save_raw_image_in_jpeg(subject, time_str, img, 'formula')
            img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                     save_path=save_path, subject_id=subject_id, subject=subject)
            img_instance.save()
            logger.info('{}试卷： {} 存储成功: {}'.format(subject, raw_name, save_path))
        except Exception as e:
            logger.info('{}试卷： {} 存储失败: {}'.format(subject, raw_name, e))

        # If saving failed, opencv_img is still '' and this raises; the outer
        # handler then logs the failure.
        img_height = opencv_img.shape[0]

        save_path = save_path.replace('\\', '/')
        # Split on the last '/' only.  Removing the file name with
        # str.replace would also strip it from a directory that happens to
        # contain the same text.
        save_dir, _, formula_img_name = save_path.rpartition('/')
        # Swap only the real extension.  With str.replace('.jpg', '.txt') any
        # other extension left the path unchanged, so the write below
        # overwrote the image itself.
        formula_name_txt_path = os.path.splitext(save_path)[0] + '.txt'
        with open(formula_name_txt_path, 'w', encoding='utf-8') as writer:
            writer.write(formula_img_name + '\n')

        text_info, raw_text_info = formula_segment_and_show\
            .get_latex_by_ai_formula(save_dir, formula_name_txt_path)
    except Exception as e:
        # NOTE(review): the message says "ocr login" although no login
        # happens in this view; the text is kept unchanged for log consumers.
        logger.info('ocr login error: {}'.format(e))

    txt_url = os.path.join('/segment/', raw_name.replace('.jpg', '.html'))
    chars_str = ''.join(text_info)
    raw_chars_str = ''.join(raw_text_info)

    # The display height is at least 300; taller images get 50 extra.
    height = 300
    if img_height > height:
        height = img_height + 50
    res_dict = {'url': img_url_path, 'txt_url': txt_url,
                'texts': chars_str,
                'raw_texts': raw_chars_str,
                'name': raw_name.replace('.jpg', ''),
                'img_height': height,
                'text_height': 1.5 * height
                }

    return render(request, 'showimg.html', res_dict)


@csrf_exempt
def formula_analysis(request):
    """Download a formula image from a posted URL and return its text as JSON.

    Non-POST requests render ``uploadimg.html`` with unbound
    ``UploadImageForm`` and ``FormulaUrlForm`` instances.  A POST is
    validated with ``FormulaUrlForm``; the image at ``img_url`` is downloaded
    into ``MEDIA_ROOT/formula/<date>/`` under a random name, read back as an
    image and passed to ``formula_segment.segment``.  The returned text is
    also written to a ``.txt`` file next to the image.  Any failure is logged
    and reported in the JSON instead of being raised.

    Returns:
        HttpResponse: JSON text with ``image_url``, ``texts`` and
        ``is_success`` (plus ``error`` on failure) for a valid POST;
        ``str()`` of ``{'is_success': 99, 'error': ...}`` for an invalid
        form; otherwise the rendered ``uploadimg.html`` page.
    """
    if request.method != 'POST':
        return render(request, 'uploadimg.html', {'img_form': UploadImageForm(),
                                                  'formula_form': FormulaUrlForm()})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = FormulaUrlForm(request.POST, )
    if not form.is_valid():
        res = {'is_success': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(res))

    image_url = form.cleaned_data['img_url']
    error_info = ''
    text_info = ''

    try:
        # NOTE(security): image_url comes from the client and is fetched from
        # the server side; consider restricting the allowed hosts (SSRF).
        r = requests.get(image_url, timeout=3)
        # Fail early on 4xx/5xx instead of saving an error page as an image.
        r.raise_for_status()

        save_dir = os.path.join(settings.MEDIA_ROOT, 'formula', time_str)
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(save_dir, exist_ok=True)

        is_png = '.png' in image_url
        ext = 'png' if is_png else 'jpg'
        file_name = '{}.{}'.format(uuid.uuid4().hex[:10], ext)
        save_path = os.path.join(save_dir, file_name)
        with open(save_path, 'wb') as f:
            f.write(r.content)

        open_cv_image = png_read(save_path) if is_png else cv2.imread(save_path)
        if open_cv_image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError('cannot decode image downloaded from {}'.format(image_url))

        access_token = ocr_login()
        text_info, _ = formula_segment.segment(open_cv_image, save_path, access_token)
        txt_path = save_path[:-3] + 'txt'
        # Explicit utf-8 so the write does not depend on the platform's
        # default encoding.
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.writelines(text_info)

        is_success = 1

    except Exception as e:
        logger.info('analysis error: {}'.format(e))
        is_success = 0
        error_info = 'analysis error: {}'.format(e)

    res_dict = {'image_url': image_url,
                'texts': ''.join(text_info),
                'is_success': is_success
                }
    if error_info:
        res_dict['error'] = error_info

    res_json = json.dumps(res_dict, ensure_ascii=False, cls=NpEncoder)
    return HttpResponse(res_json)


@csrf_exempt
def exam_analysis_show(request):
    """Segment one uploaded exam image and render a preview with its boxes.

    Non-POST requests render ``exam_bbox.html`` with an unbound
    ``UploadImageForm``.  For a valid POST the file posted under ``img_data``
    is saved with ``save_raw_image``, recorded as an ``ExamImage`` row and
    passed to ``get_exam_box``.  The upload is then scaled to 20 %, every
    entry of ``bbox_info['coordinate']`` is drawn on it as a green rectangle,
    the preview is written to the saved path and ``showimg.html`` is
    rendered.

    Returns:
        HttpResponse: the rendered ``showimg.html`` page on success;
        ``str()`` of ``{'is_success': 0, 'error': 'analysis error'}`` when
        saving, recording or segmentation fails; ``str()`` of
        ``{'is_success': 99, 'error': ...}`` for an invalid form; otherwise
        the rendered upload page.
    """
    if request.method != 'POST':
        return render(request, 'exam_bbox.html', {'form': UploadImageForm()})

    time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    form = UploadImageForm(request.POST, request.FILES)
    if not form.is_valid():
        res = {'is_success': 99, 'error': form.errors.as_json()}
        return HttpResponse('{}'.format(res))

    subject_id = int(form.cleaned_data['subject'])
    subject = subject_id_dict.get(subject_id) or 'unknown_subject'
    img = request.FILES.get('img_data')
    raw_name = img.name

    try:
        save_path, bin_parts_img_list, img_url_path = save_raw_image(subject, time_str, img, 'segment')
        img_instance = ExamImage(upload_date=time_str, raw_name=raw_name,
                                 save_path=save_path, subject_id=subject_id, subject=subject)
        img_instance.save()
        access_token = ocr_login()
        _, bbox_info = get_exam_box(raw_name, bin_parts_img_list, save_path, subject, access_token)
    except Exception as e:
        logger.info('ocr login error: {}'.format(e))
        # Without bbox_info / save_path nothing can be drawn or written, so
        # report the failure in the same form as the invalid-form branch
        # instead of crashing further down.
        res = {'is_success': 0, 'error': 'analysis error'}
        return HttpResponse('{}'.format(res))

    raw_img = Image.open(img)  # read the uploaded image
    show_ratio = 0.2
    open_cv_image = resize_by_percent(np.array(raw_img), show_ratio)

    # A result without 'coordinate' simply draws no boxes.
    for ele in bbox_info.get('coordinate') or []:
        # Scale the full-size box corners down to the preview size.
        xmin, ymin, xmax, ymax = (int(int(v) * show_ratio) for v in ele[:4])
        cv2.rectangle(open_cv_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)

    write_single_img(open_cv_image, save_path)
    height = open_cv_image.shape[0]

    res_dict = {'url': img_url_path,
                'texts': bbox_info,
                'raw_texts': bbox_info,
                'name': raw_name.replace('.jpg', ''),
                'img_height': height,
                'text_height': 1.5 * height
                }

    return render(request, 'showimg.html', res_dict)