# Repository: lightxu/exam-segment-django
							# File:get_sheet_points_by_nlp.py
# Author:lynn
# Date:2020/5/19 18:23


import ast, cv2, re
from segment.sheet_resolve.analysis.sheet.tag_parse import TagParse
from segment.sheet_resolve.tools.utils import crop_region

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def decide_coordinate_full_contains(coordinate1, coordinate2):
    """Return True when box ``coordinate1`` fully contains box ``coordinate2``.

    Both arguments are indexed as ``[xmin, ymin, xmax, ymax]``. The comparison
    is inclusive, so identical boxes and boxes sharing an edge count as
    contained.

    :param coordinate1: candidate outer box.
    :param coordinate2: candidate inner box.
    :return: ``True`` if every edge of ``coordinate2`` lies on or inside the
        matching edge of ``coordinate1``, otherwise ``False``.
    """
    outer_xmin, outer_ymin = coordinate1[0], coordinate1[1]
    outer_xmax, outer_ymax = coordinate1[2], coordinate1[3]
    inner_xmin, inner_ymin = coordinate2[0], coordinate2[1]
    inner_xmax, inner_ymax = coordinate2[2], coordinate2[3]

    return bool(
        outer_xmin <= inner_xmin
        and outer_ymin <= inner_ymin
        and outer_xmax >= inner_xmax
        and outer_ymax >= inner_ymax
    )


def analyse_solve_solve0_result(result, ele):
    """Copy the question number and total score found in ``result`` onto ``ele``.

    ``result`` is a sequence of ``(text, tag)`` pairs. The first integer in
    the text of the first ``'N'`` token is stored as ``ele['number']`` and the
    first integer in the text of the first ``'T'`` token is stored as
    ``ele['default_points']``. Nothing is extracted when any token is tagged
    ``'O'``.

    :param result: tagged tokens, each indexable as ``(text, tag)``.
    :param ele: region dict to update in place.
    :return: ``[ele]`` when at least one value was written, otherwise ``[]``.
    """
    # A distinct loop name keeps the ``ele`` parameter from being shadowed.
    tags = [token[1] for token in result]
    if 'O' in tags:
        return []

    def _first_int(tag):
        """Return the first integer in the first ``tag`` token, or None."""
        if tag not in tags:
            return None
        digits = re.findall(r'\d+', result[tags.index(tag)][0])
        # A tagged token without digits is skipped instead of raising
        # IndexError on ``digits[0]``.
        return int(digits[0]) if digits else None

    title_number = _first_int('N')
    total_score = _first_int('T')

    if title_number is None and total_score is None:
        return []
    if title_number is not None:
        ele['number'] = title_number
    if total_score is not None:
        ele['default_points'] = total_score
    return [ele]


def _expand_number_range(range_text):
    """Expand ``"a-b"`` or ``"a~b"`` into ``[a, ..., b]``; None if unparsable."""
    for separator in ('-', '~'):
        if separator in range_text:
            bounds = range_text.split(separator)
            break
    else:
        return None
    try:
        first, last = int(bounds[0]), int(bounds[1])
    except ValueError:
        return None
    return list(range(first, last + 1))


def analyse_choice_result(result, choice_m_list):
    """Assign per-question scores to choice boxes from tagged OCR tokens.

    ``result`` is a list of ``(text, tag)`` pairs. It is cut into segments at
    every ``'O'`` token. In each segment the ``'O'`` text (``"a-b"`` or
    ``"a~b"``) gives a question-number range and the first ``'S'`` text gives
    the score. A box in ``choice_m_list`` receives
    ``default_points = [score] * rows`` when the count of its ``'number'``
    entries that fall outside the range, divided by its ``'rows'``, is at most
    0.4.

    Segments lacking an ``'O'``/``'S'`` token or with an unparsable range, and
    boxes whose ``'rows'`` is zero, are skipped rather than raising.

    :param result: tagged tokens, each indexable as ``(text, tag)``.
    :param choice_m_list: region dicts with ``'number'`` and ``'rows'`` keys,
        updated in place.
    :return: ``choice_m_list`` itself. It is returned unchanged when no ``'O'``
        token exists, so callers can always iterate the result.
    """
    key_words_list = [token[1] for token in result]
    if 'O' not in key_words_list:
        return choice_m_list

    # Segment boundaries: every 'O' position plus the end of the token list.
    boundaries = [pos for pos, tag in enumerate(key_words_list) if tag == 'O']
    boundaries.append(len(key_words_list))

    split_by_o_list = []
    for part_no in range(len(boundaries) - 1):
        # The first segment starts at token 0, so it also covers anything
        # that precedes the first 'O'.
        start = 0 if part_no == 0 else boundaries[part_no]
        # NOTE(review): the ``- 1`` drops the token just before the next
        # boundary (and the final token of the list). Kept as in the original
        # slicing; confirm this is intended and not an off-by-one.
        split_by_o_list.append(result[start: boundaries[part_no + 1] - 1])
    print(split_by_o_list)

    number_with_value = []
    ranges_with_value = []
    for part in split_by_o_list:
        part_tags = [token[1] for token in part]
        if 'O' not in part_tags or 'S' not in part_tags:
            continue
        number_list_all = _expand_number_range(part[part_tags.index('O')][0])
        if number_list_all is None:
            continue
        value = part[part_tags.index('S')][0]
        ranges_with_value.append({'number_list': number_list_all, 'value': value})
        number_with_value.extend({number: value} for number in number_list_all)
    print(number_with_value)

    for group in ranges_with_value:
        group_numbers = group['number_list']
        for choice_m_box in choice_m_list:
            rows = choice_m_box['rows']
            if not rows:
                # Avoids dividing by zero for a box with no rows.
                continue
            outside = sum(1 for number in choice_m_box['number']
                          if number not in group_numbers)
            if outside / rows <= 0.4:
                points = float(group['value'])
                choice_m_box['default_points'] = [points for _ in range(rows)]
    return choice_m_list


def analyse_cloze_result(result, cloze_and_cloze_s_list):
    """Apply scores parsed from tagged OCR tokens to fill-in-the-blank items.

    ``result`` is a sequence of ``(text, tag)`` pairs. Nothing happens unless
    a ``'T'`` token is present. Then:

    * If an ``'S'`` token is also present, its first integer is written to
      ``'number'`` of every blank, and no blank is returned.
    * Otherwise, if no ``'C'`` token is present, the first integer of the
      ``'T'`` token is split evenly across the blanks of each group, written
      to ``'default_points'``, and those blanks are returned.

    Tokens without digits are ignored rather than raising.

    :param result: tagged tokens, each indexable as ``(text, tag)``.
    :param cloze_and_cloze_s_list: dicts whose ``'cloze_s_info'`` entry lists
        the blank dicts to update in place.
    :return: list of the blank dicts that received ``'default_points'``.
    """
    tags = [token[1] for token in result]

    def _first_int(tag):
        """Return the first integer in the first ``tag`` token, or None."""
        digits = re.findall(r'\d+', result[tags.index(tag)][0])
        return int(digits[0]) if digits else None

    if 'T' not in tags:
        return []

    if 'S' in tags:
        value = _first_int('S')
        if value is not None:
            # NOTE(review): the per-blank value is stored under 'number', as
            # in the original code; confirm 'default_points' was not intended.
            for group in cloze_and_cloze_s_list:
                for cloze_s_ele in group['cloze_s_info']:
                    cloze_s_ele['number'] = value
        return []

    if 'C' in tags:
        return []

    total_score = _first_int('T')
    if total_score is None:
        return []

    new_list = []
    for group in cloze_and_cloze_s_list:
        blanks = group['cloze_s_info']
        for cloze_s_ele in blanks:
            # NOTE(review): the original divided by ``index('C')`` inside a
            # branch that requires 'C' to be absent, so it always raised
            # ValueError. Splitting the total evenly over the group's blanks
            # is the assumed intent; confirm.
            cloze_s_ele['default_points'] = float(total_score) / len(blanks)
            new_list.append(cloze_s_ele)
    return new_list


def analyse_cloze_result1(result, cloze_and_cloze_s_list):
    """Delegate to :func:`analyse_cloze_result`.

    This function used to carry a line-for-line copy of
    ``analyse_cloze_result``. Forwarding the call keeps the two names
    behaving identically without maintaining the logic twice.

    :param result: tagged tokens, each indexable as ``(text, tag)``.
    :param cloze_and_cloze_s_list: dicts whose ``'cloze_s_info'`` entry lists
        the blank dicts to update in place.
    :return: whatever ``analyse_cloze_result`` returns for the same arguments.
    """
    return analyse_cloze_result(result, cloze_and_cloze_s_list)


def get_sheet_points_by_nlp(sheet_dict):
    """Fill in region scores from the OCR'd score text attached to regions.

    Every region carrying a ``'type_score_ocr'`` entry is tagged with the CRF
    model and dispatched on its ``'class_name'``: ``'cloze'`` goes to
    ``analyse_cloze_result``, ``'choice'`` to ``analyse_choice_result``, and
    anything else to ``analyse_solve_solve0_result``. Resulting
    ``'default_points'`` values are copied onto regions with the same
    ``'class_name'`` and ``'bounding_box'``, and the ``'type_score_ocr'``
    entries are removed.

    :param sheet_dict: dict with a ``'regions'`` list of region dicts; each
        region has ``'class_name'`` and ``'bounding_box'`` (with ``xmin``,
        ``ymin``, ``xmax``, ``ymax``). Updated in place.
    :return: the same ``sheet_dict`` object.
    """
    regions = sheet_dict['regions']

    def _bbox(region):
        """Return the region's box as ``[xmin, ymin, xmax, ymax]``."""
        box = region['bounding_box']
        return [box['xmin'], box['ymin'], box['xmax'], box['ymax']]

    choice_m_list = [region for region in regions if region['class_name'] == 'choice_m']
    cloze_s_regions = [region for region in regions if region['class_name'] == 'cloze_s']

    # Pair each cloze region with the cloze_s regions it fully contains.
    cloze_and_cloze_s_list = []
    for region in regions:
        if region['class_name'] != 'cloze':
            continue
        cloze_bbox = _bbox(region)
        contained = [cloze_s for cloze_s in cloze_s_regions
                     if decide_coordinate_full_contains(cloze_bbox, _bbox(cloze_s))]
        cloze_and_cloze_s_list.append({'cloze_info': region, 'cloze_s_info': contained})
    print(cloze_and_cloze_s_list)

    ocr_list = [region for region in regions if 'type_score_ocr' in region]
    if not ocr_list:
        # Nothing to tag, so the CRF bindings are not needed at all.
        return sheet_dict

    import CRFPP

    # Results are accumulated across all OCR'd regions. Overwriting the list
    # per region would leave only the last region's results for the merge.
    new_list = []
    for ele in ocr_list:
        # NOTE(review): the model is reloaded for every region. Hoisting the
        # Tagger out of the loop is only safe if TagParse/Tagger can be reused
        # across texts; confirm before changing.
        taggers = CRFPP.Tagger("-m " + './segment/sheet_resolve/model/nlp_model/crf2.model')
        result = TagParse(taggers).get_tag_val(ele['type_score_ocr'])

        class_name = ele['class_name']
        if class_name == 'cloze':
            print(ele)
            ele_bbox = _bbox(ele)
            matching_groups = []
            for group in cloze_and_cloze_s_list:
                print(group)
                if _bbox(group['cloze_info']) == ele_bbox:
                    matching_groups.append(group)
            updated = analyse_cloze_result(result, matching_groups)
        elif class_name == 'choice':
            updated = analyse_choice_result(result, choice_m_list)
        else:
            updated = analyse_solve_solve0_result(result, ele)

        # Analysers may return None or an empty list; both mean "no update".
        if updated:
            new_list.extend(updated)

    for region in regions:
        for scored in new_list:
            # Entries that only received a 'number' carry no score to copy.
            if 'default_points' not in scored:
                continue
            if (region['class_name'] == scored['class_name']
                    and region['bounding_box'] == scored['bounding_box']):
                region['default_points'] = scored['default_points']

    # The raw OCR text is an intermediate value and is not returned.
    for ele in ocr_list:
        ele.pop('type_score_ocr', None)

    sheet_dict['regions'] = regions
    return sheet_dict