4 years ago · b8401ddb98
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,64 @@
 
				+# Byte-compiled / optimized / DLL files
			
 
				+__pycache__/
			
 
				+*.py[cod]
			
 
				+
			
 
				+*.pbtxt
			
 
				+
			
 
				+# C extensions
			
 
				+*.so
			
 
				+
			
 
				+# Distribution / packaging
			
 
				+.Python
			
 
				+env/
			
 
				+build/
			
 
				+develop-eggs/
			
 
				+dist/
			
 
				+downloads/
			
 
				+eggs/
			
 
				+.eggs/
			
 
				+lib/
			
 
				+lib64/
			
 
				+parts/
			
 
				+sdist/
			
 
				+var/
			
 
				+*.egg-info/
			
 
				+.installed.cfg
			
 
				+*.egg
			
 
				+
			
 
				+# PyInstaller
			
 
				+#  Usually these files are written by a python script from a template
			
 
				+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
			
 
				+*.manifest
			
 
				+*.spec
			
 
				+
			
 
				+# Installer logs
			
 
				+pip-log.txt
			
 
				+pip-delete-this-directory.txt
			
 
				+
			
 
				+# Unit test / coverage reports
			
 
				+htmlcov/
			
 
				+.tox/
			
 
				+.coverage
			
 
				+.coverage.*
			
 
				+.cache
			
 
				+nosetests.xml
			
 
				+coverage.xml
			
 
				+*,cover
			
 
				+
			
 
				+# Translations
			
 
				+*.mo
			
 
				+*.pot
			
 
				+
			
 
				+# Django stuff:
			
 
				+*.log
			
 
				+
			
 
				+# Sphinx documentation
			
 
				+docs/_build/
			
 
				+
			
 
				+# PyBuilder
			
 
				+target/
			
 
				+
			
 
				+# model
			
 
				+segment/sheet_resolve/model
			
 
				+# upload images
			
 
				+segment/exam_image
			
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,4 @@
 
				+# Datasource local storage ignored files
			
 
				+/dataSources/
			
 
				+# Default ignored files
			
 
				+/workspace.xml
			
--- a/.idea/dataSources.local.xml
+++ b/.idea/dataSources.local.xml
@@ -0,0 +1,25 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="dataSourceStorageLocal">
			
 
				+    <data-source name="Django default" uuid="196c9e3b-952d-4e88-b2b8-8148410faad1">
			
 
				+      <database-info product="SQLite" version="3.20.1" jdbc-version="2.1" driver-name="SQLite JDBC" driver-version="3.20.1.1" dbms="SQLITE" exact-version="3.20.1" />
			
 
				+      <case-sensitivity plain-identifiers="mixed" quoted-identifiers="mixed" />
			
 
				+      <auth-required>false</auth-required>
			
 
				+      <schema-mapping>
			
 
				+        <introspection-scope>
			
 
				+          <node kind="schema" qname="main" />
			
 
				+        </introspection-scope>
			
 
				+      </schema-mapping>
			
 
				+    </data-source>
			
 
				+    <data-source name="db [2]" uuid="d3628523-582a-439a-a8a9-094e8504e34a">
			
 
				+      <database-info product="SQLite" version="3.20.1" jdbc-version="2.1" driver-name="SQLite JDBC" driver-version="3.20.1.1" dbms="SQLITE" exact-version="3.20.1" />
			
 
				+      <case-sensitivity plain-identifiers="mixed" quoted-identifiers="mixed" />
			
 
				+      <auth-required>false</auth-required>
			
 
				+      <schema-mapping>
			
 
				+        <introspection-scope>
			
 
				+          <node kind="schema" qname="@" />
			
 
				+        </introspection-scope>
			
 
				+      </schema-mapping>
			
 
				+    </data-source>
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/dataSources.xml
+++ b/.idea/dataSources.xml
@@ -0,0 +1,33 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="DataSourceManagerImpl" format="xml" multifile-model="true">
			
 
				+    <data-source source="LOCAL" name="Django default" uuid="196c9e3b-952d-4e88-b2b8-8148410faad1">
			
 
				+      <driver-ref>sqlite.xerial</driver-ref>
			
 
				+      <synchronize>true</synchronize>
			
 
				+      <imported>true</imported>
			
 
				+      <remarks>$PROJECT_DIR$/exam_segment_django/settings.py</remarks>
			
 
				+      <jdbc-driver>org.sqlite.JDBC</jdbc-driver>
			
 
				+      <jdbc-url>jdbc:sqlite:D:\project\exam_segment_django\db.sqlite3</jdbc-url>
			
 
				+      <driver-properties>
			
 
				+        <property name="enable_load_extension" value="true" />
			
 
				+      </driver-properties>
			
 
				+    </data-source>
			
 
				+    <data-source source="LOCAL" name="db [2]" uuid="d3628523-582a-439a-a8a9-094e8504e34a">
			
 
				+      <driver-ref>sqlite.xerial</driver-ref>
			
 
				+      <synchronize>true</synchronize>
			
 
				+      <jdbc-driver>org.sqlite.JDBC</jdbc-driver>
			
 
				+      <jdbc-url>jdbc:sqlite:D:\project\exam-segment-django\db.sqlite3</jdbc-url>
			
 
				+      <driver-properties>
			
 
				+        <property name="enable_load_extension" value="true" />
			
 
				+      </driver-properties>
			
 
				+      <libraries>
			
 
				+        <library>
			
 
				+          <url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.20.1.1/sqlite-jdbc-3.20.1.1.jar</url>
			
 
				+        </library>
			
 
				+        <library>
			
 
				+          <url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.20.1.1/xerial-sqlite-license.txt</url>
			
 
				+        </library>
			
 
				+      </libraries>
			
 
				+    </data-source>
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/deployment.xml
+++ b/.idea/deployment.xml
@@ -0,0 +1,31 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="PublishConfigData" autoUpload="Always" serverName="ubuntu@192.168.1.208:22">
			
 
				+    <serverData>
			
 
				+      <paths name="ubuntu-station">
			
 
				+        <serverdata>
			
 
				+          <mappings>
			
 
				+            <mapping local="D:/Anaconda3" web="/" />
			
 
				+            <mapping local="$PROJECT_DIR$" web="/" />
			
 
				+          </mappings>
			
 
				+        </serverdata>
			
 
				+      </paths>
			
 
				+      <paths name="ubuntu@192.168.1.167:22">
			
 
				+        <serverdata>
			
 
				+          <mappings>
			
 
				+            <mapping local="D:/Anaconda3" web="/" />
			
 
				+            <mapping local="$PROJECT_DIR$" web="/" />
			
 
				+          </mappings>
			
 
				+        </serverdata>
			
 
				+      </paths>
			
 
				+      <paths name="ubuntu@192.168.1.208:22">
			
 
				+        <serverdata>
			
 
				+          <mappings>
			
 
				+            <mapping deploy="/tmp/pycharm_project_586" local="$PROJECT_DIR$" />
			
 
				+          </mappings>
			
 
				+        </serverdata>
			
 
				+      </paths>
			
 
				+    </serverData>
			
 
				+    <option name="myAutoUpload" value="ALWAYS" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/exam_segment_django.iml
+++ b/.idea/exam_segment_django.iml
@@ -0,0 +1,46 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<module type="PYTHON_MODULE" version="4">
			
 
				+  <component name="FacetManager">
			
 
				+    <facet type="django" name="Django">
			
 
				+      <configuration>
			
 
				+        <option name="rootFolder" value="$MODULE_DIR$" />
			
 
				+        <option name="settingsModule" value="exam_segment_django/settings.py" />
			
 
				+        <option name="manageScript" value="$MODULE_DIR$/manage.py" />
			
 
				+        <option name="environment" value="&lt;map/&gt;" />
			
 
				+        <option name="doNotUseTestRunner" value="false" />
			
 
				+        <option name="trackFilePattern" value="migrations" />
			
 
				+      </configuration>
			
 
				+    </facet>
			
 
				+  </component>
			
 
				+  <component name="NewModuleRootManager">
			
 
				+    <content url="file://D:/Anaconda3">
			
 
				+      <excludeFolder url="file://D:/Anaconda3" />
			
 
				+    </content>
			
 
				+    <content url="file://$MODULE_DIR$">
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/exam_image" />
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/exam_info" />
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/sheet_resolve/images" />
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/sheet_resolve/labels" />
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/sheet_resolve/model" />
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/upload_images" />
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/segment/xml_labels" />
			
 
				+    </content>
			
 
				+    <orderEntry type="jdk" jdkName="Python 3.6" jdkType="Python SDK" />
			
 
				+    <orderEntry type="sourceFolder" forTests="false" />
			
 
				+  </component>
			
 
				+  <component name="PyDocumentationSettings">
			
 
				+    <option name="renderExternalDocumentation" value="true" />
			
 
				+  </component>
			
 
				+  <component name="TemplatesService">
			
 
				+    <option name="TEMPLATE_CONFIGURATION" value="Django" />
			
 
				+    <option name="TEMPLATE_FOLDERS">
			
 
				+      <list>
			
 
				+        <option value="$MODULE_DIR$/templates" />
			
 
				+      </list>
			
 
				+    </option>
			
 
				+  </component>
			
 
				+  <component name="TestRunnerService">
			
 
				+    <option name="projectConfiguration" value="pytest" />
			
 
				+    <option name="PROJECT_TEST_RUNNER" value="py.test" />
			
 
				+  </component>
			
 
				+</module>
			
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,15 @@
 
				+<component name="InspectionProjectProfileManager">
			
 
				+  <profile version="1.0">
			
 
				+    <option name="myName" value="Project Default" />
			
 
				+    <inspection_tool class="HtmlUnknownAttribute" enabled="true" level="WARNING" enabled_by_default="true">
			
 
				+      <option name="myValues">
			
 
				+        <value>
			
 
				+          <list size="1">
			
 
				+            <item index="0" class="java.lang.String" itemvalue="text-align" />
			
 
				+          </list>
			
 
				+        </value>
			
 
				+      </option>
			
 
				+      <option name="myCustomValuesEnabled" value="true" />
			
 
				+    </inspection_tool>
			
 
				+  </profile>
			
 
				+</component>
			
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,10 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="JavaScriptSettings">
			
 
				+    <option name="languageLevel" value="ES6" />
			
 
				+  </component>
			
 
				+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
			
 
				+  <component name="PyCharmProfessionalAdvertiser">
			
 
				+    <option name="shown" value="true" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="ProjectModuleManager">
			
 
				+    <modules>
			
 
				+      <module fileurl="file://$PROJECT_DIR$/.idea/exam_segment_django.iml" filepath="$PROJECT_DIR$/.idea/exam_segment_django.iml" />
			
 
				+    </modules>
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/other.xml
+++ b/.idea/other.xml
@@ -0,0 +1,7 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="PySciProjectComponent">
			
 
				+    <option name="PY_SCI_VIEW" value="true" />
			
 
				+    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="VcsDirectoryMappings">
			
 
				+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/README.md
+++ b/README.md
@@ -0,0 +1,5 @@
 
				+# exam-segment-django
			
 
				+该项目目前实现了对试卷信息的分割：  
			
 
				+* 试卷块分割，一张试卷有左右两页，分成两页（待改进）；
			
 
				+* 试卷文字信息识别；
			
 
				+* 根据试卷中的文字信息分割题目；
			
--- a/db.sqlite3
+++ b/db.sqlite3
--- a/exam_segment_django/__init__.py
+++ b/exam_segment_django/__init__.py
--- a/exam_segment_django/settings.py
+++ b/exam_segment_django/settings.py
@@ -0,0 +1,146 @@
 
				+"""
			
 
				+Django settings for exam_segment_django project.
			
 
				+
			
 
				+Generated by 'django-admin startproject' using Django 2.1.
			
 
				+
			
 
				+For more information on this file, see
			
 
				+https://docs.djangoproject.com/en/2.1/topics/settings/
			
 
				+
			
 
				+For the full list of settings and their values, see
			
 
				+https://docs.djangoproject.com/en/2.1/ref/settings/
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+
			
 
				+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
			
 
				+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
			
 
				+
			
 
				+
			
 
				+# Quick-start development settings - unsuitable for production
			
 
				+# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/
			
 
				+
			
 
				+# SECURITY WARNING: keep the secret key used in production secret!
			
 
				+SECRET_KEY = '3t_*ihwd7qfdwj0-j8t+#a48h$tw_*9gxpv9cjrvbai7h!0!1t'
			
 
				+# SECRET_KEY = os.environ.get('DJ_SECRET_KEY')
			
 
				+
			
 
				+# SECURITY WARNING: don't run with debug turned on in production!
			
 
				+DEBUG = True
			
 
				+
			
 
				+ALLOWED_HOSTS = ['117.50.37.71', '127.0.0.1', 'localhost',
			
 
				+                 '0.0.0.0:8000', '192.168.1.208:8000', '192.168.1.208']
			
 
				+
			
 
				+
			
 
				+# Application definition
			
 
				+
			
 
				+INSTALLED_APPS = [
			
 
				+    'django.contrib.admin',
			
 
				+    'django.contrib.auth',
			
 
				+    'django.contrib.contenttypes',
			
 
				+    'django.contrib.sessions',
			
 
				+    'django.contrib.messages',
			
 
				+    'django.contrib.staticfiles',
			
 
				+    'segment.apps.SegmentConfig',
			
 
				+]
			
 
				+
			
 
				+MIDDLEWARE = [
			
 
				+    'django.middleware.security.SecurityMiddleware',
			
 
				+    'django.contrib.sessions.middleware.SessionMiddleware',
			
 
				+    'django.middleware.common.CommonMiddleware',
			
 
				+    'django.middleware.csrf.CsrfViewMiddleware',
			
 
				+    'django.contrib.auth.middleware.AuthenticationMiddleware',
			
 
				+    'django.contrib.messages.middleware.MessageMiddleware',
			
 
				+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
			
 
				+]
			
 
				+
			
 
				+ROOT_URLCONF = 'exam_segment_django.urls'
			
 
				+
			
 
				+TEMPLATES = [
			
 
				+    {
			
 
				+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
			
 
				+        'DIRS': [os.path.join(BASE_DIR, 'templates')],
			
 
				+        'APP_DIRS': True,
			
 
				+        'OPTIONS': {
			
 
				+            'context_processors': [
			
 
				+                'django.template.context_processors.debug',
			
 
				+                'django.template.context_processors.request',
			
 
				+                'django.contrib.auth.context_processors.auth',
			
 
				+                'django.contrib.messages.context_processors.messages',
			
 
				+            ],
			
 
				+        },
			
 
				+    },
			
 
				+]
			
 
				+
			
 
				+WSGI_APPLICATION = 'exam_segment_django.wsgi.application'
			
 
				+
			
 
				+
			
 
				+# Database
			
 
				+# https://docs.djangoproject.com/en/2.1/ref/settings/#databases
			
 
				+
			
 
				+DATABASES = {
			
 
				+    'default': {
			
 
				+        'ENGINE': 'django.db.backends.sqlite3',
			
 
				+        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				+# Password validation
			
 
				+# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators
			
 
				+
			
 
				+AUTH_PASSWORD_VALIDATORS = [
			
 
				+    {
			
 
				+        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
			
 
				+    },
			
 
				+    {
			
 
				+        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
			
 
				+    },
			
 
				+    {
			
 
				+        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
			
 
				+    },
			
 
				+    {
			
 
				+        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
			
 
				+    },
			
 
				+]
			
 
				+
			
 
				+
			
 
				+# Internationalization
			
 
				+# https://docs.djangoproject.com/en/2.1/topics/i18n/
			
 
				+
			
 
				+LANGUAGE_CODE = 'en-us'
			
 
				+
			
 
				+TIME_ZONE = 'Asia/Shanghai'
			
 
				+
			
 
				+USE_I18N = True
			
 
				+
			
 
				+USE_L10N = True
			
 
				+
			
 
				+USE_TZ = False
			
 
				+
			
 
				+
			
 
				+# Static files (CSS, JavaScript, Images)
			
 
				+# https://docs.djangoproject.com/en/2.1/howto/static-files/
			
 
				+
			
 
				+STATIC_URL = '/static/'
			
 
				+
			
 
				+MEDIA_ROOT = os.path.join(BASE_DIR, 'segment', 'exam_image').replace('\\', '/')
			
 
				+MEDIA_URL = '/exam_image/'
			
 
				+
			
 
				+TEMPLATES_ROOT = os.path.join(BASE_DIR, 'templates').replace('\\', '/')
			
 
				+
			
 
				+# segment-app settings
			
 
				+
			
 
				+TOLERANCE_PIX_NUMBER = 1
			
 
				+RESIZE_RADIO = 1.0  # (0~1]
			
 
				+
			
 
				+OCR_BOX_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
			
 
				+OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
			
 
				+
			
 
				+# OCR_ACCURACY = 'general'
			
 
				+OCR_ACCURACY = 'accurate'
			
 
				+# OCR_CLIENT_ID = 'edZmhwHUTHLgrWdaxEQ72FfY'
			
 
				+# OCR_CLIENT_SECRET = 'qcEtvS0dRygSF2Pa9KQjbMQcjPKBqGIE'
			
 
				+OCR_CLIENT_ID = 'AVH7VGKG8QxoSotp6wG9LyZq'
			
 
				+OCR_CLIENT_SECRET = 'gG7VYvBWLU8Rusnin8cS8Ta4dOckGFl6'
			
 
				+OCR_TOKEN_UPDATE_DATE = 10
			
 
				+
			
 
				+LOGGING_TYPE = 'production'
			
--- a/exam_segment_django/urls.py
+++ b/exam_segment_django/urls.py
@@ -0,0 +1,28 @@
 
				+"""exam_segment_django URL Configuration
			
 
				+
			
 
				+The `urlpatterns` list routes URLs to views. For more information please see:
			
 
				+    https://docs.djangoproject.com/en/2.1/topics/http/urls/
			
 
				+Examples:
			
 
				+Function views
			
 
				+    1. Add an import:  from my_app import views
			
 
				+    2. Add a URL to urlpatterns:  path('', views.home, name='home')
			
 
				+Class-based views
			
 
				+    1. Add an import:  from other_app.views import Home
			
 
				+    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
			
 
				+Including another URLconf
			
 
				+    1. Import the include() function: from django.urls import include, path
			
 
				+    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
			
 
				+"""
			
 
				+from django.conf import settings
			
 
				+from django.conf.urls import url
			
 
				+from django.conf.urls.static import static
			
 
				+from django.contrib import admin
			
 
				+from django.urls import include
			
 
				+from django.urls import path
			
 
				+from django.views.static import serve
			
 
				+
			
 
				+urlpatterns = [
			
 
				+    path('admin/', admin.site.urls),
			
 
				+    path('segment/', include('segment.urls')),
			
 
				+    url(r'^exam_image/(?P<path>.*)$', serve, {'document_root': settings.MEDIA_ROOT}),
			
 
				+] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
			
--- a/exam_segment_django/wsgi.py
+++ b/exam_segment_django/wsgi.py
@@ -0,0 +1,16 @@
 
"""
WSGI config for exam_segment_django project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# setdefault() leaves an already-exported DJANGO_SETTINGS_MODULE untouched, so
# the hosting environment can point the process at a different settings module.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'exam_segment_django.settings')

# Module-level callable; settings.WSGI_APPLICATION refers to it by dotted path.
application = get_wsgi_application()
			
--- a/manage.py
+++ b/manage.py
@@ -0,0 +1,15 @@
 
				+#!/usr/bin/env python
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'exam_segment_django.settings')
			
 
				+    try:
			
 
				+        from django.core.management import execute_from_command_line
			
 
				+    except ImportError as exc:
			
 
				+        raise ImportError(
			
 
				+            "Couldn't import Django. Are you sure it's installed and "
			
 
				+            "available on your PYTHONPATH environment variable? Did you "
			
 
				+            "forget to activate a virtual environment?"
			
 
				+        ) from exc
			
 
				+    execute_from_command_line(sys.argv)
			
--- a/segment/__init__.py
+++ b/segment/__init__.py
--- a/segment/admin.py
+++ b/segment/admin.py
@@ -0,0 +1,3 @@
 
				+from django.contrib import admin
			
 
				+
			
 
				+# Register your models here.
			
--- a/segment/apps.py
+++ b/segment/apps.py
@@ -0,0 +1,5 @@
 
				+from django.apps import AppConfig
			
 
				+
			
 
				+
			
 
class SegmentConfig(AppConfig):
    """Application configuration for the ``segment`` app."""

    # Dotted Python path of the application package.
    name = 'segment'
			
--- a/segment/exam_info/000000-template.xml
+++ b/segment/exam_info/000000-template.xml
@@ -0,0 +1,14 @@
 
				+<annotation>
			
 
				+	<folder>JPEGImage</folder>
			
 
				+	<filename>000001.jpg</filename>
			
 
				+	<path>00</path>
			
 
				+	<source>
			
 
				+		<database>Unknown</database>
			
 
				+	</source>
			
 
				+	<size>
			
 
				+		<width>1000</width>
			
 
				+		<height>1000</height>
			
 
				+		<depth>3</depth>
			
 
				+	</size>
			
 
				+	<segmented>0</segmented>
			
 
				+</annotation>
			
--- a/segment/form.py
+++ b/segment/form.py
@@ -0,0 +1,69 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : form.py
			
 
				+from django import forms
			
 
				+
			
 
				+SUBJECT_ID = ((0, '未知学科'),
			
 
				+              (3, '数学'),
			
 
				+              (6, '数学（知心慧学）'),
			
 
				+              (8, '英语'),
			
 
				+              (9, '语文'),
			
 
				+              (12, '物理'),
			
 
				+              (13, '化学'),
			
 
				+              (14, '生物'),
			
 
				+              (15, '政治'),
			
 
				+              (16, '历史'),
			
 
				+              (17, '地理'),
			
 
				+              (18, '理综'),
			
 
				+              (19, '文综'),
			
 
				+              (98, '英语-B'),
			
 
				+              (99, '英语-T'),
			
 
				+              )
			
 
				+
			
 
				+
			
 
				+class UploadImageForm(forms.Form):
			
 
				+    subject_id_dict = SUBJECT_ID
			
 
				+
			
 
				+    subject = forms.ChoiceField(label='科目', label_suffix=':', widget=forms.Select(),
			
 
				+                                choices=subject_id_dict, initial=3, required=True, )
			
 
				+    img_data = forms.ImageField(label='试卷图片', label_suffix=':',
			
 
				+                                widget=forms.ClearableFileInput(attrs={'multiple': True}))
			
 
				+
			
 
				+
			
 
				+class UploadImageWithPaperIdForm(forms.Form):
			
 
				+    subject_id_dict = SUBJECT_ID
			
 
				+
			
 
				+    subject = forms.ChoiceField(label='科目', label_suffix=':', widget=forms.Select(),
			
 
				+                                choices=subject_id_dict, initial=3, required=True, )
			
 
				+    paper_id = forms.CharField(label='PaperID', label_suffix=':',)
			
 
				+    img_data = forms.ImageField(label='试卷图片', label_suffix=':',
			
 
				+                                widget=forms.ClearableFileInput(attrs={'multiple': True}))
			
 
				+
			
 
				+
			
 
				+class FormulaUrlForm(forms.Form):
			
 
				+    img_url = forms.CharField(label='试卷URL', label_suffix=':')
			
 
				+
			
 
				+
			
 
				+# class UploadFileForm(forms.Form):
			
 
				+#     # xml_file = forms.FileField(label='XML', label_suffix=':',
			
 
				+#     #                            widget=forms.ClearableFileInput(attrs={'multiple': True}))
			
 
				+#     xml_file = forms.FileField(label='XML', label_suffix=':',
			
 
				+#                                widget=forms.ClearableFileInput(attrs={'multiple': True}))
			
 
				+
			
 
				+
			
 
				+class SubmitSeriesNumberForm(forms.Form):
			
 
				+    series_number = forms.CharField(label='series_number', label_suffix=':', max_length=100)
			
 
				+
			
 
				+
			
 
				+class DownloadImage(forms.Form):
			
 
				+    paper_id = forms.CharField(label='paper_id', label_suffix=':', max_length=100)
			
 
				+
			
 
				+
			
 
				+class UploadFileForm(forms.Form):
			
 
				+    subject_id_dict = SUBJECT_ID
			
 
				+
			
 
				+    subject = forms.ChoiceField(label='科目', label_suffix=':', widget=forms.Select(),
			
 
				+                                choices=subject_id_dict, initial=3, required=True, )
			
 
				+    img_data = forms.FileField(label='试卷PDF', label_suffix=':',
			
 
				+                               widget=forms.ClearableFileInput(attrs={'multiple': True}))
			
 
				+
			
 
				+
			
--- a/segment/formula/__init__.py
+++ b/segment/formula/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py
			
 
				+# @Time    : 2019/1/24 0024 上午 11:17
			
--- a/segment/formula/formula_segment.py
+++ b/segment/formula/formula_segment.py
@@ -0,0 +1,273 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : formula_segment.py
			
 
				+# @Time    : 2019/1/24 0024 下午 13:24
			
 
				+import time
			
 
				+import re
			
 
				+import copy
			
 
				+import math
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from segment.formula import mathpix_ocr
			
 
				+from segment.server import get_ocr_text_and_coordinate_formula
			
 
				+from segment.image_operation import utils
			
 
				+
			
 
				+
			
 
				+def get_coordinates(word_res, formula_words_list):
			
 
				+    res_list = []
			
 
				+    for formula_raw in formula_words_list:
			
 
				+        coordinates_start_index = formula_raw[1][0]
			
 
				+        coordinates_end_index = formula_raw[1][1] - 1
			
 
				+        coordinates_start = word_res['chars'][coordinates_start_index]['location']
			
 
				+        coordinates_end = word_res['chars'][coordinates_end_index]['location']
			
 
				+        coordinates = (coordinates_start['left'],  # xmin
			
 
				+                       min(coordinates_start['top'], coordinates_end['top']),  # ymin
			
 
				+                       coordinates_end['left'] + coordinates_end['width'],  # xmax
			
 
				+                       max(coordinates_start['top'] + coordinates_start['height'],
			
 
				+                           coordinates_end['top'] + coordinates_end['height']))  # ymax
			
 
				+        tmp_dict = {'chars': formula_raw[0],
			
 
				+                    'raw_chars': formula_raw[0],
			
 
				+                    'coordinates': coordinates,
			
 
				+                    'middle': (coordinates[0] + int((coordinates[2] - coordinates[0]) // 2),
			
 
				+                               coordinates[1] + int((coordinates[3] - coordinates[1]) // 2))}
			
 
				+        res_list.append(tmp_dict)
			
 
				+    return res_list
			
 
				+
			
 
				+
			
 
				+def generate_char(words, index_pair, zh=True):
			
 
				+    if index_pair:
			
 
				+        # new_words = words.copy()
			
 
				+        length = index_pair[1] - index_pair[0]
			
 
				+        gen = ''
			
 
				+        if zh:
			
 
				+            for i in range(length):
			
 
				+                gen = '中' + gen
			
 
				+        else:
			
 
				+            for i in range(length):
			
 
				+                gen = 'F' + gen
			
 
				+        words = words.replace(words[index_pair[0]:index_pair[1]], gen)
			
 
				+        return words
			
 
				+    else:
			
 
				+        return words
			
 
				+
			
 
				+
			
 
				+def segment(img, save_path, access_token):
			
 
				+    # raw_img = img.copy()
			
 
				+    # img = utils.preprocess(raw_img, None)
			
 
				+
			
 
				+    word_result_list = get_ocr_text_and_coordinate_formula(img, access_token)
			
 
				+    formula_coordinates_dict_list = []
			
 
				+    zh_coordinates_dict_list = []
			
 
				+    zh_char_height = 20  # default
			
 
				+    zh_char_width = 15  # default
			
 
				+    zh_char_height_list = []
			
 
				+    zh_char_width_list = []
			
 
				+
			
 
				+    exclude = r'{}|{}|{}|{}|{}|{}'.format(
			
 
				+        '^[(（]*[\d]+[）)]',
			
 
				+        '[ABCD]\.',
			
 
				+        '[\u4e00-\u9fa5][，；：。,;:.]',
			
 
				+        '[①②③④⑤⑥⑦⑧⑨⑩]',
			
 
				+        '[(（][）)]',
			
 
				+        '[\u4e00-\u9fa5][\d]+[\u4e00-\u9fa5]')
			
 
				+
			
 
				+    for index, word_res in enumerate(word_result_list):
			
 
				+        words = word_res['words'].replace(' ', '').replace('兀', 'π')  # 去除空格，baidu_api bug
			
 
				+        abcd_words_m = re.finditer(exclude, words)
			
 
				+        abcd_index_list = [(m.group(), m.span()) for m in abcd_words_m if m]
			
 
				+
			
 
				+        words_tmp_zh = copy.copy(words)
			
 
				+        for ele in abcd_index_list:
			
 
				+            words_tmp_zh = generate_char(words_tmp_zh, ele[1], zh=True)
			
 
				+
			
 
				+        formula_words_m = re.finditer(r'[^\u4e00-\u9fa5._"“”]+', words_tmp_zh)
			
 
				+        formula_index_list = [(m.group(), m.span()) for m in formula_words_m if m]
			
 
				+        formula_list = get_coordinates(word_res, formula_index_list)
			
 
				+        formula_coordinates_dict_list = formula_coordinates_dict_list + formula_list
			
 
				+
			
 
				+        words_tmp_formula = copy.copy(words)
			
 
				+        for ele in abcd_index_list:
			
 
				+            words_tmp_formula = generate_char(words_tmp_formula, ele[1], zh=False)
			
 
				+        zh_words_m = re.finditer(r'[\u4e00-\u9fa5._"“”]+', words_tmp_formula)
			
 
				+        zh_index_list = [(m.group(), m.span()) for m in zh_words_m if m]
			
 
				+        zh_list = get_coordinates(word_res, zh_index_list + abcd_index_list)
			
 
				+        zh_coordinates_dict_list = zh_coordinates_dict_list + zh_list
			
 
				+
			
 
				+        one_zh_char_m = re.match(r'[\u4e00-\u9fa5]+', words)
			
 
				+        if one_zh_char_m:
			
 
				+            index = one_zh_char_m.span()[0]
			
 
				+            zh_char_height_list.append(word_res['chars'][index]['location']['height'])
			
 
				+            zh_char_width_list.append(word_res['chars'][index]['location']['width'])
			
 
				+
			
 
				+    if len(zh_char_width_list) > 0 and len(zh_char_height_list) > 0:
			
 
				+        zh_char_height = np.mean(zh_char_height_list)
			
 
				+        zh_char_width = np.mean(zh_char_width_list)
			
 
				+
			
 
				+    formula_coordinates_list = [ele['coordinates'] for ele in formula_coordinates_dict_list]
			
 
				+    formula_combine_list = combine(formula_coordinates_list, zh_char_height, zh_char_width)  # 欧式距离
			
 
				+
			
 
				+    formula_combine_dict_list = []
			
 
				+    for i, ele in enumerate(formula_combine_list):
			
 
				+        middle = (ele[0] + int((ele[2] - ele[0]) // 2), ele[1] + int((ele[3] - ele[1]) // 2))
			
 
				+        ocr_region = utils.crop_region_direct(img, ele)
			
 
				+        y, x = ocr_region.shape[0], ocr_region.shape[1]
			
 
				+        if min(y, x) <= 50:
			
 
				+            ocr_region = utils.resize_by_percent(ocr_region, 2.00)  # 放大若干倍
			
 
				+        try:
			
 
				+            mathpix_raw_chars, latex_confidence = mathpix_ocr.mathpix_api(ocr_region)  # 识别公式
			
 
				+            render_mathpix_chars = '<latex>{}</latex>'.format(mathpix_raw_chars)
			
 
				+            if latex_confidence < 0.2:
			
 
				+                for item in formula_coordinates_dict_list:
			
 
				+                    if ele == item['coordinates']:
			
 
				+                        mathpix_raw_chars = item['chars']
			
 
				+                        render_mathpix_chars = '<latex>{}</latex>'.format(item['chars'])
			
 
				+                        break
			
 
				+
			
 
				+        except Exception:
			
 
				+            render_mathpix_chars = 'formula'
			
 
				+            mathpix_raw_chars = 'formula'
			
 
				+            for item in formula_coordinates_dict_list:
			
 
				+                if ele == item['coordinates']:
			
 
				+                    mathpix_raw_chars = item['chars']
			
 
				+                    render_mathpix_chars = '<latex>{}</latex>'.format(item['chars'])
			
 
				+                    break
			
 
				+
			
 
				+        # print(render_mathpix_chars)
			
 
				+        tmp_dict = {'chars': render_mathpix_chars, 'middle': middle, 'coordinates': ele, 'raw_chars': mathpix_raw_chars}
			
 
				+        formula_combine_dict_list.append(tmp_dict)
			
 
				+
			
 
				+    # res_dict = {'formula': formula_combine_list, 'zh_chars': zh_coordinates_dict_list}
			
 
				+    all_dict_list = formula_combine_dict_list + zh_coordinates_dict_list
			
 
				+
			
 
				+    all_dict_list = sorted(all_dict_list, key=lambda k: k.get('middle')[1])
			
 
				+
			
 
				+    # 相邻y做差
			
 
				+    former = np.array([ele['middle'][1] for ele in all_dict_list[:-1]])
			
 
				+    rear = np.array([ele['middle'][1] for ele in all_dict_list[1:]])
			
 
				+    dif = rear - former
			
 
				+    split_x_index = [index for index, ele in enumerate(dif) if ele >= zh_char_height]  # y轴排序
			
 
				+
			
 
				+    if not split_x_index:
			
 
				+        all_dict_list = sorted(all_dict_list, key=lambda k: k.get('middle')[0])  # x轴排序
			
 
				+        lines = [ele['chars'] for ele in all_dict_list]
			
 
				+        raw_lines = [ele['raw_chars'] for ele in all_dict_list]
			
 
				+        return lines, raw_lines
			
 
				+    else:
			
 
				+        res_list = []
			
 
				+        split_x_index = [ele + 1 for ele in split_x_index]  # 索引值扩大
			
 
				+        split_x_index.insert(0, 0)
			
 
				+        split_x_index.insert(-1, len(all_dict_list))
			
 
				+        split_x_index = sorted(list(set(split_x_index)))
			
 
				+        for i, split in enumerate(split_x_index[1:]):
			
 
				+            one_line = all_dict_list[split_x_index[i]:split_x_index[i + 1]]
			
 
				+            one_line = sorted(one_line, key=lambda k: k.get('middle')[0])  # x轴排序
			
 
				+            res_list.append(one_line)
			
 
				+
			
 
				+        lines = []
			
 
				+        raw_lines = []
			
 
				+        for ele in res_list:
			
 
				+            line_chars = ''
			
 
				+            raw_lines_chars = ''
			
 
				+            for ele1 in ele:
			
 
				+                chars = ele1['chars']
			
 
				+                raw_chars = ele1['raw_chars']
			
 
				+                line_chars = line_chars + chars
			
 
				+                raw_lines_chars = raw_lines_chars + raw_chars
			
 
				+
			
 
				+            lines.append(line_chars + '\n')
			
 
				+            raw_lines.append(raw_lines_chars + '\n')
			
 
				+        # print(lines)
			
 
				+
			
 
				+        return lines, raw_lines
			
 
				+
			
 
				+
			
 
				+def combine(formula_coordinates_list, zh_char_height, zh_char_width):
			
 
				+    formula_coordinates_list = sorted(formula_coordinates_list, key=lambda k: k[0])
			
 
				+    formula_coordinates_list = sorted(formula_coordinates_list, key=lambda k: k[1])  # 先x轴，再y轴排序
			
 
				+
			
 
				+    recursion_flag = False
			
 
				+    del_list = []
			
 
				+    temp_list = formula_coordinates_list.copy()
			
 
				+
			
 
				+    for i, outer in enumerate(temp_list):  # xmin, ymin, xmax, ymax
			
 
				+        for j, inner in enumerate(temp_list):  # xmin, ymin, xmax, ymax
			
 
				+            if not i == j:
			
 
				+                min_distance, flag = get_min_distance(outer, inner)
			
 
				+                combine_coordinate = ()
			
 
				+                if flag == 'i':
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+                elif flag == 'h' and min_distance <= 1:
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+                elif flag == 'w' and min_distance <= zh_char_width:
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+                elif flag == 'c' and min_distance <= 1:
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+
			
 
				+                if combine_coordinate:
			
 
				+                    if not combine_coordinate == outer and not combine_coordinate == inner:  # 避免全包围的情况
			
 
				+                        del_list.append(outer)
			
 
				+                        del_list.append(inner)
			
 
				+                    if combine_coordinate == outer:
			
 
				+                        del_list.append(inner)
			
 
				+                    if combine_coordinate == inner:
			
 
				+                        del_list.append(outer)
			
 
				+                    formula_coordinates_list.append(combine_coordinate)
			
 
				+
			
 
				+    res = list(set(formula_coordinates_list) - set(del_list))
			
 
				+
			
 
				+    if recursion_flag:
			
 
				+        return combine(res, zh_char_height, zh_char_width)
			
 
				+    else:
			
 
				+        return res
			
 
				+
			
 
				+
			
 
				+def get_min_distance_square(coordinate1, coordinate2):  # 顶点间欧式距离最小值的平方和
			
 
				+    all_points1 = [(x, y) for x in [coordinate1[0], coordinate1[2]] for y in [coordinate1[1], coordinate1[3]]]
			
 
				+    all_points2 = [(x, y) for x in [coordinate2[0], coordinate2[2]] for y in [coordinate2[1], coordinate2[3]]]
			
 
				+    distance_list = []
			
 
				+    for index1, point1 in enumerate(all_points1):
			
 
				+        for index2, point2 in enumerate(all_points2):
			
 
				+            distance = (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
			
 
				+            distance_list.append(distance)
			
 
				+    min_distance = min(distance_list)
			
 
				+    return min_distance
			
 
				+
			
 
				+
			
 
				+def get_min_distance(coordinate1, coordinate2):  # 欧式距离最小值
			
 
				+
			
 
				+    def dist(point1, point2):
			
 
				+        distance = (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
			
 
				+        return math.sqrt(distance)
			
 
				+
			
 
				+    (x1, y1, x1b, y1b) = coordinate1
			
 
				+    (x2, y2, x2b, y2b) = coordinate2
			
 
				+    left = x2b < x1  # 2在1的坐标左边
			
 
				+    right = x1b < x2  # 2在1的坐标右边
			
 
				+    bottom = y2b < y1  # 2在1的坐标下边
			
 
				+    top = y1b < y2  # 2在1的坐标上边
			
 
				+    if top and left:
			
 
				+        return dist((x1, y1b), (x2b, y2)), 'c'
			
 
				+    elif left and bottom:
			
 
				+        return dist((x1, y1), (x2b, y2b)), 'c'
			
 
				+    elif bottom and right:
			
 
				+        return dist((x1b, y1), (x2, y2b)), 'c'
			
 
				+    elif right and top:
			
 
				+        return dist((x1b, y1b), (x2, y2)), 'c'
			
 
				+    elif left:
			
 
				+        return x1 - x2b, 'w'
			
 
				+    elif right:
			
 
				+        return x2 - x1b, 'w'
			
 
				+    elif bottom:
			
 
				+        return y1 - y2b, 'h'
			
 
				+    elif top:
			
 
				+        return y2 - y1b, 'h'
			
 
				+    else:             # rectangles intersect
			
 
				+        return 0, 'i'
			
--- a/segment/formula/formula_segment_and_show.py
+++ b/segment/formula/formula_segment_and_show.py
@@ -0,0 +1,321 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : formula_segment_and_show.py
			
 
				+# @Time    : 2019/1/24 0024 下午 13:24
			
 
				+import time
			
 
				+import re
			
 
				+import copy
			
 
				+import math
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from segment.formula import mathpix_ocr
			
 
				+from segment.server import get_ocr_text_and_coordinate_formula
			
 
				+from segment.image_operation import utils
			
 
				+
			
 
				+
			
 
				+def get_coordinates(word_res, formula_words_list):
			
 
				+    res_list = []
			
 
				+    for formula_raw in formula_words_list:
			
 
				+        coordinates_start_index = formula_raw[1][0]
			
 
				+        coordinates_end_index = formula_raw[1][1] - 1
			
 
				+        coordinates_start = word_res['chars'][coordinates_start_index]['location']
			
 
				+        coordinates_end = word_res['chars'][coordinates_end_index]['location']
			
 
				+        coordinates = (coordinates_start['left'],  # xmin
			
 
				+                       min(coordinates_start['top'], coordinates_end['top']),  # ymin
			
 
				+                       coordinates_end['left'] + coordinates_end['width'],  # xmax
			
 
				+                       max(coordinates_start['top'] + coordinates_start['height'],
			
 
				+                           coordinates_end['top'] + coordinates_end['height']))  # ymax
			
 
				+        tmp_dict = {'chars': formula_raw[0],
			
 
				+                    'raw_chars': formula_raw[0],
			
 
				+                    'coordinates': coordinates,
			
 
				+                    'middle': (coordinates[0] + int((coordinates[2] - coordinates[0]) // 2),
			
 
				+                               coordinates[1] + int((coordinates[3] - coordinates[1]) // 2))}
			
 
				+        res_list.append(tmp_dict)
			
 
				+    return res_list
			
 
				+
			
 
				+
			
 
				+def generate_char(words, index_pair, zh=True):
			
 
				+    if index_pair:
			
 
				+        # new_words = words.copy()
			
 
				+        length = index_pair[1] - index_pair[0]
			
 
				+        gen = ''
			
 
				+        if zh:
			
 
				+            for i in range(length):
			
 
				+                gen = '中' + gen
			
 
				+        else:
			
 
				+            for i in range(length):
			
 
				+                gen = 'F' + gen
			
 
				+        words = words.replace(words[index_pair[0]:index_pair[1]], gen)
			
 
				+        return words
			
 
				+    else:
			
 
				+        return words
			
 
				+
			
 
				+
			
 
def segment(img, save_path, access_token):
    """Split an image into text and formula chunks and recognise the formulas.

    Steps, as implemented below:

    1. Run ``get_ocr_text_and_coordinate_formula`` and, per OCR line, separate
       formula spans from Chinese/label spans with regular expressions.
    2. Merge neighbouring formula boxes with ``combine`` and send every merged
       crop to ``mathpix_ocr.mathpix_api``.
    3. Sort all chunks by the y of their centre, cut into text rows where the
       gap between consecutive centres is at least the mean character height,
       and order each row by x.
    4. Write debug/annotated images derived from ``save_path``.

    :param img: image array (``.shape`` and ``.copy()`` are used).
    :param save_path: output path; intermediate images replace ``'.jpg'`` in it.
    :param access_token: forwarded to the OCR call.
    :return: ``(lines, raw_lines, h)`` where ``h`` is the input image height.
        With a single row the two lists hold one entry per chunk; otherwise
        one newline-terminated string per row.
    """
    # raw_img = img.copy()
    # img = utils.preprocess(raw_img, None)

    word_result_list = get_ocr_text_and_coordinate_formula(img, access_token)
    formula_coordinates_dict_list = []
    zh_coordinates_dict_list = []
    zh_char_height = 20  # default, used when no line starts with a Chinese character
    zh_char_width = 15  # default, used when no line starts with a Chinese character
    zh_char_height_list = []
    zh_char_width_list = []

    # Spans matching any of these alternatives are treated as text, not formula.
    # NOTE(review): the non-raw literals contain '\.' and '\d', which newer
    # Python versions report as invalid escape sequences - consider raw strings.
    exclude = r'{}|{}|{}|{}|{}|{}'.format(
        '[ABCD]\.',  # A. B. C. D.
        '[(（][）)]',  # ()
        '^[(（]*[\d]+[）)]',  # (1)
        # '[(（]*[a-zA-Z]{2,}[）)]',  # (km), (kg)
        '[①②③④⑤⑥⑦⑧⑨⑩]',  # circled numbers 1-10
        '[\u4e00-\u9fa5][，；：。,;:.]',  # Chinese character followed by punctuation
        '[\u4e00-\u9fa5][\d]+[\u4e00-\u9fa5]')  # digits enclosed by Chinese characters

    for index, word_res in enumerate(word_result_list):
        words = word_res['words'].replace(' ', '').replace('兀', 'π')  # strip spaces; work around a baidu_api bug

        abcd_words_m = re.finditer(exclude, words)
        abcd_index_list = [(m.group(), m.span()) for m in abcd_words_m if m]

        # Mask excluded spans as Chinese so the formula regex skips them.
        words_tmp_zh = copy.copy(words)
        for ele in abcd_index_list:
            words_tmp_zh = generate_char(words_tmp_zh, ele[1], zh=True)

        formula_words_m = re.finditer(r'[^\u4e00-\u9fa5_"“”]+', words_tmp_zh)
        formula_index_list = [(m.group(), m.span()) for m in formula_words_m if m]
        formula_list = get_coordinates(word_res, formula_index_list)
        formula_coordinates_dict_list = formula_coordinates_dict_list + formula_list

        # Mask excluded spans as 'F' so the Chinese regex skips them; they are
        # added back explicitly via abcd_index_list below.
        words_tmp_formula = copy.copy(words)
        for ele in abcd_index_list:
            words_tmp_formula = generate_char(words_tmp_formula, ele[1], zh=False)
        zh_words_m = re.finditer(r'[\u4e00-\u9fa5_"“”]+', words_tmp_formula)
        zh_index_list = [(m.group(), m.span()) for m in zh_words_m if m]
        zh_list = get_coordinates(word_res, zh_index_list + abcd_index_list)
        zh_coordinates_dict_list = zh_coordinates_dict_list + zh_list

        # re.match anchors at the start, so only lines beginning with a Chinese
        # character contribute a size sample (taken from their first character).
        one_zh_char_m = re.match(r'[\u4e00-\u9fa5]+', words)
        if one_zh_char_m:
            index = one_zh_char_m.span()[0]
            zh_char_height_list.append(word_res['chars'][index]['location']['height'])
            zh_char_width_list.append(word_res['chars'][index]['location']['width'])

    if len(zh_char_width_list) > 0 and len(zh_char_height_list) > 0:
        zh_char_height = np.mean(zh_char_height_list)
        zh_char_width = np.mean(zh_char_width_list)

    formula_coordinates_list = [ele['coordinates'] for ele in formula_coordinates_dict_list]

    # Debug image 01: the raw, unmerged formula boxes.
    temp_img = img.copy()
    for ele in formula_coordinates_list:
        cv2.rectangle(temp_img, (int(ele[0]), int(ele[1])), (int(ele[2]), int(ele[3])), (0, 255, 0), 1)
    save_path0 = save_path.replace('.jpg', '_@_{:02d}.jpg'.format(1))
    utils.write_single_img(temp_img, save_path0)

    # Merge formula boxes
    formula_combine_list = combine(img, save_path, formula_coordinates_list, zh_char_height, zh_char_width, 1)  # distance based

    formula_combine_dict_list = []
    for ele in formula_combine_list:
        middle = (ele[0] + int((ele[2] - ele[0]) // 2), ele[1] + int((ele[3] - ele[1]) // 2))
        ocr_region = utils.crop_region_direct(img, ele)
        y, x = ocr_region.shape[0], ocr_region.shape[1]
        if min(y, x) <= 50:
            ocr_region = utils.resize_by_percent(ocr_region, 1.50)  # enlarge small crops
            # cv2.imshow('region', ocr_region)
            # if cv2.waitKey(0) == 27:
            #     cv2.destroyAllWindows()
        try:

            mathpix_raw_chars, latex_confidence = mathpix_ocr.mathpix_api(ocr_region)  # recognise the formula
            render_mathpix_chars = '<img src="http://latex.codecogs.com/png.latex?{}" />'.format(mathpix_raw_chars)
            # Low confidence, empty or single-character results: fall back to
            # the OCR text, but only if this box is an unmerged original box.
            if latex_confidence < 0.2 or mathpix_raw_chars == '' or len(mathpix_raw_chars) == 1:
                for item in formula_coordinates_dict_list:
                    if ele == item['coordinates']:
                        mathpix_raw_chars = item['chars']
                        render_mathpix_chars = '<img src="http://latex.codecogs.com/png.latex?{}" />' \
                            .format(item['chars'])
                        break
        except Exception:
            # Any failure of the formula service: use the placeholder text
            # 'formula', or the OCR text when the box is an original box.
            render_mathpix_chars = 'formula'
            mathpix_raw_chars = 'formula'
            for item in formula_coordinates_dict_list:
                if ele == item['coordinates']:
                    mathpix_raw_chars = item['chars']
                    render_mathpix_chars = '<img src="http://latex.codecogs.com/png.latex?{}" />' \
                        .format(item['chars'])
                    break

        print(render_mathpix_chars)
        tmp_dict = {'chars': render_mathpix_chars, 'middle': middle, 'coordinates': ele, 'raw_chars': mathpix_raw_chars}
        formula_combine_dict_list.append(tmp_dict)

    # res_dict = {'formula': formula_combine_list, 'zh_chars': zh_coordinates_dict_list}
    all_dict_list = zh_coordinates_dict_list + formula_combine_dict_list
    all_dict_list = sorted(all_dict_list, key=lambda k: k.get('middle')[1])

    # Difference between the centre y of consecutive chunks
    former = np.array([ele['middle'][1] for ele in all_dict_list[:-1]])
    rear = np.array([ele['middle'][1] for ele in all_dict_list[1:]])
    dif = rear - former
    split_x_index = [index for index, ele in enumerate(dif) if ele >= zh_char_height]  # row breaks in the y-sorted list

    # Resize the whole image: shrink to width 1000, or enlarge to height 100

    scale = 1
    h, w = img.shape[0], img.shape[1]
    if w > 1000:
        scale = float(1000 / w)
    elif h < 100:
        scale = float(100 / h)

    img_resize = utils.resize_by_percent(img, scale)
    utils.write_single_img(img_resize, save_path)

    if not split_x_index:
        # Single row: order chunks left to right.
        all_dict_list = sorted(all_dict_list, key=lambda k: k.get('middle')[0])  # sort along x
        lines = [ele['chars'] for ele in all_dict_list]
        raw_lines = [ele['raw_chars'] for ele in all_dict_list]

        for ele in all_dict_list:
            bbox = [box * scale for box in ele['coordinates']]
            cv2.rectangle(img_resize, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 3)
        utils.write_single_img(img_resize, save_path)

        return lines, raw_lines, h
    else:
        res_list = []
        split_x_index = [ele + 1 for ele in split_x_index]  # shift to the index of the first chunk of the next row
        split_x_index.insert(0, 0)
        # insert(-1, ...) lands before the last element; the sort below
        # restores the order and set() removes duplicates.
        split_x_index.insert(-1, len(all_dict_list))
        split_x_index = sorted(list(set(split_x_index)))
        for i, split in enumerate(split_x_index[1:]):
            one_line = all_dict_list[split_x_index[i]:split_x_index[i + 1]]
            one_line = sorted(one_line, key=lambda k: k.get('middle')[0])  # sort along x
            res_list.append(one_line)

        lines = []
        raw_lines = []
        for ele in res_list:
            line_chars = ''
            raw_lines_chars = ''
            for ele1 in ele:
                chars = ele1['chars']
                raw_chars = ele1['raw_chars']
                line_chars = line_chars + chars
                raw_lines_chars = raw_lines_chars + raw_chars

                # Draw the chunk box on the resized image.
                bbox = [box * scale for box in ele1['coordinates']]
                cv2.rectangle(img_resize, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

            lines.append(line_chars + '\n')
            raw_lines.append(raw_lines_chars + '\n')
        utils.write_single_img(img_resize, save_path)
        # print(lines)

        return lines, raw_lines, h
			
 
				+
			
 
				+
			
 
				+def combine(img, save_path, formula_coordinates_list, zh_char_height, zh_char_width, draw_index):
			
 
				+    img_draw = img.copy()
			
 
				+    formula_coordinates_list = sorted(formula_coordinates_list, key=lambda k: k[0])
			
 
				+    formula_coordinates_list = sorted(formula_coordinates_list, key=lambda k: k[1])  # 先x轴，再y轴排序
			
 
				+
			
 
				+    recursion_flag = False
			
 
				+    del_list = []
			
 
				+    temp_list = formula_coordinates_list.copy()
			
 
				+
			
 
				+    for i, outer in enumerate(temp_list):  # xmin, ymin, xmax, ymax
			
 
				+        for j, inner in enumerate(temp_list):  # xmin, ymin, xmax, ymax
			
 
				+            if not i == j:
			
 
				+                min_distance, flag = get_min_distance(outer, inner)
			
 
				+                combine_coordinate = ()
			
 
				+                if flag == 'i':
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+                elif flag == 'h' and min_distance <= 1:
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+                elif flag == 'w' and min_distance <= zh_char_width*2//3:
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+                elif flag == 'c' and min_distance <= 1:
			
 
				+                    recursion_flag = True
			
 
				+                    combine_coordinate = (min(outer[0], inner[0]), min(outer[1], inner[1]),
			
 
				+                                          max(outer[2], inner[2]), max(outer[3], inner[3]))
			
 
				+
			
 
				+                if combine_coordinate:
			
 
				+                    if not combine_coordinate == outer and not combine_coordinate == inner:  # 避免全包围的情况
			
 
				+                        del_list.append(outer)
			
 
				+                        del_list.append(inner)
			
 
				+                    if combine_coordinate == outer:
			
 
				+                        del_list.append(inner)
			
 
				+                    if combine_coordinate == inner:
			
 
				+                        del_list.append(outer)
			
 
				+                    formula_coordinates_list.append(combine_coordinate)
			
 
				+
			
 
				+    res = list(set(formula_coordinates_list) - set(del_list))
			
 
				+
			
 
				+    if recursion_flag:
			
 
				+        draw_index = draw_index + 1
			
 
				+        for ele in res:
			
 
				+            cv2.rectangle(img_draw, (int(ele[0]), int(ele[1])), (int(ele[2]), int(ele[3])), (0, 255, 0), 1)
			
 
				+
			
 
				+        save_path_temp = save_path.replace('.jpg', '_@_{:02d}.jpg'.format(draw_index))
			
 
				+        utils.write_single_img(img_draw, save_path_temp)
			
 
				+        return combine(img, save_path, res, zh_char_height, zh_char_width, draw_index)
			
 
				+    else:
			
 
				+        for ele in res:
			
 
				+            cv2.rectangle(img_draw, (int(ele[0]), int(ele[1])), (int(ele[2]), int(ele[3])), (0, 255, 0), 1)
			
 
				+
			
 
				+        save_path_temp = save_path.replace('.jpg', '_@_final.jpg')
			
 
				+        utils.write_single_img(img_draw, save_path_temp)
			
 
				+        return res
			
 
				+
			
 
				+
			
 
				+def get_min_distance_square(coordinate1, coordinate2):  # 顶点间欧式距离最小值的平方和
			
 
				+    all_points1 = [(x, y) for x in [coordinate1[0], coordinate1[2]] for y in [coordinate1[1], coordinate1[3]]]
			
 
				+    all_points2 = [(x, y) for x in [coordinate2[0], coordinate2[2]] for y in [coordinate2[1], coordinate2[3]]]
			
 
				+    distance_list = []
			
 
				+    for index1, point1 in enumerate(all_points1):
			
 
				+        for index2, point2 in enumerate(all_points2):
			
 
				+            distance = (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
			
 
				+            distance_list.append(distance)
			
 
				+    min_distance = min(distance_list)
			
 
				+    return min_distance
			
 
				+
			
 
				+
			
 
				+def get_min_distance(coordinate1, coordinate2):  # 欧式距离最小值
			
 
				+
			
 
				+    def dist(point1, point2):
			
 
				+        distance = (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
			
 
				+        return math.sqrt(distance)
			
 
				+
			
 
				+    (x1, y1, x1b, y1b) = coordinate1
			
 
				+    (x2, y2, x2b, y2b) = coordinate2
			
 
				+    left = x2b < x1  # 2在1的坐标左边
			
 
				+    right = x1b < x2  # 2在1的坐标右边
			
 
				+    bottom = y2b < y1  # 2在1的坐标下边
			
 
				+    top = y1b < y2  # 2在1的坐标上边
			
 
				+    if top and left:
			
 
				+        return dist((x1, y1b), (x2b, y2)), 'c'
			
 
				+    elif left and bottom:
			
 
				+        return dist((x1, y1), (x2b, y2b)), 'c'
			
 
				+    elif bottom and right:
			
 
				+        return dist((x1b, y1), (x2, y2b)), 'c'
			
 
				+    elif right and top:
			
 
				+        return dist((x1b, y1b), (x2, y2)), 'c'
			
 
				+    elif left:
			
 
				+        return x1 - x2b, 'w'
			
 
				+    elif right:
			
 
				+        return x2 - x1b, 'w'
			
 
				+    elif bottom:
			
 
				+        return y1 - y2b, 'h'
			
 
				+    elif top:
			
 
				+        return y2 - y1b, 'h'
			
 
				+    else:  # rectangles intersect
			
 
				+        return 0, 'i'
			
--- a/segment/formula/mathpix_ocr.py
+++ b/segment/formula/mathpix_ocr.py
@@ -0,0 +1,32 @@
 
				+import base64
			
 
				+import requests
			
 
				+import json
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+def opecv2base64(img):
			
 
				+    image = cv2.imencode('.jpg', img)[1]
			
 
				+    base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+    return base64_data
			
 
				+
			
 
				+
			
 
				+def mathpix_api(img):
			
 
				+    image = opecv2base64(img)
			
 
				+    image_uri = "data:image/jpg;base64," + image
			
 
				+    r = requests.post("https://api.mathpix.com/v3/latex",
			
 
				+                      data=json.dumps({'src': image_uri,
			
 
				+                                       'formats': ['latex_normal', 'latex_styled']}),
			
 
				+                      headers={"app_id": "1092963746_qq_com", "app_key": "0c3b77b0c3720175e0ba",
			
 
				+                               "Content-type": "application/json"},
			
 
				+                      timeout=7).json()
			
 
				+    res = r['latex_styled']
			
 
				+    latex_confidence = r['latex_confidence']
			
 
				+    # print(res)
			
 
				+    return res, latex_confidence
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Manual smoke test: read one local sample image (hard-coded Windows path)
    # and send it to the formula recognition service; the result is discarded.
    img_path0 = r'F:\save\img_withbgm\0003.png'
    img0 = np.asarray(cv2.imread(img_path0))
    mathpix_api(img0)
			
--- a/segment/image_operation/__init__.py
+++ b/segment/image_operation/__init__.py
@@ -0,0 +1,2 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
--- a/segment/image_operation/exam_segment.py
+++ b/segment/image_operation/exam_segment.py
@@ -0,0 +1,538 @@
 
				+import re
			
 
				+import json
			
 
				+import glob
			
 
				+import os
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import matplotlib.pyplot as plt
			
 
				+
			
 
				+
			
 
# Digits at the start of a string, after optional whitespace (group 1 = digits).
problem_number_pattern = re.compile(r'\s*(\d+)')
# Any run of digits (group 1 = digits).
number_pattern = re.compile(r'(\d+)')
# Digits directly after an opening parenthesis, after optional whitespace.
sub_problem_number_pattern = re.compile(r'\s*\((\d+)')

max_number = 99     # largest accepted problem number
min_number = 0      # smallest accepted problem number
			
 
				+
			
 
				+
			
 
				+def get_respond_from_json(json_file):
			
 
				+    with open(json_file, 'r', encoding='UTF-8') as f:
			
 
				+        resp = json.load(f)
			
 
				+    return resp
			
 
				+
			
 
				+
			
 
				+def get_number_position(words_result, max_number=max_number, left_position=0, right_position=0):
			
 
				+    # 获取以数字开头的位置, 保留num<=max_number以及字符位置位于[left_position,right_position]的数
			
 
				+    numbers = []
			
 
				+    for line_index in range(len(words_result)):
			
 
				+        line = words_result[line_index]
			
 
				+        #print('**************************************')
			
 
				+        #print(line['words'])
			
 
				+        #print(line['chars'][:2])
			
 
				+        m = problem_number_pattern.match(line['words'])
			
 
				+        if m:
			
 
				+            location = line['chars'][m.start(1)]['location']
			
 
				+            number = line['words'][m.start(1):m.end(1)]
			
 
				+            center = location['left'] + location['width'] // 2
			
 
				+            if int(number) <= max_number and center >= left_position:
			
 
				+                if right_position == 0:
			
 
				+                    numbers.append(
			
 
				+                        {'number': number, 'center': center, 'line': line_index, 'location': line['location']})
			
 
				+                elif center <= right_position:
			
 
				+                    numbers.append(
			
 
				+                        {'number': number, 'center': center, 'line': line_index, 'location': line['location']})
			
 
				+
			
 
				+            #print(number, center, location)
			
 
				+            #print(line['chars'][m.start(1)])
			
 
				+    return numbers
			
 
				+
			
 
				+
			
 
				+def get_number_list(numbers, shift_limit=50):
			
 
				+    # 获取横坐标相近的数字序列
			
 
				+    number_list = []
			
 
				+
			
 
				+    for number in numbers:
			
 
				+        not_found_flag = 1
			
 
				+        for single_list in number_list:
			
 
				+            if abs(number['center']-single_list[-1]['center']) <= shift_limit:
			
 
				+                single_list.append(number)
			
 
				+                not_found_flag = 0
			
 
				+                #break
			
 
				+        if not_found_flag:
			
 
				+            single_list = []
			
 
				+            single_list.append(number)
			
 
				+            number_list.append(single_list)
			
 
				+
			
 
				+    return number_list
			
 
				+
			
 
				+
			
 
				+def get_longest_sequence(sequence, limit, type='l'):
			
 
				+    #   获取limit之下或之上的最长连续序列
			
 
				+    flag = [[0, 0], [0, 0]]
			
 
				+    for i in range(len(sequence)):
			
 
				+        if type == 'l':
			
 
				+            f = sequence[i] <= limit
			
 
				+        elif type == 'h':
			
 
				+            f = sequence[i] >= limit
			
 
				+        if f:
			
 
				+            if i == flag[1][1]:
			
 
				+                flag[1][1] += 1
			
 
				+            else:
			
 
				+                if flag[1][1] - flag[1][0] > flag[0][1] - flag[0][0]:
			
 
				+                    flag[0][:] = flag[1][:]
			
 
				+                flag[1][:] = [i, i + 1]
			
 
				+    if flag[1][1] - flag[1][0] > flag[0][1] - flag[0][0]:
			
 
				+        flag[0][:] = flag[1][:]
			
 
				+    return flag[0][:]
			
 
				+
			
 
				+
			
 
				+def get_number_sequence(numbers, max_gap=5, min_number=min_number):
			
 
				+    # 数列连续性判断
			
 
				+    number_sequence = []
			
 
				+    return number_sequence
			
 
				+
			
 
				+
			
 
				+def get_problem_list(number_list):
			
 
				+    # 选取题号序列
			
 
				+    # rule1: 横坐标最小
			
 
				+    # rule2: 序列连续性？
			
 
				+    # rule3: 整体题号连续性？
			
 
				+
			
 
				+    if number_list:
			
 
				+        index = 0
			
 
				+        left = number_list[index][0]['center']
			
 
				+    else:
			
 
				+        return []
			
 
				+
			
 
				+    for i in range(1, len(number_list)):
			
 
				+        if number_list[i][0]['center'] < left:
			
 
				+            index = i
			
 
				+            left = number_list[i][0]['center']
			
 
				+    return number_list[index]
			
 
				+
			
 
				+
			
 
				+def get_double_page_number(words_result, img_width, left_ratio=0.4, right_ratio=0.6):
			
 
				+    left = int(left_ratio * img_width)
			
 
				+    right = int(right_ratio * img_width)
			
 
				+    numbers = []
			
 
				+    for line in words_result:
			
 
				+        for char in line['chars']:
			
 
				+            center = int(char['location']['left']) + int(char['location']['width']) // 2
			
 
				+            if number_pattern.match(char['char']) and left <= center <= right:
			
 
				+                char.update(center=center)
			
 
				+                numbers.append(char)
			
 
				+    double_page_numbers = get_number_list(numbers)
			
 
				+    for d in double_page_numbers:
			
 
				+        if len(d) >= 2:
			
 
				+            return True, double_page_numbers
			
 
				+    return double_page_numbers
			
 
				+
			
 
				+
			
 
				+def image_projection(image, left_ratio, right_ratio, top_ratio=0.2, bottom_ratio=0.9, gap=20):
			
 
				+    #   图像投影projection = [counts, positions]
			
 
				+    image = np.asarray(image)
			
 
				+    image = 255 - image
			
 
				+    height = image.shape[0]
			
 
				+    width = image.shape[1]
			
 
				+    top = int(height * top_ratio)
			
 
				+    bottom = int(height * bottom_ratio)
			
 
				+    left = int(width * left_ratio)
			
 
				+    right = int(width * right_ratio)
			
 
				+    # col_num = (right - left + 1) // gap
			
 
				+    # right = left + col_num * gap
			
 
				+
			
 
				+    projection = np.zeros((2, len(range(left, right-gap, gap))), dtype=np.int)
			
 
				+    projection[1, :] = np.asarray(range(left, right-gap, gap), dtype=np.int)
			
 
				+
			
 
				+    projection[0, :] = np.sum(np.sum(np.hsplit(
			
 
				+        image[top:bottom, left:projection[1, -1]+gap], projection.shape[1]), axis=1), axis=1) // (bottom - top)
			
 
				+    return projection
			
 
				+
			
 
				+
			
 
				+def word_projection(words_result, image_shape, left_ratio, right_ratio, top_ratio=0.2, bottom_ratio=0.9, gap=20):
			
 
				+    #   字符投影word_count = [counts, positions]
			
 
				+    height = image_shape[0]
			
 
				+    width = image_shape[1]
			
 
				+    left = int(width * left_ratio)
			
 
				+    right = int(width * right_ratio)
			
 
				+    top = int(height * top_ratio)
			
 
				+    bottom = int(height * bottom_ratio)
			
 
				+    word_count = np.zeros((2, len(range(left, right-gap, gap))), dtype=np.int)
			
 
				+    word_count[1, :] = np.asarray(range(left, right-gap, gap), dtype=np.int)
			
 
				+
			
 
				+    for line in words_result:
			
 
				+        if top < line['location']['top'] < bottom:
			
 
				+            for char in line['chars']:
			
 
				+                center = char['location']['left'] + char['location']['width'] // 2
			
 
				+                for i in range(word_count.shape[1]):
			
 
				+                    if 0 <= center - word_count[1, i] < gap:
			
 
				+                        word_count[0, i] += 1
			
 
				+
			
 
				+    return word_count
			
 
				+
			
 
				+
			
 
				+def check_seal_line(words_result, image, type='left', gap=20):
			
 
				+    #   检查是否有密封线，返回密封线横坐标
			
 
				+    projection_limit = 80
			
 
				+    wc_limit = 0
			
 
				+    seal_limit = 3
			
 
				+
			
 
				+    image = np.asarray(image)
			
 
				+    height, width = image.shape[:2]
			
 
				+    if height / width < 1:
			
 
				+        if type == 'left':
			
 
				+            #   检查左密封线
			
 
				+            length_limit = 5
			
 
				+
			
 
				+            left_ratio = 0
			
 
				+            right_ratio = 0.15
			
 
				+            word_count = word_projection(
			
 
				+                words_result, (height, width), left_ratio=left_ratio, right_ratio=right_ratio, gap=gap)
			
 
				+            image_count = image_projection(image, left_ratio=left_ratio, right_ratio=right_ratio, gap=gap)
			
 
				+            seal_flag = np.sum(image_count[0, :length_limit] > projection_limit)
			
 
				+
			
 
				+            if seal_flag < seal_limit:
			
 
				+                # 判定无密封线
			
 
				+                return 0
			
 
				+            else:
			
 
				+                #   获取数字开头的位置
			
 
				+                numbers = get_number_position(
			
 
				+                    words_result, left_position=length_limit*gap, right_position=right_ratio*width)
			
 
				+                right_flag = right_ratio * width
			
 
				+                for number in numbers:
			
 
				+                    right_flag = min(right_flag, number['center'])
			
 
				+                for i in range(word_count.shape[1]-1, -1, -1):
			
 
				+                    if word_count[0, i] <= wc_limit:
			
 
				+                        if length_limit*gap <= word_count[1, i] <= right_flag:
			
 
				+                            return word_count[1, i]
			
 
				+                return length_limit * gap
			
 
				+        elif type == 'right':
			
 
				+            #   检查右密封线
			
 
				+
			
 
				+            left_ratio = 0.85
			
 
				+            right_ratio = 1
			
 
				+            word_count = word_projection(words_result, (height, width), left_ratio=left_ratio, right_ratio=right_ratio)
			
 
				+            image_count = image_projection(image, left_ratio=left_ratio, right_ratio=right_ratio)
			
 
				+
			
 
				+            # seal_flag = np.sum(image_count[0, -length_limit:] > projection_limit)
			
 
				+            # if seal_flag < seal_limit:
			
 
				+            #     return 0
			
 
				+            # else:
			
 
				+            #     for i in range(word_count.shape[1]-length_limit, -1, -1):
			
 
				+            #         if word_count[0, i] > wc_limit and image_count[0, i] <= projection_limit:
			
 
				+            #             return word_count[1, i] + 2 * gap
			
 
				+            #     return width - length_limit * gap
			
 
				+            for i in range(word_count.shape[1]-1, -1, -1):
			
 
				+                if word_count[0, i] > wc_limit:
			
 
				+                    if image_count[0, i-1] <= projection_limit and word_count[0, i-1] + word_count[0, i-2] > 0:
			
 
				+                        return word_count[1, i-1] + gap
			
 
				+
			
 
				+            return 0
			
 
				+    else:
			
 
				+        return 0
			
 
				+
			
 
				+
			
 
				+def check_double_page(words_result, image, height_to_width_ratio=1, wc_limit=2):
			
 
				+    #   检查是否有分页， 返回分割线横坐标
			
 
				+    image = np.asarray(image)
			
 
				+    height = image.shape[0]
			
 
				+    width = image.shape[1]
			
 
				+    flag = [[0, 0], [0, 0]]
			
 
				+
			
 
				+    if height / width < height_to_width_ratio:
			
 
				+        word_count = word_projection(words_result, (height, width), left_ratio=0.4, right_ratio=0.6)
			
 
				+        for i in range(word_count.shape[1]):
			
 
				+            if word_count[0, i] <= wc_limit:
			
 
				+                if i == flag[1][1]:
			
 
				+                    flag[1][1] += 1
			
 
				+                else:
			
 
				+                    if flag[1][1] - flag[1][0] > flag[0][1] - flag[0][0]:
			
 
				+                        flag[0][:] = flag[1][:]
			
 
				+                    flag[1][:] = [i, i+1]
			
 
				+        if flag[1][1] - flag[1][0] > flag[0][1] - flag[0][0]:
			
 
				+            return word_count[1, (flag[1][0]+flag[1][1])//2]
			
 
				+        elif flag[0][1]:
			
 
				+            return word_count[1, (flag[0][0] + flag[0][1]) // 2]
			
 
				+        else:
			
 
				+            return 0
			
 
				+    return 0
			
 
				+    #     for i in range(word_count.shape[1]//2):
			
 
				+    #         kplus = word_count.shape[1]//2 + i
			
 
				+    #         kminus = word_count.shape[1]//2 - i
			
 
				+    #         if word_count[0, kplus] <= wc_limit:
			
 
				+    #             return word_count[1, kplus]
			
 
				+    #         elif word_count[0, kminus] <= wc_limit:
			
 
				+    #             return word_count[1, kminus]
			
 
				+    # return 0
			
 
				+
			
 
				+
			
 
				+def get_line_from_chars(chars):
			
 
				+    #   从一行所有字符获取行的整体坐标
			
 
				+    if chars:
			
 
				+        xmin = chars[0]['location']['left']
			
 
				+        ymin = chars[0]['location']['top']
			
 
				+        xmax = chars[0]['location']['left'] + chars[0]['location']['width']
			
 
				+        ymax = chars[0]['location']['top'] + chars[0]['location']['height']
			
 
				+        for char in chars:
			
 
				+            if xmin > char['location']['left']:
			
 
				+                xmin = char['location']['left']
			
 
				+            if ymin > char['location']['top']:
			
 
				+                ymin = char['location']['top']
			
 
				+            if xmax < char['location']['left'] + char['location']['width']:
			
 
				+                xmax = char['location']['left'] + char['location']['width']
			
 
				+            if ymax < char['location']['top'] + char['location']['height']:
			
 
				+                ymax = char['location']['top'] + char['location']['height']
			
 
				+        result = {'width': xmax-xmin, 'top': ymin, 'left': xmin, 'height': ymax-ymin}
			
 
				+        return result
			
 
				+    else:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def get_box_from_lines(lines):
			
 
				+    #   获取包含所有行区域的整体坐标
			
 
				+    if lines:
			
 
				+        ymin = lines[0]['location']['top']
			
 
				+        ymax = lines[0]['location']['top'] + lines[0]['location']['height']
			
 
				+        xmin = lines[0]['location']['left']
			
 
				+        xmax = lines[0]['location']['left'] + lines[0]['location']['width']
			
 
				+        for line in lines:
			
 
				+            if xmin > line['location']['left']:
			
 
				+                xmin = line['location']['left']
			
 
				+            if ymin > line['location']['top']:
			
 
				+                ymin = line['location']['top']
			
 
				+            if xmax < line['location']['left'] + line['location']['width']:
			
 
				+                xmax = line['location']['left'] + line['location']['width']
			
 
				+            if ymax < line['location']['top'] + line['location']['height']:
			
 
				+                ymax = line['location']['top'] + line['location']['height']
			
 
				+        return {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
			
 
				+    else:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def split_line_for_double_pages(line, split_position):
			
 
				+    #   把单行按双页分割
			
 
				+    char_pattern = r'\s*\S'
			
 
				+    words_pattern = r''
			
 
				+    odd_page_line = {}
			
 
				+    even_page_line = {}
			
 
				+    odd_page_chars = []
			
 
				+    even_page_chars = []
			
 
				+    for char in line['chars']:
			
 
				+        center = char['location']['left'] + char['location']['width'] // 2
			
 
				+        if center <= split_position:
			
 
				+            odd_page_chars.append(char)
			
 
				+        else:
			
 
				+            even_page_chars.append(char)
			
 
				+    words_length = len(odd_page_chars)
			
 
				+    if words_length == 0:
			
 
				+        even_page_line = line
			
 
				+    elif len(even_page_chars) == 0:
			
 
				+        odd_page_line = line
			
 
				+    else:
			
 
				+        odd_page_location = get_line_from_chars(odd_page_chars)
			
 
				+        even_page_location = get_line_from_chars(even_page_chars)
			
 
				+        for i in range(words_length):
			
 
				+            words_pattern += char_pattern
			
 
				+        words_pattern = re.compile(words_pattern)   # ubuntu上有问题
			
 
				+        match = words_pattern.match(line['words'])
			
 
				+        odd_page_words = match[0]
			
 
				+        even_page_words = line['words'][match.end():]
			
 
				+        odd_page_line = {'chars': odd_page_chars, 'location': odd_page_location, 'words': odd_page_words}
			
 
				+        if even_page_words:
			
 
				+            even_page_line = {'chars': even_page_chars, 'location': even_page_location, 'words': even_page_words}
			
 
				+    return odd_page_line, even_page_line
			
 
				+
			
 
				+
			
 
				+def get_double_page_text(words_result, split_position):
			
 
				+    #   把文本按双页分割
			
 
				+    odd_page = []
			
 
				+    even_page = []
			
 
				+    for line in words_result:
			
 
				+        if line['location']['left'] + line['location']['width'] // 2 >= split_position:
			
 
				+            even_page.append(line)
			
 
				+        else:
			
 
				+            odd_page.append(line)
			
 
				+        # else:
			
 
				+        #     odd_page_line, even_page_line = split_line_for_double_pages(line, split_position)
			
 
				+        #     if odd_page_line:
			
 
				+        #         odd_page.append(odd_page_line)
			
 
				+        #     if even_page_line:
			
 
				+        #         even_page.append(even_page_line)
			
 
				+    return [odd_page, even_page]
			
 
				+
			
 
				+
			
 
				+# def get_double_page_text(words_result, split_position):
			
 
				+#     odd_page = []
			
 
				+#     even_page = []
			
 
				+#     for line in words_result:
			
 
				+#         odd_page_chars = []
			
 
				+#         even_page_chars = []
			
 
				+#         for char in line['chars']:
			
 
				+#             center = char['location']['left'] + char['location']['width'] // 2
			
 
				+#             if center <= split_position:
			
 
				+#                 odd_page_chars.append(char)
			
 
				+#             else:
			
 
				+#                 even_page_chars.append(char)
			
 
				+#         line_result = get_line_from_chars(odd_page_chars)
			
 
				+#         if line_result:
			
 
				+#             odd_page.append(line_result)
			
 
				+#         line_result = get_line_from_chars(even_page_chars)
			
 
				+#         if line_result:
			
 
				+#             even_page.append(line_result)
			
 
				+#     return [odd_page, even_page]
			
 
				+
			
 
				+
			
 
				+def get_page_text(words_result, image):
			
 
				+    #   除去密封线，分页，获取页面文本结果
			
 
				+    left_seal_line = check_seal_line(words_result, image, type='left')
			
 
				+    if left_seal_line:
			
 
				+        words_result = get_double_page_text(words_result, left_seal_line)[1]
			
 
				+    right_seal_line = check_seal_line(words_result, image, type='right')
			
 
				+    if right_seal_line:
			
 
				+        words_result = get_double_page_text(words_result, right_seal_line)[0]
			
 
				+    split_position = check_double_page(words_result, image)
			
 
				+    if split_position:
			
 
				+        return get_double_page_text(words_result, split_position)
			
 
				+    else:
			
 
				+        return [words_result]
			
 
				+
			
 
				+
			
 
				+def exam_segment(words_result):
			
 
				+    #   分割试卷区域
			
 
				+    numbers = get_number_position(words_result)
			
 
				+    number_list = get_number_list(numbers)
			
 
				+    group_list = get_problem_list(number_list)
			
 
				+
			
 
				+    for i in range(len(group_list)-1):
			
 
				+        group_list[i].update(end_line=group_list[i+1]['line']-1)
			
 
				+    if len(group_list) >= 1:
			
 
				+        group_list[-1].update(end_line=len(words_result)-1)
			
 
				+    for g in group_list:
			
 
				+        ymin = g['location']['top']
			
 
				+        ymax = words_result[g['end_line']]['location']['top'] + words_result[g['end_line']]['location']['height']
			
 
				+        xmin = g['location']['left']
			
 
				+        xmax = g['location']['left'] + g['location']['width']
			
 
				+        for line in range(g['line'], g['end_line']+1):
			
 
				+            left = words_result[line]['location']['left']
			
 
				+            width = words_result[line]['location']['width']
			
 
				+            if xmin > left:
			
 
				+                xmin = left
			
 
				+            if xmax < left + width:
			
 
				+                xmax = left + width
			
 
				+
			
 
				+        g.update(box={'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax})
			
 
				+
			
 
				+    return group_list
			
 
				+
			
 
				+
			
 
				+def show_result(img_file, debug=1):
			
 
				+    image_color = cv2.imread(img_file)
			
 
				+    image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
			
 
				+    height = image.shape[0]
			
 
				+    width = image.shape[1]
			
 
				+    resp = get_respond_from_json(img_file.replace('.jpg', '_json.txt'))
			
 
				+    words_result = resp['words_result']
			
 
				+    print('**********{}*********'.format(os.path.split(img_file)[1]))
			
 
				+
			
 
				+    numbers = get_number_position(words_result)
			
 
				+    number_list = get_number_list(numbers)
			
 
				+    problem_list = get_problem_list(number_list)
			
 
				+    group_list = exam_segment(words_result)
			
 
				+    #double_page_numbers = get_double_page_number(words_result, img.shape[1])
			
 
				+
			
 
				+    if debug == 0:
			
 
				+        for line_index in range(len(words_result)):
			
 
				+            line = words_result[line_index]
			
 
				+            print('**************************************')
			
 
				+            print(line['words'])
			
 
				+        print('************All Numbers************')
			
 
				+        for num in numbers:
			
 
				+            print(num)
			
 
				+        for numbers in number_list:
			
 
				+            print('*******Number List********')
			
 
				+            for n in numbers:
			
 
				+                print(n)
			
 
				+    elif debug == 1:
			
 
				+        print('**********Problem List*********')
			
 
				+        for p in problem_list:
			
 
				+            print(p)
			
 
				+        print('**********Group List**********')
			
 
				+        for g in group_list:
			
 
				+            print(g)
			
 
				+    elif debug == 2:
			
 
				+        gap = 20
			
 
				+        middle_word_count = word_projection(words_result, (height, width), left_ratio=0.4, right_ratio=0.6, gap=gap)
			
 
				+        left_word_count = word_projection(words_result, (height, width), left_ratio=0, right_ratio=0.15, gap=gap)
			
 
				+        right_word_count = word_projection(words_result, (height, width), left_ratio=0.85, right_ratio=1, gap=gap)
			
 
				+
			
 
				+        left_image_projection = image_projection(image, left_ratio=0, right_ratio=0.15, gap=gap)
			
 
				+        middle_image_projection = image_projection(image, left_ratio=0.4, right_ratio=0.6, gap=gap)
			
 
				+        right_image_projection = image_projection(image, left_ratio=0.85, right_ratio=1, gap=gap)
			
 
				+        print('**********Left Projection************')
			
 
				+        print(left_word_count)
			
 
				+        print(left_image_projection)
			
 
				+        #print(get_longest_sequence(left_word_count[0, :], 2))
			
 
				+        #print(get_longest_sequence(left_image_projection[0, :], 100, type='h'))
			
 
				+        print('**********Middle Projection************')
			
 
				+        print(middle_word_count)
			
 
				+        print(middle_image_projection)
			
 
				+        print('**********Right Projection************')
			
 
				+        print(right_word_count)
			
 
				+        print(right_image_projection)
			
 
				+        print('************Split Line****************')
			
 
				+        left_p = check_seal_line(words_result, image, type='left')
			
 
				+        right_p = check_seal_line(words_result, image, type='right')
			
 
				+        middle_p = check_double_page(words_result, image)
			
 
				+        print(left_p, middle_p, right_p)
			
 
				+        cv2.line(image_color, (left_p, 0), (left_p, height), (0, 0, 255), 5)
			
 
				+        cv2.line(image_color, (middle_p, 0), (middle_p, height), (0, 255, 0), 5)
			
 
				+        cv2.line(image_color, (right_p, 0), (right_p, height), (255, 0, 0), 5)
			
 
				+        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
			
 
				+        cv2.imshow('image', image_color)
			
 
				+        if cv2.waitKey(0) == 27:  # press ESC to exit
			
 
				+            exit(0)
			
 
				+        cv2.destroyAllWindows()
			
 
				+    elif debug == 3:
			
 
				+        page_text = get_page_text(words_result, image)
			
 
				+        if len(page_text) == 1:
			
 
				+            print('*************Single Page*********')
			
 
				+            for line in page_text[0]:
			
 
				+                print(line['words'])
			
 
				+        else:
			
 
				+            print('*************Odd Page**********')
			
 
				+            for line in page_text[0]:
			
 
				+                print(line['words'])
			
 
				+            print('************Even Page**********')
			
 
				+            for line in page_text[1]:
			
 
				+                print(line['words'])
			
 
				+    # elif style == 4:
			
 
				+    #     print('***********Page Text***********')
			
 
				+    #     page_result = get_page_text(words_result, image)
			
 
				+    #     if len(page_result) == 1:
			
 
				+    #         print('***********Single Page***********')
			
 
				+    #         for line in page_result[0]:
			
 
				+    #             print(line['words'])
			
 
				+    #     elif len(page_result) == 2:
			
 
				+    #         print('*********Odd************')
			
 
				+    #         for line in page_result[0]:
			
 
				+    #             print(line['words'])
			
 
				+    #         print('********Even************')
			
 
				+    #         for line in page_result[1]:
			
 
				+    #             print(line['words'])
			
 
				+
			
 
				+
			
 
				+# if __name__ == "__main__":
			
 
				+#     img_file = r'E:\data\test-problems\10.jpg'
			
 
				+#     # show_result(img_file, debug=2)
			
 
				+#     image_color = cv2.imread(img_file)
			
 
				+#     image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
			
 
				+#     height = image.shape[0]
			
 
				+#     width = image.shape[1]
			
 
				+#     resp = get_respond_from_json(img_file.replace('.jpg', '_json.txt'))
			
 
				+#     words_result = resp['words_result']
			
 
				+#     print('**********{}*********'.format(os.path.split(img_file)[1]))
			
 
				+#     text_list = get_page_text(words_result, image)
			
 
				+#
			
 
				+#     # work_dir = r'E:\data\seal_line'
			
 
				+#     # for img_file in glob.glob(os.path.join(work_dir, '*.jpg')):
			
 
				+#     #     show_result(img_file, style=2)
			
--- a/segment/image_operation/img_urlcode.py
+++ b/segment/image_operation/img_urlcode.py
@@ -0,0 +1,15 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : img_urlcode.py
			
 
				+import base64
			
 
				+
			
 
				+
			
 
				+def img2base64(img):
			
 
				+    base64_data = base64.b64encode(img)
			
 
				+    print(base64_data)
			
 
				+    return base64_data
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Manual check: encode one local image file and print its base64 form.
    # The path is a hard-coded Windows location on the author's machine.
    img_path = r'C:\Users\Administrator\Desktop\history\0002.jpg'
    with open(img_path, 'rb') as f:
        img2base64(f.read())
			
--- a/segment/image_operation/pre_segment.py
+++ b/segment/image_operation/pre_segment.py
@@ -0,0 +1,298 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : pre_segment.py
			
 
				+import time
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+from numpy import asarray
			
 
				+import base64
			
 
				+import scipy.signal
			
 
				+
			
 
				+from segment.image_operation import utils
			
 
				+
			
 
				+
			
 
				+def hough_rotate_cv(image):
			
 
				+    """ not Long time consuming, not Strong generalization ability, not high accuracy, more super parameters"""
			
 
				+    img_np = utils.resize_by_percent(asarray(image), 1)
			
 
				+    if len(img_np.shape) == 3:
			
 
				+        img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
			
 
				+    canny_image = cv2.Canny(img_np, 0, 255, apertureSize=3)
			
 
				+    # cv2.imshow('canny', canny_image)
			
 
				+    # cv2.waitKey(10)
			
 
				+    lines = cv2.HoughLinesP(canny_image, 1, np.pi / 180, 160, minLineLength=500, maxLineGap=65)
			
 
				+    # lines = cv2.HoughLines(canny_image, 1, np.pi / 180, 160, max_theta=30, min_theta=0)
			
 
				+
			
 
				+    # 寻找长度最长的线
			
 
				+    distance = []
			
 
				+    for line in lines:
			
 
				+        x1, y1, x2, y2 = line[0]
			
 
				+        dis = np.sqrt(pow((x2 - x1), 2) + pow((y2 - y1), 2))
			
 
				+        distance.append(dis)
			
 
				+    max_dis_index = distance.index(max(distance))
			
 
				+    max_line = lines[max_dis_index]
			
 
				+    x1, y1, x2, y2 = max_line[0]
			
 
				+
			
 
				+    # 获取旋转角度
			
 
				+    angle = cv2.fastAtan2((y2 - y1), (x2 - x1))
			
 
				+    print(angle)
			
 
				+
			
 
				+    if 0.5 <= angle <= 7:  # 因为识别误差问题，根据实际情况设置旋转阈值
			
 
				+        centerpoint = (image.shape[1] / 2, image.shape[0] / 2)
			
 
				+        rotate_mat = cv2.getRotationMatrix2D(centerpoint, angle, 1.0)  # 获取旋转矩阵
			
 
				+        correct_image = cv2.warpAffine(image, rotate_mat, (image.shape[1], image.shape[0]),
			
 
				+                                       borderValue=(255, 255, 255))
			
 
				+
			
 
				+        # cv2.imshow('test', resize_by_percent(correct_image, 0.1))
			
 
				+        # cv2.waitKey(10)
			
 
				+        return correct_image
			
 
				+    else:
			
 
				+        return image
			
 
				+
			
 
				+
			
 
				+def array_latter_subtracts_precious(nparray):
			
 
				+    array1 = nparray[:-1]
			
 
				+    array2 = nparray[1:]
			
 
				+    return array2 - array1
			
 
				+
			
 
				+
			
 
				+def split_by_index(im_raw, index):
			
 
				+    y_raw, x_raw, _ = im_raw.shape
			
 
				+    img_left = im_raw[1:y_raw, 1:index]
			
 
				+    img_right = im_raw[1:y_raw, index + 1:x_raw]
			
 
				+    return img_left, img_right
			
 
				+
			
 
				+
			
 
				+def split_img_at_middle_by_y_axis(img_path, radio=0.10, thresh_std=5000):
			
 
				+    im_raw = utils.read_img(img_path)
			
 
				+    im_resize = utils.resize_by_percent(im_raw, radio)
			
 
				+    ry, rx, _ = im_resize.shape
			
 
				+    img_mtx0 = np.asarray(utils.rgb2binary(im_resize))
			
 
				+    y_sum_array0 = img_mtx0.sum(axis=0)
			
 
				+    tmp = array_latter_subtracts_precious(y_sum_array0 / ry)
			
 
				+    std0 = np.std(tmp)  # 计算标准差
			
 
				+
			
 
				+    # # plt.bar(range(len(y_sum_array0)), y_sum_array0)
			
 
				+    # # plt.show()
			
 
				+    # plt.plot(range(len(y_sum_array0)-1), tmp)
			
 
				+    # plt.show()
			
 
				+
			
 
				+    y, x, _z = im_resize.shape
			
 
				+    x_bias = int(x * 0.15)
			
 
				+    y_bias = int(y * 0.30)
			
 
				+    middle_x = int(x / 2)
			
 
				+    middle_area_img = im_resize[y_bias:y, middle_x - x_bias:middle_x + x_bias]
			
 
				+    img_mtx = np.asarray(utils.rgb2binary(middle_area_img))
			
 
				+    y_sum_array = img_mtx.sum(axis=0)
			
 
				+    std = np.std(y_sum_array)  # 计算标准差
			
 
				+    y_sum_list = list(y_sum_array)
			
 
				+
			
 
				+    if std <= thresh_std:
			
 
				+        index = y_sum_list.index(max(y_sum_list))
			
 
				+    else:
			
 
				+        index = y_sum_list.index(min(y_sum_list))
			
 
				+    split_index = middle_x + index - int(len(y_sum_list) / 2)
			
 
				+    split_index = int(split_index / radio)
			
 
				+
			
 
				+    y_raw, x_raw, _ = im_raw.shape
			
 
				+    img_left = im_raw[1:y_raw, 1:split_index]
			
 
				+    img_right = im_raw[1:y_raw, split_index + 1:x_raw]
			
 
				+    left_path = img_path.replace('.jpg', '_left.jpg')
			
 
				+    right_path = img_path.replace('.jpg', '_right.jpg')
			
 
				+    cv2.imencode('.jpg', img_left)[1].tofile(left_path)
			
 
				+    cv2.imencode('.jpg', img_right)[1].tofile(right_path)
			
 
				+    print(left_path)
			
 
				+    print(right_path)
			
 
				+
			
 
				+
			
 
				+def smart_split_img_at_middle_by_x_axis(img_path, resize_radio=0.1):
			
 
				+    im_raw = utils.read_img(img_path)
			
 
				+    im_resize = utils.resize_by_percent(im_raw, resize_radio)
			
 
				+
			
 
				+    bin_img = utils.rgb2binary(im_resize)
			
 
				+    ry, rx = bin_img.shape
			
 
				+    img_mtx0 = np.asarray(bin_img)
			
 
				+    y_sum_array0 = img_mtx0.sum(axis=0)  # y轴求和
			
 
				+    subtracts_arr = np.abs(array_latter_subtracts_precious(y_sum_array0 / ry))  # 长度减1
			
 
				+    subtracts_arr_index = np.argsort(subtracts_arr, kind='quicksort', order=None)
			
 
				+    subtracts_arr_index = subtracts_arr_index[-10:]
			
 
				+
			
 
				+    index_middle_distance_list = list(np.abs(subtracts_arr_index - int(rx / 2)))
			
 
				+    split_index = subtracts_arr_index[index_middle_distance_list.index(min(index_middle_distance_list))] + 1
			
 
				+    split_index = int(split_index / resize_radio)
			
 
				+    img_left, img_right = split_by_index(im_raw, split_index)
			
 
				+    left_path = img_path.replace('.jpg', '_left.jpg')
			
 
				+    right_path = img_path.replace('.jpg', '_right.jpg')
			
 
				+    cv2.imencode('.jpg', img_left)[1].tofile(left_path)
			
 
				+    cv2.imencode('.jpg', img_right)[1].tofile(right_path)
			
 
				+    print(left_path)
			
 
				+    print(right_path)
			
 
				+
			
 
				+
			
 
				+def segment2parts_by_pix(crop_img):
			
 
				+
			
 
				+    p_image = utils.preprocess(crop_img)
			
 
				+    height, width = p_image.shape
			
 
				+    sum_x_axis = p_image.sum(axis=0) / (height*255)
			
 
				+
			
 
				+    # sum_x_axis = (sum_x_axis / (255*height)).astype(float)
			
 
				+    kernel = np.array([-2, 0, 2])
			
 
				+    sobel_filter = scipy.signal.convolve(sum_x_axis, kernel)  # 一维卷积运算
			
 
				+
			
 
				+    temp = np.abs(sobel_filter[1:-1])/np.max(np.abs(sobel_filter[1:-1]))
			
 
				+    temp[temp < 0.6] = 0
			
 
				+    temp[temp != 0] = 1
			
 
				+    index = np.where(temp == 1)[0]
			
 
				+
			
 
				+    width1 = width // 9
			
 
				+
			
 
				+    intervals = [(0, width1), (4 * width1, 5 * width1), (8 * width1, width)]  # 左开右闭
			
 
				+
			
 
				+    index_list = []
			
 
				+    for i, interval in enumerate(intervals):
			
 
				+        index_sec_list = []
			
 
				+        for ele in index:
			
 
				+            if interval[0] < ele <= interval[1]:
			
 
				+                index_sec_list.append(ele)
			
 
				+
			
 
				+        index_list.append(index_sec_list)
			
 
				+
			
 
				+    left_x_point, middle_x_point, right_x_point = 9999, 9999, 9999
			
 
				+    left_del_part = (0, left_x_point)
			
 
				+    middle_part = (left_x_point, middle_x_point)
			
 
				+    right_part = (middle_x_point, right_x_point)
			
 
				+    right_del_part = (right_x_point, width)
			
 
				+
			
 
				+    # left
			
 
				+    if index_list[0]:
			
 
				+        left_x_point = index_list[0][-1]
			
 
				+        left_del_part = (0, left_x_point)
			
 
				+    # middle
			
 
				+    if index_list[1]:
			
 
				+        value_list = [abs(sobel_filter[index]) for index in index_list[1]]
			
 
				+        middle_x_point = index_list[1][value_list.index(max(value_list))]
			
 
				+        middle_part = (left_x_point, middle_x_point)
			
 
				+    # right
			
 
				+    if index_list[2]:
			
 
				+        right_x_point = index_list[2][0]
			
 
				+        right_part = (middle_x_point, right_x_point)
			
 
				+        right_del_part = (right_x_point, width)
			
 
				+
			
 
				+    split_point = sorted(list(set(sorted(list(left_del_part + middle_part + right_part + right_del_part))) - {9999}))
			
 
				+
			
 
				+    split_pairs = []
			
 
				+    if len(split_point) > 2:
			
 
				+        a = split_point[:-1]
			
 
				+        b = split_point[1:]
			
 
				+        for i, ele in enumerate(a):
			
 
				+            if b[i] - ele > width1:
			
 
				+                split_pairs.append((ele, b[i]))
			
 
				+
			
 
				+    return split_pairs
			
 
				+
			
 
				+
			
 
				+def segment2parts(im_raw, save_path):
			
 
				+    img_parts_dict_list = []
			
 
				+
			
 
				+    # randon_img = radon_rotate_ski(im_raw)
			
 
				+    # 试卷顶部可能有黑边，切去3%
			
 
				+    yy, xx = im_raw.shape[0], im_raw.shape[1]
			
 
				+    y_crop_pix = int(yy*0.03)
			
 
				+    # x_crop_pix = int(xx*0.03)
			
 
				+    x_crop_pix = 0
			
 
				+    im_crop = im_raw[y_crop_pix:yy-y_crop_pix, x_crop_pix:xx-x_crop_pix]
			
 
				+
			
 
				+    split_pairs = segment2parts_by_pix(im_crop)
			
 
				+    if len(split_pairs) >= 2:
			
 
				+        for index, ele in enumerate(split_pairs):
			
 
				+            dst = im_raw[:, ele[0]:ele[1]]
			
 
				+            save_path_final = save_path.replace('.jpg', '') + '_{}_{}_{}.jpg'.format(ele[0], 0, index)
			
 
				+            cv2.imencode('.jpg', dst)[1].tofile(save_path_final)
			
 
				+            image = cv2.imencode('.jpg', dst)[1]
			
 
				+            base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+            part_dict = {'img_part': base64_data,
			
 
				+                         'x_bias': ele[0] + x_crop_pix,
			
 
				+                         'y_bias': 0}
			
 
				+
			
 
				+            img_parts_dict_list.append(part_dict)
			
 
				+
			
 
				+    else:
			
 
				+        img = im_crop[:, split_pairs[0][0]:split_pairs[0][1]]
			
 
				+        resize_ratio = 0.3
			
 
				+        im_resize = utils.resize_by_percent(img, resize_ratio)
			
 
				+
			
 
				+        # gray
			
 
				+        if len(im_resize.shape) >= 3:
			
 
				+            gray_img = cv2.cvtColor(im_resize, cv2.COLOR_BGR2GRAY)
			
 
				+        else:
			
 
				+            gray_img = im_resize
			
 
				+        ry, rx = gray_img.shape
			
 
				+        # 高斯
			
 
				+        glur_img = cv2.GaussianBlur(gray_img, (5, 5), 0)
			
 
				+        # otsu
			
 
				+        _ret, threshed_img = cv2.threshold(glur_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
			
 
				+
			
 
				+        if ry < rx:
			
 
				+            x_kernel = int(10*resize_ratio)
			
 
				+        else:
			
 
				+            x_kernel = int(10 * resize_ratio)
			
 
				+        kernel = np.ones((glur_img.shape[0], x_kernel), np.uint8)  # height, width
			
 
				+        dilation = cv2.dilate(threshed_img, kernel, iterations=1)
			
 
				+        # cv2.imshow(' ', dilation)
			
 
				+        # if cv2.waitKey(0) == 27:
			
 
				+        #     cv2.destroyAllWindows()
			
 
				+
			
 
				+        # _, cnts, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+        (major, minor, _) = cv2.__version__.split(".")
			
 
				+        contours = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+        cnts = contours[0] if int(major) > 3 else contours[1]
			
 
				+
			
 
				+        box_list = [cv2.boundingRect(cnt) for cnt in cnts]
			
 
				+        box_array = np.asarray(box_list)
			
 
				+        box_array[:, 2] = box_array[:, 0] + box_array[:, 2]
			
 
				+        box_array[:, 3] = box_array[:, 1] + box_array[:, 3]
			
 
				+
			
 
				+        middle_x = rx // 2
			
 
				+        left_box = np.asarray([0, 0, 0, 0])
			
 
				+        right_box = np.asarray([0, 0, 0, 0])
			
 
				+        for box in box_array:
			
 
				+            x, y, xmax, ymax = box
			
 
				+            if x + (xmax-x)//2 <= middle_x:
			
 
				+                left_box = np.vstack([left_box, box])
			
 
				+            else:
			
 
				+                right_box = np.vstack([right_box, box])
			
 
				+
			
 
				+        left_box_list = []
			
 
				+        right_box_list = []
			
 
				+        try:
			
 
				+            left_box_list = left_box[1:, :][:, :2].min(axis=0).tolist() + left_box[1:, :][:, 2:].max(axis=0).tolist()
			
 
				+        except Exception:
			
 
				+            pass  # 单面的情况
			
 
				+        try:
			
 
				+            right_box_list = right_box[1:, :][:, :2].min(axis=0).tolist() + right_box[1:, :][:, 2:].max(axis=0).tolist()
			
 
				+        except Exception:
			
 
				+            pass
			
 
				+
			
 
				+        box_list = [left_box_list, right_box_list]
			
 
				+
			
 
				+        bias = int(70 * resize_ratio)
			
 
				+        for index, box in enumerate(box_list):
			
 
				+            if len(box) > 0:
			
 
				+                xmin, ymin, xmax, ymax = box
			
 
				+                if xmin - bias > 0:
			
 
				+                    xmin = xmin - bias
			
 
				+                else:
			
 
				+                    xmin = 0
			
 
				+
			
 
				+                dst = im_crop[int(ymin / resize_ratio):int(ymax / resize_ratio),
			
 
				+                      int(xmin / resize_ratio):int(xmax / resize_ratio)]
			
 
				+                save_path_final = save_path.replace('.jpg', '') + '_{}_{}_{}.jpg'.format(xmin, ymin, index)
			
 
				+                cv2.imencode('.jpg', dst)[1].tofile(save_path_final)
			
 
				+                image = cv2.imencode('.jpg', dst)[1]
			
 
				+                base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+                part_dict = {'img_part': base64_data,
			
 
				+                             'x_bias': int(xmin/resize_ratio) + x_crop_pix + split_pairs[0][0],
			
 
				+                             'y_bias': int(ymin/resize_ratio) + y_crop_pix + 0}
			
 
				+                if (xmax - xmin)/resize_ratio > 100:  # 去掉竖长条
			
 
				+                    img_parts_dict_list.append(part_dict)
			
 
				+
			
 
				+    return img_parts_dict_list
			
 
				+
			
--- a/segment/image_operation/segment.py
+++ b/segment/image_operation/segment.py
@@ -0,0 +1,47 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : segment.py
			
 
				+import os
			
 
				+
			
 
				+import xml.etree.cElementTree as ET
			
 
				+
			
 
				+from segment.image_operation import utils
			
 
				+
			
 
				+
			
 
				+def joint_image(raw_img_path, bbox, lines_list):
			
 
				+    lines_dir = raw_img_path.replace('.jpg', '_lines')
			
 
				+    lines_file_list = os.listdir(lines_dir)
			
 
				+    lines_file_list = sorted([ele.replace('jpg', '')
			
 
				+                              for ele in lines_file_list if ele.endswith('.jpg')])
			
 
				+
			
 
				+    exam_items_bbox = []
			
 
				+
			
 
				+    tree = ET.parse(r'./segment/exam_info/000000-template.xml')  # xml tree
			
 
				+    for index_num, j in enumerate(lines_list):
			
 
				+        if j[1] == j[0]:
			
 
				+            continue
			
 
				+        elif j[1] - j[0] == 1:
			
 
				+            index_list = lines_file_list[j[0]].split('_')
			
 
				+            y_low = int(index_list[0])
			
 
				+            y_high = int(index_list[1])
			
 
				+            x_low = int(index_list[2])
			
 
				+            x_high = int(index_list[3])
			
 
				+        else:
			
 
				+            index_list0 = lines_file_list[j[0]].split('_')  # [33, 37]
			
 
				+            index_list1 = lines_file_list[j[1] - 1].split('_')
			
 
				+            y_low = int(index_list0[0])
			
 
				+            y_high = int(index_list1[1])
			
 
				+
			
 
				+            tmp_x_low_list = [ele.split('_')[2]
			
 
				+                              for ele in lines_file_list[j[0]:j[1]]]
			
 
				+            tmp_x_high_list = [ele.split('_')[3]
			
 
				+                               for ele in lines_file_list[j[0]:j[1]]]
			
 
				+            x_low = int(min(tmp_x_low_list))
			
 
				+            x_high = int(max(tmp_x_high_list))
			
 
				+        exam_bbox = [bbox[2] + x_low, bbox[0] + y_low, bbox[2] + x_high, bbox[0] + y_high]
			
 
				+
			
 
				+        tree = utils.create_xml('{:02d}'.format(index_num), tree,
			
 
				+                                exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
			
 
				+        exam_items_bbox.append(exam_bbox)
			
 
				+    # print(exam_items_bbox)
			
 
				+    tree.write(raw_img_path.replace('.jpg', '.xml'))
			
 
				+    return exam_items_bbox
			
--- a/segment/image_operation/split_lines.py
+++ b/segment/image_operation/split_lines.py
@@ -0,0 +1,94 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : split_lines.py
			
 
				+import os
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+from segment.image_operation import utils
			
 
				+
			
 
				+from django.conf import settings
			
 
				+
			
 
				+
			
 
				+def find_contours(resized_img, ex_x, ex_y):
			
 
				+    threshed = utils.rgb2binary(resized_img)
			
 
				+
			
 
				+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ex_x, ex_y))  # 膨胀系数
			
 
				+    # morphed = cv2.morphologyEx(threshed, cv2.MORPH_CLOSE, kernel)
			
 
				+    morphed = cv2.dilate(threshed, kernel, iterations=1)
			
 
				+
			
 
				+    _, cnts, hierarchy = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+
			
 
				+    cnt = sorted(cnts, key=cv2.contourArea)[-1]
			
 
				+    x, y, w, h = cv2.boundingRect(cnt)
			
 
				+    x = x + int(ex_x * 0.5)
			
 
				+    w = w - int(ex_x * 0.5)
			
 
				+    dst = threshed[y:y + h, x:x + w]
			
 
				+    return dst, (y, y + h, x, x + w), cnts
			
 
				+
			
 
				+
			
 
				+def save_lines_by_index_without_white_line(path, split_img, split_index, resize_radio):
			
 
				+    img_y = split_img.shape[0]
			
 
				+    img_x = split_img.shape[1]
			
 
				+    lines_list = []
			
 
				+    for i in range(1, len(split_index)):
			
 
				+        if i % 2 != 1:
			
 
				+            start0 = int((split_index[i - 1] - 2) / resize_radio)  # 0,1间隔, 交替相减a2-a1， 每行上下的白多一点
			
 
				+            end0 = int((split_index[i] - 1 + 2) / resize_radio)  # 前一个索引
			
 
				+            start = start0 if (start0 >= 0) else 0
			
 
				+            end = end0 if (end0 <= img_y) else img_y
			
 
				+            line = split_img[start:end, 1:img_x]
			
 
				+            if len(line) < 1:
			
 
				+                continue
			
 
				+
			
 
				+            _, _, cnts = find_contours(line, 500, 70)  # x轴膨胀，去掉每行的白色， 第二个参数按行膨胀，第三个参数按列膨胀
			
 
				+            for cnt_id, cnt in enumerate(reversed(cnts)):
			
 
				+                x, y, w, h = cv2.boundingRect(cnt)
			
 
				+
			
 
				+                # print(x, y, w, h)
			
 
				+                if w * h > 100:
			
 
				+                    cj_out = line[y:y + h, x:x + w]
			
 
				+                    # line_list.append(cj_out)
			
 
				+                    save_path = os.path.join(path,
			
 
				+                                             '{:04d}_{:04d}_{:04d}_{:04d}_{}.jpg'.format(start, end, x, x+w, cnt_id))
			
 
				+                    cv2.imencode('.jpg', cj_out)[1].tofile(save_path)
			
 
				+                    # print(save_path)
			
 
				+                    filename = os.path.abspath(save_path)
			
 
				+                    lines_list.append(filename)
			
 
				+    return lines_list
			
 
				+
			
 
				+
			
 
				+def line_split(path, save_path, tolerance_pix_number):
			
 
				+    resize_radio = settings.RESIZE_RADIO
			
 
				+    images = utils.read_img(path)
			
 
				+    # raw_y = images.shape[0]
			
 
				+    # raw_x = images.shape[1]
			
 
				+    # images = images[:raw_y, int(raw_x * 0.05):raw_x - int(raw_x * 0.05)]
			
 
				+
			
 
				+    resize_img = utils.resize_by_percent(images, resize_radio)
			
 
				+    resize_crop_imgs, max_bbox, _ = find_contours(resize_img, 10, 200)  # y轴膨胀，整体去掉白色，去掉扫描后图像边界的黑色线条
			
 
				+
			
 
				+    bbox = [int(ele / resize_radio) for ele in max_bbox]
			
 
				+
			
 
				+    img_arr = np.asarray(resize_crop_imgs)
			
 
				+    img_size = img_arr.shape
			
 
				+    width = img_size[1]
			
 
				+
			
 
				+    sum_x_axis = img_arr.sum(axis=1) / width
			
 
				+    # hei[hei <= 254] = 0  # black
			
 
				+    sum_x_axis[sum_x_axis > 255 * tolerance_pix_number / width] = 1  # white
			
 
				+    sum_x_axis[sum_x_axis != 1] = 0
			
 
				+    sum_x_axis_list = list(sum_x_axis)
			
 
				+
			
 
				+    split_index0 = []
			
 
				+    num = 0
			
 
				+    for i, ele in enumerate(sum_x_axis_list):
			
 
				+        num = num % 2
			
 
				+        if ele == num:
			
 
				+            # print(i)
			
 
				+            num = num + 1
			
 
				+            split_index0.append(i)
			
 
				+
			
 
				+    split_img0 = images[bbox[0]:bbox[1], bbox[2]:bbox[3]]
			
 
				+    lines_list = save_lines_by_index_without_white_line(save_path, split_img0, split_index0, resize_radio)
			
 
				+    return bbox, lines_list
			
--- a/segment/image_operation/utils.py
+++ b/segment/image_operation/utils.py
@@ -0,0 +1,207 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : utils.py
			
 
				+import os
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from PIL import Image
			
 
				+
			
 
				+
			
 
				+def read_img(img_path):
			
 
				+    try:
			
 
				+        im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
			
 
				+    except FileNotFoundError as e:
			
 
				+        raise e
			
 
				+    return im
			
 
				+
			
 
				+
			
 
				+def write_img(img_to_wwite, save_path):
			
 
				+    try:
			
 
				+        cv2.imencode('.jpg', img_to_wwite)[1].tofile(save_path)
			
 
				+    except FileNotFoundError as e:
			
 
				+        raise e
			
 
				+
			
 
				+
			
 
				+def crop_region_direct(im, bbox):
			
 
				+    xmin = bbox[0]
			
 
				+    ymin = bbox[1]
			
 
				+    xmax = bbox[2]
			
 
				+    ymax = bbox[3]
			
 
				+
			
 
				+    region = im[ymin:ymax, xmin:xmax]
			
 
				+    return region
			
 
				+
			
 
				+
			
 
				+def resize_by_percent(im, percent):
			
 
				+    """
			
 
				+    :param im:
			
 
				+    :param percent:
			
 
				+    :return: resize_img
			
 
				+
			
 
				+    interpolation - 插值方法。共有5种：
			
 
				+    1)INTER_NEAREST - 最近邻插值法
			
 
				+    2)INTER_LINEAR - 双线性插值法（默认）
			
 
				+    3)INTER_AREA - 基于局部像素的重采样（resampling using pixel area relation）。
			
 
				+      对于图像抽取（image decimation）来说，这可能是一个更好的方法。但如果是放大图像时，它和最近邻法的效果类似。
			
 
				+    4)INTER_CUBIC - 基于4x4像素邻域的3次插值法
			
 
				+    5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值
			
 
				+    """
			
 
				+
			
 
				+    height = im.shape[0]
			
 
				+    width = im.shape[1]
			
 
				+    new_x = int(width * percent)
			
 
				+    new_y = int(height * percent)
			
 
				+
			
 
				+    res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA)
			
 
				+
			
 
				+    return res
			
 
				+
			
 
				+
			
 
				+def resize_by_fixed_size(im, new_x, new_y):
			
 
				+    """
			
 
				+    :param new_y: y轴像素
			
 
				+    :param new_x: x轴像素
			
 
				+    :param im:
			
 
				+    :return: resize_img
			
 
				+
			
 
				+    interpolation - 插值方法。共有5种：
			
 
				+    1)INTER_NEAREST - 最近邻插值法
			
 
				+    2)INTER_LINEAR - 双线性插值法（默认）
			
 
				+    3)INTER_AREA - 基于局部像素的重采样（resampling using pixel area relation）。
			
 
				+      对于图像抽取（image decimation）来说，这可能是一个更好的方法。但如果是放大图像时，它和最近邻法的效果类似。
			
 
				+    4)INTER_CUBIC - 基于4x4像素邻域的3次插值法
			
 
				+    5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值
			
 
				+    """
			
 
				+    res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA)
			
 
				+
			
 
				+    return res
			
 
				+
			
 
				+
			
 
				+def resize_by_radio(im):
			
 
				+    """
			
 
				+    :param im:
			
 
				+    :return: resize_img
			
 
				+
			
 
				+    interpolation - 插值方法。共有5种：
			
 
				+    1)INTER_NEAREST - 最近邻插值法
			
 
				+    2)INTER_LINEAR - 双线性插值法（默认）
			
 
				+    3)INTER_AREA - 基于局部像素的重采样（resampling using pixel area relation）。
			
 
				+      对于图像抽取（image decimation）来说，这可能是一个更好的方法。但如果是放大图像时，它和最近邻法的效果类似。
			
 
				+    4)INTER_CUBIC - 基于4x4像素邻域的3次插值法
			
 
				+    5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值
			
 
				+    """
			
 
				+    # res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA)
			
 
				+
			
 
				+    longer = 750
			
 
				+    shorter = 500
			
 
				+
			
 
				+    im_shape = im.shape
			
 
				+    im_size_min = np.min(im_shape[0:2])
			
 
				+    res = im
			
 
				+    if im_size_min > 500:
			
 
				+        im_size_max = np.max(im_shape[0:2])
			
 
				+        im_scale = float(shorter) / float(im_size_min)
			
 
				+        # Prevent the biggest axis from being more than MAX_SIZE
			
 
				+        if np.round(im_scale * im_size_max) > longer:
			
 
				+            im_scale = float(longer) / float(im_size_max)
			
 
				+        res = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
			
 
				+                         interpolation=cv2.INTER_AREA)
			
 
				+
			
 
				+    return res
			
 
				+
			
 
				+
			
 
				+def rgb2binary(im):
			
 
				+    gray_img = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
			
 
				+    _ret, thresh_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
			
 
				+    return thresh_img
			
 
				+
			
 
				+
			
 
				+def create_xml(obj_name, tree, xmin, ymin, xmax, ymax):
			
 
				+    root = tree.getroot()
			
 
				+
			
 
				+    pobject = ET.SubElement(root, 'object', {})
			
 
				+    pname = ET.SubElement(pobject, 'name')
			
 
				+    pname.text = obj_name
			
 
				+    ppose = ET.SubElement(pobject, 'pose')
			
 
				+    ppose.text = 'Unspecified'
			
 
				+    ptruncated = ET.SubElement(pobject, 'truncated')
			
 
				+    ptruncated.text = '0'
			
 
				+    pdifficult = ET.SubElement(pobject, 'difficult')
			
 
				+    pdifficult.text = '0'
			
 
				+    # add bndbox
			
 
				+    pbndbox = ET.SubElement(pobject, 'bndbox')
			
 
				+    pxmin = ET.SubElement(pbndbox, 'xmin')
			
 
				+    pxmin.text = str(xmin)
			
 
				+
			
 
				+    pymin = ET.SubElement(pbndbox, 'ymin')
			
 
				+    pymin.text = str(ymin)
			
 
				+
			
 
				+    pxmax = ET.SubElement(pbndbox, 'xmax')
			
 
				+    pxmax.text = str(xmax)
			
 
				+
			
 
				+    pymax = ET.SubElement(pbndbox, 'ymax')
			
 
				+    pymax.text = str(ymax)
			
 
				+
			
 
				+    return tree
			
 
				+
			
 
				+
			
 
				+def preprocess(img, binary_inv=True):
			
 
				+    dilate = 1
			
 
				+    blur = 1
			
 
				+
			
 
				+    if len(img.shape) >= 3:
			
 
				+        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+    else:
			
 
				+        gray_img = img
			
 
				+
			
 
				+    # # Apply dilation and erosion to remove some noise
			
 
				+    if dilate != 0:
			
 
				+        kernel = np.ones((dilate, dilate), np.uint8)
			
 
				+        img = cv2.dilate(gray_img, kernel, iterations=1)
			
 
				+        img = cv2.erode(img, kernel, iterations=1)
			
 
				+
			
 
				+    # Apply blur to smooth out the edges
			
 
				+    if blur != 0:
			
 
				+        img = cv2.GaussianBlur(img, (blur, blur), 0)
			
 
				+
			
 
				+    # Apply threshold to get image with only b&w (binarization)
			
 
				+    if binary_inv:
			
 
				+        img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
			
 
				+    else:
			
 
				+        img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    return img
			
 
				+
			
 
				+
			
 
				+def write_single_img(dst, save_path):
			
 
				+    try:
			
 
				+        cv2.imencode('.jpg', dst)[1].tofile(save_path)
			
 
				+    except FileNotFoundError as e:
			
 
				+        raise e
			
 
				+
			
 
				+
			
 
				+def png2jpg(png_path):
			
 
				+    try:
			
 
				+        im = Image.open(png_path)
			
 
				+        jpg_path = png_path.replace('.png', '.jpg')
			
 
				+        bg = Image.new("RGB", im.size, (255, 255, 255))
			
 
				+        bg.paste(im, im)
			
 
				+        bg.save(jpg_path)
			
 
				+        return jpg_path
			
 
				+    except Exception as e:
			
 
				+        print("PNG转换JPG 错误", e)
			
 
				+
			
 
				+
			
 
				+def png_read(img_file):
			
 
				+    raw_img = Image.open(img_file)  # 读取上传的网络图像
			
 
				+
			
 
				+    channels = raw_img.split()
			
 
				+    if len(channels) > 3:
			
 
				+        img = Image.merge("RGB", (channels[1], channels[2], channels[3]))
			
 
				+        open_cv_image = np.array(img)
			
 
				+
			
 
				+    else:
			
 
				+        img = raw_img
			
 
				+        open_cv_image = np.array(img)
			
 
				+    return open_cv_image
			
--- a/segment/logging.conf
+++ b/segment/logging.conf
@@ -0,0 +1,53 @@
 
				+[loggers]
			
 
				+keys=root,test, production
			
 
				+
			
 
				+[handlers]
			
 
				+keys=rotatingFileHandler,testHandler,consoleHandler, productionHandler
			
 
				+
			
 
				+[formatters]
			
 
				+keys=simpleFmt
			
 
				+
			
 
				+[logger_root]
			
 
				+level=DEBUG
			
 
				+handlers=rotatingFileHandler
			
 
				+
			
 
				+[logger_test]
			
 
				+level=DEBUG
			
 
				+handlers=testHandler
			
 
				+qualname=test
			
 
				+propagate=0
			
 
				+
			
 
				+[logger_production]
			
 
				+level=DEBUG
			
 
				+handlers=productionHandler
			
 
				+qualname=production
			
 
				+propagate=0
			
 
				+
			
 
				+[handler_rotatingFileHandler]
			
 
				+class=handlers.RotatingFileHandler
			
 
				+level=INFO
			
 
				+formatter=simpleFmt
			
 
				+args=("./default_log.log", "a", 20*1024*1024, 10, 'utf-8')
			
 
				+
			
 
				+[handler_testHandler]
			
 
				+class=handlers.RotatingFileHandler
			
 
				+level=INFO
			
 
				+formatter=simpleFmt
			
 
				+args=("./test_log.log", "a", 20*1024*1024, 10, 'utf-8')
			
 
				+
			
 
				+[handler_consoleHandler]
			
 
				+class=StreamHandler
			
 
				+level=DEBUG
			
 
				+formatter=simpleFmt
			
 
				+args=(sys.stdout,)
			
 
				+
			
 
				+[handler_productionHandler]
			
 
				+class=handlers.RotatingFileHandler
			
 
				+level=INFO
			
 
				+formatter=simpleFmt
			
 
				+args=("./log.log", "a", 20*1024*1024, 10, 'utf-8')
			
 
				+
			
 
				+
			
 
				+[formatter_simpleFmt]
			
 
				+format=%(asctime)s - %(name)s - %(levelname)s - %(message)s - [%(filename)s:%(lineno)s]
			
 
				+
			
--- a/segment/logging_config.py
+++ b/segment/logging_config.py
@@ -0,0 +1,11 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : logging_config.py
			
 
				+import logging
			
 
				+import logging.config
			
 
				+
			
 
				+
			
 
				+def getLogger(name='root'):
			
 
				+    CONF_LOG = "./segment/logging.conf"
			
 
				+    logging.config.fileConfig(CONF_LOG)
			
 
				+
			
 
				+    return logging.getLogger(name)
			
--- a/segment/migrations/0001_initial.py
+++ b/segment/migrations/0001_initial.py
@@ -0,0 +1,25 @@
 
				+# Generated by Django 2.1 on 2018-10-09 07:23
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    initial = True
			
 
				+
			
 
				+    dependencies = [
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.CreateModel(
			
 
				+            name='ExamImage',
			
 
				+            fields=[
			
 
				+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
			
 
				+                ('img_name', models.CharField(max_length=150, null=True)),
			
 
				+                ('subject_id', models.IntegerField()),
			
 
				+                ('subject', models.CharField(default='unknown_subject', max_length=20)),
			
 
				+                ('upload_date', models.DateField(verbose_name='保存日期')),
			
 
				+                ('save_path', models.CharField(max_length=150)),
			
 
				+            ],
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0002_auto_20181010_1008.py
+++ b/segment/migrations/0002_auto_20181010_1008.py
@@ -0,0 +1,18 @@
 
				+# Generated by Django 2.1 on 2018-10-10 02:08
			
 
				+
			
 
				+from django.db import migrations
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0001_initial'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.RenameField(
			
 
				+            model_name='examimage',
			
 
				+            old_name='img_name',
			
 
				+            new_name='raw_name',
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0003_ocrtoken.py
+++ b/segment/migrations/0003_ocrtoken.py
@@ -0,0 +1,21 @@
 
				+# Generated by Django 2.1 on 2018-10-25 02:43
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0002_auto_20181010_1008'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.CreateModel(
			
 
				+            name='OcrToken',
			
 
				+            fields=[
			
 
				+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
			
 
				+                ('upload_date', models.DateField(verbose_name='注册日期')),
			
 
				+                ('access_token', models.CharField(max_length=150, null=True)),
			
 
				+            ],
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0004_auto_20181025_1329.py
+++ b/segment/migrations/0004_auto_20181025_1329.py
@@ -0,0 +1,18 @@
 
				+# Generated by Django 2.1 on 2018-10-25 05:29
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0003_ocrtoken'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.AlterField(
			
 
				+            model_name='ocrtoken',
			
 
				+            name='upload_date',
			
 
				+            field=models.DateTimeField(verbose_name='注册日期'),
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0005_auto_20181025_1332.py
+++ b/segment/migrations/0005_auto_20181025_1332.py
@@ -0,0 +1,18 @@
 
				+# Generated by Django 2.1 on 2018-10-25 05:32
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0004_auto_20181025_1329'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.AlterField(
			
 
				+            model_name='ocrtoken',
			
 
				+            name='upload_date',
			
 
				+            field=models.DateTimeField(auto_now=True, verbose_name='注册日期'),
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0006_auto_20181025_1341.py
+++ b/segment/migrations/0006_auto_20181025_1341.py
@@ -0,0 +1,22 @@
 
				+# Generated by Django 2.1 on 2018-10-25 13:41
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0005_auto_20181025_1332'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.RemoveField(
			
 
				+            model_name='ocrtoken',
			
 
				+            name='upload_date',
			
 
				+        ),
			
 
				+        migrations.AddField(
			
 
				+            model_name='ocrtoken',
			
 
				+            name='update_time',
			
 
				+            field=models.DateTimeField(auto_now=True, verbose_name='更新日期'),
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0007_sheetbigboxes.py
+++ b/segment/migrations/0007_sheetbigboxes.py
@@ -0,0 +1,27 @@
 
				+# Generated by Django 2.1 on 2019-04-03 15:26
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0006_auto_20181025_1341'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.CreateModel(
			
 
				+            name='SheetBigBoxes',
			
 
				+            fields=[
			
 
				+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
			
 
				+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='更新日期')),
			
 
				+                ('series_number', models.CharField(max_length=100, null=True)),
			
 
				+                ('raw_name', models.CharField(max_length=100, null=True)),
			
 
				+                ('save_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('raw_big_box_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('small_box_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('subject_id', models.IntegerField(default=0)),
			
 
				+                ('subject', models.CharField(default='unknown_subject', max_length=20)),
			
 
				+            ],
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0007_sheetbigboxes_sheetboxes.py
+++ b/segment/migrations/0007_sheetbigboxes_sheetboxes.py
@@ -0,0 +1,40 @@
 
				+# Generated by Django 2.1.2 on 2019-09-26 15:40
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0006_auto_20181025_1341'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.CreateModel(
			
 
				+            name='SheetBigBoxes',
			
 
				+            fields=[
			
 
				+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
			
 
				+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='更新日期')),
			
 
				+                ('series_number', models.CharField(max_length=100, null=True)),
			
 
				+                ('raw_name', models.CharField(max_length=100, null=True)),
			
 
				+                ('small_box_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('subject_id', models.IntegerField(default=0)),
			
 
				+                ('save_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('raw_big_box_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('subject', models.CharField(default='unknown_subject', max_length=20)),
			
 
				+            ],
			
 
				+        ),
			
 
				+        migrations.CreateModel(
			
 
				+            name='SheetBoxes',
			
 
				+            fields=[
			
 
				+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='更新日期')),
			
 
				+                ('paper_id', models.CharField(max_length=100, primary_key=True, serialize=False)),
			
 
				+                ('raw_name', models.CharField(max_length=100, null=True)),
			
 
				+                ('subject_id', models.IntegerField(default=0)),
			
 
				+                ('subject', models.CharField(default='unknown_subject', max_length=20)),
			
 
				+                ('save_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('xml_box_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('download_path', models.CharField(default='', max_length=100)),
			
 
				+            ],
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/0008_sheetboxes.py
+++ b/segment/migrations/0008_sheetboxes.py
@@ -0,0 +1,26 @@
 
				+# Generated by Django 2.1 on 2019-06-25 15:40
			
 
				+
			
 
				+from django.db import migrations, models
			
 
				+
			
 
				+
			
 
				+class Migration(migrations.Migration):
			
 
				+
			
 
				+    dependencies = [
			
 
				+        ('segment', '0007_sheetbigboxes'),
			
 
				+    ]
			
 
				+
			
 
				+    operations = [
			
 
				+        migrations.CreateModel(
			
 
				+            name='SheetBoxes',
			
 
				+            fields=[
			
 
				+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='更新日期')),
			
 
				+                ('paper_id', models.CharField(max_length=100, primary_key=True, serialize=False)),
			
 
				+                ('raw_name', models.CharField(max_length=100, null=True)),
			
 
				+                ('subject_id', models.IntegerField(default=0)),
			
 
				+                ('subject', models.CharField(default='unknown_subject', max_length=20)),
			
 
				+                ('save_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('xml_box_path', models.CharField(max_length=100, null=True)),
			
 
				+                ('download_path', models.CharField(default='', max_length=100)),
			
 
				+            ],
			
 
				+        ),
			
 
				+    ]
			
--- a/segment/migrations/__init__.py
+++ b/segment/migrations/__init__.py
--- a/segment/models.py
+++ b/segment/models.py
@@ -0,0 +1,39 @@
 
				+from django.db import models
			
 
				+
			
 
				+
			
 
				+# Create your models here.
			
 
				+class ExamImage(models.Model):
				+    """Row describing one exam image: raw name, subject, upload date and save path."""
				+    raw_name = models.CharField(max_length=150, null=True)
				+    # No default: callers must supply a subject_id when creating a row.
				+    subject_id = models.IntegerField()
				+    subject = models.CharField(max_length=20, default='unknown_subject')
				+    # Verbose name '保存日期' means "save date".
				+    upload_date = models.DateField('保存日期')
				+    save_path = models.CharField(max_length=150, null=False)
			
 
				+
			
 
				+
			
 
				+class OcrToken(models.Model):
				+    """Stores an OCR ``access_token`` together with the time the row was last saved."""
				+    # Verbose name '更新日期' means "update date"; auto_now refreshes it on every save.
				+    update_time = models.DateTimeField('更新日期', auto_now=True)
				+    # NOTE(review): presumably the token of an external OCR service - confirm with the caller.
				+    access_token = models.CharField(max_length=150, null=True)
			
 
				+
			
 
				+
			
 
				+class SheetBigBoxes(models.Model):
				+    """Row holding the names and paths recorded for a sheet's "big box" data.
				+
				+    Uses Django's implicit ``id`` primary key (no field sets ``primary_key``).
				+    """
				+    # Verbose name '更新日期' means "update date"; auto_now refreshes it on every save.
				+    update_time = models.DateTimeField('更新日期', auto_now=True)
				+    series_number = models.CharField(max_length=100, null=True)
				+    raw_name = models.CharField(max_length=100, null=True)
				+    # NOTE(review): the *_path fields look like filesystem locations - confirm against the views.
				+    small_box_path = models.CharField(max_length=100, null=True)
				+
				+    subject_id = models.IntegerField(default=0)
				+    save_path = models.CharField(max_length=100, null=True)
				+    raw_big_box_path = models.CharField(max_length=100, null=True)
				+    subject = models.CharField(max_length=20, default='unknown_subject')
			
 
				+
			
 
				+
			
 
				+class SheetBoxes(models.Model):
				+    """Row keyed by ``paper_id`` holding the names and paths recorded for a sheet."""
				+    # Verbose name '更新日期' means "update date"; auto_now refreshes it on every save.
				+    update_time = models.DateTimeField('更新日期', auto_now=True)
				+    # Explicit primary key: replaces Django's implicit ``id`` column.
				+    paper_id = models.CharField(max_length=100, primary_key=True)
				+    raw_name = models.CharField(max_length=100, null=True)
				+    subject_id = models.IntegerField(default=0)
				+    subject = models.CharField(max_length=20, default='unknown_subject')
				+
				+    # NOTE(review): the *_path fields look like filesystem locations - confirm against the views.
				+    save_path = models.CharField(max_length=100, null=True)
				+    xml_box_path = models.CharField(max_length=100, null=True)
				+    download_path = models.CharField(max_length=100, default='')
			
--- a/segment/ocr/BD_OCR.py
+++ b/segment/ocr/BD_OCR.py
@@ -0,0 +1,43 @@
 
				+from selenium import webdriver
			
 
				+from selenium.webdriver.support.wait import WebDriverWait
			
 
				+from selenium.webdriver.support import expected_conditions as EC
			
 
				+from selenium.webdriver.common.by import By
			
 
				+import time
			
 
				+import traceback
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+def bd_ocr_file(pictures):
				+    """Run pictures through the Baidu OCR web demo and collect the text.
				+
				+    A Chrome session is driven with Selenium: each path in ``pictures`` is
				+    sent to the page's file input, and every ``"words"`` value found in the
				+    ``demo-json`` element is joined with single spaces.
				+
				+    :param pictures: iterable of image file paths to upload.
				+    :return: list of recognised strings in upload order; ``'空白'`` replaces
				+        an empty result.  Processing stops at the first picture that
				+        fails, so the list may be shorter than ``pictures``.
				+    """
				+    words_pattern = re.compile(r'"words": "(.*)?"')
				+    texts = []
				+    browser = webdriver.Chrome()
				+    try:
				+        browser.implicitly_wait(5)
				+        browser.maximize_window()
				+        browser.get('http://ai.baidu.com/tech/ocr/general')
				+        browser.execute_script("window.scrollTo(0, 850)")
				+        # Wait until the upload element is present in the page.
				+        wait = WebDriverWait(browser, 5)
				+        wait.until(EC.presence_of_element_located((By.ID, "demo-photo-upload")))
				+
				+        for picture in pictures:
				+            time.sleep(2)
				+            print("开始传文件")
				+            try:
				+                browser.find_element_by_css_selector('input[type="file"]').send_keys(picture)
				+                time.sleep(3)  # fixed pause before reading the result element
				+                html = browser.find_element_by_id("demo-json").text
				+                res = ' '.join(words_pattern.findall(html))
				+                texts.append(res or '空白')
				+            except Exception as e:
				+                print(e)
				+                traceback.print_exc()
				+                # Previously the browser was quit here while the loop kept
				+                # running against the dead session; stop instead.
				+                break
				+    finally:
				+        # Always release the browser, including when setup or the wait fails.
				+        browser.quit()
				+    return texts
			
 
				+
			
 
				+
			
 
				+
			
--- a/segment/ocr/__init__.py
+++ b/segment/ocr/__init__.py
@@ -0,0 +1,2 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
--- a/segment/ocr/group_pictures.py
+++ b/segment/ocr/group_pictures.py
@@ -0,0 +1,147 @@
 
				+import re
			
 
				+import shutil
			
 
				+
			
 
				+import glob
			
 
				+from pprint import pprint
			
 
				+
			
 
				+import segment.ocr.luo_ocr.ocr as luo_ocr
			
 
				+# from pypinyin import lazy_pinyin
			
 
				+from segment.ocr.split_topic_en import topic_type_line
			
 
				+
			
 
				+# def to_pinyin_camel(s):
			
 
				+#     '''文件123.txt'''
			
 
				+#     py_ls = lazy_pinyin(s)
			
 
				+#     py_camel = [py.capitalize() for py in py_ls]
			
 
				+#     return "".join(py_camel)
			
 
				+#
			
 
				+#
			
 
				+# def rename_filename(filename):
			
 
				+#     "将文件名转变为拼音"
			
 
				+#     filename_en = to_pinyin_camel(filename)
			
 
				+#     try:
			
 
				+#         shutil.copy(filename, filename_en)
			
 
				+#     except shutil.SameFileError:
			
 
				+#         pass
			
 
				+#     return filename_en
			
 
				+
			
 
				+
			
 
				+# def request_ocr(filename):
			
 
				+#     '''中文无法上传需要修改成英文'''
			
 
				+#     url = "http://117.50.17.141/ocr"
			
 
				+#     data = {}
			
 
				+#     filename = rename_filename(filename)
			
 
				+#     files = {"mydata": open(filename, "rb")}
			
 
				+#     r = requests.post(url, data, files=files)
			
 
				+#     print(filename)
			
 
				+#     print(r.json())
			
 
				+#     return r.json()['text']
			
 
				+
			
 
				+
			
 
				+# Raw strings: '\s', '\d' and '\.' are invalid escapes in ordinary literals.
				+topic_start = re.compile(r"^\s*(\d+)\s*[\.、:：,，]")
				+topic_start2 = re.compile(r"^\s*[(<〈《]?(\d+)\)\s*[\.、:：,，]?")
				+
				+
				+def is_topic_start(s, subject):
				+    """Tell whether text line ``s`` begins a new question.
				+
				+    A line starts a question when it opens with a number followed by one of
				+    the punctuation marks in ``topic_start``; for ``'math'`` a number closed
				+    by ``)`` (``topic_start2``) also counts.
				+
				+    :param s: one line of recognised text.
				+    :param subject: subject name; must be one of the six supported ones.
				+    :return: ``True`` or ``False``.
				+    :raises ValueError: if ``subject`` is not supported.
				+    """
				+    if subject not in ['math', 'english', 'chinese', 'physics', 'chemistry', 'biology']:
				+        raise ValueError("subject={} is not supported!".format(subject))
				+    if topic_start.match(s):
				+        return True
				+    if subject == 'math' and topic_start2.match(s):
				+        return True
				+    return False
			
 
				+
			
 
				+
			
 
				+# ------------------------- lines matching these patterns end a question -------------------------
				+# Raw strings: '\s' and '\.' are invalid escapes in ordinary literals.
				+topic_end = re.compile(r"D\s*[\.、:：]")
				+topic_end2 = re.compile(r"^\s*G\s*[\.、:：]")
				+
				+
				+def is_topic_end(s, subject):
				+    """Tell whether text line ``s`` closes the current question.
				+
				+    A line ends a question when it contains ``D`` followed by punctuation
				+    anywhere (``topic_end``); for ``'english'`` a line opening with ``G``
				+    plus punctuation (``topic_end2``) also counts.
				+
				+    :param s: one line of recognised text.
				+    :param subject: subject name; must be one of the six supported ones.
				+    :return: ``True`` or ``False``.
				+    :raises ValueError: if ``subject`` is not supported.
				+    """
				+    if subject not in ['math', 'english', 'chinese', 'physics', 'chemistry', 'biology']:
				+        raise ValueError("subject={} is not supported!".format(subject))
				+    if topic_end.search(s):
				+        return True
				+    if subject == 'english' and topic_end2.match(s):
				+        return True
				+    return False
			
 
				+
			
 
				+
			
 
				+# ------------------------- lines matching these are skipped and discarded -------------------------
				+# Raw strings: '\s' and '\.' are invalid escapes in ordinary literals.
				+# Line opening with a Chinese numeral followed by punctuation.
				+topic_filter = re.compile(r"^\s*[一二三四五六七八九十]+\s*[\.、:：]")
				+# Substrings that mark a line as skippable for the non-English subjects.
				+general_filter = ['选择题', '单选题', '多选题',
				+                  '填空题', '单空题', '多空题',
				+                  '解答题', '简答题', '证明题',
				+                  '选做题', '实验题', '第II卷',
				+                  '第I卷', '第二卷', ]
				+
				+# Each inner list is a set of substrings that must all occur in the line.
				+english_filter = [["听", "材料"], ["听", "对话"], ["听", "独白"],
				+                  ['第一节'], ['第二节'], ['语言知识运用'], ['第II卷'],
				+                  ['第二部分'], ['第三部分'], ['第四部分']]
				+
				+# Line opening with a single parenthesised Chinese numeral.
				+chinese_filter = re.compile(r"^\s*[(（]\s*[一二三四五六七八九十]\s*[）)]\s*[\.、:：]?")
			
 
				+
			
 
				+
			
 
				+def contains_all(s, words):
				+    """Return True if every item of at least one group in ``words`` occurs in ``s``.
				+
				+    :param s: text to inspect.
				+    :param words: iterable of groups, each an iterable of substrings.
				+    :return: ``True`` as soon as one group is fully contained, else ``False``
				+        (an empty group counts as contained).
				+    """
				+    return any(all([piece in s for piece in group]) for group in words)
			
 
				+
			
 
				+
			
 
				+def is_topic_skip(s, subject):
				+    """Decide whether line ``s`` should be dropped (a "skip" node).
				+
				+    English lines are tested against ``english_filter`` only; the other
				+    supported subjects are tested against ``topic_filter``, the
				+    ``general_filter`` substrings and, for Chinese, ``chinese_filter``.
				+
				+    :raises ValueError: if ``subject`` is not supported.
				+    """
				+    if subject == 'english':
				+        return contains_all(s, english_filter)
				+
				+    if subject not in ['math', 'chinese', 'physics', 'chemistry', 'biology']:
				+        raise ValueError("subject={} is not supported!".format(subject))
				+
				+    if topic_filter.match(s):
				+        return True
				+    if subject == 'chinese' and chinese_filter.match(s):
				+        return True
				+    return any(keyword in s for keyword in general_filter)
			
 
				+
			
 
				+
			
 
				+# ----------------------action----------------------
			
 
				+def group_pictures(pictures, subject=''):
				+    """OCR row pictures and group consecutive rows into questions.
				+
				+    Each picture is assumed to hold one row of text.  Rows are recognised
				+    with ``luo_ocr.ocr_py`` (carriage returns and newlines stripped) and
				+    then split into half-open index ranges ``[start, end)``.
				+
				+    :param pictures: sequence of row images accepted by ``luo_ocr.ocr_py``.
				+    :param subject: subject name forwarded to the ``is_topic_*`` helpers.
				+        The default ``''`` is not a supported subject, so callers must
				+        pass a real one whenever ``pictures`` is non-empty.
				+    :return: ``(texts, groups)`` - the recognised strings and the list of
				+        ``[start, end]`` index pairs.
				+    :raises ValueError: from the helpers when ``subject`` is unsupported.
				+    """
				+    texts = [luo_ocr.ocr_py(picture).replace("\r", "").replace("\n", "") for picture in pictures]
				+
				+    groups = []
				+    start = 0
				+    for i, t in enumerate(texts):
				+        if is_topic_start(t, subject):
				+            # Same guard as the skip branch: a start on the first row, or
				+            # right after an end/skip row, used to emit an empty [i, i].
				+            if i > start:
				+                groups.append([start, i])
				+            start = i
				+        elif is_topic_end(t, subject):
				+            groups.append([start, i + 1])
				+            start = i + 1
				+        elif is_topic_skip(t, subject):
				+            if i > start:
				+                groups.append([start, i])
				+            start = i + 1
				+
				+    # Whatever remains after the last boundary forms the final group.
				+    len_text = len(texts)
				+    if len_text > start:
				+        groups.append([start, len_text])
				+    return texts, groups
			
--- a/segment/ocr/group_text.py
+++ b/segment/ocr/group_text.py
@@ -0,0 +1,246 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : group_text.py
			
 
				+import re
			
 
				+
			
 
				+# Subject names accepted by the grouping helpers in this module.
				+subjects = ['unknown_subject', 'math', 'math_zxhx', 'english',
				+            'chinese', 'physics', 'chemistry',
				+            'biology', 'politics', 'history', 'geography',
				+            'science_comprehensive', 'arts_comprehensive']
				+
				+
				+# ------------------------- lines matching these patterns start a question -------------------------
				+# Raw strings: '\s', '\d', '\D', '\(' and '\.' are invalid escapes in ordinary literals.
				+general_start = re.compile(r"^\s*\d+\s*[\.、:：]\D|^\s*\d+\s*[\.、:：]\d{4}")
				+math_start = re.compile(r"^\s*\(\d+\)\s*[\.、:：]?")
				+chinese_start = re.compile(r"^\s*[(（]\s*[一二三四五六七八九十]\s*[）)]\s*[\.、:：]?")
				+write_start = re.compile(r"^\s*\(\d+\)\s*[\.、:：]?")
				+
				+
				+def is_topic_start(s, subject):
				+    '''Tell whether text line ``s`` begins a new question.
				+
				+    Any subject: a leading number plus punctuation followed by a non-digit
				+    or by four digits (``general_start``).  Math additionally accepts lines
				+    containing "本题"/"本小题" or opening with ``(n)``; Chinese additionally
				+    accepts a leading parenthesised Chinese numeral.
				+
				+    :param s: one line of recognised text.
				+    :param subject: subject name; must be listed in ``subjects``.
				+    :return: ``True`` or ``False``.
				+    :raises ValueError: if ``subject`` is not in ``subjects``.
				+    '''
				+    if subject not in subjects:
				+        raise ValueError("subject={} is not supported!".format(subject))
				+    if general_start.match(s):
				+        return True
				+    if subject == 'math':
				+        if "本题" in s or "本小题" in s:
				+            return True
				+        if math_start.match(s):
				+            return True
				+    elif subject == 'chinese':
				+        if chinese_start.match(s):
				+            return True
				+    return False
			
 
				+
			
 
				+
			
 
				+# ------------------------- lines matching these patterns end a question -------------------------
				+# Raw strings: '\s' and '\.' are invalid escapes in ordinary literals.
				+general_end = re.compile(r"D\s*[\.、:：]")
				+english_end = re.compile(r"^\s*[EFG]\s*[\.、:：]|^\s*[EFG]\s+")
				+chinese_end = re.compile(r"^\s*[EFG]\s*[\.、:：]")
				+written_expression = re.compile(r'书面表达')
				+written_expression1 = re.compile(r'短文改错|翻译句子')
				+
				+
				+def is_topic_end(s, subject):
				+    '''Tell whether text line ``s`` closes the current question.
				+
				+    :param s: one line of recognised text.
				+    :param subject: subject name; must be listed in ``subjects``.
				+    :return: for ``'english'``, ``True`` when ``general_end`` is found
				+        anywhere, otherwise the string ``"G"`` when the line opens with an
				+        E/F/G option; for ``'chinese'``, ``True`` when the line opens with
				+        an E/F/G option; ``False`` in every other case.  Callers compare
				+        the result with ``"G"``, so the mixed return type is kept.
				+    :raises ValueError: if ``subject`` is not in ``subjects``.
				+    '''
				+    if subject not in subjects:
				+        raise ValueError("subject={} is not supported!".format(subject))
				+    if subject == 'english':
				+        if general_end.search(s):
				+            return True
				+        if english_end.search(s):
				+            return "G"
				+    elif subject == 'chinese':
				+        if chinese_end.search(s):
				+            return True
				+    return False
			
 
				+
			
 
				+
			
 
				+# ------------------------- lines matching these are skipped and discarded -------------------------
				+# Raw strings: '\s', '\d' and '\.' are invalid escapes in ordinary literals.
				+# Line opening with a Chinese numeral followed by punctuation.
				+general_filter1 = re.compile(r"^\s*[一二三四五六七八九十]+\s*[\.、:：]")
				+# Substrings that mark a line as skippable for every subject.
				+general_filter2 = ['选择题', '单选题', '多选题', '综合题', '答案无效', '题目要求',
				+                   '填空题', '单空题', '多空题', '计算题', '演算步骤', '单元测试', '古代诗歌阅读',
				+                   '解答题', '简答题', '证明题', '按要求填写下列空格', '单项选择题', '注意事项',
				+                   '选做题', '实验题', '第II卷', '第Ⅱ卷', '一律得零分', '证明过程', '现代文阅读',
				+                   '第二卷', '答题卡', '试卷满分', '选题人', '最佳选项', '填写结果', '选不全', '文言文阅读',
				+                   '答题时间', '分值', '题目要求', '阅读下面文字', '阅读下面短文', '阅读下列短文',
				+                   '甲必考题', '必考题', '读一遍', '题卡', '符合题目要求', '规定区域', '符合要求', '阅读下面']
				+
				+# Page-number fragments such as "第3页" / "共8页".
				+end_sign = re.compile(r"第\d+页|共\d+页|页\d+第|\d+第|第[(（]\d+[)）]页|共[(（]\d+[)）]页|共[(（]\d+[)）]页$")
				+
				+# Each inner list is a set of substrings that must all occur in the line.
				+english_filter = [["听", "材料"], ["听", "对话"], ["听", "独白"],
				+                  ['第二节'], ['语言知识运用'], ['第II卷'], ['录音'], ['作答时间'], ['选项'],
				+                  ['第二部分'], ['第三部分'], ['第四部分'], ['第一部分'], ['第一节'], ['阅读下列短文'], ['短文'], ['阅读下面短文'], ['阅读']]
			
 
				+
			
 
				+
			
 
				+def contains_all(s, words):
				+    '''Return True if every item of at least one group in ``words`` occurs in ``s``.
				+
				+    Note that ``all()`` of an empty group is True, so an empty inner
				+    list or tuple always counts as a match.
				+    '''
				+    return any(all([piece in s for piece in group]) for group in words)
			
 
				+
			
 
				+
			
 
				+def is_topic_skip(s, subject):
				+    '''Decide whether line ``s`` should be dropped (a "skip" node).
				+
				+    A line is skipped when it matches ``general_filter1`` or ``end_sign``
				+    or contains any ``general_filter2`` substring; English lines are
				+    finally tested against ``english_filter``.
				+
				+    :raises ValueError: if ``subject`` is not in ``subjects``.
				+    '''
				+    if subject not in subjects:
				+        raise ValueError("subject={} is not supported!".format(subject))
				+
				+    if general_filter1.match(s) or end_sign.search(s):
				+        return True
				+    if any(phrase in s for phrase in general_filter2):
				+        return True
				+    if subject == 'english':
				+        return contains_all(s, english_filter)
				+    return False
			
 
				+
			
 
				+
			
 
				+# ----------------------- end of the whole paper ---------------------
				+
				+# Each pair must occur together in a line for it to count as the end marker.
				+end_words = [["参考", "答案"], ["试题", "答案"], ["试卷", "答案"],
				+             ["省", "学年"], ["省", "学期"], ["市", "学年"], ["市", "学期"]]
				+
				+
				+def all_end(s, subject):
				+    '''Return True if ``s`` contains both words of any ``end_words`` pair.
				+
				+    Always False for the subjects ``"chinese"`` and ``'english'``.
				+    '''
				+    if subject in ["chinese", 'english']:
				+        return False
				+    return any(all([part in s for part in pair]) for pair in end_words)
			
 
				+
			
 
				+
			
 
				+# ----------------------action----------------------
			
 
				+def group_pictures1(abcd_texts, subject=''):
				+    '''Split recognised text lines into per-question index ranges.
				+
				+    :param abcd_texts: dict whose ``'text'`` entry is the list of lines.
				+    :param subject: subject name forwarded to the ``is_topic_*`` helpers
				+        and ``all_end``.  The default ``''`` is not in ``subjects``, so the
				+        helpers raise once they are reached.
				+    :return: list of ``[start, end]`` pairs (half-open ranges into the
				+        text list) with every ``start == end`` pair removed.
				+    :raises ValueError: from the helpers when ``subject`` is unsupported.
				+    '''
				+    texts = abcd_texts['text']
				+    groups = []
				+    start = 0
				+    text_end = 0
				+    # Section headings; everything collected before one is discarded.
				+    pattern = re.compile(r"[一]?[\.。、(（:：,，]?选择题[\.。、(（:：,，]|一[\.、(（。:：,，]?填空题[\.。、(（:：,，]|[一]?[\.。、(（:：,，]?单项选择题|[一]?[\.。、(（:：,，]?单项选择|[一]?[\.。、(（:：,，]?现代文阅读[\.。、(（:：,，]|[一]?[\.。、(（:：,，]?单选题[\.。、(（:：,，]")
				+    pattern1 = re.compile(r'第I卷|第〡卷|第Ⅰ卷|第I卷阅读题|第一部分')
				+
				+    for i, t in enumerate(texts):
				+        if pattern.match(t) or pattern1.match(t):
				+            groups.clear()
				+            start = i + 1
				+            continue
				+        if is_topic_start(t, subject):
				+            groups.append([start, i])
				+            start = i
				+            continue
				+
				+        # Evaluate once: the result is True, "G" or False.
				+        end_kind = is_topic_end(t, subject)
				+        if end_kind:
				+            if end_kind == "G" and start != 0 and groups:
				+                # An E/F/G option line extends the previous group.  The
				+                # `groups` check avoids popping from an empty list right
				+                # after a section heading cleared it.
				+                groups[-1] = [groups[-1][0], i + 1]
				+            else:
				+                groups.append([start, i + 1])
				+            start = i + 1
				+        elif is_topic_skip(t, subject):
				+            if i > start:
				+                groups.append([start, i])
				+            start = i + 1
				+        elif all_end(t, subject):
				+            text_end = i
				+
				+    len_text = len(texts)
				+    if len_text > start:
				+        # Close the last group at the end-of-paper marker when one was seen.
				+        if text_end:
				+            groups.append([start, text_end])
				+        else:
				+            groups.append([start, len_text])
				+
				+    # Build a new list: deleting while enumerating skipped the entry that
				+    # followed each removed one, leaving some empty groups behind.
				+    return [pair for pair in groups if pair[0] != pair[1]]
			
 
				+
			
 
				+
			
 
				+def segment(texts):
				+    '''Split ``texts`` at the first writing-section heading.
				+
				+    A heading is any line containing 短文改错, 翻译句子 or 书面表达.
				+
				+    :param texts: list of text lines.
				+    :return: ``(head, tail)``.  ``head`` is a dict with ``'start_index'`` 0
				+        and the lines before the heading; ``tail`` is a dict holding the
				+        heading's index and the lines from it onward, or ``[]`` when no
				+        heading exists (then ``head`` carries all of ``texts``).
				+    '''
				+    heading = re.compile(r'短文改错|翻译句子|书面表达')
				+    hits = [idx for idx, line in enumerate(texts) if heading.search(line)]
				+
				+    if not hits:
				+        return {'start_index': 0, 'text': texts}, []
				+
				+    cut = hits[0]  # indices are ascending, so this is the smallest
				+    head = {'start_index': 0, 'text': texts[:cut]}
				+    tail = {'start_index': cut, 'text': texts[cut:]}
				+    return head, tail
			
 
				+
			
 
				+
			
 
				+def match_writing_section(texts, subject='english'):
				+    '''Group the lines of an English writing section by heading.
				+
				+    :param texts: dict with ``'start_index'`` (offset of the section in the
				+        full text) and ``'text'`` (the section's lines).
				+    :param subject: only ``'english'`` produces groups.
				+    :return: list of ``[start, end]`` pairs in full-text coordinates, one
				+        per heading line (短文改错 / 翻译句子 / 书面表达); each starts on the
				+        line after its heading and ends at the next heading or the end of
				+        the section.  ``[]`` for any other subject.
				+    '''
				+    if subject != 'english':
				+        return []
				+
				+    offset = texts['start_index']
				+    lines = texts['text']
				+    heading = re.compile(r'短文改错|翻译句子|书面表达')
				+
				+    # Heading positions plus the section length as the closing boundary.
				+    bounds = [idx for idx, line in enumerate(lines) if heading.search(line)]
				+    bounds.append(len(lines))
				+    bounds = sorted(set(bounds))
				+
				+    return [[lo + offset + 1, hi + offset] for lo, hi in zip(bounds, bounds[1:])]
			
 
				+
			
 
				+
			
 
				+def group_text(all_texts, subject):
				+    '''Group recognised lines into question ranges for ``subject``.
				+
				+    English text is first split by ``segment``: the part before the
				+    writing section goes through ``group_pictures1`` and, when a writing
				+    section exists, its groups from ``match_writing_section`` are appended.
				+    Every other subject goes straight through ``group_pictures1``.
				+
				+    :param all_texts: list of text lines.
				+    :param subject: subject name.
				+    :return: list of ``[start, end]`` index pairs.
				+    '''
				+    if subject != 'english':
				+        return group_pictures1({'text': all_texts}, subject)
				+
				+    abcd_sec, writing_sec = segment(all_texts)
				+    group_list = group_pictures1(abcd_sec, subject)
				+    if len(writing_sec) > 0:
				+        group_list = group_list + match_writing_section(writing_sec, subject)
				+    return group_list
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
				+    # Manual check: group the lines of a local text file and print the ranges.
				+    subject = 'english'
				+    txt_path = r'G:\write\112.txt'
				+    # Context manager so the file handle is closed after reading.
				+    with open(txt_path, 'r') as txt_file:
				+        all_texts = txt_file.readlines()
				+    group_list = group_text(all_texts, subject)
				+    print(group_list)
			
--- a/segment/ocr/luo_ocr/__init__.py
+++ b/segment/ocr/luo_ocr/__init__.py
--- a/segment/ocr/luo_ocr/ocr.py
+++ b/segment/ocr/luo_ocr/ocr.py
@@ -0,0 +1,64 @@
 
				+from . import preprocess
			
 
				+from . import sheetocr
			
 
				+import time
			
 
				+import os
			
 
				+import cv2
			
 
				+
			
 
				+# sheetpath = r'C:\Users\Administrator\Desktop\sheet'  # 预处理前的试卷目录
			
 
				+# testpath = r'C:\Users\Administrator\Desktop\test'  # 预处理后的试卷目录
			
 
				+# resultpath = r'C:\Users\Administrator\Desktop\result'  # 结果生成目录
			
 
				+
			
 
				+# Parameter sets
				+# Language choices as bare strings (passed as ``lang=`` by ocr_py).
				+langs_py = {
				+    'ce': 'chi_sim+eng',
				+    'ec': 'eng+chi_sim',
				+    'c': 'chi_sim',
				+    'e': 'eng',
				+    'eq': 'eng+equ',
				+}
				+# The same choices spelled as ``-l`` command-line arguments (used by ocr).
				+langs = {key: '-l ' + value for key, value in langs_py.items()}
				+# Page segmentation modes (a single-line mode, ' --psm 7', is not enabled).
				+psms = {
				+    'block': '--psm 6',
				+    'default': '--psm 3',
				+}
				+# oems = {'legacy': '--oem 0', 'lstm': '--oem 1', 'lstm+legacy': '--oem 2'}  # OCR Engine modes
				+psms_py = dict(psms)
				+# Value grids swept by test_parameters.
				+scales = (0, 0.5, 2)
				+dilates = (0, 1, 3, 5)
				+blurs = (0, 1, 3, 5, 7)
			
 
				+
			
 
				+
			
 
				+# Process a picture with the default (best-found) parameters and return the text.
				+def ocr_py(picture, lang='ce', psm='block', scale=0, dilate=1, blur=5):
				+    """Preprocess ``picture`` and return the text recognised in it.
				+
				+    :param picture: image input forwarded to ``preprocess.preprocess``.
				+    :param lang: key into ``langs_py``.
				+    :param psm: key into ``psms_py``.
				+    :param scale, dilate, blur: preprocessing parameters, forwarded unchanged.
				+    :return: whatever ``sheetocr.sheetocr_py`` returns for the processed image.
				+    """
				+    prepared = preprocess.preprocess(picture, scale=scale, dilate=dilate, blur=blur)
				+    return sheetocr.sheetocr_py(prepared, lang=langs_py[lang], psm=psms_py[psm])
			
 
				+
			
 
				+
			
 
				+# Process a picture with the default (best-found) parameters and write a text file.
				+def ocr(picture, output, lang='ce', psm='block', scale=0, dilate=1, blur=5):
				+    """Preprocess ``picture`` and OCR it to ``output`` via ``sheetocr.sheetocr``.
				+
				+    :param picture: image input forwarded to ``preprocess.preprocess``.
				+    :param output: output base name handed to ``sheetocr.sheetocr``.
				+    :param lang: key into ``langs``.
				+    :param psm: key into ``psms``.
				+    :param scale, dilate, blur: preprocessing parameters, forwarded unchanged.
				+    """
				+    image = preprocess.preprocess(picture, scale=scale, dilate=dilate, blur=blur)
				+    # cv2.imwrite picks the encoder from the file extension, so the
				+    # temporary file needs one; PNG keeps the binarised image lossless.
				+    tmp_path = 'tmp_pic.png'
				+    cv2.imwrite(tmp_path, image)
				+    try:
				+        sheetocr.sheetocr(tmp_path, output, lang=langs[lang], psm=psms[psm])
				+    finally:
				+        # Remove the temporary image even when the OCR step fails.
				+        os.remove(tmp_path)
			
 
				+
			
 
				+
			
 
				+# Sweep the preprocessing parameters to find the best combination.
				+def test_parameters(picture_path, output=0):
				+    """OCR every file under ``picture_path`` with each parameter combination.
				+
				+    All combinations of ``scales`` x ``dilates`` x ``blurs`` are tried.
				+
				+    :param picture_path: directory walked recursively for pictures.
				+    :param output: ``0`` prints the results to the screen; any other value
				+        is a directory receiving one text file per picture and combination,
				+        named ``<file>s<scale>d<dilate>b<blur>``.
				+    """
				+    from itertools import product
				+
				+    start = time.time()
				+    for root, dirs, files in os.walk(picture_path):
				+        for file in files:
				+            picture = os.path.join(root, file)
				+            # product() iterates in the same order as the nested s/d/b loops.
				+            for s, d, b in product(scales, dilates, blurs):
				+                tag = 's' + str(s) + 'd' + str(d) + 'b' + str(b)
				+                if output == 0:  # show on screen
				+                    print('Parameters:' + tag + '\n')
				+                    words = ocr_py(picture, scale=s, dilate=d, blur=b)
				+                    print(words)
				+                else:  # write into a file inside the `output` directory
				+                    save = os.path.join(output, file + tag)
				+                    words = ocr_py(picture, scale=s, dilate=d, blur=b)
				+                    # Mode 'w': the file was opened with 'r', which cannot
				+                    # be written to and fails when the file does not exist.
				+                    with open(save, 'w', encoding='UTF-8') as f:
				+                        f.write(words)
				+    end = time.time()
				+    print('running time:', end - start, 's')
			
 
				+
			
 
				+
			
 
				+# test_parameters(sheetpath)
			
 
				+# print('OCR done!\n')
			
--- a/segment/ocr/luo_ocr/preprocess.py
+++ b/segment/ocr/luo_ocr/preprocess.py
@@ -0,0 +1,85 @@
 
				+"""
			
 
				+图像预处理，提高OCR识别率
			
 
				+1.  图像光照均匀性分析，获取图像光照分布map;
			
 
				+
			
 
				+2.  图像边缘分析、得到图像的边缘分布模型，得到总体的边缘度量权值w1;
			
 
				+
			
 
				+3.  图像模糊度计算，得到图像模糊权值w2.
			
 
				+
			
 
				+4.  对图像进行局部分块处理，利用图像对应分块的map特征、w1、w2的权值，得到图像每个分块的局部二值结果。
			
 
				+
			
 
				+5.  对整幅图像局部二值化结果进行空白填充处理防止字符断裂，对分割的游离点进行分析剔除异常噪点
			
 
				+
			
 
				+6.  直线剔除。
			
 
				+
			
 
				+Scaling To The Right Size
			
 
				+
			
 
				+Ensure that the images are scaled to the right size which usually is of at least 300 DPI (Dots Per Inch). Keeping DPI
			
 
				+lower than 200 will give unclear and incomprehensible results while keeping the DPI above 600 will unnecessarily
			
 
				+increase the size of the output file without improving the quality of the file. Thus, a DPI of 300 works best for this
			
 
				+purpose.
			
 
				+
			
 
				+Increase Contrast
			
 
				+
			
 
				+Low contrast can result in poor OCR. Increase the contrast and density before carrying out the OCR process. This can be
			
 
				+done in the scanning software itself or in any other image processing software. Increasing the contrast between the
			
 
				+text/image and its background brings out more clarity in the output.
			
 
				+
			
 
				+Binarize Image
			
 
				+
			
 
				+This step converts a multicolored image (RGB) to a black and white image. There are several algorithms to convert a
			
 
				+color image to a monochrome image, ranging from simple thresholding to more sophisticated zonal analysis.
			
 
				+
			
 
				+Remove Noise and Scanning Artefacts
			
 
				+
			
 
				+Noise can drastically reduce the overall quality of the OCR process. It can be present in the background or foreground
			
 
				+and can result from poor scanning or the poor original quality of the data.
			
 
				+
			
 
				+Deskew
			
 
				+
			
 
				+This may also be referred to as rotation. This means de-skewing the image to bring it in the right format and right
			
 
				+shape. The text should appear horizontal and not tilted in any angle. If the image is skewed to any side, deskew it by
			
 
				+rotating it clockwise or anti clockwise direction.
			
 
				+
			
 
				+Layout Analysis (or Zone Analysis)
			
 
				+
			
 
				+In order to detect words correctly, it is important to first recognize the zones or the layout (which are also the areas
			
 
				+of interest). This step detects the paragraphs, tables, columns, captions of the images etc. If the software misses out
			
 
				+on any zone or layout, words might be cut in half or not detected at all.
			
 
				+"""
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from segment.image_operation import utils
			
 
				+
			
 
				+
			
 
				+# Read a picture and produce the preprocessed (binarised) image.
				+def preprocess(picture, scale, dilate, blur, show=False):
				+    """Load ``picture`` and return a black-and-white version of it.
				+
				+    Steps, in order: optional rescale, grey conversion, optional
				+    dilate-then-erode, optional Gaussian blur, Otsu thresholding.
				+
				+    :param picture: input handed to ``utils.read_img``.
				+    :param scale: resize factor for both axes; ``0`` skips resizing.
				+    :param dilate: side of the square kernel; ``0`` skips dilate/erode.
				+    :param blur: Gaussian kernel size; ``0`` skips blurring.
				+    :param show: when true, display the result and wait for a key press.
				+    :return: the thresholded image.
				+    """
				+    frame = utils.read_img(picture)
				+
				+    # Rescale with cubic interpolation.
				+    if scale != 0:
				+        frame = cv2.resize(frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
				+
				+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
				+
				+    # Dilate, then erode with the same kernel, to remove some noise.
				+    if dilate != 0:
				+        kernel = np.ones((dilate, dilate), np.uint8)
				+        dilated = cv2.dilate(gray, kernel, iterations=1)
				+        gray = cv2.erode(dilated, kernel, iterations=1)
				+
				+    # Smooth the edges.
				+    if blur != 0:
				+        gray = cv2.GaussianBlur(gray, (blur, blur), 0)
				+
				+    # Binarise; element [1] of the result is the thresholded image.
				+    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
				+
				+    if show:
				+        cv2.imshow('image', binary)
				+        cv2.waitKey(0)
				+        cv2.destroyAllWindows()
				+    return binary
			
--- a/segment/ocr/luo_ocr/sheetocr.py
+++ b/segment/ocr/luo_ocr/sheetocr.py
@@ -0,0 +1,67 @@
 
				+'''
			
 
				+使用Tesseract 对试卷做OCR
			
 
				+
			
 
				+Tesseract Usage:
			
 
				+  tesseract --help | --help-extra | --help-psm | --help-oem | --version
			
 
				+  tesseract --list-langs [--tessdata-dir PATH]
			
 
				+  tesseract --print-parameters [options...] [configfile...]
			
 
				+  tesseract imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]
			
 
				+
			
 
				+OCR options:
			
 
				+  --tessdata-dir PATH   Specify the location of tessdata path.
			
 
				+  --user-words PATH     Specify the location of user words file.
			
 
				+  --user-patterns PATH  Specify the location of user patterns file.
			
 
				+  -l LANG[+LANG]        Specify language(s) used for OCR.
			
 
				+  -c VAR=VALUE          Set value for config variables.
			
 
				+                        Multiple -c arguments are allowed.
			
 
				+  --psm NUM             Specify page segmentation mode.
			
 
				+  --oem NUM             Specify OCR Engine mode.
			
 
				+NOTE: These options must occur before any configfile.
			
 
				+
			
 
				+Page segmentation modes:
			
 
				+  0    Orientation and script detection (OSD) only.
			
 
				+  1    Automatic page segmentation with OSD.
			
 
				+  2    Automatic page segmentation, but no OSD, or OCR.
			
 
				+  3    Fully automatic page segmentation, but no OSD. (Default)
			
 
				+  4    Assume a single column of text of variable sizes.
			
 
				+  5    Assume a single uniform block of vertically aligned text.
			
 
				+  6    Assume a single uniform block of text.
			
 
				+  7    Treat the image as a single text line.
			
 
				+  8    Treat the image as a single word.
			
 
				+  9    Treat the image as a single word in a circle.
			
 
				+ 10    Treat the image as a single character.
			
 
				+ 11    Sparse text. Find as much text as possible in no particular order.
			
 
				+ 12    Sparse text with OSD.
			
 
				+ 13    Raw line. Treat the image as a single text line,
			
 
				+       bypassing hacks that are Tesseract-specific.
			
 
				+
			
 
				+OCR Engine modes:
			
 
				+  0    Legacy engine only.
			
 
				+  1    Neural nets LSTM engine only.
			
 
				+  2    Legacy + LSTM engines.
			
 
				+  3    Default, based on what is available.
			
 
				+
			
 
				+Single options:
			
 
				+  -h, --help            Show minimal help message.
			
 
				+  --help-extra          Show extra help for advanced users.
			
 
				+  --help-psm            Show page segmentation modes.
			
 
				+  --help-oem            Show OCR Engine modes.
			
 
				+  -v, --version         Show version information.
			
 
				+  --list-langs          List available languages for tesseract engine.
			
 
				+  --print-parameters    Print tesseract parameters.
			
 
				+
			
 
				+'''
			
 
				+import os
			
 
				+import pytesseract
			
 
				+
			
 
				+
			
 
				+# OCR a picture file into a text file; good parameters are -l chi_sim+eng --psm 6
				+def sheetocr(picture, output, lang, psm):
				+    """Run the ``tesseract`` command-line tool on ``picture``.
				+
				+    :param picture: path of the image file.
				+    :param output: tesseract output base name.
				+    :param lang: language option string, e.g. ``'-l chi_sim+eng'``.
				+    :param psm: page-segmentation option string, e.g. ``'--psm 6'``.
				+    :raises FileNotFoundError: if the ``tesseract`` executable is not found.
				+    """
				+    import shlex
				+    import subprocess
				+
				+    # Argument list instead of a concatenated shell string: paths with
				+    # spaces or shell metacharacters reach tesseract unchanged.
				+    cmd = ['tesseract', picture, output] + shlex.split(lang) + shlex.split(psm)
				+    # check=False: a non-zero exit status is ignored, as with os.system.
				+    subprocess.run(cmd, check=False)
			
 
				+
			
 
				+
			
 
				+# OCR an in-memory image into text; good parameters are 'chi_sim+eng', '--psm 6'
				+def sheetocr_py(img, lang, psm):
				+    """Return ``pytesseract.image_to_string`` for ``img``.
				+
				+    :param img: image to recognise.
				+    :param lang: language string passed as ``lang=``.
				+    :param psm: option string passed as ``config=``.
				+    """
				+    return pytesseract.image_to_string(img, lang=lang, config=psm)
			
--- a/segment/ocr/penguin_ocr.py
+++ b/segment/ocr/penguin_ocr.py
@@ -0,0 +1,144 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : penguin_ocr.py
			
 
				+# @Time    : 2019/6/11 0011 下午 17:52
			
 
				+import base64
			
 
				+import hashlib
			
 
				+import random
			
 
				+import string
			
 
				+import time
			
 
				+from urllib.parse import urlencode
			
 
				+
			
 
				+import cv2
			
 
				+import requests
			
 
				+from requests.adapters import HTTPAdapter
			
 
				+from urllib3.util.retry import Retry
			
 
				+
			
 
				+from segment.image_operation.utils import resize_by_percent, write_single_img
			
 
				+
			
 
				+APP_KEY = 'R2iPkd5J2056YFRw'
			
 
				+APP_ID = '2117302084'
			
 
				+
			
 
				+
			
 
				+def opecv2base64(img):
			
 
				+    image = cv2.imencode('.jpg', img)[1]
			
 
				+    base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+    return base64_data
			
 
				+
			
 
				+
			
 
				+def get_base64_size(base64_str):
			
 
				+    length = len(base64_str)
			
 
				+    size = float(length - 2 * length/8)  # byte
			
 
				+    return size
			
 
				+
			
 
				+
			
 
				+def opecv2base64_stand(raw_image, mem_size, default_size=1):  # 小于1M
			
 
				+    default_size = default_size * 1000 * 1000
			
 
				+    m_ratio = mem_size/default_size
			
 
				+    if m_ratio > 1.0:
			
 
				+        y, x = raw_image.shape[0], raw_image.shape[1]
			
 
				+        s_ratio = max(y, x) / 1200
			
 
				+        ratio = max(m_ratio, s_ratio)
			
 
				+        image_resize = resize_by_percent(raw_image, 1/ratio)
			
 
				+        # img_gray = cv2.cvtColor(raw_image, cv2.COLOR_RGB2GRAY)
			
 
				+        write_single_img(image_resize, r'C:\Users\Administrator\Desktop\p\01_r.jpg')
			
 
				+        return opecv2base64(image_resize)
			
 
				+    else:
			
 
				+        return opecv2base64(raw_image)
			
 
				+
			
 
				+
			
 
				+def _get_sign(params, app_key):
			
 
				+    sort_dict = sorted(params.items(), key=lambda item: item[0], reverse=False)
			
 
				+    sort_dict.append(('app_key', app_key))
			
 
				+    rawtext = urlencode(sort_dict).encode()
			
 
				+    sha = hashlib.md5()
			
 
				+    sha.update(rawtext)
			
 
				+    md5text = sha.hexdigest().upper()
			
 
				+
			
 
				+    return md5text
			
 
				+
			
 
				+
			
 
				+def please_retry(response, url, data, headers):
			
 
				+    status_code = response.status_code
			
 
				+    if status_code == 200:
			
 
				+        resp = response.json()
			
 
				+        if 'ok' != resp.get('msg'):
			
 
				+
			
 
				+            try_iter = 0
			
 
				+            while try_iter < 3:
			
 
				+                response = requests.post(url, data=data, headers=headers, timeout=15)
			
 
				+
			
 
				+            print(resp)
			
 
				+
			
 
				+
			
 
				+def get_ocr_english_text_raw_format(img, size):
			
 
				+    url = 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_generalocr'
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+    image_base64 = opecv2base64_stand(img, size)  # 得到 base64 编码的数据
			
 
				+    nonce_str = ''.join(random.sample(string.ascii_letters + string.digits, 10))
			
 
				+    data = {
			
 
				+        'app_id': APP_ID,
			
 
				+        'image': image_base64,
			
 
				+        'time_stamp': str(int(time.time())),
			
 
				+        'nonce_str': nonce_str,
			
 
				+    }
			
 
				+
			
 
				+    sign = _get_sign(data, APP_KEY)
			
 
				+    data['sign'] = sign
			
 
				+
			
 
				+    s = requests.Session()
			
 
				+    retries = Retry(total=3,
			
 
				+                    backoff_factor=0.1,
			
 
				+                    status_forcelist=[500, 502, 503, 504])
			
 
				+    s.mount('https://', HTTPAdapter(max_retries=retries))
			
 
				+
			
 
				+    # response = requests.post(url, data=data, headers=headers)
			
 
				+    response = s.post(url, data=data, headers=headers, timeout=15)
			
 
				+
			
 
				+    final_response = ''
			
 
				+    for _ in range(0, 3):
			
 
				+        status_code = response.status_code
			
 
				+        if status_code == 200 and 'ok' == response.json().get('msg'):
			
 
				+            final_response = response
			
 
				+            # print('ok')
			
 
				+            break
			
 
				+        else:
			
 
				+            response = s.post(url, data=data, headers=headers, timeout=15)
			
 
				+            # print('retry')
			
 
				+
			
 
				+    if final_response:
			
 
				+        status_code = final_response.status_code
			
 
				+        if status_code == 200:
			
 
				+            resp = response.json()
			
 
				+            if 'ok' != resp.get('msg'):
			
 
				+                # print(resp)
			
 
				+                raise Exception("ocr error {}: {}!".format(resp.get('ret'), resp.get('msg')))
			
 
				+            # print(resp)
			
 
				+        else:
			
 
				+            raise ValueError('ocr failed, response[{}]'.format(status_code))
			
 
				+    else:
			
 
				+        raise ValueError('ocr failed, retried three times while no response')
			
 
				+
			
 
				+    return resp
			
 
				+
			
 
				+
			
 
				+def ocr_format(resp):
			
 
				+    item_list = resp['data']['item_list']
			
 
				+    words_str_list = []
			
 
				+    for item_index, item in enumerate(item_list):
			
 
				+        words_list = item['words']
			
 
				+        words_str = ''
			
 
				+        for char_index, char_dict in enumerate(words_list):
			
 
				+            char = char_dict['character']
			
 
				+            if char == '':
			
 
				+                char = ' '
			
 
				+            words_str = words_str + char
			
 
				+        words_str_list.append(words_str.lstrip())
			
 
				+
			
 
				+    return words_str_list
			
 
				+
			
 
				+
			
 
				+def get_ocr_english_text(image, size):
			
 
				+    resp = get_ocr_english_text_raw_format(image, size)
			
 
				+    words_list = ocr_format(resp)
			
 
				+    return words_list
			
 
				+
			
--- a/segment/ocr/split_topic_en.py
+++ b/segment/ocr/split_topic_en.py
@@ -0,0 +1,36 @@
 
				+inf_words_dict = dict()
			
 
				+with open("./segment/ocr/type_config.txt", "r", encoding="utf-8") as f:
			
 
				+    for i, line in enumerate(f):
			
 
				+        if line.startswith("#"):
			
 
				+            continue
			
 
				+        line = line.strip().replace("：", ":").replace("，", ",")
			
 
				+        key, val = line.split(":")
			
 
				+        key = key.strip()
			
 
				+        val = val.split(",")
			
 
				+        val = tuple(v.strip() for v in val)
			
 
				+        inf_words_dict[val] = key
			
 
				+
			
 
				+# 答案冒号 = "答案:"
			
 
				+# 解析冒号 = "解析:"
			
 
				+
			
 
				+
			
 
				+def could_skip_line(line):
			
 
				+    '''对于答案和解析行，不进行type_inf'''
			
 
				+    return line.startswith("答案:") or line.startswith("解析:")
			
 
				+
			
 
				+
			
 
				+def contains_all(s, words):
			
 
				+    return all([w in s for w in words])
			
 
				+
			
 
				+
			
 
				+def topic_type_line(line):
			
 
				+    if could_skip_line(line):
			
 
				+        return False
			
 
				+    for key, val in inf_words_dict.items():
			
 
				+        if contains_all(line, key):
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/segment/ocr/type_config.txt
+++ b/segment/ocr/type_config.txt
@@ -0,0 +1,165 @@
 
				+考试听力：听力
			
 
				+考试听力：听,录音
			
 
				+考试听力：听,材料
			
 
				+考试听力：听,对话
			
 
				+考试听力：听,短文
			
 
				+考试听力: 听,独白
			
 
				+考试听力：听力理解
			
 
				+#考试听力：listening
			
 
				+考试听力：hear, recording
			
 
				+考试听力：hear, recordings
			
 
				+考试听力：hear, material
			
 
				+考试听力：hear, materials
			
 
				+考试听力：hear, conversation
			
 
				+考试听力：hear, conversations
			
 
				+考试听力：hear, passage
			
 
				+考试听力：hear, passages
			
 
				+考试听力：hear, monologue
			
 
				+考试听力：hear, monologues
			
 
				+考试听力：listening, comprehension
			
 
				+单项填空: 单项填空
			
 
				+单项填空: 单项选择
			
 
				+单项填空: 单选
			
 
				+完形填空: 完形填空
			
 
				+完形填空: 完型填空
			
 
				+#完形填空: cloze
			
 
				+完形填空: cloze test
			
 
				+阅读理解: 阅读理解
			
 
				+阅读理解: reading comprehension
			
 
				+七选五: 七选五
			
 
				+七选五: 七个选项
			
 
				+七选五: 两项,多余
			
 
				+七选五: 两项,多余选项
			
 
				+七选五: 两项,多于选项
			
 
				+七选五: 两个,多余
			
 
				+七选五: 两个,多余选项
			
 
				+七选五: 两个,多于选项
			
 
				+七选五: seven options
			
 
				+七选五: two items, surplus
			
 
				+语法填空: 语法填空
			
 
				+语法填空: 短文, 适当形式
			
 
				+语法填空：短文, 正确形式
			
 
				+语法填空：材料, 适当形式
			
 
				+语法填空：材料, 正确形式
			
 
				+语法填空：passage, proper form
			
 
				+语法填空：passage, correct form
			
 
				+语法填空：material, proper form
			
 
				+语法填空：material, correct form
			
 
				+语法填空：grammar and vocabulary
			
 
				+语法填空：vocabulary and grammar
			
 
				+选词填空: 选词填空
			
 
				+选词填空: proper word, box
			
 
				+任务型阅读: 任务型阅读
			
 
				+任务型阅读: 任务型读写
			
 
				+任务型阅读: task-based reading
			
 
				+任务型阅读: task-based writing
			
 
				+阅读表达: 阅读表达
			
 
				+阅读表达: 阅读, 表达
			
 
				+阅读表达: reading expression
			
 
				+短文改错: 短文改错
			
 
				+短文改错：单句改错
			
 
				+#短文改错: 改,短文,错误
			
 
				+#短文改错: 改,句子,错误
			
 
				+#短文改错: 多余的词, 划掉
			
 
				+#短文改错：错误, 修改
			
 
				+#短文改错：改正, 错误
			
 
				+#短文改错: text, correction
			
 
				+#短文改错: essay, correction
			
 
				+#短文改错：sentence, correction
			
 
				+#短文改错：sentences, correction
			
 
				+#短文改错: change, error
			
 
				+#短文改错: change, errors
			
 
				+#短文改错: change, mistake
			
 
				+#短文改错: change, mistakes
			
 
				+#短文改错：correct, mistake
			
 
				+#短文改错：correct, mistakes
			
 
				+#短文改错：correct, error
			
 
				+#短文改错：correct, errors
			
 
				+短文改错：modify, mistake
			
 
				+短文改错：modify, mistakes
			
 
				+短文改错：modify, errors
			
 
				+短文改错：modify, error
			
 
				+单词拼写: 单词拼写
			
 
				+#单词拼写: 首字母
			
 
				+单词拼写: 首字母, 汉语
			
 
				+单词拼写: 首字母, 内容
			
 
				+#单词拼写: 单词, 中文
			
 
				+单词拼写: word spelling
			
 
				+单词拼写: initial letter
			
 
				+单词拼写: initial letter, chinese
			
 
				+单词拼写: initial letter, english
			
 
				+单词拼写: initial letter, content
			
 
				+课文填空: 课文填空
			
 
				+课文填空: 课文, 填空
			
 
				+课文填空: 课文, 填, 内容
			
 
				+课文填空: 课文, 内容, 完成, 句子
			
 
				+#课文填空: recitation
			
 
				+课文填空: complete, sentence
			
 
				+课文填空: complete, sentences
			
 
				+#句子翻译：翻译
			
 
				+句子翻译: 句子,翻译
			
 
				+句子翻译: 短文,翻译
			
 
				+句子翻译：汉, 译, 英
			
 
				+句子翻译：英, 译, 汉
			
 
				+#句子翻译：translation
			
 
				+句子翻译：sentence translation
			
 
				+句子翻译：sentences translation
			
 
				+句子翻译：English, Chinese, translation
			
 
				+句子翻译：Chinese, English, translation
			
 
				+句子翻译：English to Chinese
			
 
				+句子翻译：Chinese to English
			
 
				+句子翻译：Chinese, translate, English
			
 
				+句子翻译：English, translate, Chinese
			
 
				+书面表达: 书面表达
			
 
				+书面表达: 应用文
			
 
				+书面表达: 情景作文
			
 
				+#书面表达: 写一篇短文
			
 
				+#书面表达：写一封信
			
 
				+#书面表达：回信
			
 
				+#书面表达：申请信
			
 
				+书面表达: 写作
			
 
				+书面表达: 作文
			
 
				+#书面表达: 开头, 总词数
			
 
				+#书面表达: 100, 字
			
 
				+#书面表达：120, 字
			
 
				+#书面表达：150, 字
			
 
				+#书面表达：180, 字
			
 
				+#书面表达：200, 字
			
 
				+#书面表达: 100, 词
			
 
				+#书面表达：120, 词
			
 
				+#书面表达：150, 词
			
 
				+#书面表达：180, 词
			
 
				+#书面表达：200, 词
			
 
				+#书面表达: 开头, 写好
			
 
				+#书面表达: 开头, 写出
			
 
				+#书面表达：词, 左右
			
 
				+#书面表达：文章, 通顺
			
 
				+#书面表达：文章, 连贯
			
 
				+书面表达：writing
			
 
				+书面表达：guided writing
			
 
				+#书面表达：summary writing
			
 
				+#书面表达：practical writing
			
 
				+#书面表达：composition
			
 
				+#书面表达：situational composition
			
 
				+#书面表达：write, essay
			
 
				+#书面表达：write, letter
			
 
				+#书面表达：application letter
			
 
				+#书面表达：100 words
			
 
				+#书面表达：120 words
			
 
				+#书面表达：150 words
			
 
				+#书面表达：180 words
			
 
				+#书面表达：200 words
			
 
				+完成句子: 完成句子
			
 
				+完成句子: 完成, 句子
			
 
				+完成句子: 完整句子
			
 
				+完成句子: 完成, 各句
			
 
				+完成句子: finish, sentence
			
 
				+完成句子: finish, sentences
			
 
				+完成句子: complete, sentence
			
 
				+完成句子: complete, sentences
			
 
				+信息匹配：信息匹配
			
 
				+句型转换：句型转换
			
 
				+句型转换：转换句型
			
 
				+单词辨音: 单词辨音
			
 
				+单词辨音：单词，音标
			
 
				+单词辨音：单词，读音
			
--- a/segment/server.py
+++ b/segment/server.py
@@ -0,0 +1,799 @@
 
				+import base64
			
 
				+import glob
			
 
				+import os
			
 
				+import time
			
 
				+import uuid
			
 
				+import shutil
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from urllib import parse, request
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import pypinyin
			
 
				+import requests
			
 
				+from PIL import Image
			
 
				+from django.conf import settings
			
 
				+from pdf2image import convert_from_path
			
 
				+
			
 
				+import segment.logging_config as logging
			
 
				+from segment.image_operation.exam_segment import get_page_text
			
 
				+from segment.image_operation.pre_segment import segment2parts
			
 
				+from segment.image_operation.segment import joint_image
			
 
				+from segment.image_operation.split_lines import line_split
			
 
				+from segment.image_operation.utils import create_xml, resize_by_percent
			
 
				+from segment.image_operation.utils import write_single_img
			
 
				+from segment.models import OcrToken
			
 
				+from segment.ocr.group_pictures import group_pictures
			
 
				+from segment.ocr.group_text import group_text
			
 
				+from segment.ocr.penguin_ocr import get_ocr_english_text
			
 
				+
			
 
				+logger = logging.getLogger(settings.LOGGING_TYPE)
			
 
				+
			
 
				+
			
 
				+def convert_pil_to_jpeg(raw_img):
			
 
				+    if raw_img.mode == 'L':
			
 
				+        channels = raw_img.split()
			
 
				+        img = Image.merge("RGB", (channels[0], channels[0], channels[0]))
			
 
				+    elif raw_img.mode == 'RGB':
			
 
				+        img = raw_img
			
 
				+    elif raw_img.mode == 'RGBA':
			
 
				+        img = Image.new("RGB", raw_img.size, (255, 255, 255))
			
 
				+        img.paste(raw_img, mask=raw_img.split()[3])  # 3 is the alpha channel
			
 
				+    else:
			
 
				+        img = raw_img
			
 
				+    open_cv_image = np.array(img)
			
 
				+    return img, open_cv_image
			
 
				+
			
 
				+
			
 
				+def opencv2base64(img):
			
 
				+    image = cv2.imencode('.jpg', img)[1]
			
 
				+    base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+    return base64_data
			
 
				+
			
 
				+
			
 
				+def get_dir_next_index_name(path, file_type):
			
 
				+    files_list = os.listdir(path)
			
 
				+    imgs_list = [file.replace(file_type, '') for file in files_list if file.endswith(file_type)]
			
 
				+
			
 
				+    length = len(imgs_list)
			
 
				+    if length == 0:
			
 
				+        return 1
			
 
				+    else:
			
 
				+        index_name = max(imgs_list)
			
 
				+        return int(index_name) + 1
			
 
				+
			
 
				+
			
 
				+def save_raw_image(subject, datetime, img_file, analysis_type):
			
 
				+    # 随机生成新的图片名，自定义路径。
			
 
				+    ext = img_file.name.split('.')[-1]
			
 
				+    raw_name = img_file.name[0:-len(ext) - 1]
			
 
				+    file_name = '{}_{}.{}'.format(raw_name, uuid.uuid4().hex[:10], 'jpg')
			
 
				+
			
 
				+    raw_img = Image.open(img_file)  # 读取上传的网络图像
			
 
				+    save_dir = os.path.join(settings.MEDIA_ROOT, analysis_type, subject, datetime)
			
 
				+    if not os.path.exists(save_dir):
			
 
				+        os.makedirs(save_dir)
			
 
				+    save_path = os.path.join(save_dir, file_name)
			
 
				+
			
 
				+    channels = raw_img.split()
			
 
				+    if len(channels) >= 3:
			
 
				+        img = Image.merge("RGB", (channels[0], channels[1], channels[2]))
			
 
				+        open_cv_image = np.array(img)
			
 
				+        img_reload = open_cv_image[:, :, ::-1].copy()
			
 
				+        parts_list = segment2parts(img_reload, save_path)
			
 
				+    else:
			
 
				+        img = raw_img
			
 
				+        open_cv_image = np.array(img)
			
 
				+        parts_list = segment2parts(open_cv_image, save_path)
			
 
				+
			
 
				+    # for part in parts_list:
			
 
				+    #     with open(part['img_part'], 'rb') as f:
			
 
				+    #         bin_img = f.read()
			
 
				+    #         part['img_part'] = bin_img
			
 
				+
			
 
				+    try:
			
 
				+        img.save(save_path)
			
 
				+    except Exception as e:
			
 
				+        raise e
			
 
				+
			
 
				+    url_path = os.path.join(settings.MEDIA_URL, analysis_type, subject, datetime, file_name).replace('\\', '/')
			
 
				+    return save_path, parts_list, url_path
			
 
				+
			
 
				+
			
 
				+def save_raw_image_without_segment(subject, datetime, img_file, analysis_type):
			
 
				+    # 随机生成新的图片名，自定义路径。
			
 
				+    ext = img_file.name.split('.')[-1]
			
 
				+    raw_name = img_file.name[0:-len(ext) - 1]
			
 
				+    file_name = '{}_{}.{}'.format(raw_name, uuid.uuid4().hex[:10], 'jpg')
			
 
				+
			
 
				+    raw_img = Image.open(img_file)  # 读取上传的网络图像
			
 
				+    save_dir = os.path.join(settings.MEDIA_ROOT, analysis_type, subject, datetime)
			
 
				+    if not os.path.exists(save_dir):
			
 
				+        os.makedirs(save_dir)
			
 
				+    save_path = os.path.join(save_dir, file_name)
			
 
				+
			
 
				+    pil_img, open_cv_image = convert_pil_to_jpeg(raw_img)
			
 
				+    try:
			
 
				+        pil_img.save(save_path)
			
 
				+        shutil.copy(save_path, save_path.replace('.jpg', '_small.jpg'))
			
 
				+    except Exception as e:
			
 
				+        raise e
			
 
				+
			
 
				+    url_path = os.path.join(settings.MEDIA_URL, analysis_type, subject, datetime, file_name).replace('\\', '/')
			
 
				+    return save_path, open_cv_image, url_path
			
 
				+
			
 
				+
			
 
				+def save_raw_image_without_segment_formula(subject, datetime, img_file, analysis_type):
			
 
				+    # 随机生成新的图片名，自定义路径。
			
 
				+    ext = img_file.name.split('.')[-1]
			
 
				+    raw_name = img_file.name[0:-len(ext) - 1]
			
 
				+    file_name = '{}_{}.{}'.format(raw_name, uuid.uuid4().hex[:10], ext)
			
 
				+
			
 
				+    raw_img = Image.open(img_file)  # 读取上传的网络图像
			
 
				+    save_dir = os.path.join(settings.MEDIA_ROOT, analysis_type, subject, datetime)
			
 
				+
			
 
				+    if not os.path.exists(save_dir):
			
 
				+        os.makedirs(save_dir)
			
 
				+    save_path = os.path.join(save_dir, file_name)
			
 
				+
			
 
				+    channels = raw_img.split()
			
 
				+    # if ext == 'png' and len(channels) >= 3:  # 公式ocr分割透明png
			
 
				+    #     img = Image.merge("RGB", (channels[0], channels[1], channels[2]))
			
 
				+    #     open_cv_image = np.array(img)
			
 
				+    #     resize_img = resize_by_percent(open_cv_image, 0.5)
			
 
				+    #
			
 
				+    # else:
			
 
				+    #     img = raw_img
			
 
				+    #     open_cv_image = np.array(img)
			
 
				+    #     resize_img = resize_by_percent(open_cv_image, 0.5)
			
 
				+
			
 
				+    try:
			
 
				+        raw_img.save(save_path)
			
 
				+        # write_single_img(resize_img, save_path.replace('.jpg', '_small.jpg'))
			
 
				+    except Exception as e:
			
 
				+        raise e
			
 
				+
			
 
				+    url_path = os.path.join(settings.MEDIA_URL, analysis_type, subject, datetime, file_name).replace('\\', '/')
			
 
				+    return save_path, url_path, raw_img
			
 
				+
			
 
				+
			
 
				+def save_raw_image_in_jpeg(subject, datetime, img_file, analysis_type):
			
 
				+    # 随机生成新的图片名，自定义路径。
			
 
				+    ext = img_file.name.split('.')[-1]
			
 
				+    raw_name = img_file.name[0:-len(ext) - 1]
			
 
				+    file_name = '{}_{}.{}'.format(raw_name, uuid.uuid4().hex[:10], 'jpg')
			
 
				+
			
 
				+    raw_img = Image.open(img_file)  # 读取上传的网络图像
			
 
				+    save_dir = os.path.join(settings.MEDIA_ROOT, analysis_type, subject, datetime)
			
 
				+
			
 
				+    if not os.path.exists(save_dir):
			
 
				+        os.makedirs(save_dir)
			
 
				+    save_path = os.path.join(save_dir, file_name)
			
 
				+
			
 
				+    if raw_img.mode == 'L':
			
 
				+        channels = raw_img.split()
			
 
				+        img = Image.merge("RGB", (channels[0], channels[0], channels[0]))
			
 
				+    elif raw_img.mode == 'RGB':
			
 
				+        img = raw_img
			
 
				+    elif raw_img.mode == 'RGBA':
			
 
				+        img = Image.new("RGB", raw_img.size, (255, 255, 255))
			
 
				+        img.paste(raw_img, mask=raw_img.split()[3])  # 3 is the alpha channel
			
 
				+    else:
			
 
				+        img = raw_img
			
 
				+    open_cv_image = np.array(img)
			
 
				+    # resize_img = resize_by_percent(open_cv_image, 0.5)
			
 
				+
			
 
				+    try:
			
 
				+        img.save(save_path)
			
 
				+        # write_single_img(resize_img, save_path.replace('.jpg', '_small.jpg'))
			
 
				+    except Exception as e:
			
 
				+        raise e
			
 
				+
			
 
				+    url_path = os.path.join(settings.MEDIA_URL, analysis_type, subject, datetime, file_name).replace('\\', '/')
			
 
				+    return save_path, url_path, open_cv_image
			
 
				+
			
 
				+
			
 
				+def ocr_login():
			
 
				+    def login():
			
 
				+        grant_type = 'client_credentials'
			
 
				+        client_id = settings.OCR_CLIENT_ID
			
 
				+        client_secret = settings.OCR_CLIENT_SECRET
			
 
				+
			
 
				+        textmod = {'grant_type': grant_type, 'client_id': client_id, 'client_secret': client_secret}
			
 
				+        textmod = parse.urlencode(textmod)
			
 
				+
			
 
				+        # 输出内容:user=admin&password=admin
			
 
				+        header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}
			
 
				+        url = 'https://aip.baidubce.com/oauth/2.0/token'
			
 
				+        req = request.Request(url='{}{}{}'.format(url, '?', textmod), headers=header_dict)
			
 
				+        res = request.urlopen(req).read()
			
 
				+        token = eval(res.decode(encoding='utf-8'))['access_token']
			
 
				+        lastest_access_token = OcrToken(access_token=token)
			
 
				+        lastest_access_token.save()
			
 
				+        return token
			
 
				+
			
 
				+    objects = OcrToken.objects.latest('update_time')
			
 
				+    lastest_access_token_db = objects.access_token
			
 
				+    lastest_date = objects.update_time
			
 
				+    ans_time = time.mktime(lastest_date.timetuple())
			
 
				+    update_date = settings.OCR_TOKEN_UPDATE_DATE
			
 
				+
			
 
				+    current_time = time.time()
			
 
				+
			
 
				+    if (ans_time + update_date * 24 * 60 * 60) > current_time:
			
 
				+        return lastest_access_token_db
			
 
				+    else:
			
 
				+        return login()
			
 
				+
			
 
				+
			
 
				+def get_exam_bbox_by_tesseract(img_raw_name, img_path, subject):
			
 
				+    error_info = ''
			
 
				+    status = 1
			
 
				+    text = []
			
 
				+
			
 
				+    lines_save_dir = img_path.replace('.jpg', '_lines')
			
 
				+
			
 
				+    img_path = os.path.abspath(img_path)
			
 
				+    lines_save_dir = os.path.abspath(lines_save_dir)
			
 
				+    if not os.path.exists(lines_save_dir):
			
 
				+        os.makedirs(lines_save_dir)
			
 
				+    start_time = time.time()
			
 
				+    try:
			
 
				+        bbox, lines_abs_path_list = line_split(img_path, lines_save_dir, settings.TOLERANCE_PIX_NUMBER)  # 分行
			
 
				+    except Exception as e:
			
 
				+        logger.error('line_split failed: {}'.format(e), exc_info=True)
			
 
				+        status = 0
			
 
				+        error_info = str(e)
			
 
				+
			
 
				+        info = {'is_success': status, 'img_name': img_raw_name, 'coordinate': text, 'error': error_info}
			
 
				+        return info
			
 
				+
			
 
				+    time1 = time.time()
			
 
				+    logger.info('lines_segment, cost: {}'.format(time1 - start_time))
			
 
				+    exam_group = []
			
 
				+
			
 
				+    try:
			
 
				+        _, exam_group = group_pictures(lines_abs_path_list, subject)
			
 
				+        logger.info('exam_group info : {}'.format(exam_group))
			
 
				+    except (SystemExit, KeyboardInterrupt):
			
 
				+        raise
			
 
				+    except Exception as e:
			
 
				+        logger.error('ocr failed: {}'.format(e), exc_info=True)
			
 
				+        status = 0
			
 
				+        error_info = error_info + str(e)
			
 
				+
			
 
				+    time2 = time.time()
			
 
				+    logger.info('exam_grouped, cost: {}'.format(time2 - time1))
			
 
				+
			
 
				+    try:
			
 
				+        text = joint_image(img_path, bbox, exam_group)
			
 
				+    except (SystemExit, KeyboardInterrupt):
			
 
				+        raise
			
 
				+    except Exception as e:
			
 
				+        logger.error('generate coordinate info failed: {}'.format(e), exc_info=True)
			
 
				+        status = 0
			
 
				+        error_info = error_info + str(e)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'coordinate': text}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'coordinate': text, 'error': error_info}
			
 
				+
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
 
				+
			
 
				+
			
 
				+def get_ocr_text(access_token, img, subject=None):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+
			
 
				+    url = '{}{}{}{}{}'.format(settings.OCR_URL, settings.OCR_ACCURACY, '_basic', '?', textmod)
			
 
				+    url_general = '{}{}{}{}{}'.format(settings.OCR_URL, 'general', '_basic', '?', textmod)
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    # image = opecv2base64(img)  # 得到 byte 编码的数据
			
 
				+    image = img
			
 
				+
			
 
				+    data = {
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'language_type': 'CHN_ENG',
			
 
				+    }
			
 
				+
			
 
				+    if subject == 'english':
			
 
				+        resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    else:
			
 
				+        resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    text_list = [word.get('words') for word in words_result]
			
 
				+    # words_list = {'word': text_list, 'subject': subject}
			
 
				+    return text_list
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate_in_raw_format(access_token, img):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(settings.OCR_BOX_URL, settings.OCR_ACCURACY, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(settings.OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    # image = base64.b64encode(img)  # 得到 byte 编码的数据
			
 
				+    image = img
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    return resp
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate(access_token, img):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(settings.OCR_BOX_URL, settings.OCR_ACCURACY, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(settings.OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    # image = base64.b64encode(img)  # 得到 byte 编码的数据
			
 
				+    image = img
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        # 'recognize_granularity': 'small',
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    text_list = [word.get('words') for word in words_result]
			
 
				+    # words_list = {'word': text_list, 'subject': subject}
			
 
				+    matrix_lt, matrix_rb = resolve_json(words_result)
			
 
				+    return text_list, matrix_lt, matrix_rb
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate_formula(img, access_token, base64=False):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(settings.OCR_BOX_URL, settings.OCR_ACCURACY, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(settings.OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    if base64:
			
 
				+        image = img
			
 
				+    else:
			
 
				+        image = opencv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': 'CHN_ENG',
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    return words_result
			
 
				+
			
 
				+
			
 
				+def resolve_json(words_result):
			
 
				+    box_list = [item[key] for item in words_result for key in item if key == 'location']
			
 
				+    matrix = np.array([0, 0, 0, 0])
			
 
				+    for box in box_list:
			
 
				+        # num_list = list(box.values())
			
 
				+        w = box.get('width')
			
 
				+        l = box.get('left')
			
 
				+        t = box.get('top')
			
 
				+        h = box.get('height')
			
 
				+        num_list = [w, t, l, h]
			
 
				+        matrix = np.vstack([matrix, np.array(num_list)])
			
 
				+    matrix = matrix[1:]
			
 
				+    matrix_w = matrix[:, 0:1]
			
 
				+    matrix_t = matrix[:, 1:2]
			
 
				+    matrix_l = matrix[:, 2:3]
			
 
				+    matrix_h = matrix[:, 3:]
			
 
				+
			
 
				+    matrix_lt = np.hstack([matrix_l, matrix_t])
			
 
				+    matrix_wh = np.hstack([matrix_w, matrix_h])
			
 
				+    matrix_rb = matrix_lt + matrix_wh
			
 
				+    return matrix_lt, matrix_rb
			
 
				+
			
 
				+
			
 
				+def group_to_coordinate(group_list, matrix_lt, matrix_rb):
			
 
				+    matrix_box_vlist = np.array([0, 0, 0, 0])
			
 
				+    for element in group_list:
			
 
				+        if element[0] < element[1]:
			
 
				+            rb = matrix_rb[element[0]:element[1]].max(axis=0)
			
 
				+            lt = matrix_lt[element[0]:element[1]].min(axis=0)
			
 
				+            matrix_box = np.hstack([lt, rb])
			
 
				+            matrix_box_vlist = np.vstack([matrix_box_vlist, matrix_box])
			
 
				+    matrix_box_vlist = matrix_box_vlist[1:]
			
 
				+    return matrix_box_vlist.tolist()
			
 
				+
			
 
				+
			
 
				+def get_exam_box(img_raw_name, img_list, save_path, subject, access_token):
			
 
				+    status = 1
			
 
				+    error_info = ''
			
 
				+    box_list = []
			
 
				+    words_list_all = []
			
 
				+    group_list_all = []
			
 
				+    try:
			
 
				+        for img_part in img_list:
			
 
				+            x_bias = img_part['x_bias']
			
 
				+            y_bias = img_part['y_bias']
			
 
				+            img = img_part['img_part']
			
 
				+
			
 
				+            words_list, matrix_lt, matrix_rb = get_ocr_text_and_coordinate(access_token, img)
			
 
				+
			
 
				+            matrix_lt = matrix_lt + np.asarray([x_bias, y_bias])
			
 
				+            matrix_rb = matrix_rb + np.asarray([x_bias, y_bias])
			
 
				+
			
 
				+            group_list = group_text(words_list, subject)
			
 
				+            part_box_list = group_to_coordinate(group_list, matrix_lt, matrix_rb)
			
 
				+            box_list = box_list + part_box_list
			
 
				+
			
 
				+            words_list.append('********************************')
			
 
				+            words_list_all = words_list_all + words_list
			
 
				+            group_list_all.append(group_list)
			
 
				+        try:
			
 
				+            txt_backup_path = save_path.replace('.jpg', '.txt')
			
 
				+            words_list = [line + ',\n' for line in words_list_all]
			
 
				+            with open(txt_backup_path, 'w', encoding='utf-8') as writer:
			
 
				+                writer.writelines('subject:' + subject + '\n')
			
 
				+                writer.writelines('[\n')
			
 
				+                writer.writelines(words_list)
			
 
				+                writer.writelines(']\n')
			
 
				+                writer.writelines(str(group_list_all))
			
 
				+            logger.info('{}试卷: {} 文本信息保存成功'.format(subject, img_raw_name))
			
 
				+        except Exception as e:
			
 
				+            logger.error('{}试卷: {} 文本信息保存失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+
			
 
				+        # 记录xml坐标信息
			
 
				+        tree = ET.parse(r'./segment/exam_info/000000-template.xml')  # xml tree
			
 
				+        for index_num, exam_bbox in enumerate(box_list):
			
 
				+            tree = create_xml('{:02d}'.format(index_num), tree,
			
 
				+                              exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
			
 
				+        # print(exam_items_bbox)
			
 
				+        tree.write(save_path.replace('.jpg', '.xml'))
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error('{}试卷: {} 坐标生成失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+        status = 0
			
 
				+        error_info = error_info + str(e)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'coordinate': box_list}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'coordinate': box_list, 'error': error_info}
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
 
				+
			
 
				+
			
 
				+def get_exam_ocr(img_raw_name, img_list, save_path, subject, access_token):
			
 
				+    status = 1
			
 
				+    error_info = ''
			
 
				+    words_list = []
			
 
				+
			
 
				+    for img_part in img_list:
			
 
				+        img = img_part['img_part']
			
 
				+        try:
			
 
				+            part_words_list = get_ocr_text(access_token, img, subject)
			
 
				+        except Exception as e:
			
 
				+            part_words_list = []
			
 
				+            error_info = error_info + str(e)
			
 
				+        words_list = words_list + part_words_list
			
 
				+
			
 
				+    if len(words_list) < 1:
			
 
				+        logger.error('{}试卷: {} OCR解析失败: {}'.format(subject, img_raw_name, error_info), exc_info=True)
			
 
				+        status = 0
			
 
				+
			
 
				+    else:
			
 
				+        try:
			
 
				+            txt_backup_path = save_path.replace('.jpg', '.txt')
			
 
				+            words_list = [line + '\n' for line in words_list]
			
 
				+            # # words_list.append(group_list)
			
 
				+            with open(txt_backup_path, 'w', encoding='utf-8') as writer:
			
 
				+                writer.writelines('subject:' + subject + '\n')
			
 
				+                writer.writelines('[\n')
			
 
				+                writer.writelines(words_list)
			
 
				+                writer.writelines(']\n')
			
 
				+
			
 
				+            logger.info('{}试卷: {} 文本信息保存成功'.format(subject, img_raw_name))
			
 
				+        except Exception as e:
			
 
				+            logger.error('{}试卷: {} 文本信息保存失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'text': words_list}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'text': words_list, 'error': error_info}
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
 
				+
			
 
				+
			
 
				+def get_exam_ocr_single(img_raw_name, img, save_path, subject, access_token):
			
 
				+    status = 1
			
 
				+    error_info = ''
			
 
				+    words_list = []
			
 
				+
			
 
				+    try:
			
 
				+        part_words_list = get_ocr_text(access_token, img)
			
 
				+    except Exception as e:
			
 
				+        part_words_list = []
			
 
				+        error_info = error_info + str(e)
			
 
				+    words_list = words_list + part_words_list
			
 
				+
			
 
				+    if len(words_list) < 1:
			
 
				+        logger.error('{}试卷: {} OCR解析失败: {}'.format(subject, img_raw_name, error_info), exc_info=True)
			
 
				+        status = 0
			
 
				+
			
 
				+    else:
			
 
				+        try:
			
 
				+            txt_backup_path = save_path.replace('.jpg', '.txt')
			
 
				+            words_list = [line + ',\n' for line in words_list]
			
 
				+            # # words_list.append(group_list)
			
 
				+            with open(txt_backup_path, 'w', encoding='utf-8') as writer:
			
 
				+                writer.writelines('subject:' + subject + '\n')
			
 
				+                writer.writelines('[\n')
			
 
				+                writer.writelines(words_list)
			
 
				+                writer.writelines(']\n')
			
 
				+
			
 
				+            logger.info('{}试卷: {} 文本信息保存成功'.format(subject, img_raw_name))
			
 
				+        except Exception as e:
			
 
				+            logger.error('{}试卷: {} 文本信息保存失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'text': words_list}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'text': words_list, 'error': error_info}
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
 
				+
			
 
				+
			
 
				+def get_segment_by_ocr_once(opencv_img, token, subject, save_path, img_raw_name):
			
 
				+    img = opencv2base64(opencv_img)
			
 
				+    resp = get_ocr_text_and_coordinate_in_raw_format(token, img)
			
 
				+    if len(opencv_img.shape) == 3:
			
 
				+        opencv_img = cv2.cvtColor(opencv_img, cv2.COLOR_BGR2GRAY)
			
 
				+    test_list = get_page_text(resp['words_result'], opencv_img)
			
 
				+
			
 
				+    status = 1
			
 
				+    error_info = ''
			
 
				+    box_list = []
			
 
				+    words_list_all = []
			
 
				+    group_list_all = []
			
 
				+    try:
			
 
				+        for one_page_text in test_list:
			
 
				+            words_list = [word.get('words') for word in one_page_text]
			
 
				+            matrix_lt, matrix_rb = resolve_json(one_page_text)
			
 
				+
			
 
				+            group_list = group_text(words_list, subject)
			
 
				+            part_box_list = group_to_coordinate(group_list, matrix_lt, matrix_rb)
			
 
				+            box_list = box_list + part_box_list
			
 
				+
			
 
				+            words_list.append('********************************')
			
 
				+            words_list_all = words_list_all + words_list
			
 
				+            group_list_all.append(group_list)
			
 
				+        try:
			
 
				+            txt_backup_path = save_path.replace('.jpg', '.txt')
			
 
				+            words_list = [line + '\n' for line in words_list_all]
			
 
				+            with open(txt_backup_path, 'w', encoding='utf-8') as writer:
			
 
				+                writer.writelines('subject:' + subject + '\n')
			
 
				+                writer.writelines('[\n')
			
 
				+                writer.writelines(words_list)
			
 
				+                writer.writelines(']\n')
			
 
				+                writer.writelines(str(group_list_all))
			
 
				+            logger.info('{}试卷: {} 文本信息保存成功'.format(subject, img_raw_name))
			
 
				+        except Exception as e:
			
 
				+            logger.error('{}试卷: {} 文本信息保存失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+
			
 
				+        # 记录xml坐标信息
			
 
				+        tree = ET.parse(r'./segment/exam_info/000000-template.xml')  # xml tree
			
 
				+        for index_num, exam_bbox in enumerate(box_list):
			
 
				+            tree = create_xml('{:02d}'.format(index_num), tree,
			
 
				+                              exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
			
 
				+        # print(exam_items_bbox)
			
 
				+        tree.write(save_path.replace('.jpg', '.xml'))
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error('{}试卷: {} 坐标生成失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+        status = 0
			
 
				+        error_info = error_info + str(e)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'coordinate': box_list}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'coordinate': box_list, 'error': error_info}
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
 
				+
			
 
				+
			
 
				+# opencv_img, token, subject, save_path, img_raw_name
			
 
				+def get_exam_ocr_once(opencv_img, token, subject, save_path, img_raw_name):
			
 
				+    img = opencv2base64(opencv_img)
			
 
				+    resp = get_ocr_text_and_coordinate_in_raw_format(token, img)
			
 
				+    if len(opencv_img.shape) == 3:
			
 
				+        opencv_img = cv2.cvtColor(opencv_img, cv2.COLOR_BGR2GRAY)
			
 
				+    test_list = get_page_text(resp['words_result'], opencv_img)
			
 
				+
			
 
				+    words_list = []
			
 
				+    for one_page_raw_text in test_list:
			
 
				+        one_page_words_list = [word.get('words') for word in one_page_raw_text]
			
 
				+        words_list = words_list + one_page_words_list
			
 
				+
			
 
				+    status = 1
			
 
				+    error_info = ''
			
 
				+
			
 
				+    if len(words_list) < 1:
			
 
				+        logger.error('{}试卷: {} OCR解析失败: {}'.format(subject, img_raw_name, error_info), exc_info=True)
			
 
				+        status = 0
			
 
				+
			
 
				+    else:
			
 
				+        try:
			
 
				+            txt_backup_path = save_path.replace('.jpg', '.txt')
			
 
				+            words_list = [line + '\n' for line in words_list]
			
 
				+            # # words_list.append(group_list)
			
 
				+            with open(txt_backup_path, 'w', encoding='utf-8') as writer:
			
 
				+                writer.writelines('subject:' + subject + '\n')
			
 
				+                writer.writelines('[\n')
			
 
				+                writer.writelines(words_list)
			
 
				+                writer.writelines(']\n')
			
 
				+
			
 
				+            logger.info('{}试卷: {} 文本信息保存成功'.format(subject, img_raw_name))
			
 
				+        except Exception as e:
			
 
				+            logger.error('{}试卷: {} 文本信息保存失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'text': words_list}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'text': words_list, 'error': error_info}
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
 
				+
			
 
				+
			
 
				+def save_pdf_image(pdf_file, subject, time_str):
			
 
				+    name = pdf_file.name[:-4]
			
 
				+    ext0 = pdf_file.name.split('.')[-1]
			
 
				+
			
 
				+    raw_name0 = ''.join([''.join(i) for i in pypinyin.pinyin(name, style=pypinyin.NORMAL)])
			
 
				+    save_dir0 = os.sep.join(
			
 
				+        [settings.MEDIA_ROOT, 'ocr', subject, time_str, raw_name0 + '_{}'.format(uuid.uuid4().hex[:10])])
			
 
				+    if not os.path.exists(save_dir0):
			
 
				+        os.makedirs(save_dir0)
			
 
				+    pdf_path = os.sep.join([save_dir0, raw_name0 + '.' + ext0])
			
 
				+    with open(pdf_path, 'wb') as pdfFileObj:
			
 
				+        for chunk in pdf_file.chunks():
			
 
				+            pdfFileObj.write(chunk)
			
 
				+    images_list = convert_from_path(pdf_path, dpi=200, output_folder=save_dir0,
			
 
				+                                    output_file='image',
			
 
				+                                    first_page=None, last_page=None, fmt='JPEG')
			
 
				+    upload_img_path_list = glob.glob(os.sep.join([save_dir0, '*.jpg']))
			
 
				+    try:
			
 
				+        images_list = [cv2.cvtColor(np.asarray(ele), cv2.COLOR_RGB2BGR) for ele in images_list]
			
 
				+    except Exception:
			
 
				+        images_list = [np.asarray(ele) for ele in images_list]
			
 
				+    return upload_img_path_list, images_list
			
 
				+
			
 
				+
			
 
				+def save_raw_image_without_segment_pdf(subject, datetime, raw_name, img_file, analysis_type):
			
 
				+    # 随机生成新的图片名，自定义路径。
			
 
				+    file_name = '{}_{}.{}'.format(raw_name, uuid.uuid4().hex[:10], 'jpg')
			
 
				+    raw_img = Image.open(img_file)  # 读取上传的网络图像
			
 
				+    save_dir = os.path.join(settings.MEDIA_ROOT, analysis_type, subject, datetime)
			
 
				+
			
 
				+    if not os.path.exists(save_dir):
			
 
				+        os.makedirs(save_dir)
			
 
				+    save_path = os.path.join(save_dir, file_name)
			
 
				+
			
 
				+    channels = raw_img.split()
			
 
				+    if len(channels) > 3:
			
 
				+        img = Image.merge("RGB", (channels[1], channels[2], channels[3]))
			
 
				+        open_cv_image = np.array(img)
			
 
				+        resize_img = resize_by_percent(open_cv_image, 0.5)
			
 
				+
			
 
				+    else:
			
 
				+        img = raw_img
			
 
				+        open_cv_image = np.array(img)
			
 
				+        resize_img = resize_by_percent(open_cv_image, 0.5)
			
 
				+
			
 
				+    try:
			
 
				+        img.save(save_path)
			
 
				+        # write_single_img(resize_img, save_path.replace('.jpg', '_small.jpg'))
			
 
				+    except Exception as e:
			
 
				+        raise e
			
 
				+
			
 
				+    url_path = os.path.join(settings.MEDIA_URL, analysis_type, subject, datetime, file_name).replace('\\', '/')
			
 
				+    return save_path, url_path, open_cv_image
			
 
				+
			
 
				+
			
 
				+def get_exam_ocr_by_penguin(img_raw_name, raw_image, size, save_path, subject):
			
 
				+    status = 1
			
 
				+    error_info = ''
			
 
				+    words_list = []
			
 
				+
			
 
				+    try:
			
 
				+        words_list = get_ocr_english_text(raw_image, size)
			
 
				+    except Exception as e:
			
 
				+        error_info = error_info + str(e)
			
 
				+
			
 
				+    if len(words_list) < 1:
			
 
				+        logger.error('{}试卷: {} OCR解析失败: {}'.format(subject, img_raw_name, error_info), exc_info=True)
			
 
				+        status = 0
			
 
				+
			
 
				+    else:
			
 
				+        try:
			
 
				+            txt_backup_path = save_path.replace('.jpg', '.txt')
			
 
				+            words_list = [line + '\n' for line in words_list]
			
 
				+            # # words_list.append(group_list)
			
 
				+            with open(txt_backup_path, 'w', encoding='utf-8') as writer:
			
 
				+                writer.writelines('subject:' + subject + '\n')
			
 
				+                writer.writelines('[\n')
			
 
				+                writer.writelines(words_list)
			
 
				+                writer.writelines(']\n')
			
 
				+
			
 
				+            logger.info('{}试卷: {} 文本信息保存成功'.format(subject, img_raw_name))
			
 
				+        except Exception as e:
			
 
				+            logger.error('{}试卷: {} 文本信息保存失败: {}'.format(subject, img_raw_name, e), exc_info=True)
			
 
				+
			
 
				+    info = {'img_name': img_raw_name, 'text': words_list}
			
 
				+    if error_info:
			
 
				+        info = {'img_name': img_raw_name, 'text': words_list, 'error': error_info}
			
 
				+    logger.info('{} done'.format(img_raw_name))
			
 
				+    return status, info
			
--- a/segment/sheet_resolve/__init__.py
+++ b/segment/sheet_resolve/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/12/19 0019 下午 14:10
			
--- a/segment/sheet_resolve/analysis/__init__.py
+++ b/segment/sheet_resolve/analysis/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:01
			
--- a/segment/sheet_resolve/analysis/anchor/__init__.py
+++ b/segment/sheet_resolve/analysis/anchor/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2019/9/16 0016 下午 14:37
			
--- a/segment/sheet_resolve/analysis/anchor/marker_detection.py
+++ b/segment/sheet_resolve/analysis/anchor/marker_detection.py
@@ -0,0 +1,644 @@
 
				+import glob
			
 
				+import os
			
 
				+import math
			
 
				+from .util import *
			
 
				+import ctypes
			
 
				+import time
			
 
				+import sys
			
 
				+import numpy as np
			
 
				+try:
			
 
				+    temp = ctypes.windll.LoadLibrary('opencv_ffmpeg410_64.dll')
			
 
				+except:
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+def rotate_by_anchor(image, method='connected', debug=0):
			
 
				+    #   寻找试卷大定位点并据此旋转纠偏
			
 
				+
			
 
				+    shift_threshold = 80   # 50    # 最上方两个定位点纵坐标相差阈值
			
 
				+    pie = 3.14159
			
 
				+    # height, width = image.shape[:2]
			
 
				+    height, width = image.shape[:2]
			
 
				+    # h_ratio = (0.1, 0.9)
			
 
				+    area_threshold = 0.28   # 0.25
			
 
				+    anchors_len_threshold = 2
			
 
				+    # shape_para = {'height': (80, 10), 'w2h': (3, 1.1), 'area': (6000, 500), 'area_ratio': 0.5}
			
 
				+
			
 
				+    binary = pre_process_for_anchors(image, debug=0)
			
 
				+    # h0 = int(binary.shape[0] * h_ratio[0])
			
 
				+    # h1 = int(binary.shape[0] * h_ratio[1])
			
 
				+    # binary[h0:h1, :] = 0
			
 
				+    binary = extract_feature(binary, method=4, debug=0)
			
 
				+    boxes = find_boxes(binary, method=method, debug=0)
			
 
				+    markers = find_marker_by_shape(boxes, debug=0)
			
 
				+    # 按面积从大到小排列
			
 
				+    markers.sort(reverse=True, key=lambda x: x[-1])
			
 
				+    anchors = []
			
 
				+    for i in range(len(markers)):
			
 
				+        anchors = []
			
 
				+        anchors.append(markers[i])
			
 
				+        for j in range(i+1, len(markers)):
			
 
				+            if (anchors[0][-1] - markers[j][-1]) / anchors[0][-1] <= area_threshold:
			
 
				+                anchors.append(markers[j])
			
 
				+        if len(anchors) >= anchors_len_threshold:
			
 
				+            break
			
 
				+
			
 
				+    anchors.sort(key=lambda x: x[4][1])
			
 
				+    top_anchors = []
			
 
				+    bottom_anchors = []
			
 
				+    for a in anchors:
			
 
				+        if a[4][1] - anchors[0][4][1] < shift_threshold:
			
 
				+            top_anchors.append(a)
			
 
				+        else:
			
 
				+            bottom_anchors.append(a)
			
 
				+
			
 
				+    # draw_box(image, top_anchors, (0, 255, 255))
			
 
				+    # draw_box(image, bottom_anchors, (255, 0, 255))
			
 
				+    # plt.figure(figsize=(15, 10))
			
 
				+    # plt.title(method)
			
 
				+    # plt.imshow(image)
			
 
				+    # plt.show()
			
 
				+
			
 
				+    #   旋转纠偏
			
 
				+    if len(top_anchors) >= 2:
			
 
				+        mean_y = sum([x[4][1] for x in top_anchors]) / len(top_anchors)
			
 
				+        top_anchors.sort(key=lambda x: abs(x[4][1] - mean_y))
			
 
				+        angle = 180 / pie * math.atan((top_anchors[1][4][1] - top_anchors[0][4][1]) / (top_anchors[1][4][0] -
			
 
				+                                                                                       top_anchors[0][4][0]))
			
 
				+    elif len(bottom_anchors) >= 2:
			
 
				+        mean_y = sum([x[4][1] for x in bottom_anchors]) / len(bottom_anchors)
			
 
				+        bottom_anchors.sort(key=lambda x: abs(x[4][1] - mean_y))
			
 
				+        angle = 180 / pie * math.atan((bottom_anchors[1][4][1] - bottom_anchors[0][4][1]) / (bottom_anchors[1][4][0] -
			
 
				+                                                                                             bottom_anchors[0][4][0]))
			
 
				+    else:
			
 
				+        return image, False
			
 
				+    # try:
			
 
				+    #     angle = 180 / pie * math.atan((top_anchors[1][4][1] - top_anchors[0][4][1]) / (top_anchors[1][4][0] -
			
 
				+    #                                                                                    top_anchors[0][4][0]))
			
 
				+    # except IndexError:
			
 
				+    #     print('Not Enough top_anchors! Proceed any way!')
			
 
				+    #     return image, 0
			
 
				+    (cx, cy) = (width // 2, height // 2)
			
 
				+    mat = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
			
 
				+    # compute the new bounding dimensions of the image
			
 
				+    cos = np.abs(mat[0, 0])
			
 
				+    sin = np.abs(mat[0, 1])
			
 
				+    new_width = int((height * sin) + (width * cos))
			
 
				+    new_height = int((height * cos) + (width * sin))
			
 
				+    # adjust the rotation matrix to take into account translation
			
 
				+    mat[0, 2] += (new_width / 2) - cx
			
 
				+    mat[1, 2] += (new_height / 2) - cy
			
 
				+
			
 
				+    rot_image = cv2.warpAffine(image, mat, (new_width, new_height), borderValue=(255, 255, 255))
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        draw_box(image, top_anchors, (0, 255, 255))
			
 
				+        draw_box(image, bottom_anchors, (255, 0, 255))
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title(method)
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+
			
 
				+    return rot_image, True
			
 
				+
			
 
				+
			
 
				+def detect_anchor_by_position(anchors, markers, image, method, debug=0):
			
 
				+    #   按默认位置寻找第一个及最后一个定位点
			
 
				+    position_threshold = 300
			
 
				+    h_ratio = (0.1, 0.9)
			
 
				+    shift_shreshold = 50
			
 
				+
			
 
				+    height, width = image.shape[:2]
			
 
				+    first_anchor, last_anchor = [], []
			
 
				+    if method == 't':
			
 
				+        top_left_flag, top_right_flag = height*h_ratio[0] + position_threshold, height*h_ratio[0] - position_threshold
			
 
				+
			
 
				+        if len(anchors) == 0:
			
 
				+            for m in markers:
			
 
				+                if m[4][0] < position_threshold and m[4][1] + m[4][0] < top_left_flag:
			
 
				+                    top_left_flag = m[4][1] + m[4][0]
			
 
				+                    first_anchor = m
			
 
				+                elif width - m[4][0] < position_threshold and m[4][1] - m[4][0] < top_right_flag:
			
 
				+                    top_right_flag = m[4][1] - m[4][0]
			
 
				+                    last_anchor = m
			
 
				+            if len(first_anchor) > 0:
			
 
				+                anchors.append(first_anchor)
			
 
				+            if len(last_anchor) > 0:
			
 
				+                anchors.append(last_anchor)
			
 
				+        else:
			
 
				+            if anchors[0][4][0] > position_threshold:
			
 
				+                for m in markers:
			
 
				+                    if m[4][0] < position_threshold and m[4][1] + m[4][0] < top_left_flag:
			
 
				+                        top_left_flag = m[4][1] + m[4][0]
			
 
				+                        first_anchor = m
			
 
				+                if len(first_anchor) > 0:
			
 
				+                    anchors.insert(0, first_anchor)
			
 
				+            if width - anchors[-1][4][0] > position_threshold:
			
 
				+                for m in markers:
			
 
				+                    if width - m[4][0] < position_threshold and m[4][1] - m[4][0] < top_right_flag:
			
 
				+                        top_right_flag = m[4][1] - m[4][0]
			
 
				+                        last_anchor = m
			
 
				+                if len(last_anchor) > 0:
			
 
				+                    anchors.append(last_anchor)
			
 
				+    if method == 'b':
			
 
				+        bottom_left_flag, bottom_right_flag = height*h_ratio[1]-position_threshold, height*h_ratio[1]+position_threshold
			
 
				+
			
 
				+        if len(anchors) == 0:
			
 
				+            for m in markers:
			
 
				+                if m[4][0] < position_threshold and m[4][1] - m[4][0] > bottom_left_flag:
			
 
				+                    bottom_left_flag = m[4][1] - m[4][0]
			
 
				+                    first_anchor = m
			
 
				+                elif width - m[4][0] < position_threshold and m[4][1] + m[4][0] > bottom_right_flag:
			
 
				+                    bottom_right_flag = m[4][1] + m[4][0]
			
 
				+                    last_anchor = m
			
 
				+            if len(first_anchor) > 0:
			
 
				+                anchors.append(first_anchor)
			
 
				+            if len(last_anchor) > 0:
			
 
				+                anchors.append(last_anchor)
			
 
				+        else:
			
 
				+            if anchors[0][4][0] > position_threshold:
			
 
				+                for m in markers:
			
 
				+                    if m[4][0] < position_threshold and m[4][1] - m[4][0] > bottom_left_flag:
			
 
				+                        bottom_left_flag = m[4][1] - m[4][0]
			
 
				+                        first_anchor = m
			
 
				+                if len(first_anchor) > 0:
			
 
				+                    anchors.insert(0, first_anchor)
			
 
				+            if width - anchors[-1][4][0] > position_threshold:
			
 
				+                for m in markers:
			
 
				+                    if width - m[4][0] < position_threshold and m[4][1] + m[4][0] > bottom_right_flag:
			
 
				+                        bottom_right_flag = m[4][1] + m[4][0]
			
 
				+                        last_anchor = m
			
 
				+                if len(last_anchor) > 0:
			
 
				+                    anchors.append(last_anchor)
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        draw_box(image, [first_anchor, last_anchor], debug=1)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title('anchor by position')
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+
			
 
				+
			
 
				+def detect_anchor_public(image, method='connected', debug=0):
			
 
				+    #   寻找第三方试卷最上方及最下方的定位点
			
 
				+    shift_threshold = 50  # 80
			
 
				+    height, width = image.shape[:2]
			
 
				+    h0, h1 = 0.1, 0.9
			
 
				+    pos = (0.1, 0.3, 0.5, 0.6, 0.8, 0.9)
			
 
				+    pos_threshold = 0.1
			
 
				+    area_threshold = 0.28  # 0.25   # 大定位点面积差阈值
			
 
				+    anchors_len_threshold = 2
			
 
				+    shape_para = {'height': (80, 10), 'w2h': (3, 0.6), 'area': (6000, 500), 'area_ratio': 0.9}
			
 
				+    blur_size = 3
			
 
				+    sigma = 5
			
 
				+    shift_edge = 2  # 对定位点位置做微调偏移
			
 
				+
			
 
				+    binary = pre_process_for_anchors(image, debug=0, blur_size=blur_size, sigma=sigma)
			
 
				+    binary = extract_feature(binary, method=4, debug=0)
			
 
				+    boxes = find_boxes(binary, method=method, debug=0)
			
 
				+    markers = find_marker_by_shape(boxes, shape_para=shape_para, debug=0)
			
 
				+    marker_list = collect_markers_by_position(markers, method='h', shift_threshold=shift_threshold)
			
 
				+
			
 
				+    if len(marker_list) == 0:
			
 
				+        anchors = []
			
 
				+    elif len(marker_list) == 1:
			
 
				+        anchors = marker_list[0]
			
 
				+    else:
			
 
				+        anchors = marker_list[0]
			
 
				+        anchors.extend(marker_list[-1])
			
 
				+    anchors = [[a[0]-shift_edge, a[1]-shift_edge, a[2]-shift_edge-1, a[3]-shift_edge-1] for a in anchors]
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        # print(anchors)
			
 
				+        draw_box(image, anchors, (0, 255, 255), debug=1)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title(method)
			
 
				+        plt.imshow(image, cmap='gray')
			
 
				+        plt.show()
			
 
				+    elif debug == 2:
			
 
				+        markers.sort(reverse=True, key=lambda x: x[4][1])
			
 
				+        draw_box(image, markers, debug=1)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title('by shape')
			
 
				+        plt.imshow(image, cmap='gray')
			
 
				+        plt.show()
			
 
				+
			
 
				+    return anchors
			
 
				+
			
 
				+
			
 
				+def detect_anchor(image, method='connected', debug=0):
			
 
				+    #   寻找试卷最上方及最下方的定位点
			
 
				+
			
 
				+    shift_threshold = 50    # 80
			
 
				+    height, width = image.shape[:2]
			
 
				+    h0, h1 = 0.1, 0.9
			
 
				+    pos = (0.1, 0.3, 0.5, 0.6, 0.8, 0.9)
			
 
				+    pos_threshold = 0.1
			
 
				+    area_threshold = 0.28   # 0.25   # 大定位点面积差阈值
			
 
				+    anchors_len_threshold = 2
			
 
				+
			
 
				+    binary = pre_process_for_anchors(image, debug=0)
			
 
				+    binary = extract_feature(binary, method=4, debug=0)
			
 
				+    boxes = find_boxes(binary, method=method, debug=0)
			
 
				+    markers = find_marker_by_shape(boxes, debug=0)
			
 
				+    marker_list = collect_markers_by_position(markers, method='h', shift_threshold=shift_threshold)
			
 
				+    # print(len(marker_list[0]))
			
 
				+    # print(len(marker_list[-1]))
			
 
				+    if len(marker_list) == 0:
			
 
				+        return []
			
 
				+
			
 
				+    #   如果上方定位点不多于下方定位点，上下左右翻转答题卡
			
 
				+    if len(marker_list[0]) < len(marker_list[-1]) and len(marker_list[-1]) >= 6:
			
 
				+        image = cv2.flip(image, -1)
			
 
				+        binary = cv2.flip(binary, -1)
			
 
				+        boxes = find_boxes(binary, method=method, debug=0)
			
 
				+        markers = find_marker_by_shape(boxes, debug=0)
			
 
				+        marker_list = collect_markers_by_position(markers, method='h', shift_threshold=shift_threshold)
			
 
				+
			
 
				+    # for m in marker_list:
			
 
				+    #     m.sort(key=lambda x: x[4][0])
			
 
				+    top_anchors = []
			
 
				+    bottom_anchors = []
			
 
				+    version_points = []
			
 
				+    top_y = np.mean(np.array([m[4][1] for m in marker_list[0]]))
			
 
				+    bottom_y = np.mean(np.array([m[4][1] for m in marker_list[-1]]))
			
 
				+    top_index, bottom_index = 0, -1
			
 
				+
			
 
				+    try:
			
 
				+        bottom_anchors.append(min([m for m in marker_list[bottom_index] if m[4][0] / width < pos[0]],
			
 
				+                                  key=lambda x: abs(x[4][1] - bottom_y)))
			
 
				+    except ValueError:
			
 
				+        pass
			
 
				+    try:
			
 
				+        bottom_anchors.append(min([m for m in marker_list[bottom_index] if m[4][0] / width > pos[-1]],
			
 
				+                                  key=lambda x: abs(x[4][1] - bottom_y)))
			
 
				+    except ValueError:
			
 
				+        pass
			
 
				+
			
 
				+    top_list = [m for m in marker_list[top_index] if m[4][0] / width < pos[0]]
			
 
				+    if len(top_list) > 0:
			
 
				+        top_anchors.append(min(top_list, key=lambda x: abs(x[4][1] - top_y)))
			
 
				+    top_list = [m for m in marker_list[top_index] if abs(m[4][0] / width - pos[1]) < pos_threshold]
			
 
				+    if len(top_list) > 0:
			
 
				+        top_anchors.append(min(top_list, key=lambda x: abs(x[4][1] - top_y)))
			
 
				+        top_list = [m for m in marker_list[top_index] if abs(m[4][0] / width - pos[3]) < pos_threshold]
			
 
				+        if len(top_list) > 0:
			
 
				+            top_anchors.append(min(top_list, key=lambda x: abs(x[4][1] - top_y)))
			
 
				+    else:
			
 
				+        top_list = [m for m in marker_list[top_index] if abs(m[4][0] / width - pos[2]) < pos_threshold]
			
 
				+        if len(top_list) > 0:
			
 
				+            top_anchors.append(min(top_list, key=lambda x: abs(x[4][1] - top_y)))
			
 
				+        else:
			
 
				+            top_list = [m for m in marker_list[top_index] if abs(m[4][0] / width - pos[3]) < pos_threshold]
			
 
				+            if len(top_list) > 0:
			
 
				+                top_anchors.append(min(top_list, key=lambda x: abs(x[4][1] - top_y)))
			
 
				+
			
 
				+    try:
			
 
				+        top_anchors.append(min([m for m in marker_list[top_index] if m[4][0] / width > pos[-1]],
			
 
				+                               key=lambda x: abs(x[4][1] - top_y) - x[4][0]))
			
 
				+    except ValueError:
			
 
				+        pass
			
 
				+    # for m in marker_list[-1]:
			
 
				+    #     if m[4][0] / width < pos[0]:
			
 
				+    #         bottom_anchors[0] = m
			
 
				+    #     elif m[4][0] / width > pos[-1]:
			
 
				+    #         bottom_anchors[1] = m
			
 
				+    # for m in marker_list[0]:
			
 
				+    #     if m[4][0] / width > pos[-1]:
			
 
				+    #         top_anchors[-1] = m
			
 
				+    #     elif abs(m[4][0] / width - pos[2]) < pos_threshold:
			
 
				+    #         top_anchors[1] = m
			
 
				+    #         top_anchors.pop(2)
			
 
				+    #     elif abs(m[4][0] / width - pos[1]) < pos_threshold:
			
 
				+    #         top_anchors[1] = m
			
 
				+    #     elif abs(m[4][0] / width - pos[3]) < pos_threshold:
			
 
				+    #         top_anchors[2] = m
			
 
				+
			
 
				+    # for i in range(1, len(marker_list[0])+1):
			
 
				+    #     if marker_list[0][len(marker_list[0])-i][4][0] / width < pos[0]:
			
 
				+    #         top_anchors[0] = marker_list[0][-i]
			
 
				+    #         break
			
 
				+
			
 
				+
			
 
				+    # # 按面积从大到小排列
			
 
				+    # markers.sort(reverse=True, key=lambda x: x[-1])
			
 
				+    # flag = 0
			
 
				+    # anchors = []
			
 
				+    # for i in range(len(markers)):
			
 
				+    #     anchors = []
			
 
				+    #     anchors.append(markers[i])
			
 
				+    #     for j in range(i+1, len(markers)):
			
 
				+    #         if (anchors[0][-1] - markers[j][-1]) / anchors[0][-1] <= area_threshold:
			
 
				+    #             anchors.append(markers[j])
			
 
				+    #             flag = j
			
 
				+    #     if len(anchors) >= anchors_len_threshold:
			
 
				+    #         break
			
 
				+    # if len(anchors) == 0:
			
 
				+    #     return [[], [], []]
			
 
				+    # anchors.sort(key=lambda x: x[4][1])
			
 
				+    # # print('anchors\n')
			
 
				+    # # print(anchors)
			
 
				+    # top_anchors = []
			
 
				+    # bottom_anchors = []
			
 
				+    # for a in anchors:
			
 
				+    #     if a[4][1] - anchors[0][4][1] < shift_threshold:
			
 
				+    #         top_anchors.append(a)
			
 
				+    #     elif anchors[-1][4][1] - a[4][1] < shift_threshold:
			
 
				+    #         bottom_anchors.append(a)
			
 
				+    # top_anchors.sort(key=lambda x: x[4][0])
			
 
				+    # bottom_anchors.sort(key=lambda x: x[4][0])
			
 
				+    # detect_anchor_by_position(top_anchors, markers, image, method='t', debug=0)
			
 
				+    # detect_anchor_by_position(bottom_anchors, markers, image, method='b', debug=0)
			
 
				+    #
			
 
				+    # version_points = []
			
 
				+    # for i in range(flag + 1, len(markers)):
			
 
				+    #     if abs(markers[i][4][1] - anchors[0][4][1]) <= shift_threshold and markers[i][4][0] > width / 2:
			
 
				+    #         version_points.append(markers[i])
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        draw_box(image, top_anchors, (0, 255, 255), debug=1)
			
 
				+        draw_box(image, bottom_anchors, (255, 0, 255), debug=1)
			
 
				+        draw_box(image, version_points, (255, 255, 0), debug=1)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title(method)
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+    elif debug == 2:
			
 
				+        markers.sort(reverse=True, key=lambda x: x[4][1])
			
 
				+        draw_box(image, markers, debug=1)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title('by shape')
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+    elif debug == 3:
			
 
				+        colors = ((0, 255, 255), (255, 0, 255), (255, 255, 0))
			
 
				+        if len(marker_list) > 0:
			
 
				+            c = 0
			
 
				+            for p in marker_list:
			
 
				+                draw_box(image, p, color=colors[c])
			
 
				+                c = (c + 1) % 3
			
 
				+            plt.figure(figsize=(15, 10))
			
 
				+            plt.title('marker list')
			
 
				+            plt.imshow(image)
			
 
				+            plt.show()
			
 
				+
			
 
				+    return [[top_anchors, bottom_anchors, version_points], image]
			
 
				+
			
 
				+
			
 
				+def detect_problem_marker(image, anchors, method='connected', column_num=2, debug=0):
			
 
				+    #   寻找题目定位点
			
 
				+    double_page_width_ratio = 0.42      # 默认双栏宽度比例
			
 
				+    three_page_width_ratio = 0.29       # 默认三栏宽度比例
			
 
				+    double_page_separation = 250        # 默认双栏栏间间距
			
 
				+    three_page_separation = 100         # 默认三栏栏间间距
			
 
				+    horizontal_threshold = 100           # 单栏宽度比例阈值
			
 
				+    shift_threshold = 80    # 50
			
 
				+    col_threshold = 100
			
 
				+    blank1 = 20
			
 
				+    blank2 = 100
			
 
				+    # shape_para = {'height': (80, 10), 'w2h': (3, 0.5), 'area': (6000, 500), 'area_ratio': 0.5}
			
 
				+    colors = ((0, 255, 255), (255, 0, 255), (255, 255, 0))
			
 
				+    height, width = image.shape[:2]
			
 
				+    remove_iteration = 1                # 去除异常值循环次数
			
 
				+
			
 
				+    #   确定单栏宽度及每栏定位
			
 
				+    page_width, column_num, column_pos = find_column(anchors, width, column_num, debug=0)
			
 
				+
			
 
				+    #   清除答题卡上下方定位点
			
 
				+    top_anchors, bottom_anchors = anchors[:2]
			
 
				+    if len(top_anchors) > 0:
			
 
				+        blank_top = max(top_anchors, key=lambda x: x[3])[3] + blank1
			
 
				+    else:
			
 
				+        blank_top = blank2
			
 
				+    if len(bottom_anchors) > 0:
			
 
				+        blank_bottom = min(bottom_anchors, key=lambda x: x[1])[1] - blank1
			
 
				+    else:
			
 
				+        blank_bottom = height - blank2
			
 
				+    binary = pre_process(image, blank_top, blank_bottom, debug=0)
			
 
				+
			
 
				+    #   寻找所有可能的题目定位点
			
 
				+    binary = extract_feature(binary, method=3, debug=0)
			
 
				+    boxes = find_boxes(binary, method=method, debug=0)
			
 
				+    marker_candidates = find_marker_by_shape(boxes, debug=0)
			
 
				+    marker_candidates.sort(key=lambda x: x[4][0])
			
 
				+
			
 
				+    # #   寻找配对点
			
 
				+    # pair_list = []
			
 
				+    # pair_list_indexes = []
			
 
				+    # for i in range(len(marker_candidates)):
			
 
				+    #     if i not in pair_list_indexes:
			
 
				+    #         p, p_index = find_pair(marker_candidates[i], marker_candidates, page_width, horizontal_threshold)
			
 
				+    #         if p_index >= 0:
			
 
				+    #             pair_list.append([marker_candidates[i], p])
			
 
				+    #             pair_list_indexes.append(p_index)
			
 
				+    # if len(pair_list) > 0:
			
 
				+    #     pair_list.sort(key=lambda x: x[0][4][0])
			
 
				+    #
			
 
				+    # #   按栏寻找题目定位点
			
 
				+    # if column_num == 2:
			
 
				+    #     problem_markers = [[], []]      # 题目定位点分两栏排列
			
 
				+    #     for p in pair_list:
			
 
				+    #         if abs(p[0][4][0] - column_pos[0]) < shift_threshold:
			
 
				+    #             problem_markers[0].extend(p)
			
 
				+    #         elif abs(p[0][4][0] - column_pos[1]) < shift_threshold:
			
 
				+    #             problem_markers[1].extend(p)
			
 
				+    #
			
 
				+    # elif column_num == 3:
			
 
				+    #     problem_markers = [[], [], []]  # 题目定位点分三栏排列
			
 
				+    #     for p in pair_list:
			
 
				+    #         if abs(p[0][4][0] - column_pos[0]) < shift_threshold:
			
 
				+    #             problem_markers[0].extend(p)
			
 
				+    #         elif abs(p[0][4][0] - column_pos[1]) < shift_threshold:
			
 
				+    #             problem_markers[2].extend(p)
			
 
				+    #         elif abs(p[0][4][0] - column_pos[0] - page_width - three_page_separation) < shift_threshold:
			
 
				+    #             problem_markers[1].extend(p)
			
 
				+    #
			
 
				+    # #   剔除异常mark
			
 
				+    # for i in range(len(problem_markers)):
			
 
				+    #     problem_markers[i] = remove_abnormal_marker(problem_markers[i], debug=0)
			
 
				+
			
 
				+    #   将定位点按垂直位置排列
			
 
				+    marker_list = collect_markers_by_position(marker_candidates, method='v', debug=0)
			
 
				+    if column_num == 2:
			
 
				+        problem_markers = [[], []]
			
 
				+    elif column_num == 3:
			
 
				+        problem_markers = [[], [], []]
			
 
				+    if len(marker_list) == 0:
			
 
				+        return problem_markers
			
 
				+
			
 
				+    #   将题目定位点配对
			
 
				+    # for col in marker_list:
			
 
				+    #     col.sort(key=lambda x: x[4][1])
			
 
				+    mid_left_col_pos, mid_right_col_pos = column_pos[0] + page_width, column_pos[1] - page_width
			
 
				+    mid_left_col_indexes, mid_right_col_indexes = [], []
			
 
				+    for col_index, col in enumerate(marker_list):
			
 
				+        if abs(col[0][4][0] - column_pos[0]) < col_threshold:
			
 
				+            pair_list_index = find_pair_list(col, marker_list, page_width, col_threshold)[1]
			
 
				+            if pair_list_index >= 0:
			
 
				+                for c in col:
			
 
				+                    pair, pair_index, distance = find_pair(c, marker_list[pair_list_index], page_width, col_threshold)
			
 
				+                    if pair_index >= 0:
			
 
				+                        problem_markers[0].extend([c, pair])
			
 
				+                        mid_left_col_indexes.append(pair_list_index)
			
 
				+                        # print('distance', col_index, distance)
			
 
				+                        if pair[4][0] < mid_left_col_pos:
			
 
				+                            mid_left_col_pos = pair[4][0]
			
 
				+        elif abs(col[0][4][0] - column_pos[1]) < col_threshold:
			
 
				+            pair_list_index = find_pair_list(col, marker_list, page_width, col_threshold)[1]
			
 
				+            if pair_list_index >= 0:
			
 
				+                for c in col:
			
 
				+                    pair, pair_index, distance = find_pair(c, marker_list[pair_list_index], page_width, col_threshold)
			
 
				+                    if pair_index >= 0:
			
 
				+                        problem_markers[-1].extend([c, pair])
			
 
				+                        mid_right_col_indexes.append(col_index)
			
 
				+                        # print('distance', col_index, distance)
			
 
				+                        if c[4][0] - page_width > mid_right_col_pos:
			
 
				+                            mid_right_col_pos = c[4][0] - page_width
			
 
				+    mid_left_col_indexes = set(mid_left_col_indexes)
			
 
				+    mid_right_col_indexes = set(mid_right_col_indexes)
			
 
				+    # print(mid_left_col_indexes, mid_left_col_pos)
			
 
				+    # print(mid_right_col_indexes, mid_right_col_pos)
			
 
				+    if column_num == 3:
			
 
				+        for col_index, col in enumerate(marker_list):
			
 
				+            if mid_left_col_pos < col[0][4][0] < mid_right_col_pos and (col_index not in mid_left_col_indexes):
			
 
				+                pair_list_index = find_pair_list(col, marker_list, page_width, col_threshold)[1]
			
 
				+                if pair_list_index not in mid_right_col_indexes:
			
 
				+                    for c in col:
			
 
				+                        pair, pair_index, distance = find_pair(c, marker_list[pair_list_index], page_width,
			
 
				+                                                               col_threshold)
			
 
				+                        if pair_index >= 0:
			
 
				+                            problem_markers[1].extend([c, pair])
			
 
				+                            # print('distance', col_index, distance)
			
 
				+
			
 
				+    for index, p in enumerate(problem_markers):
			
 
				+        temp = [[p[2*i], p[2*i+1]] for i in range(len(p)//2)]
			
 
				+        temp.sort(key=lambda x: x[0][4][1])
			
 
				+        problem_markers[index] = []
			
 
				+        for t in temp:
			
 
				+            problem_markers[index].extend(t)
			
 
				+    #   剔除异常mark
			
 
				+    for i in range(len(problem_markers)):
			
 
				+        for j in range(remove_iteration):
			
 
				+            problem_markers[i], page_width = remove_abnormal_marker(problem_markers[i], page_width, debug=0)
			
 
				+    problem_markers = check_with_anchor(problem_markers, top_anchors, page_width, column_num)
			
 
				+
			
 
				+    # pair_list = find_pair_list(marker_list[0], marker_list, page_width)
			
 
				+    # if pair_list:
			
 
				+    #     page_width = abs(marker_list[0][0][4][0] - pair_list[0][4][0])
			
 
				+    # abscissa_flag = 0
			
 
				+    # for prob in marker_list:
			
 
				+    #     if prob[0][4][0] >= abscissa_flag:
			
 
				+    #         pair_list = find_pair_list(prob, marker_list, page_width)
			
 
				+    #         if pair_list:
			
 
				+    #             for p in prob:
			
 
				+    #                 pair = find_pair(p, pair_list, page_width)
			
 
				+    #                 if pair:
			
 
				+    #                     problem_markers.extend(pair)
			
 
				+    #                     abscissa_flag = pair[1][4][0] + shift_threshold
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        print(page_width, column_num, column_pos)
			
 
				+        c = 0
			
 
				+        for p in problem_markers:
			
 
				+            print('column:', c)
			
 
				+            draw_box(image, p, color=colors[c], debug=1)
			
 
				+            c = (c + 1) % 3
			
 
				+            t = [(p[2*i+2][4][0]-p[2*i][4][0])/(p[2*i+2][4][1]-p[2*i][4][1]) for i in range(len(p)//2-1)]
			
 
				+            print('slope', t)
			
 
				+        # draw_box(image, problem_markers)
			
 
				+        plt.figure(figsize=(12, 8))
			
 
				+        plt.title('problem markers')
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+    elif debug == 2:
			
 
				+        print(page_width, column_num, column_pos)
			
 
				+        if len(marker_list) > 0:
			
 
				+            c = 0
			
 
				+            for p in marker_list:
			
 
				+                draw_box(image, p, color=colors[c], debug=1)
			
 
				+                c = (c + 1) % 3
			
 
				+            plt.figure(figsize=(15, 10))
			
 
				+            plt.title('marker list')
			
 
				+            plt.imshow(image)
			
 
				+            plt.show()
			
 
				+
			
 
				+        # c = 0
			
 
				+        # for p in pair_list:
			
 
				+        #     draw_box(image, p, color=colors[c])
			
 
				+        #     c = (c + 1) % 3
			
 
				+        # plt.figure(figsize=(15, 10))
			
 
				+        # plt.title('pair list')
			
 
				+        # plt.imshow(image)
			
 
				+        # plt.show()
			
 
				+    elif debug == 3:
			
 
				+        draw_box(image, marker_candidates)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title('marker candidates')
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+    elif debug == 4:
			
 
				+        draw_box(image, boxes, debug=1)
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.title('boxes')
			
 
				+        plt.imshow(image)
			
 
				+        plt.show()
			
 
				+
			
 
				+    return problem_markers
			
 
				+
			
 
				+
			
 
				+def main(img_file, method='connected', debug=0):
			
 
				+    #   寻找所有定位点
			
 
				+    colors = ((0, 255, 255), (255, 0, 255), (255, 255, 0))
			
 
				+    # image = cv2.imread(img_file)
			
 
				+    image = read_single_img(img_file)
			
 
				+    # image = Image.open(img_file)
			
 
				+
			
 
				+    rot_image, flag = rotate_by_anchor(image, method=method, debug=0)
			
 
				+    # if not flag:
			
 
				+    #     return []
			
 
				+    anchors, rot_image = detect_anchor(rot_image, method=method, debug=0)
			
 
				+
			
 
				+    # #   如果上方定位点不多于下方定位点，上下左右翻转答题卡
			
 
				+    # if len(anchors) > 0:
			
 
				+    #     if len(anchors[0]) + len(anchors[2]) <= len(anchors[1]):
			
 
				+    #         rot_image = cv2.flip(rot_image, -1)
			
 
				+    #         anchors = detect_anchor(rot_image, method=method, debug=0)
			
 
				+
			
 
				+    #   寻找题目定位点
			
 
				+    problem_markers = detect_problem_marker(rot_image, anchors, method=method, debug=0)
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        c = 0
			
 
				+        for i in range(3):
			
 
				+            draw_box(rot_image, anchors[i], color=colors[c])
			
 
				+            c = (c + 1) % 3
			
 
				+        for p in problem_markers:
			
 
				+            draw_box(rot_image, p)
			
 
				+        plt.figure(figsize=(12, 8))
			
 
				+        plt.title('markers')
			
 
				+        plt.imshow(rot_image)
			
 
				+        plt.show()
			
 
				+    # elif debug == 2:
			
 
				+    #     path, image_name = os.path.split(img_file)
			
 
				+    #     out_dir = path + '_output'
			
 
				+    #     out_img_file = os.path.join(out_dir, image_name)
			
 
				+    #     if not os.path.exists(out_dir):
			
 
				+    #         os.makedirs(out_dir)
			
 
				+    #     cv2.imwrite(out_img_file, rot_image)
			
 
				+    #     rot_image.save(out_img_file)
			
 
				+    #     xml_file = out_img_file.replace('.jpg', '.xml')
			
 
				+    #     create_xml(xml_file, markers)
			
 
				+
			
 
				+    return anchors, problem_markers
			
 
				+
			
 
				+
			
 
				+def find_anchor(image, method='connected'):
			
 
				+    # rot_image, flag = rotate_by_anchor(image, method=method, debug=0)
			
 
				+    anchors_raw = detect_anchor_public(image, method=method, debug=0)
			
 
				+    anchors_list = []
			
 
				+    for anchor in anchors_raw:
			
 
				+        bbox = {'xmin': int(str(anchor[0])), 'ymin': int(str(anchor[1])),
			
 
				+                'xmax': int(str(anchor[2])), 'ymax': int(str(anchor[3]))}
			
 
				+        anchor_dict = {'class_name': 'anchor_point', 'bounding_box': bbox}
			
 
				+        anchors_list.append(anchor_dict)
			
 
				+    return anchors_list
			
--- a/segment/sheet_resolve/analysis/anchor/util.py
+++ b/segment/sheet_resolve/analysis/anchor/util.py
@@ -0,0 +1,624 @@
 
				+import cv2
			
 
				+import matplotlib.pylab as plt
			
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+def read_single_img(img_path):
			
 
				+    try:
			
 
				+        im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
			
 
				+    except FileNotFoundError as e:
			
 
				+        raise e
			
 
				+    return im
			
 
				+
			
 
				+
			
 
				+def pre_process(image, blank_top=20, blank_bottom=-20, blur_size=5, sigma=5, debug=0):
			
 
				+    #   返回二值逆图
			
 
				+    blank_size = 20
			
 
				+
			
 
				+    if image.ndim == 3:
			
 
				+        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
			
 
				+    elif image.ndim == 2:
			
 
				+        gray = image
			
 
				+    #   裁边
			
 
				+    gray[0:blank_top, :] = 255
			
 
				+    gray[blank_bottom:, :] = 255
			
 
				+    gray[:, 0:blank_size] = 255
			
 
				+    gray[:, -blank_size:] = 255
			
 
				+    pre = 255 - gray
			
 
				+    pre = cv2.GaussianBlur(pre, (blur_size, blur_size), sigma)
			
 
				+    binary = cv2.threshold(pre, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.subplot(211)
			
 
				+        plt.title('gray')
			
 
				+        plt.imshow(gray, cmap='gray')
			
 
				+        plt.subplot(212)
			
 
				+        plt.title('binary')
			
 
				+        plt.imshow(255 - binary, cmap='gray')
			
 
				+        plt.show()
			
 
				+
			
 
				+    return binary
			
 
				+
			
 
				+
			
 
				+def pre_process_for_anchors(image, blank_top=20, blank_bottom=-20, blur_size=5, sigma=10, blank_size=20, debug=0):
			
 
				+    #   去掉中间内容，返回上下定位点的二值逆图
			
 
				+
			
 
				+    h_ratio = (0.1, 0.9)
			
 
				+    h0 = int(image.shape[0] * h_ratio[0])
			
 
				+    h1 = int(image.shape[0] * h_ratio[1])
			
 
				+
			
 
				+    if image.ndim == 3:
			
 
				+        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
			
 
				+    elif image.ndim == 2:
			
 
				+        gray = image.copy()
			
 
				+
			
 
				+    #   裁边
			
 
				+    gray[0:blank_top, :] = 255
			
 
				+    gray[blank_bottom:, :] = 255
			
 
				+    gray[:, 0:blank_size] = 255
			
 
				+    gray[:, -blank_size:] = 255
			
 
				+    #   去掉中间内容
			
 
				+    gray[h0:h1, :] = 255
			
 
				+
			
 
				+    pre = 255 - gray
			
 
				+    pre = cv2.GaussianBlur(pre, (blur_size, blur_size), sigma)
			
 
				+    binary = cv2.threshold(pre, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.subplot(211)
			
 
				+        plt.title('gray')
			
 
				+        plt.imshow(gray, cmap='gray')
			
 
				+        plt.subplot(212)
			
 
				+        plt.title('binary')
			
 
				+        plt.imshow(255 - binary, cmap='gray')
			
 
				+        plt.show()
			
 
				+
			
 
				+    return binary
			
 
				+
			
 
				+
			
 
				+def extract_feature(binary, method=4, ker_size1=2, ker_size2=10, debug=0):
			
 
				+    #   对二值图进一步处理
			
 
				+    close_size = 3
			
 
				+    kernel_height = 5
			
 
				+    kernel_width = 1
			
 
				+    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (close_size, close_size))
			
 
				+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_height, kernel_width))
			
 
				+
			
 
				+    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ker_size1, ker_size2))
			
 
				+    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ker_size2, ker_size1))
			
 
				+
			
 
				+    if method == 1:
			
 
				+        # ret = cv2.dilate(binary, kernel)
			
 
				+        ret = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)
			
 
				+        ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, vertical_kernel)
			
 
				+    elif method == 2:
			
 
				+        ret = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
			
 
				+        ret = cv2.morphologyEx(ret, cv2.MORPH_CLOSE, close_kernel)
			
 
				+    elif method == 3:
			
 
				+        ret = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel)
			
 
				+        ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, vertical_kernel)
			
 
				+    elif method == 4:
			
 
				+        ret = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, close_kernel)
			
 
				+        ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, horizontal_kernel)
			
 
				+        ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, vertical_kernel)
			
 
				+    else:
			
 
				+        ret = binary
			
 
				+    if debug == 1:
			
 
				+        plt.figure(figsize=(15, 10))
			
 
				+        plt.subplot(211)
			
 
				+        plt.title('before feature extraction')
			
 
				+        plt.imshow(255 - binary, cmap='gray')
			
 
				+        # plt.show()
			
 
				+        # plt.figure(figsize=(15, 10))
			
 
				+        plt.subplot(212)
			
 
				+        plt.title('after feature extraction')
			
 
				+        plt.imshow(255 - ret, cmap='gray')
			
 
				+        plt.show()
			
 
				+
			
 
				+    return ret
			
 
				+
			
 
				+
			
 
				+def draw_contour(binary):
			
 
				+    (major, minor, _) = cv2.__version__.split(".")     # check cv version
			
 
				+    boxes = []
			
 
				+    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+    contours = contours[1] if major == '3' else contours[0]
			
 
				+    for i in range(0, len(contours)):
			
 
				+        xmin, ymin, w, h = cv2.boundingRect(contours[i])
			
 
				+        xmax = xmin + w
			
 
				+        ymax = ymin + h
			
 
				+        centroid = [xmin + w // 2, ymin + h // 2]
			
 
				+        boxes.append([xmin, ymin, xmax, ymax, centroid, w*h])
			
 
				+
			
 
				+    return boxes
			
 
				+
			
 
				+
			
 
				+def draw_connected_component(binary):
			
 
				+    connectivity = 8
			
 
				+    boxes = []
			
 
				+    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=connectivity)
			
 
				+    for l in range(1, num_labels):
			
 
				+        xmin = stats[l, cv2.CC_STAT_LEFT]
			
 
				+        ymin = stats[l, cv2.CC_STAT_TOP]
			
 
				+        xmax = stats[l, cv2.CC_STAT_WIDTH] + xmin
			
 
				+        ymax = stats[l, cv2.CC_STAT_HEIGHT] + ymin
			
 
				+        area = stats[l, cv2.CC_STAT_AREA]
			
 
				+        boxes.append([xmin, ymin, xmax, ymax, [int(centroids[l][0]), int(centroids[l][1])], area])
			
 
				+    return boxes
			
 
				+
			
 
				+
			
 
				+def find_boxes(binary, method='connected', debug=0):
			
 
				+    #   寻找轮廓
			
 
				+    if method == 'contour':
			
 
				+        boxes = draw_contour(binary)
			
 
				+    elif method == 'connected':
			
 
				+        boxes = draw_connected_component(binary)
			
 
				+    if debug == 1:
			
 
				+        boxes.sort(key=lambda x: x[4][1])
			
 
				+        for box in boxes:
			
 
				+            width = box[2] - box[0]
			
 
				+            height = box[3] - box[1]
			
 
				+            w_to_h = width / height
			
 
				+            area = box[-1]
			
 
				+            centroid = box[-2]
			
 
				+            area_ratio = area / (width * height)
			
 
				+            print('width:{}, height:{}, centroid:{}, w_to_h:{}, area:{}, area ratio:{}'.
			
 
				+                  format(width, height, centroid, w_to_h, area, area_ratio))
			
 
				+    return boxes
			
 
				+
			
 
				+
			
 
				+def find_marker_by_shape(boxes,
			
 
				+                         shape_para={'height': (80, 10), 'w2h': (3, 0.6), 'area': (6000, 500), 'area_ratio': 0.5},
			
 
				+                         debug=0):
			
 
				+    #   通过形状参数寻找定位点
			
 
				+    area_ratio_threshold = 0.96
			
 
				+    max_height, min_height = shape_para['height']
			
 
				+    max_w2h, min_w2h = shape_para['w2h']
			
 
				+    max_area, min_area = shape_para['area']
			
 
				+    min_area_ratio = shape_para['area_ratio']
			
 
				+
			
 
				+    markers = []
			
 
				+    for box in boxes:
			
 
				+        w = box[2] - box[0]
			
 
				+        h = box[3] - box[1]
			
 
				+
			
 
				+        if box[-1] >= area_ratio_threshold*w*h and min_area <= box[-1] <= max_area:
			
 
				+            markers.append(box)
			
 
				+        elif min_height <= h <= max_height and min_w2h <= w/h <= max_w2h \
			
 
				+                and min_area <= box[-1] <= max_area and box[-1] >= min_area_ratio*w*h:
			
 
				+            markers.append(box)
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        markers.sort(reverse=True, key=lambda x: x[-1])
			
 
				+        for box in markers:
			
 
				+            width = box[2] - box[0]
			
 
				+            height = box[3] - box[1]
			
 
				+            w_to_h = width / height
			
 
				+            area = box[-1]
			
 
				+            centroid = box[-2]
			
 
				+            area_ratio = area / (width * height)
			
 
				+            print('width:{}, height:{}, centroid:{}, w_to_h:{}, area:{}, area ratio:{}'.
			
 
				+                  format(width, height, centroid, w_to_h, area, area_ratio))
			
 
				+    elif debug == 2:
			
 
				+        for box in boxes:
			
 
				+            markers.append(box)
			
 
				+        for box in markers:
			
 
				+            width = box[2] - box[0]
			
 
				+            height = box[3] - box[1]
			
 
				+            w_to_h = width / height
			
 
				+            area = box[-1]
			
 
				+            centroid = box[-2]
			
 
				+            area_ratio = area / (width * height)
			
 
				+            print('width:{}, height:{}, centroid:{}, w_to_h:{}, area:{}, area ratio:{}'.
			
 
				+                  format(width, height, centroid, w_to_h, area, area_ratio))
			
 
				+
			
 
				+    return markers
			
 
				+
			
 
				+
			
 
				+def find_box_list_by_position(box, box_list, method='h', shift_threshold=30, slope_threshold=0.2, area_threshold=0.28):
			
 
				+    #   根据相近原则将box加入box_list中
			
 
				+    if len(box_list) > 0:
			
 
				+        if method == 'h':   # 水平分布
			
 
				+            index_flag, distance = -1, shift_threshold
			
 
				+            for index, bl in enumerate(box_list):
			
 
				+                d = abs(box[4][1] - bl[-1][4][1])
			
 
				+                if d < distance:
			
 
				+                    distance = d
			
 
				+                    index_flag = index
			
 
				+            if index_flag >= 0:
			
 
				+                box_list[index_flag].append(box)
			
 
				+            else:
			
 
				+                box_list.append([box])
			
 
				+        elif method == 'v':     # 垂直分布
			
 
				+            index_flag, distance = -1, shift_threshold
			
 
				+            for index, bl in enumerate(box_list):
			
 
				+                d = abs(box[4][0] - bl[-1][4][0])
			
 
				+                if d < distance and d < abs(box[4][1] - bl[-1][4][1]) * slope_threshold:
			
 
				+                    distance = d
			
 
				+                    index_flag = index
			
 
				+            if index_flag >= 0:
			
 
				+                box_list[index_flag].append(box)
			
 
				+            else:
			
 
				+                box_list.append([box])
			
 
				+        elif method == 's':     # 面积相近分布
			
 
				+            index_flag, area_diff = -1, area_threshold
			
 
				+            for index, bl in enumerate(box_list):
			
 
				+                d = abs((box[-1] - bl[-1][-1]) / bl[-1][-1])
			
 
				+                if d < area_diff:
			
 
				+                    area_diff = d
			
 
				+                    index_flag = index
			
 
				+            if index_flag >= 0:
			
 
				+                box_list[index_flag].append(box)
			
 
				+            else:
			
 
				+                box_list.append([box])
			
 
				+    else:
			
 
				+        box_list.append([box])
			
 
				+    return box_list
			
 
				+
			
 
				+
			
 
				+def collect_markers_by_position(boxes, method='h', shift_threshold=30, slope_threshold=0.2, area_threshold=0.28, debug=0):
			
 
				+    #   按照相近位置排列定位点
			
 
				+    box_list = []
			
 
				+    if method == 'h':       # 按水平位置相近排列
			
 
				+        boxes.sort(key=lambda x: x[4][0])
			
 
				+        for b in boxes:
			
 
				+            box_list = find_box_list_by_position(b, box_list, method=method, shift_threshold=shift_threshold,
			
 
				+                                                 slope_threshold=slope_threshold)
			
 
				+        box_list.sort(key=lambda x: x[0][4][1])
			
 
				+    elif method == 'v':       # 按垂直位置相近排列
			
 
				+        boxes.sort(key=lambda x: x[4][1])
			
 
				+        for b in boxes:
			
 
				+            box_list = find_box_list_by_position(b, box_list, method=method, shift_threshold=shift_threshold,
			
 
				+                                                 slope_threshold=slope_threshold)
			
 
				+        box_list.sort(key=lambda x: x[0][4][0])
			
 
				+    elif method == 's':       # 按面积大小相近排列
			
 
				+        boxes.sort(reverse=True, key=lambda x: x[-1])
			
 
				+        for b in boxes:
			
 
				+            box_list = find_box_list_by_position(b, box_list, method=method, shift_threshold=shift_threshold,
			
 
				+                                                 slope_threshold=slope_threshold, area_threshold=area_threshold)
			
 
				+        box_list.sort(reverse=True, key=lambda x: x[0][-1])
			
 
				+
			
 
				+    # if method == 'h':       # 按水平位置相近排列
			
 
				+    #     boxes.sort(key=lambda x: x[4][1])
			
 
				+    #     for b in boxes:
			
 
				+    #         index_flag, distance = -1, shift_threshold
			
 
				+    #         for index, single_list in enumerate(box_list):
			
 
				+    #             if abs(b[4][1] - single_list[-1][4][1]) < distance:
			
 
				+    #                 distance = abs(b[4][1] - single_list[-1][4][1])
			
 
				+    #                 index_flag = index
			
 
				+    #         if index_flag >= 0:
			
 
				+    #             box_list[index_flag].append(b)
			
 
				+    #         else:
			
 
				+    #             box_list.append([b])
			
 
				+    #
			
 
				+    # elif method == 'v':     # 按垂直位置相近排列
			
 
				+    #     boxes.sort(key=lambda x: x[4][0])
			
 
				+    #     for b in boxes:
			
 
				+    #         index_flag, distance = -1, shift_threshold
			
 
				+    #         for index, single_list in enumerate(box_list):
			
 
				+    #             if abs(b[4][0] - single_list[-1][4][0]) < distance:
			
 
				+    #                 distance = abs(b[4][0] - single_list[-1][4][0])
			
 
				+    #                 index_flag = index
			
 
				+    #         if index_flag >= 0:
			
 
				+    #             box_list[index_flag].append(b)
			
 
				+    #         else:
			
 
				+    #             box_list.append([b])
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        print('box list slope')
			
 
				+        if method == 'h':
			
 
				+            for box in box_list:
			
 
				+                if len(box) >= 2:
			
 
				+                    for i in range(len(box)-1):
			
 
				+                        slope = (box[i+1][4][1] - box[i][4][1])/(box[i+1][4][0] - box[i][4][0])
			
 
				+                        print(slope)
			
 
				+        elif method == 'v':
			
 
				+            for box in box_list:
			
 
				+                if len(box) >= 2:
			
 
				+                    for i in range(len(box) - 1):
			
 
				+                        slope = (box[i + 1][4][0] - box[i][4][0]) / (box[i + 1][4][1] - box[i][4][1])
			
 
				+                        print(slope)
			
 
				+
			
 
				+    return box_list
			
 
				+
			
 
				+
			
 
				+def check_with_anchor(problem_markers, top_anchors, page_width, column_num):
			
 
				+    #    根据top_anchors位置去除异常markers
			
 
				+    min_shift = 100
			
 
				+
			
 
				+    column_pos = []
			
 
				+
			
 
				+    if len(top_anchors) == column_num + 1:
			
 
				+        column_pos.append(top_anchors[0][4][0])
			
 
				+        if top_anchors[1][4][0] - top_anchors[0][4][0] < top_anchors[-1][4][0] - top_anchors[-2][4][0]:
			
 
				+            for i in range(2, column_num+1):
			
 
				+                column_pos.append(top_anchors[i][4][0] - page_width)
			
 
				+        else:
			
 
				+            for i in range(1, column_num):
			
 
				+                column_pos.append(top_anchors[i][4][0])
			
 
				+        for index, markers in enumerate(problem_markers):
			
 
				+            remove_list = []
			
 
				+            for i in range(len(markers)//2):
			
 
				+                if abs(markers[2*i][4][0]-column_pos[index]) > min_shift or \
			
 
				+                        abs(markers[2*i+1][4][0]-column_pos[index]-page_width) > min_shift:
			
 
				+                    remove_list.extend([2*i, 2*i+1])
			
 
				+            problem_markers[index] = [problem_markers[index][i] for i in range(len(problem_markers[index]))
			
 
				+                                      if i not in remove_list]
			
 
				+
			
 
				+    return problem_markers
			
 
				+
			
 
				+
			
 
				+def remove_abnormal_marker(markers, page_width, debug=0):
			
 
				+    #   从markers中剔除异常点
			
 
				+    error = 10
			
 
				+    max_std = 3
			
 
				+    min_std = 0.1
			
 
				+    min_area_ratio = 0.9
			
 
				+    min_distance = 60
			
 
				+    min_slope = 0.2
			
 
				+    distance_list = []
			
 
				+    remove_list = []
			
 
				+
			
 
				+    for i in range(len(markers)//2-1):
			
 
				+        min_flag = i
			
 
				+        distance_flag = abs(markers[2*i+1][4][0] - markers[2*i][4][0] - page_width) + abs(markers[2*i+1][4][1] -
			
 
				+                                                                                          markers[2*i][4][1])
			
 
				+        for j in range(i+1, len(markers)//2):
			
 
				+            if abs(markers[2*i+1][4][0] - markers[2*j+1][4][0]) + abs(markers[2*i+1][4][1] - markers[2*j+1][4][1]) \
			
 
				+                    < error:
			
 
				+                if distance_flag < abs(markers[2*j+1][4][0] - markers[2*j][4][0] - page_width) + \
			
 
				+                        abs(markers[2*j+1][4][1] - markers[2*j][4][1]):
			
 
				+                    remove_list.extend([2*j, 2*j+1])
			
 
				+                else:
			
 
				+                    distance_flag = abs(markers[2*j+1][4][0] - markers[2*j][4][0] - page_width) + \
			
 
				+                                    abs(markers[2*j+1][4][1] - markers[2*j][4][1])
			
 
				+                    remove_list.extend([2*min_flag, 2*min_flag+1])
			
 
				+                    min_flag = j
			
 
				+    markers = [markers[i] for i in range(len(markers)) if i not in remove_list]
			
 
				+
			
 
				+    remove_list = []
			
 
				+    if len(markers) >= 6:
			
 
				+        left_slope_list = np.asarray([abs((markers[2 * i][4][0] - markers[2 * i + 2][4][0])
			
 
				+                                          / (markers[2 * i][4][1] - markers[2 * i + 2][4][1]))
			
 
				+                                      for i in range(len(markers)//2-1)])
			
 
				+        right_slope_list = np.asarray([abs((markers[2 * i + 1][4][0] - markers[2 * i + 3][4][0]) /
			
 
				+                                           (markers[2 * i + 1][4][1] - markers[2 * i + 3][4][1]))
			
 
				+                                       for i in range(len(markers) // 2 - 1)])
			
 
				+        left_slope_list = left_slope_list > min_slope
			
 
				+        right_slope_list = right_slope_list > min_slope
			
 
				+        for i in range(len(left_slope_list)):
			
 
				+            if left_slope_list[i]:
			
 
				+                if i == len(left_slope_list) - 1:
			
 
				+                    if not left_slope_list[i-1]:
			
 
				+                        remove_list.extend([2*(i+1), 2*(i+1)+1])
			
 
				+                elif left_slope_list[i+1]:
			
 
				+                    remove_list.extend([2*(i+1), 2*(i+1)+1])
			
 
				+                elif not left_slope_list[i+1]:
			
 
				+                    remove_list.extend([2*i, 2*i+1])
			
 
				+        for i in range(len(right_slope_list)):
			
 
				+            if right_slope_list[i]:
			
 
				+                if i == len(right_slope_list) - 1:
			
 
				+                    if not right_slope_list[i-1]:
			
 
				+                        remove_list.extend([2*(i+1), 2*(i+1)+1])
			
 
				+                elif right_slope_list[i+1]:
			
 
				+                    remove_list.extend([2*(i+1), 2*(i+1)+1])
			
 
				+                elif not right_slope_list[i+1]:
			
 
				+                    remove_list.extend([2*i, 2*i+1])
			
 
				+
			
 
				+        markers = [markers[i] for i in range(len(markers)) if i not in set(remove_list)]
			
 
				+
			
 
				+    remove_list = []
			
 
				+    if len(markers) >= 2:
			
 
				+        left_x_list = np.asarray([markers[2*i][4][0] for i in range(len(markers)//2)])
			
 
				+        left_y_list = np.asarray([markers[2*i][4][1] for i in range(len(markers)//2)])
			
 
				+        right_x_list = np.asarray([markers[2*i+1][4][0] for i in range(len(markers)//2)])
			
 
				+        right_y_list = np.asarray([markers[2*i+1][4][1] for i in range(len(markers)//2)])
			
 
				+        distance_list = right_x_list - left_x_list
			
 
				+        shift_list = right_y_list - left_y_list
			
 
				+
			
 
				+        left_x_mean = left_x_list.mean()
			
 
				+        distance_mean = distance_list.mean()
			
 
				+        shift_mean = shift_list.mean()
			
 
				+        left_x_std = left_x_list.std()
			
 
				+        distance_std = distance_list.std()
			
 
				+        shift_std = shift_list.std()
			
 
				+        if len(markers) >= 4:
			
 
				+            for i in range(len(markers)//2):
			
 
				+                if left_x_std > min_std and abs(left_x_list[i] - left_x_mean) / left_x_std > max_std:
			
 
				+                    remove_list.extend([2*i, 2*i+1])
			
 
				+                elif shift_std > min_std and abs(shift_list[i] - shift_mean) / shift_std > max_std:
			
 
				+                    remove_list.extend([2 * i, 2 * i + 1])
			
 
				+                elif distance_std > min_std and abs(distance_list[i] - distance_mean) / distance_std > max_std:
			
 
				+                    remove_list.extend([2 * i, 2 * i + 1])
			
 
				+        elif len(markers) == 2:
			
 
				+            # area_ratio_list = np.asarray([m[-1]/((m[2]-m[0])*(m[3]-m[1])) for m in markers])
			
 
				+            if abs(distance_list-page_width) + abs(shift_list) > min_distance:
			
 
				+                remove_list.extend([0, 1])
			
 
				+    markers = [markers[i] for i in range(len(markers)) if i not in remove_list]
			
 
				+
			
 
				+    if len(markers) >= 2:
			
 
				+        new_page_width = markers[1][4][0] - markers[0][4][0]
			
 
				+    else:
			
 
				+        new_page_width = page_width
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        print(len(markers))
			
 
				+        if len(markers) >= 4:
			
 
				+            print('left', left_x_mean, left_x_std)
			
 
				+            print(left_x_list)
			
 
				+            for i, x in enumerate(left_x_list):
			
 
				+                delta = abs(x - left_x_mean) / left_x_std
			
 
				+                print(delta, left_y_list[i])
			
 
				+            print('shift', shift_mean, shift_std)
			
 
				+            print(shift_list)
			
 
				+            for i, shift in enumerate(shift_list):
			
 
				+                delta = abs(shift - shift_mean) / shift_std
			
 
				+                print(delta, left_y_list[i])
			
 
				+            print('distance', distance_mean, distance_std)
			
 
				+            print(distance_list)
			
 
				+            for i, distance in enumerate(distance_list):
			
 
				+                delta = abs(distance - distance_mean) / distance_std
			
 
				+                print(delta, left_y_list[i])
			
 
				+            print('total')
			
 
				+            for i in range(len(left_x_list)):
			
 
				+                delta = abs(left_x_list[i] - left_x_mean) / left_x_std + abs(shift_list[i] - shift_mean) / shift_std \
			
 
				+                        + abs(distance_list[i] - distance_mean) / distance_std
			
 
				+                d = abs(left_x_list[i] - left_x_mean) + abs(shift_list[i] - shift_mean) + \
			
 
				+                    abs(distance_list[i] - distance_mean)
			
 
				+                print(delta, d, left_y_list[i])
			
 
				+    if debug == 2:
			
 
				+        if len(markers) >= 2:
			
 
				+            print('page width', page_width, 'new page width:', new_page_width,
			
 
				+                  'distant difference:', abs(new_page_width - page_width) + abs(markers[1][4][1]-markers[0][4][1]))
			
 
				+
			
 
				+    return markers, new_page_width
			
 
				+
			
 
				+
			
 
				+def draw_box(image, boxes, color=(0, 255, 0), debug=0):
			
 
				+    #   生成定位点标注框图
			
 
				+    for box in boxes:
			
 
				+        if len(box) > 0:
			
 
				+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 5)
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        for box in boxes:
			
 
				+            if len(box) == 4:
			
 
				+                width = box[2] - box[0]
			
 
				+                height = box[3] - box[1]
			
 
				+                w_to_h = width / height
			
 
				+                # centroid = (box[0]+box[1])/2, (box[1]+box[3])/2
			
 
				+                print('width:{}, height:{}, w_to_h:{}, top_left:{}, bottom_right:{},'.
			
 
				+                      format(width, height, w_to_h, box[0:2], box[2:4]))
			
 
				+            elif len(box) > 4:
			
 
				+                width = box[2] - box[0]
			
 
				+                height = box[3] - box[1]
			
 
				+                w_to_h = width / height
			
 
				+                position_ratio = box[4][0] / image.shape[1]
			
 
				+                area = box[-1]
			
 
				+                centroid = box[4]
			
 
				+                area_ratio = area / (width * height)
			
 
				+                print('width:{}, height:{}, centroid:{}, position ratio:{}, w_to_h:{}, area:{}, area ratio:{}'.
			
 
				+                      format(width, height, centroid, position_ratio, w_to_h, area, area_ratio))
			
 
				+
			
 
				+
			
 
				+def find_pair(marker, boxes, page_width, threshold=100):
			
 
				+    #   若page_width为正，在boxes中找到marker的右配对， 若page_width为负， 在在boxes中找到marker的左配对
			
 
				+
			
 
				+    distance = threshold
			
 
				+    pair_index = -1
			
 
				+    for i in range(len(boxes)):
			
 
				+        if abs(marker[4][1] - boxes[i][4][1]) + abs(boxes[i][4][0] - marker[4][0] - page_width) <= threshold:
			
 
				+            if abs(marker[4][1] - boxes[i][4][1]) + abs(boxes[i][4][0] - marker[4][0] - page_width) < distance:
			
 
				+                distance = abs(marker[4][1] - boxes[i][4][1]) + abs(boxes[i][4][0] - marker[4][0] - page_width)
			
 
				+                pair_index = i
			
 
				+    if pair_index >= 0:
			
 
				+        return boxes[pair_index], pair_index, distance
			
 
				+    else:
			
 
				+        return [], pair_index, distance
			
 
				+
			
 
				+
			
 
				+def find_pair_list(marker_list, all_list, page_width, horizontal_threshold=100, debug=0):
			
 
				+    #   all_list 中找到与marker_list最接近的配对list
			
 
				+    max_count = 0
			
 
				+    index_flag = -1
			
 
				+    min_distance = horizontal_threshold
			
 
				+    for index, l in enumerate(all_list):
			
 
				+        count = 0
			
 
				+        distance = 0
			
 
				+        for m in marker_list:
			
 
				+            if find_pair(m, l, page_width, horizontal_threshold)[1] >= 0:
			
 
				+                count += 1
			
 
				+                distance += find_pair(m, l, page_width, horizontal_threshold)[2]
			
 
				+        if count > max_count:
			
 
				+            max_count = count
			
 
				+            index_flag = index
			
 
				+            min_distance = distance / count
			
 
				+        elif count == max_count and count > 0:
			
 
				+            distance /= count
			
 
				+            if distance < min_distance:
			
 
				+                min_distance = distance
			
 
				+                index_flag = index
			
 
				+    if debug == 1:
			
 
				+        if index_flag >= 0:
			
 
				+            print('page width:', abs(marker_list[0][4][0] - all_list[index_flag][0][4][0]), 'anchor width:', page_width)
			
 
				+
			
 
				+    return all_list[index_flag], index_flag, min_distance
			
 
				+
			
 
				+
			
 
				+def find_column(anchors, width, column_num=2, debug=0):
			
 
				+    #   确定栏数，单栏宽度及第一栏和最后一栏的定位
			
 
				+    double_page_width_ratio = 0.42  # 默认双栏宽度比例
			
 
				+    three_page_width_ratio = 0.29  # 默认三栏宽度比例
			
 
				+    double_page_separation = 250  # 默认双栏栏间间距
			
 
				+    three_page_separation = 100  # 默认三栏栏间间距
			
 
				+    horizontal_threshold = 80  # 单栏宽度比例阈值
			
 
				+
			
 
				+    top_anchors, bottom_anchors = anchors[:2]
			
 
				+
			
 
				+    page_width = width
			
 
				+    if len(top_anchors) >= 2:
			
 
				+        for i in range(len(top_anchors)-1):
			
 
				+            page_width_0 = top_anchors[i+1][4][0] - top_anchors[i][4][0]
			
 
				+            page_width_1 = (top_anchors[i + 1][4][0] - top_anchors[i][4][0]) // 2
			
 
				+            page_width_2 = (top_anchors[i + 1][4][0] - top_anchors[i][4][0]) // 3
			
 
				+            if abs(page_width_0 - width * double_page_width_ratio) < horizontal_threshold:
			
 
				+                column_num = 2
			
 
				+                if page_width_0 < page_width:
			
 
				+                    page_width = page_width_0
			
 
				+            elif abs(page_width_0 - width * three_page_width_ratio) < horizontal_threshold:
			
 
				+                column_num = 3
			
 
				+                if page_width_0 < page_width:
			
 
				+                    page_width = page_width_0
			
 
				+            elif abs(page_width_1 - width * double_page_width_ratio) < horizontal_threshold:
			
 
				+                column_num = 2
			
 
				+                if page_width_1 < page_width:
			
 
				+                    page_width = page_width_1
			
 
				+            elif abs(page_width_1 - width * three_page_width_ratio) < horizontal_threshold:
			
 
				+                column_num = 3
			
 
				+                if page_width_1 < page_width:
			
 
				+                    page_width = page_width_1
			
 
				+            elif abs(page_width_2 - width * double_page_width_ratio) < horizontal_threshold:
			
 
				+                column_num = 2
			
 
				+                if page_width_2 < page_width:
			
 
				+                    page_width = page_width_2
			
 
				+            elif abs(page_width_2 - width * three_page_width_ratio) < horizontal_threshold:
			
 
				+                column_num = 3
			
 
				+                if page_width_2 < page_width:
			
 
				+                    page_width = page_width_2
			
 
				+    if page_width == width:
			
 
				+        if column_num == 2:
			
 
				+            page_width = int(width * double_page_width_ratio)    # 如果没有找到合适的大定位点，使用默认的双栏宽度
			
 
				+        elif column_num == 3:
			
 
				+            page_width = int(width * three_page_width_ratio)     # 如果没有找到合适的大定位点，使用默认的三栏宽度
			
 
				+
			
 
				+    #   寻找第一栏和最后一栏的定位
			
 
				+    column_pos = []
			
 
				+    if len(top_anchors) >= 1:
			
 
				+        for i in range(4):
			
 
				+            if top_anchors[0][4][0] - (i + 1) * page_width < 0:
			
 
				+                column_pos.append(top_anchors[0][4][0] - i * page_width)
			
 
				+                break
			
 
				+        for i in range(4):
			
 
				+            if top_anchors[-1][4][0] + (i + 1) * page_width > width:
			
 
				+                column_pos.append(top_anchors[-1][4][0] + (i - 1) * page_width)
			
 
				+                break
			
 
				+    elif len(bottom_anchors) == 2:
			
 
				+        column_pos = [bottom_anchors[0][4][0], bottom_anchors[-1][4][0] - page_width]
			
 
				+    elif column_num == 2:
			
 
				+        column_pos = [(width - double_page_separation) // 2 - page_width, (width + double_page_separation) // 2]
			
 
				+    elif column_num == 3:
			
 
				+        column_pos = [width // 2 - three_page_separation - page_width * 3 // 2,
			
 
				+                      width // 2 + three_page_separation + page_width // 2]
			
 
				+
			
 
				+    if debug == 1:
			
 
				+        print('top anchors')
			
 
				+        for t in top_anchors:
			
 
				+            print(t[4])
			
 
				+        print('bottom anchors')
			
 
				+        for b in bottom_anchors:
			
 
				+            print(b[4])
			
 
				+        print('page width:', page_width, 'column number:', column_num, 'column position:', column_pos)
			
 
				+
			
 
				+    return page_width, column_num, column_pos
			
--- a/segment/sheet_resolve/analysis/choice/__init__.py
+++ b/segment/sheet_resolve/analysis/choice/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:01
			
--- a/segment/sheet_resolve/analysis/choice/analysis_choice.py
+++ b/segment/sheet_resolve/analysis/choice/analysis_choice.py
@@ -0,0 +1,95 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : analysis_choice.py
			
 
				+import time
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+from segment.sheet_resolve.lib.model.test import im_detect
			
 
				+from segment.sheet_resolve.lib.model.nms_wrapper import nms
			
 
				+from segment.sheet_resolve.lib.utils.timer import Timer
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+
			
 
				+
			
 
				+def analysis_single_image_with_regions(analysis_type, classes, sess, net,
			
 
				+                                       im, conf_thresh, mns_thresh,
			
 
				+                                       coordinate_bias_dict):
			
 
				+    """Detect object classes in an image using pre-computed object proposals."""
			
 
				+
			
 
				+    size = im.shape
			
 
				+
			
 
				+    # Detect all object classes and regress object bounds
			
 
				+    timer = Timer()
			
 
				+    timer.tic()
			
 
				+    im, radio = utils.img_resize(analysis_type, im)
			
 
				+    scores, boxes = im_detect(analysis_type, sess, net, im)
			
 
				+    timer.toc()
			
 
				+    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))
			
 
				+
			
 
				+    content_list = []
			
 
				+    analysis_cls_list = []
			
 
				+    for cls_ind, cls in enumerate(classes[1:]):  # classes
			
 
				+        cls_ind += 1  # because we skipped background
			
 
				+        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
			
 
				+        cls_scores = scores[:, cls_ind]
			
 
				+        dets = np.hstack((cls_boxes,
			
 
				+                          cls_scores[:, np.newaxis])).astype(np.float32)
			
 
				+        keep = nms(dets, mns_thresh)
			
 
				+        dets = dets[keep, :]
			
 
				+        # vis_detections(im, cls, dets, ax, thresh=conf_thresh)
			
 
				+        inds = np.where(dets[:, -1] >= conf_thresh)[0]
			
 
				+        if len(inds) > 0:
			
 
				+            if cls in list(coordinate_bias_dict.keys()):
			
 
				+                xmin_bias = coordinate_bias_dict[cls]['xmin_bias']
			
 
				+                ymin_bias = coordinate_bias_dict[cls]['ymin_bias']
			
 
				+                xmax_bias = coordinate_bias_dict[cls]['xmax_bias']
			
 
				+                ymax_bias = coordinate_bias_dict[cls]['ymax_bias']
			
 
				+            else:
			
 
				+                xmin_bias = 0
			
 
				+                ymin_bias = 0
			
 
				+                xmax_bias = 0
			
 
				+                ymax_bias = 0
			
 
				+            for i in inds:
			
 
				+                bbox = dets[i, :4]
			
 
				+                score = '{:.4f}'.format(dets[i, -1])
			
 
				+
			
 
				+                xmin = int(int(bbox[0]) * radio[0]) + xmin_bias
			
 
				+                ymin = int(int(bbox[1]) * radio[1]) + ymin_bias
			
 
				+                xmax = int(int(bbox[2]) * radio[0]) + xmax_bias
			
 
				+                ymax = int(int(bbox[3]) * radio[1]) + ymax_bias
			
 
				+
			
 
				+                xmin = (xmin if (xmin > 0) else 1)
			
 
				+                ymin = (ymin if (ymin > 0) else 1)
			
 
				+                xmax = (xmax if (xmax < size[1]) else size[1] - 1)
			
 
				+                ymax = (ymax if (ymax < size[0]) else size[0] - 1)
			
 
				+                xavg = int(xmin + (xmax - xmin) / 2)
			
 
				+                yavg = int(ymin + (ymax - ymin) / 2)
			
 
				+
			
 
				+                bbox_dict = {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax, 'x_center': xavg, 'y_center': yavg}
			
 
				+                class_dict = {"class_name": cls, "bounding_box": bbox_dict, "score": score, 'x_center': xavg,
			
 
				+                              'y_center': yavg}
			
 
				+                content_list.append(class_dict)
			
 
				+
			
 
				+                analysis_cls_list.append(cls)
			
 
				+
			
 
				+    return content_list, analysis_cls_list
			
 
				+
			
 
				+
			
 
				+def get_single_image_sheet_regions(analysis_type, im, classes,
			
 
				+                                   sess, net, conf_thresh, mns_thresh,
			
 
				+                                   coordinate_bias_dict):
			
 
				+    start_time = time.time()
			
 
				+
			
 
				+    content, cls = analysis_single_image_with_regions(analysis_type, classes,
			
 
				+                                                      sess, net,
			
 
				+                                                      im, conf_thresh, mns_thresh,
			
 
				+                                                      coordinate_bias_dict)
			
 
				+
			
 
				+    img_dict = {"img_name": 'choice_m',
			
 
				+                'analysis_type': analysis_type,
			
 
				+                "regions": content,
			
 
				+                }
			
 
				+
			
 
				+    end_time = time.time()
			
 
				+    print(end_time - start_time)
			
 
				+
			
 
				+    return img_dict
			
--- a/segment/sheet_resolve/analysis/choice/choice_box.py
+++ b/segment/sheet_resolve/analysis/choice/choice_box.py
@@ -0,0 +1,490 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : choice_box.py
			
 
				+# @Time    : 2018/11/22 0022 下午 16:01
			
 
				+import re
			
 
				+import time
			
 
				+import xml.etree.cElementTree as ET
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+from segment.sheet_resolve.analysis.choice.choice_m_row_column import get_choice_m_row_and_col
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+
			
 
				+
			
 
				+def get_interval(word_result_list):
			
 
				+    all_char_str = ''
			
 
				+    location = []
			
 
				+    for i, chars_dict in enumerate(word_result_list):
			
 
				+        chars_list = chars_dict['chars']
			
 
				+        for ele in chars_list:
			
 
				+            all_char_str = all_char_str + ele['char']
			
 
				+            location.append(ele['location'])
			
 
				+
			
 
				+    pattern1 = re.compile(r"\]\[")
			
 
				+    pattern2 = re.compile(r"\[[ABCD]")
			
 
				+
			
 
				+    def intervel(pattern):
			
 
				+        group_list = []
			
 
				+        for i in pattern.finditer(all_char_str):
			
 
				+            # print(i.group() + str(i.span()))
			
 
				+            group_list.append(list(i.span()))
			
 
				+        # print(group_list)
			
 
				+
			
 
				+        sum_intervel = 0
			
 
				+        size = 0
			
 
				+        for group in group_list:
			
 
				+            left_x, right_x = location[group[0]]['left'] \
			
 
				+                              + location[group[0]]['width'], location[group[1] - 1]['left']
			
 
				+            if abs(location[group[0]]['top'] - location[group[1]]['top']) < location[group[0]]['height']:
			
 
				+                if right_x - left_x > 0:
			
 
				+                    sum_intervel = sum_intervel + right_x - left_x
			
 
				+                    size += 1
			
 
				+
			
 
				+        # print(sum_intervel // size)
			
 
				+        return sum_intervel // size
			
 
				+
			
 
				+    intervel_width1 = intervel(pattern1)
			
 
				+    intervel_width2 = intervel(pattern2)
			
 
				+
			
 
				+    return (intervel_width1 + intervel_width2) * 2 // 3
			
 
				+
			
 
				+
			
 
				+def preprocess(image0, xe, ye):
			
 
				+    scale = 0
			
 
				+    dilate = 1
			
 
				+    blur = 5
			
 
				+    # 预处理图像
			
 
				+    img = image0
			
 
				+
			
 
				+    # rescale the image
			
 
				+    if scale != 0:
			
 
				+        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
			
 
				+
			
 
				+    # Convert to gray
			
 
				+    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+
			
 
				+    # # Apply dilation and erosion to remove some noise
			
 
				+    # if dilate != 0:
			
 
				+    #     kernel = np.ones((dilate, dilate), np.uint8)
			
 
				+    #     img = cv2.dilate(img, kernel, iterations=1)
			
 
				+    #     img = cv2.erode(img, kernel, iterations=1)
			
 
				+
			
 
				+    # Apply blur to smooth out the edges
			
 
				+    # if blur != 0:
			
 
				+    #     img = cv2.GaussianBlur(img, (blur, blur), 0)
			
 
				+
			
 
				+    # Apply threshold to get image with only b&w (binarization)
			
 
				+    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    # cv2.namedWindow('image', cv2.WINDOW_NORMAL)
			
 
				+    # cv2.imshow('image', img)
			
 
				+    # if cv2.waitKey(0) == 27:
			
 
				+    #     cv2.destroyAllWindows()
			
 
				+    # cv2.imwrite('otsu.jpg', img)
			
 
				+
			
 
				+    kernel = np.ones((ye, xe), np.uint8)  # y轴膨胀, x轴膨胀
			
 
				+
			
 
				+    dst = cv2.dilate(img, kernel, iterations=1)
			
 
				+    # cv2.imshow('dilate', dst)
			
 
				+    # if cv2.waitKey(0) == 27:
			
 
				+    #     cv2.destroyAllWindows()
			
 
				+
			
 
				+    return dst
			
 
				+
			
 
				+
			
 
				+def contours(image):
			
 
				+    _, cnts, hierarchy = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+
			
 
				+    bboxes = []
			
 
				+    for cnt_id, cnt in enumerate(reversed(cnts)):
			
 
				+        x, y, w, h = cv2.boundingRect(cnt)
			
 
				+        bboxes.append((x, y, x + w, y + h))
			
 
				+
			
 
				+    return bboxes
			
 
				+
			
 
				+
			
 
				+def box_coordinates(img):
			
 
				+    img_arr = np.asarray(img)
			
 
				+
			
 
				+    def axix_break_point(img, tolerance_number, axis):
			
 
				+        sum_x_axis = img.sum(axis=axis)
			
 
				+        sum_x_axis[sum_x_axis > 255 * tolerance_number] = 1  # 白色有字
			
 
				+        sum_x_axis[sum_x_axis != 1] = 0  # 黑色无字
			
 
				+        sum_x_axis_list = list(sum_x_axis)
			
 
				+        sum_x_axis_list.append(0)  # 最后几行到结束有字时，使索引值增加最后一位
			
 
				+
			
 
				+        split_x_index = []
			
 
				+        num = 1
			
 
				+        for index, ele in enumerate(sum_x_axis_list):
			
 
				+            num = num % 2
			
 
				+            if ele == num:
			
 
				+                # print(i)
			
 
				+                num = num + 1
			
 
				+                split_x_index.append(index)
			
 
				+        # print('length: ', len(split_x_index), split_x_index)
			
 
				+        return split_x_index
			
 
				+
			
 
				+    y_break_points_list = axix_break_point(img_arr, 1, axis=1)
			
 
				+    x_break_points_list = axix_break_point(img_arr, 1, axis=0)
			
 
				+
			
 
				+    all_coordinates = []
			
 
				+    for i in range(0, len(y_break_points_list), 2):  # y轴分组
			
 
				+        ymin = y_break_points_list[i]
			
 
				+        ymax = y_break_points_list[i + 1]
			
 
				+        for j in range(0, len(x_break_points_list), 2):
			
 
				+            xmin = x_break_points_list[j]
			
 
				+            xmax = x_break_points_list[j + 1]
			
 
				+            all_coordinates.append([xmin, ymin, xmax, ymax])
			
 
				+
			
 
				+    return all_coordinates
			
 
				+
			
 
				+
			
 
				+def get_choice_box_coordinate(word_result_list, choice_img, cv_box_list, choice_bbox_list):
			
 
				+    shape = choice_img.shape
			
 
				+    y, x = shape[0], shape[1]
			
 
				+
			
 
				+    # cv2.imshow('ocr_region', ocr_region)
			
 
				+    # if cv2.waitKey(0) == 27:
			
 
				+    #     cv2.destroyAllWindows()
			
 
				+
			
 
				+    all_digital_list = []
			
 
				+    digital_model = re.compile(r'\d')
			
 
				+    for i, chars_dict in enumerate(word_result_list):
			
 
				+        chars_list = chars_dict['chars']
			
 
				+        for ele in chars_list:
			
 
				+            if digital_model.search(ele['char']):
			
 
				+                all_digital_list.append(ele)
			
 
				+
			
 
				+    new_all_digital_list = []
			
 
				+    i = 1
			
 
				+    while i <= len(all_digital_list):
			
 
				+        pre_one = all_digital_list[i - 1]
			
 
				+        if i == len(all_digital_list):
			
 
				+            new_all_digital_list.append(pre_one)
			
 
				+            break
			
 
				+        rear_one = all_digital_list[i]
			
 
				+        condition1 = abs(pre_one['location']['top'] - rear_one['location']['top']) < pre_one['location'][
			
 
				+            'height']  # 两字高度差小于一字高度
			
 
				+        condition2 = pre_one['location']['left'] + 2 * pre_one['location']['width'] > rear_one['location'][
			
 
				+            'left']  # 某字宽度的2倍大于两字间间隔
			
 
				+        if condition1:
			
 
				+            if condition2:
			
 
				+                new_char = pre_one['char'] + rear_one['char']
			
 
				+                new_location = {'left': pre_one['location']['left'],
			
 
				+                                'top': min(pre_one['location']['top'], rear_one['location']['top']),
			
 
				+                                'width': rear_one['location']['left'] + rear_one['location']['width'] -
			
 
				+                                         pre_one['location']['left'],
			
 
				+                                'height': max(pre_one['location']['height'], rear_one['location']['height'])}
			
 
				+                new_all_digital_list.append({'char': new_char, 'location': new_location})
			
 
				+                i = i + 1 + 1
			
 
				+            else:
			
 
				+                new_all_digital_list.append(pre_one)
			
 
				+                i = i + 1
			
 
				+        else:
			
 
				+            new_all_digital_list.append(pre_one)  # 遇到字符y轴相差过大就结束
			
 
				+            i = i + 1
			
 
				+
			
 
				+    content_list = list()
			
 
				+    for index, box in enumerate(choice_bbox_list['regions']):  # rcnn识别的框匹配题号
			
 
				+        box = box['bounding_box']
			
 
				+        box_coordinate = (box['xmin'], box['ymin'], box['xmax'], box['ymax'])
			
 
				+        horizontal = box['xmax'] - box['xmin'] >= box['ymax'] - box['ymin']
			
 
				+        vertical = box['xmax'] - box['xmin'] < box['ymax'] - box['ymin']
			
 
				+        choice_number = {'number': 99, 'location': box_coordinate}
			
 
				+        content_list.insert(index, choice_number)
			
 
				+        for digital in new_all_digital_list:
			
 
				+            digital_coordiante = (digital['location']['left'], digital['location']['top'],
			
 
				+                                  digital['location']['left'] + digital['location']['width'],
			
 
				+                                  digital['location']['top'] + digital['location']['height'])
			
 
				+
			
 
				+            if utils.decide_coordinate_contains(digital_coordiante, box_coordinate):
			
 
				+                if horizontal:
			
 
				+                    box['xmin'] = digital['location']['left'] + digital['location']['width'] + 1  # 从数字处截取
			
 
				+                if vertical:
			
 
				+                    box['ymin'] = digital['location']['top'] + digital['location']['height'] + 1
			
 
				+
			
 
				+                box_coordinate = (box['xmin'], box['ymin'], box['xmax'], box['ymax'])
			
 
				+                content_list[index]['number'] = digital['char']
			
 
				+                content_list[index]['location'] = box_coordinate
			
 
				+                break
			
 
				+
			
 
				+    for box in content_list:
			
 
				+        box_coordinate = (box['location'][0], box['location'][1], box['location'][2], box['location'][3])
			
 
				+        mtx = []
			
 
				+        for cv_box in cv_box_list:
			
 
				+            if utils.decide_coordinate_contains(cv_box, box_coordinate):  # 若fasterrcnn未识别到选项框，单独的ABCD也舍去
			
 
				+                mtx.append(cv_box)
			
 
				+
			
 
				+        matrix = np.asarray(sorted(mtx))
			
 
				+        dif = matrix[1:, 0] - matrix[:-1, 2]  # 后一个char的left与起一个char的right的差
			
 
				+        dif[dif < 0] = 0
			
 
				+        dif_length = np.mean(dif)  # 小于平均间隔的合并
			
 
				+        block_list = utils.box_by_x_intervel(matrix, dif_length)
			
 
				+        # block_list = utils.box_by_x_intervel(matrix, 5)
			
 
				+        box['abcd'] = block_list
			
 
				+
			
 
				+    return content_list
			
 
				+
			
 
				+
			
 
				+def choice(left, top, image, choice_bbox_list, xml_path):
			
 
				+    a_z = '_ABCDEFGHIJKLMTUNOPQRSVWXYZ'
			
 
				+    t1 = time.time()
			
 
				+    word_result_list0 = get_ocr_text_and_coordinate(image, ocr_accuracy='accurate', language_type='ENG')
			
 
				+    t2 = time.time()
			
 
				+    print('choice ocr time cost: ', t2 - t1)
			
 
				+    # print(word_result_list0)
			
 
				+
			
 
				+    # try:
			
 
				+    #     intervel_x = get_interval(word_result_list0)
			
 
				+    # except Exception:
			
 
				+    #     intervel_x = 15
			
 
				+    intervel_x = 3
			
 
				+    img = preprocess(image, intervel_x, 3)
			
 
				+    cv_box_list0 = box_coordinates(img)
			
 
				+
			
 
				+    content_list = get_choice_box_coordinate(word_result_list0, image, cv_box_list0, choice_bbox_list)
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+
			
 
				+    w = content_list[0]['location'][2] - content_list[0]['location'][0]
			
 
				+    h = content_list[0]['location'][3] - content_list[0]['location'][1]
			
 
				+
			
 
				+    def xml(xml_tree, sorted_abcd_list, bias=0):
			
 
				+        ii = 0
			
 
				+        for i, choice_bbox in enumerate(sorted_abcd_list):
			
 
				+            area = (choice_bbox[2] - choice_bbox[0]) * (choice_bbox[3] - choice_bbox[1])
			
 
				+            if area > 400:
			
 
				+                name = '{:02d}_{}'.format(int(choice['number']), a_z[ii + bias])
			
 
				+                xml_tree = utils.create_xml(name, xml_tree,
			
 
				+                                            choice_bbox[0] + left, choice_bbox[1] + top, choice_bbox[2] + left,
			
 
				+                                            choice_bbox[3] + top)
			
 
				+                ii += 1
			
 
				+        return xml_tree
			
 
				+
			
 
				+    def get_json(ajson_list, sorted_abcd_list, bias=0):
			
 
				+        ii = 0
			
 
				+        for i, choice_bbox in enumerate(sorted_abcd_list):
			
 
				+            area = (choice_bbox[2] - choice_bbox[0]) * (choice_bbox[3] - choice_bbox[1])
			
 
				+            if area > 400:
			
 
				+                name = '{:02d}_{}'.format(int(choice['number']), a_z[ii + bias])
			
 
				+                region = [choice_bbox[0] + left, choice_bbox[1] + top, choice_bbox[2] + left, choice_bbox[3] + top]
			
 
				+                ajson_list.append({'number': name, 'region': region})
			
 
				+                ii += 1
			
 
				+        return ajson_list
			
 
				+
			
 
				+    json_list = []
			
 
				+    for index_num, choice in enumerate(content_list):
			
 
				+        abcd = choice['abcd']
			
 
				+        if int(choice['number']) == 99:
			
 
				+            if w >= h:
			
 
				+                tree = xml(tree, sorted(abcd))
			
 
				+                json_list = get_json(json_list, sorted(abcd))
			
 
				+
			
 
				+            else:
			
 
				+                tree = xml(tree, sorted(abcd, key=lambda x: (x[1], x[0])))
			
 
				+                json_list = get_json(json_list, sorted(abcd, key=lambda x: (x[1], x[0])))
			
 
				+
			
 
				+        else:
			
 
				+            if w >= h:
			
 
				+                tree = xml(tree, sorted(abcd), bias=1)
			
 
				+                json_list = get_json(json_list, sorted(abcd), bias=1)
			
 
				+
			
 
				+            else:
			
 
				+                tree = xml(tree, sorted(abcd, key=lambda x: (x[1], x[0])), bias=1)
			
 
				+                json_list = get_json(json_list, sorted(abcd, key=lambda x: (x[1], x[0])), bias=1)
			
 
				+
			
 
				+    tree.write(xml_path)
			
 
				+    return json_list
			
 
				+
			
 
				+
			
 
				+def get_number_by_enlarge_choice_m(image, choice_m_region_list, xml_path):
			
 
				+    a_z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
			
 
				+
			
 
				+    choice_m_dict_list = []  # choice_m region with same index
			
 
				+    choice_m_enlarge = []
			
 
				+    left, top, right, bottom = 9999, 9999, 0, 0
			
 
				+    for _, box in enumerate(choice_m_region_list):
			
 
				+        box = box['bounding_box']
			
 
				+        m_left, m_top = box['xmin'], box['ymin'],
			
 
				+        width, height = box['xmax'] - box['xmin'], box['ymax'] - box['ymin']
			
 
				+
			
 
				+        box_coordinate = (m_left, m_top, box['xmax'], box['ymax'])
			
 
				+        single_choice_m = utils.crop_region_direct(image, box_coordinate)
			
 
				+        row_col_dict = get_choice_m_row_and_col(m_left, m_top, single_choice_m)
			
 
				+        choice_m_dict_list.append(row_col_dict)
			
 
				+
			
 
				+        box_coordinate_enlarge = (
			
 
				+            m_left - int(width / 2), m_top - int(height / 2), box['xmax'], box['ymax'])  # 扩大的choice_m， 多个分散choice_m
			
 
				+        choice_m_enlarge.append(box_coordinate_enlarge)
			
 
				+        left = min(left, box_coordinate_enlarge[0])
			
 
				+        top = min(top, box_coordinate_enlarge[1])
			
 
				+        right = max(right, box_coordinate_enlarge[2])
			
 
				+        bottom = max(bottom, box_coordinate_enlarge[3])
			
 
				+
			
 
				+    choice_whole_region = utils.crop_region_direct(image, (left, top, right, bottom))
			
 
				+    # cv2.imwrite(r'C:\Users\Administrator\Desktop\test\sheet\choice_enlarge.jpg', choice_whole_region)
			
 
				+    # cv2.imshow('img', choice_whole_region)
			
 
				+    # cv2.waitKey(0)
			
 
				+    # cv2.destroyAllWindows()
			
 
				+
			
 
				+    choice_region_text = get_ocr_text_and_coordinate(choice_whole_region)
			
 
				+    all_digital_list = []
			
 
				+    pattern = re.compile(r'\d')
			
 
				+    for i, chars_dict in enumerate(choice_region_text):
			
 
				+        chars_list = chars_dict['chars']
			
 
				+        for ele in chars_list:
			
 
				+            if pattern.search(ele['char']):
			
 
				+                all_digital_list.append(ele)
			
 
				+
			
 
				+    combined_digital_list = utils.combine_char(all_digital_list)
			
 
				+    direction_list = []
			
 
				+    for index, enlarge_box in enumerate(choice_m_enlarge):
			
 
				+        digital_list = []
			
 
				+        xmin, ymin, xmax, ymax = 9999, 9999, 0, 0
			
 
				+
			
 
				+        choice_m_dict = choice_m_dict_list[index]
			
 
				+        choice_m_dict_box = (choice_m_dict['bounding_box']['xmin'], choice_m_dict['bounding_box']['ymin'],
			
 
				+                             choice_m_dict['bounding_box']['xmax'], choice_m_dict['bounding_box']['ymax'],)
			
 
				+
			
 
				+        for jndex, digital_box in enumerate(combined_digital_list):
			
 
				+            digital_coordinate = (digital_box['location']['left'] + left,
			
 
				+                                  digital_box['location']['top'] + top,
			
 
				+                                  digital_box['location']['left'] + digital_box['location']['width'] + left,
			
 
				+                                  digital_box['location']['top'] + digital_box['location']['height'] + top)
			
 
				+            digital_box.update({'coordinate': digital_coordinate})
			
 
				+            if (utils.decide_coordinate_contains(digital_coordinate, enlarge_box)) and not \
			
 
				+                    (utils.decide_coordinate_contains(digital_coordinate, choice_m_dict_box)):
			
 
				+                digital_list.append(digital_box)
			
 
				+                xmin = min(xmin, digital_box['coordinate'][0])
			
 
				+                ymin = min(ymin, digital_box['coordinate'][1])
			
 
				+                xmax = max(xmax, digital_box['coordinate'][2])
			
 
				+                ymax = max(ymax, digital_box['coordinate'][3])
			
 
				+
			
 
				+        digital_list_coordinate = (xmin, ymin, xmax, ymax)
			
 
				+
			
 
				+        direction = utils.decide_choice_m_left_top(digital_list_coordinate, choice_m_dict_box)
			
 
				+        if int(direction):
			
 
				+            choice_m_dict['direction'] = direction
			
 
				+            direction_list.append(direction)
			
 
				+            if direction == '180':  # 数字垂直排列
			
 
				+                std_num_length = choice_m_dict['rows']
			
 
				+                choice_option = a_z[:choice_m_dict['cols']].replace('', ',')[1:-1]
			
 
				+                default_points = [-1] * std_num_length
			
 
				+                choice_m_dict.update({'option': choice_option, 'default_points': default_points})
			
 
				+
			
 
				+                sorted(digital_list, key=lambda k: k.get('coordinate')[1])
			
 
				+                choice_ymin = choice_m_dict['bounding_box']['ymin']
			
 
				+                single_height = choice_m_dict['single_height']
			
 
				+                mean_interval = ((choice_m_dict['bounding_box']['ymax'] - choice_m_dict['bounding_box']['ymin'])
			
 
				+                                 - single_height * std_num_length) / (std_num_length - 1)
			
 
				+                spilt_index = [choice_ymin - mean_interval / 2 + (single_height + mean_interval) * ele for ele in
			
 
				+                               range(std_num_length + 1)]
			
 
				+
			
 
				+                number_list = [-1] * std_num_length
			
 
				+                number_location = [(-1, -1, -1, -1)] * std_num_length
			
 
				+                for i in range(0, len(spilt_index) - 1):
			
 
				+                    start = spilt_index[i]
			
 
				+                    end = spilt_index[i + 1]
			
 
				+                    number_location[i] = (xmin, start, xmax, end)
			
 
				+                    for digital_coordinate in digital_list:
			
 
				+                        middle_y = (digital_coordinate['coordinate'][3] - digital_coordinate['coordinate'][1]) / 2 + \
			
 
				+                                   digital_coordinate['coordinate'][1]
			
 
				+                        middle_x = (digital_coordinate['coordinate'][2] - digital_coordinate['coordinate'][0]) / 2 + \
			
 
				+                                   digital_coordinate['coordinate'][0]
			
 
				+                        if (start <= middle_y <= end
			
 
				+                                and
			
 
				+                                middle_x < choice_m_dict['bounding_box']['xmin']):  # 数字在choice_m外侧
			
 
				+                            number_list[i] = int(digital_coordinate['char'])
			
 
				+                            number_location[i] = digital_coordinate['coordinate']
			
 
				+
			
 
				+                number_list = _infer_number(number_list)
			
 
				+                choice_m_dict['number'] = _infer_number(number_list)
			
 
				+                # choice_m_dict['number'] = [{'number': number,
			
 
				+                #                             'location': {'xmin': xi, 'ymin': yi, 'xmax': xm, 'ymax': ym}}
			
 
				+                #                            for number in number_list
			
 
				+                #                            for (xi, yi, xm, ym) in number_location]
			
 
				+
			
 
				+            if direction == '90':  # 数字水平排列
			
 
				+                std_num_length = choice_m_dict['cols']
			
 
				+                choice_option = a_z[:std_num_length].replace('', ',')[1:-1]
			
 
				+                default_points = [-1] * std_num_length
			
 
				+                choice_m_dict.update({'option': choice_option, 'default_points': default_points})
			
 
				+
			
 
				+                sorted(digital_list, key=lambda k: k.get('coordinate')[0])
			
 
				+                choice_xmin = choice_m_dict['bounding_box']['ymin']
			
 
				+                single_width = choice_m_dict['single_width']
			
 
				+                mean_interval = ((choice_m_dict['bounding_box']['xmax'] - choice_m_dict['bounding_box']['xmin'])
			
 
				+                                 - single_width * std_num_length) / (std_num_length - 1)
			
 
				+                spilt_index = [choice_xmin - mean_interval / 2 + (single_width + mean_interval) * ele for ele in
			
 
				+                               range(std_num_length)]
			
 
				+                number_list = [-1] * std_num_length
			
 
				+                number_location = [(-1, -1, -1, -1)] * std_num_length
			
 
				+                for i in range(0, len(spilt_index) - 1):
			
 
				+                    start = spilt_index[i]
			
 
				+                    end = spilt_index[i + 1]
			
 
				+                    number_location[i] = (start, ymin, end, ymax)
			
 
				+                    for digital_coordinate in digital_list:
			
 
				+                        middle_y = (digital_coordinate['coordinate'][3] - digital_coordinate['coordinate'][1]) / 2 + \
			
 
				+                                   digital_coordinate['coordinate'][1]
			
 
				+                        middle_x = (digital_coordinate['coordinate'][2] - digital_coordinate['coordinate'][0]) / 2 + \
			
 
				+                                   digital_coordinate['coordinate'][0]
			
 
				+                        if start <= middle_x <= end and middle_y < choice_m_dict['bounding_box']['ymin']:
			
 
				+                            number_list[i] = int(digital_coordinate['char'])
			
 
				+                            number_location[i] = digital_coordinate['coordinate']
			
 
				+
			
 
				+                number_list = _infer_number(number_list)
			
 
				+                choice_m_dict['number'] = _infer_number(number_list)
			
 
				+
			
 
				+                # choice_m_dict['number'] = [{'number': number,
			
 
				+                #                             'location': {'xmin': xi, 'ymin': yi, 'xmax': xm, 'ymax': ym}}
			
 
				+                #                            for number in number_list
			
 
				+                #                            for (xi, yi, xm, ym) in number_location]
			
 
				+
			
 
				+        else:
			
 
				+            choice_m_dict['direction'] = '0'
			
 
				+            choice_m_dict['number'] = [-1]
			
 
				+            choice_m_dict['default_points'] = [-1]
			
 
				+
			
 
				+    count180 = ','.join(direction_list).count('180')
			
 
				+    count90 = ','.join(direction_list).count('90')
			
 
				+
			
 
				+    infer_direction = ['180', '90'][[count180, count90].index(max(count180, count90))]
			
 
				+    for ele in choice_m_dict_list:
			
 
				+        if ele['direction'] != '0':
			
 
				+            ele.update({'direction': infer_direction})
			
 
				+
			
 
				+    # tree = ET.parse(xml_path)  # xml tree
			
 
				+    # for index_num, choice_box in enumerate(choice_m_dict_list):
			
 
				+    #     if len(choice_box['bounding_box']) > 0:
			
 
				+    #         abcd = choice_box['bounding_box']
			
 
				+    #         number = str(choice_box['number'])
			
 
				+    #         name = '{}_{}*{}_{}_{}'.format('choice_m', choice_box['rows'],
			
 
				+    #                                        choice_box['cols'], choice_box['direction'],
			
 
				+    #                                        number)
			
 
				+    #         tree = utils.create_xml(name, tree,
			
 
				+    #                                 abcd['xmin'], abcd['ymin'],
			
 
				+    #                                 abcd['xmax'], abcd['ymax'])
			
 
				+    #
			
 
				+    # tree.write(xml_path)
			
 
				+    return choice_m_dict_list
			
 
				+
			
 
				+
			
 
				+def _infer_number(number_list):
			
 
				+    if -1 not in number_list or sum(number_list) == -1 * len(number_list):
			
 
				+        return number_list
			
 
				+    else:
			
 
				+        for n_index in range(0, len(number_list) - 1):
			
 
				+            if n_index == 0:
			
 
				+                if number_list[n_index] != -1:
			
 
				+
			
 
				+                    if len(number_list) > 1 and number_list[n_index + 1] == -1:
			
 
				+                        number_list[n_index + 1] = number_list[n_index] + 1
			
 
				+
			
 
				+            if number_list[n_index] != -1:
			
 
				+                if number_list[n_index - 1] == -1:
			
 
				+                    number_list[n_index - 1] = number_list[n_index] - 1
			
 
				+                if number_list[n_index + 1] == -1:
			
 
				+                    number_list[n_index + 1] = number_list[n_index] + 1
			
 
				+        return _infer_number(number_list)
			
--- a/segment/sheet_resolve/analysis/choice/choice_line_box.py
+++ b/segment/sheet_resolve/analysis/choice/choice_line_box.py
--- a/segment/sheet_resolve/analysis/choice/choice_m_row_column.py
+++ b/segment/sheet_resolve/analysis/choice/choice_m_row_column.py
@@ -0,0 +1,211 @@
 
				+# @Author  : liu fan
			
 
				+import numpy as np
			
 
				+import tensorflow as tf
			
 
				+
			
 
				+from segment.sheet_resolve.lib.ssd_model.utils import label_map_util, ops as utils_ops
			
 
				+from segment.sheet_resolve.tools import tf_settings
			
 
				+
			
 
				+from segment.sheet_resolve.tools.tf_sess import SsdSess
			
 
				+from PIL import Image
			
 
				+
			
 
# Session wrappers used by this module, keyed by model name.
# NOTE: the SsdSess object is constructed when this module is imported.
tf_sess_dict = {
    'choice_ssd': SsdSess('choice_ssd'),
}

# Module-level handles to the choice SSD session and its graph; the
# inference helpers in this module read ``sess`` and ``detection_graph``.
choice_ssd_sess = tf_sess_dict['choice_ssd']
sess = choice_ssd_sess.sess
detection_graph = choice_ssd_sess.graph
			
 
				+
			
 
				+
			
 
				+def load_image_into_numpy_array(image):
			
 
				+    # print(image)
			
 
				+    image = image.convert('RGB')
			
 
				+    (im_width, im_height) = image.size
			
 
				+    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
			
 
				+
			
 
				+
			
 
				+def run_inference_for_single_image(image):
			
 
				+    ops = detection_graph.get_operations()
			
 
				+    all_tensor_names = {output.name for op in ops for output in op.outputs}
			
 
				+    tensor_dict = {}
			
 
				+    for key in [
			
 
				+        'num_detections', 'detection_boxes', 'detection_scores',
			
 
				+        'detection_classes', 'detection_masks'
			
 
				+    ]:
			
 
				+        tensor_name = key + ':0'
			
 
				+        if tensor_name in all_tensor_names:
			
 
				+            tensor_dict[key] = detection_graph.get_tensor_by_name(
			
 
				+                tensor_name)
			
 
				+    if 'detection_masks' in tensor_dict:
			
 
				+        # The following processing is only for single image
			
 
				+        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
			
 
				+        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
			
 
				+        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
			
 
				+        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
			
 
				+        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
			
 
				+        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
			
 
				+        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
			
 
				+            detection_masks, detection_boxes, image.shape[0], image.shape[1])
			
 
				+        detection_masks_reframed = tf.cast(
			
 
				+            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
			
 
				+        # Follow the convention by adding back the batch dimension
			
 
				+        tensor_dict['detection_masks'] = tf.expand_dims(
			
 
				+            detection_masks_reframed, 0)
			
 
				+    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
			
 
				+
			
 
				+    # Run inference
			
 
				+    # start = time.time()
			
 
				+    output_dict = sess.run(tensor_dict,
			
 
				+                           feed_dict={image_tensor: np.expand_dims(image, 0)})
			
 
				+    # print(time.time()-start)
			
 
				+    # all outputs are float32 numpy arrays, so convert types as appropriate
			
 
				+    output_dict['num_detections'] = int(output_dict['num_detections'][0])
			
 
				+    output_dict['detection_classes'] = output_dict[
			
 
				+        'detection_classes'][0].astype(np.uint8)
			
 
				+    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
			
 
				+    output_dict['detection_scores'] = output_dict['detection_scores'][0]
			
 
				+    if 'detection_masks' in output_dict:
			
 
				+        output_dict['detection_masks'] = output_dict['detection_masks'][0]
			
 
				+    return output_dict
			
 
				+
			
 
				+
			
 
				+def image_detect(image_np, category, score_threshold):
			
 
				+    image_np = load_image_into_numpy_array(image_np)
			
 
				+    detections = []
			
 
				+    w, h = image_np.shape[1], image_np.shape[0]
			
 
				+    with tf.device("/device:GPU:{}".format(0)):
			
 
				+        output_dict = run_inference_for_single_image(image_np)
			
 
				+    boxes = output_dict['detection_boxes']
			
 
				+    scores = output_dict['detection_scores']
			
 
				+    labels = output_dict['detection_classes']
			
 
				+    indices = np.where(scores > score_threshold)
			
 
				+    image_scores = scores[indices]
			
 
				+    image_boxes = boxes[indices]
			
 
				+    image_labels = labels[indices]
			
 
				+    image_detections = np.concatenate(
			
 
				+        [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
			
 
				+    for detection in image_detections:
			
 
				+        y0 = int(detection[0] * h)
			
 
				+        x0 = int(detection[1] * w)
			
 
				+        y1 = int(detection[2] * h)
			
 
				+        x1 = int(detection[3] * w)
			
 
				+        label_index = int(detection[5])
			
 
				+        label_name = category[label_index]['name']
			
 
				+        detections.append((x0, y0, x1, y1, label_index, detection[4], label_name))
			
 
				+    return detections
			
 
				+
			
 
				+
			
 
				+def get_choice_m_row_and_col(left, top, image):
			
 
				+    im_resize = 300
			
 
				+    ''' choice_m resize to 300*300'''
			
 
				+    image_src = Image.fromarray(image)
			
 
				+    if image_src.mode == 'RGB':
			
 
				+        image_src = image_src.convert("L")
			
 
				+    w, h = image_src.size
			
 
				+    if h > w:
			
 
				+        image_src = image_src.resize((int(im_resize / h * w), im_resize))
			
 
				+    else:
			
 
				+        image_src = image_src.resize((im_resize, int(im_resize / w * h)))
			
 
				+    w_, h_ = image_src.size
			
 
				+    image_300 = Image.new(image_src.mode, (im_resize, im_resize), (255))
			
 
				+    image_300.paste(image_src, [0, 0, w_, h_])
			
 
				+
			
 
				+    category_index = label_map_util.create_category_index_from_labelmap(tf_settings.choice_m_ssd_label,
			
 
				+                                                                        use_display_name=True)
			
 
				+    detections = image_detect(image_300, category_index, 0.5)
			
 
				+    if len(detections) > 1:
			
 
				+        box_xmin = []
			
 
				+        box_ymin = []
			
 
				+        box_xmax = []
			
 
				+        box_ymax = []
			
 
				+        x_distance_all = []
			
 
				+        y_distance_all = []
			
 
				+        x_width_all = []
			
 
				+        y_height_all = []
			
 
				+        all_small_coordinate = []
			
 
				+        ssd_column = 1
			
 
				+        ssd_row = 1
			
 
				+        count_x = 0
			
 
				+        count_y = 0
			
 
				+        for index, box in enumerate(detections):
			
 
				+            if box[-1] != 'T' and box[2] <= w_ and box[3] <= h_:
			
 
				+                box0 = round(box[0] * (w / w_))  # Map to the original image
			
 
				+                box1 = round(box[1] * (h / h_))
			
 
				+                box2 = round(box[2] * (w / w_))
			
 
				+                box3 = round(box[3] * (h / h_))
			
 
				+                box_xmin.append(box0)
			
 
				+                box_ymin.append(box1)
			
 
				+                box_xmax.append(box2)
			
 
				+                box_ymax.append(box3)
			
 
				+                small_coordinate = {'xmin': box0 + left,
			
 
				+                                    'ymin': box1 + top,
			
 
				+                                    'xmax': box2 + left,
			
 
				+                                    'ymax': box3 + top}
			
 
				+                all_small_coordinate.append(small_coordinate)
			
 
				+                x_width = box2 - box0
			
 
				+                y_height = box3 - box1
			
 
				+                x_width_all.append(x_width)
			
 
				+                y_height_all.append(y_height)
			
 
				+
			
 
				+        sorted_xmin = sorted(box_xmin)
			
 
				+        sorted_ymin = sorted(box_ymin)
			
 
				+        sorted_xmax = sorted(box_xmax)
			
 
				+        sorted_ymax = sorted(box_ymax)
			
 
				+
			
 
				+        x_width_all_sorted = sorted(x_width_all, reverse=True)
			
 
				+        y_height_all_sorted = sorted(y_height_all, reverse=True)
			
 
				+        len_x = len(x_width_all)
			
 
				+        len_y = len(y_height_all)
			
 
				+        x_width_median = np.median(x_width_all_sorted)
			
 
				+        y_height_median = np.median(y_height_all_sorted)
			
 
				+
			
 
				+        for i in range(len(sorted_xmin) - 1):
			
 
				+            x_distance = abs(sorted_xmin[i + 1] - sorted_xmin[i])
			
 
				+            y_distance = abs(sorted_ymin[i + 1] - sorted_ymin[i])
			
 
				+            if x_distance > 20:
			
 
				+                ssd_column = ssd_column + 1
			
 
				+                x_distance_all.append(x_distance)
			
 
				+                if x_distance > 2 * x_width_median + 4:
			
 
				+                    count_x = count_x + 1
			
 
				+            if y_distance > 10:
			
 
				+                ssd_row = ssd_row + 1
			
 
				+                y_distance_all.append(y_distance)
			
 
				+                if y_distance > 2 * y_height_median + 3:
			
 
				+                    count_y = count_y + 1
			
 
				+            if x_width_all_sorted[i] - x_width_median > 40:
			
 
				+                ssd_column = ssd_column - 1
			
 
				+            elif x_width_median - x_width_all_sorted[i] > 40:
			
 
				+                ssd_column = ssd_column - 1
			
 
				+            if y_height_all_sorted[i] - y_height_median > 20:
			
 
				+                ssd_row = ssd_row - 1
			
 
				+            elif y_height_median - y_height_all_sorted[i] > 20:
			
 
				+                ssd_row = ssd_row - 1
			
 
				+
			
 
				+        if count_x < len(x_distance_all) / 2 + 1:
			
 
				+            ssd_column = ssd_column + count_x
			
 
				+        elif count_y < len(y_distance_all) / 2 + 1:
			
 
				+            ssd_row = ssd_row + count_y
			
 
				+
			
 
				+        average_height = int(np.mean(y_height_all))
			
 
				+        average_width = int(np.mean(x_width_all))
			
 
				+
			
 
				+        # average_height = format(np.mean(y_height_all), '.2f')
			
 
				+        # average_width = format(np.mean(x_width_all), '.2f')
			
 
				+        # average_height = int(np.mean(y_distance_all))
			
 
				+        # average_width = int(np.mean(x_distance_all))
			
 
				+        location_ssd = {'xmin': sorted_xmin[0] + left,
			
 
				+                        'ymin': sorted_ymin[0] + top,
			
 
				+                        'xmax': sorted_xmax[-1] + left,
			
 
				+                        'ymax': sorted_ymax[-1] + top}
			
 
				+
			
 
				+        choice_m_ssd = {'bounding_box': location_ssd,
			
 
				+                        "single_height": average_height,
			
 
				+                        "single_width": average_width,
			
 
				+                        "rows": ssd_row,
			
 
				+                        "cols": ssd_column,
			
 
				+                        'class_name': 'choice_m',
			
 
				+                        'all_small_coordinate': all_small_coordinate
			
 
				+                        }
			
 
				+    else:
			
 
				+        choice_m_ssd = {}
			
 
				+    return choice_m_ssd
			
--- a/segment/sheet_resolve/analysis/choice/get_title_number_by_choice_m.py
+++ b/segment/sheet_resolve/analysis/choice/get_title_number_by_choice_m.py
@@ -0,0 +1,496 @@
 
				+from segment.sheet_resolve.tools import utils
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate, get_ocr_text_and_coordinate0
			
 
				+import numpy as np
			
 
				+import re, os
			
 
				+import xml.etree.cElementTree as ET
			
 
				+import cv2
			
 
				+
			
 
				+
			
 
				+def combine_char(all_digital_list):
			
 
				+    new_all_digital_list = []
			
 
				+    i = 1
			
 
				+    while i <= len(all_digital_list):
			
 
				+        pre_one = all_digital_list[i - 1]
			
 
				+        if i == len(all_digital_list):
			
 
				+            new_all_digital_list.append(pre_one)
			
 
				+            break
			
 
				+        rear_one = all_digital_list[i]
			
 
				+        condition1 = abs(pre_one['location']['top'] - rear_one['location']['top']) < pre_one['location'][
			
 
				+            'height']  # 两字高度差小于一字高度
			
 
				+        condition2 = pre_one['location']['left'] + 1.8 * pre_one['location']['width'] > rear_one['location'][
			
 
				+            'left']  # 某字宽度的2倍大于两字间间隔
			
 
				+        if condition1:
			
 
				+            if condition2:
			
 
				+                new_char_list = [pre_one, rear_one]
			
 
				+                new_char_list = sorted(new_char_list, key=lambda k: k.get('location')['left'])
			
 
				+                pre_one = new_char_list[0]
			
 
				+                rear_one = new_char_list[1]
			
 
				+                new_char = pre_one['char'] + rear_one['char']
			
 
				+                new_location = {'left': pre_one['location']['left'],
			
 
				+                                'top': min(pre_one['location']['top'], rear_one['location']['top']),
			
 
				+                                'width': rear_one['location']['left'] + rear_one['location']['width'] -
			
 
				+                                         pre_one['location']['left'],
			
 
				+                                'height': max(pre_one['location']['height'], rear_one['location']['height'])}
			
 
				+                new_all_digital_list.append({'char': new_char, 'location': new_location})
			
 
				+                i = i + 1 + 1
			
 
				+            else:
			
 
				+                new_all_digital_list.append(pre_one)
			
 
				+                i = i + 1
			
 
				+        else:
			
 
				+            new_all_digital_list.append(pre_one)  # 遇到字符y轴相差过大就结束
			
 
				+            i = i + 1
			
 
				+    return new_all_digital_list
			
 
				+
			
 
				+
			
 
				+def get_x_diff_and_y_diff0(single_choice_m_coordinates):
			
 
				+    single_choice_m_matrix = np.array(single_choice_m_coordinates)
			
 
				+    x_diff = single_choice_m_matrix[1:, 0] - single_choice_m_matrix[:-1, 2]
			
 
				+    x_diff_ = [ele for ele in x_diff.tolist() if ele < 0]
			
 
				+    xx = [ele for ele in x_diff.tolist() if ele not in x_diff_]
			
 
				+    x_interval = int(np.mean(xx))
			
 
				+    return x_interval
			
 
				+
			
 
				+
			
 
				+def get_x_diff_and_y_diff(single_choice_m_coordinates):
			
 
				+    single_choice_m_matrix = np.array(single_choice_m_coordinates)
			
 
				+    x_diff = single_choice_m_matrix[1:, 0] - single_choice_m_matrix[:-1, 2]
			
 
				+    x_diff_ = [ele for ele in x_diff.tolist() if ele < 0]
			
 
				+    xx = [ele for ele in x_diff.tolist() if ele not in x_diff_]
			
 
				+    x_dif_length = int(np.mean(xx))
			
 
				+
			
 
				+    yy_diff = single_choice_m_matrix[1:, 1] - single_choice_m_matrix[:-1, 3]
			
 
				+    y_diff_ = [ele for ele in yy_diff.tolist() if ele < 0]
			
 
				+    yy = [ele for ele in yy_diff.tolist() if ele not in y_diff_]
			
 
				+    y_dif_length = int(np.mean(yy))
			
 
				+    x_y_interval = (x_dif_length, y_dif_length)
			
 
				+    return x_y_interval
			
 
				+
			
 
				+
			
 
				+def choice_bbox_vague(choice_m_absolute_box, x_y_interval, single_width, single_height, direction):
			
 
				+    xmin0 = [ele[0] for ele in choice_m_absolute_box]
			
 
				+    ymin0 = [ele[1] for ele in choice_m_absolute_box]
			
 
				+    xmax0 = [ele[2] for ele in choice_m_absolute_box]
			
 
				+    ymax0 = [ele[3] for ele in choice_m_absolute_box]
			
 
				+
			
 
				+    if direction == 180:
			
 
				+        x_diff = x_y_interval[0]
			
 
				+        s_width = single_height
			
 
				+        choice_bbox = (np.hstack((np.array([min(xmin0) - x_diff - s_width, min(ymin0)]), np.array([max(xmax0), max(ymax0)])))).tolist()
			
 
				+        return choice_bbox
			
 
				+    elif direction == 90:
			
 
				+        y_diff = x_y_interval[1]
			
 
				+        s_height = single_width
			
 
				+        choice_bbox = (np.hstack((np.array([min(xmin0), min(ymin0) - y_diff - s_height]), np.array([max(xmax0), max(ymax0)])))).tolist()
			
 
				+        return choice_bbox
			
 
				+
			
 
				+
			
 
				+def get_digital_near_choice_m_box(new_all_digital_list, choice_m_new_bbox, x_y_interval_ave, singe_box_width_height_ave, direction):
			
 
				+    digital_list_by_choice_m = []
			
 
				+
			
 
				+    for i, c_ele in enumerate(choice_m_new_bbox):
			
 
				+        c_box = c_ele['bounding_box']
			
 
				+        title_number_list = []
			
 
				+        title_number_dict = {}
			
 
				+        for j, d_ele in enumerate(new_all_digital_list):
			
 
				+            d_location = d_ele['location']
			
 
				+            if direction == 180:
			
 
				+                if utils.decide_coordinate_left(d_location, c_box, x_y_interval_ave, singe_box_width_height_ave) == True:
			
 
				+                    title_number_list.append(d_ele)
			
 
				+            elif direction == 90:
			
 
				+                if utils.decide_coordinate_top(d_location, c_box, x_y_interval_ave, singe_box_width_height_ave) == True:
			
 
				+                    title_number_list.append(d_ele)
			
 
				+
			
 
				+        title_number_dict['bounding_box'] = c_box
			
 
				+        title_number_dict['title_number'] = title_number_list
			
 
				+
			
 
				+        digital_list_by_choice_m.append(title_number_dict)
			
 
				+
			
 
				+    return digital_list_by_choice_m
			
 
				+
			
 
				+
			
 
				+def move_intersect_box(all_small_coordinate_list):
			
 
				+    all_small_coordinate_list = sorted(all_small_coordinate_list, key=lambda k: k[0])
			
 
				+    all_small_coordinate_list_temp = all_small_coordinate_list.copy()
			
 
				+
			
 
				+    del_list = []
			
 
				+    new_box = []
			
 
				+    for i, outer in enumerate(all_small_coordinate_list_temp):
			
 
				+        for j, inner in enumerate(all_small_coordinate_list_temp):
			
 
				+            if i == j:
			
 
				+                continue
			
 
				+            else:
			
 
				+                if utils.get_min_distance(inner, outer) == 'i':
			
 
				+                    inner_outer_list = [inner, outer]
			
 
				+                    inner_outer_list = sorted(inner_outer_list, key=lambda k: k[0])
			
 
				+
			
 
				+                    if abs(inner_outer_list[0][2] - inner_outer_list[1][0]) > int(inner[2] - inner[0]) // 4:
			
 
				+                        del_list.append(inner)
			
 
				+                        del_list.append(outer)
			
 
				+                        new_box_xmin = (inner[0] + outer[0]) // 2
			
 
				+                        new_box_ymin = (inner[1] + outer[1]) // 2
			
 
				+                        new_box_xmax = (inner[2] + outer[2]) // 2
			
 
				+                        new_box_ymax = (inner[3] + outer[3]) // 2
			
 
				+                        new_box.append([new_box_xmin, new_box_ymin, new_box_xmax, new_box_ymax])
			
 
				+                    else:
			
 
				+                        continue
			
 
				+    del_list0 = [list(t) for t in set(tuple(ele) for ele in del_list)]
			
 
				+    del_list0.sort(key=del_list.index)
			
 
				+    new_list0 = [list(t) for t in set(tuple(ele) for ele in new_box)]
			
 
				+    new_list0.sort(key=new_box.index)
			
 
				+
			
 
				+    all_small_coordinate = []
			
 
				+    for ele in all_small_coordinate_list_temp:
			
 
				+        if ele in all_small_coordinate_list_temp:
			
 
				+            if ele not in del_list0:
			
 
				+                all_small_coordinate.append(ele)
			
 
				+    for ele in new_list0:
			
 
				+        all_small_coordinate.append(ele)
			
 
				+    return all_small_coordinate
			
 
				+
			
 
				+
			
 
				+def get_one_line_box(all_small_coordinate_list, height):
			
 
				+    all_small_coordinate_raw = sorted(all_small_coordinate_list, key=lambda k: k[1])
			
 
				+    all_small_coordinate_raw_array = np.array(all_small_coordinate_raw)
			
 
				+
			
 
				+    pre = all_small_coordinate_raw_array[1:, 1]
			
 
				+    rear = all_small_coordinate_raw_array[:-1, 1]
			
 
				+    y_diff = rear - pre
			
 
				+    index_list = [index for index, ele in enumerate(y_diff) if ele < 0 and abs(ele) > height // 3]
			
 
				+
			
 
				+    res_list = []
			
 
				+    split_x_index = [ele + 1 for ele in index_list]
			
 
				+    split_x_index.insert(0, 0)
			
 
				+    split_x_index.insert(-1, len(all_small_coordinate_raw))
			
 
				+    split_x_index = sorted(list(set(split_x_index)))
			
 
				+    for i, split in enumerate(split_x_index[1:]):
			
 
				+        one_line = all_small_coordinate_raw[split_x_index[i]:split_x_index[i + 1]]
			
 
				+        one_line = sorted(one_line, key=lambda k: k[0])
			
 
				+        res_list.append(one_line)
			
 
				+    return res_list
			
 
				+
			
 
				+
			
 
				+def get_one_col_box(all_small_coordinate_list, width):
			
 
				+    all_small_coordinate_raw = sorted(all_small_coordinate_list, key=lambda k: k[0])
			
 
				+    all_small_coordinate_raw_array = np.array(all_small_coordinate_raw)
			
 
				+
			
 
				+    pre = all_small_coordinate_raw_array[1:, 0]
			
 
				+    rear = all_small_coordinate_raw_array[:-1, 0]
			
 
				+    y_diff = rear - pre
			
 
				+    index_list = [index for index, ele in enumerate(y_diff) if ele < 0 and abs(ele) > width // 3]
			
 
				+
			
 
				+    res_list = []
			
 
				+    split_x_index = [ele + 1 for ele in index_list]
			
 
				+    split_x_index.insert(0, 0)
			
 
				+    split_x_index.insert(-1, len(all_small_coordinate_raw))
			
 
				+    split_x_index = sorted(list(set(split_x_index)))
			
 
				+    for i, split in enumerate(split_x_index[1:]):
			
 
				+        one_line = all_small_coordinate_raw[split_x_index[i]:split_x_index[i + 1]]
			
 
				+        one_line = sorted(one_line, key=lambda k: k[0])
			
 
				+        res_list.append(one_line)
			
 
				+    return res_list
			
 
				+
			
 
				+
			
 
def analysis_s_box(choice_m_bbox_list):
    """Rebuild the option-box list of every choice region, filling in missing boxes.

    For each region dict the small option boxes are passed through
    ``move_intersect_box``, grouped into rows by vertical gap, and every row
    that holds a different number of boxes than ``cols`` gets extra boxes
    synthesized at the column positions that appear to be missing.

    :param choice_m_bbox_list: list of region dicts. Keys read here:
        ``'all_small_coordinate'`` (list of dicts with ``xmin``/``ymin``/
        ``xmax``/``ymax``), ``'cols'``, ``'single_width'`` and
        ``'bounding_box'['xmin']``.
    :return: list of the same dict objects, each updated in place so that
        ``'all_small_coordinate'`` holds the rebuilt list of box dicts.
    """
    choice_m_box_dict = []
    for index, s_choice_m_dict in enumerate(choice_m_bbox_list):
        # Flatten the box dicts into [xmin, ymin, xmax, ymax] lists.
        all_small_coordinate_list0 = [[ele['xmin'], ele['ymin'], ele['xmax'], ele['ymax']] for ele in s_choice_m_dict['all_small_coordinate']]
        all_small_coordinate_raw0 = sorted(all_small_coordinate_list0, key=lambda k: k[1])
        all_small_coordinate_raw_array0 = np.array(all_small_coordinate_raw0)
        # (width, height) of a typical box: difference of the truncated mean
        # edges, computed on the boxes before any merging.
        s_box_wid_hei = (
            int(np.mean(all_small_coordinate_raw_array0[:, 2])) - int(np.mean(all_small_coordinate_raw_array0[:, 0])),
            int(np.mean(all_small_coordinate_raw_array0[:, 3])) - int(np.mean(all_small_coordinate_raw_array0[:, 1])))

        all_small_coordinate_list = move_intersect_box(all_small_coordinate_list0)

        all_small_coordinate_raw = sorted(all_small_coordinate_list, key=lambda k: k[1])
        all_small_coordinate_raw_array = np.array(all_small_coordinate_raw)

        # Row splitting (same procedure as get_one_line_box): a new row starts
        # where ymin grows by more than a third of the typical box height.
        pre = all_small_coordinate_raw_array[1:, 1]
        rear = all_small_coordinate_raw_array[:-1, 1]
        y_diff = rear - pre
        index_list = [index for index, ele in enumerate(y_diff) if ele < 0 and abs(ele) > s_box_wid_hei[1]//3]

        res_list = []
        split_x_index = [ele + 1 for ele in index_list]
        split_x_index.insert(0, 0)
        # insert(-1, ...) places the end marker before the last element; the
        # set + sort on the next line restores ascending order.
        split_x_index.insert(-1, len(all_small_coordinate_raw))
        split_x_index = sorted(list(set(split_x_index)))
        for i, split in enumerate(split_x_index[1:]):
            one_line = all_small_coordinate_raw[split_x_index[i]:split_x_index[i + 1]]
            one_line = sorted(one_line, key=lambda k: k[0])
            res_list.append(one_line)

        # Rows that already contain exactly `cols` boxes are used to estimate
        # the horizontal gap between neighbouring boxes.
        one_line_list = []
        for index1, ele1 in enumerate(res_list):
            if len(ele1) == s_choice_m_dict['cols']:
                one_line_list.append(ele1)
        s_box_all = [ele0 for ele in one_line_list for ele0 in ele]
        if s_box_all == []:
            # No complete row: fall back to two thirds of the region's single_width.
            x_y_interval = int((s_choice_m_dict['single_width'] * 2) // 3)
        else:
            x_y_interval = []
            if len(one_line_list) > 1:
                x_y_interval = get_x_diff_and_y_diff0(s_box_all)
            elif len(one_line_list) == 1:
                # Single complete row: mean of xmin(next) - xmax(previous).
                s_box_arr = np.array(s_box_all)
                rear = s_box_arr[1:, 0]
                pre = s_box_arr[:-1, 2]
                x_y_interval = int(np.mean(rear - pre))
            # NOTE(review): this compares an int with a list and is never True,
            # so the branch below is unreachable.
            elif len(one_line_list) == []:
                x_y_interval = get_x_diff_and_y_diff0(all_small_coordinate_raw)

        all_small_coordinate = []
        # Rows are ordered by comparing their first boxes as lists (xmin first).
        res_list = sorted(res_list, key=lambda k: k[0])
        for index, box_list in enumerate(res_list):
            for s_box in box_list:
                all_small_coordinate.append(s_box)
            if len(box_list) == s_choice_m_dict['cols']:
                continue
            else:
                # Estimate the column index of every existing box from its
                # horizontal offset to the region's xmin.
                one_line_xmin = [ele[0] for ele in box_list]
                choice_m_xmin = s_choice_m_dict['bounding_box']['xmin']
                exist_index_all = []
                for exist_index, exist_xmin in enumerate(one_line_xmin):
                    if abs(choice_m_xmin - exist_xmin) <= s_box_wid_hei[0]:
                        exist_index_all.append(0)
                    else:
                        # Offset divided by the pitch (box width + gap), rounded.
                        k = round(abs(choice_m_xmin - exist_xmin) / (s_box_wid_hei[0] + x_y_interval))
                        exist_index_all.append(k)

                s_box_index_all = [ele for ele in range(s_choice_m_dict['cols'])]
                lack_index = [ele for ele in s_box_index_all if ele not in exist_index_all]
                # Synthesized boxes are appended after the row's existing boxes,
                # so the output list is not in left-to-right order.
                if 0 in exist_index_all:
                    # Column 0 exists: place missing boxes relative to the first box.
                    for lack_ele in lack_index:
                        xmin = box_list[0][0] + lack_ele * (s_box_wid_hei[0] + x_y_interval)
                        ymin = box_list[0][1]
                        xmax = xmin + s_box_wid_hei[0]
                        ymax = box_list[0][3]
                        all_small_coordinate.append([xmin, ymin, xmax, ymax])
                elif (s_choice_m_dict['cols'] - 1) in exist_index_all:
                    # Last column exists: place missing boxes leftwards from the last box.
                    for lack_ele in lack_index:
                        xmin = box_list[-1][0] - (s_choice_m_dict['cols'] - 1 - lack_ele) * (s_box_wid_hei[0] + x_y_interval)
                        ymin = box_list[-1][1]
                        xmax = xmin + s_box_wid_hei[0]
                        ymax = box_list[-1][3]
                        all_small_coordinate.append([xmin, ymin, xmax, ymax])
                elif 1 in exist_index_all:
                    # Column 1 exists but neither end column does: the first box
                    # is treated as column 1.
                    for lack_ele in lack_index:
                        if lack_ele < 1:
                            xmin = box_list[0][0] - (s_box_wid_hei[0] + x_y_interval)
                            ymin = box_list[0][1]
                            xmax = xmin + s_box_wid_hei[0]
                            ymax = box_list[0][3]
                            all_small_coordinate.append([xmin, ymin, xmax, ymax])
                        else:
                            xmin = box_list[0][0] + (lack_ele - 1) * (s_box_wid_hei[0] + x_y_interval)
                            ymin = box_list[0][1]
                            xmax = xmin + s_box_wid_hei[0]
                            ymax = box_list[0][3]
                            all_small_coordinate.append([xmin, ymin, xmax, ymax])
                # If none of the three cases applies, nothing is synthesized
                # for this row.
        # Convert the box lists back to dicts and store them on the region.
        all_small_coordinate0 = []
        for s_bbox in all_small_coordinate:
            location = {}
            location['xmin'] = s_bbox[0]
            location['ymin'] = s_bbox[1]
            location['xmax'] = s_bbox[2]
            location['ymax'] = s_bbox[3]
            all_small_coordinate0.append(location)
        s_choice_m_dict.update({'all_small_coordinate': all_small_coordinate0})
        choice_m_box_dict.append(s_choice_m_dict)
    return choice_m_box_dict
			
 
				+
			
 
				+
			
 
def get_title_number(choice_bbox, choice_region, choice_m_box_dict, direction):
    """Assign question numbers to each choice region using OCR digits.

    Runs OCR on ``choice_region``, keeps the characters that contain a digit,
    associates them with the choice regions via
    ``get_digital_near_choice_m_box``, merges neighbouring digits with
    ``combine_char`` and stores the resulting numbers under ``'number'`` on
    every region dict.

    :param choice_bbox: box passed to ``utils.get_img_region_box1`` /
        ``utils.get_img_region_box0`` together with each region or option box
        (presumably the position of ``choice_region`` in the full image --
        TODO confirm against utils).
    :param choice_region: image handed to ``get_ocr_text_and_coordinate0``.
    :param choice_m_box_dict: list of region dicts; keys read here are
        ``'bounding_box'``, ``'all_small_coordinate'``, ``'rows'``, ``'cols'``
        and, when the digit count does not match ``rows``, ``'number'``.
        The dicts are modified in place.
    :param direction: forwarded to ``get_digital_near_choice_m_box`` (180 or 90).
    :return: list of the updated region dicts.
    """
    words_result_choice = get_ocr_text_and_coordinate0(choice_region, ocr_accuracy='accurate', language_type='CHN_ENG')
    # Keep every OCR character entry whose text contains a digit.
    all_digital_list0 = []
    pattern = re.compile(r'\d')
    for i, chars_dict in enumerate(words_result_choice):
        chars_list = chars_dict['chars']
        for ele in chars_list:
            if pattern.search(ele['char']):
                all_digital_list0.append(ele)

    # Collect digits for which decide_coordinate_full_contains2 reports True
    # against a (converted) region box.
    delete_list = []
    for ele_digtal in all_digital_list0:
        for ele_choice_m in choice_m_box_dict:
            xmin_d = ele_digtal['location']['left']
            ymin_d = ele_digtal['location']['top']
            xmax_d = ele_digtal['location']['left'] + ele_digtal['location']['width']
            ymax_d = ele_digtal['location']['top'] + ele_digtal['location']['height']

            ele_digtal_bbox = [xmin_d, ymin_d, xmax_d, ymax_d]

            ele_choice_m_bbox = [ele_choice_m['bounding_box']['xmin'], ele_choice_m['bounding_box']['ymin'],
                                 ele_choice_m['bounding_box']['xmax'], ele_choice_m['bounding_box']['ymax']]

            choice_m_new_box = utils.get_img_region_box1(ele_choice_m_bbox, choice_bbox)
            if utils.decide_coordinate_full_contains2(choice_m_new_box, ele_digtal_bbox) == True:
                delete_list.append(ele_digtal)

    # NOTE(review): this filtered list is never used. The call to
    # get_digital_near_choice_m_box below receives the unfiltered
    # all_digital_list0, and the name is rebound later in the loop over
    # digital_list_by_choice_m -- confirm whether that is intended.
    all_digital_list = []

    for ele in all_digital_list0:
        if ele in delete_list:
            continue
        else:
            all_digital_list.append(ele)

    # Convert every region and its option boxes with get_img_region_box1 and
    # record per-region gap and box-size statistics on the region dict.
    choice_m_box_dict_new = []
    x_y_interval_all = []
    s_box_w_h = []
    for index, s_choice_m_box in enumerate(choice_m_box_dict):
        choice_m_box = [s_choice_m_box['bounding_box']['xmin'], s_choice_m_box['bounding_box']['ymin'],
                        s_choice_m_box['bounding_box']['xmax'], s_choice_m_box['bounding_box']['ymax']]
        choice_m_new_box = utils.get_img_region_box1(choice_m_box, choice_bbox)
        all_small_coordinate_dict = s_choice_m_box['all_small_coordinate']
        all_small_coordinate_list = [[ele['xmin'], ele['ymin'], ele['xmax'], ele['ymax']] for ele in
                                     all_small_coordinate_dict]
        all_small_coordinate_new = []
        for s_bbox in all_small_coordinate_list:
            s_bbox_new = utils.get_img_region_box1(s_bbox, choice_bbox)
            all_small_coordinate_new.append(s_bbox_new)
        col = s_choice_m_box['cols']
        x_y_interval = utils.get_x_diff_and_y_diff1(all_small_coordinate_new, col)
        x_y_interval_all.append(x_y_interval)

        all_small_coordinate_list = sorted(all_small_coordinate_new, key=lambda k: k[1])
        s_box_array = np.array(all_small_coordinate_list)
        # (width, height) from the truncated means of the box edges.
        s_box_wid_hei = (int(np.mean(s_box_array[:, 2])) - int(np.mean(s_box_array[:, 0])),
                         int(np.mean(s_box_array[:, 3])) - int(np.mean(s_box_array[:, 1])))
        s_box_w_h.append(s_box_wid_hei)
        # In-place update: the caller's dicts now hold converted coordinates.
        s_choice_m_box.update({'bounding_box': choice_m_new_box,
                               'all_small_coordinate': all_small_coordinate_new,
                               's_box_w_h': s_box_wid_hei,
                               'x_y_interval': x_y_interval})

        choice_m_box_dict_new.append(s_choice_m_box)

    # Averages over all regions; indexing columns 0 and 1 requires at least
    # one region and a two-element x_y_interval per region.
    x_y_interval_arr = np.array(x_y_interval_all)
    x_y_interval_ave = (int(np.mean(x_y_interval_arr[:, 0])), int(np.mean(x_y_interval_arr[:, 1])))

    s_box_w_h_arr = np.array(s_box_w_h)
    singe_box_width_height_ave = (int(np.mean(s_box_w_h_arr[:, 0])), int(np.mean(s_box_w_h_arr[:, 1])))

    digital_list_by_choice_m = get_digital_near_choice_m_box(all_digital_list0, choice_m_box_dict_new, x_y_interval_ave, singe_box_width_height_ave, direction)

    # Within each region, order the digits top-to-bottom and merge neighbours.
    for number in digital_list_by_choice_m:
        title_number_list = number['title_number']

        all_digital_list = sorted(title_number_list, key=lambda k: k.get('location')['top'])
        new_title_number_list = combine_char(all_digital_list)
        number.update({'title_number': new_title_number_list})

    # Two stable sorts: final order is by bounding_box[0], ties by bounding_box[1].
    digital_list_by_choice_m = sorted(digital_list_by_choice_m, key=lambda k: k.get('bounding_box')[1])
    digital_list_by_choice_m = sorted(digital_list_by_choice_m, key=lambda k: k.get('bounding_box')[0])

    # Copy the digit lists onto the region dicts by matching bounding boxes.
    # A region is appended once per matching entry, so regions with equal
    # bounding boxes would appear more than once.
    all_list = []
    for index0, ele0 in enumerate(choice_m_box_dict_new):
        for index1, ele1 in enumerate(digital_list_by_choice_m):
            choice_m0 = ele0['bounding_box']
            choice_m1 = ele1['bounding_box']
            if choice_m0 == choice_m1:
                ele0.update({'title_number': ele1['title_number']})
                all_list.append(ele0)

    all_list_new = []
    a_z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'  # not used in this function
    for choice_m_index, choice_m_s in enumerate(all_list):
        row_and_col = (choice_m_s['rows'], choice_m_s['cols'])
        title_number = choice_m_s['title_number']
        x_y_interval = choice_m_s['x_y_interval']
        single_bbox_width_height = choice_m_s['s_box_w_h']
        s_box = choice_m_s['all_small_coordinate']
        if len(title_number) == row_and_col[0]:
            # One recognised number per row: take the OCR values as they are.
            title_number_list = [int(ele['char']) for ele in title_number]
            choice_m_s.update({'title_number': title_number_list})
            all_list_new.append(choice_m_s)
        else:
            # Count mismatch: match numbers to rows by vertical position and
            # let utils.infer_number process the resulting list.
            s_box = sorted(s_box, key=lambda k: k[1])
            s_box_row = get_one_line_box(s_box, single_bbox_width_height[1])
            row_box_ = []

            index_list = []  # row indices that were matched to a number
            for index0, ele0 in enumerate(s_box_row):
                for index1, ele1 in enumerate(title_number):
                    title_number_bbox = [ele1['location']['left'], ele1['location']['top'],
                                         ele1['location']['left'] + ele1['location']['width'],
                                         ele1['location']['top'] + ele1['location']['height']]
                    row_box = {}
                    # The row's first box must have both its top and bottom
                    # within one box height of the number's top and bottom.
                    if title_number_bbox[1] - single_bbox_width_height[1] < ele0[0][1] < title_number_bbox[1] + \
                            single_bbox_width_height[1] \
                            and title_number_bbox[3] - single_bbox_width_height[1] < ele0[0][3] < \
                            title_number_bbox[3] + single_bbox_width_height[1]:
                        row_box['title_number'] = ele1
                        row_box['row_box'] = ele0
                        row_box_.append(row_box)
                        index_list.append(index0)
            index0 = list(set([i for i in range(0, row_and_col[0])]) - set(index_list))  # unmatched row indices (not used below)
            number0 = choice_m_s['number']
            # NOTE(review): the k-th matched row receives the k-th entry of
            # title_number rather than the entry stored in row_box_; this
            # assumes the matches occur in title_number order -- confirm.
            for index, exist_index in enumerate(index_list):
                number_char = choice_m_s['title_number'][index]['char']
                number0[exist_index] = int(number_char)
            new_number_list = utils.infer_number(number0)
            choice_m_s.update({'title_number': new_number_list})
            all_list_new.append(choice_m_s)
    # Build the output: convert boxes with get_img_region_box0 (presumably the
    # inverse of get_img_region_box1 -- TODO confirm), store the numbers under
    # 'number' and drop the working keys.
    title_number_by_choice_m_list = []
    for index, single_choice_m in enumerate(all_list_new):
        small_bbox_list = []
        for index_s, ele_s in enumerate(single_choice_m['all_small_coordinate']):
            location_s = {}
            s_box_new = utils.get_img_region_box0(ele_s, choice_bbox)
            location_s['xmin'] = s_box_new[0]
            location_s['ymin'] = s_box_new[1]
            location_s['xmax'] = s_box_new[2]
            location_s['ymax'] = s_box_new[3]
            small_bbox_list.append(location_s)

        choice_m_bbox = single_choice_m['bounding_box']
        choice_m_by_img = utils.get_img_region_box0(choice_m_bbox, choice_bbox)
        choice_m_by_img0 = utils.list_to_dict(choice_m_by_img)
        single_choice_m.update({'number': single_choice_m['title_number']})
        single_choice_m.update({'bounding_box': choice_m_by_img0,
                                'all_small_coordinate': small_bbox_list})
        single_choice_m.pop('x_y_interval')
        single_choice_m.pop('s_box_w_h')
        single_choice_m.pop('title_number')
        title_number_by_choice_m_list.append(single_choice_m)
    return title_number_by_choice_m_list
			
--- a/segment/sheet_resolve/analysis/cloze/__init__.py
+++ b/segment/sheet_resolve/analysis/cloze/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:02
			
--- a/segment/sheet_resolve/analysis/cloze/analysis_cloze.py
+++ b/segment/sheet_resolve/analysis/cloze/analysis_cloze.py
@@ -0,0 +1,101 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : analysis_cloze.py
			
 
				+import time
			
 
				+
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+
			
 
				+from segment.sheet_resolve.lib.model.test import im_detect
			
 
				+from segment.sheet_resolve.lib.model.nms_wrapper import nms
			
 
				+from segment.sheet_resolve.lib.utils.timer import Timer
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+
			
 
				+
			
 
				+def analysis_single_image_with_regions(analysis_type, classes, sess, net,
			
 
				+                                       im, conf_thresh, mns_thresh,
			
 
				+                                       coordinate_bias_dict):
			
 
				+    """Detect object classes in an image using pre-computed object proposals."""
			
 
				+
			
 
				+    size = im.shape
			
 
				+
			
 
				+    # Detect all object classes and regress object bounds
			
 
				+    timer = Timer()
			
 
				+    timer.tic()
			
 
				+    im, ratio = utils.img_resize(analysis_type, im)
			
 
				+    scores, boxes = im_detect(analysis_type, sess, net, im)
			
 
				+    timer.toc()
			
 
				+    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))
			
 
				+
			
 
				+    content_list = []
			
 
				+    analysis_cls_list = []
			
 
				+    for cls_ind, cls in enumerate(classes[1:]):  # classes
			
 
				+        cls_ind += 1  # because we skipped background
			
 
				+        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
			
 
				+        cls_scores = scores[:, cls_ind]
			
 
				+        dets = np.hstack((cls_boxes,
			
 
				+                          cls_scores[:, np.newaxis])).astype(np.float32)
			
 
				+        keep = nms(dets, mns_thresh)
			
 
				+        dets = dets[keep, :]
			
 
				+        # vis_detections(im, cls, dets, ax, thresh=conf_thresh)
			
 
				+        inds = np.where(dets[:, -1] >= conf_thresh)[0]
			
 
				+        if len(inds) > 0:
			
 
				+            if cls in list(coordinate_bias_dict.keys()):
			
 
				+                xmin_bias = coordinate_bias_dict[cls]['xmin_bias']
			
 
				+                ymin_bias = coordinate_bias_dict[cls]['ymin_bias']
			
 
				+                xmax_bias = coordinate_bias_dict[cls]['xmax_bias']
			
 
				+                ymax_bias = coordinate_bias_dict[cls]['ymax_bias']
			
 
				+            else:
			
 
				+                xmin_bias = 0
			
 
				+                ymin_bias = 0
			
 
				+                xmax_bias = 0
			
 
				+                ymax_bias = 0
			
 
				+            for i in inds:
			
 
				+                bbox = dets[i, :4]
			
 
				+                score = '{:.4f}'.format(dets[i, -1])
			
 
				+
			
 
				+                xmin = int(int(bbox[0]) * ratio[0]) + xmin_bias
			
 
				+                ymin = int(int(bbox[1]) * ratio[1]) + ymin_bias
			
 
				+                xmax = int(int(bbox[2]) * ratio[0]) + xmax_bias
			
 
				+                ymax = int(int(bbox[3]) * ratio[1]) + ymax_bias
			
 
				+
			
 
				+                if xmin_bias - xmax_bias >= xmax - xmin:
			
 
				+                    print('{:s}, xmin_bias - xmax_bias >= region_width'.format(cls))
			
 
				+                    continue
			
 
				+                if ymin_bias - ymax_bias >= ymax - ymin:
			
 
				+                    print('{:s}, ymin_bias - ymax_bias >= region_width'.format(cls))
			
 
				+                    continue
			
 
				+
			
 
				+                # xmin >=1, ymin>=1, xmax <= size[0] - 1, ymax <= size[1] - 1
			
 
				+                xmin = (xmin if (xmin > 0) else 1)
			
 
				+                ymin = (ymin if (ymin > 0) else 1)
			
 
				+                xmax = (xmax if (xmax < size[1]) else size[1] - 1)
			
 
				+                ymax = (ymax if (ymax < size[0]) else size[0] - 1)
			
 
				+
			
 
				+                bbox_dict = {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}
			
 
				+                class_dict = {"class_name": cls, "bounding_box": bbox_dict, "score": score}
			
 
				+                content_list.append(class_dict)
			
 
				+
			
 
				+                analysis_cls_list.append(cls)
			
 
				+
			
 
				+    return content_list, sorted(analysis_cls_list)
			
 
				+
			
 
				+
			
 
				+def get_single_image_sheet_regions(analysis_type, im, classes,
			
 
				+                                   sess, net, conf_thresh, mns_thresh,
			
 
				+                                   coordinate_bias_dict):
			
 
				+    start_time = time.time()
			
 
				+
			
 
				+    content, cls = analysis_single_image_with_regions(analysis_type, classes,
			
 
				+                                                      sess, net,
			
 
				+                                                      im, conf_thresh, mns_thresh,
			
 
				+                                                      coordinate_bias_dict)
			
 
				+
			
 
				+    img_dict = {"img_name": 'cloze',
			
 
				+                'analysis_type': analysis_type,
			
 
				+                "regions": content,
			
 
				+                }
			
 
				+
			
 
				+    end_time = time.time()
			
 
				+    print(end_time - start_time)
			
 
				+
			
 
				+    return img_dict
			
--- a/segment/sheet_resolve/analysis/cloze/cloze_box.py
+++ b/segment/sheet_resolve/analysis/cloze/cloze_box.py
@@ -0,0 +1,146 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : cloze_box.py
			
 
				+# @Time    : 2018/11/23 0023 上午 10:47
			
 
				+import re
			
 
				+import xml.etree.cElementTree as ET
			
 
				+import time
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+from segment.sheet_resolve.tools import tf_settings, utils
			
 
				+
			
 
				+
			
 
				+def get_cloze_box_coordinate(solve_img):
			
 
				+    ocr_region = solve_img
			
 
				+    t11 = time.time()
			
 
				+    word_result_list = get_ocr_text_and_coordinate(ocr_region)
			
 
				+    t22 = time.time()
			
 
				+    print('choice ocr time cost: ', t22-t11)
			
 
				+    return word_result_list
			
 
				+
			
 
				+
			
 
				+def get_each_coordinate(solve_img):
			
 
				+
			
 
				+    word_result_list = get_cloze_box_coordinate(solve_img)
			
 
				+    all_char_list = []
			
 
				+    digital_model = re.compile(r'\d')
			
 
				+    for i, chars_dict in enumerate(word_result_list):
			
 
				+        chars_list = chars_dict['chars']
			
 
				+        for ele in chars_list:
			
 
				+            if digital_model.search(ele['char']):
			
 
				+                all_char_list.append(ele)
			
 
				+
			
 
				+    return all_char_list
			
 
				+
			
 
				+
			
 
				+def decide_coordinate_contains(solve_img, xml_box):
			
 
				+    ocr_box = get_each_coordinate(solve_img)
			
 
				+
			
 
				+    ocr_all_char_list = []
			
 
				+    i = 1
			
 
				+    while i <= len(ocr_box):
			
 
				+        pre_one = ocr_box[i - 1]
			
 
				+        if i == len(ocr_box):
			
 
				+            ocr_all_char_list.append(pre_one)
			
 
				+            break
			
 
				+        rear_one = ocr_box[i]
			
 
				+        # 两字高度差小于一字高度
			
 
				+        condition1 = abs(pre_one['location']['top'] - rear_one['location']['top']) < pre_one['location']['height']
			
 
				+        # 两字长度大于两字间间隔
			
 
				+        condition2 = abs(pre_one['location']['left'] + pre_one['location']['width']
			
 
				+                         - rear_one['location']['left']) < pre_one['location']['width']
			
 
				+        if condition1:
			
 
				+            if condition2:
			
 
				+                new_char = pre_one['char'] + rear_one['char']
			
 
				+                new_location = {'left': pre_one['location']['left'],
			
 
				+                                'top': min(pre_one['location']['top'], rear_one['location']['top']),
			
 
				+                                'width': rear_one['location']['left'] + rear_one['location']['width']
			
 
				+                                - pre_one['location']['left'],
			
 
				+                                'height': max(pre_one['location']['height'], rear_one['location']['height'])}
			
 
				+                ocr_all_char_list.append({'char': new_char, 'location': new_location})
			
 
				+                i = i + 1 + 1
			
 
				+            else:
			
 
				+                ocr_all_char_list.append(pre_one)
			
 
				+                i = i + 1
			
 
				+        else:
			
 
				+            ocr_all_char_list.append(pre_one)  # 遇到字符y轴相差过大就结束
			
 
				+            i = i + 1
			
 
				+
			
 
				+    content_list = []
			
 
				+    for index, xml_b in enumerate(xml_box):  # faster-rcnn 的框
			
 
				+        bbox_right = []
			
 
				+        xml_b = xml_b['bounding_box']
			
 
				+        xmin2 = xml_b['xmin']
			
 
				+        ymin2 = xml_b['ymin']
			
 
				+        xmax2 = xml_b['xmax']
			
 
				+        ymax2 = xml_b['ymax']
			
 
				+        mid_x2 = int(xmin2 + (xmax2 - xmin2) // 2)
			
 
				+        box_coordiante = (xmin2, ymin2, xmax2, ymax2)
			
 
				+        choice_number = {'number': 999, 'location': box_coordiante}
			
 
				+        content_list.insert(index, choice_number)
			
 
				+
			
 
				+        fixed_height = 60  # 高度固定
			
 
				+        for ocr_b in ocr_all_char_list:  # ocr识别的框
			
 
				+            xmin1 = ocr_b['location']['left']
			
 
				+            ymin1 = ocr_b['location']['top']
			
 
				+            xmax1 = xmin1 + ocr_b['location']['width']
			
 
				+            ymax1 = ymin1 + ocr_b['location']['height']
			
 
				+            mid_x = int(xmin1 + (xmax1 - xmin1) // 2)
			
 
				+            mid_y = int(ymin1 + (ymax1 - ymin1) // 2)
			
 
				+
			
 
				+            if xmin2 <= mid_x <= xmax2 and ymin2 <= mid_y <= ymax2 and mid_x < mid_x2:  # 包含且在左侧
			
 
				+                content_list[index]['number'] = ocr_b['char']
			
 
				+
			
 
				+            if xmin2 <= mid_x <= xmax2 and ymin2 <= mid_y <= ymax2 and mid_x >= mid_x2:  # 包含且在右侧
			
 
				+                ocr_b['location']['left'] = int(xmin1 - 1 * ocr_b['location']['width'])  # 打分框的边框
			
 
				+                ocr_b['location']['width'] = 3 * ocr_b['location']['width']
			
 
				+                ocr_b['location']['top'] = int(ymin1 - 0.5 * ocr_b['location']['height'])
			
 
				+
			
 
				+                xmin = ocr_b['location']['left']
			
 
				+                xmax = xmin + ocr_b['location']['width']
			
 
				+                ymin = ocr_b['location']['top']
			
 
				+                ymax = ymin + fixed_height
			
 
				+                bbox_right.append({'points': ocr_b['char'], 'location': [xmin, ymin, xmax, ymax]})
			
 
				+
			
 
				+        ymin = min([ele['location'][1] for ele in bbox_right])
			
 
				+        ymax = max([ele['location'][3] for ele in bbox_right])
			
 
				+        for ele in bbox_right:
			
 
				+            ele['location'][1] = ymin
			
 
				+            ele['location'][3] = ymax
			
 
				+
			
 
				+        content_list[index]['right'] = bbox_right
			
 
				+
			
 
				+    return content_list
			
 
				+
			
 
				+
			
 
				+def cloze(left, top, image, choice_bbox_list, xml_path):
			
 
				+
			
 
				+    content = decide_coordinate_contains(image, choice_bbox_list)
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    cloze_list = []
			
 
				+    for ele in content:
			
 
				+        number = '{:02d}_cloze'.format(int(ele['number']))
			
 
				+        cloze_xmin = ele['location'][0] + left
			
 
				+        cloze_ymin = ele['location'][1] + top
			
 
				+        cloze_xmax = ele['location'][2] + left
			
 
				+        cloze_ymax = ele['location'][3] + top
			
 
				+
			
 
				+        tree = utils.create_xml(number, tree, cloze_xmin, cloze_ymin, cloze_xmax, cloze_ymax)
			
 
				+        region = [cloze_xmin, cloze_ymin, cloze_xmax, cloze_ymax]
			
 
				+
			
 
				+        points_list = []
			
 
				+        if len(ele['right']) > 0:  # 存在打分框
			
 
				+            for right_ele in ele['right']:
			
 
				+                points = right_ele['points']
			
 
				+                xmin = right_ele['location'][0] + left
			
 
				+                ymin = right_ele['location'][1] + top
			
 
				+                xmax = right_ele['location'][2] + left
			
 
				+                ymax = right_ele['location'][3] + top
			
 
				+                point_number = '{}_{}'.format(number, points)
			
 
				+
			
 
				+                tree = utils.create_xml(point_number, tree, xmin, ymin, xmax, ymax)
			
 
				+                region = [xmin, ymin, xmax, ymax]
			
 
				+                points_list.append({'points': point_number, 'region': region})
			
 
				+
			
 
				+        cloze_list.append({'number': number, 'region': region, 'points': points_list})
			
 
				+    tree.write(xml_path)
			
 
				+    return cloze_list
			
--- a/segment/sheet_resolve/analysis/cloze/cloze_line_box.py
+++ b/segment/sheet_resolve/analysis/cloze/cloze_line_box.py
@@ -0,0 +1,117 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : cloze_line_box.py
			
 
				+# @Time    : 2019/2/21 0021 上午 11:13
			
 
				+
			
 
				+import os
			
 
				+import re
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import xml.etree.cElementTree as ET
			
 
				+import time
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+from segment.sheet_resolve.tools import tf_settings, utils
			
 
				+
			
 
				+
			
 
				+def get_cloze_box_coordinate(solve_img):
			
 
				+    ocr_region = solve_img
			
 
				+    t11 = time.time()
			
 
				+    word_result_list = get_ocr_text_and_coordinate(ocr_region)
			
 
				+    t22 = time.time()
			
 
				+    print('cloze ocr time cost: ', t22-t11)
			
 
				+    return word_result_list
			
 
				+
			
 
				+
			
 
				+def get_each_coordinate(solve_img):
			
 
				+    word_result_list = get_cloze_box_coordinate(solve_img)
			
 
				+    all_char_list = []
			
 
				+    digital_model = re.compile(r'\d')
			
 
				+    for i, chars_dict in enumerate(word_result_list):
			
 
				+        chars_list = chars_dict['chars']
			
 
				+        for ele in chars_list:
			
 
				+            if digital_model.search(ele['char']):
			
 
				+                all_char_list.append(ele)
			
 
				+
			
 
				+    return all_char_list
			
 
				+
			
 
				+
			
 
				+def decide_coordinate_contains(solve_img, xml_box):
			
 
				+    ocr_box = get_each_coordinate(solve_img)
			
 
				+
			
 
				+    ocr_all_char_list = []
			
 
				+    i = 1
			
 
				+    while i <= len(ocr_box):
			
 
				+        pre_one = ocr_box[i - 1]
			
 
				+        if i == len(ocr_box):
			
 
				+            ocr_all_char_list.append(pre_one)
			
 
				+            break
			
 
				+        rear_one = ocr_box[i]
			
 
				+        # 两字高度差小于一字高度
			
 
				+        condition1 = abs(pre_one['location']['top'] - rear_one['location']['top']) < pre_one['location']['height']
			
 
				+        # 两字长度大于两字间间隔
			
 
				+        condition2 = abs(pre_one['location']['left'] + pre_one['location']['width']
			
 
				+                         - rear_one['location']['left']) < pre_one['location']['width']
			
 
				+        if condition1:
			
 
				+            if condition2:
			
 
				+                new_char = pre_one['char'] + rear_one['char']
			
 
				+                new_location = {'left': pre_one['location']['left'],
			
 
				+                                'top': min(pre_one['location']['top'], rear_one['location']['top']),
			
 
				+                                'width': rear_one['location']['left'] + rear_one['location']['width']
			
 
				+                                - pre_one['location']['left'],
			
 
				+                                'height': max(pre_one['location']['height'], rear_one['location']['height'])}
			
 
				+                ocr_all_char_list.append({'char': new_char, 'location': new_location})
			
 
				+                i = i + 1 + 1
			
 
				+            else:
			
 
				+                ocr_all_char_list.append(pre_one)
			
 
				+                i = i + 1
			
 
				+        else:
			
 
				+            ocr_all_char_list.append(pre_one)  # 遇到字符y轴相差过大就结束
			
 
				+            i = i + 1
			
 
				+
			
 
				+    content_list = []
			
 
				+    for index, xml_b in enumerate(xml_box):  # faster-rcnn 的框
			
 
				+        bbox_right = []
			
 
				+        xml_b = xml_b['bounding_box']
			
 
				+        xmin2 = xml_b['xmin']
			
 
				+        ymin2 = xml_b['ymin']
			
 
				+        xmax2 = xml_b['xmax']
			
 
				+        ymax2 = xml_b['ymax']
			
 
				+        mid_x2 = int(xmin2 + (xmax2 - xmin2) // 2)
			
 
				+        box_coordiante = (xmin2, ymin2, xmax2, ymax2)
			
 
				+        choice_number = {'number': 999, 'location': box_coordiante}
			
 
				+        content_list.insert(index, choice_number)
			
 
				+
			
 
				+        fixed_height = 60
			
 
				+        for ocr_b in ocr_all_char_list:  # ocr识别的框
			
 
				+            xmin1 = ocr_b['location']['left']
			
 
				+            ymin1 = ocr_b['location']['top']
			
 
				+            xmax1 = xmin1 + ocr_b['location']['width']
			
 
				+            ymax1 = ymin1 + ocr_b['location']['height']
			
 
				+            mid_x = int(xmin1 + (xmax1 - xmin1) // 2)
			
 
				+            mid_y = int(ymin1 + (ymax1 - ymin1) // 2)
			
 
				+
			
 
				+            if xmin2 <= mid_x <= xmax2 and ymin2 <= mid_y <= ymax2 and mid_x < mid_x2:  # 包含且在左侧
			
 
				+                content_list[index]['number'] = ocr_b['char']
			
 
				+
			
 
				+    return content_list
			
 
				+
			
 
				+
			
 
				+def cloze_line(left, top, image, choice_bbox_list, xml_path):
			
 
				+
			
 
				+    content = decide_coordinate_contains(image, choice_bbox_list)
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    cloze_list = []
			
 
				+    for ele in content:
			
 
				+        number = int(ele['number'])
			
 
				+        cloze_xmin = ele['location'][0] + left
			
 
				+        cloze_ymin = ele['location'][1] + top
			
 
				+        cloze_xmax = ele['location'][2] + left
			
 
				+        cloze_ymax = ele['location'][3] + top
			
 
				+
			
 
				+        tree = utils.create_xml(str(number), tree, cloze_xmin, cloze_ymin, cloze_xmax, cloze_ymax)
			
 
				+        region = {'xmin': cloze_xmin, 'ymin': cloze_ymin, 'xmax': cloze_xmax, 'ymax': cloze_ymax}
			
 
				+
			
 
				+        # cloze_list.append({'number': number, 'location': region, 'default_points': 5, 'class_name': 'cloze_row_col'})
			
 
				+        cloze_list.append({'number': number, 'bounding_box': region, 'default_points': 5, 'class_name': 'cloze_row_col'})
			
 
				+    tree.write(xml_path)
			
 
				+    return cloze_list
			
--- a/segment/sheet_resolve/analysis/correct/__init__.py
+++ b/segment/sheet_resolve/analysis/correct/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/12/10 0010 上午 10:25
			
--- a/segment/sheet_resolve/analysis/correct/coordinates_correct.py
+++ b/segment/sheet_resolve/analysis/correct/coordinates_correct.py
@@ -0,0 +1,244 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : coordinates_correct.py
			
 
				+# @Time    : 2018/12/10 0010 上午 10:26
			
 
				+import os
			
 
				+import cv2
			
 
				+import traceback
			
 
				+import numpy as np
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format, tesseract_boxes_by_py
			
 
				+from segment.sheet_resolve.tools.utils import read_single_img, write_single_img, crop_region, read_xml_to_json
			
 
				+import glob2 as glob
			
 
				+
			
 
				+
			
 
				+def transform(template_img_size, correcting_img, correcting_pts, template_pts):
			
 
				+    pts1 = np.float32(correcting_pts)  # 原始坐标
			
 
				+    pts2 = np.float32(template_pts)  # 目标坐标
			
 
				+
			
 
				+    M = cv2.getAffineTransform(pts1, pts2)
			
 
				+    dst = cv2.warpAffine(correcting_img, M, template_img_size, borderValue=(0, 0, 255))
			
 
				+    return dst
			
 
				+
			
 
				+
			
 
				+def get_same_str(str1, str2):
			
 
				+    str1_set = set(str1)
			
 
				+    str2_set = set(str2)
			
 
				+    intersection = str1_set & str2_set
			
 
				+    if intersection:
			
 
				+        len1 = len(str1)
			
 
				+        len2 = len(str2)
			
 
				+        if len2 == 1:
			
 
				+            start_index = str1.index(str2)
			
 
				+            return {'ismatch': True, 'coordinates': ((start_index, start_index + 1), (0, 1))}
			
 
				+        else:
			
 
				+            str_set = set(str1 + str2)
			
 
				+            str_set_dict = {}
			
 
				+            for i, ele in enumerate(sorted(list(str_set))):
			
 
				+                str_set_dict[ele] = i + 1
			
 
				+
			
 
				+            str1_np = np.asarray([str_set_dict[k] for k in str1])
			
 
				+            str2_np = np.asarray([str_set_dict[k] for k in str2])
			
 
				+
			
 
				+            np1 = np.tile(str1_np, (len2, 1))
			
 
				+            np2 = np.tile(str2_np, (1, len1)).reshape(len1, len2).T
			
 
				+
			
 
				+            np3 = np1 - np2
			
 
				+
			
 
				+            size = np3.shape
			
 
				+
			
 
				+            np4 = np3.reshape(-1, 1)
			
 
				+
			
 
				+            np4_list = np4.tolist()
			
 
				+            zero_list = list()
			
 
				+            str_index = []
			
 
				+            for i, ele in enumerate(np4):
			
 
				+                if i in zero_list:
			
 
				+                    continue
			
 
				+                else:
			
 
				+                    if ele == [0]:
			
 
				+                        length = 0
			
 
				+                        zero_list.append(i)
			
 
				+                        for interval in range(1, size[1]):
			
 
				+                            next_index = i + size[1] * interval + interval
			
 
				+                            if next_index < len(np4_list):
			
 
				+                                if np4_list[next_index] == [0]:
			
 
				+                                    length += 1
			
 
				+                                    zero_list.append(next_index)  # 跳过的循环
			
 
				+                                if np4_list[next_index] != [0]:
			
 
				+                                    break
			
 
				+                        str_index.append((i, i + size[1] * length + length, length))
			
 
				+                    else:
			
 
				+                        pass
			
 
				+
			
 
				+            # print(str_index)
			
 
				+            # print(max_index[0])
			
 
				+            max_index = sorted(str_index, key=lambda k: k[2], reverse=True)
			
 
				+            a = (max_index[0][0] // size[1], max_index[0][0] % size[1])
			
 
				+            b = (max_index[0][1] // size[1], max_index[0][1] % size[1])
			
 
				+
			
 
				+            max_str1 = str1[a[1]:b[1] + 1]
			
 
				+            max_str2 = str2[a[0]:b[0] + 1]
			
 
				+            # print(max_str1, max_str2)
			
 
				+
			
 
				+            return {'ismatch': True, 'coordinates': ((a[1], b[1] + 1), (a[0], b[0] + 1))}
			
 
				+
			
 
				+    else:
			
 
				+        return {'ismatch': False, 'coordinates': ()}
			
 
				+
			
 
				+
			
 
				+def match_string(correcting_ocr, correcting_bias, template_ocr, template_bias):
			
 
				+    correcting_words_list = correcting_ocr['chars']
			
 
				+    template_words_list = template_ocr['chars']
			
 
				+
			
 
				+    max_same_str = ''
			
 
				+    index_pair = {}
			
 
				+
			
 
				+    longer = ''.join(correcting_words_list)
			
 
				+    shorter = ''.join(template_words_list)
			
 
				+
			
 
				+    res = get_same_str(longer, shorter)
			
 
				+    if res['ismatch']:
			
 
				+        c, t = res['coordinates']
			
 
				+        if len(max_same_str) < c[1]-c[0]:
			
 
				+            max_same_str = longer[c[0]:c[1]]
			
 
				+            index_pair['correcting'] = (c[0], c[1])
			
 
				+            index_pair['template'] = (t[0], t[1])
			
 
				+
			
 
				+    if len(index_pair) > 0:
			
 
				+        correcting_coordinate = correcting_ocr['coordinates'][index_pair['correcting'][0]]  # xmin, ymin, xmax, ymax
			
 
				+        c_x, c_y = correcting_bias[0] + correcting_coordinate[0], correcting_bias[1] + correcting_coordinate[1]  # 取左上角的坐标
			
 
				+
			
 
				+        template_coordinate = template_ocr['coordinates'][index_pair['template'][0]]
			
 
				+        t_x, t_y = template_bias[0] + template_coordinate[0], template_bias[1] + template_coordinate[1]
			
 
				+
			
 
				+        print(max_same_str)
			
 
				+        if abs(c_x-t_x) < 50 and abs(c_y-t_y) < 50:
			
 
				+            return {'correcting': (c_x, c_y), 'template': (t_x, t_y)}
			
 
				+        else:
			
 
				+            return {}
			
 
				+    else:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def get_template(template_img, ocr_classes_dict, method='google'):
			
 
				+    template_dict = {}
			
 
				+
			
 
				+    for ocr in ocr_classes_dict:
			
 
				+        template = {}
			
 
				+        class_name = ocr['class_name']
			
 
				+        if 'solve' in class_name:
			
 
				+            # ocr['region']['ymax'] = int(0.10 * (ocr['region']['ymax']-ocr['region']['ymin']) +ocr['region']['ymin'])
			
 
				+            ocr['region']['ymax'] = int(250 + ocr['region']['ymin'])
			
 
				+        ocr_box = ocr['region']
			
 
				+        left, top = ocr_box['xmin'], ocr_box['ymin']
			
 
				+
			
 
				+        ocr_img = crop_region(template_img, ocr_box)
			
 
				+        # cv2.imshow(class_name, ocr_img)
			
 
				+        # if cv2.waitKey(0) == 27:
			
 
				+        #     cv2.destroyAllWindows()
			
 
				+
			
 
				+        if method == 'baidu':
			
 
				+            ocr_word = get_ocr_text_and_coordinate_in_google_format(ocr_img)  # baidu
			
 
				+        else:
			
 
				+            ocr_word = tesseract_boxes_by_py(ocr_img)  # tesseract
			
 
				+
			
 
				+        template['words_result'] = ocr_word
			
 
				+        template['coordinate_bias'] = (left, top)
			
 
				+
			
 
				+        template_dict[class_name] = template
			
 
				+    return template_dict
			
 
				+
			
 
				+
			
 
				+def get_correct_points(points_list, size):
			
 
				+    if len(points_list) > size >= 3:
			
 
				+        points_list = sorted(points_list, key=lambda k: k[1])
			
 
				+        i = 1
			
 
				+        choice_index_list = [0]
			
 
				+        for index, ele in enumerate(points_list):
			
 
				+            if abs(ele[1] - points_list[0][1]) > 500:  # 找y轴差值大于500的坐标
			
 
				+                choice_index_list.append(index)
			
 
				+                i = i + 1
			
 
				+                if i == size:
			
 
				+                    break
			
 
				+        return choice_index_list
			
 
				+    elif len(points_list) == 3:
			
 
				+        return [0, 1, 2]
			
 
				+    else:
			
 
				+        raise Exception
			
 
				+
			
 
				+
			
 
				+def save_transformed_img(template_img_size, images_path_list, ocr_classes_dict, correcting_img_dir_path, corrected_img_save_dir, template_dict, method='google'):
			
 
				+    for img_path in images_path_list:
			
 
				+        correcting_img = read_single_img(img_path)
			
 
				+        print('***********************************')
			
 
				+        print(img_path)
			
 
				+        correct_coordinates_list = []
			
 
				+        template_coordinates_list = []
			
 
				+        try:
			
 
				+            for ocr in ocr_classes_dict:
			
 
				+                class_name = ocr['class_name']
			
 
				+                if 'solve' in class_name:
			
 
				+                    ocr['region']['ymax'] = ocr['region']['ymax']
			
 
				+                ocr_box = ocr['region']
			
 
				+                left, top = ocr_box['xmin'], ocr_box['ymin']
			
 
				+
			
 
				+                correcting_oct_region = crop_region(correcting_img, ocr_box)
			
 
				+                # cv2.imwrite(class_name+'.jpg', correcting_oct_region)
			
 
				+                # cv2.imshow(class_name, correcting_oct_region)
			
 
				+                # if cv2.waitKey(0) == 27:
			
 
				+                #     cv2.destroyAllWindows()
			
 
				+                if method == 'baidu':
			
 
				+                    correcting_word = get_ocr_text_and_coordinate_in_google_format(correcting_oct_region)  # baidu
			
 
				+                else:
			
 
				+                    correcting_word = tesseract_boxes_by_py(correcting_oct_region)  # tesseract
			
 
				+
			
 
				+                template = template_dict[class_name]
			
 
				+                template_word = template['words_result']
			
 
				+                template_bias = template['coordinate_bias']
			
 
				+
			
 
				+                coordiantes_dict = match_string(correcting_word, (left, top), template_word, template_bias)
			
 
				+                if len(coordiantes_dict) > 0:
			
 
				+                    correct_coordinates_list.append(coordiantes_dict['correcting'])
			
 
				+                    template_coordinates_list.append(coordiantes_dict['template'])
			
 
				+                else:
			
 
				+                    continue
			
 
				+
			
 
				+            choice_index_list = get_correct_points(correct_coordinates_list, 3)
			
 
				+            c_coordinates = [correct_coordinates_list[ele] for ele in choice_index_list]
			
 
				+            t_coordiantes = [template_coordinates_list[ele] for ele in choice_index_list]
			
 
				+            dst = transform(template_img_size, correcting_img,
			
 
				+                            c_coordinates, t_coordiantes)
			
 
				+            print(c_coordinates, t_coordiantes)
			
 
				+            save_path = img_path.replace(correcting_img_dir_path, corrected_img_save_dir)
			
 
				+            write_single_img(dst, save_path)
			
 
				+            print(save_path)
			
 
				+        except Exception:
			
 
				+            print('image corrected error')
			
 
				+            traceback.print_exc()
			
 
				+
			
 
				+
			
 
				+def correct(template_path, correcting_img_dir_path, corrected_img_save_dir, sheet_dict, method, sheet_sides='front'):
			
 
				+    find_str = os.path.join(correcting_img_dir_path, '*.jpg')
			
 
				+    correcting_img_path_list = glob.glob(find_str)
			
 
				+    # correcting_img_path_list = [r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\20180719004308818_0030.jpg']
			
 
				+    template_img = read_single_img(template_path)
			
 
				+    y, x = template_img.shape[0], template_img.shape[1]
			
 
				+    ocr_classes_dict = []
			
 
				+    # ocr_class = ['info_title', 'page']
			
 
				+    ocr_class = {'front': ['info_title', 'page'], 'back': ['solve', 'solve0', 'page']}
			
 
				+    page_index = 1
			
 
				+    for ele in sheet_dict['regions']:
			
 
				+        if ele['class_name'] in ocr_class[sheet_sides]:
			
 
				+            ocr_classes_dict.append({'class_name': '{}_{}'.format(ele['class_name'], str(page_index)), 'region': ele['bounding_box']})
			
 
				+            page_index += 1
			
 
				+
			
 
				+    template_info_dict = get_template(template_img, ocr_classes_dict, method)
			
 
				+    save_transformed_img((x, y), correcting_img_path_list, ocr_classes_dict, correcting_img_dir_path, corrected_img_save_dir, template_info_dict, method)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    template_path0 = r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\template\20180719004308818_0020.jpg'
			
 
				+    img_dir_path = r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes'
			
 
				+    img_save_dir = r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\corrected'
			
 
				+    xml_path = template_path0.replace('.jpg', '.xml')
			
 
				+    sheet_dict0 = read_xml_to_json(xml_path)
			
 
				+    correct(template_path0, img_dir_path, img_save_dir, sheet_dict0, sheet_sides='back', method='baidu')
			
--- a/segment/sheet_resolve/analysis/correct/coordinates_correct_pyinstaller.py
+++ b/segment/sheet_resolve/analysis/correct/coordinates_correct_pyinstaller.py
@@ -0,0 +1,479 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : coordinates_correct_pyinstaller.py
			
 
				+# @Time    : 2018/12/10 0010 上午 10:26
			
 
				+import os
			
 
				+import argparse
			
 
				+import cv2
			
 
				+import traceback
			
 
				+import numpy as np
			
 
				+import glob2 as glob
			
 
				+import xml.etree.cElementTree as ET
			
 
				+import requests
			
 
				+import base64
			
 
				+from urllib import parse, request
			
 
				+
			
 
				+
			
 
				+access_token = '24.214174608e47e6047f31c3fd8c3cedef.2592000.1548390126.282335-14614857'
			
 
				+# access_token = ocr_login()
			
 
				+OCR_BOX_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
			
 
				+OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
			
 
				+# OCR_ACCURACY = 'general'
			
 
				+OCR_ACCURACY = 'accurate'
			
 
				+OCR_CLIENT_ID = 'AVH7VGKG8QxoSotp6wG9LyZq'
			
 
				+OCR_CLIENT_SECRET = 'gG7VYvBWLU8Rusnin8cS8Ta4dOckGFl6'
			
 
				+OCR_TOKEN_UPDATE_DATE = 10
			
 
				+
			
 
				+
			
 
				+def login():
			
 
				+    grant_type = 'client_credentials'
			
 
				+    client_id = OCR_CLIENT_ID
			
 
				+    client_secret = OCR_CLIENT_SECRET
			
 
				+
			
 
				+    textmod = {'grant_type': grant_type, 'client_id': client_id, 'client_secret': client_secret}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+
			
 
				+    # 输出内容:user=admin&password=admin
			
 
				+    header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}
			
 
				+    url = 'https://aip.baidubce.com/oauth/2.0/token'
			
 
				+    req = request.Request(url='{}{}{}'.format(url, '?', textmod), headers=header_dict)
			
 
				+    res = request.urlopen(req).read()
			
 
				+    token = eval(res.decode(encoding='utf-8'))['access_token']
			
 
				+    return token
			
 
				+
			
 
				+
			
 
				+def opencv2base64(img):
			
 
				+    image = cv2.imencode('.jpg', img)[1]
			
 
				+    base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+    return base64_data
			
 
				+
			
 
				+
			
 
				+def get_ocr_raw_result(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': login()}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opencv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    return resp
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate_in_google_format(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opencv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    dict_list = [item2.get('location') for item in words_result for item2 in item['chars']]
			
 
				+    char_list = [item2.get('char') for item in words_result for item2 in item['chars']]
			
 
				+    matrix = []
			
 
				+    for ele in dict_list:
			
 
				+        xmin = ele['left']
			
 
				+        ymin = ele['top']
			
 
				+        xmax = ele['width'] + ele['left']
			
 
				+        ymax = ele['top'] + ele['height']
			
 
				+        item0 = (xmin, ymin, xmax, ymax)
			
 
				+        matrix.append(item0)
			
 
				+
			
 
				+    res_dict = {'chars': char_list, 'coordinates': matrix}
			
 
				+    return res_dict
			
 
				+
			
 
				+
			
 
				+def read_xml_to_json(xml_path):
			
 
				+    tree = ET.parse(xml_path)
			
 
				+    root = tree.getroot()
			
 
				+    regions_list = []
			
 
				+    for obj in root.findall('object'):
			
 
				+        class_name = obj.find('name').text
			
 
				+        bbox = obj.find('bndbox')
			
 
				+        xmin = int(bbox.find('xmin').text)
			
 
				+        ymin = int(bbox.find('ymin').text)
			
 
				+        xmax = int(bbox.find('xmax').text)
			
 
				+        ymax = int(bbox.find('ymax').text)
			
 
				+        bbox_dict = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
			
 
				+        region = {'class_name': class_name, 'bounding_box': bbox_dict}
			
 
				+        regions_list.append(region)
			
 
				+
			
 
				+    serial = '201812260000001'
			
 
				+    sheet_dict = {'series_number': serial, 'regions': regions_list}
			
 
				+    return sheet_dict
			
 
				+
			
 
				+
			
 
				+def crop_region(im, bbox):
			
 
				+    xmin = int(bbox['xmin'])
			
 
				+    ymin = int(bbox['ymin'])
			
 
				+    xmax = int(bbox['xmax'])
			
 
				+    ymax = int(bbox['ymax'])
			
 
				+
			
 
				+    region = im[ymin:ymax, xmin:xmax]
			
 
				+    return region
			
 
				+
			
 
				+
			
 
				+def write_single_img(dst, save_path):
			
 
				+    try:
			
 
				+        cv2.imencode('.jpg', dst)[1].tofile(save_path)
			
 
				+    except FileNotFoundError as e:
			
 
				+        raise e
			
 
				+
			
 
				+
			
 
				+def read_single_img(img_path):
			
 
				+    try:
			
 
				+        im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
			
 
				+    except FileNotFoundError as e:
			
 
				+        raise e
			
 
				+    return im
			
 
				+
			
 
				+
			
 
				+def transform(template_img_size, correcting_img, correcting_pts, template_pts):
			
 
				+    pts1 = np.float32(correcting_pts)  # 原始坐标
			
 
				+    pts2 = np.float32(template_pts)  # 目标坐标
			
 
				+
			
 
				+    mtx = cv2.getAffineTransform(pts1, pts2)
			
 
				+    dst = cv2.warpAffine(correcting_img, mtx, template_img_size, borderValue=(0, 0, 255))
			
 
				+    return dst
			
 
				+
			
 
				+
			
 
				+def get_same_str(str1, str2):
			
 
				+    str1_set = set(str1)
			
 
				+    str2_set = set(str2)
			
 
				+    intersection = str1_set & str2_set
			
 
				+    if intersection:
			
 
				+        len1 = len(str1)
			
 
				+        len2 = len(str2)
			
 
				+        if len2 == 1:
			
 
				+            start_index = str1.index(str2)
			
 
				+            return {'ismatch': True, 'coordinates': ((start_index, start_index + 1), (0, 1))}
			
 
				+        else:
			
 
				+            str_set = set(str1 + str2)
			
 
				+            str_set_dict = {}
			
 
				+            for i, ele in enumerate(sorted(list(str_set))):
			
 
				+                str_set_dict[ele] = i + 1
			
 
				+
			
 
				+            str1_np = np.asarray([str_set_dict[k] for k in str1])
			
 
				+            str2_np = np.asarray([str_set_dict[k] for k in str2])
			
 
				+
			
 
				+            np1 = np.tile(str1_np, (len2, 1))
			
 
				+            np2 = np.tile(str2_np, (1, len1)).reshape(len1, len2).T
			
 
				+
			
 
				+            np3 = np1 - np2
			
 
				+
			
 
				+            size = np3.shape
			
 
				+
			
 
				+            np4 = np3.reshape(-1, 1)
			
 
				+
			
 
				+            np4_list = np4.tolist()
			
 
				+            zero_list = list()
			
 
				+            str_index = []
			
 
				+            for i, ele in enumerate(np4):
			
 
				+                if i in zero_list:
			
 
				+                    continue
			
 
				+                else:
			
 
				+                    if ele == [0]:
			
 
				+                        length = 0
			
 
				+                        zero_list.append(i)
			
 
				+                        for interval in range(1, size[1]):
			
 
				+                            next_index = i + size[1] * interval + interval
			
 
				+                            if next_index < len(np4_list):
			
 
				+                                if np4_list[next_index] == [0]:
			
 
				+                                    length += 1
			
 
				+                                    zero_list.append(next_index)  # 跳过的循环
			
 
				+                                if np4_list[next_index] != [0]:
			
 
				+                                    break
			
 
				+                        str_index.append((i, i + size[1] * length + length, length))
			
 
				+                    else:
			
 
				+                        pass
			
 
				+
			
 
				+            # print(str_index)
			
 
				+            # print(max_index[0])
			
 
				+            max_index = sorted(str_index, key=lambda k: k[2], reverse=True)
			
 
				+            a = (max_index[0][0] // size[1], max_index[0][0] % size[1])
			
 
				+            b = (max_index[0][1] // size[1], max_index[0][1] % size[1])
			
 
				+
			
 
				+            # max_str1 = str1[a[1]:b[1] + 1]
			
 
				+            # max_str2 = str2[a[0]:b[0] + 1]
			
 
				+            # print(max_str1, max_str2)
			
 
				+
			
 
				+            return {'ismatch': True, 'coordinates': ((a[1], b[1] + 1), (a[0], b[0] + 1))}
			
 
				+
			
 
				+    else:
			
 
				+        return {'ismatch': False, 'coordinates': ()}
			
 
				+
			
 
				+
			
 
				+def match_string(correcting_ocr, correcting_bias, template_ocr, template_bias):
			
 
				+    correcting_words_list = correcting_ocr['chars']
			
 
				+    template_words_list = template_ocr['chars']
			
 
				+
			
 
				+    max_same_str = ''
			
 
				+    index_pair = {}
			
 
				+
			
 
				+    longer = ''.join(correcting_words_list)
			
 
				+    shorter = ''.join(template_words_list)
			
 
				+
			
 
				+    res = get_same_str(longer, shorter)
			
 
				+    if res['ismatch']:
			
 
				+        c, t = res['coordinates']
			
 
				+        if len(max_same_str) < c[1]-c[0]:
			
 
				+            max_same_str = longer[c[0]:c[1]]
			
 
				+            index_pair['correcting'] = (c[0], c[1])
			
 
				+            index_pair['template'] = (t[0], t[1])
			
 
				+
			
 
				+    if len(index_pair) > 0:
			
 
				+        correcting_coordinate = correcting_ocr['coordinates'][index_pair['correcting'][0]]  # xmin, ymin, xmax, ymax
			
 
				+        c_x = correcting_bias[0] + correcting_coordinate[0]
			
 
				+        c_y = correcting_bias[1] + correcting_coordinate[1]
			
 
				+        template_coordinate = template_ocr['coordinates'][index_pair['template'][0]]
			
 
				+        t_x, t_y = template_bias[0] + template_coordinate[0], template_bias[1] + template_coordinate[1]
			
 
				+
			
 
				+        print(max_same_str)
			
 
				+        if abs(c_x-t_x) < 50 and abs(c_y-t_y) < 50:
			
 
				+            return {'correcting': (c_x, c_y), 'template': (t_x, t_y)}
			
 
				+        else:
			
 
				+            return {}
			
 
				+    else:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def get_template(template_img, ocr_classes_dict):
			
 
				+    template_dict = {}
			
 
				+
			
 
				+    for ocr in ocr_classes_dict:
			
 
				+        template = {}
			
 
				+        class_name = ocr['class_name']
			
 
				+        if 'solve' in class_name:
			
 
				+            # ocr['region']['ymax'] = int(0.10 * (ocr['region']['ymax']-ocr['region']['ymin']) +ocr['region']['ymin'])
			
 
				+            ocr['region']['ymax'] = int(250 + ocr['region']['ymin'])
			
 
				+        ocr_box = ocr['region']
			
 
				+        left, top = ocr_box['xmin'], ocr_box['ymin']
			
 
				+
			
 
				+        ocr_img = crop_region(template_img, ocr_box)
			
 
				+        # cv2.imshow(class_name, ocr_img)
			
 
				+        # if cv2.waitKey(0) == 27:
			
 
				+        #     cv2.destroyAllWindows()
			
 
				+
			
 
				+        ocr_word = get_ocr_text_and_coordinate_in_google_format(ocr_img)  # baidu
			
 
				+        template['words_result'] = ocr_word
			
 
				+        template['coordinate_bias'] = (left, top)
			
 
				+
			
 
				+        template_dict[class_name] = template
			
 
				+    return template_dict
			
 
				+
			
 
				+
			
 
				+def get_correct_points(points_list, size):
			
 
				+    if len(points_list) > size >= 3:
			
 
				+        points_list = sorted(points_list, key=lambda k: k[1])
			
 
				+        i = 1
			
 
				+        choice_index_list = [0]
			
 
				+        for index, ele in enumerate(points_list):
			
 
				+            if abs(ele[1] - points_list[0][1]) > 500:  # 找y轴差值大于500的坐标
			
 
				+                choice_index_list.append(index)
			
 
				+                i = i + 1
			
 
				+                if i == size:
			
 
				+                    break
			
 
				+        return choice_index_list
			
 
				+    elif len(points_list) == 3:
			
 
				+        return [0, 1, 2]
			
 
				+    else:
			
 
				+        raise Exception
			
 
				+
			
 
				+
			
 
				+def save_transformed_img(template_img_size, images_path_list, ocr_classes_dict,
			
 
				+                         correcting_img_dir_path, corrected_img_save_dir, template_dict):
			
 
				+    for img_path in images_path_list:
			
 
				+        correcting_img = read_single_img(img_path)
			
 
				+        print('***********************************')
			
 
				+        print(img_path)
			
 
				+        correct_coordinates_list = []
			
 
				+        template_coordinates_list = []
			
 
				+        try:
			
 
				+            for ocr in ocr_classes_dict:
			
 
				+                class_name = ocr['class_name']
			
 
				+                if 'solve' in class_name:
			
 
				+                    ocr['region']['ymax'] = ocr['region']['ymax']
			
 
				+                ocr_box = ocr['region']
			
 
				+                left, top = ocr_box['xmin'], ocr_box['ymin']
			
 
				+
			
 
				+                correcting_oct_region = crop_region(correcting_img, ocr_box)
			
 
				+                # cv2.imwrite(class_name+'.jpg', correcting_oct_region)
			
 
				+                # cv2.imshow(class_name, correcting_oct_region)
			
 
				+                # if cv2.waitKey(0) == 27:
			
 
				+                #     cv2.destroyAllWindows()
			
 
				+                correcting_word = get_ocr_text_and_coordinate_in_google_format(correcting_oct_region)  # baidu
			
 
				+                template = template_dict[class_name]
			
 
				+                template_word = template['words_result']
			
 
				+                template_bias = template['coordinate_bias']
			
 
				+
			
 
				+                coordinates_dict = match_string(correcting_word, (left, top), template_word, template_bias)
			
 
				+                if len(coordinates_dict) > 0:
			
 
				+                    correct_coordinates_list.append(coordinates_dict['correcting'])
			
 
				+                    template_coordinates_list.append(coordinates_dict['template'])
			
 
				+                else:
			
 
				+                    continue
			
 
				+
			
 
				+            choice_index_list = get_correct_points(correct_coordinates_list, 3)
			
 
				+            c_coordinates = [correct_coordinates_list[ele] for ele in choice_index_list]
			
 
				+            t_coordinates = [template_coordinates_list[ele] for ele in choice_index_list]
			
 
				+            dst = transform(template_img_size, correcting_img,
			
 
				+                            c_coordinates, t_coordinates)
			
 
				+            print(c_coordinates, t_coordinates)
			
 
				+            save_path = img_path.replace(correcting_img_dir_path, corrected_img_save_dir)
			
 
				+            write_single_img(dst, save_path)
			
 
				+            print(save_path)
			
 
				+        except Exception as e:
			
 
				+            print('image corrected error: {}'.format(e))
			
 
				+            traceback.print_exc()
			
 
				+
			
 
				+
			
 
				+def correct(template_path, correcting_img_dir_path, corrected_img_save_dir, sheet_dict, sheet_sides='front'):
			
 
				+    find_str = os.path.join(correcting_img_dir_path, '*.jpg')
			
 
				+    correcting_img_path_list = glob.glob(find_str)
			
 
				+    # correcting_img_path_list = [r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\20180719004308818_0030.jpg']
			
 
				+    template_img = read_single_img(template_path)
			
 
				+    y, x = template_img.shape[0], template_img.shape[1]
			
 
				+    ocr_classes_dict = []
			
 
				+    # ocr_class = ['info_title', 'page']
			
 
				+    ocr_class = {'front': ['info_title', 'page'], 'back': ['solve', 'solve0', 'page']}
			
 
				+    page_index = 1
			
 
				+    for ele in sheet_dict['regions']:
			
 
				+        if ele['class_name'] in ocr_class[sheet_sides]:
			
 
				+            ocr_classes_dict.append({'class_name': '{}_{}'.format(ele['class_name'], str(page_index)),
			
 
				+                                     'region': ele['bounding_box']})
			
 
				+            page_index += 1
			
 
				+
			
 
				+    template_info_dict = get_template(template_img, ocr_classes_dict)
			
 
				+    save_transformed_img((x, y), correcting_img_path_list, ocr_classes_dict,
			
 
				+                         correcting_img_dir_path, corrected_img_save_dir, template_info_dict)
			
 
				+
			
 
				+
			
 
				+def rotate(image, angle, center=None, scale=1.0):
			
 
				+    (h, w) = image.shape[:2]
			
 
				+    if center is None:
			
 
				+        center = (w // 2, h // 2)
			
 
				+
			
 
				+    mtx = cv2.getRotationMatrix2D(center, angle, scale)
			
 
				+    rotated = cv2.warpAffine(image, mtx, (w, h))
			
 
				+    return rotated
			
 
				+
			
 
				+
			
 
				+def image_direction(image_raw, standard_direction):
			
 
				+    # 图片进来取上下两部分，baidu_ocr判断方向
			
 
				+    height, width = image_raw.shape[0], image_raw.shape[1]
			
 
				+    image = ''
			
 
				+    if height > width:
			
 
				+        if standard_direction == 'V':
			
 
				+            image = image_raw
			
 
				+        if standard_direction == 'H':
			
 
				+            image = np.rot90(image_raw)
			
 
				+    if height <= width:
			
 
				+        if standard_direction == 'V':
			
 
				+            image = np.rot90(image_raw)
			
 
				+        if standard_direction == 'H':
			
 
				+            image = image_raw
			
 
				+
			
 
				+    crop_ratio = 0.1
			
 
				+    crop_height = int(crop_ratio * height)
			
 
				+    top_part = image[:crop_height, :]
			
 
				+    bottom_part = image[height-crop_height:height, :]
			
 
				+    ocr_used_image = np.vstack([top_part, bottom_part])
			
 
				+    # - -1:未定义，
			
 
				+    # - 0:正向，
			
 
				+    # - 1: 逆时针90度，
			
 
				+    # - 2:逆时针180度，
			
 
				+    # - 3:逆时针270度
			
 
				+    direction = get_ocr_raw_result(ocr_used_image)['direction']
			
 
				+    if direction == 2:
			
 
				+        # flip:  1 水平翻转, 0 垂直翻转, -1 水平垂直翻转
			
 
				+        fliped_image = cv2.flip(image, 1)
			
 
				+    if direction == 0:
			
 
				+        fliped_image = image
			
 
				+    else:
			
 
				+        raise ValueError("direction={} is not supported!".format(direction))
			
 
				+
			
 
				+    # 判断是该套试卷中的第几份：识别页码？
			
 
				+
			
 
				+
			
 
				+def run():
			
 
				+    parser = argparse.ArgumentParser(description="your script description")  # --help
			
 
				+    parser.add_argument('--template', '-t', help='reviewed template file path')
			
 
				+    parser.add_argument('--page', '-p', choices=['front', 'back'], help='front sizes or back sizes')
			
 
				+    parser.add_argument('--raw', '-r', help='raw images folder path')
			
 
				+    parser.add_argument('--save', '-s', help='corrected images save folder path')
			
 
				+    # description参数可以用于插入描述脚本用途的信息，可以为空
			
 
				+
			
 
				+    args = parser.parse_args()  # 将变量以标签-值的字典形式存入args字典
			
 
				+
			
 
				+    template = args.template
			
 
				+    raw_dir = args.raw
			
 
				+    page = args.page
			
 
				+    save_dir = args.save
			
 
				+
			
 
				+    xml_path = template.replace('.jpg', '.xml')
			
 
				+    sheet_dict0 = read_xml_to_json(xml_path)
			
 
				+    correct(template, raw_dir, save_dir, sheet_dict0, sheet_sides=page)
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Script entry point: the command-line options are parsed inside run().
    run()
    # Direct invocation with fixed local sample paths, kept for reference:
    # template_path0 = r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\template\20180719004308818_0020.jpg'
    # img_dir_path = r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes'
    # img_save_dir = r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\corrected'
    # xml_path = template_path0.replace('.jpg', '.xml')
    # print('hello', xml_path)
    # sheet_dict0 = read_xml_to_json(xml_path)
    # correct(template_path0, img_dir_path, img_save_dir, sheet_dict0, sheet_sides='back')
			
--- a/segment/sheet_resolve/analysis/correct/run.bat
+++ b/segment/sheet_resolve/analysis/correct/run.bat
@@ -0,0 +1,5 @@
 
				+python coordinates_correct_pyinstaller.py
			
 
				+--template=C:\Users\Administrator\Desktop\sheet\correct\back_sizes\template\20180719004308818_0020.jpg
			
 
				+--page=back
			
 
				+--raw=C:\Users\Administrator\Desktop\sheet\correct\back_sizes
			
 
				+--save=C:\Users\Administrator\Desktop\sheet\correct\back_sizes\corrected
			
--- a/segment/sheet_resolve/analysis/exam_number/__init__.py
+++ b/segment/sheet_resolve/analysis/exam_number/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:01
			
--- a/segment/sheet_resolve/analysis/exam_number/exam_number_box.py
+++ b/segment/sheet_resolve/analysis/exam_number/exam_number_box.py
@@ -0,0 +1,239 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : exam_number_box.py
			
 
				+# @Time    : 2018/11/22 0022 下午 15:59
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate, get_ocr_text_and_coordinate_direction
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+def preprocess(img, xe, ye):
			
 
				+    scale = 0
			
 
				+    dilate = 1
			
 
				+    blur = 5
			
 
				+    # 预处理图像
			
 
				+    # img = cv2.imread(picture)
			
 
				+
			
 
				+    # rescale the image
			
 
				+    if scale != 0:
			
 
				+        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
			
 
				+
			
 
				+    # Convert to gray
			
 
				+    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+
			
 
				+    # # Apply dilation and erosion to remove some noise
			
 
				+    # if dilate != 0:
			
 
				+    #     kernel = np.ones((dilate, dilate), np.uint8)
			
 
				+    #     img = cv2.dilate(img, kernel, iterations=1)
			
 
				+    #     img = cv2.erode(img, kernel, iterations=1)
			
 
				+
			
 
				+    # Apply blur to smooth out the edges
			
 
				+    # if blur != 0:
			
 
				+    #     img = cv2.GaussianBlur(img, (blur, blur), 0)
			
 
				+
			
 
				+    # Apply threshold to get image with only b&w (binarization)
			
 
				+    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    kernel = np.ones((ye, xe), np.uint8)  # y轴膨胀, x轴膨胀
			
 
				+
			
 
				+    dst = cv2.dilate(img, kernel, iterations=1)
			
 
				+    # cv2.imshow('dilate', dst)
			
 
				+    # if cv2.waitKey(0) == 27:
			
 
				+    #     cv2.destroyAllWindows()
			
 
				+
			
 
				+    return dst
			
 
				+
			
 
				+
			
 
				+def contours(image):
			
 
				+    _, cnts, hierarchy = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+
			
 
				+    bboxes = []
			
 
				+    for cnt_id, cnt in enumerate(reversed(cnts)):
			
 
				+        x, y, w, h = cv2.boundingRect(cnt)
			
 
				+        bboxes.append((x, y, x + w, y + h))
			
 
				+
			
 
				+    return bboxes
			
 
				+
			
 
				+
			
 
def box_coordinates(img):
    """Split a binarised exam-number area into rows of boxes and guess its direction.

    Row and column boundaries are found from projection profiles of ``img``.
    Boxes narrower or shorter than 4 pixels are dropped, and the boxes of
    each row are merged through ``utils.box_by_x_intervel``.  An OCR call on
    the first text band (or the third band when the first looks handwritten)
    provides digits whose sum decides the returned direction.

    :param img: binary image where text pixels are non-zero
        (assumes 0/255 values as produced by ``preprocess`` -- TODO confirm).
    :return: ``(all_coordinates, direction)``.  ``all_coordinates`` is a list
        of ``{'row': '<n>', 'coordinates': <merged boxes>}``.  ``direction``
        is 180 when no text is recognised or the recognised digits sum to
        less than 22, otherwise 90.
    """
    img_arr = np.asarray(img)

    def axix_break_point(img, tolerance_number, axis):
        # Project the image along `axis` and reduce the profile to 1 (text)
        # or 0 (no text).
        sum_x_axis = img.sum(axis=axis)
        sum_x_axis[sum_x_axis > 255 * tolerance_number] = 1  # white: text present
        sum_x_axis[sum_x_axis != 1] = 0  # black: no text
        sum_x_axis_list = list(sum_x_axis)
        # A trailing 0 closes a text band that runs to the very end.
        sum_x_axis_list.append(0)

        # Collect indices where the profile switches: `num` alternates between
        # 1 (looking for the start of a band) and 0 (looking for its end), so
        # the result is [start0, end0, start1, end1, ...].
        split_x_index = []
        num = 1
        for index, ele in enumerate(sum_x_axis_list):
            num = num % 2
            if ele == num:
                # print(i)
                num = num + 1
                split_x_index.append(index)
        # print('length: ', len(split_x_index), split_x_index)
        return split_x_index

    y_break_points_list = axix_break_point(img_arr, 1, axis=1)  # bands along the y axis
    # Everything above the end of the first text band.
    img_arr_upper = img_arr[:y_break_points_list[1], :]

    # cv2.imshow('img_arr_upper', img_arr_upper)
    # if cv2.waitKey(0) == 27:
    #     cv2.destroyAllWindows()

    # At most two column bands in the first row is treated as a handwritten line.
    x_break_points_list = axix_break_point(img_arr_upper, 1, axis=0)
    if len(x_break_points_list) <= 4:
        hand_writing = True
    else:
        hand_writing = False

    img_arr_for_x = img_arr
    ocr_region = img_arr_upper
    if hand_writing:  # a handwritten exam-number area is present
        # Skip the first band: OCR the second band and start the rows there.
        ocr_region = img_arr[y_break_points_list[2]:y_break_points_list[3], :]
        y_break_points_list = y_break_points_list[2:]
        # Column boundaries come from the rows below the (new) first band.
        img_arr_for_x = img_arr[y_break_points_list[1]:, :]
    x_break_points_list = axix_break_point(img_arr_for_x, 1, axis=0)

    all_coordinates = []
    row_number = 0
    for i in range(0, len(y_break_points_list), 2):  # one row per y band
        ymin = y_break_points_list[i]
        ymax = y_break_points_list[i + 1]
        # Placeholder first row so vstack has something to extend; removed below.
        matrix = np.array([0, 0, 0, 0])
        if ymax-ymin > 3:  # filter out noise
            for j in range(0, len(x_break_points_list), 2):
                xmin = x_break_points_list[j]
                xmax = x_break_points_list[j + 1]
                if xmax - xmin > 3:
                    matrix = np.vstack([matrix, np.array([xmin, ymin, xmax, ymax])])

            matrix = matrix[1:, :]
            dif = matrix[1:, 0] - matrix[:-1, 2]  # gap between a box's left edge and the previous box's right edge
            dif[dif < 0] = 0
            dif_length = np.mean(dif)  # the mean gap is the merge threshold passed to the helper
            block_list = utils.box_by_x_intervel(matrix, dif_length)

            row = {'row': '{}'.format(row_number), 'coordinates': block_list}
            all_coordinates.append(row)
            row_number += 1

    # Recognise the text; fall back to the larger region if the first call fails.
    try:
        word_result_list, _ = get_ocr_text_and_coordinate_direction(ocr_region)
    except Exception:
        word_result_list, _ = get_ocr_text_and_coordinate_direction(img_arr_for_x)

    direction = 180
    if len(word_result_list) > 0:
        all_char_list = []
        digital_model = re.compile(r'\d')
        for i, chars_dict in enumerate(word_result_list):
            chars_list = chars_dict['chars']
            for ele in chars_list:
                if digital_model.search(ele['char']):
                    all_char_list.append(int(ele['char']))

        # NOTE(review): 45 is presumably 0+1+...+9, the sum of one full digit
        # line -- confirm the intent of this threshold.
        if sum(all_char_list) < 45//2:
            direction = 180
        else:
            direction = 90
    return all_coordinates, direction
			
 
				+
			
 
				+
			
 
				+def exam_number(left, top, image, xml_path):
			
 
				+    img = preprocess(image, 3, 3)
			
 
				+
			
 
				+    box_list, _ = box_coordinates(img)
			
 
				+
			
 
				+    exam_bbox_list = []
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    for index_num, exam_bbox in enumerate(box_list):
			
 
				+        row_number = exam_bbox['row']
			
 
				+        coordinates = exam_bbox['coordinates']
			
 
				+        ii = 0
			
 
				+        for i, coordinate in enumerate(coordinates):
			
 
				+            area = (coordinate[2] - coordinate[0]) * (coordinate[3] - coordinate[1])
			
 
				+            if area > 400:
			
 
				+                number = '{:02d}_{}'.format(ii, row_number)
			
 
				+                tree = utils.create_xml(number, tree,
			
 
				+                                        coordinate[0]+left, coordinate[1]+top, coordinate[2]+left, coordinate[3]+top)
			
 
				+
			
 
				+                region = [coordinate[0]+left, coordinate[1]+top, coordinate[2]+left, coordinate[3]+top]
			
 
				+                exam_bbox_list.append({'number': number, 'region': region})
			
 
				+                ii = ii + 1
			
 
				+    # print(exam_items_bbox)
			
 
				+    tree.write(xml_path)
			
 
				+    return exam_bbox_list
			
 
				+
			
 
				+
			
 
				+def exam_number_column(left, top, image, xml_path):
			
 
				+    img = preprocess(image, 3, 3)
			
 
				+
			
 
				+    box_list, _ = box_coordinates(img)
			
 
				+
			
 
				+    column_number = len(box_list[0]['coordinates'])
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    column_list = []
			
 
				+    for i in range(0, column_number):
			
 
				+        matrix = np.array([0, 0, 0, 0])
			
 
				+        for coord in box_list:
			
 
				+            col = coord['coordinates']
			
 
				+            matrix = np.vstack([matrix, np.array(col[i])])
			
 
				+
			
 
				+        combine = matrix[1:]
			
 
				+        min_temp = np.min(combine, axis=0)
			
 
				+        max_temp = np.max(combine, axis=0)
			
 
				+        column_coordinate = {'xmin': min_temp[0]+left, 'ymin': min_temp[1]+top,
			
 
				+                             'xmax': max_temp[2]+left, 'ymax': max_temp[3]+top}
			
 
				+        single_height = np.mean(combine[:, 3]-combine[:, 1])
			
 
				+        single_width = np.mean(combine[:, 2]-combine[:, 0])
			
 
				+
			
 
				+        column_dict = {'number': i, 'location': column_coordinate,
			
 
				+                       'single_height': int(single_height),
			
 
				+                       'single_width': int(single_width),
			
 
				+                       "choice_option": "0,1,2,3,4,5,6,7,8,9",
			
 
				+                       'row': 10, 'column': 1}
			
 
				+        column_list.append(column_dict)
			
 
				+        tree = utils.create_xml(str(i), tree,
			
 
				+                                column_coordinate['xmin'], column_coordinate['ymin'],
			
 
				+                                column_coordinate['xmax'], column_coordinate['ymax'])
			
 
				+
			
 
				+    return column_list
			
 
				+
			
 
				+
			
 
				+def exam_number_whole(left, top, image, xml_path):
			
 
				+    img = preprocess(image, 3, 3)
			
 
				+    box_list, direction = box_coordinates(img)
			
 
				+
			
 
				+    coor = [coord['coordinates'] for coord in box_list]
			
 
				+    column_number = len(box_list[0]['coordinates'])
			
 
				+    row_number = len(box_list)
			
 
				+
			
 
				+    tensor = np.asarray(coor).reshape(column_number*row_number, 4)
			
 
				+    min_temp = np.min(tensor, axis=0)
			
 
				+    max_temp = np.max(tensor, axis=0)
			
 
				+    column_coordinate = {'xmin': int(min_temp[0] + left), 'ymin': int(min_temp[1] + top),
			
 
				+                         'xmax': int(max_temp[2] + left), 'ymax': int(max_temp[3] + top)}
			
 
				+
			
 
				+    single_height = np.mean(tensor[:, 3] - tensor[:, 1])
			
 
				+    single_width = np.mean(tensor[:, 2] - tensor[:, 0])
			
 
				+
			
 
				+    column_dict = {'location': column_coordinate,
			
 
				+                   'single_height': int(single_height),
			
 
				+                   'single_width': int(single_width),
			
 
				+                   "choice_option": "0,1,2,3,4,5,6,7,8,9",
			
 
				+                   'row': row_number, 'column': column_number,
			
 
				+                   'direction': direction}
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    tree = utils.create_xml('exam_number', tree,
			
 
				+                            column_coordinate['xmin'], column_coordinate['ymin'],
			
 
				+                            column_coordinate['xmax'], column_coordinate['ymax'])
			
 
				+
			
 
				+    tree.write(xml_path)
			
 
				+    return column_dict
			
--- a/segment/sheet_resolve/analysis/exam_number/exam_number_row_column.py
+++ b/segment/sheet_resolve/analysis/exam_number/exam_number_row_column.py
@@ -0,0 +1,234 @@
 
				+import numpy as np
			
 
				+import tensorflow as tf
			
 
				+
			
 
				+from segment.sheet_resolve.lib.ssd_model.utils import label_map_util, ops as utils_ops
			
 
				+from segment.sheet_resolve.tools import tf_settings
			
 
				+from segment.sheet_resolve.tools.tf_sess import SsdSess
			
 
				+
			
 
				+from PIL import Image
			
 
				+import math
			
 
				+
			
 
				+tf_sess_dict = {
			
 
				+    'exam_number_ssd': SsdSess('exam_number_ssd'),
			
 
				+}
			
 
				+
			
 
				+exam_number_sess = tf_sess_dict['exam_number_ssd']
			
 
				+sess = exam_number_sess.sess
			
 
				+detection_graph = exam_number_sess.graph
			
 
				+
			
 
				+
			
 
				+def load_image_into_numpy_array(image):
			
 
				+    # print(image)
			
 
				+    image = image.convert('RGB')
			
 
				+    (im_width, im_height) = image.size
			
 
				+    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
			
 
				+
			
 
				+
			
 
				+def run_inference_for_single_image(image):
			
 
				+    ops = detection_graph.get_operations()
			
 
				+    all_tensor_names = {output.name for op in ops for output in op.outputs}
			
 
				+    tensor_dict = {}
			
 
				+    for key in [
			
 
				+        'num_detections', 'detection_boxes', 'detection_scores',
			
 
				+        'detection_classes', 'detection_masks'
			
 
				+    ]:
			
 
				+        tensor_name = key + ':0'
			
 
				+        if tensor_name in all_tensor_names:
			
 
				+            tensor_dict[key] = detection_graph.get_tensor_by_name(
			
 
				+                tensor_name)
			
 
				+    if 'detection_masks' in tensor_dict:
			
 
				+        # The following processing is only for single image
			
 
				+        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
			
 
				+        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
			
 
				+        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
			
 
				+        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
			
 
				+        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
			
 
				+        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
			
 
				+        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
			
 
				+            detection_masks, detection_boxes, image.shape[0], image.shape[1])
			
 
				+        detection_masks_reframed = tf.cast(
			
 
				+            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
			
 
				+        # Follow the convention by adding back the batch dimension
			
 
				+        tensor_dict['detection_masks'] = tf.expand_dims(
			
 
				+            detection_masks_reframed, 0)
			
 
				+    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
			
 
				+
			
 
				+    # Run inference
			
 
				+    # start = time.time()
			
 
				+    output_dict = sess.run(tensor_dict,
			
 
				+                           feed_dict={image_tensor: np.expand_dims(image, 0)})
			
 
				+    # print(time.time()-start)
			
 
				+    # all outputs are float32 numpy arrays, so convert types as appropriate
			
 
				+    output_dict['num_detections'] = int(output_dict['num_detections'][0])
			
 
				+    output_dict['detection_classes'] = output_dict[
			
 
				+        'detection_classes'][0].astype(np.uint8)
			
 
				+    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
			
 
				+    output_dict['detection_scores'] = output_dict['detection_scores'][0]
			
 
				+    if 'detection_masks' in output_dict:
			
 
				+        output_dict['detection_masks'] = output_dict['detection_masks'][0]
			
 
				+    return output_dict
			
 
				+
			
 
				+
			
 
				+def image_detect(image_np, category, score_threshold):
			
 
				+    image_np = load_image_into_numpy_array(image_np)
			
 
				+    detections = []
			
 
				+    w, h = image_np.shape[1], image_np.shape[0]
			
 
				+    output_dict = run_inference_for_single_image(image_np)
			
 
				+    boxes = output_dict['detection_boxes']
			
 
				+    scores = output_dict['detection_scores']
			
 
				+    labels = output_dict['detection_classes']
			
 
				+    indices = np.where(scores > score_threshold)
			
 
				+    image_scores = scores[indices]
			
 
				+    image_boxes = boxes[indices]
			
 
				+    image_labels = labels[indices]
			
 
				+    image_detections = np.concatenate(
			
 
				+        [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
			
 
				+    for detection in image_detections:
			
 
				+        y0 = int(detection[0] * h)
			
 
				+        x0 = int(detection[1] * w)
			
 
				+        y1 = int(detection[2] * h)
			
 
				+        x1 = int(detection[3] * w)
			
 
				+        label_index = int(detection[5])
			
 
				+        label_name = category[label_index]['name']
			
 
				+        detections.append((x0, y0, x1, y1, label_index, detection[4], label_name))
			
 
				+    return detections
			
 
				+
			
 
				+
			
 
				+def get_exam_number_row_and_col(left, top, image):
			
 
				+    im_resize = 512
			
 
				+    ''' exam_number resize to 512*512'''
			
 
				+    image_src = Image.fromarray(image)
			
 
				+    if image_src.mode == 'RGB':
			
 
				+        image_src = image_src.convert("L")
			
 
				+    w, h = image_src.size
			
 
				+    if h > w:
			
 
				+        image_src = image_src.resize((int(im_resize / h * w), im_resize))
			
 
				+    else:
			
 
				+        image_src = image_src.resize((im_resize, int(im_resize / w * h)))
			
 
				+    w_, h_ = image_src.size
			
 
				+    image_512 = Image.new(image_src.mode, (im_resize, im_resize), (255))
			
 
				+    image_512.paste(image_src, [0, 0, w_, h_])
			
 
				+
			
 
				+    n_z = "0123456789"
			
 
				+    category_index = label_map_util.create_category_index_from_labelmap(tf_settings.exam_number_ssd_label,
			
 
				+                                                                        use_display_name=True)
			
 
				+    detections = image_detect(image_512, category_index, 0.5)
			
 
				+    if len(detections):
			
 
				+        box_xmin = []
			
 
				+        box_ymin = []
			
 
				+        box_xmax = []
			
 
				+        box_ymax = []
			
 
				+        x_distance_all = []
			
 
				+        y_distance_all = []
			
 
				+        x_width_all = []
			
 
				+        y_height_all = []
			
 
				+        all_small_coordinate = []
			
 
				+        border = {}
			
 
				+        exam_number_ssd = {}
			
 
				+        ssd_column = 1
			
 
				+        ssd_row = 1
			
 
				+
			
 
				+        for index, box in enumerate(detections):
			
 
				+            box0 = round(box[0] * (w / w_))  # Map to the original image
			
 
				+            box1 = round(box[1] * (h / h_))
			
 
				+            box2 = round(box[2] * (w / w_))
			
 
				+            box3 = round(box[3] * (h / h_))
			
 
				+            if box[-1] == 'border':
			
 
				+                border = {'xmin': box0,
			
 
				+                          'ymin': box1,
			
 
				+                          'xmax': box2,
			
 
				+                          'ymax': box3
			
 
				+                          }
			
 
				+            # if box[2] - box[0] > 80 or box[3] - box[1] >80:
			
 
				+            #    continue
			
 
				+            else:
			
 
				+                box_xmin.append(box0)
			
 
				+                box_ymin.append(box1)
			
 
				+                box_xmax.append(box2)
			
 
				+                box_ymax.append(box3)
			
 
				+
			
 
				+                small_coordinate = {'xmin': box0 + left,
			
 
				+                                    'ymin': box1 + top,
			
 
				+                                    'xmax': box2 + left,
			
 
				+                                    'ymax': box3 + top}
			
 
				+                all_small_coordinate.append(small_coordinate)
			
 
				+                x_width = box2 - box0
			
 
				+                y_height = box3 - box1
			
 
				+                x_width_all.append(x_width)
			
 
				+                y_height_all.append(y_height)
			
 
				+
			
 
				+        sorted_xmin = sorted(box_xmin)
			
 
				+        sorted_ymin = sorted(box_ymin)
			
 
				+        sorted_xmax = sorted(box_xmax)
			
 
				+        sorted_ymax = sorted(box_ymax)
			
 
				+        # print(sorted_xmin, sorted_ymin)
			
 
				+        x_width_all_sorted = sorted(x_width_all, reverse=True)
			
 
				+        y_height_all_sorted = sorted(y_height_all, reverse=True)
			
 
				+        len_x = len(x_width_all)
			
 
				+        len_y = len(y_height_all)
			
 
				+        x_width_median = np.median(x_width_all_sorted)
			
 
				+        y_height_median = np.median(y_height_all_sorted)
			
 
				+
			
 
				+        for i in range(len(sorted_xmin) - 1):
			
 
				+            x_distance = sorted_xmin[i + 1] - sorted_xmin[i]
			
 
				+            y_distance = sorted_ymin[i + 1] - sorted_ymin[i]
			
 
				+            if x_distance > (x_width_median - 5):
			
 
				+                ssd_column = ssd_column + 1
			
 
				+                x_distance_all.append(x_distance)
			
 
				+            if y_distance > (y_height_median - 5):
			
 
				+                ssd_row = ssd_row + 1
			
 
				+                y_distance_all.append(y_distance)
			
 
				+
			
 
				+            # del the  borders where small items are too large
			
 
				+            if x_width_all_sorted[i] - x_width_median > x_width_median:
			
 
				+                ssd_column = ssd_column - 1
			
 
				+            elif x_width_median - x_width_all_sorted[i] > x_width_median:
			
 
				+                ssd_column = ssd_column - 1
			
 
				+            if y_height_all_sorted[i] - y_height_median > y_height_median:
			
 
				+                ssd_row = ssd_row - 1
			
 
				+            elif y_height_median - y_height_all_sorted[i] > y_height_median:
			
 
				+                ssd_row = ssd_row - 1
			
 
				+
			
 
				+        # Add rows and columns that might be missed
			
 
				+        x_distance_all_sorted = sorted(x_distance_all, reverse=True)
			
 
				+        y_distance_all_sorted = sorted(y_height_all, reverse=True)
			
 
				+        len_x_distance = len(x_distance_all)
			
 
				+        len_y_distance = len(y_distance_all)
			
 
				+        x_distance_median = np.median(x_distance_all_sorted)
			
 
				+        y_distance_median = np.median(y_distance_all_sorted)
			
 
				+        for i in range(len_x_distance):
			
 
				+            if x_distance_all[i] > 2 * x_distance_median - 4:
			
 
				+                ssd_column = ssd_column + 1
			
 
				+        for i in range(len_y_distance):
			
 
				+            if y_distance_all[i] > 2 * y_distance_median - 4:
			
 
				+                ssd_row = ssd_row + 1
			
 
				+
			
 
				+        if ssd_row < 10:
			
 
				+            test = math.ceil(len_y / ssd_column)
			
 
				+            if test > ssd_row:
			
 
				+                ssd_row = test
			
 
				+        if ssd_row > 10:
			
 
				+            ssd_row = 10
			
 
				+
			
 
				+        average_height = int(np.mean(y_height_all))
			
 
				+        average_width = int(np.mean(x_width_all))
			
 
				+
			
 
				+        location_ssd = {'xmin': sorted_xmin[0] + left,
			
 
				+                        'ymin': sorted_ymin[0] + top,
			
 
				+                        'xmax': sorted_xmax[-1] + left,
			
 
				+                        'ymax': sorted_ymax[-1] + top}
			
 
				+
			
 
				+        exam_number_ssd = {'bounding_box': location_ssd,
			
 
				+                           "single_height": average_height,
			
 
				+                           "single_width": average_width,
			
 
				+                           "rows": ssd_row,
			
 
				+                           "cols": ssd_column,
			
 
				+                           "option": n_z[:ssd_row].replace('', ',')[1:-1],
			
 
				+                           "direction": 180,
			
 
				+                           'class_name': 'exam_number_col_row',
			
 
				+                           'all_small_coordinate': all_small_coordinate
			
 
				+                           }
			
 
				+    else:
			
 
				+        exam_number_ssd = {}
			
 
				+
			
 
				+    return exam_number_ssd
			
--- a/segment/sheet_resolve/analysis/info_section/__init__.py
+++ b/segment/sheet_resolve/analysis/info_section/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:01
			
--- a/segment/sheet_resolve/analysis/info_section/info_section.py
+++ b/segment/sheet_resolve/analysis/info_section/info_section.py
@@ -0,0 +1,43 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : info_section.py
			
 
				+# @Time    : 2019/4/2 0002 下午 15:38
			
 
				+import cv2
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate_in_google_format
			
 
				+from segment.sheet_resolve.tools.utils import crop_region, read_xml_to_json, read_single_img
			
 
				+
			
 
				+
			
 
				+info_section_class = ['alarm_info',
			
 
				+                      'info_title',
			
 
				+                      'attention',
			
 
				+                      'page',
			
 
				+                      'full_filling',
			
 
				+                      'print_info',
			
 
				+                      'ban_area',
			
 
				+                      'type_score',
			
 
				+                      'time',
			
 
				+                      'total_score',
			
 
				+                      'executor',
			
 
				+                      'verify']
			
 
				+
			
 
				+
			
 
				+def get_text(sheet, raw_image):
			
 
				+    for ele in sheet['regions']:
			
 
				+        if ele['class_name'] in info_section_class:
			
 
				+            bbox = ele['bounding_box']
			
 
				+            img_region = crop_region(raw_image, bbox)
			
 
				+            try:
			
 
				+                text_dict = get_ocr_text_and_coordinate_in_google_format(img_region)
			
 
				+                text_list = text_dict['chars']
			
 
				+                text = ''.join(text_list)
			
 
				+                ele['text'] = text
			
 
				+            except Exception:
			
 
				+                ele['text'] = ''
			
 
				+    return sheet
			
 
				+
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     xml_path = r'C:\Users\Administrator\Desktop\test\third_raw\010515.xml'
			
 
				+#     jpg_path = r'C:\Users\Administrator\Desktop\test\third_raw\010515.jpg'
			
 
				+#     sheet_dict = read_xml_to_json(xml_path)
			
 
				+#     image = read_single_img(jpg_path)
			
 
				+#     get_text(sheet_dict, image)
			
--- a/segment/sheet_resolve/analysis/resolve.py
+++ b/segment/sheet_resolve/analysis/resolve.py
@@ -0,0 +1,466 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : resolve.py
			
 
				+# @Time    : 2018/12/3 0003 上午 10:16
			
 
				+
			
 
				+import time
			
 
				+import traceback
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from django.conf import settings
			
 
				+import segment.logging_config as logging
			
 
				+import segment.sheet_resolve.analysis.choice.analysis_choice as resolve_choice
			
 
				+import segment.sheet_resolve.analysis.choice.choice_box as choice_box
			
 
				+import segment.sheet_resolve.analysis.choice.choice_line_box as choice_line_box
			
 
				+import segment.sheet_resolve.analysis.cloze.analysis_cloze as resolve_cloze
			
 
				+import segment.sheet_resolve.analysis.cloze.cloze_line_box as resolve_cloze_line_box
			
 
				+import segment.sheet_resolve.analysis.exam_number.exam_number_box as resolve_exam_number_box
			
 
				+import segment.sheet_resolve.analysis.exam_number.exam_number_row_column as exam_number_row_column
			
 
				+import segment.sheet_resolve.analysis.sheet.analysis_sheet as resolve_sheet
			
 
				+import segment.sheet_resolve.analysis.solve.mark_box as resolve_mark_box
			
 
				+import segment.sheet_resolve.analysis.solve.mark_line_box as resolve_mark_line_box
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+from segment.sheet_resolve.tools.tf_sess import TfSess
			
 
				+from segment.sheet_resolve.tools.tf_settings import xml_template_path, model_dict
			
 
				+from segment.sheet_resolve.tools.utils import read_single_img, read_xml_to_json, create_xml
			
 
				+from segment.sheet_resolve.analysis.sheet.sheet_adjust import adjust_item_edge_by_gray_image
			
 
				+from segment.sheet_resolve.analysis.sheet.sheet_infer import infer_bar_code, box_infer_and_complete
			
 
				+from segment.sheet_resolve.analysis.sheet.sheet_infer import infer_exam_number, adjust_exam_number, exam_number_infer_by_s
			
 
				+from segment.sheet_resolve.analysis.sheet.choice_infer import infer_choice_m
			
 
				+
			
 
				+logger = logging.getLogger(settings.LOGGING_TYPE)
			
 
				+
			
 
				+
			
 
				+sheet_infer_dict = dict(bar_code=True,
			
 
				+                        choice_m=True,
			
 
				+                        exam_number=True,
			
 
				+                        common_sheet=True)
			
 
				+infer_choice_m_flag = False
			
 
				+
			
 
				+
			
 
				+def sheet(series_number, image_path, image, conf_thresh, mns_thresh, subject, sheet_sess, ocr=''):
			
 
				+    global infer_choice_m_flag
			
 
				+    model_type = subject
			
 
				+    classes = list(model_dict[model_type]['classes'])
			
 
				+    coordinate_bias_dict = model_dict[model_type]['class_coordinate_bias']
			
 
				+
			
 
				+    if '_blank' in model_type:
			
 
				+        model_type = model_type.replace("_blank", "")
			
 
				+
			
 
				+    sheets_dict = resolve_sheet.get_single_image_sheet_regions(model_type, image_path, image, classes,
			
 
				+                                                               sheet_sess.sess, sheet_sess.net,
			
 
				+                                                               conf_thresh, mns_thresh, coordinate_bias_dict)
			
 
				+
			
 
				+    h, w = image.shape[0], image.shape[1]
			
 
				+    regions = sheets_dict['regions']
			
 
				+    fetched_class = [ele['class_name'] for ele in regions]
			
 
				+
			
 
				+    try:
			
 
				+        regions = adjust_item_edge_by_gray_image(image, regions)
			
 
				+    except Exception as e:
			
 
				+        traceback.print_exc()
			
 
				+        logger.info('试卷：{} 自适应边框失败: {}'.format(image_path, e))
			
 
				+
			
 
				+    if sheet_infer_dict['bar_code']:
			
 
				+        try:
			
 
				+            if ('bar_code' not in fetched_class) and ocr:
			
 
				+                attention_region = [ele for ele in regions if ele['class_name'] == 'attention']
			
 
				+                bar_code_list = infer_bar_code(image, ocr, attention_region)
			
 
				+                regions.extend(bar_code_list)
			
 
				+        except Exception as e:
			
 
				+            traceback.print_exc()
			
 
				+            logger.info('试卷：{} 条形码推断失败: {}'.format(image_path, e))
			
 
				+
			
 
				+    if sheet_infer_dict['exam_number']:
			
 
				+        try:
			
 
				+            cond1 = 'exam_number' in fetched_class
			
 
				+            tmp = ['info_title', 'qr_code', 'bar_code', 'choice', 'choice_m', 'exam_number_w']
			
 
				+            cond2 = True in [True for ele in tmp if ele in fetched_class]  # 第一面特征
			
 
				+            cond3 = 'exam_number_w' in fetched_class
			
 
				+            cond4 = 'exam_number_s' in fetched_class
			
 
				+
			
 
				+            if cond1 and cond3 and not cond4:
			
 
				+                regions = adjust_exam_number(regions)
			
 
				+            if not cond1 and cond4:
			
 
				+                exam_number_list = exam_number_infer_by_s(image, regions)
			
 
				+                regions.extend(exam_number_list)
			
 
				+
			
 
				+            if not cond1 and not cond4 and cond2 and ocr:
			
 
				+                exam_number_list = infer_exam_number(image, ocr, regions)
			
 
				+                regions.extend(exam_number_list)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            traceback.print_exc()
			
 
				+            logger.info('试卷：{} 考号推断失败: {}'.format(image_path, e))
			
 
				+
			
 
				+    if sheet_infer_dict['choice_m']:
			
 
				+
			
 
				+        try:
			
 
				+            choice_m_list = infer_choice_m(image, regions, ocr)
			
 
				+            remain_choice_m = []
			
 
				+            if len(choice_m_list) > 0:
			
 
				+                choice_m_old_list = [ele for ele in regions if 'choice_m' == ele['class_name']]
			
 
				+                for infer_box in choice_m_list.copy():
			
 
				+                    infer_loc = infer_box['bounding_box']
			
 
				+
			
 
				+                    for tf_box in choice_m_old_list:
			
 
				+                        tf_loc = tf_box['bounding_box']
			
 
				+                        iou = utils.cal_iou(infer_loc, tf_loc)
			
 
				+                        if iou[0] > 0.85 or iou[1] > 0.85:
			
 
				+                            if infer_box not in remain_choice_m:
			
 
				+                                remain_choice_m.append(infer_box)
			
 
				+                                choice_m_list.remove(infer_box)
			
 
				+                            regions.remove(tf_box)
			
 
				+                            break
			
 
				+                        elif iou[0] > 0:
			
 
				+                            choice_m_list.remove(infer_box)
			
 
				+                            break
			
 
				+
			
 
				+                remain_choice_m.extend(choice_m_list)
			
 
				+
			
 
				+                # regions = [ele for ele in regions if 'choice_m' != ele['class_name']]
			
 
				+                regions.extend(remain_choice_m)
			
 
				+                infer_choice_m_flag = True
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            traceback.print_exc()
			
 
				+            logger.info('试卷：{} 选择题推断失败: {}'.format(image_path, e))
			
 
				+
			
 
				+    if sheet_infer_dict['common_sheet']:
			
 
				+
			
 
				+        try:
			
 
				+            regions = box_infer_and_complete(image, regions, ocr)
			
 
				+        except Exception as e:
			
 
				+            traceback.print_exc()
			
 
				+            logger.info('试卷：{} 识别框补全推断失败: {}'.format(image_path, e))
			
 
				+
			
 
				+    try:
			
 
				+        adjust_regions = adjust_item_edge_by_gray_image(image, regions)
			
 
				+    except Exception as e:
			
 
				+        adjust_regions = regions
			
 
				+
			
 
				+        traceback.print_exc()
			
 
				+        logger.info('试卷：{} 自适应边框失败: {}'.format(image_path, e))
			
 
				+
			
 
				+    sheets_dict.update({'regions': adjust_regions})
			
 
				+
			
 
				+    #  generate xml
			
 
				+    tree = ET.parse(xml_template_path)
			
 
				+    xml_save_path = sheets_dict['img_name'].replace('.jpg', '.xml')
			
 
				+    root = tree.getroot()
			
 
				+    series = ET.SubElement(root, 'paper_id')
			
 
				+    series.text = series_number
			
 
				+
			
 
				+    img_shape = image.shape
			
 
				+    project = ET.SubElement(root, 'size', {})
			
 
				+    width = ET.SubElement(project, 'width')
			
 
				+    width.text = str(img_shape[1])
			
 
				+    height = ET.SubElement(project, 'height')
			
 
				+    height.text = str(img_shape[0])
			
 
				+    depth = ET.SubElement(project, 'depth')
			
 
				+    if len(img_shape) >= 3:
			
 
				+        depth.text = '3'
			
 
				+    else:
			
 
				+        depth.text = '1'
			
 
				+
			
 
				+    for ele in regions:
			
 
				+        name = ele['class_name']
			
 
				+        xmin = ele['bounding_box']['xmin']
			
 
				+        ymin = ele['bounding_box']['ymin']
			
 
				+        xmax = ele['bounding_box']['xmax']
			
 
				+        ymax = ele['bounding_box']['ymax']
			
 
				+        tree = create_xml(name, tree, xmin, ymin, xmax, ymax)
			
 
				+
			
 
				+    tree.write(xml_save_path)
			
 
				+    return sheets_dict, xml_save_path
			
 
				+
			
 
				+
			
 
				+def choice(image, regions, xml_path, conf_thresh, mns_thresh, choice_sess):
			
 
				+    model_type = 'choice'
			
 
				+    classes = model_dict[model_type]['classes']
			
 
				+    coordinate_bias_dict = model_dict[model_type]['class_coordinate_bias']
			
 
				+
			
 
				+    choice_list = []
			
 
				+    for ele in regions:
			
 
				+        if ele["class_name"] == 'choice':
			
 
				+
			
 
				+            choice_bbox = ele['bounding_box']
			
 
				+            left = choice_bbox['xmin']
			
 
				+            top = choice_bbox['ymin']
			
 
				+            choice_img = utils.crop_region(image, choice_bbox)
			
 
				+
			
 
				+            choice_dict_tf = resolve_choice. \
			
 
				+                get_single_image_sheet_regions('choice', choice_img, classes,
			
 
				+                                               choice_sess.sess, choice_sess.net, conf_thresh, mns_thresh,
			
 
				+                                               coordinate_bias_dict)
			
 
				+
			
 
				+            choice_list = choice_list + choice_line_box.choice_line(left, top, choice_img, choice_dict_tf, xml_path)
			
 
				+
			
 
				+    return choice_list
			
 
				+
			
 
				+
			
 
				+def choice_row_col(image, regions, xml_path, conf_thresh, mns_thresh, choice_sess):
			
 
				+    model_type = 'choice_m'
			
 
				+    classes = model_dict[model_type]['classes']
			
 
				+    coordinate_bias_dict = model_dict[model_type]['class_coordinate_bias']
			
 
				+
			
 
				+    choice_list = []
			
 
				+    for ele in regions:
			
 
				+        if ele["class_name"] == 'choice':
			
 
				+
			
 
				+            choice_box = ele['bounding_box']
			
 
				+            left = choice_box['xmin']
			
 
				+            top = choice_box['ymin']
			
 
				+            choice_img = utils.crop_region(image, choice_box)
			
 
				+
			
 
				+            choice_m_dict_tf = resolve_choice. \
			
 
				+                get_single_image_sheet_regions('choice_m', choice_img, classes,
			
 
				+                                               choice_sess.sess, choice_sess.net, conf_thresh, mns_thresh,
			
 
				+                                               coordinate_bias_dict)
			
 
				+
			
 
				+            choice_list = choice_list + choice_line_box.choice_line_with_number(left, top, choice_img, choice_m_dict_tf, xml_path)
			
 
				+
			
 
				+    return choice_list
			
 
				+
			
 
				+
			
 
				+def choice_m_row_col(image, regions, xml_path):
			
 
				+
			
 
				+    choice_m_dict_tf = [ele for ele in regions if ele['class_name'] == 'choice_m']
			
 
				+    # choice_m_row_col_with_number
			
 
				+    choice_list = []
			
 
				+    try:
			
 
				+        # choice_list = choice_box.get_number_by_enlarge_choice_m(image, choice_m_dict_tf, xml_path)
			
 
				+        # if infer_choice_m_flag:
			
 
				+        #     choice_list = choice_line_box.choice_m_adjust(image, choice_m_dict_tf)
			
 
				+        #
			
 
				+        # else:
			
 
				+        #     choice_list = choice_line_box.choice_m_row_col(image, choice_m_dict_tf, xml_path)  # 找选择题行列、分数
			
 
				+
			
 
				+        choice_list = choice_line_box.choice_m_row_col(image, choice_m_dict_tf, xml_path)  # 找选择题行列、分数
			
 
				+        tree = ET.parse(xml_path)  # xml tree
			
 
				+        for index_num, box in enumerate(choice_list):
			
 
				+            if len(box['bounding_box']) > 0:
			
 
				+                abcd = box['bounding_box']
			
 
				+                number = str(box['number'])
			
 
				+                name = '{}_{}*{}_{}_{}'.format('choice_m', box['rows'], box['cols'], box['direction'], number)
			
 
				+                tree = utils.create_xml(name, tree,
			
 
				+                                        abcd['xmin'], abcd['ymin'],
			
 
				+                                        abcd['xmax'], abcd['ymax'])
			
 
				+
			
 
				+        tree.write(xml_path)
			
 
				+    except Exception as e:
			
 
				+        traceback.print_exc()
			
 
				+        print(e)
			
 
				+
			
 
				+    return choice_list
			
 
				+
			
 
				+
			
 
				+def exam_number(image, regions, xml_path):
			
 
				+    exam_number_dict = {}
			
 
				+    for ele in regions:
			
 
				+        if ele["class_name"] == 'exam_number':
			
 
				+            exam_number_dict = ele
			
 
				+
			
 
				+    exam_number_box = exam_number_dict['bounding_box']
			
 
				+    left = exam_number_box['xmin']
			
 
				+    top = exam_number_box['ymin']
			
 
				+    exam_number_img = utils.crop_region(image, exam_number_box)
			
 
				+
			
 
				+    # exam_number_dict = resolve_exam_number_box.exam_number(left, top, exam_number_img, xml_path)
			
 
				+    exam_number_dict = resolve_exam_number_box.exam_number_whole(left, top, exam_number_img, xml_path)
			
 
				+
			
 
				+    # print(exam_number_dict)
			
 
				+    return exam_number_dict
			
 
				+
			
 
				+
			
 
				+def exam_number_row_col(image, regions, xml_path):
			
 
				+    exam_number_dict = {}
			
 
				+    for ele in regions:
			
 
				+        if ele["class_name"] == 'exam_number':
			
 
				+            exam_number_dict = ele
			
 
				+
			
 
				+    exam_number_box = exam_number_dict['bounding_box']
			
 
				+    left = exam_number_box['xmin']
			
 
				+    top = exam_number_box['ymin']
			
 
				+    exam_number_img = utils.crop_region(image, exam_number_box)
			
 
				+
			
 
				+    exam_number_row_col_dict = exam_number_row_column.get_exam_number_row_and_col(left, top, exam_number_img)
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    if len(exam_number_row_col_dict) > 0:
			
 
				+        exam_number_box = exam_number_row_col_dict['bounding_box']
			
 
				+        name = '{}_{}*{}_{}'.format('exam_number',
			
 
				+                                    exam_number_row_col_dict['rows'],
			
 
				+                                    exam_number_row_col_dict['cols'],
			
 
				+                                    exam_number_row_col_dict['direction'])
			
 
				+        tree = utils.create_xml(name, tree,
			
 
				+                                exam_number_box['xmin'], exam_number_box['ymin'],
			
 
				+                                exam_number_box['xmax'], exam_number_box['ymax'])
			
 
				+
			
 
				+    else:
			
 
				+        tree = utils.create_xml('exam_number', tree,
			
 
				+                                exam_number_box['xmin'], exam_number_box['ymin'],
			
 
				+                                exam_number_box['xmax'], exam_number_box['ymax'])
			
 
				+        exam_number_row_col_dict = {}
			
 
				+
			
 
				+    tree.write(xml_path)
			
 
				+
			
 
				+    return [exam_number_row_col_dict]
			
 
				+
			
 
				+
			
 
				+def cloze(image, regions, xml_path, conf_thresh, mns_thresh, cloze_sess):
			
 
				+    classes = model_dict['cloze']['classes']
			
 
				+    coordinate_bias_dict = model_dict['cloze']['class_coordinate_bias']
			
 
				+
			
 
				+    cloze_list = []
			
 
				+    for ele in regions:
			
 
				+        if ele["class_name"] == 'cloze':
			
 
				+            cloze_box = ele['bounding_box']
			
 
				+            left = cloze_box['xmin']
			
 
				+            top = cloze_box['ymin']
			
 
				+            cloze_img = utils.crop_region(image, cloze_box)
			
 
				+            cloze_dict_tf = resolve_cloze.get_single_image_sheet_regions('cloze', cloze_img, classes,
			
 
				+                                                                         cloze_sess.sess, cloze_sess.net, conf_thresh,
			
 
				+                                                                         mns_thresh, coordinate_bias_dict)
			
 
				+            cloze_list = cloze_list + resolve_cloze_line_box.cloze_line(left, top, cloze_img, cloze_dict_tf['regions'], xml_path)
			
 
				+
			
 
				+    return cloze_list
			
 
				+
			
 
				+
			
 
				+def solve_with_mark(image, regions, xml_path):
			
 
				+    solve_list = []
			
 
				+    mark_list = []
			
 
				+    for ele in regions.copy():
			
 
				+        if 'solve' in ele["class_name"]:
			
 
				+            exam_number_box = ele['bounding_box']
			
 
				+            left = exam_number_box['xmin']
			
 
				+            top = exam_number_box['ymin']
			
 
				+            exam_number_img = utils.crop_region(image, exam_number_box)
			
 
				+            solve_mark_dict = resolve_mark_box.solve_mark(left, top, exam_number_img, xml_path)
			
 
				+            if len(solve_mark_dict) > 0:
			
 
				+                ele['class_name'] = 'solve_'+str(solve_mark_dict['number'])
			
 
				+                solve_list.append(ele)
			
 
				+                mark_list.append(solve_mark_dict)
			
 
				+
			
 
				+    return solve_list, mark_list
			
 
				+
			
 
				+
			
 
				+def solve(image, regions, xml_path):
			
 
				+    solve_list = []
			
 
				+    tree = ET.parse(xml_path)
			
 
				+    for ele in regions.copy():
			
 
				+        if 'solve' in ele["class_name"]:
			
 
				+            exam_number_box = ele['bounding_box']
			
 
				+            exam_number_img = utils.crop_region(image, exam_number_box)
			
 
				+            number = resolve_mark_line_box.solve_line(exam_number_img)
			
 
				+            solve_dict = {'number': number, 'location': exam_number_box, 'default_points': 12}
			
 
				+            solve_list.append(solve_dict)
			
 
				+
			
 
				+            tree = utils.create_xml(str(number), tree,
			
 
				+                                    exam_number_box['xmin'], exam_number_box['ymin'],
			
 
				+                                    exam_number_box['xmax'], exam_number_box['ymax'])
			
 
				+    tree.write(xml_path)
			
 
				+    return solve_list
			
 
				+
			
 
				+
			
 
				+def solve_with_number(regions, xml_path):
			
 
				+    solve_list = []
			
 
				+    for ele in regions:
			
 
				+        if 'solve' in ele["class_name"] or 'composition' in ele["class_name"]:
			
 
				+            solve_dict = {'number': -1, 'default_points': -1}
			
 
				+            ele.update(solve_dict)
			
 
				+            solve_list.append(ele)
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    for index_num, box in enumerate(solve_list):
			
 
				+        if len(box['bounding_box']) > 0:
			
 
				+            abcd = box['bounding_box']
			
 
				+            number = str(box['number'])
			
 
				+            default_points = box["default_points"]
			
 
				+            name = '{}_{}_{}'.format(box["class_name"], number, default_points)
			
 
				+            tree = utils.create_xml(name, tree,
			
 
				+                                    abcd['xmin'], abcd['ymin'],
			
 
				+                                    abcd['xmax'], abcd['ymax'])
			
 
				+
			
 
				+    tree.write(xml_path)
			
 
				+    return solve_list
			
 
				+
			
 
				+
			
 
				+def cloze_with_number(regions, xml_path):
			
 
				+    cloze_list = []
			
 
				+    for ele in regions:
			
 
				+        if 'cloze' == ele["class_name"] or "cloze_s" == ele["class_name"]:
			
 
				+            cloze_dict = {'number': -1, 'default_points': -1}
			
 
				+            ele.update(cloze_dict)
			
 
				+            cloze_list.append(ele)
			
 
				+
			
 
				+    tree = ET.parse(xml_path)  # xml tree
			
 
				+    for index_num, box in enumerate(cloze_list):
			
 
				+        if len(box['bounding_box']) > 0:
			
 
				+            abcd = box['bounding_box']
			
 
				+            number = str(box['number'])
			
 
				+            default_points = box["default_points"]
			
 
				+            name = '{}_{}_{}'.format(box["class_name"], number, default_points)
			
 
				+            tree = utils.create_xml(name, tree,
			
 
				+                                    abcd['xmin'], abcd['ymin'],
			
 
				+                                    abcd['xmax'], abcd['ymax'])
			
 
				+
			
 
				+    tree.write(xml_path)
			
 
				+    return cloze_list
			
 
				+
			
 
				+
			
 
				+def make_together(image_path):
			
 
				+
			
 
				+    sheet_sess = TfSess('sheet')
			
 
				+    choice_sess = TfSess('choice')
			
 
				+    cloze_sess = TfSess('cloze')
			
 
				+
			
 
				+    raw_img = read_single_img(image_path)
			
 
				+    conf_thresh_0 = 0.7
			
 
				+    mns_thresh_0 = 0.3
			
 
				+
			
 
				+    series_number = 123456789
			
 
				+    subject = 'english'
			
 
				+    sheets_dict_0, xml_save_path = sheet(series_number, image_path, raw_img, conf_thresh_0, mns_thresh_0, subject, sheet_sess)
			
 
				+    # 手动修改faster_rcnn识别生成的框
			
 
				+
			
 
				+    sheets_dict_0 = read_xml_to_json(xml_save_path)
			
 
				+    regions = sheets_dict_0['regions']
			
 
				+    classes_name = str([ele['class_name'] for ele in regions])
			
 
				+
			
 
				+    if 'choice' in classes_name:
			
 
				+        try:
			
 
				+            sheets_dict_0['choice'] = choice(raw_img, regions, xml_save_path, conf_thresh_0, mns_thresh_0, choice_sess)
			
 
				+        except Exception:
			
 
				+            traceback.print_exc()
			
 
				+
			
 
				+    if 'exam_number' in classes_name:
			
 
				+        try:
			
 
				+            sheets_dict_0['exam_number'] = exam_number(raw_img, regions, xml_save_path)
			
 
				+        except Exception:
			
 
				+            traceback.print_exc()
			
 
				+
			
 
				+    if 'cloze' in classes_name:
			
 
				+        try:
			
 
				+            sheets_dict_0['cloze'] = cloze(raw_img, regions, xml_save_path, conf_thresh_0, mns_thresh_0, cloze_sess)
			
 
				+        except Exception:
			
 
				+            traceback.print_exc()
			
 
				+
			
 
				+    if 'solve' in classes_name:
			
 
				+        try:
			
 
				+            solve_list, mark_list = solve(raw_img, regions, xml_save_path,)
			
 
				+            sheets_dict_0['solve'] = solve_list
			
 
				+            sheets_dict_0['mark'] = mark_list
			
 
				+        except Exception:
			
 
				+            traceback.print_exc()
			
 
				+
			
 
				+    # print(sheets_dict_0)
			
 
				+    return sheets_dict_0
			
 
				+
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     start_time = time.time()
			
 
				+#
			
 
				+#     image_path_0 = os.path.join(r'C:\Users\Administrator\Desktop\sheet\correct\back_sizes\template',
			
 
				+#                                 '20180719004308818_0020.jpg')
			
 
				+#     make_together(image_path_0)
			
 
				+#     end_time = time.time()
			
 
				+#     print('time cost: ', (end_time - start_time))
			
--- a/segment/sheet_resolve/analysis/sheet/__init__.py
+++ b/segment/sheet_resolve/analysis/sheet/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:01
			
--- a/segment/sheet_resolve/analysis/sheet/analysis_sheet.py
+++ b/segment/sheet_resolve/analysis/sheet/analysis_sheet.py
@@ -0,0 +1,270 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : analysis_sheet.py
			
 
				+import time
			
 
				+import os
			
 
				+import traceback
			
 
				+
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+
			
 
				+from segment.sheet_resolve.lib.model.test import im_detect
			
 
				+from segment.sheet_resolve.lib.model.nms_wrapper import nms
			
 
				+from segment.sheet_resolve.lib.utils.timer import Timer
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+from segment.sheet_resolve.analysis.solve.optional_solve import find_contours, resolve_optional_choice
			
 
				+
			
 
				+
			
 
				+def analysis_single_image_with_regions(analysis_type, classes,
			
 
				+                                       sess, net,
			
 
				+                                       im_raw, conf_thresh, mns_thresh,
			
 
				+                                       coordinate_bias_dict):
			
 
				+    """Detect object classes in an image using pre-computed object proposals."""
			
 
				+
			
 
				+    size = im_raw.shape
			
 
				+
			
 
				+    # Detect all object classes and regress object bounds
			
 
				+    timer = Timer()
			
 
				+    timer.tic()
			
 
				+    if analysis_type in ['unknown_subject', 'math', 'math_zxhx', 'english', 'chinese',
			
 
				+                         'physics', 'chemistry', 'biology', 'politics', 'history',
			
 
				+                         'geography', 'science_comprehensive', 'arts_comprehensive'
			
 
				+                         ]:
			
 
				+        analysis_type = 'sheet'
			
 
				+    im, ratio = utils.img_resize(analysis_type, im_raw)
			
 
				+    scores, boxes = im_detect(analysis_type, sess, net, im)
			
 
				+    timer.toc()
			
 
				+    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))
			
 
				+
			
 
				+    content_list = []
			
 
				+    analysis_cls_list = []
			
 
				+    qr_code_info = 'Nan'
			
 
				+
			
 
				+    for cls_ind, cls in enumerate(classes[1:]):  # classes
			
 
				+        cls_ind += 1  # because we skipped background
			
 
				+        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
			
 
				+        cls_scores = scores[:, cls_ind]
			
 
				+        dets = np.hstack((cls_boxes,
			
 
				+                          cls_scores[:, np.newaxis])).astype(np.float32)
			
 
				+        keep = nms(dets, mns_thresh)
			
 
				+        dets = dets[keep, :]
			
 
				+        # vis_detections(im, cls, dets, ax, thresh=conf_thresh)
			
 
				+        inds = np.where(dets[:, -1] >= conf_thresh)[0]
			
 
				+        if len(inds) > 0:
			
 
				+            if cls in list(coordinate_bias_dict.keys()):
			
 
				+                xmin_bias = coordinate_bias_dict[cls]['xmin_bias']
			
 
				+                ymin_bias = coordinate_bias_dict[cls]['ymin_bias']
			
 
				+                xmax_bias = coordinate_bias_dict[cls]['xmax_bias']
			
 
				+                ymax_bias = coordinate_bias_dict[cls]['ymax_bias']
			
 
				+            else:
			
 
				+                xmin_bias = 0
			
 
				+                ymin_bias = 0
			
 
				+                xmax_bias = 0
			
 
				+                ymax_bias = 0
			
 
				+            for i in inds:
			
 
				+                bbox = dets[i, :4]
			
 
				+                score = '{:.4f}'.format(dets[i, -1])
			
 
				+
			
 
				+                xmin = int(int(bbox[0]) * ratio[0]) + xmin_bias
			
 
				+                ymin = int(int(bbox[1]) * ratio[1]) + ymin_bias
			
 
				+                xmax = int(int(bbox[2]) * ratio[0]) + xmax_bias
			
 
				+                ymax = int(int(bbox[3]) * ratio[1]) + ymax_bias
			
 
				+
			
 
				+                xmin = (xmin if (xmin > 0) else 1)
			
 
				+                ymin = (ymin if (ymin > 0) else 1)
			
 
				+                xmax = (xmax if (xmax < size[1]) else size[1] - 1)
			
 
				+                ymax = (ymax if (ymax < size[0]) else size[0] - 1)
			
 
				+
			
 
				+                if cls in ['solve0', ]:
			
 
				+                    cls = 'solve'
			
 
				+
			
 
				+                bbox_dict = {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}
			
 
				+                # class_dict = {"class_name": cls, "bounding_box": bbox_dict, "score": score}
			
 
				+                class_dict = {"class_name": cls, "bounding_box": bbox_dict}
			
 
				+
			
 
				+                # if cls == 'qr_code':
			
 
				+                #     qr_img = utils.crop_region(im_raw, bbox_dict)
			
 
				+                #     qr_path = r'./qr_code.jpg'
			
 
				+                #     cv2.imwrite(qr_path, qr_img)
			
 
				+                #     qr_code_info = utils.check_qr_code_with_region_img(qr_path)
			
 
				+                #     os.remove(qr_path)
			
 
				+
			
 
				+                content_list.append(class_dict)
			
 
				+
			
 
				+    return content_list, analysis_cls_list, qr_code_info
			
 
				+
			
 
				+
			
 
				+def get_single_image_sheet_regions(analysis_type, img_path, img, classes,
			
 
				+                                   sess, net, conf_thresh, mns_thresh,
			
 
				+                                   coordinate_bias_dict):
			
 
				+    start_time = time.time()
			
 
				+
			
 
				+    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
			
 
				+    print('analysis for JPG {}'.format(img_path))
			
 
				+
			
 
				+    content, cls, qr_code_info = \
			
 
				+        analysis_single_image_with_regions(analysis_type, classes, sess, net,
			
 
				+                                           img, conf_thresh, mns_thresh,
			
 
				+                                           coordinate_bias_dict)
			
 
				+
			
 
				+    img_dict = {"img_name": img_path,
			
 
				+                # 'qr_code': qr_code_info,
			
 
				+                'subject': analysis_type,
			
 
				+                "regions": content,
			
 
				+                }
			
 
				+
			
 
				+    end_time = time.time()
			
 
				+    print(end_time - start_time)
			
 
				+
			
 
				+    return img_dict
			
 
				+
			
 
				+
			
 
				+def question_number_format(init_number, crt_numbers, sheet_dict):
			
 
				+    for region in sheet_dict['regions']:
			
 
				+        numbers = region.get("number")
			
 
				+        if numbers and isinstance(numbers, int):
			
 
				+            if numbers <= 0 or numbers in crt_numbers:
			
 
				+                numbers = init_number
			
 
				+                crt_numbers.append(numbers)
			
 
				+                init_number += 1
			
 
				+            region.update({"number": numbers})
			
 
				+            crt_numbers.append(numbers)
			
 
				+        if numbers and isinstance(numbers, list):
			
 
				+            for i, num in enumerate(numbers):
			
 
				+                if num <= 0 or num in crt_numbers:
			
 
				+                    numbers[i] = init_number
			
 
				+                    crt_numbers.append(init_number)
			
 
				+                    init_number += 1
			
 
				+
			
 
				+            region.update({"number": numbers})
			
 
				+            crt_numbers.extend(numbers)
			
 
				+
			
 
				+    return sheet_dict, init_number, crt_numbers
			
 
				+
			
 
				+
			
 
				+def box_region_format(sheet_dict, image, subject, shrink=True):
			
 
				+    include_class = ['anchor_point',
			
 
				+                     'bar_code',
			
 
				+                     'choice_m',
			
 
				+                     'cloze',
			
 
				+                     'cloze_s',
			
 
				+                     'exam_number_col_row',
			
 
				+                     'optional_choice',
			
 
				+                     'optional_solve',
			
 
				+                     # 'qr_code',
			
 
				+                     'solve',
			
 
				+                     'optional_solve',
			
 
				+                     'composition',
			
 
				+                     # 'correction'
			
 
				+                     ]
			
 
				+
			
 
				+    sheet_regions = sheet_dict['regions']
			
 
				+    optional_solve_tmp = []
			
 
				+    default_points_dict = {'choice_m': 5, "cloze": 5, 'solve': 12, 'cloze_s': 5, "composition": 60}
			
 
				+    if subject == "english":
			
 
				+        default_points_dict = {'choice_m': 2, "cloze": 2, 'solve': 2, 'cloze_s': 2, "composition": 25}
			
 
				+
			
 
				+    for i in range(len(sheet_regions) - 1, -1, -1):
			
 
				+        if subject == "math":
			
 
				+            if sheet_regions[i]['class_name'] == 'cloze':
			
 
				+                sheet_regions[i]['class_name'] = 'cloze_big'  # math exclude cloze big
			
 
				+            if sheet_regions[i]['class_name'] == 'cloze_s':
			
 
				+                sheet_regions[i]['class_name'] = 'cloze'  # math exclude cloze big
			
 
				+        if subject == "english":
			
 
				+            if sheet_regions[i]['class_name'] == 'solve':
			
 
				+                sheet_regions[i]['class_name'] = 'cloze'
			
 
				+            if sheet_regions[i]['class_name'] == 'correction':
			
 
				+                sheet_regions[i]['class_name'] = 'solve'
			
 
				+
			
 
				+    for i in range(len(sheet_regions) - 1, -1, -1):
			
 
				+        if sheet_regions[i]['class_name'] in ['solve0']:
			
 
				+            sheet_regions[i]['class_name'] = 'solve'
			
 
				+        if sheet_regions[i]['class_name'] in ['composition0']:
			
 
				+            sheet_regions[i]['class_name'] = 'composition'
			
 
				+
			
 
				+        if sheet_regions[i]['class_name'] == 'select_s':
			
 
				+            sheet_regions[i]['class_name'] = 'optional_choice'
			
 
				+            optional_solve_tmp.append(sheet_regions[i])
			
 
				+            sheet_regions.pop(i)
			
 
				+
			
 
				+        if shrink:
			
 
				+            if sheet_regions[i]['class_name'] not in include_class:
			
 
				+                sheet_regions.pop(i)
			
 
				+
			
 
				+    for ele in sheet_regions:
			
 
				+        if ele['class_name'] == 'solve':
			
 
				+            solve_box = (ele['bounding_box']['xmin'], ele['bounding_box']['ymin'],
			
 
				+                         ele['bounding_box']['xmax'], ele['bounding_box']['ymax'])
			
 
				+            for optional_solve in optional_solve_tmp:
			
 
				+                optional_solve_box = (optional_solve['bounding_box']['xmin'], optional_solve['bounding_box']['ymin'],
			
 
				+                                      optional_solve['bounding_box']['xmax'], optional_solve['bounding_box']['ymax'])
			
 
				+                if utils.decide_coordinate_contains(optional_solve_box, solve_box):
			
 
				+                    ele['class_name'] = 'optional_solve'
			
 
				+                    break
			
 
				+                else:
			
 
				+                    continue
			
 
				+
			
 
				+        if ele['class_name'] == "composition":
			
 
				+            if isinstance(ele['default_points'], list):
			
 
				+                for i, dp in enumerate(ele['default_points']):
			
 
				+                    if dp != default_points_dict[ele['class_name']]:
			
 
				+                        ele['default_points'][i] = default_points_dict[ele['class_name']]
			
 
				+
			
 
				+            if isinstance(ele['default_points'], int):
			
 
				+                if ele['default_points'] != default_points_dict[ele['class_name']]:
			
 
				+                    ele['default_points'] = default_points_dict[ele['class_name']]
			
 
				+
			
 
				+        if ele['class_name'] in ["choice_m", "cloze", "cloze_s", "solve"]:
			
 
				+            if isinstance(ele['default_points'], list):
			
 
				+                for i, dp in enumerate(ele['default_points']):
			
 
				+                    if dp == -1:
			
 
				+                        ele['default_points'][i] = default_points_dict[ele['class_name']]
			
 
				+
			
 
				+            if isinstance(ele['default_points'], int):
			
 
				+                if ele['default_points'] == -1:
			
 
				+                    ele['default_points'] = default_points_dict[ele['class_name']]
			
 
				+
			
 
				+    for ele in optional_solve_tmp:  # 选做题
			
 
				+        bbox = ele['bounding_box']
			
 
				+        box_region = utils.crop_region(image, bbox)
			
 
				+        left = bbox['xmin']
			
 
				+        top = bbox['ymin']
			
 
				+        right = bbox['xmax']
			
 
				+        bottom = bbox['ymax']
			
 
				+
			
 
				+        if (right - left) >= (bottom-top):
			
 
				+            direction = 180
			
 
				+        else:
			
 
				+            direction = 90
			
 
				+
			
 
				+        # res = find_contours(left, top, box_region)
			
 
				+        try:
			
 
				+            res = resolve_optional_choice(left, top, direction, box_region)
			
 
				+        except Exception as e:
			
 
				+            res = {'rows': 1, 'cols': 2,
			
 
				+                   'option': 'A, B',
			
 
				+                   'single_width': (right - left) // 3,
			
 
				+                   'single_height': bottom - top,
			
 
				+                   'bounding_box': {'xmin': left,
			
 
				+                                    'ymin': top,
			
 
				+                                    'xmax': right,
			
 
				+                                    'ymax': bottom}}
			
 
				+        res['class_name'] = 'optional_choice'
			
 
				+
			
 
				+        sheet_regions.append(res)
			
 
				+
			
 
				+    # iou
			
 
				+    sheet_tmp = sheet_regions.copy()
			
 
				+    remove_index = []
			
 
				+    for i, region in enumerate(sheet_tmp):
			
 
				+        if i not in remove_index:
			
 
				+            box = region['bounding_box']
			
 
				+            for j, region_in in enumerate(sheet_tmp):
			
 
				+                box_in = region_in['bounding_box']
			
 
				+                iou = utils.cal_iou(box, box_in)
			
 
				+                if iou[0] > 0.75 and i != j:
			
 
				+                    sheet_regions.remove(region)
			
 
				+                    remove_index.append(j)
			
 
				+                    break
			
 
				+
			
 
				+    sheet_dict.update({'regions': sheet_regions})
			
 
				+    return sheet_dict
			
--- a/segment/sheet_resolve/analysis/sheet/choice_infer.py
+++ b/segment/sheet_resolve/analysis/sheet/choice_infer.py
@@ -0,0 +1,671 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : choice_infer.py
			
 
				+import os
			
 
				+import traceback
			
 
				+import time
			
 
				+import random
			
 
				+from django.conf import settings
			
 
				+from segment.sheet_resolve.tools import utils, brain_api
			
 
				+from itertools import chain
			
 
				+import re
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from segment.sheet_resolve.tools.utils import crop_region_direct, create_xml, infer_number, combine_char_in_raw_format
			
 
				+from sklearn.cluster import DBSCAN
			
 
				+from segment.sheet_resolve.analysis.sheet.ocr_sheet import ocr2sheet
			
 
				+
			
 
				+
			
 
				+def get_split_index(array, dif=0):
			
 
				+    array = np.array(array)
			
 
				+    interval_list = np.abs(array[1:] - array[:-1])
			
 
				+    split_index = [0]
			
 
				+    for i, interval in enumerate(interval_list):
			
 
				+        if dif:
			
 
				+            split_dif = dif
			
 
				+        else:
			
 
				+            split_dif = np.mean(interval_list)
			
 
				+        if interval > split_dif:
			
 
				+            split_index.append(i + 1)
			
 
				+
			
 
				+    split_index.append(len(array))
			
 
				+    split_index = sorted(list(set(split_index)))
			
 
				+    return split_index
			
 
				+
			
 
				+
			
 
				+def adjust_choice_m(image, xe, ye):
			
 
				+    dilate = 1
			
 
				+    blur = 5
			
 
				+
			
 
				+    # Convert to gray
			
 
				+    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
			
 
				+
			
 
				+    if blur != 0:
			
 
				+        image = cv2.GaussianBlur(image, (blur, blur), 0)
			
 
				+
			
 
				+    # Apply threshold to get image with only b&w (binarization)
			
 
				+    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    kernel = np.ones((ye, xe), np.uint8)  # y轴膨胀, x轴膨胀
			
 
				+
			
 
				+    dst = cv2.dilate(image, kernel, iterations=1)
			
 
				+
			
 
				+    (major, minor, _) = cv2.__version__.split(".")
			
 
				+    contours = cv2.findContours(dst, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+    cnts = contours[0] if int(major) > 3 else contours[1]
			
 
				+
			
 
				+    # _, cnts, hierarchy = cv2.findContours(dst, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+
			
 
				+    right_limit = 0
			
 
				+    bottom_limit = 0
			
 
				+    for cnt_id, cnt in enumerate(reversed(cnts)):
			
 
				+        x, y, w, h = cv2.boundingRect(cnt)
			
 
				+        if x + w > right_limit:
			
 
				+            right_limit = x + w
			
 
				+
			
 
				+        if y + h > bottom_limit:
			
 
				+            bottom_limit = y + h
			
 
				+
			
 
				+    return right_limit, bottom_limit
			
 
				+
			
 
				+
			
 
				+def find_digital(ocr_raw_list):
			
 
				+    pattern = r'\d+'
			
 
				+    x_list = []
			
 
				+    y_list = []
			
 
				+    digital_list = list()
			
 
				+    chars_list = list()
			
 
				+    height_list, width_list = list(), list()
			
 
				+    ocr_dict_list = combine_char_in_raw_format(ocr_raw_list)
			
 
				+    for i, ele in enumerate(ocr_dict_list):
			
 
				+        words = ele['words']
			
 
				+        words = words.replace(' ', '').upper()  # 去除空格
			
 
				+
			
 
				+        digital_words_m = re.finditer(pattern, words)
			
 
				+        digital_index_list = [(m.group(), m.span()) for m in digital_words_m if m]
			
 
				+        chars_index = [ele for ele in range(0, len(ele['chars']))]
			
 
				+        digital_index_detail_list = []
			
 
				+        for letter_info in digital_index_list:
			
 
				+            number = letter_info[0]
			
 
				+            index_start = letter_info[1][0]
			
 
				+            index_end = letter_info[1][1] - 1
			
 
				+            char_start = ele['chars'][index_start]
			
 
				+            char_end = ele['chars'][index_end]
			
 
				+
			
 
				+            if index_start == index_end:
			
 
				+                digital_index_detail_list += [index_start]
			
 
				+            else:
			
 
				+                digital_index_detail_list += chars_index[index_start:index_end + 1]
			
 
				+
			
 
				+            letter_loc_xmin = int(char_start['location']['left'])
			
 
				+            letter_loc_ymin = min(int(char_start['location']['top']), int(char_end['location']['top']))
			
 
				+            letter_loc_xmax = int(char_end['location']['left']) + int(char_end['location']['width'])
			
 
				+            letter_loc_ymax = max(int(char_start['location']['top']) + int(char_start['location']['height']),
			
 
				+                                  int(char_end['location']['top']) + int(char_end['location']['height']))
			
 
				+
			
 
				+            mid_x = letter_loc_xmin + (letter_loc_xmax - letter_loc_xmin) // 2
			
 
				+            mid_y = letter_loc_ymin + (letter_loc_ymax - letter_loc_ymin) // 2
			
 
				+
			
 
				+            # print(number, (mid_x, mid_y))
			
 
				+            x_list.append(mid_x)
			
 
				+            y_list.append(mid_y)
			
 
				+
			
 
				+            height_list.append(letter_loc_ymax - letter_loc_ymin)
			
 
				+            width_list.append(letter_loc_xmax - letter_loc_xmin)
			
 
				+
			
 
				+            number_loc = (letter_loc_xmin, letter_loc_ymin, letter_loc_xmax, letter_loc_ymax, mid_x, mid_y)
			
 
				+            digital_list.append({"digital": int(number), "loc": number_loc})
			
 
				+
			
 
				+        current_chars = [char for index, char in enumerate(ele['chars'])
			
 
				+                         if index not in digital_index_detail_list and char['char'] not in ['.', ',', '。', '、']]
			
 
				+
			
 
				+        chars_list += current_chars
			
 
				+    d_mean_height = sum(height_list) // len(height_list)
			
 
				+    d_mean_width = sum(width_list) // len(width_list)
			
 
				+
			
 
				+    # mean_height = max(height_list)
			
 
				+    # mean_width = max(width_list)
			
 
				+    # print(x_list)
			
 
				+    # print(y_list)
			
 
				+    return digital_list, chars_list, d_mean_height, d_mean_width
			
 
				+
			
 
				+
			
 
				+def cluster2choice_m_(cluster_list, m_h, m_w):
			
 
				+    numbers = [ele['digital'] for ele in cluster_list]
			
 
				+
			
 
				+    loc_top_interval = (np.array([ele['loc'][3] for ele in cluster_list][1:]) -
			
 
				+                        np.array([ele['loc'][3] for ele in cluster_list][:-1]))
			
 
				+
			
 
				+    split_index = [0]
			
 
				+    for i, interval in enumerate(loc_top_interval):
			
 
				+        if interval > m_h * 1.5:
			
 
				+            split_index.append(i + 1)
			
 
				+
			
 
				+    split_index.append(len(cluster_list))
			
 
				+    split_index = sorted(list(set(split_index)))
			
 
				+    block_list = []
			
 
				+    for i in range(len(split_index) - 1):
			
 
				+        block = cluster_list[split_index[i]: split_index[i + 1]]
			
 
				+
			
 
				+        xmin = min([ele["loc"][0] for ele in block])
			
 
				+        ymin = min([ele["loc"][1] for ele in block])
			
 
				+        xmax = max([ele["loc"][2] for ele in block])
			
 
				+        ymax = max([ele["loc"][3] for ele in block])
			
 
				+
			
 
				+        numbers = [ele['digital'] for ele in block]
			
 
				+
			
 
				+        choice_m = {"number": numbers, "loc": (xmin, ymin, xmax, ymax)}
			
 
				+        block_list.append(choice_m)
			
 
				+
			
 
				+    return block_list
			
 
				+
			
 
				+
			
 
				+def cluster2choice_m(cluster_list, mean_width):
			
 
				+    # 比较x坐标，去掉误差值
			
 
				+    numbers_x = [ele['loc'][4] for ele in cluster_list]
			
 
				+    numbers_x_array = np.array(numbers_x)
			
 
				+    numbers_x_interval = np.abs((numbers_x_array[1:] - numbers_x_array[:-1]))
			
 
				+    error_index_superset = np.where(numbers_x_interval >= mean_width)[0]
			
 
				+    error_index_superset_interval = error_index_superset[1:] - error_index_superset[:-1]
			
 
				+    t_index = list(np.where(error_index_superset_interval > 1)[0] + 1)
			
 
				+    t_index.insert(0, 0)
			
 
				+    t_index.append(len(error_index_superset))
			
 
				+    error = []
			
 
				+    for i in range(0, len(t_index) - 1):
			
 
				+        a = t_index[i]
			
 
				+        b = t_index[i + 1]
			
 
				+        block = list(error_index_superset[a: b])
			
 
				+        error += block[1:]
			
 
				+
			
 
				+    cluster_list = [ele for i, ele in enumerate(cluster_list) if i not in error]
			
 
				+    numbers = [ele['digital'] for ele in cluster_list]
			
 
				+    numbers_array = np.array(numbers)
			
 
				+
			
 
				+    # numbers_y = [ele['loc'][5] for ele in cluster_list]
			
 
				+    # numbers_y_array = np.array(numbers_y)
			
 
				+    # numbers_y_interval = np.abs((numbers_y_array[1:] - numbers_y_array[:-1]))
			
 
				+    # split_index = [0]
			
 
				+    # for i, interval in enumerate(numbers_y_interval):
			
 
				+    #     if interval > np.mean(numbers_y_interval):
			
 
				+    #         split_index.append(i + 1)
			
 
				+    #
			
 
				+    # split_index.append(len(cluster_list))
			
 
				+    # split_index = sorted(list(set(split_index)))
			
 
				+    # for i in range(len(split_index) - 1):
			
 
				+    #     block = cluster_list[split_index[i]: split_index[i + 1]]
			
 
				+    #     block_numbers = numbers_array[split_index[i]: split_index[i + 1]]
			
 
				+
			
 
				+    # 确定数字题号的位置，前提：同block题号是某等差数列的子集
			
 
				+    numbers_sum = numbers_array + np.flipud(numbers_array)
			
 
				+
			
 
				+    counts = np.bincount(numbers_sum)
			
 
				+    mode_times = np.max(counts)
			
 
				+    mode_value = np.argmax(counts)
			
 
				+
			
 
				+    if mode_times != len(numbers) and mode_times >= 2:
			
 
				+        # 启动题号补全
			
 
				+
			
 
				+        number_interval_list = abs(numbers_array[1:] - numbers_array[:-1])
			
 
				+        number_interval_counts = np.bincount(number_interval_list)
			
 
				+        # number_interval_mode_times = np.max(number_interval_counts)
			
 
				+        number_interval_mode_value = np.argmax(number_interval_counts)
			
 
				+
			
 
				+        suspect_index = np.where(numbers_sum != mode_value)[0]
			
 
				+        numbers_array_len = len(numbers_array)
			
 
				+        for suspect in suspect_index:
			
 
				+            if suspect == 0:
			
 
				+                cond_left = False
			
 
				+                cond_right = numbers_array[suspect + 1] == numbers_array[suspect] + number_interval_mode_value
			
 
				+            elif suspect == numbers_array_len - 1:
			
 
				+                cond_right = False
			
 
				+                cond_left = numbers_array[suspect - 1] == numbers_array[suspect] - number_interval_mode_value
			
 
				+            else:
			
 
				+                cond_left = numbers_array[suspect - 1] == numbers_array[suspect] - number_interval_mode_value
			
 
				+                cond_right = numbers_array[suspect + 1] == numbers_array[suspect] + number_interval_mode_value
			
 
				+
			
 
				+            if cond_left or cond_right:
			
 
				+                pass
			
 
				+            else:
			
 
				+                numbers_array[suspect] = -1
			
 
				+
			
 
				+        numbers_array = infer_number(numbers_array, number_interval_mode_value)  # 推断题号
			
 
				+
			
 
				+    numbers_interval = np.abs(numbers_array[1:] - numbers_array[:-1])
			
 
				+
			
 
				+    split_index = [0]
			
 
				+    for i, interval in enumerate(numbers_interval):
			
 
				+        if interval > np.mean(numbers_interval):
			
 
				+            split_index.append(i + 1)
			
 
				+
			
 
				+    split_index.append(len(cluster_list))
			
 
				+    split_index = sorted(list(set(split_index)))
			
 
				+    block_list = []
			
 
				+
			
 
				+    for i in range(len(split_index) - 1):
			
 
				+        block = cluster_list[split_index[i]: split_index[i + 1]]
			
 
				+        block_numbers = numbers_array[split_index[i]: split_index[i + 1]]
			
 
				+
			
 
				+        xmin = min([ele["loc"][0] for ele in block])
			
 
				+        ymin = min([ele["loc"][1] for ele in block])
			
 
				+        xmax = max([ele["loc"][2] for ele in block])
			
 
				+        ymax = max([ele["loc"][3] for ele in block])
			
 
				+        mid_x = xmin + (xmax - xmin) // 2
			
 
				+        mid_y = ymin + (ymax - ymin) // 2
			
 
				+
			
 
				+        choice_m = {"numbers": list(block_numbers), "loc": [xmin, ymin, xmax, ymax, mid_x, mid_y]}
			
 
				+        block_list.append(choice_m)
			
 
				+
			
 
				+    return block_list
			
 
				+
			
 
				+
			
 
def cluster_and_anti_abnormal(image, xml_path, digital_list, chars_list,
                              mean_height, mean_width, choice_s_height, choice_s_width, limit_loc):
    """Cluster OCR'd question numbers into multi-choice ("choice_m") regions.

    Pipeline, as implemented below:

    1. DBSCAN-cluster the digits on the last two entries of each ``loc``.
    2. Turn every cluster into number blocks via ``cluster2choice_m``.
    3. Pad blocks whose number count differs from the most common count
       with inferred question numbers (``infer_number``) and grow their box.
    4. Tag each block with a ``direction`` (180 when the box is at least as
       tall as it is wide, otherwise 90).
    5. Walk the blocks row by row, merge blocks that ``get_split_index``
       groups together, guess the option letters from the OCR characters and
       build one result dict per block.
    6. Split results wider than ``2 * choice_s_width`` into a left and a
       right part.
    7. Drop results whose area is below ``choice_s_width * choice_s_height``.

    :param image: page image; only handed to ``utils.crop_region``.
    :param xml_path: unused by the live code (only referenced by code that
        has been commented out).
    :param digital_list: dicts with a ``"loc"`` sequence; its last two
        entries are the clustering coordinates (presumably mid_x, mid_y --
        TODO confirm against ``find_digital``).
    :param chars_list: OCR character dicts read through ``ele["char"]`` and
        ``ele["location"]`` (keys ``left``, ``top``, ``width``, ``height``).
    :param mean_height: mean digit height; used for the DBSCAN radius when
        ``choice_s_height`` is 0 and forwarded to ``adjust_choice_m``.
    :param mean_width: mean digit width; forwarded to ``cluster2choice_m``
        and (doubled) to ``adjust_choice_m``.
    :param choice_s_height: height of a single-choice box, 0 if unknown.
    :param choice_s_width: width of a single-choice box, 0 if unknown.
    :param limit_loc: ``(left, top, right, bottom)`` of the region the OCR
        coordinates are relative to; left/top are added back so that the
        returned boxes are in ``image`` coordinates.
    :return: list of dicts with keys ``class_name`` (always ``'choice_m'``),
        ``number``, ``bounding_box`` (dict with xmin/ymin/xmax/ymax),
        ``choice_option``, ``default_points``, ``direction``, ``cols``,
        ``rows``, ``single_width`` and ``single_height``.

    NOTE(review): no guard exists for an empty ``digital_list`` or for zero
    clusters; presumably the numpy / ``max`` calls below raise in that case
    and the caller is expected to catch it -- TODO confirm.
    """
    limit_left, limit_top, limit_right, limit_bottom = limit_loc
    # NOTE(review): limit_width is never read below.
    limit_width, limit_height = limit_right - limit_left, limit_bottom - limit_top
    # One row per digit: the last two entries of its "loc".
    arr = np.ones((len(digital_list), 2))
    for i, ele in enumerate(digital_list):
        arr[i] = np.array([ele["loc"][-2], ele["loc"][-1]])

    # Neighbourhood radius: prefer the known single-choice height, otherwise
    # fall back to a multiple of the mean digit height.
    if choice_s_height != 0:
        eps = int(choice_s_height * 2)
    else:
        eps = int(mean_height * 2.5)
    print("eps: ", eps)
    db = DBSCAN(eps=eps, min_samples=2, metric='chebyshev').fit(arr)

    labels = db.labels_

    # Distinct cluster labels in first-seen order; -1 (noise) is skipped.
    cluster_label = []
    for ele in labels:
        if ele not in cluster_label and ele != -1:
            cluster_label.append(ele)

    # Group the digits by cluster label.
    a_e_dict = {k: [] for k in cluster_label}
    choice_m_numbers_list = []
    for index, ele in enumerate(labels):
        if ele != -1:
            a_e_dict[ele].append(digital_list[index])

    # Each cluster may yield several number blocks.
    for ele in cluster_label:
        cluster = a_e_dict[ele]
        choice_m_numbers_list += cluster2choice_m(cluster, mean_width)

    all_list_nums = [ele["numbers"] for ele in choice_m_numbers_list]
    all_nums_len = [len(ele) for ele in all_list_nums]
    all_nums = list(chain.from_iterable(all_list_nums))

    # Expected number of questions per block: the most frequent block length,
    # or the longest block when no length occurs at least twice.
    counts = np.bincount(np.array(all_nums_len))
    if np.max(counts) < 2:
        mode_value = max(all_nums_len)
    else:
        mode_value = np.argmax(counts)
        # Re-read the value from the Python list instead of keeping the
        # numpy result of argmax.
        mode_value = all_nums_len[np.where(np.array(all_nums_len) == mode_value)[0][-1]]

    if mode_value > 1:  # fill in missing question numbers
        # Indices of blocks whose length differs from the expected length.
        error_index_list = list(np.where(np.array(all_nums_len) != mode_value)[0])

        # Mean height of the blocks that already have the expected length.
        all_height = [ele["loc"][3] - ele["loc"][1] for index, ele
                      in enumerate(choice_m_numbers_list) if index not in error_index_list]
        choice_m_mean_height = int(sum(all_height) / len(all_height))

        for e_index in list(error_index_list):
            current_choice_m = choice_m_numbers_list[e_index]
            current_numbers_list = list(all_list_nums[e_index])
            current_len = all_nums_len[e_index]
            # Number of placeholders to add. Negative for blocks longer than
            # mode_value; then the pads are empty and nothing is changed.
            dif = mode_value - current_len

            if 1 in current_numbers_list:
                # The block contains question 1: only padding at the end is
                # considered (cond1 is forced to False).
                t2 = current_numbers_list + [-1] * dif
                infer_t1_list = infer_number(t2)  # pad at the end
                infer_t2_list = infer_number(t2)  # pad at the end
                cond1 = False
                cond2 = True
            else:
                t1_cond = [True] * dif
                t2_cond = [True] * dif

                t1 = [-1] * dif + current_numbers_list
                infer_t1_list = infer_number(t1)  # pad at the front
                t2 = current_numbers_list + [-1] * dif
                infer_t2_list = infer_number(t2)  # pad at the end

                # A candidate is rejected when an inferred number is already
                # present elsewhere (or is 0 for the front padding).
                for i in range(0, dif):
                    t1_infer = infer_t1_list[i]
                    t2_infer = infer_t2_list[-i - 1]
                    if t1_infer == 0 or t1_infer in all_nums:
                        t1_cond[i] = False
                    if t2_infer in all_nums:
                        t2_cond[i] = False
                cond1 = not (False in t1_cond)
                cond2 = not (False in t2_cond)

            if cond1 and not cond2:
                # Front padding wins: move the top edge up.
                current_loc = current_choice_m["loc"]
                current_height = current_loc[3] - current_loc[1]

                infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
                choice_m_numbers_list[e_index]["loc"][1] = current_loc[1] - infer_height
                # Recompute mid_y for the enlarged box.
                choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
                                                            (choice_m_numbers_list[e_index]["loc"][3] -
                                                             choice_m_numbers_list[e_index]["loc"][1]) // 2)
                choice_m_numbers_list[e_index]["numbers"] = infer_t1_list
                all_nums.extend(infer_t1_list[:dif])
            if not cond1 and cond2:
                # End padding wins: move the bottom edge down, clamped to the
                # region height.
                current_loc = current_choice_m["loc"]
                current_height = current_loc[3] - current_loc[1]

                infer_height = max((choice_m_mean_height - current_height), int(dif * current_height / current_len))
                infer_bottom = min(current_loc[3] + infer_height, limit_height-1)
                # Always true after the min() clamp above.
                if infer_bottom <= limit_height:
                    choice_m_numbers_list[e_index]["loc"][3] = infer_bottom
                    # Recompute mid_y for the enlarged box.
                    choice_m_numbers_list[e_index]["loc"][5] = (choice_m_numbers_list[e_index]["loc"][1] +
                                                                (choice_m_numbers_list[e_index]["loc"][3] -
                                                                 choice_m_numbers_list[e_index]["loc"][1]) // 2)
                    choice_m_numbers_list[e_index]["numbers"] = infer_t2_list
                    all_nums.extend(infer_t2_list[-dif:])
            else:
                # cond1 == cond2 == True: the choice questions were already
                # sorted horizontally during inference, so this case is
                # assumed not to occur. (This branch is also reached after
                # the front-padding case above; nothing is done either way.)
                pass

    # Layout direction: 180 when the block is at least as tall as it is wide,
    # otherwise 90.
    for ele in choice_m_numbers_list:
        loc = ele["loc"]
        if loc[3] - loc[1] >= loc[2] - loc[0]:
            direction = 180
        else:
            direction = 90
        ele.update({'direction': direction})
    # (A disabled debug step used to write these blocks into the XML at
    # xml_path via create_xml.)

    # Tallest blocks first, so every row is seeded by its tallest block.
    choice_m_numbers_list = sorted(choice_m_numbers_list, key=lambda x: x['loc'][3] - x['loc'][1], reverse=True)
    # NOTE(review): choice_m_numbers_right_limit is never read by live code.
    choice_m_numbers_right_limit = max([ele['loc'][2] for ele in choice_m_numbers_list])
    remain_len = len(choice_m_numbers_list)
    choice_m_list = list()
    need_revised_choice_m_list = list()
    while remain_len > 0:
        # First collect the blocks that share a row, then use the letters to
        # delimit each block.
        random_index = 0
        ymax_limit = choice_m_numbers_list[random_index]["loc"][3]
        ymin_limit = choice_m_numbers_list[random_index]["loc"][1]

        # Blocks of the current row: mid_y strictly inside the seed's y-range.
        current_row_choice_m_d = [ele for ele in choice_m_numbers_list if ymin_limit < ele["loc"][5] < ymax_limit]
        current_row_choice_m_d = sorted(current_row_choice_m_d, key=lambda x: x["loc"][0])
        split_pix = sorted([ele["loc"][0] for ele in current_row_choice_m_d])  # sorted by xmin
        split_index = get_split_index(split_pix)
        # xmin of the first block of every group.
        split_pix = [split_pix[ele] for ele in split_index[:-1]]

        block_list = []
        for i in range(len(split_index) - 1):
            block = current_row_choice_m_d[split_index[i]: split_index[i + 1]]
            if len(block) > 1:
                # Merge the group into one block; the merged-away entries are
                # accounted for in remain_len here.
                remain_len = remain_len - (len(block) - 1)
                numbers_new = []
                loc_new = [[], [], [], []]
                for blk in block:
                    loc_old = blk["loc"]
                    numbers_new.extend(blk["numbers"])
                    for ii in range(4):
                        loc_new[ii].append(loc_old[ii])

                # Union bounding box of the group.
                loc_new[0] = min(loc_new[0])
                loc_new[1] = min(loc_new[1])
                loc_new[2] = max(loc_new[2])
                loc_new[3] = max(loc_new[3])

                # mid_x, mid_y of the union box.
                loc_new.append(loc_new[0] + (loc_new[2] - loc_new[0]) // 2)
                loc_new.append(loc_new[1] + (loc_new[3] - loc_new[1]) // 2)

                block = [{"numbers": sorted(numbers_new), "loc": loc_new, "direction": block[0]["direction"]}]

            block_list.extend(block)

        current_row_choice_m_d = block_list
        # OCR characters whose vertical centre lies in the current row.
        current_row_chars = [ele for ele in chars_list
                             if ymin_limit < (ele["location"]["top"] + ele["location"]["height"] // 2) < ymax_limit]

        # Right boundary for the last block of the row.
        split_pix.append(round(split_pix[-1] + choice_s_width * 1.2))
        for i in range(0, len(split_index) - 1):
            # NOTE(review): these limits are taken from split_index, which is
            # used above as list indices, while the pixel boundaries were just
            # stored in split_pix. Looks like split_pix[i] / split_pix[i + 1]
            # may have been intended -- TODO confirm with get_split_index.
            left_limit = split_index[i]
            right_limit = split_index[i + 1]
            block_chars = [ele for ele in current_row_chars
                           if left_limit < (ele["location"]["left"] + ele["location"]["width"] // 2) < right_limit]

            # Position in a_z encodes the option column (A=1 ... D=4, F=6,
            # G=7, H=8, T=9); '_' occupies the unused slots.
            # NOTE(review): "in a_z" is a substring test, so '' and
            # multi-character strings such as 'AB' also pass.
            a_z = '_ABCD_FGHT'
            letter_index = [a_z.index(ele['char'].upper()) for ele in block_chars if ele['char'].upper() in a_z]

            # Occurrence count per letter index.
            # NOTE(review): only read by a disabled loop; unused by live code.
            letter_index_times = {ele: 0 for ele in set(letter_index)}
            for l_index in letter_index:
                letter_index_times[l_index] += 1

            if (a_z.index("T") in letter_index) and (a_z.index("F") in letter_index):
                # True/False question.
                choice_option = "T, F"
                cols = 2
            else:
                if len(letter_index) < 1:
                    # No option letters recognised: default to four options.
                    tmp = 4
                    choice_option = 'A,B,C,D'
                else:
                    tmp = max(set(letter_index))
                    # Options from the smallest to the largest index seen.
                    choice_option = ",".join(a_z[min(letter_index):tmp + 1])
                # NOTE(review): tmp can be 0 when only '_' (or '') matched,
                # which would make the "// cols" below divide by zero.
                cols = tmp

            bias = 3  # pixels
            current_loc = current_row_choice_m_d[i]["loc"]
            # Option area: starts right of the numbers' xmax and is shifted
            # back into image coordinates.
            location = dict(xmin=(current_loc[2] + bias) + limit_left,
                            ymin=current_loc[1] + limit_top,

                            xmax=(right_limit - bias) + limit_left,
                            ymax=current_loc[3] + limit_top)

            # Best effort: tighten right/bottom edges from the image content;
            # on any failure the unadjusted box is kept.
            try:
                choice_m_img = utils.crop_region(image, location)
                right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
                if right_loc > 0:
                    location.update(dict(xmax=right_loc + location['xmin']))
                if bottom_loc > 0:
                    location.update(dict(ymax=bottom_loc + location['ymin']))
            except Exception as e:
                print(e)
                traceback.print_exc()

            tmp_w, tmp_h = location['xmax'] - location['xmin'], location['ymax'] - location['ymin'],
            numbers = current_row_choice_m_d[i]["numbers"]
            direction = current_row_choice_m_d[i]["direction"]
            if direction == 180:
                # One question per row, one option per column.
                choice_m = dict(class_name='choice_m',
                                number=numbers,
                                bounding_box=location,
                                choice_option=choice_option,
                                default_points=[5] * len(numbers),
                                direction=direction,
                                cols=cols,
                                rows=len(numbers),
                                single_width=tmp_w // cols,
                                single_height=tmp_h // len(numbers))
            else:
                # One question per column, one option per row.
                choice_m = dict(class_name='choice_m',
                                number=numbers,
                                bounding_box=location,
                                choice_option=choice_option,
                                default_points=[5] * len(numbers),
                                direction=direction,
                                cols=len(numbers),
                                rows=cols,
                                single_width=tmp_w // len(numbers),
                                single_height=tmp_h // cols
                                )

            # Boxes wider than two single-choice widths are split later.
            if tmp_w > 2 * choice_s_width:
                need_revised_choice_m_list.append(choice_m)
            else:
                choice_m_list.append(choice_m)

        remain_len = remain_len - len(current_row_choice_m_d)
        # Drop the processed blocks from the pending list.
        # NOTE(review): membership uses dict equality, so source blocks that
        # were merged into a new dict above are presumably not removed here.
        for ele in choice_m_numbers_list.copy():
            if ele in current_row_choice_m_d:
                choice_m_numbers_list.remove(ele)

        # NOTE(review): compares number blocks against OCR character dicts;
        # appears never to match -- TODO confirm and remove.
        for ele in choice_m_numbers_list.copy():
            if ele in current_row_chars:
                choice_m_numbers_list.remove(ele)

    # Over-wide results (original note: "a single row, not clustered") are
    # split into a left and a right choice_m.
    for i, revised_choice_m in enumerate(need_revised_choice_m_list):
        loc = revised_choice_m['bounding_box']
        # Left part: one single-choice width from xmin, then tightened.
        left_part_loc = loc.copy()
        left_part_loc.update({'xmax': loc['xmin']+choice_s_width})
        choice_m_img = utils.crop_region(image, left_part_loc)
        right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
        if right_loc > 0:
            left_part_loc.update(dict(xmax=right_loc + left_part_loc['xmin']))
        if bottom_loc > 0:
            left_part_loc.update(dict(ymax=bottom_loc + left_part_loc['ymin']))

        left_tmp_height = left_part_loc['ymax'] - left_part_loc['ymin']

        # Right part: everything 5 px right of the left part, then tightened.
        right_part_loc = loc.copy()
        right_part_loc.update({'xmin': left_part_loc['xmax']+5})
        choice_m_img = utils.crop_region(image, right_part_loc)
        right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
        if right_loc > 0:
            right_part_loc.update(dict(xmax=right_loc + right_part_loc['xmin']))
        if bottom_loc > 0:
            right_part_loc.update(dict(ymax=bottom_loc + right_part_loc['ymin']))

        right_tmp_height = right_part_loc['ymax'] - right_part_loc['ymin']

        # Row count of the right part, scaled by the height ratio; its
        # numbers continue after the last number of the left part.
        # NOTE(review): divides by zero when right_tmp_height is 0 or when
        # left_tmp_height < right_tmp_height; unlike the loop above this
        # section has no try/except.
        number_len = max(1, int(revised_choice_m['rows'] // (left_tmp_height // right_tmp_height)))
        number = [ele+revised_choice_m['number'][-1]+1 for ele in range(number_len)]
        rows = len(number)

        revised_choice_m.update({'bounding_box': left_part_loc})
        choice_m_list.append(revised_choice_m)

        # Shallow copy: the right part shares every field it does not
        # override with the left part.
        tmp = revised_choice_m.copy()
        tmp.update({'bounding_box': right_part_loc, 'number': number, 'rows': rows})
        choice_m_list.append(tmp)

    # Discard results smaller than one single-choice box.
    tmp = choice_m_list.copy()
    for ele in tmp:
        loc = ele["bounding_box"]
        w, h = loc['xmax'] - loc['xmin'], loc['ymax'] - loc['ymin']
        if w*h < choice_s_width*choice_s_height:
            choice_m_list.remove(ele)
    return choice_m_list
			
 
				+
			
 
				+
			
 
				+def infer_choice_m(image, tf_sheet, ocr, xml=None):
			
 
				+    infer_box_list = ocr2sheet(image, tf_sheet, ocr, xml)
			
 
				+    # print(sheet_region_list)
			
 
				+    choice_m_list = []
			
 
				+
			
 
				+    choice_s_h_list = [int(ele['bounding_box']['ymax']) - int(ele['bounding_box']['ymin']) for ele in tf_sheet
			
 
				+                       if ele['class_name'] == 'choice_s']
			
 
				+    if choice_s_h_list:
			
 
				+        choice_s_height = sum(choice_s_h_list) // len(choice_s_h_list)
			
 
				+    else:
			
 
				+        choice_s_height = 0
			
 
				+
			
 
				+    choice_s_w_list = [int(ele['bounding_box']['xmax']) - int(ele['bounding_box']['xmin']) for ele in tf_sheet
			
 
				+                       if ele['class_name'] == 'choice_s']
			
 
				+    if choice_s_w_list:
			
 
				+        choice_s_width = sum(choice_s_w_list) // len(choice_s_w_list)
			
 
				+
			
 
				+    else:
			
 
				+        choice_s_width = 0
			
 
				+
			
 
				+    for infer_box in infer_box_list:
			
 
				+        # {'loc': [240, 786, 1569, 1368]}
			
 
				+        loc = infer_box['loc']
			
 
				+        xmin, ymin, xmax, ymax = loc[0], loc[1], loc[2], loc[3]
			
 
				+        choice_flag = False
			
 
				+
			
 
				+        for ele in tf_sheet:
			
 
				+            if ele['class_name'] in ['choice_m', 'choice_s']:
			
 
				+                tf_loc = ele['bounding_box']
			
 
				+                tf_loc_l = tf_loc['xmin']
			
 
				+                tf_loc_t = tf_loc['ymin']
			
 
				+                if xmin < tf_loc_l < xmax and ymin < tf_loc_t < ymax:
			
 
				+                    choice_flag = True
			
 
				+                    break
			
 
				+
			
 
				+        if choice_flag:
			
 
				+            infer_image = utils.crop_region_direct(image, loc)
			
 
				+            try:
			
 
				+                save_dir = os.path.join(settings.MEDIA_ROOT, 'tmp')
			
 
				+                if not os.path.exists(save_dir):
			
 
				+                    os.makedirs(save_dir)
			
 
				+                save_path = os.path.join(save_dir, 'choice.jpeg')
			
 
				+                cv2.imwrite(save_path, infer_image)
			
 
				+                img_tmp = utils.read_single_img(save_path)
			
 
				+                os.remove(save_path)
			
 
				+                ocr = brain_api.get_ocr_text_and_coordinate(img_tmp, 'accurate', 'CHN_ENG')
			
 
				+            except Exception as e:
			
 
				+                print('write choice and ocr failed')
			
 
				+                traceback.print_exc()
			
 
				+                ocr = brain_api.get_ocr_text_and_coordinate(infer_image, 'accurate', 'CHN_ENG')
			
 
				+
			
 
				+            try:
			
 
				+                digital_list, chars_list, digital_mean_h, digital_mean_w = find_digital(ocr)
			
 
				+                choice_m = cluster_and_anti_abnormal(image, xml, digital_list, chars_list,
			
 
				+                                                     digital_mean_h, digital_mean_w,
			
 
				+                                                     choice_s_height, choice_s_width, loc)
			
 
				+
			
 
				+                choice_m_list.extend(choice_m)
			
 
				+            except Exception as e:
			
 
				+                traceback.print_exc()
			
 
				+                print('not found choice feature')
			
 
				+                pass
			
 
				+
			
 
				+    # print(choice_m_list)
			
 
				+    # tf_choice_sheet = [ele for ele in tf_sheet if ele['class_name'] == 'choice_m']
			
 
				+
			
 
				+    sheet_tmp = choice_m_list.copy()
			
 
				+    remove_index = []
			
 
				+    for i, region in enumerate(sheet_tmp):
			
 
				+        if i not in remove_index:
			
 
				+            box = region['bounding_box']
			
 
				+            for j, region_in in enumerate(sheet_tmp):
			
 
				+                box_in = region_in['bounding_box']
			
 
				+                iou = utils.cal_iou(box, box_in)
			
 
				+                if iou[0] > 0.85 and i != j:
			
 
				+                    choice_m_list.remove(region)
			
 
				+                    remove_index.append(j)
			
 
				+                    break
			
 
				+
			
 
				+    return choice_m_list
			
--- a/segment/sheet_resolve/analysis/sheet/ocr_key_words.py
+++ b/segment/sheet_resolve/analysis/sheet/ocr_key_words.py
@@ -0,0 +1,2932 @@
 
				+import re
			
 
				+
			
 
				+
			
 
				+def find_repeat(source, elmt):  # 去重后重新定位数字索引
			
 
				+    elmt_index = []
			
 
				+    s_index = 0
			
 
				+    e_index = len(source)
			
 
				+    while (s_index < e_index):
			
 
				+        try:
			
 
				+            temp = source.index(elmt, s_index, e_index)
			
 
				+            elmt_index.append(temp)
			
 
				+            s_index = temp + 1
			
 
				+        except ValueError:
			
 
				+            break
			
 
				+    return elmt_index
			
 
				+
			
 
				+
			
 
				+def ocr_key_words(rect,type_score_dict):  # 将ocr识别得到的文字与模型得到的type_score对应
			
 
				+    '''
			
 
				+    :param rect: OCR识别结果数组，格式：res = {'chars': [},'coordinates': [(),()},'words': []}
			
 
				+    :param type_score_dict: 模型得到的type_score(与模型得到的边框相对应)
			
 
				+    :return: 字典中添加word
			
 
				+    '''
			
 
				+    len_ocr = len(rect['chars'])
			
 
				+    xmin = type_score_dict['type_box'][0]
			
 
				+    ymin = type_score_dict['type_box'][1]
			
 
				+    xmax = type_score_dict['type_box'][2]
			
 
				+    ymax = type_score_dict['type_box'][3]
			
 
				+    words=[]
			
 
				+
			
 
				+    for j in range(len_ocr):
			
 
				+        if rect['coordinates'][j][0] - xmin > -30 and rect['coordinates'][j][1] - ymin > -30 and rect['coordinates'][j][2] - xmax < 30 and rect['coordinates'][j][3] - ymax < 30:
			
 
				+            word = rect['chars'][j]
			
 
				+            words.append(word)
			
 
				+    type_score_dict['words']= words
			
 
				+    type_score_dict_ocr = type_score_dict
			
 
				+
			
 
				+    return type_score_dict_ocr
			
 
				+
			
 
				+
			
 
				+def key_words(type_score_dict_ocr):  # 根据OCR结果结合关键字解析
			
 
				+
			
 
				+    total_score = 0
			
 
				+    volume_score = 0
			
 
				+    volume_structure_item = 0
			
 
				+    volume_structure = []
			
 
				+    Score_structure_item = 0
			
 
				+    Score_structure = []
			
 
				+    all_structure = {}
			
 
				+    keyword_volume = ['第卷', '第部']
			
 
				+    keyword_type = ['选择', '非选择题', '综合题', '问答题', '主观题', '客观题', '解答题','计算题']
			
 
				+    len_keyword_type = len(keyword_type)
			
 
				+    keyword_item1 = ['共分', '合计分', '总共分', '总计分', '小题满分', '本小题', '满分', '共计', '共.分', '合计.分', '总共.分', '总计.分', '小题满分.','本小题.', '满分.', '共计.']
			
 
				+    len_keyword_item1 = len(keyword_item1)
			
 
				+    keyword_item2 = ['每题分', '每小题分', '空分', '每小题.分', '每题.分', '空.分']  # '分/题'暂未考虑
			
 
				+    len_keyword_item2 = len(keyword_item2)
			
 
				+    keyword_item3 = ['共题', '共小题', '分小题', '本题小题', '共个小题', '分为小题', '分个小题','本大题共小题']
			
 
				+    len_keyword_item3 = len(keyword_item3)
			
 
				+    keyword_item4 = ['分']
			
 
				+    len_keyword_item4 = len(keyword_item4)
			
 
				+    keyword_item5 = ['分/题']
			
 
				+    len_keyword_item5 = len(keyword_item5)
			
 
				+    keyword_item5 = ['题', '.', '、']
			
 
				+
			
 
				+    ocr_1 = type_score_dict_ocr['words']
			
 
				+    s = ''.join((str(x) for x in ocr_1))  # 合并为一个字符串
			
 
				+    if s.find('IV') != -1 or s.find('Ⅳ') != -1:
			
 
				+        s = s.replace('Ⅳ', '4')
			
 
				+        s = s.replace('IV', '4')
			
 
				+    elif s.find('III') != -1 or s.find('Ⅲ') != -1:
			
 
				+        s = s.replace('Ⅲ', '3')
			
 
				+        s = s.replace('III', '3')
			
 
				+    elif s.find('II') != -1 or s.find('Ⅱ') != -1:
			
 
				+        s = s.replace('Ⅱ', '2')
			
 
				+        s = s.replace('II', '2')
			
 
				+    elif s.find('VI') != -1 or s.find('Ⅵ') != -1:
			
 
				+        s = s.replace('Ⅵ', '6')
			
 
				+        s = s.replace('VI', '6')
			
 
				+    elif s.find('VII') != -1 or s.find('Ⅶ') != -1:
			
 
				+        s = s.replace('Ⅶ', '7')
			
 
				+        s = s.replace('VII', '7')
			
 
				+    elif s.find('VIII') != -1 or s.find('Ⅷ') != -1:
			
 
				+        s = s.replace('Ⅷ', '8')
			
 
				+        s = s.replace('VIII', '8')
			
 
				+    elif s.find('IX') != -1 or s.find('Ⅸ') != -1:
			
 
				+        s = s.replace('Ⅸ', '9')
			
 
				+        s = s.replace('IX', '9')
			
 
				+    elif s.find('X') != -1 or s.find('Ⅹ') != -1:
			
 
				+        s = s.replace('Ⅹ', '10')
			
 
				+        s = s.replace('X', '10')
			
 
				+    elif s.find('I') != -1 or s.find('Ⅰ') != -1:
			
 
				+        s = s.replace('Ⅰ', '1')
			
 
				+        s = s.replace('I', '1')
			
 
				+    elif s.find('V') != -1 or s.find('Ⅴ') != -1:
			
 
				+        s = s.replace('Ⅴ', '5')
			
 
				+        s = s.replace('V', '5')
			
 
				+
			
 
				+    C_s = re.sub("[A-Za-z0-9\!\%\[\]\,\。]", "", s)  # 提取汉字
			
 
				+    E_s = ''.join(re.findall(r'[A-Za-z]', s))  # 提取英文字符
			
 
				+    N_s = re.findall('\d+', s)  # 提取阿拉伯数字
			
 
				+
			
 
				+    if len(N_s) == 1 and len(N_s[0]) < 6 and len(E_s) == 0 and (C_s == keyword_item5[0] or C_s == keyword_item5[1] or len(C_s) == 0):
			
 
				+        type_score_dict_ocr['item_N'] = int(N_s[0])
			
 
				+        type_score_dict_ocr['item_total_score'] = -1
			
 
				+        type_score_dict_ocr['item_count'] = -1
			
 
				+        type_score_dict_ocr['item_score'] = -1
			
 
				+        Score_structure_item = type_score_dict_ocr
			
 
				+        Score_structure.append(Score_structure_item)
			
 
				+        all_structure = {'volume_structure': -1,
			
 
				+                         'Score_structure': Score_structure}
			
 
				+    elif N_s != []:
			
 
				+        for iiii in range(len(keyword_volume)):
			
 
				+            Score_structure_item = {}
			
 
				+            if C_s.find(keyword_volume[iiii]) != -1:
			
 
				+                '''
			
 
				+                对应试卷中存在分卷信息的情况,根据包含数字的个数分为5类，暂定包含信息的有效数字个数小于5，并处理小题分数和总分可能包含小数点的情况
			
 
				+                暂定小题个数不包含小数
			
 
				+                暂定总分数中不存在有意义的小数位
			
 
				+                '''
			
 
				+                if len(N_s) == 1:
			
 
				+                    num_index = s.index(N_s[0])
			
 
				+                    num_infer = s[num_index - len(N_s[0])]
			
 
				+                    num_back = s[num_index + len(N_s[0])]
			
 
				+                    if num_back == '分':  # 第卷/部*分
			
 
				+                        volume_score = int(N_s[0])
			
 
				+                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                        type_score_dict_ocr['volume_score'] = -1
			
 
				+                    elif num_back == '卷' or num_back == '部':  # 第*卷
			
 
				+                        volume_N = int(N_s[0])
			
 
				+                        type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                        type_score_dict_ocr['volume_total_score'] = -1
			
 
				+                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                        type_score_dict_ocr['volume_score'] = -1
			
 
				+                elif len(N_s) == 2:
			
 
				+                    num_index1 = s.index(N_s[0])
			
 
				+                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                    all_1 = find_repeat(s, N_s[1])
			
 
				+                    temp1 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[1]:
			
 
				+                            temp1 = temp1 + 1
			
 
				+                    num_index2 = all_1[temp1]
			
 
				+                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                    if isinstance(N_s[0], str):
			
 
				+                        N_s[0] = int(N_s[0])
			
 
				+                    if isinstance(N_s[1], str):
			
 
				+                        N_s[1] = int(N_s[1])
			
 
				+                    for k in range(len_keyword_item1):
			
 
				+                        if C_s.find(keyword_item1[k]) != -1:
			
 
				+                            if (num_back1 == '卷' or num_back1 == '部') and num_back2 == '分':  # 第*卷*分
			
 
				+                                volume_N = N_s[0]
			
 
				+                                volume_score = N_s[1]
			
 
				+                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                type_score_dict_ocr['volume_count'] = -1
			
 
				+                                type_score_dict_ocr['volume_score'] = -1
			
 
				+                                break
			
 
				+                            elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分':  # 第卷,共*.*分
			
 
				+                                volume_N = -1
			
 
				+                                volume_score = N_s[0]
			
 
				+                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                type_score_dict_ocr['volume_count'] = -1
			
 
				+                                type_score_dict_ocr['volume_score'] = -1
			
 
				+                                break
			
 
				+                            else:
			
 
				+                                for l in range(len_keyword_item2):
			
 
				+                                    if C_s.find(keyword_item2[l]) != -1:
			
 
				+                                        if (num_infer1 == '题' or num_infer1 == '空') and num_back2 == '分':  # 第卷，每小题*分，共*分
			
 
				+                                            volume_score = N_s[1]
			
 
				+                                            item_score = N_s[0]
			
 
				+                                            item_count = int(volume_score / item_score)
			
 
				+                                            type_score_dict_ocr['volume_N'] = -1
			
 
				+                                            type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                            type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                            type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                            break
			
 
				+                                        elif (num_infer2 == '题' or num_infer2 == '空') and num_back1 == '分':  # 第卷，共*分 ，每小题*分
			
 
				+                                            volume_score = N_s[0]
			
 
				+                                            item_score = N_s[1]
			
 
				+                                            item_count = int(volume_score / item_score)
			
 
				+                                            type_score_dict_ocr['volume_N'] = -1
			
 
				+                                            type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                            type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                            type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                            break
			
 
				+                                    elif l == len(keyword_item2) - 1:
			
 
				+                                        for m in range(len_keyword_item3):
			
 
				+                                            if C_s.find(keyword_item3[l]) != -1:
			
 
				+                                                if num_back2 == '分':  # 第卷，共*小题，共*分
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_count = N_s[0]
			
 
				+                                                    item_score = volume_score / item_count
			
 
				+                                                    type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                elif num_back1 == '分':  # 第卷，共*分 ，共*小题
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_count = N_s[1]
			
 
				+                                                    item_score = volume_score / item_count
			
 
				+                                                    type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                    break
			
 
				+                            break
			
 
				+                        elif k == len_keyword_item1 - 1:
			
 
				+                            for l in range(len_keyword_item2):
			
 
				+                                if C_s.find(keyword_item2[l]) != -1:
			
 
				+                                    for m in range(len_keyword_item3):
			
 
				+                                        if C_s.find(keyword_item3[l]) != -1:
			
 
				+                                            if num_back2 == '分':  # 第卷，共*小题，每小题*分
			
 
				+                                                item_count = N_s[0]
			
 
				+                                                item_score = N_s[1]
			
 
				+                                                volume_score = item_score * item_count
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_back1 == '分':  # 第卷，每小题*分 ，共*小题
			
 
				+                                                item_count = N_s[1]
			
 
				+                                                item_score = N_s[0]
			
 
				+                                                volume_score = item_count * item_score
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                        elif m == len_keyword_item3 - 1:
			
 
				+                                            if num_back2 == '分':  # 第卷，每小题*.*分
			
 
				+                                                volume_score = -1
			
 
				+                                                item_count = -1
			
 
				+                                                item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+
			
 
				+                                break
			
 
				+                            break
			
 
				+                elif len(N_s) == 3:
			
 
				+                    num_index1 = s.index(N_s[0])
			
 
				+                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                    all_1 = find_repeat(s, N_s[1])
			
 
				+                    temp1 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[1]:
			
 
				+                            temp1 = temp1 + 1
			
 
				+                    num_index2 = all_1[temp1]
			
 
				+                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                    all_2 = find_repeat(s, N_s[2])
			
 
				+                    temp2 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[2]:
			
 
				+                            temp2 = temp2 + 1
			
 
				+                    for jj in range(len(N_s[1])):
			
 
				+                        if N_s[1][jj] == N_s[2]:
			
 
				+                            temp2 = temp2 + 1
			
 
				+                    num_index3 = all_2[temp2]
			
 
				+                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                    if isinstance(N_s[0], str):
			
 
				+                        N_s[0] = int(N_s[0])
			
 
				+                    if isinstance(N_s[1], str):
			
 
				+                        N_s[1] = int(N_s[1])
			
 
				+                    if isinstance(N_s[2], str):
			
 
				+                        N_s[2] = int(N_s[2])
			
 
				+                    for l in range(len_keyword_item3):
			
 
				+                        if C_s.find(keyword_item3[l]) != -1:
			
 
				+                            for m in range(len_keyword_item2):
			
 
				+                                if C_s.find(keyword_item2[m]) != -1:
			
 
				+                                    if (num_back1 == '卷' or num_back1 == '部') and num_back3 == '分':  # 第*卷，共*题，每题*分
			
 
				+                                        volume_N = N_s[0]
			
 
				+                                        item_count = N_s[1]
			
 
				+                                        item_score = N_s[2]
			
 
				+                                        volume_score = item_count * item_score
			
 
				+                                        type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif (num_back1 == '卷' or num_back1 == '部') and num_back2 == '分':  # 第*卷，每题*分,共*题
			
 
				+                                        volume_N = N_s[0]
			
 
				+                                        item_count = N_s[2]
			
 
				+                                        item_score = N_s[1]
			
 
				+                                        volume_score = item_count * item_score
			
 
				+                                        type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif (num_back1 == '卷' or num_back1 == '部') and num_back2 == '分':  # 第卷，每题*.*分,共*题
			
 
				+                                        volume_N = -1
			
 
				+                                        item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                        item_count = N_s[2]
			
 
				+                                        volume_score = item_score * item_count
			
 
				+                                        type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif (num_back1 == '卷' or num_back1 == '部') and num_back3 == '分':  # 第卷，共*题，每题*.*分
			
 
				+                                        volume_N = -1
			
 
				+                                        item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                        item_count = N_s[0]
			
 
				+                                        volume_score = item_score * item_count
			
 
				+                                        type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif num_back1 == '分' and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分':  # 第卷,共*分，每题*分,共*题
			
 
				+                                        volume_score = N_s[0]
			
 
				+                                        item_count = N_s[2]
			
 
				+                                        item_score = N_s[1]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif num_back1 == '分' and (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分':  # 第卷,共*分，共*题，每题*分
			
 
				+                                        volume_score = N_s[0]
			
 
				+                                        item_count = N_s[1]
			
 
				+                                        item_score = N_s[2]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif num_back2 == '分' and (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分':  # 第卷,共*题，共*分，每题*分
			
 
				+                                        volume_score = N_s[1]
			
 
				+                                        item_count = N_s[0]
			
 
				+                                        item_score = N_s[2]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif num_infer3 == '分' and (num_infer1 == '题' or num_infer1 == '空') and num_back1 == '分':  # 第卷,每题*分，共*题，共*分
			
 
				+                                        volume_score = N_s[2]
			
 
				+                                        item_count = N_s[1]
			
 
				+                                        item_score = N_s[0]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif num_back3 == '分' and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分':  # 第卷,共*题，每题*分，共*分
			
 
				+                                        volume_score = N_s[2]
			
 
				+                                        item_count = N_s[0]
			
 
				+                                        item_score = N_s[1]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                    elif num_back3 == '分' and (num_infer1 == '题' or num_infer1 == '空') and num_back1 == '分':  # 第卷,每题*分，共*题，共*分
			
 
				+                                        volume_score = N_s[2]
			
 
				+                                        item_count = N_s[1]
			
 
				+                                        item_score = N_s[0]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                        break
			
 
				+                                elif m == len_keyword_item2 - 1:
			
 
				+                                    for n in range(len_keyword_item1):
			
 
				+                                        if C_s.find(keyword_item1[n]) != -1:
			
 
				+                                            if (num_back1 == '卷' or num_back1 == '部') and (num_back2 == '题' or num_back2 == '小') and num_back3 == '分':  # 第*卷，共*题，共*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[2]
			
 
				+                                                item_count = N_s[1]
			
 
				+                                                item_score = volume_score / item_count
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_back3 == '题' or num_back3 == '小') and num_back2 == '分':  # 第*卷，共*分,共*题
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[1]
			
 
				+                                                item_count = N_s[2]
			
 
				+                                                item_score = volume_score / item_count
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_back1 == '.' and num_infer2 == '.' and (num_back3 == '题' or num_back3 == '小') and num_back2 == '分':  # 第卷，共*.*分,共*题
			
 
				+                                                volume_N = -1
			
 
				+                                                volume_score = N_s[0]
			
 
				+                                                item_count = N_s[2]
			
 
				+                                                item_score = volume_score / item_count
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_back2 == '.' and num_infer3 == '.' and (num_back1 == '题' or num_back1 == '小') and num_back3 == '分':  # 第卷，共*题，共*.*分
			
 
				+                                                volume_N = -1
			
 
				+                                                volume_score = N_s[1]
			
 
				+                                                item_count = N_s[0]
			
 
				+                                                item_score = volume_score / item_count
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+
			
 
				+                                break
			
 
				+                            break
			
 
				+                        elif l == len_keyword_item3 - 1:
			
 
				+                            for p in range(len_keyword_item1):
			
 
				+                                if C_s.find(keyword_item1[p]) != -1:
			
 
				+                                    for q in range(len_keyword_item2):
			
 
				+                                        if C_s.find(keyword_item2[q]) != -1:
			
 
				+                                            if (num_back1 == '卷' or num_back1 == '部') and num_back2 == '分' and num_back3 == '分':  # 第*卷，共*分，每题*分 /  第*卷，每题*分，共*分
			
 
				+                                                volume_N = int(N_s[0])
			
 
				+                                                if N_s[1] > N_s[2]:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[2]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = N_s[1]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '分':  # 第卷，共*.*分，每题*分 /  第卷，每题*.*分，共*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if N_s[0] > N_s[2]:
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = N_s[2]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分':  # 第卷，共*分，每题*.*分 /  第卷，每题*分，共*.*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if N_s[0] > N_s[1]:
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[0]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+
			
 
				+
			
 
				+                                    break
			
 
				+                            break
			
 
				+                elif len(N_s) == 4:
			
 
				+                    num_index1 = s.index(N_s[0])
			
 
				+                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                    all_1 = find_repeat(s, N_s[1])
			
 
				+                    temp1 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[1]:
			
 
				+                            temp1 = temp1 + 1
			
 
				+                    num_index2 = all_1[temp1]
			
 
				+                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                    all_2 = find_repeat(s, N_s[2])
			
 
				+                    temp2 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[2]:
			
 
				+                            temp2 = temp2 + 1
			
 
				+                    for jj in range(len(N_s[1])):
			
 
				+                        if N_s[1][jj] == N_s[2]:
			
 
				+                            temp2 = temp2 + 1
			
 
				+                    num_index3 = all_2[temp2]
			
 
				+                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                    all_3 = find_repeat(s, N_s[3])
			
 
				+                    temp3 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[3]:
			
 
				+                            temp3 = temp3 + 1
			
 
				+                    for jj in range(len(N_s[1])):
			
 
				+                        if N_s[1][jj] == N_s[3]:
			
 
				+                            temp3 = temp3 + 1
			
 
				+                    for kk in range(len(N_s[2])):
			
 
				+                        if N_s[2][kk] == N_s[3]:
			
 
				+                            temp3 = temp3 + 1
			
 
				+                    num_index4 = all_3[temp3]
			
 
				+                    num_infer4 = s[num_index4 - len(N_s[3])]
			
 
				+                    num_back4 = s[num_index4 + len(N_s[3])]
			
 
				+                    if isinstance(N_s[0], str):
			
 
				+                        N_s[0] = int(N_s[0])
			
 
				+                    if isinstance(N_s[1], str):
			
 
				+                        N_s[1] = int(N_s[1])
			
 
				+                    if isinstance(N_s[2], str):
			
 
				+                        N_s[2] = int(N_s[2])
			
 
				+                    if isinstance(N_s[3], str):
			
 
				+                        N_s[3] = int(N_s[3])
			
 
				+                    for l in range(len_keyword_item1):
			
 
				+                        if C_s.find(keyword_item1[l]) != -1:
			
 
				+                            for m in range(len_keyword_item2):
			
 
				+                                if C_s.find(keyword_item2[m]) != -1:
			
 
				+                                    for n in range(len_keyword_item3):
			
 
				+                                        if C_s.find(keyword_item3[n]) != -1:
			
 
				+                                            if (num_back1 == '卷' or num_back1 == '部') and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back4 == '分':  # 第*卷，每题*分，共*题，共*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[3]
			
 
				+                                                item_count = N_s[2]
			
 
				+                                                item_score = N_s[1]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back4 == '分':  # 第*卷，共*题，每题*分，共*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[3]
			
 
				+                                                item_count = N_s[1]
			
 
				+                                                item_score = N_s[2]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer4 == '题' or num_infer4 == '空') and num_back4 == '分' and num_back2 == '分':  # 第*卷，共*分，共*题，每题*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[1]
			
 
				+                                                item_count = N_s[2]
			
 
				+                                                item_score = N_s[3]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer4 == '题' or num_infer4 == '空') and num_back4 == '分' and num_back3 == '分':  # 第*卷，共*题，共*分，每题*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[2]
			
 
				+                                                item_count = N_s[1]
			
 
				+                                                item_score = N_s[3]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back2 == '分':  # 第*卷，共*分，每题*分，共*题
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[1]
			
 
				+                                                item_count = N_s[3]
			
 
				+                                                item_score = N_s[2]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back3 == '分':  # 第*卷，每题*分，共*分，共*题
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[2]
			
 
				+                                                item_count = N_s[3]
			
 
				+                                                item_score = N_s[1]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+
			
 
				+                                            elif (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back3 == '分':  # 第卷，每题*.*分，共*分，共*题/第卷，共*.*分，每题*分，共*题
			
 
				+                                                volume_N = -1
			
 
				+                                                if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = N_s[2]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back2 == '.' and num_infer3 == '.') and num_back1 == '分' and num_back3 == '分':  # 第卷，每题*分，共*.*分，共*题/第卷，共*分，每题*.*分，共*题
			
 
				+                                                volume_N = -1
			
 
				+                                                if int(N_s[0]) > int(N_s[1]):
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[0]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分':  # 第卷，共*题，共*.*分，每题*分/第卷，共*题，每题*.*分,共*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if N_s[1] > N_s[3]:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[3]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[4]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分':  # 第卷，共*题，共*分，每题*.*分/第卷，共*题，每题*分,共*.*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if int(N_s[1]) > int(N_s[2]):
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = N_s[1]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '.' and num_infer2 == '.') and num_back2 == '分' and num_back4 == '分':  # 第卷，共*.*分，共*题，每题*分/第卷，每题*.*分，共*题，共*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if int(N_s[0]) > int(N_s[3]):
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = N_s[3]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[3]
			
 
				+                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back3 == '.' and num_infer4 == '.') and num_back1 == '分' and num_back4 == '分':  # 第卷，共*分，共*题，每题*.*分/第卷，每题*分，共*题，共*.*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = N_s[2] + '.' + N_s[3]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = N_s[0]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                        elif n == len_keyword_item3 - 1:
			
 
				+                                            if (num_back1 == '卷' or num_back1 == '部') and num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and num_back4 == '分':  # 第*卷，每题*.*分，共*分/第*卷，共*.*分,每题*分
			
 
				+                                                volume_N = int(N_s[0])
			
 
				+                                                if N_s[1] > N_s[3]:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[3]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[3]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and num_back3 == '.' and num_infer4 == '.' and num_back2 == '分' and num_back4 == '分':  # 第*卷，每题*分，共*.*分/第*卷，共*分,每题*.*分
			
 
				+                                                volume_N = int(N_s[0])
			
 
				+                                                if int(N_s[1]) > int(N_s[2]):
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = N_s[1]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif num_back1 == '.' and num_infer2 == '.' and num_back2 == '分' and num_back3 == '.' and num_infer4 == '.' and num_back4 == '分':  # 第卷，每题*.*分，共*.*分/第卷，共*.*分,每题*.*分
			
 
				+                                                volume_N = -1
			
 
				+                                                if N_s[0] > N_s[2]:
			
 
				+                                                    volume_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                    item_score = N_s[3]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[3]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back3 == '分':  # 第*卷，每题*分，共*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[2]
			
 
				+                                                item_count = -1
			
 
				+                                                item_score = N_s[1]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_infer3 == '题' or num_infer3 == '空') and num_back2 == '分' and num_back3 == '分':  # 第*卷，共*分,每题*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                volume_score = N_s[1]
			
 
				+                                                item_count = -1
			
 
				+                                                item_score = N_s[2]
			
 
				+                                                type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+
			
 
				+                                    break
			
 
				+                            break
			
 
				+                elif len(N_s) == 5:
			
 
				+                    num_index1 = s.index(N_s[0])
			
 
				+                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                    all_1 = find_repeat(s, N_s[1])
			
 
				+                    temp1 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[1]:
			
 
				+                            temp1 = temp1 + 1
			
 
				+                    num_index2 = all_1[temp1]
			
 
				+                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                    all_2 = find_repeat(s, N_s[2])
			
 
				+                    temp2 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[2]:
			
 
				+                            temp2 = temp2 + 1
			
 
				+                    for jj in range(len(N_s[1])):
			
 
				+                        if N_s[1][jj] == N_s[2]:
			
 
				+                            temp2 = temp2 + 1
			
 
				+                    num_index3 = all_2[temp2]
			
 
				+                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                    all_3 = find_repeat(s, N_s[3])
			
 
				+                    temp3 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[3]:
			
 
				+                            temp3 = temp3 + 1
			
 
				+                    for jj in range(len(N_s[1])):
			
 
				+                        if N_s[1][jj] == N_s[3]:
			
 
				+                            temp3 = temp3 + 1
			
 
				+                    for kk in range(len(N_s[2])):
			
 
				+                        if N_s[2][kk] == N_s[3]:
			
 
				+                            temp3 = temp3 + 1
			
 
				+                    num_index4 = all_3[temp3]
			
 
				+                    num_infer4 = s[num_index4 - len(N_s[3])]
			
 
				+                    num_back4 = s[num_index4 + len(N_s[3])]
			
 
				+                    all_4 = find_repeat(s, N_s[4])
			
 
				+                    temp4 = 0
			
 
				+                    for ii in range(len(N_s[0])):
			
 
				+                        if N_s[0][ii] == N_s[4]:
			
 
				+                            temp4 = temp4 + 1
			
 
				+                    for jj in range(len(N_s[1])):
			
 
				+                        if N_s[1][jj] == N_s[4]:
			
 
				+                            temp4 = temp4 + 1
			
 
				+                    for kk in range(len(N_s[2])):
			
 
				+                        if N_s[2][kk] == N_s[4]:
			
 
				+                            temp4 = temp4 + 1
			
 
				+                    for ll in range(len(N_s[3])):
			
 
				+                        if N_s[3][ll] == N_s[4]:
			
 
				+                            temp4 = temp4 + 1
			
 
				+                    num_index5 = all_4[temp4]
			
 
				+                    num_infer5 = s[num_index5 - len(N_s[4])]
			
 
				+                    num_back5 = s[num_index5 + len(N_s[4])]
			
 
				+                    if isinstance(N_s[0], str):
			
 
				+                        N_s[0] = int(N_s[0])
			
 
				+                    if isinstance(N_s[1], str):
			
 
				+                        N_s[1] = int(N_s[1])
			
 
				+                    if isinstance(N_s[2], str):
			
 
				+                        N_s[2] = int(N_s[2])
			
 
				+                    if isinstance(N_s[3], str):
			
 
				+                        N_s[3] = int(N_s[3])
			
 
				+                    if isinstance(N_s[4], str):
			
 
				+                        N_s[4] = int(N_s[4])
			
 
				+                    for l in range(len_keyword_item1):
			
 
				+                        if C_s.find(keyword_item1[l]) != -1:
			
 
				+                            for m in range(len_keyword_item2):
			
 
				+                                if C_s.find(keyword_item2[m]) != -1:
			
 
				+                                    for n in range(len_keyword_item3):
			
 
				+                                        if C_s.find(keyword_item3[n]) != -1:
			
 
				+                                            if (num_back1 == '卷' or num_back1 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back4 == '分':  # 第*卷，每题*.*分，共*分，共*题/第卷，共*.*分，每题*分，共*题
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                if N_s[1] > N_s[3]:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[3]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[3]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back2 == '分' and num_back4 == '分':  # 第*卷，每题*分，共*.*分，共*题/第卷，共*分，每题*.*分，共*题
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                if N_s[1] > N_s[2]:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = N_s[1]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_back3 == '.' and num_infer4 == '.') and num_back4 == '分' and num_back5 == '分':  # 第卷，共*题，共*.*分，每题*分/第卷，共*题，每题*.*分,共*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                if N_s[2] > N_s[4]:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = N_s[4]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[4]
			
 
				+                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_back4 == '.' and num_infer5 == '.') and num_back3 == '分' and num_back5 == '分':  # 第*卷，共*题，共*分，每题*.*分/第卷，共*题，每题*分,共*.*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                if N_s[2] > N_s[3]:
			
 
				+                                                    volume_score = N_s[2]
			
 
				+                                                    item_score = float(N_s[3] + '.' + N_s[4])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[3]
			
 
				+                                                    item_score = N_s[2]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_back2 == '.' and num_infer3 == '.') and num_back3 == '分' and num_back5 == '分':  # 第*卷，共*.*分，共*题，每题*分/第*卷，每题*.*分，共*题，共*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                if N_s[1] > N_s[4]:
			
 
				+                                                    volume_score = N_s[1]
			
 
				+                                                    item_score = N_s[4]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[4]
			
 
				+                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                            elif (num_back1 == '卷' or num_back1 == '部') and (num_back4 == '.' and num_infer5 == '.') and num_back2 == '分' and num_back5 == '分':  # 第*卷，共*分，共*题，每题*.*分/第卷，每题*分，共*题，共*.*分
			
 
				+                                                volume_N = N_s[0]
			
 
				+                                                if N_s[1] > N_s[3]:
			
 
				+                                                    volume_score = N_s[0]
			
 
				+                                                    item_score = float(N_s[4] + '.' + N_s[4])
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    volume_score = N_s[3]
			
 
				+                                                    item_score = N_s[1]
			
 
				+                                                    item_count = int(volume_score / item_score)
			
 
				+                                                    type_score_dict_ocr['volume_N'] = volume_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = volume_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                    type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                    break
			
 
				+                                    break
			
 
				+                            break
			
 
				+                if 'volume_N' not in type_score_dict_ocr.keys():
			
 
				+                    all_structure = {'volume_structure': -1,
			
 
				+                                     'Score_structure': -1}
			
 
				+                    break
			
 
				+                else:
			
 
				+                    for i in range(len_keyword_type):
			
 
				+                        if C_s.find(keyword_type[i]) != -1 and C_s.find('非') != -1:
			
 
				+                            type_score_dict_ocr['keyword_type'] = keyword_type[1]
			
 
				+                            break
			
 
				+                        elif C_s.find(keyword_type[0]) != -1:
			
 
				+                            type_score_dict_ocr['keyword_type'] = keyword_type[0]
			
 
				+                            Score_structure_item = type_score_dict_ocr
			
 
				+                            Score_structure.append(Score_structure_item)
			
 
				+                            break
			
 
				+                        elif C_s.find(keyword_type[i]) != -1:
			
 
				+                            type_score_dict_ocr['keyword_type'] = keyword_type[i]
			
 
				+                            break
			
 
				+                        elif i == len_keyword_type - 1:
			
 
				+                            type_score_dict_ocr['keyword_type'] = keyword_type[0]
			
 
				+                            Score_structure_item = type_score_dict_ocr
			
 
				+                            Score_structure.append(Score_structure_item)
			
 
				+                            break
			
 
				+                    volume_structure_item = type_score_dict_ocr
			
 
				+                    volume_structure.append(volume_structure_item)
			
 
				+                    if Score_structure == []:
			
 
				+                        all_structure = {'volume_structure': volume_structure,
			
 
				+                                         'Score_structure': -1}
			
 
				+                    else:
			
 
				+                        all_structure = {'volume_structure': volume_structure,
			
 
				+                                         'Score_structure': Score_structure}
			
 
				+                    break
			
 
				+            elif iiii == len(keyword_volume) - 1:
			
 
				+                '''
			
 
				+                对应试卷中不存在分卷信息的情况,根据包含数字的个数分为4类，暂定包含信息的有效数字个数小于4，并处理小题分数和总分可能包含小数点的情况
			
 
				+                暂定小题个数不包含小数
			
 
				+                暂定总分数中不存在有意义的小数位
			
 
				+                '''
			
 
				+                for xxx in range(len_keyword_type):
			
 
				+                    if C_s.find(keyword_type[xxx]) != -1:
			
 
				+                        for x in range(len_keyword_item1):
			
 
				+                            if C_s.find(keyword_item1[x]) != -1:
			
 
				+                                if len(N_s) == 1:
			
 
				+                                    num_index = s.index(N_s[0])
			
 
				+                                    num_infer = s[num_index - len(N_s[0])]
			
 
				+                                    num_back = s[num_index + len(N_s[0])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if num_back == '分':  # 选择题/主观题，共*分
			
 
				+                                        item_total_score = N_s[0]
			
 
				+                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                        type_score_dict_ocr['volume_total_score'] = int(item_total_score)
			
 
				+                                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                                        type_score_dict_ocr['volume_score'] = -1
			
 
				+                                    break
			
 
				+                                elif len(N_s) == 2:
			
 
				+                                    num_index1 = s.index(N_s[0])
			
 
				+                                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                    temp1 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                            temp1 = temp1 + 1
			
 
				+                                    num_index2 = all_1[temp1]
			
 
				+                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if isinstance(N_s[1], str):
			
 
				+                                        N_s[1] = int(N_s[1])
			
 
				+                                    for y in range(len_keyword_item2):
			
 
				+                                        if C_s.find(keyword_item2[y]) != -1:
			
 
				+                                            if num_back1 == '分' and (num_infer2 == '题' or num_infer2 == '空'):  # 选择题/主观题/客观题，共*分，每题*分
			
 
				+                                                item_total_score = int(N_s[0])
			
 
				+                                                item_count = int(N_s[0] / N_s[1])
			
 
				+                                                item_score = N_s[1]
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif ( num_infer1 == '题' or num_infer1 == '空') and num_back2 == '分':  # 选择题/主观题，每题*分,共*分
			
 
				+                                                item_total_score = N_s[1]
			
 
				+                                                item_count = int(N_s[1] / N_s[0])
			
 
				+                                                item_score = N_s[0]
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                        elif y == len_keyword_item2 - 1:
			
 
				+                                            for u in range(len_keyword_item3):
			
 
				+                                                if C_s.find(keyword_item3[u]) != -1:
			
 
				+                                                    if num_back1 == '分':  # 选择题/主观题，共*分，共*题
			
 
				+                                                        item_total_score = N_s[0]
			
 
				+                                                        item_count = N_s[1]
			
 
				+                                                        item_score = N_s[0] / N_s[1]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':  # 选择题/主观题，共*题,共*分
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = N_s[0]
			
 
				+                                                        item_score = N_s[1] / N_s[0]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                elif u == len_keyword_item3 - 1:
			
 
				+                                                    if num_back1 == '.' and num_infer2 == '.' and num_back2 == '分':   # *.*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_total_score = N_s[0]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_score'] = -1
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':  # *，*分
			
 
				+                                                        item_N = N_s[0]
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_score'] = -1
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                    break
			
 
				+                                elif len(N_s) == 3:
			
 
				+                                    num_index1 = s.index(N_s[0])
			
 
				+                                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                    temp1 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                            temp1 = temp1 + 1
			
 
				+                                    num_index2 = all_1[temp1]
			
 
				+                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                    all_2 = find_repeat(s, N_s[2])
			
 
				+                                    temp2 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    for jj in range(len(N_s[1])):
			
 
				+                                        if N_s[1][jj] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    num_index3 = all_2[temp2]
			
 
				+                                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if isinstance(N_s[1], str):
			
 
				+                                        N_s[1] = int(N_s[1])
			
 
				+                                    if isinstance(N_s[2], str):
			
 
				+                                        N_s[2] = int(N_s[2])
			
 
				+                                    for v in range(len_keyword_item2):
			
 
				+                                        if C_s.find(keyword_item2[v]) != -1:
			
 
				+                                            for w in range(len_keyword_item3):
			
 
				+                                                if C_s.find(keyword_item3[w]) != -1:
			
 
				+                                                    if (num_infer1 == '题' or num_infer1 == '空') and num_back1 == '分' and num_back3 == '分':  # 每题*分，共*题，共*分
			
 
				+                                                        item_total_score = N_s[2]
			
 
				+                                                        item_count = N_s[1]
			
 
				+                                                        item_score = N_s[0]
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer1 == '题' or num_infer1 == '空') and num_back1 == '分' and num_back2 == '分':  # 每题*分，共*分，共*题
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = N_s[2]
			
 
				+                                                        item_score = N_s[0]
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back3 == '分':  # 共*题，每题*分，共*分
			
 
				+                                                        item_total_score = N_s[2]
			
 
				+                                                        item_count = N_s[0]
			
 
				+                                                        item_score = N_s[1]
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back1 == '分':  # 共*分，每题*分，共*题
			
 
				+                                                        item_total_score = N_s[0]
			
 
				+                                                        item_count = N_s[2]
			
 
				+                                                        item_score = N_s[1]
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back2 == '分':  # 共*题，共*分,每题*分
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = N_s[0]
			
 
				+                                                        item_score = N_s[2]
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back1 == '分':  # 共*分，共*题, 每题*分
			
 
				+                                                        item_total_score = N_s[0]
			
 
				+                                                        item_count = N_s[1]
			
 
				+                                                        item_score = N_s[2]
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                elif w == len_keyword_item3 - 1:
			
 
				+                                                    if num_back1 != '.' and num_back2 == '分' and (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分':  # *,共*分，每题*分
			
 
				+                                                        item_N = N_s[0]
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = int(N_s[1] / N_s[2])
			
 
				+                                                        item_score = N_s[2]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back1 != '.' and num_back3 == '分' and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分':  # *,每题*分,共*分，
			
 
				+                                                        item_N = N_s[0]
			
 
				+                                                        item_total_score = N_s[2]
			
 
				+                                                        item_count = int(N_s[2] / N_s[1])
			
 
				+                                                        item_score = N_s[1]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back1 == '.' and num_infer2 == '.' and num_back3 == '分' and num_back2 == '分':  # 每题*.*分,共*分/共*.*分，每题*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                            item_total_score = N_s[0]
			
 
				+                                                            item_score = N_s[2]
			
 
				+                                                            item_count = int(item_total_score/item_score)
			
 
				+                                                        else:
			
 
				+                                                            item_total_score = N_s[2]
			
 
				+                                                            item_score = float(N_s[0]+'.'+N_s[1])
			
 
				+                                                            item_count = int(item_total_score / item_score)
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '.' and num_infer3 == '.' and num_back1 == '分' and num_back3 == '分':  # 每题*分,共*.*分/共*分，每题*.*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        if int(N_s[0]) > int(N_s[1]):
			
 
				+                                                            item_total_score = N_s[0]
			
 
				+                                                            item_score = float(N_s[1]+'.'+N_s[2])
			
 
				+                                                            item_count = int(item_total_score/item_score)
			
 
				+                                                        else:
			
 
				+                                                            item_total_score = N_s[1]
			
 
				+                                                            item_score = N_s[0]
			
 
				+                                                            item_count = int(item_total_score / item_score)
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                        elif v == len_keyword_item2 - 1:
			
 
				+                                            for w in range(len_keyword_item3):
			
 
				+                                                if C_s.find(keyword_item3[w]) != -1:
			
 
				+                                                    if num_back3 == '分' and num_infer3 =='.' and num_back2 =='.':  # *小题,共*.*分，
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = N_s[0]
			
 
				+                                                        item_score = N_s[1]/N_s[0]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分' and num_infer2 =='.'and num_back1 =='.':  # 共*.*分，*小题
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_total_score = N_s[0]
			
 
				+                                                        item_count = N_s[2]
			
 
				+                                                        item_score = N_s[0]/N_s[2]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back3 == '分' and num_infer3 !='.':  # *,*小题,共*分，
			
 
				+                                                        item_N = N_s[0]
			
 
				+                                                        item_total_score = N_s[2]
			
 
				+                                                        item_count = N_s[1]
			
 
				+                                                        item_score = N_s[2]/N_s[1]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分' and num_infer2 !='.':  # *,共*分，共*小题
			
 
				+                                                        item_N = N_s[0]
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = N_s[2]
			
 
				+                                                        item_score = N_s[1] / N_s[2]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                elif w == len_keyword_item3-1:
			
 
				+                                                    if num_back3 == '分' and num_infer3 =='.' and num_back2 =='.':  # *,共*.*分，
			
 
				+                                                        item_N = N_s[0]
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_count = -1
			
 
				+                                                        item_score = -1
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back3 == '分':
			
 
				+                                                        item_total_score = N_s[1]
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_count = -1
			
 
				+                                                        item_score = -1
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                    break
			
 
				+                                elif len(N_s) == 4:
			
 
				+                                    num_index1 = s.index(N_s[0])
			
 
				+                                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                    temp1 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                            temp1 = temp1 + 1
			
 
				+                                    num_index2 = all_1[temp1]
			
 
				+                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                    all_2 = find_repeat(s, N_s[2])
			
 
				+                                    temp2 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    for jj in range(len(N_s[1])):
			
 
				+                                        if N_s[1][jj] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    num_index3 = all_2[temp2]
			
 
				+                                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                    all_3 = find_repeat(s, N_s[3])
			
 
				+                                    temp3 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[3]:
			
 
				+                                            temp3 = temp3 + 1
			
 
				+                                    for jj in range(len(N_s[1])):
			
 
				+                                        if N_s[1][jj] == N_s[3]:
			
 
				+                                            temp3 = temp3 + 1
			
 
				+                                    for kk in range(len(N_s[2])):
			
 
				+                                        if N_s[2][kk] == N_s[3]:
			
 
				+                                            temp3 = temp3 + 1
			
 
				+                                    num_index4 = all_3[temp3]
			
 
				+                                    num_infer4 = s[num_index4 - len(N_s[3])]
			
 
				+                                    num_back4 = s[num_index4 + len(N_s[3])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if isinstance(N_s[1], str):
			
 
				+                                        N_s[1] = int(N_s[1])
			
 
				+                                    if isinstance(N_s[2], str):
			
 
				+                                        N_s[2] = int(N_s[2])
			
 
				+                                    if isinstance(N_s[3], str):
			
 
				+                                        N_s[3] = int(N_s[3])
			
 
				+                                    for y in range(len_keyword_item1):
			
 
				+                                        if C_s.find(keyword_item1[y]) != -1:
			
 
				+                                            for z in range(len_keyword_item2):
			
 
				+                                                if C_s.find(keyword_item2[z]) != -1:
			
 
				+                                                    for u in range(len_keyword_item3):
			
 
				+                                                        if C_s.find(keyword_item3[u]) != -1:
			
 
				+                                                            if (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back4 == '分':  # *,每题*分，共*题，共*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                item_total_score = N_s[3]
			
 
				+                                                                item_count = N_s[2]
			
 
				+                                                                item_score = N_s[1]
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[
			
 
				+                                                                    'volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back3 == '分':  # *,每题*分，共*分，共*题
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                item_total_score = N_s[2]
			
 
				+                                                                item_count = N_s[3]
			
 
				+                                                                item_score = N_s[1]
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                if item_total_score < item_count * item_score:
			
 
				+                                                                    item_total_score = item_count * item_score
			
 
				+                                                                type_score_dict_ocr[
			
 
				+                                                                    'volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back4 == '分':  # *,共*题，每题*分，共*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                item_total_score = N_s[3]
			
 
				+                                                                item_count = N_s[1]
			
 
				+                                                                item_score = N_s[2]
			
 
				+                                                                if item_total_score < item_count * item_score:
			
 
				+                                                                    item_total_score = item_count * item_score
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[
			
 
				+                                                                    'volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back2 == '分':  # *,共*分，每题*分，共*题
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                item_total_score = N_s[1]
			
 
				+                                                                item_count = N_s[3]
			
 
				+                                                                item_score = N_s[2]
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[
			
 
				+                                                                    'volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif (num_infer4 == '题' or num_infer4 == '空') and num_back4 == '分' and num_back3 == '分':  # *,共*题，共*分,每题*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                item_total_score = N_s[2]
			
 
				+                                                                item_count = N_s[1]
			
 
				+                                                                item_score = N_s[3]
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                if item_total_score < item_count * item_score:
			
 
				+                                                                    item_total_score = item_count * item_score
			
 
				+                                                                type_score_dict_ocr[
			
 
				+                                                                    'volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif (num_infer4 == '题' or num_infer4 == '空') and num_back4 == '分' and num_back2 == '分':  # *,共*分，共*题, 每题*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                item_total_score = N_s[1]
			
 
				+                                                                item_count = N_s[2]
			
 
				+                                                                item_score = N_s[3]
			
 
				+                                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                                if item_total_score < item_count * item_score:
			
 
				+                                                                    item_total_score = item_count * item_score
			
 
				+                                                                type_score_dict_ocr[
			
 
				+                                                                    'item_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back4 == '分' :  # 共*.*分，共*题, 每题*分/每题*.*分，共*题,共*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if N_s[0] > N_s[3]:
			
 
				+                                                                    item_total_score = N_s[0]
			
 
				+                                                                    item_score = N_s[3]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[3]
			
 
				+                                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[ 'item_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back3== '.' and num_infer4== '.' and num_back1 == '分'and num_back4 == '分' :  # 共*分，共*题, 每题*.*分/每题*分，共*题,共*.*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if N_s[0] > N_s[2]:
			
 
				+                                                                    item_total_score = N_s[0]
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[2]
			
 
				+                                                                    item_score = N_s[0]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[ 'item_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' :  # 共*题，共*.*分,每题*分/共*题，每题*.*分,共*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if N_s[1] > N_s[3]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = N_s[3]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[3]
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back3== '.' and num_infer4== '.' and num_back4 == '分'and num_back2 == '分' :  # 共*题，共*分,每题*.*分/共*题，每题*分,共*.*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if N_s[1] > N_s[2]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[2]
			
 
				+                                                                    item_score = N_s[1]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' :  # 每题*.*分，共*分，共*题/共*.*分，每题*分，共*题
			
 
				+                                                                item_N = -1
			
 
				+                                                                if N_s[0] > N_s[2]:
			
 
				+                                                                    item_total_score = N_s[0]
			
 
				+                                                                    item_score = N_s[2]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[2]
			
 
				+                                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back1 == '分' :  # 每题*分，共*.*分，共*题/共*分，每题*.*分，共*题
			
 
				+                                                                item_N = -1
			
 
				+                                                                if N_s[0] > N_s[1]:
			
 
				+                                                                    item_total_score = N_s[0]
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = N_s[0]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                        elif u == len_keyword_item3-1:
			
 
				+                                                            if num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' :  # *,共*.*分， 每题*分/每题*.*分，共*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                if N_s[1] > N_s[3]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = N_s[3]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[3]
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[ 'item_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back3== '.' and num_infer4== '.' and num_back2 == '分'and num_back4 == '分' :  # *,共*分， 每题*.*分/*,每题*分，共*.*分
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                if N_s[1] > N_s[2]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[2]
			
 
				+                                                                    item_score = N_s[1]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr[ 'item_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' :  # *,共*.*分,每题*分/*，每题*.*分,共*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                if N_s[1] > N_s[3]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = N_s[3]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[3]
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back3== '.' and num_infer4== '.' and num_back4 == '分'and num_back2 == '分' :  # *，共*分,每题*.*分/*，每题*分,共*.*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                if N_s[1] > N_s[2]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[2]
			
 
				+                                                                    item_score = N_s[1]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' :  # *,每题*.*分，共*分/*,共*.*分，每题*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                if N_s[1] > N_s[3]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = N_s[3]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[3]
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+                                                            elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back1 == '分' :  # *,每题*分，共*.*分/*,共*分，每题*.*分
			
 
				+                                                                item_N = N_s[0]
			
 
				+                                                                if N_s[1] > N_s[2]:
			
 
				+                                                                    item_total_score = N_s[1]
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = N_s[2]
			
 
				+                                                                    item_score = N_s[1]
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                                break
			
 
				+
			
 
				+                                                    break
			
 
				+                                            break
			
 
				+                                        elif y == len_keyword_item1 - 1 and num_back4 == '分':
			
 
				+                                            item_total_score = N_s[3]
			
 
				+                                            item_N = -1
			
 
				+                                            item_score = -1
			
 
				+                                            item_count = -1
			
 
				+                                            type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                            type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                            type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                            type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                            break
			
 
				+                                    break
			
 
				+                                break
			
 
				+                            elif x == len_keyword_item1 - 1:
			
 
				+                                for y in range(len_keyword_item2):
			
 
				+                                    if C_s.find(keyword_item2[y]) != -1:
			
 
				+                                        if len(N_s) == 1:
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if num_back1 == '分':  # 每题*分
			
 
				+                                                item_score = N_s[0]
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = -1
			
 
				+                                                type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                            break
			
 
				+                                        if len(N_s) == 2:
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            all_1 = find_repeat(s, N_s[1])
			
 
				+                                            temp1 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[1]:
			
 
				+                                                    temp1 = temp1 + 1
			
 
				+                                            num_index2 = all_1[temp1]
			
 
				+                                            num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                            num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if isinstance(N_s[1], str):
			
 
				+                                                N_s[1] = int(N_s[1])
			
 
				+                                            for z in range(len(keyword_item3)):
			
 
				+                                                if C_s.find(keyword_item3[z]) != -1:
			
 
				+                                                    if num_back2 == '分':  # 共*题，每题*分
			
 
				+                                                        item_total_score = N_s[0] * N_s[1]
			
 
				+                                                        item_count = N_s[0]
			
 
				+                                                        item_score = N_s[1]
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back1 == '分':  # 每题*分，共*题
			
 
				+                                                        item_total_score = int(N_s[0]) * int(N_s[1])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[0])
			
 
				+                                                        type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                elif z == len(keyword_item3) - 1:
			
 
				+                                                    if num_back2 == '分' and num_back1 == '.' and num_infer2 == '.':  # *.*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_score = float(N_s[0]+'.'+N_s[1])
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':  # *,*分
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['volume_total_score'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                        type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                        if len(N_s) == 3:
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            all_1 = find_repeat(s, N_s[1])
			
 
				+                                            temp1 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[1]:
			
 
				+                                                    temp1 = temp1 + 1
			
 
				+                                            num_index2 = all_1[temp1]
			
 
				+                                            num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                            num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                            all_2 = find_repeat(s, N_s[2])
			
 
				+                                            temp2 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            for jj in range(len(N_s[1])):
			
 
				+                                                if N_s[1][jj] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            num_index3 = all_2[temp2]
			
 
				+                                            num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                            num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if isinstance(N_s[1], str):
			
 
				+                                                N_s[1] = int(N_s[1])
			
 
				+                                            if isinstance(N_s[2], str):
			
 
				+                                                N_s[2] = int(N_s[2])
			
 
				+                                            if num_back3 == '分' and (num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back1 != '分':  # *，共*题，每题*分
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_total_score = int(N_s[1]) * int(N_s[2])
			
 
				+                                                item_count = int(N_s[1])
			
 
				+                                                item_score = int(N_s[2])
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_back2 == '分' and (num_back3 == '题' or num_back3 == '小' or num_back3 == '空') and num_back1 != '分':  # *，每题*分，共*题
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_total_score = int(N_s[1]) * int(N_s[2])
			
 
				+                                                item_count = int(N_s[2])
			
 
				+                                                item_score = int(N_s[1])
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_infer3 == '.' and num_back3 == '分' and num_back2 == '.':  # 共*题，每题*.*分
			
 
				+                                                item_N = -1
			
 
				+                                                item_count = int(N_s[0])
			
 
				+                                                item_score = float(N_s[1]+'.'+N_s[2])
			
 
				+                                                item_total_score = int(item_count * item_score)
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_infer2 == '.' and num_back2 == '分' and num_back1 == '.' :  # 每题*.*分，共*题
			
 
				+                                                item_N = -1
			
 
				+                                                item_count = int(N_s[2])
			
 
				+                                                item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                item_total_score = int(item_count * item_score)
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+                                            elif num_back3 == '分': # * * ，每题*分
			
 
				+                                                item_N = -1
			
 
				+                                                item_count = -1
			
 
				+                                                item_score = -1
			
 
				+                                                item_total_score = int(N_s[2])
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                                break
			
 
				+
			
 
				+                                            break
			
 
				+                                        if len(N_s) == 4:
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            all_1 = find_repeat(s, N_s[1])
			
 
				+                                            temp1 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[1]:
			
 
				+                                                    temp1 = temp1 + 1
			
 
				+                                            num_index2 = all_1[temp1]
			
 
				+                                            num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                            num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                            all_2 = find_repeat(s, N_s[2])
			
 
				+                                            temp2 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            for jj in range(len(N_s[1])):
			
 
				+                                                if N_s[1][jj] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            num_index3 = all_2[temp2]
			
 
				+                                            num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                            num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                            all_3 = find_repeat(s, N_s[3])
			
 
				+                                            temp3 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[3]:
			
 
				+                                                    temp3 = temp3 + 1
			
 
				+                                            for jj in range(len(N_s[1])):
			
 
				+                                                if N_s[1][jj] == N_s[3]:
			
 
				+                                                    temp3 = temp3 + 1
			
 
				+                                            num_index4 = all_3[temp3]
			
 
				+                                            num_infer4 = s[num_index4 - len(N_s[3])]
			
 
				+                                            num_back4 = s[num_index4 + len(N_s[3])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if isinstance(N_s[1], str):
			
 
				+                                                N_s[1] = int(N_s[1])
			
 
				+                                            if isinstance(N_s[2], str):
			
 
				+                                                N_s[2] = int(N_s[2])
			
 
				+                                            if isinstance(N_s[3], str):
			
 
				+                                                N_s[3] = int(N_s[3])
			
 
				+                                            if num_back3 == '.' and num_infer4 == '.' and num_back4 == '分' and (num_back2 == '题' or num_back2 == '小' or num_back2 == '空') and num_back1 != '分':  # *，共*题，每题*.*分
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_count = int(N_s[1])
			
 
				+                                                item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                item_total_score = int(item_count * item_score)
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                            elif num_back2 == '.' and num_infer3 == '.' and num_back3 == '分' and (num_back4 == '题' or num_back4 == '小' or num_back4 == '空') and num_back1 != '分':  # *，每题*.*分，共*题
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_count = int(N_s[3])
			
 
				+                                                item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                item_total_score = int(item_count * item_score)
			
 
				+                                                type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = item_count
			
 
				+                                                type_score_dict_ocr['volume_score'] = item_score
			
 
				+                                            break
			
 
				+                                    elif y == len_keyword_item2 - 1:
			
 
				+                                        if C_s.find(keyword_item4[0]) != -1:
			
 
				+                                            if len(N_s) == 2:  # *，*分
			
 
				+                                                num_index1 = s.index(N_s[0])
			
 
				+                                                num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                                num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                                all_1 = find_repeat(s, N_s[1])
			
 
				+                                                temp1 = 0
			
 
				+                                                for ii in range(len(N_s[0])):
			
 
				+                                                    if N_s[0][ii] == N_s[1]:
			
 
				+                                                        temp1 = temp1 + 1
			
 
				+                                                num_index2 = all_1[temp1]
			
 
				+                                                num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                                num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                                if isinstance(N_s[0], str):
			
 
				+                                                    N_s[0] = int(N_s[0])
			
 
				+                                                if isinstance(N_s[1], str):
			
 
				+                                                    N_s[1] = int(N_s[1])
			
 
				+                                                if num_back1 == '分':
			
 
				+                                                    item_N = -1
			
 
				+                                                    item_total_score = int(N_s[0])
			
 
				+                                                    type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                    type_score_dict_ocr['volume_score'] = -1
			
 
				+                                                elif num_back2 == '分':
			
 
				+                                                    item_N = int(N_s[0])
			
 
				+                                                    item_total_score = int(N_s[1])
			
 
				+                                                    type_score_dict_ocr['volume_N'] = item_N
			
 
				+                                                    type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                    type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                    type_score_dict_ocr['volume_score'] = -1
			
 
				+                                                break
			
 
				+                                            elif len(N_s) == 1:
			
 
				+                                                num_index1 = s.index(N_s[0])
			
 
				+                                                num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                                num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                                item_total_score = int(N_s[0])
			
 
				+                                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                                type_score_dict_ocr['volume_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['volume_count'] = -1
			
 
				+                                                type_score_dict_ocr['volume_score'] = -1
			
 
				+                                                # if num_back1 == '分':  # *分
			
 
				+                                                #     test_9_23=0
			
 
				+                                                break
			
 
				+                                break
			
 
				+                            else:
			
 
				+                                type_score_dict_ocr['volume_N'] = -1
			
 
				+                                type_score_dict_ocr['volume_total_score'] = -1
			
 
				+                                type_score_dict_ocr['volume_count'] = -1
			
 
				+                                type_score_dict_ocr['volume_score'] = -1
			
 
				+
			
 
				+                        if 'volume_N' not in type_score_dict_ocr.keys():
			
 
				+                            all_structure = {'volume_structure': -1,
			
 
				+                                             'Score_structure': -1}
			
 
				+                            break
			
 
				+                        else:
			
 
				+                            if C_s.find(keyword_type[1]) != -1:
			
 
				+                                type_score_dict_ocr['keyword_type'] = keyword_type[1]
			
 
				+                            elif C_s.find(keyword_type[0]) != -1:
			
 
				+                                type_score_dict_ocr['keyword_type'] = keyword_type[0]
			
 
				+                                Score_structure_item = type_score_dict_ocr
			
 
				+                                Score_structure.append(Score_structure_item)
			
 
				+                            elif C_s.find(keyword_type[xxx]) != -1:
			
 
				+                                type_score_dict_ocr['keyword_type'] = keyword_type[xxx]
			
 
				+                            elif xxx == len_keyword_type - 1:
			
 
				+                                type_score_dict_ocr['keyword_type'] = -2
			
 
				+                                type_score_dict_ocr['item_N'] = type_score_dict_ocr.pop(
			
 
				+                                    'volume_N')
			
 
				+                                type_score_dict_ocr[
			
 
				+                                    'item_total_score'] = type_score_dict_ocr.pop(
			
 
				+                                    'volume_total_score')
			
 
				+                                type_score_dict_ocr['item_count'] = type_score_dict_ocr.pop(
			
 
				+                                    'volume_count')
			
 
				+                                type_score_dict_ocr['item_score'] = type_score_dict_ocr.pop(
			
 
				+                                    'volume_score')
			
 
				+                                Score_structure_item = type_score_dict_ocr
			
 
				+                                Score_structure.append(Score_structure_item)
			
 
				+                            volume_structure_item = type_score_dict_ocr
			
 
				+                            volume_structure.append(volume_structure_item)
			
 
				+                            if Score_structure == []:
			
 
				+                                all_structure = {'volume_structure': volume_structure,
			
 
				+                                                 'Score_structure': -1}
			
 
				+                            elif Score_structure[0]['keyword_type'] != -2:
			
 
				+                                all_structure = {'volume_structure': volume_structure,
			
 
				+                                                 'Score_structure': Score_structure}
			
 
				+                            else:
			
 
				+                                all_structure = {'volume_structure': -1,
			
 
				+                                                 'Score_structure': Score_structure}
			
 
				+                            break
			
 
				+
			
 
				+                    elif xxx == len_keyword_type - 1:
			
 
				+                        for x in range(len_keyword_item1):
			
 
				+                            if C_s.find(keyword_item1[x]) != -1:
			
 
				+                                if len(N_s) == 1:
			
 
				+                                    num_index = s.index(N_s[0])
			
 
				+                                    num_infer = s[num_index - len(N_s[0])]
			
 
				+                                    num_back = s[num_index + len(N_s[0])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if num_back == '分':  # 共*分
			
 
				+                                        item_total_score = N_s[0]
			
 
				+                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                        type_score_dict_ocr['item_count'] = -1
			
 
				+                                        type_score_dict_ocr['item_score'] = -1
			
 
				+                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                        Score_structure.append(Score_structure_item)
			
 
				+                                        all_structure = {'volume_structure': -1,
			
 
				+                                                         'Score_structure': Score_structure}
			
 
				+                                        break
			
 
				+                                    break
			
 
				+                                elif len(N_s) == 2:
			
 
				+                                    num_index1 = s.index(N_s[0])
			
 
				+                                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                    temp1 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                            temp1 = temp1 + 1
			
 
				+                                    num_index2 = all_1[temp1]
			
 
				+                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if isinstance(N_s[1], str):
			
 
				+                                        N_s[1] = int(N_s[1])
			
 
				+                                    for y in range(len_keyword_item2):
			
 
				+                                        if C_s.find(keyword_item2[y]) != -1:
			
 
				+                                            if num_back1 == '分' and (num_infer2 == '题' or num_infer2 == '空'):  # 共*分，每题*分
			
 
				+                                                item_total_score = int(N_s[0])
			
 
				+                                                item_count = int(N_s[0]) / int(N_s[1])
			
 
				+                                                item_score = int(N_s[1])
			
 
				+                                                type_score_dict_ocr['item_N'] = -1
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                                break
			
 
				+                                            elif (num_infer1 == '题' or num_infer1 == '空') and num_back2 == '分':  # 每题*分,共*分
			
 
				+                                                item_total_score = int(N_s[1])
			
 
				+                                                item_count = int(N_s[1]) / int(N_s[0])
			
 
				+                                                item_score = int(N_s[0])
			
 
				+                                                type_score_dict_ocr['item_N'] = -1
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                                break
			
 
				+                                            break
			
 
				+                                        elif y == len_keyword_item2 - 1:
			
 
				+                                            for u in range(len_keyword_item3):
			
 
				+                                                if C_s.find(keyword_item3[u]) != -1:
			
 
				+                                                    if num_back1 == '分':  # 共*分，共*题
			
 
				+                                                        item_total_score = int(N_s[0])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[0]) / int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':  # 共*题,共*分
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[1]) / int(N_s[0])
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                elif u == len_keyword_item3 - 1:
			
 
				+                                                    if num_back2 == '分' and num_infer2 == '.'and num_back1 == '.':  # *.*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_total_score = int(N_s[0])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = -1
			
 
				+                                                        type_score_dict_ocr['item_score'] = -1
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = -1
			
 
				+                                                        type_score_dict_ocr['item_score'] = -1
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+
			
 
				+                                    break
			
 
				+                                elif len(N_s) == 3:
			
 
				+                                    num_index1 = s.index(N_s[0])
			
 
				+                                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                    temp1 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                            temp1 = temp1 + 1
			
 
				+                                    num_index2 = all_1[temp1]
			
 
				+                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                    all_2 = find_repeat(s, N_s[2])
			
 
				+                                    temp2 = 0
			
 
				+
			
 
				+                                    if len(N_s[0]) == len(N_s[2]):
			
 
				+                                        if N_s[0] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    else:
			
 
				+                                        for ii in range(len(N_s[0])):
			
 
				+                                            if N_s[0][ii] == N_s[2]:
			
 
				+                                                temp2 = temp2 + 1
			
 
				+                                    if len(N_s[1]) == len(N_s[2]):
			
 
				+                                        if N_s[1] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    else:
			
 
				+                                        for jj in range(len(N_s[1])):
			
 
				+                                            if N_s[1][jj] == N_s[2]:
			
 
				+                                                temp2 = temp2 + 1
			
 
				+                                    num_index3 = all_2[temp2]
			
 
				+                                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                    if num_index3 + len(N_s[2]) < len(s):
			
 
				+                                        num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                    else:
			
 
				+                                        num_back3 = []
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if isinstance(N_s[1], str):
			
 
				+                                        N_s[1] = int(N_s[1])
			
 
				+                                    if isinstance(N_s[2], str):
			
 
				+                                        N_s[2] = int(N_s[2])
			
 
				+                                    for v in range(len_keyword_item2):
			
 
				+                                        if C_s.find(keyword_item2[v]) != -1:
			
 
				+                                            for w in range(len_keyword_item3):
			
 
				+                                                if C_s.find(keyword_item3[w]) != -1:
			
 
				+                                                    if (num_infer1 == '题' or num_infer1 == '空') and num_back1 == '分' and num_back3 == '分':  # 每题*分，共*题，共*分
			
 
				+                                                        item_total_score = int(N_s[2])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[0])
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer1 == '题' or num_infer1 == '空') and num_back1 == '分' and num_back2 == '分':  # 每题*分，共*分，共*题
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = int(N_s[2])
			
 
				+                                                        item_score = int(N_s[0])
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back3 == '分':  # 共*题，每题*分，共*分
			
 
				+                                                        item_total_score = int(N_s[2])
			
 
				+                                                        item_count = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[1])
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back1 == '分':  # 共*分，每题*分，共*题
			
 
				+                                                        item_total_score = int(N_s[0])
			
 
				+                                                        item_count = int(N_s[2])
			
 
				+                                                        item_score = int(N_s[1])
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back2 == '分':  # 共*题，共*分,每题*分
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[2])
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back1 == '分':  # 共*分，共*题, 每题*分
			
 
				+                                                        item_total_score = int(N_s[0])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[2])
			
 
				+                                                        if item_total_score < item_count * item_score:
			
 
				+                                                            item_total_score = item_count * item_score
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                elif w == len_keyword_item3 - 1:
			
 
				+                                                    if num_back2 == '分' and (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分':  # *,共*分，每题*分
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = int(N_s[1]) / int(N_s[2])
			
 
				+                                                        item_score = int(N_s[2])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back3 == '分' and (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分':  # *,每题*分,共*分，
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[2])
			
 
				+                                                        item_count = int(N_s[2]) / int(N_s[1])
			
 
				+                                                        item_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back3 == '分' and num_infer2 == '.' and num_back1 == '.' and num_back2 == '分':  # 每题*.*分,共*分/共*.*分,每题*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                            item_total_score = int(N_s[0])
			
 
				+                                                            item_score = int(N_s[2])
			
 
				+                                                            item_count = int(item_total_score / item_score)
			
 
				+                                                        else:
			
 
				+                                                            item_total_score = int(N_s[2])
			
 
				+                                                            item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                            item_count = int(item_total_score / item_score)
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back3 == '分' and num_infer3 == '.' and num_back2 == '.' and num_back1 == '分':  # 每题*分,共*.*分/共*分,每题*.*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                            item_total_score = int(N_s[0])
			
 
				+                                                            item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                            item_count = int(item_total_score / item_score)
			
 
				+                                                        else:
			
 
				+                                                            item_total_score = int(N_s[1])
			
 
				+                                                            item_score = int(N_s[0])
			
 
				+                                                            item_count = int(item_total_score / item_score)
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                        elif v == len_keyword_item2 - 1:
			
 
				+                                            for w in range(len_keyword_item3):
			
 
				+                                                if C_s.find(keyword_item3[w]) != -1:
			
 
				+                                                    if num_back3 == '分' and num_infer3 == '.'and num_back2 == '.':  # *小题,共*.*分，
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[1]) / int(N_s[0])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分' and num_infer2 == '.'and num_back1 == '.':  # 共*.*分，*小题
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_total_score = int(N_s[0])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[0]) / int(N_s[2])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':  # *,*小题,共*分，
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = int(N_s[2])
			
 
				+                                                        item_score = int(N_s[1]) / int(N_s[2])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back3 == '分':  # *,共*分，共*小题
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[2])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[2]) / int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                elif w == len_keyword_item3 - 1:
			
 
				+                                                    if num_back3 == '分' and num_infer3 == '.'and num_back2 == '.':  # *,共*.*分，
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        item_count = -1
			
 
				+                                                        item_score = -1
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                    break
			
 
				+                                elif len(N_s) == 4:
			
 
				+                                    num_index1 = s.index(N_s[0])
			
 
				+                                    num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                    temp1 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                            temp1 = temp1 + 1
			
 
				+                                    num_index2 = all_1[temp1]
			
 
				+                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                    all_2 = find_repeat(s, N_s[2])
			
 
				+                                    temp2 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    for jj in range(len(N_s[1])):
			
 
				+                                        if N_s[1][jj] == N_s[2]:
			
 
				+                                            temp2 = temp2 + 1
			
 
				+                                    num_index3 = all_2[temp2]
			
 
				+                                    num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                    all_3 = find_repeat(s, N_s[3])
			
 
				+                                    temp3 = 0
			
 
				+                                    for ii in range(len(N_s[0])):
			
 
				+                                        if N_s[0][ii] == N_s[3]:
			
 
				+                                            temp3 = temp3 + 1
			
 
				+                                    for jj in range(len(N_s[1])):
			
 
				+                                        if N_s[1][jj] == N_s[3]:
			
 
				+                                            temp3 = temp3 + 1
			
 
				+                                    for kk in range(len(N_s[2])):
			
 
				+                                        if N_s[2][kk] == N_s[3]:
			
 
				+                                            temp3 = temp3 + 1
			
 
				+                                    num_index4 = all_3[temp3]
			
 
				+                                    num_infer4 = s[num_index4 - len(N_s[3])]
			
 
				+                                    num_back4 = s[num_index4 + len(N_s[3])]
			
 
				+                                    if isinstance(N_s[0], str):
			
 
				+                                        N_s[0] = int(N_s[0])
			
 
				+                                    if isinstance(N_s[1], str):
			
 
				+                                        N_s[1] = int(N_s[1])
			
 
				+                                    if isinstance(N_s[2], str):
			
 
				+                                        N_s[2] = int(N_s[2])
			
 
				+                                    if isinstance(N_s[3], str):
			
 
				+                                        N_s[3] = int(N_s[3])
			
 
				+                                    for y in range(len_keyword_item1):
			
 
				+                                        if C_s.find(keyword_item1[y]) != -1:
			
 
				+                                            for z in range(len_keyword_item2):
			
 
				+                                                if C_s.find(keyword_item2[z]) != -1:
			
 
				+                                                    for u in range(len_keyword_item3):
			
 
				+                                                        if C_s.find(keyword_item3[u]) != -1:
			
 
				+                                                            if (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back4 == '分':  # *,每题*分，共*题，共*分
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                item_total_score = int(N_s[3])
			
 
				+                                                                item_count = int(N_s[2])
			
 
				+                                                                item_score = int(N_s[1])
			
 
				+                                                            elif (num_infer2 == '题' or num_infer2 == '空') and num_back2 == '分' and num_back3 == '分':  # *,每题*分，共*分，共*题
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                item_total_score = int(N_s[2])
			
 
				+                                                                item_count = int(N_s[3])
			
 
				+                                                                item_score = int(N_s[1])
			
 
				+                                                            elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back4 == '分':  # *,共*题，每题*分，共*分
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                item_total_score = int(N_s[3])
			
 
				+                                                                item_count = int(N_s[1])
			
 
				+                                                                item_score = int(N_s[2])
			
 
				+                                                            elif (num_infer3 == '题' or num_infer3 == '空') and num_back3 == '分' and num_back2 == '分':  # *,共*分，每题*分，共*题
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                item_total_score = int(N_s[1])
			
 
				+                                                                item_count = int(N_s[3])
			
 
				+                                                                item_score = int(N_s[2])
			
 
				+                                                            elif (num_infer4 == '题' or num_infer4 == '空') and num_back4 == '分' and num_back3 == '分':  # *,共*题，共*分,每题*分
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                item_total_score = int(N_s[2])
			
 
				+                                                                item_count = int(N_s[1])
			
 
				+                                                                item_score = int(N_s[3])
			
 
				+                                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                            elif (num_infer4 == '题' or num_infer4 == '空') and num_back4 == '分' and num_back2 == '分':  # *,共*分，共*题, 每题*分
			
 
				+                                                                item_N = int(N_s[0])
			
 
				+                                                                item_total_score = int(N_s[1])
			
 
				+                                                                item_count = int(N_s[2])
			
 
				+                                                                item_score = int(N_s[3])
			
 
				+
			
 
				+                                                            elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back4 == '分' :  # 共*.*分，共*题, 每题*分/每题*.*分，共*题,共*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if int(N_s[0]) > int(N_s[3]):
			
 
				+                                                                    item_total_score = int(N_s[0])
			
 
				+                                                                    item_score = int(N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = int(N_s[3])
			
 
				+                                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                            elif num_back3== '.' and num_infer4== '.' and num_back1 == '分'and num_back4 == '分' :  # 共*分，共*题, 每题*.*分/每题*分，共*题,共*.*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                                    item_total_score = int(N_s[0])
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = int(N_s[2])
			
 
				+                                                                    item_score = int(N_s[0])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                            elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back4 == '分' :  # 共*题，共*.*分,每题*分/共*题，每题*.*分,共*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if int(N_s[1]) > int(N_s[3]):
			
 
				+                                                                    item_total_score = int(N_s[1])
			
 
				+                                                                    item_score = int(N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = int(N_s[3])
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                            elif num_back3== '.' and num_infer4== '.' and num_back4 == '分'and num_back2 == '分' :  # 共*题，共*分,每题*.*分/共*题，每题*分,共*.*分
			
 
				+                                                                item_N = -1
			
 
				+                                                                if int(N_s[1]) > int(N_s[2]):
			
 
				+                                                                    item_total_score = int(N_s[1])
			
 
				+                                                                    item_score = float(N_s[2] + '.' + N_s[3])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = int(N_s[2])
			
 
				+                                                                    item_score = int(N_s[1])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                            elif num_back1== '.' and num_infer2== '.' and num_back2 == '分'and num_back3 == '分' :  # 每题*.*分，共*分，共*题/共*.*分，每题*分，共*题
			
 
				+                                                                item_N = -1
			
 
				+                                                                if int(N_s[0]) > int(N_s[2]):
			
 
				+                                                                    item_total_score = int(N_s[0])
			
 
				+                                                                    item_score = int(N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = int(N_s[2])
			
 
				+                                                                    item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                            elif num_back2== '.' and num_infer3== '.' and num_back3 == '分'and num_back1 == '分' :  # 每题*分，共*.*分，共*题/共*分，每题*.*分，共*题
			
 
				+                                                                item_N = -1
			
 
				+                                                                if int(N_s[0]) > int(N_s[1]):
			
 
				+                                                                    item_total_score = int(N_s[0])
			
 
				+                                                                    item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                                else:
			
 
				+                                                                    item_total_score = int(N_s[1])
			
 
				+                                                                    item_score = int(N_s[0])
			
 
				+                                                                    item_count = int(item_total_score / item_score)
			
 
				+                                                            else:
			
 
				+                                                                break
			
 
				+                                                            if item_total_score < item_count * item_score:
			
 
				+                                                                item_total_score = item_count * item_score
			
 
				+                                                            type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                            type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                            type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                            type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                            Score_structure_item = type_score_dict_ocr
			
 
				+                                                            Score_structure.append(Score_structure_item)
			
 
				+                                                            all_structure = {'volume_structure': -1,
			
 
				+                                                                             'Score_structure': Score_structure}
			
 
				+                                                            break
			
 
				+
			
 
				+
			
 
				+
			
 
				+                                                    break
			
 
				+                                            break
			
 
				+                                    break
			
 
				+                            elif x == len_keyword_item1 - 1:
			
 
				+                                for y in range(len_keyword_item2):
			
 
				+                                    if C_s.find(keyword_item2[y]) != -1:
			
 
				+                                        if len(N_s) == 1:
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if num_back1 == '分':  # 每题*分
			
 
				+                                                item_score = int(N_s[0])
			
 
				+                                                type_score_dict_ocr['item_N'] = -1
			
 
				+                                                type_score_dict_ocr['item_total_score'] = -1
			
 
				+                                                type_score_dict_ocr['item_count'] = -1
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                                break
			
 
				+                                        elif len(N_s) == 2:
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            all_1 = find_repeat(s, N_s[1])
			
 
				+                                            temp1 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[1]:
			
 
				+                                                    temp1 = temp1 + 1
			
 
				+                                            num_index2 = all_1[temp1]
			
 
				+                                            num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                            num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if isinstance(N_s[1], str):
			
 
				+                                                N_s[1] = int(N_s[1])
			
 
				+                                            for z in range(len(keyword_item3)):
			
 
				+                                                if C_s.find(keyword_item3[z]) != -1:
			
 
				+                                                    if num_back2 == '分':  # 共*题，每题*分
			
 
				+                                                        item_total_score = int(N_s[0]) * int(N_s[1])
			
 
				+                                                        item_count = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back1 == '分':  # 每题*分，共*题
			
 
				+                                                        item_total_score = int(N_s[0]) * int(N_s[1])
			
 
				+                                                        item_count = int(N_s[1])
			
 
				+                                                        item_score = int(N_s[0])
			
 
				+                                                        type_score_dict_ocr['item_N'] = -1
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                elif z == len(keyword_item3) - 1:
			
 
				+                                                    if num_back2 == '分' and num_infer2 == '.' and num_back1 == '.':  # *.*分
			
 
				+                                                        item_N = -1
			
 
				+                                                        item_score = float(N_s[0] + '.'+N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = -1
			
 
				+                                                        type_score_dict_ocr['item_count'] = -1
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                                    elif num_back2 == '分':  # *,*分
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = -1
			
 
				+                                                        type_score_dict_ocr['item_count'] = -1
			
 
				+                                                        type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                            break
			
 
				+                                        elif len(N_s) == 3:  # 9月16号修改
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            all_1 = find_repeat(s, N_s[1])
			
 
				+                                            temp1 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[1]:
			
 
				+                                                    temp1 = temp1 + 1
			
 
				+                                            num_index2 = all_1[temp1]
			
 
				+                                            num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                            num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                            all_2 = find_repeat(s, N_s[2])
			
 
				+                                            temp2 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            for jj in range(len(N_s[1])):
			
 
				+                                                if N_s[1][jj] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            num_index3 = all_2[temp2]
			
 
				+                                            num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                            num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if isinstance(N_s[1], str):
			
 
				+                                                N_s[1] = int(N_s[1])
			
 
				+                                            if isinstance(N_s[2], str):
			
 
				+                                                N_s[2] = int(N_s[2])
			
 
				+
			
 
				+                                            if num_back3 == '分' and num_back2 != '分' and num_infer3 != '.':  # *，共*题，每题*分
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_total_score = int(N_s[1]) * int(N_s[2])
			
 
				+                                                item_count = int(N_s[1])
			
 
				+                                                item_score = int(N_s[2])
			
 
				+                                            elif num_back2 == '分' and num_back3 != '分' and num_infer2 != '.':  # *，每题*分，共*题
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_total_score = int(N_s[1]) * int(N_s[2])
			
 
				+                                                item_count = int(N_s[2])
			
 
				+                                                item_score = int(N_s[1])
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                            elif num_back2 == '分' and num_back3 == '分'and num_infer3 != '.' and num_infer2 != '.':  # *，*分，每题*分
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_total_score = int(N_s[1])
			
 
				+                                                item_count = int(N_s[1]) / int(N_s[2])
			
 
				+                                                item_score = int(N_s[2])
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                            elif num_back3 == '分' and num_back2 == '.' and num_infer3 == '.' and (num_back1 =='题' or num_back1 =='小'or num_back1 =='空'):  # 共*题，每题*.*分
			
 
				+                                                item_N = -1
			
 
				+                                                item_count = int(N_s[0])
			
 
				+                                                item_score = float(N_s[1]+'.'+N_s[2])
			
 
				+                                                item_total_score = int(item_score * item_count)
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                            elif num_back2 == '分' and num_back1 == '.' and num_infer2 == '.'and (num_back3 =='题' or num_back3 =='小'or num_back3 =='空'):  # 每题*.*分，共*题
			
 
				+                                                item_N = -1
			
 
				+                                                item_count = int(N_s[2])
			
 
				+                                                item_score = float(N_s[0] + '.' + N_s[1])
			
 
				+                                                item_total_score = int(item_score * item_count)
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                            elif num_back3 == '分' and num_back2 == '.' and num_infer3 == '.':  # *,每题*.*分
			
 
				+                                                item_N = N_s[0]
			
 
				+                                                item_count = -1
			
 
				+                                                item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                item_total_score = -1
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                        elif len(N_s) == 4:  # 9月16号修改
			
 
				+                                            num_index1 = s.index(N_s[0])
			
 
				+                                            num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                            num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                            all_1 = find_repeat(s, N_s[1])
			
 
				+                                            temp1 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[1]:
			
 
				+                                                    temp1 = temp1 + 1
			
 
				+                                            num_index2 = all_1[temp1]
			
 
				+                                            num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                            num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                            all_2 = find_repeat(s, N_s[2])
			
 
				+                                            temp2 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            for jj in range(len(N_s[1])):
			
 
				+                                                if N_s[1][jj] == N_s[2]:
			
 
				+                                                    temp2 = temp2 + 1
			
 
				+                                            num_index3 = all_2[temp2]
			
 
				+                                            num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+                                            num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                            all_3 = find_repeat(s, N_s[3])
			
 
				+                                            temp3 = 0
			
 
				+                                            for ii in range(len(N_s[0])):
			
 
				+                                                if N_s[0][ii] == N_s[3]:
			
 
				+                                                    temp3 = temp3 + 1
			
 
				+                                            for jj in range(len(N_s[1])):
			
 
				+                                                if N_s[1][jj] == N_s[2]:
			
 
				+                                                    temp3 = temp3 + 1
			
 
				+                                            num_index4 = all_3[temp3]
			
 
				+                                            num_infer4 = s[num_index4 - len(N_s[3])]
			
 
				+                                            num_back4 = s[num_index4 + len(N_s[3])]
			
 
				+                                            if isinstance(N_s[0], str):
			
 
				+                                                N_s[0] = int(N_s[0])
			
 
				+                                            if isinstance(N_s[1], str):
			
 
				+                                                N_s[1] = int(N_s[1])
			
 
				+                                            if isinstance(N_s[2], str):
			
 
				+                                                N_s[2] = int(N_s[2])
			
 
				+                                            if isinstance(N_s[3], str):
			
 
				+                                                N_s[3] = int(N_s[3])
			
 
				+
			
 
				+                                            if num_back4 == '分' and num_back2 != '分' and num_infer4 == '.'and num_back3 == '.':  # *，共*题，每题*.*分
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_count = int(N_s[1])
			
 
				+                                                item_score = float(N_s[2]+'.'+N_s[3])
			
 
				+                                                item_total_score = int(item_score * item_count)
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+
			
 
				+                                            elif num_back3 == '分' and num_back4 != '分' and num_infer3 == '.'and num_back2 == '.':  # *，每题*.*分，共*题
			
 
				+                                                item_N = int(N_s[0])
			
 
				+                                                item_count = int(N_s[3])
			
 
				+                                                item_score = float(N_s[1] + '.' + N_s[2])
			
 
				+                                                item_total_score = int(item_score * item_count)
			
 
				+                                                type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                type_score_dict_ocr['item_count'] = item_count
			
 
				+                                                type_score_dict_ocr['item_score'] = item_score
			
 
				+                                                Score_structure_item = type_score_dict_ocr
			
 
				+                                                Score_structure.append(Score_structure_item)
			
 
				+                                                all_structure = {'volume_structure': -1,
			
 
				+                                                                 'Score_structure': Score_structure}
			
 
				+                                        break
			
 
				+                                    elif y == len_keyword_item2 - 1:
			
 
				+                                        if C_s.find(keyword_item4[0]) != -1:
			
 
				+                                            if len(N_s) == 2:  # *，*分
			
 
				+                                                num_index1 = s.index(N_s[0])
			
 
				+                                                num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                                num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                                if num_infer1 == '( ' or num_back1 == ')' or num_infer1 == '（ ' or num_back1 == '）':
			
 
				+                                                    break
			
 
				+                                                else:
			
 
				+                                                    all_1 = find_repeat(s, N_s[1])
			
 
				+                                                    temp1 = 0
			
 
				+                                                    for ii in range(len(N_s[0])):
			
 
				+                                                        if N_s[0][ii] == N_s[1]:
			
 
				+                                                            temp1 = temp1 + 1
			
 
				+                                                    num_index2 = all_1[temp1]
			
 
				+                                                    num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                                    num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                                    if isinstance(N_s[0], str):
			
 
				+                                                        N_s[0] = int(N_s[0])
			
 
				+                                                    if isinstance(N_s[1], str):
			
 
				+                                                        N_s[1] = int(N_s[1])
			
 
				+                                                    if int(N_s[0]) > 1000:
			
 
				+                                                        item_N =0
			
 
				+                                                        item_N1 = int(N_s[0][-4] + N_s[0][-3])
			
 
				+                                                        item_N2 = int(N_s[0][-2] + N_s[0][-1])
			
 
				+                                                        if item_N2 - item_N1 == 1:
			
 
				+                                                            item_N = [0, 0]
			
 
				+                                                            item_N = [item_N1, item_N2]
			
 
				+                                                        elif item_N2 - item_N1 == 2:
			
 
				+                                                            item_N = [0, 0, 0]
			
 
				+                                                            item_N = [item_N1, item_N1 + 1, item_N2]
			
 
				+                                                        elif item_N2 - item_N1 == 3:
			
 
				+                                                            item_N = [0, 0, 0, 0]
			
 
				+                                                            item_N = [item_N1, item_N1 + 1, item_N1 + 2, item_N2]
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+
			
 
				+                                                    else:
			
 
				+                                                        item_N = int(N_s[0])
			
 
				+                                                        item_total_score = int(N_s[1])
			
 
				+                                                        type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                        type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                        type_score_dict_ocr['item_count'] = -1
			
 
				+                                                        type_score_dict_ocr['item_score'] = -1
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                    break
			
 
				+                                            elif len(N_s) == 3:  # *，*分
			
 
				+                                                num_index1 = s.index(N_s[0])
			
 
				+                                                num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                                num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                                all_1 = find_repeat(s, N_s[1])
			
 
				+                                                temp1 = 0
			
 
				+                                                for ii in range(len(N_s[0])):
			
 
				+                                                    if N_s[0][ii] == N_s[1]:
			
 
				+                                                        temp1 = temp1 + 1
			
 
				+                                                num_index2 = all_1[temp1]
			
 
				+                                                num_infer2 = s[num_index2 - len(N_s[1])]
			
 
				+                                                num_back2 = s[num_index2 + len(N_s[1])]
			
 
				+                                                all_2 = find_repeat(s, N_s[2])
			
 
				+                                                temp2 = 0
			
 
				+                                                for ii in range(len(N_s[0])):
			
 
				+                                                    if N_s[0][ii] == N_s[2]:
			
 
				+                                                        temp2 = temp2 + 1
			
 
				+                                                for jj in range(len(N_s[1])):
			
 
				+                                                    if N_s[1][jj] == N_s[2]:
			
 
				+                                                        temp2 = temp2 + 1
			
 
				+                                                num_index3 = all_2[temp2]
			
 
				+                                                num_infer3 = s[num_index3 - len(N_s[2])]
			
 
				+
			
 
				+                                                if num_index3 + len(N_s[2]) < len(s):
			
 
				+                                                    num_back3 = s[num_index3 + len(N_s[2])]
			
 
				+                                                else:
			
 
				+                                                    num_back3 = []
			
 
				+                                                if isinstance(N_s[0], str):
			
 
				+                                                    N_s[0] = int(N_s[0])
			
 
				+                                                if isinstance(N_s[1], str):
			
 
				+                                                    N_s[1] = int(N_s[1])
			
 
				+                                                if isinstance(N_s[2], str):
			
 
				+                                                    N_s[2] = int(N_s[2])
			
 
				+
			
 
				+                                                if num_back2 == '分':
			
 
				+                                                    item_N = N_s[0]
			
 
				+                                                    item_total_score = int(N_s[1])
			
 
				+                                                    type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                    item_total_score = int(N_s[2])
			
 
				+                                                    type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                    type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                    type_score_dict_ocr['item_count'] = -1
			
 
				+                                                    type_score_dict_ocr['item_score'] = -1
			
 
				+                                                    Score_structure_item = type_score_dict_ocr
			
 
				+                                                    Score_structure.append(Score_structure_item)
			
 
				+                                                    all_structure = {'volume_structure': -1,
			
 
				+                                                                     'Score_structure': Score_structure}
			
 
				+                                                    break
			
 
				+                                                elif num_back3 == '分':
			
 
				+                                                    if int(N_s[1]) - int(N_s[0]) == 1:
			
 
				+                                                        item_N = [0, 0]
			
 
				+                                                        item_N = [int(N_s[0]), int(N_s[1])]
			
 
				+                                                    elif int(N_s[1]) - int(N_s[0]) == 2:
			
 
				+                                                        item_N = [0, 0, 0]
			
 
				+                                                        item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[1])]
			
 
				+                                                    elif int(N_s[1]) - int(N_s[0]) == 3:
			
 
				+                                                        item_N = [0, 0, 0, 0]
			
 
				+                                                        item_N = [int(N_s[0]), int(N_s[0]) + 1, int(N_s[0]) + 2,
			
 
				+                                                                  int(N_s[1])]
			
 
				+                                                    else:
			
 
				+                                                        break
			
 
				+                                                    item_total_score = int(N_s[2])
			
 
				+                                                    type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                    type_score_dict_ocr['item_N'] = item_N
			
 
				+                                                    type_score_dict_ocr['item_count'] = -1
			
 
				+                                                    type_score_dict_ocr['item_score'] = -1
			
 
				+                                                    Score_structure_item = type_score_dict_ocr
			
 
				+                                                    Score_structure.append(Score_structure_item)
			
 
				+                                                    all_structure = {'volume_structure': -1,
			
 
				+                                                                     'Score_structure': Score_structure}
			
 
				+                                                    break
			
 
				+                                            elif len(N_s) == 1:
			
 
				+                                                num_index1 = s.index(N_s[0])
			
 
				+                                                num_infer1 = s[num_index1 - len(N_s[0])]
			
 
				+                                                if num_index1 + len(N_s[0]) < len(s):
			
 
				+                                                    num_back1 = s[num_index1 + len(N_s[0])]
			
 
				+                                                    item_total_score = int(N_s[0])
			
 
				+                                                    type_score_dict_ocr['item_N'] = -1
			
 
				+                                                    type_score_dict_ocr['item_total_score'] = item_total_score
			
 
				+                                                    type_score_dict_ocr['item_count'] = -1
			
 
				+                                                    type_score_dict_ocr['item_score'] = -1
			
 
				+                                                    if num_back1 == '分':  # *分
			
 
				+                                                        Score_structure_item = type_score_dict_ocr
			
 
				+                                                        Score_structure.append(Score_structure_item)
			
 
				+                                                        all_structure = {'volume_structure': -1,
			
 
				+                                                                         'Score_structure': Score_structure}
			
 
				+                                                        break
			
 
				+                                break
			
 
				+
			
 
				+    return all_structure
			
--- a/segment/sheet_resolve/analysis/sheet/ocr_sheet.py
+++ b/segment/sheet_resolve/analysis/sheet/ocr_sheet.py
@@ -0,0 +1,218 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : ocr_sheet.py
			
 
				+import re
			
 
				+import numpy as np
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from segment.sheet_resolve.tools.utils import create_xml
			
 
				+from segment.sheet_resolve.analysis.sheet.sheet_adjust import adjust_item_edge_by_gray_image
			
 
				+
			
 
				+
			
 
				+def subfield_answer_sheet(img0, answer_sheet):
			
 
				+    h, w = img0.shape[:2]
			
 
				+    one_part = 0
			
 
				+    line_xmax_1 = 0
			
 
				+    line_xmax_2 = 0
			
 
				+    modules = []
			
 
				+    modules11 = []
			
 
				+    w_int_1 = w
			
 
				+    w_int_2 = round(w / 2)
			
 
				+    w_int_3 = round(w / 3)
			
 
				+    w_int_4 = round(w / 4)
			
 
				+    w_int_8 = round(w / 8)
			
 
				+    if w_int_8 < 50:
			
 
				+        w_int_8 = 50
			
 
				+
			
 
				+    key_modules_classes = ['choice', 'cloze', 'solve', 'solve0', 'composition0', 'composition', 'correction',
			
 
				+                           'ban_area', ]
			
 
				+    if h > w:  # 暂定答题卡高大于宽的为单栏
			
 
				+        one_part = 1
			
 
				+    else:
			
 
				+        temp1 = 0
			
 
				+        temp2 = 0
			
 
				+        for ele in answer_sheet:
			
 
				+            if ele["class_name"] in key_modules_classes:
			
 
				+                modules.append(ele)
			
 
				+        modules_xmin = sorted(modules, key=lambda x: (x['bounding_box']['xmin']))
			
 
				+        modules_xmax = sorted(modules, key=lambda x: (x['bounding_box']['xmax']))
			
 
				+        for i in range(len(modules_xmin) - 1):
			
 
				+            if i == 0 and modules_xmin[0]['bounding_box']['xmin'] - 0 > w_int_4:
			
 
				+                temp1 = 1
			
 
				+            else:
			
 
				+                if modules_xmin[i + 1]['bounding_box']['xmin'] - modules_xmin[i]['bounding_box']['xmax'] > w_int_4:
			
 
				+                    if modules11 == []:
			
 
				+                        line_xmax_1 = modules_xmin[i]['bounding_box']['xmax'] + 20
			
 
				+                        line_xmax_2 = modules_xmin[i + 1]['bounding_box']['xmin'] - 20
			
 
				+                    else:
			
 
				+                        modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+                        modules11_xmax = sorted(modules11)[-1]
			
 
				+                        line_xmax_1 = modules11_xmax + 20
			
 
				+                        line_xmax_2 = modules_xmin[i + 1]['bounding_box']['xmin'] - 20
			
 
				+                        modules11 = []
			
 
				+                    temp1 = 1
			
 
				+                    temp2 = 1
			
 
				+                    break
			
 
				+                elif modules_xmin[i + 1]['bounding_box']['xmin'] - modules_xmin[i]['bounding_box']['xmax'] > -w_int_8:
			
 
				+                    if temp1 == 0:
			
 
				+                        if modules11 == []:
			
 
				+                            line_xmax_1 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules_xmin[i]['bounding_box']['xmax']) / 2)
			
 
				+                        else:
			
 
				+                            modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+                            modules11_xmax = sorted(modules11)[-1]
			
 
				+                            line_xmax_1 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules11_xmax) / 2)
			
 
				+                            modules11 = []
			
 
				+                        temp1 = 1
			
 
				+                    elif temp1 == 1:
			
 
				+                        if modules11 == []:
			
 
				+                            line_xmax_2 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules_xmin[i]['bounding_box']['xmax']) / 2)
			
 
				+                        else:
			
 
				+                            modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+                            modules11_xmax = sorted(modules11)[-1]
			
 
				+                            line_xmax_2 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules11_xmax) / 2)
			
 
				+                        temp2 = 1
			
 
				+                else:
			
 
				+                    modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+
			
 
				+        if temp1 == 0 and temp2 == 0:
			
 
				+            if modules_xmax[-1]['bounding_box']['xmax'] - w < -(2 * w_int_4):
			
 
				+                line_xmax_1 = modules_xmax[-1]['bounding_box']['xmax'] + 20
			
 
				+                line_xmax_2 = 2 * w_int_3
			
 
				+            elif modules_xmax[-1]['bounding_box']['xmax'] - w < -w_int_4:
			
 
				+                line_xmax_1 = modules_xmax[-1]['bounding_box']['xmax'] + 20
			
 
				+        elif temp1 == 1 and temp2 == 0:
			
 
				+            if modules_xmax[-1]['bounding_box']['xmax'] - w < -w_int_4:
			
 
				+                line_xmax_2 = 2 * w_int_3
			
 
				+
			
 
				+    return line_xmax_1, line_xmax_2
			
 
				+
			
 
				+
			
 
				+def tell_columns(image, sheet_dict):
			
 
				+    h, w = image.shape[0], image.shape[1]
			
 
				+    # sheet_dict = adjust_item_edge_by_gray_image(image, sheet_dict)
			
 
				+    x1, x2 = subfield_answer_sheet(image, sheet_dict)
			
 
				+
			
 
				+    split_x = [px for px in [x1, x2] if px != 0]
			
 
				+
			
 
				+    if not split_x:
			
 
				+        split_x = [w-1]
			
 
				+
			
 
				+    return split_x
			
 
				+
			
 
				+
			
 
				+def ocr2sheet(image, sheet_dict, raw_ocr, xml_path=None):
			
 
				+    col_split_list = tell_columns(image, sheet_dict)
			
 
				+    digital_p = r'\d'
			
 
				+    eng_char_p = '[\u0041-\u005a|\u0061-\u007a]'  # english
			
 
				+    chn_char_p = '[\u4e00-\u9fa5]'  # chinese
			
 
				+    sp_nums_p = '[①②③④⑤⑥⑦⑧⑨⑩]'
			
 
				+    punctuation_p = '[，；：。,;:·√()（）]+'
			
 
				+
			
 
				+    pattern_list = [chn_char_p]
			
 
				+
			
 
				+    ocr_res_len = len(raw_ocr)
			
 
				+    for i, words_line in enumerate(raw_ocr):
			
 
				+        words = words_line['words']
			
 
				+        words = words.replace(' ', '').upper()  # 去除空格
			
 
				+        loc = words_line['location']
			
 
				+        top = int(loc['top'])
			
 
				+        left = int(loc['left'])
			
 
				+        width = int(loc['width'])
			
 
				+        height = int(loc['height'])
			
 
				+        loc.update({'right': left + width, 'bottom': top + height,
			
 
				+                    'mid_x': left + width // 2, 'mid_y': top + height // 2})
			
 
				+
			
 
				+    raw_ocr = sorted(raw_ocr, key=lambda x:x['location']['mid_x'])
			
 
				+    mid_x_list = [ele['location']['mid_x'] for ele in raw_ocr]
			
 
				+    col_list = []
			
 
				+    for split in col_split_list:
			
 
				+        mid_x_list.append(split)
			
 
				+        mid_x_list = sorted(mid_x_list)
			
 
				+        split_index = mid_x_list.index(split)
			
 
				+        col_list.append(raw_ocr[:split_index])
			
 
				+        raw_ocr = raw_ocr[split_index:]
			
 
				+        mid_x_list = mid_x_list[split_index+1:]
			
 
				+
			
 
				+    if raw_ocr:
			
 
				+        col_list.append(raw_ocr)
			
 
				+
			
 
				+    block_list = []
			
 
				+    for ocr_res in col_list:
			
 
				+        ocr_res = sorted(ocr_res, key=lambda x: x['location']['top'])
			
 
				+        raw_chn_index = []
			
 
				+        for i, words_line in enumerate(ocr_res):
			
 
				+            words = words_line['words']
			
 
				+            loc = words_line['location']
			
 
				+            width = int(loc['width'])
			
 
				+            height = int(loc['height'])
			
 
				+            if width >= height:
			
 
				+                match_nums_list = []
			
 
				+                for p in pattern_list:
			
 
				+                    words_m = re.finditer(p, words)
			
 
				+                    match_index_list = [(m.group(), m.span()) for m in words_m if m]
			
 
				+                    match_nums = len(match_index_list) * 2
			
 
				+                    match_nums_list.append(match_nums)
			
 
				+
			
 
				+                if sum(match_nums_list) >= 2:
			
 
				+                    raw_chn_index.append(i)
			
 
				+
			
 
				+        # print(raw_chn_index)
			
 
				+
			
 
				+        left_limit = min([ele['location']['left'] for ele in ocr_res
			
 
				+                          if ele['location']['width'] >= ele['location']['height']]) - 10
			
 
				+        right_limit = max([ele['location']['right'] for ele in ocr_res
			
 
				+                           if ele['location']['width'] >= ele['location']['height']]) + 10
			
 
				+        chn_index = raw_chn_index.copy()
			
 
				+        if ocr_res_len - 1 not in raw_chn_index:
			
 
				+            chn_index.append(len(ocr_res) - 1)
			
 
				+        split_index_arr = np.array(chn_index)
			
 
				+
			
 
				+        numbers_interval = np.abs(split_index_arr[1:] - split_index_arr[:-1])
			
 
				+
			
 
				+        split_index = []
			
 
				+        for i, interval in enumerate(numbers_interval):
			
 
				+            if interval > np.mean(numbers_interval) and interval > 2:
			
 
				+                split_index.append(i)
			
 
				+
			
 
				+        split_index = sorted(list(set(split_index)))
			
 
				+        # print('split_index', split_index)
			
 
				+
			
 
				+        for i, ele in enumerate(split_index):
			
 
				+            top_limit = raw_chn_index[ele]
			
 
				+            if top_limit == len(ocr_res) - 1:
			
 
				+                break
			
 
				+            else:
			
 
				+                # 下分界行的上一行
			
 
				+                bottom_limit = chn_index[split_index[i] + 1]
			
 
				+                if bottom_limit in raw_chn_index:
			
 
				+                    while int(ocr_res[bottom_limit - 1]["location"]['height']) >= int(
			
 
				+                            ocr_res[bottom_limit - 1]["location"]['width']):
			
 
				+                        bottom_limit = bottom_limit - 1
			
 
				+
			
 
				+                    bottom = int(
			
 
				+                        ocr_res[bottom_limit - 1]["location"]["top"] +
			
 
				+                        ocr_res[bottom_limit - 1]["location"]["height"] * 1.2)
			
 
				+                else:
			
 
				+                    bottom_limit = chn_index[-1]
			
 
				+                    bottom = int(
			
 
				+                        ocr_res[bottom_limit]["location"]["top"] +
			
 
				+                        ocr_res[bottom_limit]["location"]["height"] * 1.2)
			
 
				+
			
 
				+            # 上分界行的下一行
			
 
				+            top = int(ocr_res[top_limit + 1]["location"]["top"] - 0.2 * ocr_res[top_limit + 1]["location"]["height"])
			
 
				+
			
 
				+            left = left_limit
			
 
				+            right = right_limit
			
 
				+
			
 
				+            block_list.append({'loc': [left, top, right, bottom]})
			
 
				+
			
 
				+    # tree = ET.parse(xml_path)
			
 
				+    # for index, choice_m in enumerate(block_list):
			
 
				+    #     xmin, ymin, xmax, ymax = choice_m["loc"]
			
 
				+    #     tree = create_xml(f'block_{index}', tree, str(xmin), str(ymin), str(xmax), str(ymax))
			
 
				+    #
			
 
				+    # tree.write(xml_path)
			
 
				+
			
 
				+    return block_list
			
--- a/segment/sheet_resolve/analysis/sheet/sheet_adjust.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_adjust.py
@@ -0,0 +1,485 @@
 
				+# @Author  : mbq
			
 
				+# @File    : sheet_adjust.py
			
 
				+# @Time    : 2019/9/26 0026 上午 10:12
			
 
				+import copy
			
 
				+import json
			
 
				+import os
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+''' 根据CV检测矩形框 调整模型输出框'''
			
 
				+''' LSD直线检测 暂时改用 霍夫曼检测'''
			
 
				+
			
 
				+
			
 
				+# 用户自己计算阈值
			
 
				+def custom_threshold(gray, type_inv=cv2.THRESH_BINARY):
			
 
				+    # gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  #把输入图像灰度化
			
 
				+    h, w = gray.shape[:2]
			
 
				+    m = np.reshape(gray, [1, w * h])
			
 
				+    mean = m.sum() / (w * h)
			
 
				+    ret, binary = cv2.threshold(gray, min(230, mean), 255, type_inv)
			
 
				+    return binary
			
 
				+
			
 
				+
			
 
				+# 开运算
			
 
				+def open_img(image_bin, kera=(5, 5)):
			
 
				+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kera)
			
 
				+    opening = cv2.morphologyEx(image_bin, cv2.MORPH_OPEN, kernel)
			
 
				+    return opening
			
 
				+
			
 
				+
			
 
				+# 闭运算
			
 
				+def close_img(image_bin, kera=(5, 5)):
			
 
				+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kera)
			
 
				+    closing = cv2.morphologyEx(image_bin, cv2.MORPH_CLOSE, kernel)
			
 
				+    return closing
			
 
				+
			
 
				+
			
 
				+# 腐蚀
			
 
				+def erode_img(image, kernel_size):
			
 
				+    kernel = np.ones((kernel_size, kernel_size), np.uint8)
			
 
				+    erosion = cv2.erode(image, kernel)
			
 
				+    return erosion
			
 
				+
			
 
				+
			
 
				+# 膨胀
			
 
				+def dilation_img(image, kernel_size):
			
 
				+    kernel = np.ones((kernel_size, kernel_size), np.uint8)
			
 
				+    dilaion = cv2.dilate(image, kernel)
			
 
				+    return dilaion
			
 
				+
			
 
				+
			
 
				+# 图像padding
			
 
				+def image_padding(image, padding_w, padding_h):
			
 
				+    h, w = image.shape[:2]
			
 
				+    if (3 == len(image.shape)):
			
 
				+        image_new = np.zeros((h + padding_h, w + padding_w, 3), np.uint8)
			
 
				+    else:
			
 
				+        image_new = np.zeros((h + padding_h, w + padding_w), np.uint8)
			
 
				+    image_new[int(padding_h / 2):int(padding_h / 2) + h, int(padding_w / 2):int(padding_w / 2) + w] = image
			
 
				+    return image_new
			
 
				+
			
 
				+
			
 
				+def horizontal_projection(img_bin, mut=0):
			
 
				+    '''水平方向投影'''
			
 
				+    h, w = img_bin.shape[:2]
			
 
				+    hist = [0 for i in range(w)]
			
 
				+    for x in range(w):
			
 
				+        tmp = 0
			
 
				+        for y in range(h):
			
 
				+            if img_bin[y][x]:
			
 
				+                tmp += 1
			
 
				+        if tmp > mut:
			
 
				+            hist[x] = tmp
			
 
				+    return hist
			
 
				+
			
 
				+
			
 
				+def vertical_projection(img_bin, mut=0):
			
 
				+    """垂直方向投影"""
			
 
				+    h, w = img_bin.shape[:2]
			
 
				+    hist = [0 for i in range(h)]
			
 
				+    for y in range(h):
			
 
				+        tmp = 0
			
 
				+        for x in range(w):
			
 
				+            if img_bin[y][x]:
			
 
				+                tmp += 1
			
 
				+        if tmp > mut:
			
 
				+            hist[y] = tmp
			
 
				+    return hist
			
 
				+
			
 
				+
			
 
				+def get_white_blok_pos(arry, blok_w=0):
			
 
				+    '''获取投影结果中的白色块'''
			
 
				+    pos = []
			
 
				+    start = 1
			
 
				+    x0 = 0
			
 
				+    x1 = 0
			
 
				+    for idx, val in enumerate(arry):
			
 
				+        if (start):
			
 
				+            if val:
			
 
				+                x0 = idx
			
 
				+                start = 0
			
 
				+        else:
			
 
				+            if (0 == val):
			
 
				+                x1 = idx
			
 
				+                start = 1
			
 
				+                if (x1 - x0 > blok_w):
			
 
				+                    pos.append((x0, x1))
			
 
				+    if (0 == start):
			
 
				+        x1 = len(arry) - 1
			
 
				+        if (x1 - x0 > blok_w):
			
 
				+            pos.append((x0, x1))
			
 
				+    return pos
			
 
				+
			
 
				+
			
 
				+def get_decide_boberLpa(itemRe, itemGT):
			
 
				+    '''
			
 
				+    IOU 计算
			
 
				+    '''
			
 
				+    x1 = int(itemRe[0])
			
 
				+    y1 = int(itemRe[1])
			
 
				+    x1_ = int(itemRe[2])
			
 
				+    y1_ = int(itemRe[3])
			
 
				+    width1 = x1_ - x1
			
 
				+    height1 = y1_ - y1
			
 
				+
			
 
				+    x2 = int(float(itemGT[0]))
			
 
				+    y2 = int(float(itemGT[1]))
			
 
				+    x2_ = int(float(itemGT[2]))
			
 
				+    y2_ = int(float(itemGT[3]))
			
 
				+    width2 = x2_ - x2
			
 
				+    height2 = y2_ - y2
			
 
				+
			
 
				+    endx = max(x1_, x2_)
			
 
				+    startx = min(x1, x2)
			
 
				+    width = width1 + width2 - (endx - startx)
			
 
				+
			
 
				+    endy = max(y1_, y2_)
			
 
				+    starty = min(y1, y2)
			
 
				+    height = height1 + height2 - (endy - starty)
			
 
				+
			
 
				+    AreaJc = 0
			
 
				+    ratio = 0.0
			
 
				+
			
 
				+    if width <= 0 or height <= 0:
			
 
				+        res = 0
			
 
				+    else:
			
 
				+        AreaJc = width * height
			
 
				+        AreaRe = width1 * height1
			
 
				+        AreaGT = width2 * height2
			
 
				+        ratio = float(AreaJc) / float((AreaGT + AreaRe - AreaJc))
			
 
				+    return ratio
			
 
				+
			
 
				+
			
 
				+# 查找连通区域 微调专用 不通用
			
 
				+def get_contours(image):
			
 
				+    # image = cv2.imread(img_path,0)
			
 
				+    # if debug: plt_imshow(image)
			
 
				+    image_binary = custom_threshold(image)
			
 
				+    # if debug: plt_imshow(image_binary)
			
 
				+    # if debug: cv2.imwrite(os.path.join(file_dir,"bin.jpg"),image_binary)
			
 
				+    image_dilation = open_img(image_binary, kera=(5, 1))
			
 
				+    image_dilation = open_img(image_dilation, kera=(1, 5))
			
 
				+    # if debug: plt_imshow(image_dilation)
			
 
				+    # if debug: cv2.imwrite(os.path.join(file_dir,"dia.jpg"),image_dilation)
			
 
				+    _, labels, stats, centers = cv2.connectedComponentsWithStats(image_dilation)
			
 
				+    rects = []
			
 
				+    img_h, img_w = image.shape[:2]
			
 
				+    for box in stats:
			
 
				+        x0 = int(box[0])
			
 
				+        y0 = int(box[1])
			
 
				+        w = int(box[2])
			
 
				+        h = int(box[3])
			
 
				+        area = int(box[4])
			
 
				+        if (w < img_w / 5 or w > img_w - 10 or h < 50 or h > img_h - 10):  # 常见框大小限定
			
 
				+            continue
			
 
				+        if (img_w > img_h):  # 多栏答题卡 w大于宽度的一般肯定是错误的框
			
 
				+            if (w > img_w / 2):
			
 
				+                continue
			
 
				+        if (area < w * h / 3):  # 大框套小框 中空白色区域形成的面积 排除
			
 
				+            continue
			
 
				+        rects.append((x0, y0, x0 + w, y0 + h))
			
 
				+    return rects
			
 
				+
			
 
				+
			
 
				+def adjust_alarm_info(image, box):
			
 
				+    '''
			
 
				+    调整上下坐标 排除内部含有了边框线情况
			
 
				+    左右调整只有100%确认的 从边界开始遇到的第一个非0列就终止 误伤情况太多
			
 
				+    LSD算法转不过来  霍夫曼检测不靠谱 连通区域测试后排除误伤情况太多  改用投影
			
 
				+    image: 灰度 非 二值图
			
 
				+    box  : 坐标信息
			
 
				+    '''
			
 
				+    # debug
			
 
				+    # debug = 0
			
 
				+
			
 
				+    if (image is None):
			
 
				+        print("error image")
			
 
				+        return box
			
 
				+    img_box = image[box[1]:box[3], box[0]:box[2]]
			
 
				+    h, w = img_box.shape[:2]
			
 
				+
			
 
				+    # debug
			
 
				+    # if debug: ia.imshow(img_box)
			
 
				+
			
 
				+    img_bin = custom_threshold(img_box, type_inv=cv2.THRESH_BINARY_INV)
			
 
				+    img_padding = image_padding(img_bin, 100, 100)
			
 
				+    img_close = close_img(img_padding, kera=(30, 3))
			
 
				+    img_back = img_close[50:50 + h, 50:50 + w]
			
 
				+
			
 
				+    # debug
			
 
				+    # if debug: ia.imshow(img_back)
			
 
				+
			
 
				+    # 垂直投影 找 left top
			
 
				+    hist_vert = vertical_projection(img_back, mut=h / 4)
			
 
				+
			
 
				+    # debug
			
 
				+    # if debug:
			
 
				+    #     print(hist_vert)
			
 
				+    #     black_img_h = np.zeros_like(img_back)
			
 
				+    #     for idx, val in enumerate(hist_vert):
			
 
				+    #         if (val == 0):
			
 
				+    #             continue
			
 
				+    #         for x in range(val):
			
 
				+    #             black_img_h[idx][x] = 255
			
 
				+    #     ia.imshow(black_img_h)
			
 
				+
			
 
				+    y_pos = get_white_blok_pos(hist_vert, 2)
			
 
				+    if (len(y_pos) == 0):
			
 
				+        return box
			
 
				+
			
 
				+    # 获取最大的作为alarm_info的区域
			
 
				+    max_id = 0
			
 
				+    max_len = 0
			
 
				+    for idx, pos_tmp in enumerate(y_pos):
			
 
				+        pos_len = abs(pos_tmp[1] - pos_tmp[0])
			
 
				+        if (pos_len > max_len):
			
 
				+            max_id = idx
			
 
				+            max_len = pos_len
			
 
				+
			
 
				+    # debug to show
			
 
				+    # if debug:
			
 
				+    #     img_show = cv2.cvtColor(img_box, cv2.COLOR_GRAY2BGR)
			
 
				+    #     cv2.line(img_show, (0, y_pos[max_id][0]), (w - 1, y_pos[max_id][0]), (0, 0, 255), 2)
			
 
				+    #     cv2.line(img_show, (0, y_pos[max_id][1]), (w - 1, y_pos[max_id][1]), (0, 0, 255), 2)
			
 
				+    #     ia.imshow(img_show)
			
 
				+
			
 
				+    # 左右 的微调
			
 
				+    img_next = img_bin[y_pos[max_id][0]:y_pos[max_id][1], 0:w - 1]
			
 
				+    img_lr_close = open_img(img_next, kera=(1, 1))
			
 
				+    img_lr_close = close_img(img_lr_close, kera=(3, 1))
			
 
				+
			
 
				+    # debug
			
 
				+    # if debug: ia.imshow(img_lr_close)
			
 
				+
			
 
				+    hist_proj = horizontal_projection(img_lr_close, mut=1)
			
 
				+    w_len = len(hist_proj)
			
 
				+    new_left = 0
			
 
				+    new_right = w_len - 1
			
 
				+    b_flag = [0, 0]
			
 
				+    for idx, val in enumerate(hist_proj):
			
 
				+        if (0 == b_flag[0]):
			
 
				+            if (val != 0):
			
 
				+                new_left = idx
			
 
				+                b_flag[0] = 1
			
 
				+        if (0 == b_flag[1]):
			
 
				+            if (hist_proj[w_len - 1 - idx] != 0):
			
 
				+                new_right = w_len - idx - 1
			
 
				+                b_flag[1] = 1
			
 
				+        if (b_flag[0] and b_flag[1]):
			
 
				+            break
			
 
				+
			
 
				+    new_top = box[1] + y_pos[max_id][0]
			
 
				+    new_bottom = box[1] + y_pos[max_id][1]
			
 
				+    new_left += box[0]
			
 
				+    new_right += box[0]
			
 
				+    box[1] = new_top
			
 
				+    box[3] = new_bottom
			
 
				+    box[0] = new_left
			
 
				+    box[2] = new_right
			
 
				+
			
 
				+    return box
			
 
				+
			
 
				+
			
 
				+def adjust_zg_info(image, box, cv_boxes):
			
 
				+    '''
			
 
				+    调整大区域的box
			
 
				+    1、cvbox要与box纵坐标有交叉
			
 
				+    2、IOU值大于0。8时 默认相等拷贝区域坐标
			
 
				+    '''
			
 
				+    if (image is None):
			
 
				+        return box
			
 
				+
			
 
				+    min_rotio = 0.5
			
 
				+    img_box = image[box[1]:box[3], box[0]:box[2]]
			
 
				+    h, w = img_box.shape[:2]
			
 
				+
			
 
				+    jc_boxes = []  # 记录与box存在交叉的 cv_boxes
			
 
				+    tmp_rotio = 0
			
 
				+    rc_mz = box
			
 
				+    for idx, cv_box in enumerate(cv_boxes):
			
 
				+        if ((box[1] - 10) > (cv_box[3])):  # 首先要保证纵坐标有交叉
			
 
				+            continue
			
 
				+        if ((box[3] + 10) < cv_box[1]):
			
 
				+            continue
			
 
				+
			
 
				+        jc_x = max(box[0], cv_box[0])
			
 
				+        jc_y = min(box[2], cv_box[2])
			
 
				+        bj_x = min(box[0], cv_box[0])
			
 
				+        bj_y = max(box[2], cv_box[2])
			
 
				+
			
 
				+        rt = abs(jc_y - jc_x) * 1.0 / abs(bj_y - bj_x) * 1.0
			
 
				+        if (rt < min_rotio):
			
 
				+            continue
			
 
				+        jc_boxes.append(cv_box)
			
 
				+        if (rt > tmp_rotio):
			
 
				+            rc_mz = cv_box
			
 
				+            tmp_rotio = rt
			
 
				+    # 判断 调整
			
 
				+    if (len(jc_boxes) != 0):
			
 
				+        box[0] = rc_mz[0]
			
 
				+        box[2] = rc_mz[2]
			
 
				+        b_find = 0
			
 
				+        frotio = 0.0
			
 
				+        rc_biggst = rc_mz
			
 
				+        for mz_box in jc_boxes:
			
 
				+            iou = get_decide_boberLpa(mz_box, box)
			
 
				+            if (iou > 0.8):
			
 
				+                b_find = 1
			
 
				+                frotio = iou
			
 
				+                rc_biggst = mz_box
			
 
				+        if (b_find):
			
 
				+            box[1] = rc_biggst[1]
			
 
				+            box[3] = rc_biggst[3]
			
 
				+    return box
			
 
				+
			
 
				+
			
 
				+def adjust_item_edge(img_path, reback_json):
			
 
				+    '''
			
 
				+    根据图像的CV分析结果和 模型直接输出结果 对模型输出的边框做微调
			
 
				+    1、外接矩形查找
			
 
				+    2、LSD直线检测 替换方法 霍夫曼直线检测
			
 
				+    3、只处理有把握的情况 任何含有不确定因素的一律不作任何处理
			
 
				+    img_path: 待处理图像绝对路径
			
 
				+    re_json : 模型输出结果
			
 
				+    '''
			
 
				+    debug = 1
			
 
				+    # 存放新的结果
			
 
				+    re_json = copy.deepcopy(reback_json)
			
 
				+    if (not os.path.exists(img_path) or 0 == len(re_json)):
			
 
				+        return
			
 
				+    image = cv2.imread(img_path, 0)
			
 
				+    # 获取CV连通区域结果
			
 
				+    cv_boxes = get_contours(image)
			
 
				+
			
 
				+    if debug:
			
 
				+        print(len(cv_boxes))
			
 
				+        image_draw = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
			
 
				+        # for item in cv_boxes:
			
 
				+        #     cv2.rectangle(image_draw, (item[0], item[1]), (item[2], item[3]), (0, 0, 250), 2)
			
 
				+        # cv2.imwrite(os.path.join(file_dir, "show.jpg"), image_draw)
			
 
				+    # 循环处理指定的box
			
 
				+    for idx, item in enumerate(re_json):
			
 
				+        name = item["class_name"]
			
 
				+        box = [item["bounding_box"]["xmin"], item["bounding_box"]["ymin"], item["bounding_box"]["xmax"],
			
 
				+               item["bounding_box"]["ymax"]]
			
 
				+        # print(name ,box)
			
 
				+        if (name == "alarm_info" or name == "page" or name == "type_score"):
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
			
 
				+            new_box = adjust_alarm_info(image, box)
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
			
 
				+            item["bounding_box"]["xmin"] = box[0]
			
 
				+            item["bounding_box"]["xmax"] = box[2]
			
 
				+            item["bounding_box"]["ymin"] = box[1]
			
 
				+            item["bounding_box"]["ymax"] = box[3]
			
 
				+        elif (name == "solve" or name == "solve0"
			
 
				+              or name == "cloze" or name == "choice"
			
 
				+              or name == "composition" or name == "composition0"):
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
			
 
				+            new_box = adjust_zg_info(image, box, cv_boxes)
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
			
 
				+            item["bounding_box"]["xmin"] = box[0]
			
 
				+            item["bounding_box"]["xmax"] = box[2]
			
 
				+            item["bounding_box"]["ymin"] = box[1]
			
 
				+            item["bounding_box"]["ymax"] = box[3]
			
 
				+        else:
			
 
				+            pass
			
 
				+    if debug:
			
 
				+        cv2.imwrite(os.path.join(r"E:\data\aug_img\adjust", "show.jpg"), image_draw)
			
 
				+    return re_json
			
 
				+
			
 
				+
			
 
				+def adjust_item_edge_by_gray_image(image, reback_json):
			
 
				+    '''
			
 
				+    根据图像的CV分析结果和 模型直接输出结果 对模型输出的边框做微调
			
 
				+    1、外接矩形查找
			
 
				+    2、LSD直线检测 替换方法 霍夫曼直线检测
			
 
				+    3、只处理有把握的情况 任何含有不确定因素的一律不作任何处理
			
 
				+    img_path: 待处理图像绝对路径
			
 
				+    re_json : 模型输出结果
			
 
				+    '''
			
 
				+    debug = 0
			
 
				+    re_json = copy.deepcopy(reback_json)
			
 
				+    # 存放新的结果
			
 
				+    # 获取CV连通区域结果
			
 
				+    if len(image.shape) > 2:
			
 
				+        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
			
 
				+    cv_boxes = get_contours(image)
			
 
				+
			
 
				+    if debug:
			
 
				+        print(len(cv_boxes))
			
 
				+        image_draw = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
			
 
				+        # for item in cv_boxes:
			
 
				+        #     cv2.rectangle(image_draw, (item[0], item[1]), (item[2], item[3]), (0, 0, 250), 2)
			
 
				+        # cv2.imwrite(os.path.join(file_dir, "show.jpg"), image_draw)
			
 
				+    # 循环处理指定的box
			
 
				+    for idx, item in enumerate(re_json):
			
 
				+        name = item["class_name"]
			
 
				+        box = [item["bounding_box"]["xmin"], item["bounding_box"]["ymin"], item["bounding_box"]["xmax"],
			
 
				+               item["bounding_box"]["ymax"]]
			
 
				+        # print(name ,box)
			
 
				+        if (name == "alarm_info" or name == "page" or name == "type_score"):
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
			
 
				+            new_box = adjust_alarm_info(image, box)
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
			
 
				+            item["bounding_box"]["xmin"] = box[0]
			
 
				+            item["bounding_box"]["xmax"] = box[2]
			
 
				+            item["bounding_box"]["ymin"] = box[1]
			
 
				+            item["bounding_box"]["ymax"] = box[3]
			
 
				+        elif (name == "solve" or name == "solve0"
			
 
				+              or name == "cloze" or name == "choice"
			
 
				+              or name == "composition" or name == "composition0"):
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
			
 
				+            new_box = adjust_zg_info(image, box, cv_boxes)
			
 
				+            if debug:
			
 
				+                cv2.rectangle(image_draw, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
			
 
				+            item["bounding_box"]["xmin"] = box[0]
			
 
				+            item["bounding_box"]["xmax"] = box[2]
			
 
				+            item["bounding_box"]["ymin"] = box[1]
			
 
				+            item["bounding_box"]["ymax"] = box[3]
			
 
				+        else:
			
 
				+            pass
			
 
				+    if debug:
			
 
				+        cv2.imwrite(os.path.join(r"E:\data\aug_img\adjust", "show.jpg"), image_draw)
			
 
				+    return re_json
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     '''服务端传入数据为json内数据 和图像
			
 
				+#     使用方法：
			
 
				+#     new_json = adjust_item_edge(img_path, key_json)
			
 
				+#     key_json : regions 数组
			
 
				+#     new_json : 调整后的结果 size == key_json.size
			
 
				+#     '''
			
 
				+#
			
 
				+#     print("前置解析")
			
 
				+#     file_dir = r"E:\data\aug_img\adjust"
			
 
				+#     img_path = os.path.join(file_dir, "7642572.jpg")
			
 
				+#     json_path = os.path.join(file_dir, "7642572.json")
			
 
				+#     print(img_path, json_path)
			
 
				+#     # 读取json
			
 
				+#     output_ios = open(json_path).read()
			
 
				+#     output_json = json.loads(output_ios)
			
 
				+#     for item in output_json:
			
 
				+#         # print(item,output_json[item])
			
 
				+#         if (item == "regions"):
			
 
				+#             key_json = output_json[item]
			
 
				+#     # print(len(key_json))
			
 
				+#     for idx, item in enumerate(key_json):
			
 
				+#         # print(key_json[idx])
			
 
				+#         if (item["class_name"] == "alarm_info"):
			
 
				+#             key_json[idx]["bounding_box"]["ymin"] -= 10
			
 
				+#             key_json[idx]["bounding_box"]["ymax"] += 10
			
 
				+#         # print(key_json[idx])
			
 
				+#
			
 
				+#     new_json = adjust_item_edge(img_path, key_json)
			
 
				+#     for idx, val in enumerate(key_json):
			
 
				+#         print(key_json[idx])
			
 
				+#         print(new_json[idx])
			
--- a/segment/sheet_resolve/analysis/sheet/sheet_infer.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_infer.py
@@ -0,0 +1,1192 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : sheet_infer.py
			
 
				+# @Time    : 2019/9/26 0026 上午 10:18
			
 
				+import itertools
			
 
				+import os
			
 
				+import re
			
 
				+import traceback
			
 
				+import xml.etree.cElementTree as ET
			
 
				+from itertools import combinations
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from shapely.geometry import LineString, Polygon
			
 
				+
			
 
				+from segment.sheet_resolve.tools.utils import create_xml, crop_region_direct, crop_region, image_hash_detection_simple
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+
			
 
				+ASPECT_FLAG = 4.0
			
 
				+REMAIN_RATIO = 0.1
			
 
				+PIX_VALUE_LOW = 15.0
			
 
				+PIX_VALUE_HIGH = 245
			
 
				+TYPE_SCORE_MNS = 0.5
			
 
				+
			
 
				+
			
 
				+def _get_char_near_img(char_location, near):
			
 
				+    left = char_location['left']
			
 
				+    top = char_location['top']
			
 
				+    width = char_location['width']
			
 
				+    height = char_location['height']
			
 
				+
			
 
				+    next_location = char_location
			
 
				+
			
 
				+    if near == 'left':
			
 
				+        next_location = {'left': int(left - 1.5 * width), 'top': top, 'width': width, 'height': height}
			
 
				+    if near == 'right':
			
 
				+        next_location = {'left': int(left + 1.5 * width), 'top': top, 'width': width, 'height': height}
			
 
				+    if near == 'up':
			
 
				+        next_location = {'left': left, 'top': int(top - 1.5 * height), 'width': width, 'height': height}
			
 
				+    if near == 'down':
			
 
				+        next_location = {'left': left, 'top': int(top + 1.5 * height), 'width': width, 'height': height}
			
 
				+
			
 
				+    return next_location
			
 
				+
			
 
				+
			
 
				+def _get_board(image, location, direction):
			
 
				+    std = 0
			
 
				+    next_location = location
			
 
				+    while std < 10:
			
 
				+        next_location = _get_char_near_img(next_location, direction)
			
 
				+        box = (next_location['left'], next_location['top'],
			
 
				+               next_location['left'] + next_location['width'],
			
 
				+               next_location['top'] + next_location['height'],)
			
 
				+        region = crop_region_direct(image, box)
			
 
				+        std = np.var(region)
			
 
				+
			
 
				+    return next_location
			
 
				+
			
 
				+
			
 
				+def infer_bar_code(image, ocr_dict_list, attention_region):
			
 
				+    attention_polygon_list = []
			
 
				+    for attention in attention_region:
			
 
				+        coordinates = attention['bounding_box']
			
 
				+        xmin = coordinates['xmin']
			
 
				+        ymin = coordinates['ymin']
			
 
				+        xmax = coordinates['xmax']
			
 
				+        ymax = coordinates['ymax']
			
 
				+        attention_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+        attention_polygon_list.append(attention_polygon)
			
 
				+
			
 
				+    img_cols, img_rows = image.shape[0], image.shape[1]
			
 
				+    pattern = r'条形码|条码|条形|形码'
			
 
				+    bar_code_dict_list = []
			
 
				+
			
 
				+    for index, ele in enumerate(ocr_dict_list):
			
 
				+        words = ele['words'].replace(' ', '')
			
 
				+        chars_list = ele['chars']
			
 
				+        length = len(chars_list)
			
 
				+        match_list = [(m.group(), m.span()) for m in re.finditer(pattern, words) if m]
			
 
				+        if match_list:  # 不为空
			
 
				+            for match in match_list:
			
 
				+                start_index = match[1][0]
			
 
				+                end_index = match[1][1] - 1
			
 
				+                for i in range(start_index - 1, -1, -1):
			
 
				+                    xmin_start = chars_list[start_index]['location']['left']
			
 
				+                    start_tmp = chars_list[i]['location']['left'] + 2 * chars_list[i]['location']['width']
			
 
				+                    if xmin_start <= start_tmp:
			
 
				+                        start_index = i
			
 
				+
			
 
				+                for i in range(end_index, length):
			
 
				+                    xmax_end = chars_list[end_index]['location']['left'] + 2 * chars_list[i]['location']['width']
			
 
				+                    end_tmp = chars_list[i]['location']['left']
			
 
				+                    if xmax_end >= end_tmp:
			
 
				+                        end_index = i
			
 
				+
			
 
				+                bar_code_char_xmin = chars_list[start_index]['location']["left"]
			
 
				+                bar_code_char_xmax = chars_list[end_index]['location']["left"]+chars_list[end_index]['location']["width"]
			
 
				+                bar_code_char_ymin = chars_list[start_index]['location']["top"]
			
 
				+                bar_code_char_ymax = chars_list[end_index]['location']["top"]+chars_list[end_index]['location']["height"]
			
 
				+                bar_code_char_polygon = Polygon([(bar_code_char_xmin, bar_code_char_ymin),
			
 
				+                                                 (bar_code_char_xmax, bar_code_char_ymin),
			
 
				+                                                 (bar_code_char_xmax, bar_code_char_ymax),
			
 
				+                                                 (bar_code_char_xmin, bar_code_char_ymax)])
			
 
				+
			
 
				+                contain_cond = [False]*len(attention_polygon_list)
			
 
				+                for i, attention_ele in enumerate(attention_polygon_list):
			
 
				+                    if attention_ele.contains(bar_code_char_polygon):
			
 
				+                        contain_cond[i] = True
			
 
				+
			
 
				+                if True not in contain_cond:  # 条形码文字不在attention里面
			
 
				+                    left_board_location = _get_board(image, chars_list[start_index]['location'], 'left')
			
 
				+                    right_board_location = _get_board(image, chars_list[end_index]['location'], 'right')
			
 
				+                    up_board_location = _get_board(image, chars_list[start_index]['location'], 'up')
			
 
				+                    down_board_location = _get_board(image, chars_list[end_index]['location'], 'down')
			
 
				+
			
 
				+                    xmin = left_board_location['left']
			
 
				+                    ymin = up_board_location['top']
			
 
				+                    xmax = right_board_location['left'] + right_board_location['width']
			
 
				+                    ymax = down_board_location['top'] + down_board_location['height']
			
 
				+
			
 
				+                    xmin = int(xmin) if xmin >= 1 else 1
			
 
				+                    ymin = int(ymin) if ymin >= 1 else 1
			
 
				+                    xmax = int(xmax) if xmax <= img_cols - 1 else img_cols - 1
			
 
				+                    ymax = int(ymax) if ymax <= img_rows - 1 else img_rows - 1
			
 
				+
			
 
				+                    bar_code_dict = {'class_name': 'bar_code',
			
 
				+                                     'bounding_box': {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}}
			
 
				+                    bar_code_dict_list.append(bar_code_dict)
			
 
				+                    # print(bar_code_dict)
			
 
				+                    break  # 默认只有一个条形码
			
 
				+                else:
			
 
				+                    continue
			
 
				+
			
 
				+    # 过滤attention 区域存在条形码的文字
			
 
				+    for bar_code in bar_code_dict_list.copy():
			
 
				+        coordinates = bar_code['bounding_box']
			
 
				+        xmin = coordinates['xmin']
			
 
				+        ymin = coordinates['ymin']
			
 
				+        xmax = coordinates['xmax']
			
 
				+        ymax = coordinates['ymax']
			
 
				+        bar_code_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+        for attention_polygon in attention_polygon_list:
			
 
				+            cond1 = bar_code_polygon.within(attention_polygon) or bar_code_polygon.contains(attention_polygon)
			
 
				+            cond2 = False
			
 
				+            cond3 = bar_code_polygon.overlaps(attention_polygon)
			
 
				+            if cond3:
			
 
				+                intersection_poly = bar_code_polygon.intersection(attention_polygon)
			
 
				+                cond2 = intersection_poly.area / bar_code_polygon.area >= 0.01
			
 
				+                cond3 = intersection_poly.area / attention_polygon.area >= 0.01
			
 
				+            if cond1 or cond2 or cond3:
			
 
				+                bar_code_dict_list.remove(bar_code)
			
 
				+                break
			
 
				+
			
 
				+    return bar_code_dict_list
			
 
				+
			
 
				+
			
 
				+def infer_exam_number(image, ocr_dict_list, existed_regions, times_threshold=5):
			
 
				+    # existed_polygon_list = []
			
 
				+    # for region in existed_regions:
			
 
				+    #     coordinates = region['bounding_box']
			
 
				+    #     xmin = coordinates['xmin']
			
 
				+    #     ymin = coordinates['ymin']
			
 
				+    #     xmax = coordinates['xmax']
			
 
				+    #     ymax = coordinates['ymax']
			
 
				+    #     existed_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+    #     existed_polygon_list.append(existed_polygon)
			
 
				+
			
 
				+    img_rows, img_cols = image.shape[0], image.shape[1]
			
 
				+    exam_number_dict_list = []
			
 
				+    xmin, ymin, xmax, ymax = 9999, 9999, 0, 0
			
 
				+    pattern = r'[0oO]|[2-9]'  # 除去1，避免[]被识别为1
			
 
				+    exclude = r'分|题|[ABD]'
			
 
				+    key_digital = []
			
 
				+    all_height = []
			
 
				+    cols = []
			
 
				+    for index, ele in enumerate(ocr_dict_list):
			
 
				+        words = ele['words'].replace(' ', '')
			
 
				+        match_list = [(m.group(), m.span()) for m in re.finditer(pattern, words) if m]
			
 
				+        exclude_list = [(m.group(), m.span()) for m in re.finditer(exclude, words, re.I) if m]
			
 
				+        match_digital_arr = np.asarray([int(char[0].replace('o', '0').replace('O', '0')) for char in match_list])
			
 
				+
			
 
				+        if len(match_digital_arr) > 0:
			
 
				+            counts = np.bincount(match_digital_arr)
			
 
				+            mode_times = np.max(counts)
			
 
				+            if mode_times >= times_threshold and len(exclude_list) < 1:
			
 
				+                mode_value = np.argmax(counts)  # 众数，避免考号末尾出现的其他数字
			
 
				+                key_index = np.where(match_digital_arr == mode_value)[0]
			
 
				+                cols.append(len(key_index))
			
 
				+                start_index = match_list[key_index[0]][1][0]
			
 
				+                end_index = match_list[key_index[-1]][1][0]
			
 
				+
			
 
				+                xmin_t = ele['chars'][start_index]['location']['left']
			
 
				+                ymin_t = ele['chars'][start_index]['location']['top']
			
 
				+                xmax_t = ele['chars'][end_index]['location']['left'] + ele['chars'][end_index]['location']['width']
			
 
				+                ymax_t = ele['chars'][end_index]['location']['top'] + ele['chars'][end_index]['location']['height']
			
 
				+
			
 
				+                mean_width = sum([int(ele['chars'][match_list[i][1][0]]['location']['width'])
			
 
				+                                  for i in key_index]) // len(key_index)
			
 
				+
			
 
				+                mean_height = sum([int(ele['chars'][match_list[i][1][0]]['location']['height'])
			
 
				+                                  for i in key_index]) // len(key_index)
			
 
				+
			
 
				+                all_height.append(mean_height)
			
 
				+
			
 
				+                xmin = min(xmin, xmin_t-mean_width)
			
 
				+                ymin = min(ymin, ymin_t)
			
 
				+                xmax = max(xmax, xmax_t+mean_width)
			
 
				+                ymax = max(ymax, ymax_t)
			
 
				+
			
 
				+                xmin = int(xmin) if xmin >= 1 else 1
			
 
				+                ymin = int(ymin) if ymin >= 1 else 1
			
 
				+                xmax = int(xmax) if xmax <= img_cols - 1 else img_cols - 1
			
 
				+                ymax = int(ymax) if ymax <= img_rows - 1 else img_rows - 1
			
 
				+
			
 
				+                key_digital.append(mode_value)
			
 
				+                if 9 in key_digital:
			
 
				+                    break
			
 
				+
			
 
				+    if 0 in key_digital and 9 in key_digital:
			
 
				+        mean_height = sum(all_height)//10
			
 
				+        exam_number_dict = {'class_name': 'exam_number',
			
 
				+                            'bounding_box': {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax+mean_height},
			
 
				+                            'rows': 10,
			
 
				+                            'cols': max(cols)
			
 
				+                            }
			
 
				+        exam_number_dict_list.append(exam_number_dict)
			
 
				+
			
 
				+        return exam_number_dict_list
			
 
				+    else:
			
 
				+        if len(key_digital) > 1:
			
 
				+            dgt_min = min(key_digital)
			
 
				+            dgt_max = max(key_digital)
			
 
				+            mean_height = sum(all_height)//len(all_height)
			
 
				+            dif = dgt_max - dgt_min
			
 
				+            blank_height = ymax - ymin - mean_height * (dif+1)
			
 
				+            mean_blank = blank_height // dif
			
 
				+
			
 
				+            upper_height = dgt_min * (mean_blank + mean_height) + mean_blank//2
			
 
				+            downward_height = (9-dgt_max) * (mean_blank + mean_height) + mean_blank
			
 
				+            exam_number_dict = {'class_name': 'exam_number',
			
 
				+                                'bounding_box': {'xmin': xmin, 'ymin': ymin-upper_height,
			
 
				+                                                 'xmax': xmax, 'ymax': ymax+downward_height},
			
 
				+                                'rows': 10,
			
 
				+                                'cols': max(cols)}
			
 
				+            exam_number_dict_list.append(exam_number_dict)
			
 
				+
			
 
				+        if len(key_digital) == 1:
			
 
				+            dgt_min = dgt_max = min(key_digital)
			
 
				+            eval_height = sum(all_height)//len(all_height) * 1.5
			
 
				+
			
 
				+            upper_height = dgt_min * eval_height
			
 
				+            downward_height = (9-dgt_max) * eval_height
			
 
				+            exam_number_dict = {'class_name': 'exam_number',
			
 
				+                                'bounding_box': {'xmin': xmin, 'ymin': ymin-upper_height,
			
 
				+                                                 'xmax': xmax, 'ymax': ymax+downward_height},
			
 
				+                                'rows': 10,
			
 
				+                                'cols': max(cols)}
			
 
				+            exam_number_dict_list.append(exam_number_dict)
			
 
				+
			
 
				+        iou_cond = True
			
 
				+        exam_number_dict_list_check = []
			
 
				+        for exam_number_dict in exam_number_dict_list:
			
 
				+            exam_number_polygon = Polygon([(exam_number_dict["xmin"], exam_number_dict["ymin"]),
			
 
				+                                           (exam_number_dict["xmax"], exam_number_dict["ymin"]),
			
 
				+                                           (exam_number_dict["xmax"], exam_number_dict["ymax"]),
			
 
				+                                           (exam_number_dict["xmin"], exam_number_dict["ymax"])])
			
 
				+            for region in existed_regions:
			
 
				+                class_name = region["class_name"]
			
 
				+
			
 
				+                if class_name in ["attention", "solve", "choice", "choice_m", 'choice_s', "cloze", 'cloze_s',
			
 
				+                                  'bar_code', 'qr_code', 'composition', 'solve0']:
			
 
				+                    coordinates = region['bounding_box']
			
 
				+                    xmin = coordinates['xmin']
			
 
				+                    ymin = coordinates['ymin']
			
 
				+                    xmax = coordinates['xmax']
			
 
				+                    ymax = coordinates['ymax']
			
 
				+                    existed_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+                    overlab_area = existed_polygon.intersection(exam_number_polygon).area
			
 
				+                    iou = overlab_area / (exam_number_polygon.area + existed_polygon.area - overlab_area)
			
 
				+                    if iou > 0:
			
 
				+                        iou_cond = False
			
 
				+                        break
			
 
				+            if iou_cond:
			
 
				+                exam_number_dict_list_check.append(exam_number_polygon)
			
 
				+
			
 
				+        return exam_number_dict_list_check
			
 
				+
			
 
				+
			
 
				+def adjust_exam_number(regions):
			
 
				+    exam_number_w_regions = list()
			
 
				+    exam_number_regions = list()
			
 
				+    for i in range(len(regions) - 1, -1, -1):
			
 
				+        region = regions[i]
			
 
				+        if region['class_name'] == 'exam_number_w':
			
 
				+            exam_number_w_regions.append(region)
			
 
				+        if region['class_name'] == 'exam_number':
			
 
				+            exam_number_regions.append(region)
			
 
				+            regions.pop(i)
			
 
				+
			
 
				+    exam_number_region = exam_number_regions[0]
			
 
				+    if len(exam_number_regions) > 1:
			
 
				+        exam_number_regions = sorted(exam_number_regions, key=lambda x: x['bounding_box']['ymin'])
			
 
				+        exam_number_region = exam_number_regions[0]
			
 
				+
			
 
				+    exam_number_w_index = 0
			
 
				+    if len(exam_number_w_regions) > 1:
			
 
				+        distance = [abs(int(ele['bounding_box']['ymax']) - int(exam_number_region['bounding_box']['ymin ']))
			
 
				+                    for ele in exam_number_w_regions]
			
 
				+        exam_number_w_index = distance.index(min(distance))
			
 
				+    exam_number_w_region = exam_number_w_regions[exam_number_w_index]
			
 
				+
			
 
				+    standard = exam_number_w_region['bounding_box']
			
 
				+    exam_number_region['bounding_box'].update({'xmin': standard['xmin'], 'xmax': standard['xmax']})
			
 
				+    regions.append(exam_number_region)
			
 
				+
			
 
				+    return regions
			
 
				+
			
 
				+
			
 
				+def exam_number_infer_by_s(image, regions):
			
 
				+    exam_number_s_list = [ele for ele in regions if ele['class_name'] == 'exam_number_s'
			
 
				+                          and (int(ele['bounding_box']['xmax'])-int(ele['bounding_box']['xmin']) <
			
 
				+                               int(ele['bounding_box']['ymax'])-int(ele['bounding_box']['ymin']))]
			
 
				+    # 找边界
			
 
				+    exam_number_s_list = sorted(exam_number_s_list, key=lambda x: x['bounding_box']['xmin'])
			
 
				+
			
 
				+    left_limit = exam_number_s_list[0]['bounding_box']['xmin']
			
 
				+    right_limit = exam_number_s_list[-1]['bounding_box']['xmax']
			
 
				+
			
 
				+    left_image = crop_region(image, exam_number_s_list[0]['bounding_box'])
			
 
				+    right_image = crop_region(image, exam_number_s_list[-1]['bounding_box'])
			
 
				+
			
 
				+    mean_width = sum([int(ele['bounding_box']['xmax'])-int(ele['bounding_box']['xmin'])
			
 
				+                      for ele in exam_number_s_list]) // len(exam_number_s_list)
			
 
				+    top_limit = min([ele['bounding_box']['ymin'] for ele in exam_number_s_list])
			
 
				+    bottom_limit = max([ele['bounding_box']['ymax'] for ele in exam_number_s_list])
			
 
				+
			
 
				+    left_infer = True
			
 
				+    while left_infer:
			
 
				+        infer_box_xmin = int(left_limit - 1.5*mean_width)
			
 
				+        infer_box_xmax = int(left_limit - 0.5*mean_width)
			
 
				+        infer_box_ymin = int(exam_number_s_list[0]['bounding_box']['ymin'])
			
 
				+        infer_box_ymax = int(exam_number_s_list[0]['bounding_box']['ymax'])
			
 
				+
			
 
				+        infer_image = crop_region_direct(image, [infer_box_xmin, infer_box_ymin, infer_box_xmax, infer_box_ymax])
			
 
				+
			
 
				+        simi = image_hash_detection_simple(left_image, infer_image)
			
 
				+        print('l:', simi)
			
 
				+        if simi >= 0.85:
			
 
				+            left_limit = infer_box_xmin
			
 
				+        else:
			
 
				+            left_infer = False
			
 
				+
			
 
				+    right_infer = True
			
 
				+    while right_infer:
			
 
				+        infer_box_xmin = int(right_limit + 0.5 * mean_width)
			
 
				+        infer_box_xmax = int(right_limit + 1.5 * mean_width)
			
 
				+        infer_box_ymin = int(exam_number_s_list[-1]['bounding_box']['ymin'])
			
 
				+        infer_box_ymax = int(exam_number_s_list[-1]['bounding_box']['ymax'])
			
 
				+
			
 
				+        infer_image = crop_region_direct(image, [infer_box_xmin, infer_box_ymin, infer_box_xmax, infer_box_ymax])
			
 
				+
			
 
				+        simi = image_hash_detection_simple(right_image, infer_image)
			
 
				+        print('r:', simi)
			
 
				+        if simi >= 0.70:
			
 
				+            right_limit = infer_box_xmax
			
 
				+        else:
			
 
				+            right_infer = False
			
 
				+
			
 
				+    infer_exam_number_region = {'xmin': left_limit, 'xmax': right_limit, 'ymin': top_limit, 'ymax': bottom_limit, }
			
 
				+    exam_dict_list = [{'class_name': 'exam_number', 'bounding_box': infer_exam_number_region}]
			
 
				+    # print(exam_dict_list)
			
 
				+    return exam_dict_list
			
 
				+
			
 
				+
			
 
				+def gen_xml_new(path, ocr_list):
			
 
				+    tree = ET.parse(r'../../tools/000000-template.xml')  # xml tree
			
 
				+    for index, ele in enumerate(ocr_list):
			
 
				+        words = ele['words']
			
 
				+        location = ele['location']
			
 
				+        xmin = location['xmin']
			
 
				+        ymin = location['ymin']
			
 
				+        xmax = location['xmax']
			
 
				+        ymax = location['ymax']
			
 
				+
			
 
				+        tree = create_xml('{}'.format(words), tree, str(xmin), str(ymin), str(xmax), str(ymax))
			
 
				+        # print(exam_items_bbox)
			
 
				+    tree.write(path.replace('.jpg', '.xml'))
			
 
				+
			
 
				+
			
 
				+def subfield_answer_sheet(img0, answer_sheet):
			
 
				+    h, w = img0.shape[:2]
			
 
				+    one_part = 0
			
 
				+    line_xmax_1 = 0
			
 
				+    line_xmax_2 = 0
			
 
				+    modules = []
			
 
				+    modules11 = []
			
 
				+    w_int_1 = w
			
 
				+    w_int_2 = round(w / 2)
			
 
				+    w_int_3 = round(w / 3)
			
 
				+    w_int_4 = round(w / 4)
			
 
				+    w_int_8 = round(w / 8)
			
 
				+    if w_int_8 < 50:
			
 
				+        w_int_8 = 50
			
 
				+
			
 
				+    key_modules_classes = ['choice', 'cloze', 'solve', 'solve0', 'composition0', 'composition', 'correction',
			
 
				+                           'ban_area', ]
			
 
				+    if h > w:  # 暂定答题卡高大于宽的为单栏
			
 
				+        one_part = 1
			
 
				+    else:
			
 
				+        temp1 = 0
			
 
				+        temp2 = 0
			
 
				+        for ele in answer_sheet:
			
 
				+            if ele["class_name"] in key_modules_classes:
			
 
				+                modules.append(ele)
			
 
				+        modules_xmin = sorted(modules, key=lambda x: (x['bounding_box']['xmin']))
			
 
				+        modules_xmax = sorted(modules, key=lambda x: (x['bounding_box']['xmax']))
			
 
				+        for i in range(len(modules_xmin) - 1):
			
 
				+            if i == 0 and modules_xmin[0]['bounding_box']['xmin'] - 0 > w_int_4:
			
 
				+                temp1 = 1
			
 
				+            else:
			
 
				+                if modules_xmin[i + 1]['bounding_box']['xmin'] - modules_xmin[i]['bounding_box']['xmax'] > w_int_4:
			
 
				+                    if modules11 == []:
			
 
				+                        line_xmax_1 = modules_xmin[i]['bounding_box']['xmax'] + 20
			
 
				+                        line_xmax_2 = modules_xmin[i + 1]['bounding_box']['xmin'] - 20
			
 
				+                    else:
			
 
				+                        modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+                        modules11_xmax = sorted(modules11)[-1]
			
 
				+                        line_xmax_1 = modules11_xmax + 20
			
 
				+                        line_xmax_2 = modules_xmin[i + 1]['bounding_box']['xmin'] - 20
			
 
				+                        modules11 = []
			
 
				+                    temp1 = 1
			
 
				+                    temp2 = 1
			
 
				+                    break
			
 
				+                elif modules_xmin[i + 1]['bounding_box']['xmin'] - modules_xmin[i]['bounding_box']['xmax'] > -w_int_8:
			
 
				+                    if temp1 == 0:
			
 
				+                        if modules11 == []:
			
 
				+                            line_xmax_1 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules_xmin[i]['bounding_box']['xmax']) / 2)
			
 
				+                        else:
			
 
				+                            modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+                            modules11_xmax = sorted(modules11)[-1]
			
 
				+                            line_xmax_1 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules11_xmax) / 2)
			
 
				+                            modules11 = []
			
 
				+                        temp1 = 1
			
 
				+                    elif temp1 == 1:
			
 
				+                        if modules11 == []:
			
 
				+                            line_xmax_2 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules_xmin[i]['bounding_box']['xmax']) / 2)
			
 
				+                        else:
			
 
				+                            modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+                            modules11_xmax = sorted(modules11)[-1]
			
 
				+                            line_xmax_2 = int((modules_xmin[i + 1]['bounding_box']['xmin'] +
			
 
				+                                               modules11_xmax) / 2)
			
 
				+                        temp2 = 1
			
 
				+                else:
			
 
				+                    modules11.append(modules_xmin[i]['bounding_box']['xmax'])
			
 
				+
			
 
				+        if temp1 == 0 and temp2 == 0:
			
 
				+            if modules_xmax[-1]['bounding_box']['xmax'] - w < -(2 * w_int_4):
			
 
				+                line_xmax_1 = modules_xmax[-1]['bounding_box']['xmax'] + 20
			
 
				+                line_xmax_2 = 2 * w_int_3
			
 
				+            elif modules_xmax[-1]['bounding_box']['xmax'] - w < -w_int_4:
			
 
				+                line_xmax_1 = modules_xmax[-1]['bounding_box']['xmax'] + 20
			
 
				+        elif temp1 == 1 and temp2 == 0:
			
 
				+            if modules_xmax[-1]['bounding_box']['xmax'] - w < -w_int_4:
			
 
				+                line_xmax_2 = 2 * w_int_3
			
 
				+
			
 
				+    return line_xmax_1, line_xmax_2
			
 
				+
			
 
				+
			
 
				+def get_intersection_point(lines, orthogonal_lines, border):
			
 
				+    intersect_point_list = []
			
 
				+    for line in lines:
			
 
				+        width_min, height_min, width_max, height_max = border
			
 
				+        (x_l, y_u), (x_r, y_d) = line.coords
			
 
				+
			
 
				+        x_l = x_l if x_l > width_min else width_min + 1  # 避免边界
			
 
				+        x_r = x_r if x_r < width_max else width_max - 1
			
 
				+        y_u = y_u if y_u > height_min else height_min + 1
			
 
				+        y_d = y_d if y_d < height_max else height_max - 1
			
 
				+
			
 
				+        points_list = []
			
 
				+        if x_l == x_r:
			
 
				+            line_direction = 'lon'
			
 
				+            raw_line = LineString([(x_l, y_u), (x_r, y_d)])
			
 
				+            extend_line = LineString([(x_l, height_min), (x_r, height_max)])
			
 
				+            points_list.extend([height_min + 1, height_max - 1])  # 延长线与边界交点，并避免key_point位于现有边界上
			
 
				+            line_start, line_end = y_u, y_d
			
 
				+        else:
			
 
				+            line_direction = 'lat'
			
 
				+            raw_line = LineString([(x_l, y_u), (x_r, y_d)])
			
 
				+            extend_line = LineString([(width_min, y_u), (width_max, y_d)])
			
 
				+            points_list.extend([width_min + 1, width_max - 1])  # 延长线与边界交点，并避免key_point位于现有边界上
			
 
				+            line_start, line_end = x_l, x_r
			
 
				+
			
 
				+        for ele in orthogonal_lines:
			
 
				+            cond1 = extend_line.intersects(ele)  # T, L, 十交叉
			
 
				+            cond2 = extend_line.crosses(ele)  # 十字交叉
			
 
				+            cond3 = raw_line.intersects(ele)
			
 
				+            cond4 = raw_line.crosses(ele)
			
 
				+            if line_direction == 'lat':
			
 
				+                if cond3:
			
 
				+                    (xp, yp) = raw_line.intersection(ele).bounds[:2]
			
 
				+                    intersect_point_list.append((xp, yp))
			
 
				+                elif cond1:
			
 
				+                    (xp, yp) = extend_line.intersection(ele).bounds[:2]
			
 
				+                    points_list.append(xp)
			
 
				+
			
 
				+            if line_direction == 'lon':
			
 
				+                if cond3:
			
 
				+                    (xp, yp) = raw_line.intersection(ele).bounds[:2]
			
 
				+                    intersect_point_list.append((xp, yp))
			
 
				+                elif cond1:
			
 
				+                    (xp, yp) = extend_line.intersection(ele).bounds[:2]
			
 
				+                    points_list.append(yp)
			
 
				+
			
 
				+        points_array = np.asarray(points_list, dtype=np.uint)
			
 
				+
			
 
				+        left_key = np.max(points_array[points_array <= line_start])
			
 
				+        right_key = np.min(points_array[points_array >= line_end])  # 延长线两边延长并取得第一个交点
			
 
				+
			
 
				+        if line_direction == 'lat':
			
 
				+            intersect_point = [(left_key, y_u), (right_key, y_d)]
			
 
				+        else:
			
 
				+            intersect_point = [(x_l, left_key), (x_r, right_key)]
			
 
				+
			
 
				+        # print(intersect_point)
			
 
				+        intersect_point_list.extend(intersect_point)
			
 
				+
			
 
				+    return intersect_point_list
			
 
				+
			
 
				+
			
 
				+def infer_sheet_box(image, sheet_dict, lon_split_line, exclude_classes):
			
 
				+    height_max, width_max = image.shape[0], image.shape[1]
			
 
				+    height_min, width_min = 0, 0
			
 
				+    latitude = []
			
 
				+    longitude = []
			
 
				+    lines = []
			
 
				+    sheet_polygons = []
			
 
				+    all_sheet_polygons = []
			
 
				+    choice_polygon = []
			
 
				+    # exclude_classes = ['cloze_s', 'exam_number_s', 'choice_s', 'type_score',
			
 
				+    #                    'mark', 'page', 'exam_number_s', 'cloze_score', 'name_w',
			
 
				+    #                    'class_w',]
			
 
				+
			
 
				+    h_min = []
			
 
				+    h_max = []
			
 
				+    for index, region_box in enumerate(sheet_dict):
			
 
				+        coordinates = region_box['bounding_box']
			
 
				+        xmin = coordinates['xmin']
			
 
				+        ymin = coordinates['ymin']
			
 
				+        xmax = coordinates['xmax']
			
 
				+        ymax = coordinates['ymax']
			
 
				+
			
 
				+        if region_box['class_name'] == 'info_title':  # 上限
			
 
				+            h_min.append(ymin)
			
 
				+        if region_box['class_name'] == 'page':  # 下限
			
 
				+            h_max.append(ymin)
			
 
				+        if region_box['class_name'] == 'alarm_info':
			
 
				+            h_min.append(ymin)
			
 
				+            h_max.append(ymin)
			
 
				+
			
 
				+    if h_min:
			
 
				+        hgt_min = min(h_min)
			
 
				+        if hgt_min < height_max / 4:
			
 
				+            height_min = hgt_min
			
 
				+    if h_max:
			
 
				+        hgt_max = max(h_max)
			
 
				+        if hgt_max > 3 * height_max / 4:
			
 
				+            height_max = hgt_max
			
 
				+
			
 
				+    # height_min = h_min if h_min != 9999 else height_min
			
 
				+    # height_max = h_max if h_max != 0 else height_max
			
 
				+    for index, region_box in enumerate(sheet_dict):
			
 
				+        coordinates = region_box['bounding_box']
			
 
				+        xmin = coordinates['xmin']
			
 
				+        ymin = coordinates['ymin']
			
 
				+        xmax = coordinates['xmax']
			
 
				+        ymax = coordinates['ymax']
			
 
				+        box_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+
			
 
				+        if region_box['class_name'] not in exclude_classes:
			
 
				+            if region_box['class_name'] not in ['choice', 'cloze']:  # 推断选择题区域内的choice_m
			
 
				+                sheet_polygons.append(box_polygon)
			
 
				+            if region_box['class_name'] == 'choice':
			
 
				+                choice_polygon.append(box_polygon)
			
 
				+            all_sheet_polygons.append(box_polygon)
			
 
				+            line1 = LineString([(xmin, ymin), (xmin, ymax)])
			
 
				+            line2 = LineString([(xmax, ymin), (xmax, ymax)])
			
 
				+            line3 = LineString([(xmin, ymin), (xmax, ymin)])
			
 
				+            line4 = LineString([(xmin, ymax), (xmax, ymax)])
			
 
				+
			
 
				+            lines.extend([line1, line2, line3, line4])
			
 
				+
			
 
				+            longitude.extend([line1, line2])
			
 
				+            latitude.extend([line3, line4])
			
 
				+
			
 
				+    # sheet_polygons 去除包裹的情况
			
 
				+    sheet_polygons_ = list(combinations(sheet_polygons, 2))
			
 
				+    for polygons in sheet_polygons_:
			
 
				+        if polygons[0].within(polygons[1]) or polygons[0].contains(polygons[1]):
			
 
				+            area_list = [polygons[0].area, polygons[1].area]
			
 
				+            min_polygon = polygons[area_list.index(min(area_list))]
			
 
				+            if min_polygon in sheet_polygons:
			
 
				+                sheet_polygons.remove(min_polygon)
			
 
				+
			
 
				+    min_polygon = sorted(all_sheet_polygons, key=lambda p: p.area)[0]
			
 
				+    avg_area = sum([polygon.area for polygon in sheet_polygons]) / len(sheet_polygons)
			
 
				+
			
 
				+    # 所有矩形框的延长线与矩形框集图像边界的交点
			
 
				+    latitude = sorted(latitude, key=lambda x: x.bounds[1])  # y
			
 
				+    longitude = sorted(longitude, key=lambda x: x.bounds[0])  # x
			
 
				+
			
 
				+    lat_intersect_point_list = get_intersection_point(latitude, longitude,
			
 
				+                                                      (width_min, height_min, width_max, height_max))
			
 
				+    lon_intersect_point_list = get_intersection_point(longitude, latitude,
			
 
				+                                                      (width_min, height_min, width_max, height_max))
			
 
				+
			
 
				+    raw_corner = [(width_min + 1, height_min + 1), (width_min + 1, height_max - 1), (width_max - 1, 1),
			
 
				+                  (width_max - 1, height_max - 1)]
			
 
				+    # raw_corner = []
			
 
				+    intersect_point_list = lat_intersect_point_list + lon_intersect_point_list + raw_corner
			
 
				+    intersect_point_list = list(set(intersect_point_list))
			
 
				+    intersect_point_dict = {k: index + 1 for index, k in enumerate(intersect_point_list)}
			
 
				+
			
 
				+    def _filter_rect(p_list):
			
 
				+        flag = 0
			
 
				+        for ele in p_list:
			
 
				+            try:
			
 
				+                flag = intersect_point_dict[ele]
			
 
				+            except KeyError:
			
 
				+                flag = 0
			
 
				+                break
			
 
				+        if flag > 0:
			
 
				+            x_c = sum([ele[0] for ele in p_list]) / 4
			
 
				+            y_c = sum([ele[1] for ele in p_list]) / 4
			
 
				+            d1, d2, d3, d4 = [LineString([p, (x_c, y_c)]).length for p in p_list]
			
 
				+            return (0 not in [d1, d2, d3, d4]) and d1 == d2 and d1 == d3 and d1 == d4
			
 
				+        else:
			
 
				+            return False
			
 
				+
			
 
				+    def _find_rect(point):
			
 
				+        (x1, y1) = point[0]
			
 
				+        (x2, y2) = point[1]
			
 
				+        if x1 != x2 and y1 != y2:
			
 
				+            xmin, ymin = min(x1, x2), min(y1, y2)
			
 
				+            xmax, ymax = max(x1, x2), max(y1, y2)
			
 
				+
			
 
				+            points_4 = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
			
 
				+            w, h = xmax - xmin, ymax - ymin
			
 
				+            aspect_flag_extreme = max(w / h, h / w) < 1.5 * ASPECT_FLAG  # 解决极端情况
			
 
				+            rect_flag = _filter_rect(points_4)
			
 
				+            if aspect_flag_extreme and rect_flag:
			
 
				+                gen_polygon = Polygon([(points_4[0]), (points_4[1]), (points_4[2]), (points_4[3])])
			
 
				+                flags = set()
			
 
				+                for polygon in sheet_polygons:
			
 
				+                    decision = [gen_polygon.contains(polygon),
			
 
				+                                gen_polygon.within(polygon),
			
 
				+                                gen_polygon.overlaps(polygon)]
			
 
				+                    if True in decision:  # 边界问题
			
 
				+                        flags.add(False)
			
 
				+                        break
			
 
				+                    else:
			
 
				+                        flags.add(True)
			
 
				+                if False in flags:
			
 
				+                    pass
			
 
				+                else:
			
 
				+                    return gen_polygon
			
 
				+
			
 
				+    def _filter_none(p):
			
 
				+        if p is not None:
			
 
				+            return True
			
 
				+
			
 
				+    points_2 = combinations(intersect_point_list, 2)
			
 
				+    gen_polygon_list = map(_find_rect, points_2)
			
 
				+
			
 
				+    gen_polygon_list = list(filter(_filter_none, gen_polygon_list))
			
 
				+    gen_polygon_list = sorted(gen_polygon_list, key=lambda p: p.area, reverse=True)
			
 
				+    # gen_polygon_list = [polygon for index, polygon in enumerate(gen_polygon_list) if index % 2 == 0]
			
 
				+    it = itertools.groupby(gen_polygon_list)
			
 
				+    gen_polygon_list = [k for k, g in it]
			
 
				+
			
 
				+    # 在选择题区域的infer polygon
			
 
				+    gen_choice = []
			
 
				+    for ele in gen_polygon_list:
			
 
				+        for choice_p in choice_polygon:
			
 
				+            if ele.within(choice_p):
			
 
				+                gen_choice.append(ele)
			
 
				+
			
 
				+    sheet_box_area = sum([polygon.area for polygon in sheet_polygons])
			
 
				+    image_area = width_max * height_max
			
 
				+    blank_ratio = 1 - sheet_box_area / image_area
			
 
				+
			
 
				+    polygon_index = 0
			
 
				+    include_polygon = []
			
 
				+    while blank_ratio > REMAIN_RATIO and polygon_index < len(gen_polygon_list):
			
 
				+        polygon = gen_polygon_list[polygon_index]
			
 
				+        blank_ratio = blank_ratio - polygon.area / image_area
			
 
				+        include_polygon.append(polygon)
			
 
				+        polygon_index += 1
			
 
				+
			
 
				+    # gen_polygon_list = [polygon for index, polygon in enumerate(gen_polygon_list)
			
 
				+    #                     if polygon.area > 1.5 * min_polygon.area]
			
 
				+
			
 
				+    for polygon in gen_polygon_list.copy():
			
 
				+        xi, yi, xx, yx = polygon.bounds
			
 
				+        w, h = xx - xi, yx - yi
			
 
				+        if polygon.area <= 1.5 * min_polygon.area or h / w > 2 and polygon.area < avg_area:
			
 
				+            gen_polygon_list.remove(polygon)
			
 
				+
			
 
				+    polygon_2 = list(combinations(gen_polygon_list, 2))
			
 
				+    for polygons in polygon_2:
			
 
				+        try:
			
 
				+            cond2 = polygons[0].overlaps(polygons[1])  # 叠置关系二次分段
			
 
				+            if cond2:
			
 
				+                area_list = [polygons[0].area, polygons[1].area]
			
 
				+                min_index = area_list.index(min(area_list))
			
 
				+                smaller_polygon = polygons[min_index]
			
 
				+                larger_polygon = polygons[1 - min_index]
			
 
				+                new_polygon = smaller_polygon.difference(larger_polygon)
			
 
				+
			
 
				+                if smaller_polygon in gen_polygon_list:
			
 
				+                    gen_polygon_list.remove(smaller_polygon)
			
 
				+                    if 'MultiPolygon' in str(type(new_polygon)):
			
 
				+                        for ele in new_polygon:
			
 
				+                            xm, ym, xx, yx = ele.bounds
			
 
				+                            w, h = xx - xm, yx - ym
			
 
				+                            if max(w / h, h / w) < 1.5 * ASPECT_FLAG and ele.area > 1.5 * min_polygon.area:
			
 
				+                                gen_polygon_list.append(ele)
			
 
				+                    elif len(set(new_polygon.exterior.coords)) == 4:
			
 
				+                        xm, ym, xx, yx = new_polygon.bounds
			
 
				+                        w, h = xx - xm, yx - ym
			
 
				+                        if max(w / h, h / w) < 1.5 * ASPECT_FLAG and new_polygon.area > 1.5 * min_polygon.area:
			
 
				+                            gen_polygon_list.append(new_polygon)
			
 
				+        except Exception as polygon_e:
			
 
				+            print(polygon_e)
			
 
				+            continue
			
 
				+
			
 
				+    polygon_2 = list(combinations(gen_polygon_list, 2))  # 包含关系取大值
			
 
				+    for polygons in polygon_2:
			
 
				+        cond1 = polygons[0].equals(polygons[1])
			
 
				+        if cond1 and polygons[1] in gen_polygon_list:
			
 
				+            gen_polygon_list.remove(polygons[1])
			
 
				+
			
 
				+    polygon_2 = list(combinations(gen_polygon_list, 2))
			
 
				+    for polygons in polygon_2:
			
 
				+        cond2 = polygons[0].contains(polygons[1]) or polygons[0].within(polygons[1])
			
 
				+        if cond2:
			
 
				+            area_list = [polygons[0].area, polygons[1].area]
			
 
				+            min_index = area_list.index(min(area_list))
			
 
				+
			
 
				+            smaller_polygon = polygons[min_index]
			
 
				+            larger_polygon = polygons[1 - min_index]
			
 
				+            sxi, syi, sxx, syx = smaller_polygon.bounds
			
 
				+            bxi, byi, bxx, byx = larger_polygon.bounds
			
 
				+            # inner_touch_cond = '212F11FF2' == larger_polygon.relate(smaller_polygon)
			
 
				+            two_side_touch_cond = (sxi == bxi and sxx == bxx) or (syi == byi and syx == byx)
			
 
				+            if two_side_touch_cond:
			
 
				+                dif_polygon = larger_polygon.difference(smaller_polygon)
			
 
				+                if larger_polygon in gen_polygon_list:
			
 
				+                    gen_polygon_list.remove(larger_polygon)
			
 
				+                if 'MultiPolygon' in str(type(dif_polygon)):
			
 
				+                    for ele in dif_polygon:
			
 
				+                        xm, ym, xx, yx = ele.bounds
			
 
				+                        w, h = xx - xm, yx - ym
			
 
				+                        if max(w / h, h / w) < 1.5 * ASPECT_FLAG and ele.area > 1.5 * min_polygon.area:
			
 
				+                            gen_polygon_list.append(ele)
			
 
				+                elif len(set(dif_polygon.exterior.coords)) == 4:  # empty
			
 
				+                    xm, ym, xx, yx = dif_polygon.bounds
			
 
				+                    w, h = xx - xm, yx - ym
			
 
				+                    if max(w / h, h / w) < 1.5 * ASPECT_FLAG and dif_polygon.area > 1.5 * min_polygon.area:
			
 
				+                        gen_polygon_list.append(dif_polygon)
			
 
				+            else:
			
 
				+                if smaller_polygon in gen_polygon_list:
			
 
				+                    gen_polygon_list.remove(smaller_polygon)
			
 
				+
			
 
				+    polygon_2 = list(combinations(gen_polygon_list, 2))  # 包含关系取大值
			
 
				+    for polygons in polygon_2:
			
 
				+        cond1 = polygons[0].equals(polygons[1])
			
 
				+        if cond1 and polygons[1] in gen_polygon_list:
			
 
				+            gen_polygon_list.remove(polygons[1])
			
 
				+
			
 
				+    if len(lon_split_line) > 0:
			
 
				+        for line in lon_split_line:
			
 
				+            # line = LineString([(286, 1), (286, 599)])
			
 
				+            for poly in gen_polygon_list.copy():
			
 
				+                cond1 = line.intersects(poly)
			
 
				+                cond2 = line.touches(poly)
			
 
				+                if cond1 and not cond2:
			
 
				+                    dif_polygons = poly.difference(line)
			
 
				+                    corner_list = list(set(dif_polygons.exterior.coords))
			
 
				+                    sorted_corner_list = sorted(corner_list, key=lambda x: x[0])
			
 
				+                    if len(sorted_corner_list) == 6:
			
 
				+                        left = sorted(sorted_corner_list[0:2], key=lambda x: x[1])
			
 
				+                        middle = sorted(sorted_corner_list[2:4], key=lambda x: x[1])
			
 
				+                        right = sorted(sorted_corner_list[4:6], key=lambda x: x[1])
			
 
				+
			
 
				+                        tmp_corner_list = [middle[0], left[0], left[1], middle[1], right[1], right[0], middle[0]]
			
 
				+
			
 
				+                        polygon1 = Polygon(tmp_corner_list[:4])
			
 
				+                        polygon2 = Polygon(tmp_corner_list[3:])
			
 
				+
			
 
				+                        gen_polygon_list.remove(poly)
			
 
				+                        for p in [polygon1, polygon2]:
			
 
				+                            xi, yi, xx, yx = p.bounds
			
 
				+                            w, h = xx - xi, yx - yi
			
 
				+                            aspect_flag = max(w / h, h / w) < ASPECT_FLAG
			
 
				+                            if aspect_flag:
			
 
				+                                gen_polygon_list.append(p)
			
 
				+
			
 
				+    gen_polygon_list = [polygon for index, polygon in enumerate(gen_polygon_list) if polygon.area > min_polygon.area]
			
 
				+
			
 
				+    if gen_choice:
			
 
				+        gen_choice = sorted(gen_choice, key=lambda x: x.area)[-1]
			
 
				+        gen_polygon_list.append(gen_choice)
			
 
				+    return gen_polygon_list
			
 
				+
			
 
				+
			
 
				+def infer_class(image, sheet_dict_list, infer_polygon, image_cols, ocr_dict_list=''):
			
 
				+    res = []
			
 
				+    all_type_score_polygon = []
			
 
				+    all_choice_polygon = []
			
 
				+    all_cloze_polygon = []
			
 
				+    all_solve_polygon = []
			
 
				+    all_choice_s_width = []
			
 
				+    for region_box in sheet_dict_list:
			
 
				+        if region_box['class_name'] in ['type_score', 'choice', 'cloze', 'solve', 'choice_s']:
			
 
				+            coordinates = region_box['bounding_box']
			
 
				+            xmin = coordinates['xmin']
			
 
				+            ymin = coordinates['ymin']
			
 
				+            xmax = coordinates['xmax']
			
 
				+            ymax = coordinates['ymax']
			
 
				+            box_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+            if region_box['class_name'] == 'type_score':
			
 
				+                all_type_score_polygon.append(box_polygon)
			
 
				+
			
 
				+            if region_box['class_name'] == 'choice':
			
 
				+                all_choice_polygon.append(box_polygon)
			
 
				+
			
 
				+            if region_box['class_name'] == 'cloze':
			
 
				+                all_cloze_polygon.append(box_polygon)
			
 
				+
			
 
				+            if region_box['class_name'] == 'solve':
			
 
				+                all_solve_polygon.append(box_polygon)
			
 
				+
			
 
				+            if region_box['class_name'] == 'choice_s':
			
 
				+                all_choice_s_width.append(int(xmax)-int(xmin))
			
 
				+
			
 
				+    for poly in infer_polygon.copy():  # infer type_score solve
			
 
				+        p_xmin, p_ymin, p_xmax, p_ymax = poly.bounds
			
 
				+        type_score_num = 0
			
 
				+        type_score_ymin = []
			
 
				+        for type_score_polygon in all_type_score_polygon:
			
 
				+            cond1 = type_score_polygon.within(poly)
			
 
				+            cond2 = False
			
 
				+            cond3 = type_score_polygon.overlaps(poly)
			
 
				+            if cond3:
			
 
				+                intersection_poly = type_score_polygon.intersection(poly)
			
 
				+                d1 = intersection_poly.area / type_score_polygon.area >= TYPE_SCORE_MNS
			
 
				+                print('type_score:', intersection_poly.area / type_score_polygon.area)
			
 
				+                d2 = type_score_polygon.area < 0.2 * poly.area
			
 
				+                cond2 = d1 and d2
			
 
				+
			
 
				+            if cond1 or cond2:
			
 
				+                type_score_num += 1
			
 
				+                t_xmin, t_ymin, t_xmax, t_ymax = type_score_polygon.bounds
			
 
				+                type_score_ymin.append(t_ymin)
			
 
				+                t_height = t_ymax - t_ymin
			
 
				+                if t_ymin - p_ymin > 3 * t_height:
			
 
				+                    type_score_num += 1
			
 
				+                    type_score_ymin.append(p_ymin)
			
 
				+
			
 
				+        if type_score_num == 1:
			
 
				+            in_xmin, in_ymin, in_xmax, in_ymax = poly.bounds
			
 
				+            solve_box = {'class_name': 'solve',
			
 
				+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
			
 
				+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
			
 
				+
			
 
				+            sheet_dict_list.append(solve_box)
			
 
				+            infer_polygon.remove(poly)
			
 
				+            res.append(solve_box)
			
 
				+        if type_score_num > 1:  # 多type_score
			
 
				+            type_score_ymin = sorted(type_score_ymin)
			
 
				+            type_score_ymin[0] = min(p_ymin, type_score_ymin[0])
			
 
				+            type_score_ymin.append(p_ymax)
			
 
				+            for i in range(0, len(type_score_ymin) - 1):
			
 
				+                w = p_xmax - p_xmin
			
 
				+                h = type_score_ymin[i + 1] - type_score_ymin[i]
			
 
				+                if max(w / h, h / w) < ASPECT_FLAG:
			
 
				+                    solve_box = {'class_name': 'solve',
			
 
				+                                 'bounding_box': {'xmin': int(p_xmin), 'ymin': int(type_score_ymin[i]),
			
 
				+                                                  'xmax': int(p_xmax), 'ymax': int(type_score_ymin[i + 1])}}
			
 
				+                    sheet_dict_list.append(solve_box)
			
 
				+                    res.append(solve_box)
			
 
				+            infer_polygon.remove(poly)
			
 
				+
			
 
				+    # for poly in infer_polygon.copy():  # infer choice_m
			
 
				+    #     for choice_polygon in all_choice_polygon:
			
 
				+    #         cond1 = choice_polygon.within(poly) or choice_polygon.contains(poly)
			
 
				+    #         cond2 = False
			
 
				+    #         cond3 = choice_polygon.overlaps(poly)
			
 
				+    #         if cond3:
			
 
				+    #             intersection_poly = choice_polygon.intersection(poly)
			
 
				+    #             cond2 = intersection_poly.area / poly.area >= 0.8
			
 
				+    #
			
 
				+    #         if cond1 or cond2:
			
 
				+    #             in_xmin, in_ymin, in_xmax, in_ymax = poly.bounds
			
 
				+    #             choice_m_img = crop_region_direct(image, (int(in_xmin), int(in_ymin),
			
 
				+    #                                                       int(in_xmax), int(in_ymax)))
			
 
				+    #             # cv2.imshow('m', choice_m_img)
			
 
				+    #             # cv2.waitKey(0)
			
 
				+    #             ocr_res = get_ocr_text_and_coordinate(choice_m_img)
			
 
				+    #             char_a_min = []
			
 
				+    #             char_d_max = []
			
 
				+    #             for index, chars in enumerate(ocr_res):
			
 
				+    #                 for char in chars['chars']:
			
 
				+    #                     left, top = char['location']['left'], char['location']['top']
			
 
				+    #                     width, height = char['location']['width'], char['location']['height']
			
 
				+    #                     if char['char'] in 'abcdlABCD[]aabbccddAABBCCDD[[]]':
			
 
				+    #                         xm, ym = int(left - width / 2), int(top - height / 2)
			
 
				+    #                         char_a_min.append((xm, ym))
			
 
				+    #                         xx, yx = int(left + 3 * width / 2), int(top + 3 * height / 2)
			
 
				+    #                         char_d_max.append((xx, yx))
			
 
				+    #             if char_a_min and char_d_max:
			
 
				+    #                 char_a_min_arr, char_d_max_arr = np.array(char_a_min), np.array(char_d_max)
			
 
				+    #                 tmp_min = np.min(char_a_min_arr, axis=0)
			
 
				+    #                 tmp_max = np.max(char_d_max_arr, axis=0)
			
 
				+    #
			
 
				+    #                 m_xmin, m_ymin, m_xmax, m_ymax = tmp_min[0], tmp_min[1], tmp_max[0], tmp_max[1]
			
 
				+    #                 dif_width = sum(all_choice_s_width) // len(all_choice_s_width) - (m_xmax - m_xmin)
			
 
				+    #                 choice_box = {'class_name': 'choice_m',
			
 
				+    #                               'bounding_box': {'xmin': int(m_xmin) + int(in_xmin) - dif_width // 2,
			
 
				+    #                                                'ymin': int(m_ymin) + int(in_ymin),
			
 
				+    #                                                'xmax': int(m_xmax) + int(in_xmin) + dif_width // 2,
			
 
				+    #                                                'ymax': int(m_ymax) + int(in_ymin)
			
 
				+    #                                                }}
			
 
				+    #
			
 
				+    #                 sheet_dict_list.append(choice_box)
			
 
				+    #                 infer_polygon.remove(poly)
			
 
				+    #                 res.append(choice_box)
			
 
				+    #                 break
			
 
				+
			
 
				+    for poly in infer_polygon.copy():  # infer ocr blank
			
 
				+        flag = []
			
 
				+        for ocr in ocr_dict_list:
			
 
				+            location = ocr['location']
			
 
				+            xmin = location['left']
			
 
				+            ymin = location['top']
			
 
				+            xmax = location['left'] + location['width']
			
 
				+            ymax = location['top'] + location['height']
			
 
				+            box_polygon = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
			
 
				+            cond1 = poly.within(box_polygon) or poly.contains(box_polygon)
			
 
				+            cond2 = False
			
 
				+            cond3 = box_polygon.overlaps(poly)
			
 
				+            if cond3:
			
 
				+                intersection_poly = box_polygon.intersection(poly)
			
 
				+                cond2 = intersection_poly.area / poly.area >= 0.2
			
 
				+
			
 
				+            flag.append(cond1 or cond2 or False)  # True 不是blank
			
 
				+        if True not in flag:
			
 
				+            in_xmin, in_ymin, in_xmax, in_ymax = poly.bounds
			
 
				+            blank_box = {'class_name': 'blank',
			
 
				+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
			
 
				+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
			
 
				+
			
 
				+            # sheet_dict_list.append(solve_box)
			
 
				+            infer_polygon.remove(poly)
			
 
				+            res.append(blank_box)
			
 
				+
			
 
				+    for poly in infer_polygon.copy():  # infer blank
			
 
				+        bounds = [int(ele) for ele in poly.bounds]
			
 
				+        img_region = crop_region_direct(image, bounds)
			
 
				+        img = cv2.threshold(img_region, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
			
 
				+        img_mean = np.mean(img)
			
 
				+
			
 
				+        img_raw_mean = np.mean(img_region)
			
 
				+        # print(img_mean, img_raw_mean)
			
 
				+        cond = img_mean < PIX_VALUE_LOW or img_raw_mean > PIX_VALUE_HIGH
			
 
				+        if cond:
			
 
				+            in_xmin, in_ymin, in_xmax, in_ymax = bounds
			
 
				+            blank_box = {'class_name': 'blank',
			
 
				+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
			
 
				+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
			
 
				+
			
 
				+            # sheet_dict_list.append(solve_box)
			
 
				+            infer_polygon.remove(poly)
			
 
				+            res.append(blank_box)
			
 
				+
			
 
				+    # for poly in infer_polygon.copy():  # infer cloze_s
			
 
				+    #     for cloze_polygon in all_cloze_polygon:
			
 
				+    #         cond1 = cloze_polygon.within(poly) or cloze_polygon.contains(poly)
			
 
				+    #         cond2 = False
			
 
				+    #         cond3 = cloze_polygon.overlaps(poly)
			
 
				+    #         if cond3:
			
 
				+    #             intersection_poly = cloze_polygon.intersection(poly)
			
 
				+    #             cond2 = intersection_poly.area / poly.area >= 0.8
			
 
				+    #
			
 
				+    #         if cond1 or cond2:
			
 
				+    #             in_xmin, in_ymin, in_xmax, in_ymax = poly.bounds
			
 
				+    #             solve_box = {'class_name': 'cloze_s',
			
 
				+    #                          'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
			
 
				+    #                                           'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
			
 
				+    #
			
 
				+    #             sheet_dict_list.append(solve_box)
			
 
				+    #             infer_polygon.remove(poly)
			
 
				+    #             res.append(solve_box)
			
 
				+    #             break
			
 
				+
			
 
				+    for poly in infer_polygon.copy():  # infer solve
			
 
				+        in_xmin, in_ymin, in_xmax, in_ymax = poly.bounds
			
 
				+        w, h = in_xmax - in_xmin, in_ymax - in_ymin
			
 
				+        aspect_flag = max(w / h, h / w) < ASPECT_FLAG
			
 
				+        if aspect_flag:
			
 
				+            solve_box = {'class_name': 'solve_infer',
			
 
				+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
			
 
				+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
			
 
				+        else:
			
 
				+            solve_box = {'class_name': 'blank',
			
 
				+                         'bounding_box': {'xmin': int(in_xmin), 'ymin': int(in_ymin),
			
 
				+                                          'xmax': int(in_xmax), 'ymax': int(in_ymax)}}
			
 
				+
			
 
				+        sheet_dict_list.append(solve_box)
			
 
				+        infer_polygon.remove(poly)
			
 
				+        res.append(solve_box)
			
 
				+
			
 
				+    if all_type_score_polygon:
			
 
				+        type_score_area = sum([ele.area for ele in all_type_score_polygon])
			
 
				+        mean_type_score_area = type_score_area/len(all_type_score_polygon)
			
 
				+        solve_filter = []
			
 
				+        for index, sheet_box in enumerate(sheet_dict_list.copy()):
			
 
				+            if sheet_box['class_name'] == 'solve_infer':
			
 
				+                w = sheet_box['bounding_box']['xmax'] - sheet_box['bounding_box']['xmin']
			
 
				+                h = sheet_box['bounding_box']['ymin'] - sheet_box['bounding_box']['ymin']
			
 
				+                if w * h < mean_type_score_area * 3:
			
 
				+                    sheet_dict_list.remove(sheet_box)
			
 
				+
			
 
				+    for ele in sheet_dict_list:
			
 
				+        if ele['class_name'] == 'solve_infer':
			
 
				+            ele.update({'class_name': 'solve'})
			
 
				+
			
 
				+    return sheet_dict_list
			
 
				+
			
 
				+
			
 
				+def box_infer_and_complete(image, sheet_region_dict, ocr=''):
			
 
				+    if len(image.shape) == 3:
			
 
				+        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
			
 
				+    if len(image.shape) == 4:
			
 
				+        image = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
			
 
				+    exclude_classes = [
			
 
				+        'cloze_s',
			
 
				+        'exam_number_s',
			
 
				+        'type_score',
			
 
				+        'page',
			
 
				+        'alarm_info',
			
 
				+        # 'score_collect',
			
 
				+        'choice_s',
			
 
				+    ]
			
 
				+    y, x = image.shape[0], image.shape[1]
			
 
				+    x1, x2 = subfield_answer_sheet(image, sheet_region_dict)
			
 
				+
			
 
				+    # lon_split_line = []
			
 
				+    lon_split_line = [LineString([(px, 1), (px, y - 1)]) for px in [x1, x2] if px != 0]
			
 
				+    split_line_poly = [(px, 1, px + 1, y - 1) for px in [x1, x2] if px != 0]
			
 
				+
			
 
				+    poly_list = infer_sheet_box(image, sheet_region_dict, lon_split_line, exclude_classes)
			
 
				+    image_cols = len(lon_split_line) + 1
			
 
				+    sheet_region_dict = infer_class(image, sheet_region_dict, poly_list, image_cols, ocr)
			
 
				+
			
 
				+    return sheet_region_dict
			
 
				+
			
 
				+
			
 
				+# 选择题区域补全
			
 
				+def _get_split_index(sorted_list, spilt_value):
			
 
				+    y_dif_list = np.array(sorted_list[1:]) - np.array(sorted_list[:-1])
			
 
				+    y_split_index = [index for index, ele in enumerate(y_dif_list) if ele >= spilt_value]
			
 
				+
			
 
				+    y_split_index = [ele + 1 for ele in y_split_index]  # 索引值扩大
			
 
				+    y_split_index.insert(0, 0)
			
 
				+    y_split_index.insert(-1, len(sorted_list))
			
 
				+    y_split_index = sorted(list(set(y_split_index)))
			
 
				+
			
 
				+    return y_split_index
			
 
				+
			
 
				+
			
 
				+def get_letter_group(letter, location_list):
			
 
				+    y_list = sorted([ele['location']['top'] for ele in location_list])
			
 
				+    height = np.mean(np.array([ele['location']['height'] for ele in location_list]))
			
 
				+    width = np.mean(np.array([ele['location']['width'] for ele in location_list]))
			
 
				+    y_split_dif, x_split_dif = height * 1.5, width * 1.5
			
 
				+
			
 
				+    y_split_index = _get_split_index(y_list, y_split_dif)
			
 
				+
			
 
				+    letter_group_list = []
			
 
				+    letter_group_location_list = []
			
 
				+    for i, split in enumerate(y_split_index[1:]):
			
 
				+        one_group_location_list = location_list[y_split_index[i]:y_split_index[i + 1]]
			
 
				+        one_group_x_list = sorted([ele['location']['top'] for ele in one_group_location_list])
			
 
				+        one_group_x_split_index = _get_split_index(one_group_x_list, x_split_dif)
			
 
				+
			
 
				+        block = []
			
 
				+        block_location = []
			
 
				+        for i_i, s_split in enumerate(one_group_x_split_index[1:]):
			
 
				+            letter_group = one_group_location_list[one_group_x_split_index[i_i]:
			
 
				+                                                   one_group_x_split_index[i_i + 1]]
			
 
				+            letter_group = sorted(letter_group, key=lambda k: k.get('location')['top'])
			
 
				+
			
 
				+            xmin = min([ele['location']['left'] for ele in letter_group])
			
 
				+            ymin = min([ele['location']['top'] for ele in letter_group])
			
 
				+            xmax = max([ele['location']['left'] for ele in letter_group]) + width
			
 
				+            ymax = max([ele['location']['top'] for ele in letter_group]) + height
			
 
				+            middle_x, middle_y = (xmax - xmin) / 2 + xmin, (ymax - ymin) / 2 + ymin
			
 
				+            block_location.append((xmin, ymin, xmax, ymax, middle_x, middle_y))
			
 
				+            block.append(letter_group)
			
 
				+
			
 
				+        letter_group_list.append(block)
			
 
				+        letter_group_location_list.append(block_location)
			
 
				+
			
 
				+    res_dict = {'letter': letter,
			
 
				+                'letter_group': letter_group_list,
			
 
				+                'letter_group_location': letter_group_location_list,
			
 
				+                'width': width, 'height': height}
			
 
				+
			
 
				+    return res_dict
			
 
				+
			
 
				+
			
 
				+def get_letter_group_h(letter, location_list):
			
 
				+    location_list = sorted(location_list, key=lambda k: k.get('location')['left'])
			
 
				+    x_list = sorted([ele['location']['left'] for ele in location_list])
			
 
				+    height = np.mean(np.array([ele['location']['height'] for ele in location_list]), dtype=np.uint)
			
 
				+    width = np.mean(np.array([ele['location']['width'] for ele in location_list]), dtype=np.uint)
			
 
				+    print('h, w: ', height, width)
			
 
				+    y_split_dif, x_split_dif = height * 1.5, width * 1.5
			
 
				+
			
 
				+    x_split_index = _get_split_index(x_list, x_split_dif)
			
 
				+
			
 
				+    letter_group_location_list = []
			
 
				+    for i, split in enumerate(x_split_index[1:]):
			
 
				+        one_group_location_list = location_list[x_split_index[i]:x_split_index[i + 1]]
			
 
				+        one_group_location_list = sorted(one_group_location_list, key=lambda k: k.get('location')['top'])
			
 
				+        xmin = min([ele['location']['left'] for ele in one_group_location_list])
			
 
				+        ymin = one_group_location_list[0]['location']['top']
			
 
				+        xmax = xmin + width
			
 
				+        ymax = one_group_location_list[-1]['location']['top'] + 2*one_group_location_list[-1]['location']['height']
			
 
				+        letter_group_location_list.append((xmin - 2*width, ymin,
			
 
				+                                           xmax + 2*width, ymax))
			
 
				+
			
 
				+    return {'letter': letter, 'group_location': letter_group_location_list}
			
 
				+
			
 
				+
			
 
				+def infer_choice_m_by_ocr(ocr_dict_list):
			
 
				+    # 若字母识别漏掉结果太多, 此方法不能使用
			
 
				+    a_e = 'ABCDEF'
			
 
				+    pattern = '[ABCDEF]'
			
 
				+    a_e_dict = {k: [] for k in a_e}
			
 
				+    block_num = 1  # default
			
 
				+    for i, ele in enumerate(ocr_dict_list):
			
 
				+        words = ele['words']
			
 
				+        cal_num = max([words.upper().count(char) for char in a_e])
			
 
				+        if cal_num > 0:
			
 
				+            words = words.replace(' ', '').upper()  # 去除空格，baidu_api bug
			
 
				+            abcd_words_m = re.finditer(pattern, words)
			
 
				+            abcd_index_list = [(m.group(), m.span()) for m in abcd_words_m if m]
			
 
				+            for letter_info in abcd_index_list:
			
 
				+                letter = letter_info[0]
			
 
				+                a_e_dict[letter].append(ele['chars'][letter_info[1][0]])
			
 
				+
			
 
				+    letter_group_list = []
			
 
				+    for k, v in a_e_dict.items():
			
 
				+        if v:
			
 
				+            letter_group = get_letter_group_h(k, v)
			
 
				+            block_num = max(block_num, len(letter_group['group_location']))
			
 
				+            print(letter_group)
			
 
				+            letter_group_list.append(letter_group)
			
 
				+
			
 
				+    choice_m_list = []
			
 
				+    for i in range(0, block_num):
			
 
				+        block = []
			
 
				+        for letter_group in letter_group_list:
			
 
				+            if len(letter_group['group_location']) > i:
			
 
				+                block.append(letter_group['group_location'][i])
			
 
				+
			
 
				+        if block:
			
 
				+            block_array = np.asarray(block)
			
 
				+            b_min = np.min(block_array, axis=0)
			
 
				+            b_max = np.max(block_array, axis=0)
			
 
				+            choice_m_dict = {'class_name': 'choice_m',
			
 
				+                             'location': {'xmin': b_min[0], 'ymin': b_min[1],
			
 
				+                                          'xmax': b_max[2], 'ymax': b_max[3]}}
			
 
				+            choice_m_list.append(choice_m_dict)
			
 
				+
			
 
				+    # print(choice_m_list)
			
 
				+    return choice_m_list
			
--- a/segment/sheet_resolve/analysis/sheet/sheet_points.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_points.py
@@ -0,0 +1,534 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : sheet_points.py
			
 
				+# @Time    : 2019/9/12 0012 下午 14:39
			
 
				+import re
			
 
				+import numpy as np
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+OCR_ACCURACY = 'accurate'
			
 
				+
			
 
				+
			
 
				+def split_col(box_list, width):
			
 
				+    if len(box_list) == 1:
			
 
				+        return [box_list]
			
 
				+    else:
			
 
				+        box_list = sorted(box_list, key=lambda k: k.get('bounding_box')['xmin'])
			
 
				+        box_list_class_name = [[ele_box['bounding_box']['xmin'], ele_box['bounding_box']['ymin'],
			
 
				+                                ele_box['bounding_box']['xmax'], ele_box['bounding_box']['ymax']] for ele_box in
			
 
				+                               box_list]
			
 
				+        box_list_raw = sorted(box_list_class_name, key=lambda k: k[0])
			
 
				+        box_list_raw_arr = np.array(box_list_raw)
			
 
				+
			
 
				+        pre = box_list_raw_arr[1:, 0]
			
 
				+        rear = box_list_raw_arr[:-1, 0]
			
 
				+        y_diff = pre - rear
			
 
				+        index_list = [index for index, ele in enumerate(y_diff) if abs(ele) > width // 10]
			
 
				+
			
 
				+        res_list = []
			
 
				+        if index_list == []:
			
 
				+            res_list = [sorted(box_list, key=lambda k: k.get('bounding_box')['ymin'])]
			
 
				+            return res_list
			
 
				+        else:
			
 
				+            split_x_index = [ele + 1 for ele in index_list]
			
 
				+            split_x_index.insert(0, 0)
			
 
				+            split_x_index.insert(-1, len(box_list_raw))
			
 
				+            split_x_index = sorted(list(set(split_x_index)))
			
 
				+            for i, split in enumerate(split_x_index[1:]):
			
 
				+                one_col = box_list[split_x_index[i]:split_x_index[i + 1]]
			
 
				+                one_col = sorted(one_col, key=lambda k: k.get('bounding_box')['ymin'])
			
 
				+                res_list.append(one_col)
			
 
				+        return res_list
			
 
				+
			
 
				+
			
 
				+def check_classes(sheet_dict_of_front):
			
 
				+    sheet_dict_of_front_temp = sheet_dict_of_front.copy()
			
 
				+    index_list = []
			
 
				+    for index, ele in enumerate(sheet_dict_of_front_temp):
			
 
				+        for index0, one_paper in enumerate(ele):
			
 
				+            if one_paper['title_number'] == -1 and one_paper['title_with_value'] != -1:
			
 
				+                index_list.append(index0)
			
 
				+    # print(index_list)
			
 
				+    if index_list == []:
			
 
				+        return sheet_dict_of_front
			
 
				+    else:
			
 
				+        if 0 in index_list and len(index_list) > 2:
			
 
				+            index_list = index_list[1:]
			
 
				+        elif 0 in index_list and len(index_list) == 1:
			
 
				+            return sheet_dict_of_front
			
 
				+        else:
			
 
				+            index_list = index_list
			
 
				+        for index_a, ele_a in enumerate(sheet_dict_of_front):
			
 
				+            for index1, ele1 in enumerate(index_list):
			
 
				+                former_class = ele_a[ele1 - 1]
			
 
				+                if former_class != []:
			
 
				+                    lack_title_number_dict = ele_a[ele1]
			
 
				+                    lack_title_number_dict['title_number'] = former_class['title_number']
			
 
				+                    lack_title_number_dict.update({'title_number': former_class['title_number']})
			
 
				+        return sheet_dict_of_front
			
 
				+
			
 
				+
			
 
				+def change_box(cloze_s_res, cloze_s_region):
			
 
				+    words_result = []
			
 
				+    for ele in cloze_s_res:
			
 
				+        location = ele['location']
			
 
				+        xmin = location['left']
			
 
				+        ymin = location['top']
			
 
				+        xmax = location['left'] + location['width']
			
 
				+        ymax = location['top'] + location['height']
			
 
				+        bbox0 = utils.get_img_region_box01([xmin, ymin, xmax, ymax], cloze_s_region)
			
 
				+        location.update({'left': bbox0[0], 'top': bbox0[1], 'width': bbox0[2] - bbox0[0], 'height': bbox0[3] - bbox0[1]})
			
 
				+        words_result.append(ele)
			
 
				+    return words_result
			
 
				+
			
 
				+
			
 
				+def get_total_title_quantity_and_value(box_with_content):
			
 
				+    list_of_all = []
			
 
				+
			
 
				+    for index, ele in enumerate(box_with_content):
			
 
				+        total_score = []
			
 
				+        title_number = []
			
 
				+
			
 
				+        title_two_number = []
			
 
				+        title_two_value = []
			
 
				+        title_two_number_value_list = []
			
 
				+
			
 
				+        title_three_number = []
			
 
				+        title_three_value = []
			
 
				+
			
 
				+        above_content = ele['above_content']
			
 
				+        for words_index, words_str in enumerate(above_content):
			
 
				+            words = words_str['words']
			
 
				+
			
 
				+            pattern1 = re.compile(
			
 
				+                '^\d+[,、.]?[\u4e00-\u9fa5]?[(（]?\d+分+[)）]?[(（]?\d+[)）]?|^\d+[,、.]?[\u4e00-\u9fa5]?[(（]?\d+分+[)）]?')
			
 
				+            result1 = re.findall(pattern1, words)
			
 
				+
			
 
				+            pattern11 = re.compile('^\d+[,、.]?[\u4e00-\u9fa5]?[(（]?\d+[)）]?')
			
 
				+            result11 = re.findall(pattern11, words)
			
 
				+
			
 
				+            pattern2 = re.compile('[(（]?\d+[)）]?')
			
 
				+            result2 = re.findall(pattern2, words)
			
 
				+
			
 
				+            pattern3 = re.compile('[(（]?\d?分+[)）]?')
			
 
				+            result3 = re.findall(pattern3, words)
			
 
				+
			
 
				+            pattern4 = re.compile(r'①|②|③|④|⑤|⑥|⑦|⑧|⑨|⑩')
			
 
				+            result4 = re.findall(pattern4, words)
			
 
				+
			
 
				+            pattern5 = re.compile('[\u4e00-\u9fa5]')
			
 
				+            result5 = re.findall(pattern5, words)
			
 
				+
			
 
				+            if result1 and title_number == []:
			
 
				+                title_number_and_value = ''.join(result1)
			
 
				+                digital_number = re.findall('\d+', title_number_and_value)
			
 
				+                if len(digital_number) == 1:
			
 
				+                    title_number.append(int(digital_number[0]))
			
 
				+                    continue
			
 
				+                elif len(digital_number) == 2:
			
 
				+                    title_number.append(int(digital_number[0]))
			
 
				+                    total_score.append(int(digital_number[1]))
			
 
				+                    continue
			
 
				+                elif len(digital_number) == 3:
			
 
				+                    title_number.append(int(digital_number[0]))
			
 
				+                    total_score.append(int(digital_number[1]))
			
 
				+                    title_two_value.append(int(digital_number[2]))
			
 
				+                    continue
			
 
				+
			
 
				+            if result11 and result2 and not result3:
			
 
				+                digital_number = re.findall('\d+', words)
			
 
				+                if len(digital_number) == 1:
			
 
				+                    title_number.append(int(digital_number[0]))
			
 
				+                else:
			
 
				+                    title_number.append(int(digital_number[0]))
			
 
				+                    title_two_number.append(int(digital_number[1]))
			
 
				+
			
 
				+            if result2 and not result1 and not result3 and not result11:
			
 
				+                title_two_number_group = result2[0]
			
 
				+                digital_number2 = re.search('\d+', title_two_number_group)
			
 
				+                digital_number22 = digital_number2.group()
			
 
				+                title_two_number.append(int(digital_number22))
			
 
				+
			
 
				+            if result3 and not result1 and not result2 and not result4 and not result5:
			
 
				+                title_two_value_group = result3[0]
			
 
				+                digital_value = re.search('\d+', title_two_value_group)
			
 
				+                if digital_value == None:
			
 
				+                    value = 2
			
 
				+                else:
			
 
				+                    value = digital_value.group()
			
 
				+                title_two_value.append(int(value))
			
 
				+
			
 
				+            if result2 and result4:
			
 
				+                title_three_value_group = result4[0]
			
 
				+                title_three_number.append(title_three_value_group)
			
 
				+            if title_three_number != [] and result3:
			
 
				+                title_three_value_group = result3[0]
			
 
				+                digital_value = re.search('\d+', title_three_value_group)
			
 
				+                value = digital_value.group()
			
 
				+                title_three_value.append(int(value))
			
 
				+            if result2 and result3 and not result1:
			
 
				+                if len(result3) == 1:
			
 
				+                    title_two_value_group = result3[0]
			
 
				+                    digital_value = re.search('\d+', title_two_value_group)
			
 
				+                    if digital_value == None:
			
 
				+                        value = -1
			
 
				+                    else:
			
 
				+                        value = digital_value[0]
			
 
				+                    title_two_value.append(int(value))
			
 
				+                else:
			
 
				+                    title_two_number0 = result2[0]
			
 
				+                    digital_title_two_number = re.search('\d+', title_two_number0)
			
 
				+                    digital_title_two_group = digital_title_two_number.group()
			
 
				+                    # title_two_number.append(digital_title_two_group)
			
 
				+
			
 
				+                    value = result3[1]
			
 
				+                    digital_value = re.search('\d+', value)
			
 
				+                    if digital_value == None:
			
 
				+                        value0 = -1
			
 
				+                    else:
			
 
				+                        value0 = digital_value.group()
			
 
				+
			
 
				+                    # title_two_value.append(int(value0))
			
 
				+
			
 
				+                    title_two_number_value_list.append([int(digital_title_two_group), int(value0)])
			
 
				+
			
 
				+            if result1 and result2 and result3 and len(title_number) == 1:
			
 
				+                if len(result3) == 1:
			
 
				+                    title_two_value_group = result3[0]
			
 
				+                    digital_value = re.search('\d+', title_two_value_group)
			
 
				+                    if digital_value == None:
			
 
				+                        value = -1
			
 
				+                    else:
			
 
				+                        value = digital_value[0]
			
 
				+                    title_two_value.append(int(value))
			
 
				+                else:
			
 
				+                    title_two_number0 = result2[0]
			
 
				+                    digital_title_two_number = re.search('\d+', title_two_number0)
			
 
				+                    digital_title_two_group = digital_title_two_number.group()
			
 
				+                    # title_two_number.append(digital_title_two_group)
			
 
				+
			
 
				+                    value = result3[1]
			
 
				+                    digital_value = re.search('\d+', value)
			
 
				+                    if digital_value == None:
			
 
				+                        value0 = -1
			
 
				+                    else:
			
 
				+                        value0 = digital_value.group()
			
 
				+
			
 
				+                    # title_two_value.append(int(value0))
			
 
				+
			
 
				+                    title_two_number_value_list.append([int(digital_title_two_group), int(value0)])
			
 
				+            if result5:
			
 
				+                continue
			
 
				+        total_title_quantity = len(title_two_number)
			
 
				+        title_with_value = dict(zip(title_two_number, title_two_value))
			
 
				+
			
 
				+        if title_two_number == [] and title_two_value == [] and title_two_number_value_list != []:
			
 
				+            total_title_quantity = len(title_two_number_value_list)
			
 
				+            title_with_value = dict(zip([ele[0] for ele in title_two_number_value_list],
			
 
				+                                        [ele[1] for ele in title_two_number_value_list]))
			
 
				+        elif title_two_number == [] and title_two_value != [] and title_two_number_value_list != []:
			
 
				+            ele_0 = []
			
 
				+            for element in title_two_number_value_list:
			
 
				+                ele_0.append(element[0])
			
 
				+            ele_1 = [ele for ele in range(1, int(len(title_two_number_value_list) + len(title_two_value) + 1))]
			
 
				+
			
 
				+            lack_number = list(set(ele_1) - set(ele_0))
			
 
				+            lack_number_with_value = [lack_number[0], title_two_value[0]]
			
 
				+            title_two_number_value_list.append(lack_number_with_value)
			
 
				+
			
 
				+            title_two_number_value_list = sorted(title_two_number_value_list, key=lambda k: k[0])
			
 
				+
			
 
				+            title_with_value = dict(
			
 
				+                zip([ele[0] for ele in title_two_number_value_list], [ele[1] for ele in title_two_number_value_list]))
			
 
				+            total_title_quantity = len(title_with_value)
			
 
				+        elif title_two_number != [] and title_two_value == [] and title_two_number_value_list == []:
			
 
				+            title_two_value11 = []
			
 
				+            for i in range(len(title_two_number) + 1):
			
 
				+                if i:
			
 
				+                    title_two_value11.append(-1)
			
 
				+            total_title_quantity = len(title_two_number)
			
 
				+            title_with_value = dict(zip(title_two_number, title_two_value11))
			
 
				+        elif title_two_number != [] and title_two_value != [] \
			
 
				+                and title_two_number_value_list == [] and len(title_two_number) == len(title_two_value):
			
 
				+            title_with_value = dict(zip(title_two_number, title_two_value))
			
 
				+            total_title_quantity = len(title_two_number)
			
 
				+
			
 
				+        if total_score == [] and title_two_value != []:
			
 
				+            total_score = int(sum(title_two_value))
			
 
				+        elif total_score == [] and title_two_value == []:
			
 
				+            total_score = -1
			
 
				+        else:
			
 
				+            total_score = total_score[0]
			
 
				+
			
 
				+        if title_two_value == [] and total_score != -1 and title_two_number != []:
			
 
				+            title_two_value = int(total_score / len(title_two_number))
			
 
				+        elif title_two_value == [] and total_score == -1 and title_two_number != []:
			
 
				+            title_two_value0 = []
			
 
				+            for i in range(len(title_two_number) + 1):
			
 
				+                if i:
			
 
				+                    title_two_value0.append(-1)
			
 
				+            title_with_value = dict(zip(title_two_number, title_two_value0))
			
 
				+
			
 
				+        if title_number != [] and total_score != []:
			
 
				+            title_number = title_number[0]
			
 
				+            total_score = total_score
			
 
				+        elif title_number == [] and total_score != []:
			
 
				+            title_number = -1
			
 
				+            total_score = total_score
			
 
				+        elif title_number != [] and total_score == []:
			
 
				+            title_number = title_number[0]
			
 
				+            total_score = -1
			
 
				+
			
 
				+        if title_number == -1 and title_two_number == [] and total_score == -1 and title_two_value == [] and total_title_quantity == 0:
			
 
				+            per_title_content = {}
			
 
				+            per_title_content['class_name'] = ele['class_name']
			
 
				+            per_title_content['bounding_box'] = ele['bounding_box']
			
 
				+
			
 
				+            per_title_content['title_number'] = -1
			
 
				+            per_title_content['total_title_quantity'] = -1
			
 
				+            per_title_content['total_score'] = -1
			
 
				+            per_title_content['title_with_value'] = -1
			
 
				+
			
 
				+            list_of_all.append(per_title_content)
			
 
				+        elif title_number == -1 and title_two_number != [] and total_score == -1 and title_two_value == 0 and total_title_quantity != 0:
			
 
				+            per_title_content = {}
			
 
				+
			
 
				+            per_title_content['class_name'] = ele['class_name']
			
 
				+            per_title_content['bounding_box'] = ele['bounding_box']
			
 
				+            per_title_content['title_number'] = -1
			
 
				+            per_title_content['total_title_quantity'] = total_title_quantity
			
 
				+            per_title_content['total_score'] = -1
			
 
				+            per_title_content['title_with_value'] = -1
			
 
				+
			
 
				+            list_of_all.append(per_title_content)
			
 
				+
			
 
				+        elif total_title_quantity == 0 and title_with_value == []:
			
 
				+            per_title_content = {}
			
 
				+
			
 
				+            per_title_content['class_name'] = ele['class_name']
			
 
				+            per_title_content['bounding_box'] = ele['bounding_box']
			
 
				+            per_title_content['title_number'] = title_number
			
 
				+            per_title_content['total_title_quantity'] = -1
			
 
				+            per_title_content['total_score'] = total_score
			
 
				+            per_title_content['title_with_value'] = -1
			
 
				+            list_of_all.append(per_title_content)
			
 
				+
			
 
				+        else:
			
 
				+            if total_title_quantity == 0:
			
 
				+                total_title_quantity = -1
			
 
				+            if title_with_value == {}:
			
 
				+                title_with_value = -1
			
 
				+            per_title_content = {}
			
 
				+            per_title_content['class_name'] = ele['class_name']
			
 
				+            per_title_content['bounding_box'] = ele['bounding_box']
			
 
				+            per_title_content['title_number'] = title_number
			
 
				+            per_title_content['total_score'] = total_score
			
 
				+            per_title_content['total_title_quantity'] = total_title_quantity
			
 
				+            per_title_content['title_with_value'] = title_with_value
			
 
				+            list_of_all.append(per_title_content)
			
 
				+    return list_of_all
			
 
				+
			
 
				+
			
 
				+def get_sheet_points(sheet_dict_list):
			
 
				+    region_dict_front = []
			
 
				+    region_dict_back = []
			
 
				+
			
 
				+    for sheet_dict_s in sheet_dict_list:
			
 
				+        regions = sheet_dict_s['sheet_dict']['regions']
			
 
				+        class_name_list = [ele['class_name'] for ele in regions]
			
 
				+        if 'choice_s' in class_name_list or 'choice_m' in class_name_list:
			
 
				+            region_dict_front.append(sheet_dict_s)
			
 
				+        else:
			
 
				+            region_dict_back.append(sheet_dict_s)
			
 
				+
			
 
				+    sheet_dict_of_front = []
			
 
				+    sheet_dict_of_front_without_solve_cloze = []
			
 
				+    for sheet_dict_front_s in region_dict_front:
			
 
				+        h, w = sheet_dict_front_s['shape'][0], sheet_dict_front_s['shape'][1]
			
 
				+        words_result_front = sheet_dict_front_s['ocr']
			
 
				+        cloze_or_solve_list_front = []
			
 
				+        if sheet_dict_front_s['sheet_dict']['subject'] != 'english':
			
 
				+            cloze_or_solve_list_front = [ele for ele in sheet_dict_front_s['sheet_dict']['regions']
			
 
				+                                         if ele['class_name'] == 'solve0' or ele['class_name'] == 'solve']
			
 
				+        cloze_s_list = [ele for ele in sheet_dict_front_s['sheet_dict']['regions'] if ele['class_name'] == 'cloze_s']
			
 
				+        cloze_s_box_with_content = []
			
 
				+        for ele in cloze_s_list:
			
 
				+            cloze_s_dict = {}
			
 
				+            cloze_s_region = utils.crop_region(sheet_dict_list[0]['raw_image'], ele['bounding_box'])
			
 
				+            cloze_s_res = get_ocr_text_and_coordinate(cloze_s_region, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG')
			
 
				+            if cloze_s_res == []:
			
 
				+                cloze_s_res0 = cloze_s_res
			
 
				+            else:
			
 
				+                cloze_s_res0 = change_box(cloze_s_res, ele['bounding_box'])
			
 
				+
			
 
				+            cloze_s_dict['class_name'] = ele['class_name']
			
 
				+            cloze_s_dict['bounding_box'] = ele['bounding_box']
			
 
				+            cloze_s_dict['above_content'] = cloze_s_res0
			
 
				+            cloze_s_box_with_content.append(cloze_s_dict)
			
 
				+        solve_and_cloze_s_list = cloze_or_solve_list_front + cloze_s_list
			
 
				+
			
 
				+        if len(solve_and_cloze_s_list) == 0:
			
 
				+            sheet_dict_of_front_without_solve_cloze.append(sheet_dict_front_s)
			
 
				+        else:
			
 
				+            cloze_or_solve0_box_list_front = split_col(solve_and_cloze_s_list, w)
			
 
				+
			
 
				+            box_with_content_front = []
			
 
				+            for one_col_box in cloze_or_solve0_box_list_front:
			
 
				+
			
 
				+                for ele_box in one_col_box:
			
 
				+                    big_box = {}
			
 
				+                    words_list = []
			
 
				+                    for words_res in words_result_front:
			
 
				+                        xmin = words_res['location']['left']
			
 
				+                        ymin = words_res['location']['top']
			
 
				+                        xmax = words_res['location']['left'] + words_res['location']['width']
			
 
				+                        ymax = words_res['location']['top'] + words_res['location']['height']
			
 
				+                        words_bbox = [xmin, ymin, xmax, ymax]
			
 
				+                        if utils.decide_coordinate_contains1(words_bbox,
			
 
				+                                                             [ele_box['bounding_box']['xmin'],
			
 
				+                                                              ele_box['bounding_box']['ymin'],
			
 
				+                                                              ele_box['bounding_box']['xmax'],
			
 
				+                                                              ele_box['bounding_box']['ymax']]):
			
 
				+                            words_list.append(words_res)
			
 
				+                    big_box['class_name'] = ele_box['class_name']
			
 
				+                    big_box['bounding_box'] = ele_box['bounding_box']
			
 
				+                    big_box['above_content'] = words_list
			
 
				+                    box_with_content_front.append(big_box)
			
 
				+            box_with_content_front1 = box_with_content_front + cloze_s_box_with_content
			
 
				+            list_of_front = get_total_title_quantity_and_value(box_with_content_front1)
			
 
				+            sheet_dict_of_front.append(list_of_front)
			
 
				+
			
 
				+    sheet_dict_of_back = []
			
 
				+    sheet_dict_of_back_without_solve_cloze = []
			
 
				+    for single_sheet_dict in region_dict_back:
			
 
				+        h1, w1 = single_sheet_dict['shape'][0], single_sheet_dict['shape'][1]
			
 
				+        words_result_back = single_sheet_dict['ocr']
			
 
				+        cloze_or_solve0_box_list_back = []
			
 
				+        if single_sheet_dict['sheet_dict']['subject'] != 'english':
			
 
				+            cloze_or_solve0_box_list_back = [ele for ele in single_sheet_dict['sheet_dict']['regions']
			
 
				+                                         if ele['class_name'] == 'solve0'
			
 
				+                                         or ele['class_name'] == 'solve']
			
 
				+        cloze_s_list = [ele for ele in single_sheet_dict['sheet_dict']['regions'] if ele['class_name'] == 'cloze_s']
			
 
				+        cloze_s_box_with_content = []
			
 
				+        for ele in cloze_s_list:
			
 
				+            cloze_s_dict = {}
			
 
				+            cloze_s_region = utils.crop_region(sheet_dict_list[0]['raw_image'], ele['bounding_box'])
			
 
				+            cloze_s_res = get_ocr_text_and_coordinate(cloze_s_region, ocr_accuracy=OCR_ACCURACY,
			
 
				+                                                                       language_type='CHN_ENG')
			
 
				+            if cloze_s_res == []:
			
 
				+                cloze_s_res0 = cloze_s_res
			
 
				+            else:
			
 
				+                cloze_s_res0 = change_box(cloze_s_res, ele['bounding_box'])
			
 
				+
			
 
				+            cloze_s_dict['class_name'] = ele['class_name']
			
 
				+            cloze_s_dict['bounding_box'] = ele['bounding_box']
			
 
				+            cloze_s_dict['above_content'] = cloze_s_res0
			
 
				+            cloze_s_box_with_content.append(cloze_s_dict)
			
 
				+        solve_and_cloze_s_list_back = cloze_or_solve0_box_list_back + cloze_s_list
			
 
				+
			
 
				+        if len(solve_and_cloze_s_list_back) == 0:
			
 
				+            sheet_dict_of_back_without_solve_cloze.append(single_sheet_dict)
			
 
				+        else:
			
 
				+            cloze_or_solve0_box_list_back = split_col(solve_and_cloze_s_list_back, w1)
			
 
				+
			
 
				+            box_with_content_back = []
			
 
				+            for one_col_box in cloze_or_solve0_box_list_back:
			
 
				+                for ele_box in one_col_box:
			
 
				+                    words_list = []
			
 
				+                    big_box = {}
			
 
				+                    for words_res in words_result_back:
			
 
				+                        xmin = words_res['location']['left']
			
 
				+                        ymin = words_res['location']['top']
			
 
				+                        xmax = words_res['location']['left'] + words_res['location']['width']
			
 
				+                        ymax = words_res['location']['top'] + words_res['location']['height']
			
 
				+                        words_bbox = [xmin, ymin, xmax, ymax]
			
 
				+                        if utils.decide_coordinate_contains1(words_bbox,
			
 
				+                                                             [ele_box['bounding_box']['xmin'],
			
 
				+                                                              ele_box['bounding_box']['ymin'],
			
 
				+                                                              ele_box['bounding_box']['xmax'],
			
 
				+                                                              ele_box['bounding_box']['ymax']]):
			
 
				+                            words_list.append(words_res)
			
 
				+                    big_box['class_name'] = ele_box['class_name']
			
 
				+                    big_box['bounding_box'] = ele_box['bounding_box']
			
 
				+                    big_box['above_content'] = words_list
			
 
				+                    box_with_content_back.append(big_box)
			
 
				+            # print(box_with_content_back)
			
 
				+
			
 
				+            box_with_content_back1 = box_with_content_back + cloze_s_box_with_content
			
 
				+            # box_with_content_back1 = sorted(box_with_content_back1, key=lambda k: k.get('above_content'))
			
 
				+            list_of_back = get_total_title_quantity_and_value(box_with_content_back1)
			
 
				+            sheet_dict_of_back.append(list_of_back)
			
 
				+
			
 
				+    if sheet_dict_of_front != [] and sheet_dict_of_front_without_solve_cloze == []:
			
 
				+        sheet_dict_of_front = check_classes(sheet_dict_of_front)
			
 
				+    else:
			
 
				+        sheet_dict_of_front = []
			
 
				+    if sheet_dict_of_back != [] and sheet_dict_of_back_without_solve_cloze == []:
			
 
				+        sheet_dict_of_back = check_classes(sheet_dict_of_back)
			
 
				+    else:
			
 
				+        sheet_dict_of_back = []
			
 
				+
			
 
				+    if sheet_dict_of_front != [] and sheet_dict_of_back != []:
			
 
				+        for single_back in sheet_dict_of_back:
			
 
				+            if single_back[0]['title_number'] == -1 and single_back[0]['title_with_value'] != -1:
			
 
				+                if single_back[1]['title_number'] == -1:
			
 
				+                    title_number = single_back[0]['title_number']
			
 
				+                    single_back[0].update({'number': title_number})
			
 
				+                elif single_back[1]['title_number'] != -1:
			
 
				+                    title_number = int(single_back[1]['title_number']) - 1
			
 
				+                    single_back[0].update({'number': title_number})
			
 
				+                else:
			
 
				+                    continue
			
 
				+            else:
			
 
				+                continue
			
 
				+
			
 
				+    if sheet_dict_of_front != []:
			
 
				+        for ele11 in region_dict_front:
			
 
				+            class_names_front = ele11['sheet_dict']['regions']
			
 
				+            for single_classes in class_names_front:
			
 
				+                for ele12 in sheet_dict_of_front:
			
 
				+                    for ele13 in ele12:
			
 
				+                        if single_classes['class_name'] == ele13['class_name'] and single_classes['bounding_box'] == \
			
 
				+                                ele13['bounding_box']:
			
 
				+                            title_number = ele13['title_number']
			
 
				+                            total_score = ele13['total_score']
			
 
				+                            total_title_quantity = ele13['total_title_quantity']
			
 
				+                            title_with_value = ele13['title_with_value']
			
 
				+                            single_classes.update({'number': title_number, 'default_points': total_score,
			
 
				+                                                   'total_title_quantity': total_title_quantity,
			
 
				+                                                   'title_with_value': title_with_value})
			
 
				+                        else:
			
 
				+                            continue
			
 
				+    else:
			
 
				+        region_dict_front = sheet_dict_of_front_without_solve_cloze
			
 
				+
			
 
				+    if sheet_dict_of_back != []:
			
 
				+        for ele22 in region_dict_back:
			
 
				+            class_names_back = ele22['sheet_dict']['regions']
			
 
				+            for single_class_back in class_names_back:
			
 
				+                for single_paper in sheet_dict_of_back:       #
			
 
				+                    for regions_all in single_paper:
			
 
				+                        if single_class_back['class_name'] == regions_all['class_name'] and \
			
 
				+                                single_class_back['bounding_box'] == regions_all['bounding_box']:
			
 
				+                            title_number0 = regions_all['title_number']
			
 
				+                            total_score0 = regions_all['total_score']
			
 
				+                            total_title_quantity0 = regions_all['total_title_quantity']
			
 
				+                            title_with_value0 = regions_all['title_with_value']
			
 
				+                            single_class_back.update({'number': title_number0,
			
 
				+                                                      'default_points': total_score0,
			
 
				+                                                      'total_title_quantity': total_title_quantity0,
			
 
				+                                                      'title_with_value': title_with_value0})
			
 
				+                        else:
			
 
				+                            continue
			
 
				+    else:
			
 
				+        region_dict_back = sheet_dict_of_back_without_solve_cloze
			
 
				+
			
 
				+    tmp = region_dict_front + region_dict_back
			
 
				+    if len(sheet_dict_list) != len(tmp):
			
 
				+        raise ValueError({'答题卡分数识别结果页数丢失: sheet: {}, result: {}'.format(len(sheet_dict_list), len(tmp))})
			
 
				+    else:
			
 
				+        sheet_dict_list = tmp
			
 
				+        # for index, ele in enumerate(sheet_dict_list):
			
 
				+        #     if sheet_dict_list[index]['sheet_dict']['class_name'] == tmp[index]['sheet_dict']['class_name']:
			
 
				+        #         ele['sheet_dict'].update({'regions': tmp[index]['sheet_dict']['regions']})
			
 
				+
			
 
				+    return sheet_dict_list
			
--- a/segment/sheet_resolve/analysis/sheet/sheet_points_total.py
+++ b/segment/sheet_resolve/analysis/sheet/sheet_points_total.py
--- a/segment/sheet_resolve/analysis/solve/__init__.py
+++ b/segment/sheet_resolve/analysis/solve/__init__.py
@@ -0,0 +1,3 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:02
			
--- a/segment/sheet_resolve/analysis/solve/mark_box.py
+++ b/segment/sheet_resolve/analysis/solve/mark_box.py
@@ -0,0 +1,119 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : mark_box.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:16
			
 
				+import time
			
 
				+import re
			
 
				+import cv2
			
 
				+import xml.etree.cElementTree as ET
			
 
				+
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+
			
 
				+
			
 
				+def solve_mark(left, top, solve_img, xml_path):
			
 
				+    shape = solve_img.shape
			
 
				+    y, x = shape[0], shape[1]
			
 
				+
			
 
				+    # ocr_region = solve_img[0:int(0.15 * y), :]
			
 
				+    ocr_region = solve_img[0:250, :]
			
 
				+
			
 
				+    # cv2.imshow('ocr_region', ocr_region)
			
 
				+    # if cv2.waitKey(0) == 27:
			
 
				+    #     cv2.destroyAllWindows()
			
 
				+
			
 
				+    t11 = time.time()
			
 
				+    word_result_list = get_ocr_text_and_coordinate(ocr_region)
			
 
				+    t22 = time.time()
			
 
				+    print('mark ocr time cost: ', t22-t11)
			
 
				+
			
 
				+    if len(word_result_list) < 1:
			
 
				+        return {}
			
 
				+    else:
			
 
				+        words_str = str([ele['words'] for ele in word_result_list])
			
 
				+        number = 999
			
 
				+        number_model = re.compile("\d+[.、:：]\D")
			
 
				+        number_list = number_model.findall(words_str)
			
 
				+        if len(number_list) > 0:
			
 
				+            number = int(re.sub('[\D]', '', number_list[0]))
			
 
				+
			
 
				+        all_chars_list = []
			
 
				+        zhmodel = re.compile(u'[\u4e00-\u9fa5]')
			
 
				+
			
 
				+        for i, chars_dict in enumerate(word_result_list):
			
 
				+            words = re.sub('[iIl|点]', '1', chars_dict['words'])
			
 
				+            match = zhmodel.search(words)  # 是否有中文
			
 
				+            if not match:
			
 
				+                chars_list = chars_dict['chars']
			
 
				+                all_chars_list = all_chars_list + chars_list
			
 
				+
			
 
				+        new_all_chars_list = []
			
 
				+        i = 1
			
 
				+
			
 
				+        while i <= len(all_chars_list):
			
 
				+            pre_one = all_chars_list[i - 1]
			
 
				+            if i == len(all_chars_list):
			
 
				+                new_all_chars_list.append(pre_one)
			
 
				+                break
			
 
				+            rear_one = all_chars_list[i]
			
 
				+            condition1 = abs(pre_one['location']['top'] - rear_one['location']['top']) < pre_one['location'][
			
 
				+                'height']  # 两字高度差小于一字高度
			
 
				+            condition2 = pre_one['location']['left'] + 2 * pre_one['location']['width'] > rear_one['location'][
			
 
				+                'left']  # 两字长度大于两字间间隔
			
 
				+            if condition1:
			
 
				+                if condition2:
			
 
				+                    new_char = pre_one['char'] + rear_one['char']
			
 
				+                    new_location = {'left': pre_one['location']['left'],
			
 
				+                                    'top': min(pre_one['location']['top'], rear_one['location']['top']),
			
 
				+                                    'width': rear_one['location']['left'] + rear_one['location']['width'] -
			
 
				+                                    pre_one['location']['left'],
			
 
				+                                    'height': max(pre_one['location']['height'], rear_one['location']['height'])}
			
 
				+                    new_all_chars_list.append({'char': new_char, 'location': new_location})
			
 
				+                    i = i + 1 + 1
			
 
				+                else:
			
 
				+                    new_all_chars_list.append(pre_one)
			
 
				+                    i = i + 1
			
 
				+            else:
			
 
				+                new_all_chars_list.append(pre_one)  # 遇到字符y轴相差过大就结束
			
 
				+                break  # break 直接跳行
			
 
				+
			
 
				+        tree = ET.parse(xml_path)
			
 
				+
			
 
				+        xml_list = []
			
 
				+        for i, ele in enumerate(new_all_chars_list[1:]):  # 从第二位开始索引
			
 
				+            pre_one = new_all_chars_list[i]
			
 
				+            rear_one = ele
			
 
				+
			
 
				+            intervel = (rear_one['location']['left'] -
			
 
				+                        pre_one['location']['left'] -
			
 
				+                        pre_one['location']['width']) // 2
			
 
				+
			
 
				+            xmin = ele['location']['left'] - intervel + left
			
 
				+            xmax = xmin + ele['location']['width'] + 2 * intervel
			
 
				+            ymin = ele['location']['top'] - ele['location']['height'] // 2 + top
			
 
				+            ymax = ymin + 2 * ele['location']['height']
			
 
				+            tree = utils.create_xml('{}_solve{}'.format(number, ele['char']), tree, xmin, ymin, xmax, ymax)
			
 
				+            xml_list.append({'char': ele['char'], 'location': [xmin, ymin, xmax, ymax]})
			
 
				+
			
 
				+            if i == 0:
			
 
				+                intervel = (rear_one['location']['left'] -
			
 
				+                            pre_one['location']['left'] -
			
 
				+                            pre_one['location']['width']) // 2
			
 
				+
			
 
				+                xmin = pre_one['location']['left'] - intervel + left
			
 
				+                xmax = xmin + pre_one['location']['width'] + 2 * intervel
			
 
				+                ymin = pre_one['location']['top'] - pre_one['location']['height'] // 2 + top
			
 
				+                ymax = ymin + 2 * pre_one['location']['height']
			
 
				+                tree = utils.create_xml('{}_solve{}'.format(number, pre_one['char']), tree, xmin, ymin, xmax, ymax)
			
 
				+                xml_list.insert(0, {'char': pre_one['char'], 'location': [xmin, ymin, xmax, ymax]})
			
 
				+
			
 
				+        tree.write(xml_path)
			
 
				+        return {'number': number, 'mark': xml_list}
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    path = r'C:\Users\Administrator\Desktop\sheet\cloze01.jpg'
			
 
				+    image = cv2.imread(path)
			
 
				+    t1 = time.time()
			
 
				+    # solve_mark(image, path)
			
 
				+    t2 = time.time()
			
 
				+    print(t2 - t1)
			
--- a/segment/sheet_resolve/analysis/solve/mark_line_box.py
+++ b/segment/sheet_resolve/analysis/solve/mark_line_box.py
@@ -0,0 +1,32 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : mark_box.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:16
			
 
				+import time
			
 
				+import re
			
 
				+import cv2
			
 
				+import xml.etree.cElementTree as ET
			
 
				+
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+from segment.sheet_resolve.tools import utils
			
 
				+
			
 
				+
			
 
				+def solve_line(solve_img):
			
 
				+    # ocr_region = solve_img[0:int(0.15 * y), :]
			
 
				+    ocr_region = solve_img[0:250, :]
			
 
				+
			
 
				+    t11 = time.time()
			
 
				+    word_result_list = get_ocr_text_and_coordinate(ocr_region)
			
 
				+    t22 = time.time()
			
 
				+    print('mark ocr time cost: ', t22-t11)
			
 
				+
			
 
				+    if len(word_result_list) < 1:
			
 
				+        return 999
			
 
				+    else:
			
 
				+        words_str = str([ele['words'] for ele in word_result_list])
			
 
				+        number = 999
			
 
				+        number_model = re.compile("\d+[.、:：]\D")
			
 
				+        number_list = number_model.findall(words_str)
			
 
				+        if len(number_list) > 0:
			
 
				+            number = int(re.sub('[\D]', '', number_list[0]))
			
 
				+
			
 
				+        return number
			
--- a/segment/sheet_resolve/analysis/solve/optional_solve.py
+++ b/segment/sheet_resolve/analysis/solve/optional_solve.py
@@ -0,0 +1,118 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : optional_solve.py
			
 
				+# @Time    : 2019/9/17 0017 下午 13:18
			
 
				+import cv2
			
 
				+import re
			
 
				+from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate
			
 
				+
			
 
				+
			
 
				+def rgb2binary(im):
			
 
				+    gray_img = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
			
 
				+    _ret, thresh_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
			
 
				+    return thresh_img
			
 
				+
			
 
				+
			
 
				+def find_contours(left, top, image, ex_x=30, ex_y=1):
			
 
				+    threshed = rgb2binary(image)
			
 
				+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ex_x, ex_y))  # 膨胀系数
			
 
				+    # morphed = cv2.morphologyEx(threshed, cv2.MORPH_CLOSE, kernel)
			
 
				+    morphed = cv2.dilate(threshed, kernel, iterations=1)
			
 
				+
			
 
				+    (major, minor, _) = cv2.__version__.split(".")
			
 
				+    contours = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+    cnts = contours[0] if int(major) > 3 else contours[1]
			
 
				+
			
 
				+    cnt = sorted(cnts, key=cv2.contourArea)
			
 
				+    l, t, r, b = 9999, 9999, 0, 0
			
 
				+    sum_w, sum_h = 0, 0
			
 
				+    for ele in cnt:
			
 
				+        x, y, w, h = cv2.boundingRect(ele)
			
 
				+        xm = x + w
			
 
				+        ym = y + h
			
 
				+        l, t, r, b = min(l, x), min(t, y), max(r, xm), max(b, ym)
			
 
				+        sum_w, sum_h = sum_w + w, sum_h + h
			
 
				+
			
 
				+    cols = len(cnt)
			
 
				+    if cols > 4:
			
 
				+        cols = 4
			
 
				+    single_width, single_height = int(sum_w / len(cnt)), int(sum_h / len(cnt))
			
 
				+    optional_solve_dict = {'rows': 1, 'cols': cols,
			
 
				+                           'single_width': single_width,
			
 
				+                           'single_height': single_height,
			
 
				+                           'bounding_box': {'xmin': l + left + single_width,
			
 
				+                                            'ymin': t + top,
			
 
				+                                            'xmax': r + left,
			
 
				+                                            'ymax': b + top}
			
 
				+                           }
			
 
				+    return optional_solve_dict
			
 
				+
			
 
				+
			
 
				+def resolve_optional_choice(ll, tt, direction, image):
			
 
				+    ocr_res = get_ocr_text_and_coordinate(image)
			
 
				+    # ocr_res = [{'chars': [{'char': '[', 'location': {'width': 16, 'top': 12, 'left': 11, 'height': 32}}, {'char': '4', 'location': {'width': 16, 'top': 12, 'left': 27, 'height': 32}}, {'char': '5', 'location': {'width': 16, 'top': 12, 'left': 36, 'height': 32}}, {'char': ']', 'location': {'width': 16, 'top': 12, 'left': 55, 'height': 32}}, {'char': '[', 'location': {'width': 16, 'top': 12, 'left': 74, 'height': 32}}, {'char': '4', 'location': {'width': 16, 'top': 12, 'left': 93, 'height': 32}}, {'char': '6', 'location': {'width': 16, 'top': 12, 'left': 102, 'height': 32}}, {'char': ']', 'location': {'width': 16, 'top': 12, 'left': 121, 'height': 32}}, {'char': '[', 'location': {'width': 16, 'top': 12, 'left': 140, 'height': 32}}, {'char': '4', 'location': {'width': 16, 'top': 12, 'left': 159, 'height': 32}}, {'char': '7', 'location': {'width': 16, 'top': 12, 'left': 178, 'height': 32}}, {'char': ']', 'location': {'width': 14, 'top': 12, 'left': 188, 'height': 32}}], 'location': {'width': 191, 'top': 12, 'left': 11, 'height': 32}, 'words': '[45][46][47]'}]
			
 
				+    digital_p = r'[\[*|【*]\d+[\]*|]*]'
			
 
				+    eng_char_p = '[[*|【*][A|B|C|D|E|F|G|T|F][]*|】*]'  # english
			
 
				+
			
 
				+    pattern_list = [digital_p, eng_char_p]
			
 
				+
			
 
				+    option_list = []
			
 
				+    mean_width_list = []
			
 
				+    mean_height_list = []
			
 
				+    for i, words_line in enumerate(ocr_res):
			
 
				+        words = words_line['words']
			
 
				+        words = words.replace(' ', '').upper()  # 去除空格
			
 
				+        loc = words_line['location']
			
 
				+        top = int(loc['top'])
			
 
				+        left = int(loc['left'])
			
 
				+        width = int(loc['width'])
			
 
				+        height = int(loc['height'])
			
 
				+        loc.update({'right': left + width, 'bottom': top + height,
			
 
				+                    'mid_x': left + width // 2, 'mid_y': top + height // 2})
			
 
				+
			
 
				+        for p in pattern_list:
			
 
				+            words_m = re.finditer(p, words)
			
 
				+            match_index_list = [(m.group(), m.span()) for m in words_m if m]
			
 
				+
			
 
				+            option_list += [ele[0].replace('[', '').replace(']', '').replace('【', ']').replace('】', '')
			
 
				+                            for ele in match_index_list]
			
 
				+
			
 
				+            for letter_info in match_index_list:
			
 
				+                index_start = letter_info[1][0]
			
 
				+                index_end = letter_info[1][1] - 1
			
 
				+                char_start = words_line['chars'][index_start]
			
 
				+                char_end = words_line['chars'][index_end]
			
 
				+
			
 
				+                letter_loc_xmin = int(char_start['location']['left'])
			
 
				+                letter_loc_ymin = min(int(char_start['location']['top']), int(char_end['location']['top']))
			
 
				+                letter_loc_xmax = int(char_end['location']['left']) + int(char_end['location']['width'])
			
 
				+                letter_loc_ymax = max(int(char_start['location']['top']) + int(char_start['location']['height']),
			
 
				+                                      int(char_end['location']['top']) + int(char_end['location']['height']))
			
 
				+
			
 
				+                mean_width_list.append(letter_loc_xmax-letter_loc_xmin)
			
 
				+                mean_height_list.append(letter_loc_ymax-letter_loc_ymin)
			
 
				+
			
 
				+    if not option_list:
			
 
				+        option_list = 'A,B'
			
 
				+    left = min([int(ele['location']['left']) for ele in ocr_res])
			
 
				+    top = min([int(ele['location']['top']) for ele in ocr_res])
			
 
				+    right = max([int(ele['location']['left']) + int(ele['location']['width']) for ele in ocr_res])
			
 
				+    bottom = max([int(ele['location']['top']) + int(ele['location']['height']) for ele in ocr_res])
			
 
				+
			
 
				+    if direction == 180:
			
 
				+        rows, cols = 1, len(option_list)
			
 
				+    else:
			
 
				+        rows, cols = len(option_list), 1
			
 
				+
			
 
				+    mean_width = sum(mean_width_list) // len(mean_width_list)
			
 
				+    mean_height = sum(mean_height_list) // len(mean_height_list)
			
 
				+    optional_choice_dict = {'rows': rows, 'cols': cols,
			
 
				+                            'option': ','.join(option_list),
			
 
				+                            'single_width': mean_width,
			
 
				+                            'single_height': mean_height,
			
 
				+                            'direction': direction,
			
 
				+                            'bounding_box': {'xmin': ll + left,
			
 
				+                                             'ymin': tt + top,
			
 
				+                                             'xmax': ll + right,
			
 
				+                                             'ymax': tt + bottom}}
			
 
				+
			
 
				+    return optional_choice_dict
			
--- a/segment/sheet_resolve/labels/000000-template.xml
+++ b/segment/sheet_resolve/labels/000000-template.xml
@@ -0,0 +1,14 @@
 
				+<annotation>
			
 
				+	<folder>JPEGImage</folder>
			
 
				+	<filename>000001.jpg</filename>
			
 
				+	<path>00</path>
			
 
				+	<source>
			
 
				+		<database>Unknown</database>
			
 
				+	</source>
			
 
				+	<size>
			
 
				+		<width>1000</width>
			
 
				+		<height>1000</height>
			
 
				+		<depth>3</depth>
			
 
				+	</size>
			
 
				+	<segmented>0</segmented>
			
 
				+</annotation>
			
--- a/segment/sheet_resolve/tools/__init__.py
+++ b/segment/sheet_resolve/tools/__init__.py
@@ -0,0 +1,2 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : __init__.py.py
			
--- a/segment/sheet_resolve/tools/brain_api.py
+++ b/segment/sheet_resolve/tools/brain_api.py
@@ -0,0 +1,382 @@
 
				+# @Author  : lightXu
			
 
				+# @File    : brain_api.py
			
 
				+# @Time    : 2018/11/21 0021 下午 16:20
			
 
import base64
import os
import shutil
import time
import xml.etree.cElementTree as ET
from urllib import parse, request

import cv2
import numpy as np
import pytesseract
import requests

from segment.server import ocr_login
from segment.sheet_resolve.tools import utils
			
 
				+
			
 
				+# access_token = '24.82b09618f94abe2a35113177f4eec593.2592000.1546765941.282335-14614857'
			
 
				+access_token = ocr_login()
			
 
				+OCR_BOX_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
			
 
				+OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/'
			
 
				+OCR_HAND_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting'
			
 
				+# OCR_ACCURACY = 'general'
			
 
				+OCR_ACCURACY = 'accurate'
			
 
				+OCR_CLIENT_ID = 'AVH7VGKG8QxoSotp6wG9LyZq'
			
 
				+OCR_CLIENT_SECRET = 'gG7VYvBWLU8Rusnin8cS8Ta4dOckGFl6'
			
 
				+OCR_TOKEN_UPDATE_DATE = 10
			
 
				+
			
 
				+
			
 
				+def preprocess(img):
			
 
				+    scale = 0
			
 
				+    dilate = 1
			
 
				+    blur = 3
			
 
				+
			
 
				+    # rescale the image
			
 
				+    if scale != 0:
			
 
				+        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
			
 
				+
			
 
				+    # Convert to gray
			
 
				+    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+
			
 
				+    # Apply dilation and erosion to remove some noise
			
 
				+    if dilate != 0:
			
 
				+        kernel = np.ones((dilate, dilate), np.uint8)
			
 
				+        img = cv2.dilate(img, kernel, iterations=1)
			
 
				+        img = cv2.erode(img, kernel, iterations=1)
			
 
				+
			
 
				+    # Apply blur to smooth out the edges
			
 
				+    if blur != 0:
			
 
				+        img = cv2.GaussianBlur(img, (blur, blur), 0)
			
 
				+
			
 
				+    # Apply threshold to get image with only b&w (binarization)
			
 
				+    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
			
 
				+
			
 
				+    return img
			
 
				+
			
 
				+
			
 
				+def opecv2base64(img):
			
 
				+    image = cv2.imencode('.jpg', img)[1]
			
 
				+    base64_data = str(base64.b64encode(image))[2:-1]
			
 
				+    return base64_data
			
 
				+
			
 
				+
			
 
				+def get_ocr_raw_result(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opecv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers, timeout=15).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    return resp
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opecv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        # 'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    # resp = requests.post(url, data=data, headers=headers, timeout=15).json()
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    return words_result
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate0(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opecv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'false',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    # resp = requests.post(url, data=data, headers=headers, timeout=15).json()
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    return words_result
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate_direction(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opecv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers, timeout=15).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    direction = resp.get('direction')
			
 
				+    # d_map = {0: 180,
			
 
				+    #          - 1: 90,
			
 
				+    #          - 2: -180,
			
 
				+    #          - 3: -270}
			
 
				+    d_map = {0: 180,
			
 
				+             -1: 90,
			
 
				+             -2: 180,
			
 
				+             -3: 90}
			
 
				+    return words_result, d_map[direction]
			
 
				+
			
 
				+
			
 
				+def get_ocr_text_and_coordinate_in_google_format(img, ocr_accuracy=OCR_ACCURACY, language_type='CHN_ENG'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}{}'.format(OCR_BOX_URL, ocr_accuracy, '?', textmod)
			
 
				+    url_general = '{}{}{}{}'.format(OCR_BOX_URL, 'general', '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image_type = 'base64'
			
 
				+    group_id = 'group001'
			
 
				+    user_id = 'usr001'
			
 
				+
			
 
				+    image = opecv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image_type': image_type,
			
 
				+        'group_id': group_id,
			
 
				+        'user_id': user_id,
			
 
				+        'image': image,
			
 
				+        'detect_direction': 'true',
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'language_type': language_type,
			
 
				+        # 'vertexes_location': 'true',
			
 
				+        # 'probability': 'true'
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        if 'internal error' in resp.get('error_msg'):
			
 
				+            resp = requests.post(url_general, data=data, headers=headers).json()
			
 
				+            if resp.get('error_msg'):
			
 
				+                raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+        else:
			
 
				+            raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    dict_list = [item2.get('location') for item in words_result for item2 in item['chars']]
			
 
				+    char_list = [item2.get('char') for item in words_result for item2 in item['chars']]
			
 
				+    words = [item.get('words') for item in words_result]
			
 
				+    matrix = []
			
 
				+    for adict in dict_list:
			
 
				+        xmin = adict['left']
			
 
				+        ymin = adict['top']
			
 
				+        xmax = adict['width'] + adict['left']
			
 
				+        ymax = adict['top'] + adict['height']
			
 
				+        item0 = (xmin, ymin, xmax, ymax)
			
 
				+        matrix.append(item0)
			
 
				+
			
 
				+    res_dict = {'chars': char_list, 'coordinates': matrix, 'words': words}
			
 
				+    return res_dict
			
 
				+
			
 
				+
			
 
				+def change_format_baidu_to_google(words_result):
			
 
				+    dict_list = [item2.get('location') for item in words_result for item2 in item['chars']]
			
 
				+    char_list = [item2.get('char') for item in words_result for item2 in item['chars']]
			
 
				+    words = [item.get('words') for item in words_result]
			
 
				+    matrix = []
			
 
				+    for adict in dict_list:
			
 
				+        xmin = adict['left']
			
 
				+        ymin = adict['top']
			
 
				+        xmax = adict['width'] + adict['left']
			
 
				+        ymax = adict['top'] + adict['height']
			
 
				+        item0 = (xmin, ymin, xmax, ymax)
			
 
				+        matrix.append(item0)
			
 
				+
			
 
				+    res_dict = {'chars': char_list, 'coordinates': matrix, 'words': words}
			
 
				+    return res_dict
			
 
				+
			
 
				+
			
 
				+def get_handwriting_ocr_text_and_coordinate_in_google_format(img, words_type='words'):
			
 
				+    textmod = {'access_token': access_token}
			
 
				+    textmod = parse.urlencode(textmod)
			
 
				+    url = '{}{}{}'.format(OCR_HAND_URL, '?', textmod)
			
 
				+
			
 
				+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
			
 
				+
			
 
				+    image = opecv2base64(img)
			
 
				+
			
 
				+    data = {
			
 
				+        'image': image,
			
 
				+        'recognize_granularity': 'small',
			
 
				+        'words_type': words_type,
			
 
				+    }
			
 
				+
			
 
				+    resp = requests.post(url, data=data, headers=headers).json()
			
 
				+    if resp.get('error_msg'):
			
 
				+        raise Exception("ocr {}!".format(resp.get('error_msg')))
			
 
				+
			
 
				+    words_result = resp.get('words_result')
			
 
				+    dict_list = [item2.get('location') for item in words_result for item2 in item['chars']]
			
 
				+    char_list = [item2.get('char') for item in words_result for item2 in item['chars']]
			
 
				+    words = [item.get('words') for item in words_result]
			
 
				+    matrix = []
			
 
				+    for adict in dict_list:
			
 
				+        xmin = adict['left']
			
 
				+        ymin = adict['top']
			
 
				+        xmax = adict['width'] + adict['left']
			
 
				+        ymax = adict['top'] + adict['height']
			
 
				+        item0 = (xmin, ymin, xmax, ymax)
			
 
				+        matrix.append(item0)
			
 
				+
			
 
				+    res_dict = {'chars': char_list, 'coordinates': matrix, 'words': words}
			
 
				+    return res_dict
			
 
				+
			
 
				+
			
 
				+def tesseract_boxes_by_py(image, ocr_lang='chi_sim+eng'):
			
 
				+    img = preprocess(image)
			
 
				+    txt = pytesseract.image_to_boxes(img, lang=ocr_lang, output_type='dict')
			
 
				+    h, w = img.shape
			
 
				+    char_list = txt['char']
			
 
				+
			
 
				+    left = txt['left']
			
 
				+    bottom = [(h - top) for top in txt['top']]
			
 
				+    right = txt['right']
			
 
				+    top = [(h - bottom) for bottom in txt['bottom']]
			
 
				+
			
 
				+    matrix = []
			
 
				+    for i, ele in enumerate(left):
			
 
				+        matrix.append((ele, top[i], right[i], bottom[i]))
			
 
				+
			
 
				+    res_dict = {'chars': char_list, 'coordinates': matrix}
			
 
				+    return res_dict
			
 
				+
			
 
				+
			
 
				+def gen_xml_of_per_char(img_path):
			
 
				+    img = utils.read_single_img(img_path)
			
 
				+    res_dict = get_ocr_text_and_coordinate_in_google_format(img, 'accurate', 'CHN_ENG')
			
 
				+    box_list = res_dict['coordinates']
			
 
				+    tree = ET.parse(r'./000000-template.xml')  # xml tree
			
 
				+    for index_num, exam_bbox in enumerate(box_list):
			
 
				+        tree = utils.create_xml('{}'.format(res_dict['chars'][index_num]), tree,
			
 
				+                                exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
			
 
				+    # print(exam_items_bbox)
			
 
				+    tree.write(img_path.replace('.jpg', '.xml'))
			
 
				+
			
 
				+    res_dict_google = tesseract_boxes_by_py(img, ocr_lang='chi_sim+equ+eng')
			
 
				+    box_list_g = res_dict_google['coordinates']
			
 
				+    tree_g = ET.parse(r'./000000-template.xml')  # xml tree
			
 
				+    for index_num, exam_bbox in enumerate(box_list_g):
			
 
				+        tree_g = utils.create_xml('{}'.format(res_dict_google['chars'][index_num]), tree_g,
			
 
				+                                  exam_bbox[0], exam_bbox[1], exam_bbox[2], exam_bbox[3])
			
 
				+    # print(exam_items_bbox)
			
 
				+    tree_g.write(img_path.replace('.jpg', '_g.xml'))
			
 
				+    shutil.copy(img_path, img_path.replace('.jpg', '_g.jpg'))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    img_path0 = r'C:\Users\Administrator\Desktop\sheet\mark-test\002_mark.jpg'
			
 
				+    image0 = cv2.imread(img_path0)
			
 
				+    t1 = time.time()
			
 
				+    res = get_ocr_text_and_coordinate(image0)
			
 
				+    t2 = time.time()
			
 
				+    print(t2 - t1)
			
 
				+    print(res)