|
@@ -1,4 +1,4 @@
|
|
|
-#include "pch.h"
|
|
|
+锘�#include "pch.h"
|
|
|
#include "AssignWords.h"
|
|
|
#include "CvxText.h"
|
|
|
#include <iostream>
|
|
@@ -68,8 +68,8 @@ void DivideEquallyStr(const char *buff, int len, char left[], char right[])
|
|
|
strncpy(left, buff, i);
|
|
|
strncpy(right, buff + i, len - i);
|
|
|
return;
|
|
|
- /* 直接中间分开,不用特殊处理*/
|
|
|
- std::string strKey1 = ";", strKey2 = " ";
|
|
|
+ /* Split directly in the middle; no special handling is needed */
|
|
|
+ std::string strKey1 = "锛�", strKey2 = " ";
|
|
|
std::size_t found = strtemp.find(strKey1);
|
|
|
bool flag1 = false, flag2 = false;
|
|
|
while (found != std::string::npos && found < strtemp.length() / 2)
|
|
@@ -129,7 +129,7 @@ void DivideEquallyStr(const char *buff, int len, char left[], char right[])
|
|
|
|
|
|
void RandReplaceStr(std::vector<std::string> list, std::string &ret)
|
|
|
{
|
|
|
- std::string to_replaced = ";";
|
|
|
+ std::string to_replaced = "锛�";
|
|
|
srand((unsigned)time(NULL));
|
|
|
std::string newchars;
|
|
|
int index = rand() % list.size();
|
|
@@ -184,13 +184,60 @@ void AssignWordsFromString(std::string strContent, int maxTwoWith, int maxTreeWi
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+// Determine whether the data is UTF-8
|
|
|
// Counts the consecutive 1 bits at the most-significant end of `byte`,
// stopping at the first 0 bit.
//
// For a UTF-8 lead byte this count equals the number of bytes the encoded
// character occupies (e.g. 1110_XXXX -> 3).
//
// @param byte  The byte to inspect.
// @return      Number of leading 1 bits, in the range [0, 8].
int preNUm(unsigned char byte) {
    int num = 0;
    // Walk a single-bit mask from bit 7 down to bit 0; the mask becomes 0
    // after bit 0, which ends the scan for a byte of 0xFF.
    for (unsigned char mask = 0x80; mask != 0 && (byte & mask) != 0; mask >>= 1) {
        num++;
    }
    return num;
}

// Checks whether the first `len` bytes of `data` look like UTF-8 text.
//
// Accepted forms:
//   0XXX_XXXX
//   1110_XXXX 10XX_XXXX 10XX_XXXX
//   1111_0XXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//   1111_10XX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//   1111_110X 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//
// NOTE(review): 2-byte sequences (110X_XXXX 10XX_XXXX) are rejected, exactly
// as the original `> 2` test did -- presumably so that double-byte ANSI/GBK
// text is not mistaken for UTF-8; confirm this is intended.
//
// @param data  Buffer to inspect; may be null only when `len` <= 0.
// @param len   Number of bytes of `data` to examine.
// @return      true if every byte belongs to one of the accepted forms
//              (an empty range yields true); false otherwise, including
//              when a multi-byte sequence is cut off by the end of the
//              buffer.
bool isUtf8(unsigned char* data, int len) {
    if (data == nullptr) {
        // Nothing to read: only an empty range is acceptable.
        return len <= 0;
    }

    int i = 0;
    while (i < len) {
        if ((data[i] & 0x80) == 0x00) {
            // 0XXX_XXXX: single-byte character.
            i++;
            continue;
        }

        // preNUm() yields the number of bytes announced by the lead byte.
        const int num = preNUm(data[i]);
        if (num <= 2 || num > 6) {
            // 1 -> stray continuation byte, 2 -> rejected (see note above),
            // 7/8 -> 0xFE/0xFF, which never start a UTF-8 sequence.
            return false;
        }

        i++;
        // The following num - 1 bytes must all exist and start with bits 10.
        for (int j = 0; j < num - 1; j++) {
            if (i >= len || (data[i] & 0xc0) != 0x80) {
                return false;
            }
            i++;
        }
    }
    return true;
}
|
|
|
|
|
|
int AssignWordsFromTest(std::string pathName, std::vector<tuple< string, string>> &oneList, std::vector<tuple< string, string>> &twoList,
|
|
|
std::vector<tuple< string, string>> &threeList, std::vector<tuple< string, string>> &fourList, int &ret)
|
|
|
{
|
|
|
int nHangCount = 1;
|
|
|
- // 词性处理
|
|
|
+ // Part-of-speech handling
|
|
|
HMODULE module = GetModuleHandle(0);
|
|
|
TCHAR pFileName[MAX_PATH + 2] = { 0 };
|
|
|
GetModuleFileName(module, pFileName, MAX_PATH);
|
|
@@ -241,8 +288,12 @@ int AssignWordsFromTest(std::string pathName, std::vector<tuple< string, string>
|
|
|
}
|
|
|
|
|
|
std::string strTemp = line,strTempAnother = lineOther;
|
|
|
- UTF8toANSI(strTemp);
|
|
|
- UTF8toANSI(strTempAnother);
|
|
|
+ if (isUtf8((unsigned char*)strTemp.c_str(), strTemp.length()))
|
|
|
+ UTF8toANSI(strTemp);
|
|
|
+ //UTF8toANSI(strTempAnother);
|
|
|
+ if(isUtf8((unsigned char*)strTempAnother.c_str(), strTempAnother.length()))
|
|
|
+ UTF8toANSI(strTempAnother);
|
|
|
+ //strTempAnother;
|
|
|
if (strTemp == "" && strTempAnother=="")
|
|
|
continue;
|
|
|
else if (strTemp == "")
|
|
@@ -276,7 +327,7 @@ int AssignWordsFromTest(std::string pathName, std::vector<tuple< string, string>
|
|
|
memset(lineOther, 0, 4096);
|
|
|
nHangCount++;
|
|
|
}
|
|
|
- // 源文件结束,查看答案文件是否结束
|
|
|
+ // The source file has ended; check whether the answer file has ended too
|
|
|
if (finOther.getline(lineOther, sizeof(lineOther)))
|
|
|
{
|
|
|
return -2;
|