#include "pch.h"
#include "AssignWords.h"
#include "CvxText.h"
// NOTE(review): the names of the four angle-bracket includes that sat here were
// not readable in the reviewed copy of this file; the headers below are the
// ones the code in this translation unit actually needs. Confirm against VCS.
#include <io.h>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <string>
#include <vector>
#include "BaseUtility.h"

namespace {

// Returns the byte offset at which a buffer of `len` bytes is cut "in half".
//
// The scan walks the first len/2 bytes; whenever it meets a byte with the high
// bit set it skips the following byte as well, so the cut never lands between
// the two bytes of such a pair. The result is therefore len/2 or len/2 + 1
// (and 0 when len < 2).
int HalfSplitOffset(const char *buff, int len)
{
    int i = 0;
    for (i = 0; i < len / 2; i++) {
        if (static_cast<unsigned char>(buff[i]) >= 0x80) {
            i++; // high-bit byte: treat it and the next byte as one unit
        }
    }
    return i;
}

} // namespace

// Counts the consecutive 1 bits at the most-significant end of `byte`
// (0 for 0xxxxxxx, 2 for 110xxxxx, ... 8 for 0xFF).
int preNUm(unsigned char byte)
{
    int num = 0;
    for (unsigned char mask = 0x80; mask != 0 && (byte & mask) != 0; mask >>= 1) {
        ++num;
    }
    return num;
}

// Heuristic check whether `data[0..len)` is UTF-8 encoded text.
//
// Multi-byte layouts considered:
//   1110_XXXX 10XX_XXXX 10XX_XXXX
//   1111_0XXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//   1111_10XX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//   1111_110X 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
// The number of leading 1 bits of the first byte (see preNUm) is the number of
// bytes the character occupies.
//
// Returns true when every byte is either plain ASCII or part of a well-formed
// sequence of 3 to 6 bytes; an empty buffer yields true.
//
// Two-byte sequences (110X_XXXX 10XX_XXXX) are rejected, exactly as before.
// NOTE(review): presumably this is deliberate, because double-byte ANSI text
// can look like valid 2-byte UTF-8 - confirm before relaxing it.
//
// Fixes over the previous version: continuation bytes are no longer read past
// `len`, and lead bytes with 7 or 8 leading 1 bits (0xFE / 0xFF) are rejected.
bool isUtf8(unsigned char* data, int len)
{
    if (data == nullptr) {
        return len <= 0;
    }

    int i = 0;
    while (i < len) {
        if ((data[i] & 0x80) == 0x00) {
            // 0XXX_XXXX: single-byte character
            i++;
            continue;
        }

        const int num = preNUm(data[i]);
        if (num <= 2 || num > 6) {
            // stray continuation byte, 2-byte sequence, or impossible lead byte
            return false;
        }
        if (num > len - i) {
            // sequence is cut off by the end of the buffer
            return false;
        }
        // the remaining num - 1 bytes must all start with the bits 10
        for (int j = 1; j < num; j++) {
            if ((data[i + j] & 0xc0) != 0x80) {
                return false;
            }
        }
        i += num;
    }
    return true;
}

// Splits `s` at every character that occurs in `delim` (each character of
// `delim` is a separator on its own) and appends the pieces to `*ret`.
//
// Empty pieces between two adjacent separators, or before a leading
// separator, are kept. An empty remainder after the last separator is NOT
// appended (the old `index - last > 0` test was unsigned arithmetic on npos
// and therefore always true, so it always appended one).
void split(const std::string& s, const std::string& delim, std::vector< std::string >* ret)
{
    if (ret == nullptr) {
        return;
    }

    size_t last = 0;
    size_t index = s.find_first_of(delim, last);
    while (index != std::string::npos) {
        ret->push_back(s.substr(last, index - last));
        last = index + 1;
        index = s.find_first_of(delim, last);
    }
    if (last < s.size()) {
        ret->push_back(s.substr(last));
    }
}

// Removes `to_replaced` from `str` when, and only when, `str` starts with it.
void deleteFlagWord(std::string& str, const std::string& to_replaced)
{
    if (to_replaced.empty()) {
        return;
    }
    if (str.compare(0, to_replaced.length(), to_replaced) == 0) {
        str.erase(0, to_replaced.length());
    }
}

// Applies deleteFlagWord to `strRet` once for every entry of `list`, in order.
void ReplaceFlagWord(std::vector<std::string> &list, std::string &strRet)
{
    for (const std::string &flag : list) {
        deleteFlagWord(strRet, flag);
    }
}

// Splits the first `len` bytes of `buff` into two halves (split directly in the
// middle, no special handling of separators) and copies them to `left` and
// `right`. The cut position is described at HalfSplitOffset.
//
// As before, NO terminating '\0' is written: callers that want C strings must
// pass zero-filled buffers with room for at least len + 1 bytes each.
// Nothing is written when a pointer is null or `len` is not positive.
void DivideEquallyStr(const char *buff, int len, char left[], char right[])
{
    if (buff == nullptr || left == nullptr || right == nullptr || len <= 0) {
        return;
    }

    const int cut = HalfSplitOffset(buff, len);
    strncpy(left, buff, cut);
    strncpy(right, buff + cut, len - cut);
}

// Replaces every ";" in `ret` with an entry of `list` chosen at random
// (a fresh random pick for each occurrence). Does nothing for an empty list.
void RandReplaceStr(std::vector<std::string> list, std::string &ret)
{
    if (list.empty()) {
        return; // also avoids rand() % 0
    }

    // Seed once per process. Re-seeding with time() on every call made all
    // calls within the same second draw the same "random" sequence.
    static const bool seeded = (srand(static_cast<unsigned>(time(nullptr))), true);
    (void)seeded;

    const std::string to_replaced = ";";
    std::string::size_type pos = 0;
    while ((pos = ret.find(to_replaced, pos)) != std::string::npos) {
        const std::string &newchars = list[rand() % list.size()];
        ret.replace(pos, to_replaced.length(), newchars);
        pos += newchars.length(); // continue after the inserted text
    }
}

// Files `strContent` into one of the three column lists according to its
// rendered width (GetTextSize with font_size / font_family):
//   width < maxFourWith  -> fourList
//   width < maxTreeWith  -> threeList
//   width < maxTwoWith   -> twoList
// Anything wider is cut in half and both halves are processed recursively,
// left half first. Before an entry is stored, its ";" characters are replaced
// via RandReplaceStr when `rePlaceList` is not empty.
//
// A string that is too wide but cannot be cut into two non-empty halves is
// stored in `twoList` as it is; previously this case recursed forever.
void AssignWordsFromString(std::string strContent, int maxTwoWith, int maxTreeWith, int maxFourWith,
    std::vector<std::string> &rePlaceList, std::vector<std::string> &twoList,
    std::vector<std::string> &threeList, std::vector<std::string> &fourList)
{
    cv::Size WordsWith = GetTextSize(strContent.c_str(), font_size, font_family);

    std::vector<std::string> *target = nullptr;
    if (WordsWith.width < maxFourWith) {
        target = &fourList;
    }
    else if (WordsWith.width < maxTreeWith) {
        target = &threeList;
    }
    else if (WordsWith.width < maxTwoWith) {
        target = &twoList;
    }

    if (target == nullptr) {
        // Too wide for any column: split on std::string instead of fixed
        // char[2048] buffers, which overflowed for inputs close to 4 KB.
        const std::string::size_type cut = static_cast<std::string::size_type>(
            HalfSplitOffset(strContent.c_str(), static_cast<int>(strContent.length())));
        if (cut > 0 && cut < strContent.length()) {
            AssignWordsFromString(strContent.substr(0, cut), maxTwoWith, maxTreeWith, maxFourWith,
                rePlaceList, twoList, threeList, fourList);
            AssignWordsFromString(strContent.substr(cut), maxTwoWith, maxTreeWith, maxFourWith,
                rePlaceList, twoList, threeList, fourList);
            return;
        }
        // Cannot be divided any further: keep it in the widest column.
        target = &twoList;
    }

    if (!rePlaceList.empty()) {
        RandReplaceStr(rePlaceList, strContent);
    }
    target->push_back(strContent);
}

// Reads the text file `pathName` line by line and distributes the lines over
// `twoList`, `threeList` and `fourList` (see AssignWordsFromString).
//
// Settings are read from "config.ini", section [USER], located next to the
// running executable:
//   flagWord            "0" => strip a leading part-of-speech tag from each line
//   TwoColScaleParam    divisor used for the two-column width limit   (default 6)
//   ThreeColScaleParam  divisor used for the three-column width limit (default 9)
//   FourColScaleParam   divisor used for the four-column width limit  (default 16)
// Values that do not parse to a positive integer fall back to the default.
//
// An optional "words.txt" in the same directory supplies the replacement pool
// for ";": each line has the form  text@count  and adds `text` to the pool
// `count` times.
//
// Lines of `pathName` that pass isUtf8 are converted with UTF8toANSI; lines
// that are empty after processing are skipped. Missing files are silently
// ignored.
//
// Returns 0 in all cases (unchanged behaviour).
int AssignWordsFromTest(std::string pathName, std::vector<std::string> &twoList,
    std::vector<std::string> &threeList, std::vector<std::string> &fourList)
{
    // Directory of the running executable.
    HMODULE module = GetModuleHandle(0);
    TCHAR pFileName[MAX_PATH + 2] = { 0 };
    GetModuleFileName(module, pFileName, MAX_PATH);
    CString csFullPath(pFileName);
    int nPos = csFullPath.ReverseFind(_T('\\'));
    CString filepath = csFullPath.Left(nPos);
    CString FilePath1 = filepath + L"\\config.ini";
    CString FilePath2 = filepath + L"\\words.txt";

    // Configuration.
    WCHAR wflagWord[10] = { 0 };
    WCHAR wFourColScaleParam[10] = { 0 };
    WCHAR wThreeColScaleParam[10] = { 0 };
    WCHAR wTwoColScaleParam[10] = { 0 };
    GetPrivateProfileString(L"USER", L"flagWord", L"1", wflagWord, 10, FilePath1);
    GetPrivateProfileString(L"USER", L"TwoColScaleParam", L"6", wTwoColScaleParam, 10, FilePath1);
    GetPrivateProfileString(L"USER", L"ThreeColScaleParam", L"9", wThreeColScaleParam, 10, FilePath1);
    GetPrivateProfileString(L"USER", L"FourColScaleParam", L"16", wFourColScaleParam, 10, FilePath1);

    // The scale parameters are used as divisors below, so they must be > 0.
    int nFourColScaleParam = _wtoi(wFourColScaleParam);
    if (nFourColScaleParam <= 0) {
        nFourColScaleParam = 16;
    }
    int nThreeColScaleParam = _wtoi(wThreeColScaleParam);
    if (nThreeColScaleParam <= 0) {
        nThreeColScaleParam = 9;
    }
    int nTwoColScaleParam = _wtoi(wTwoColScaleParam);
    if (nTwoColScaleParam <= 0) {
        nTwoColScaleParam = 6;
    }

    CString lpszflagWord(wflagWord);

    // Part-of-speech tags that may be stripped from the start of a line.
    std::vector<std::string> WordList;
    WordList.push_back("vt.");
    WordList.push_back("vi.");
    WordList.push_back("n.");
    WordList.push_back("pron.");
    WordList.push_back("adj.");
    WordList.push_back("num.");
    WordList.push_back("v.");
    WordList.push_back("adv.");
    WordList.push_back("art.");
    WordList.push_back("prep.");
    WordList.push_back("conj.");
    WordList.push_back("int.");

    // Random replacement rules.
    std::vector<std::string> rePlaceList;
    std::string strFilePath2 = CT2A(FilePath2);
    if (_access(strFilePath2.c_str(), 00) != -1) {
        std::ifstream finfile(strFilePath2.c_str(), std::ios::in);
        std::string strRule;
        // std::getline has no line-length limit; the former char[100] buffer
        // ended the whole read at the first line of 100 or more characters.
        while (std::getline(finfile, strRule)) {
            std::vector<std::string> retlist;
            UTF8toANSI(strRule);
            split(strRule, "@", &retlist);
            if (retlist.size() == 2) {
                const int x = atoi(retlist[1].c_str());
                for (int i = 0; i < x; i++) {
                    rePlaceList.push_back(retlist[0]);
                }
            }
        }
    }

    if (_access(pathName.c_str(), 00) != -1) {
        int maxTwoWith = main_wdith / 2 - main_wdith / nTwoColScaleParam - chk_width;
        int maxTreeWith = main_wdith / 3 - main_wdith / nThreeColScaleParam - chk_width;
        int maxFourWith = main_wdith / 4 - main_wdith / nFourColScaleParam - chk_width;

        std::ifstream fin(pathName.c_str(), std::ios::in);
        std::string strTemp;
        while (std::getline(fin, strTemp)) {
            // isUtf8 takes a non-const pointer but only reads the bytes.
            unsigned char *bytes = reinterpret_cast<unsigned char *>(const_cast<char *>(strTemp.c_str()));
            if (isUtf8(bytes, static_cast<int>(strTemp.length()))) {
                UTF8toANSI(strTemp);
            }
            if (lpszflagWord == _T("0")) {
                ReplaceFlagWord(WordList, strTemp);
            }
            if (strTemp.empty()) {
                continue;
            }
            // Same width classification / splitting that used to be
            // duplicated inline here.
            AssignWordsFromString(strTemp, maxTwoWith, maxTreeWith, maxFourWith,
                rePlaceList, twoList, threeList, fourList);
        }
    }

    return 0;
}