123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- #include "pch.h"
- #include "AssignWords.h"
- #include "CvxText.h"
- #include <iostream>
- #include <sstream>
- #include <fstream>
- #include <io.h>
- #include "BaseUtility.h"
- /*
- string& replace_str(string& str, const string& to_replaced, const string& newchars)
- {
- for (string::size_type pos(0); pos != string::npos; pos += newchars.length())
- {
- pos = str.find(to_replaced, pos);
- if (pos != string::npos)
- str.replace(pos, to_replaced.length(), newchars);
- else
- break;
- }
- return str;
- }
- */
// Returns how many consecutive 1 bits `byte` has, counting from the most
// significant bit down (0 for 0xxxxxxx, 2 for 110xxxxx, 8 for 0xFF).
int preNUm(unsigned char byte) {
    int leadingOnes = 0;
    // Slide a single-bit probe from bit 7 towards bit 0 and stop at the first
    // cleared bit, or once the probe has been shifted out entirely.
    for (unsigned int probe = 0x80u; probe != 0u && (byte & probe) != 0u; probe >>= 1) {
        ++leadingOnes;
    }
    return leadingOnes;
}
// Heuristically decides whether the first `len` bytes of `data` look like
// UTF-8 text.
//
// Accepted input consists of single bytes of the form 0xxxxxxx and multi-byte
// sequences whose lead byte announces 3 to 6 bytes (1110xxxx ... 1111110x),
// each followed by the matching number of 10xxxxxx continuation bytes.
//
// NOTE(review): two-byte sequences (110xxxxx 10xxxxxx) are rejected, exactly
// as before. This looks like a deliberate heuristic so that double-byte ANSI
// text is not mistaken for UTF-8 - confirm before relaxing it.
//
// Returns true for an empty buffer (len <= 0) and false for a null buffer with
// a positive length.
bool isUtf8(unsigned char* data, int len) {
    if (len <= 0) {
        return true;
    }
    if (data == nullptr) {
        return false;
    }

    int i = 0;
    while (i < len) {
        const unsigned char lead = data[i];
        if ((lead & 0x80) == 0x00) {
            // 0xxxxxxx: a single-byte character.
            ++i;
            continue;
        }

        // The number of leading 1 bits of the lead byte is the total number of
        // bytes the character occupies.
        int seqLen = 0;
        for (unsigned int probe = 0x80u; probe != 0u && (lead & probe) != 0u; probe >>= 1) {
            ++seqLen;
        }

        // 1 leading one is a stray continuation byte, 2 is rejected by the
        // heuristic above, and 7 or 8 (0xFE / 0xFF) never start a sequence.
        if (seqLen <= 2 || seqLen > 6) {
            return false;
        }

        // A sequence cut off by the end of the buffer is not valid; checking
        // this first also keeps the reads below inside the buffer.
        if (seqLen > len - i) {
            return false;
        }

        // Every following byte of the sequence must look like 10xxxxxx.
        for (int k = 1; k < seqLen; ++k) {
            if ((data[i + k] & 0xc0) != 0x80) {
                return false;
            }
        }
        i += seqLen;
    }
    return true;
}
// Splits `s` at every character that appears in `delim` and appends the
// pieces, in order, to `*ret`.
//
// Empty pieces are kept: adjacent delimiters produce "" entries, and the text
// after the last delimiter is always appended, even when it is empty (so an
// empty `s` yields a single "" entry).
void split(const std::string& s, const std::string& delim, std::vector< std::string >* ret)
{
    std::string::size_type tokenStart = 0;
    for (;;)
    {
        const std::string::size_type hit = s.find_first_of(delim, tokenStart);
        if (hit == std::string::npos)
        {
            break;
        }
        ret->push_back(s.substr(tokenStart, hit - tokenStart));
        tokenStart = hit + 1;
    }
    // Whatever remains after the final delimiter (possibly nothing).
    ret->push_back(s.substr(tokenStart));
}
// Removes `to_replaced` from the front of `str` when `str` starts with it.
// Occurrences anywhere else in the string are left alone.
void deleteFlagWord(std::string& str, const std::string& to_replaced)
{
    const std::string::size_type prefixLen = to_replaced.length();
    const bool startsWithFlag = (str.compare(0, prefixLen, to_replaced) == 0);
    if (startsWithFlag)
    {
        str.erase(0, prefixLen);
    }
}
// Walks `list` once, in order, and strips each entry from the front of
// `strRet` if `strRet` currently starts with it. Because the string is
// modified as the walk proceeds, a later entry can match a prefix that was
// only exposed by removing an earlier one.
void ReplaceFlagWord(std::vector<std::string> &list, std::string &strRet)
{
    for (const std::string &flag : list)
    {
        const std::string::size_type flagLen = flag.length();
        if (strRet.compare(0, flagLen, flag) == 0)
        {
            strRet.erase(0, flagLen);
        }
    }
}
// Splits the first `len` bytes of `buff` into two roughly equal halves.
//
// The split point is the first position at or after len / 2 that does not fall
// inside a two-byte character: while scanning, any byte >= 0x80 is treated as
// the first byte of a two-byte character and its partner byte is skipped.
// NOTE(review): this presumably matches the double-byte ANSI text the callers
// produce via UTF8toANSI - confirm; it is not a UTF-8 aware split.
//
// buff   source bytes; at least `len` bytes must be readable.
// len    number of bytes to split; values <= 0 leave the outputs untouched.
// left   receives the bytes before the split point.
// right  receives the bytes from the split point onwards.
//
// The halves are written with strncpy, so no terminating '\0' is appended:
// callers must pass zero-initialised buffers that are larger than the half
// they receive.
//
// An earlier variant that preferred to split after the last ";" or " " in the
// first half sat behind an unconditional return and never ran; that
// unreachable code has been removed.
void DivideEquallyStr(const char *buff, int len, char left[], char right[])
{
    // A negative length would otherwise turn into a huge size_t in strncpy.
    if (buff == nullptr || left == nullptr || right == nullptr || len <= 0)
    {
        return;
    }

    int splitAt = 0;
    for (splitAt = 0; splitAt < len / 2; splitAt++)
    {
        if (static_cast<unsigned char>(buff[splitAt]) >= 0x80)
        {
            // Skip the second byte so the pair is never cut in half.
            splitAt++;
        }
    }

    strncpy(left, buff, splitAt);
    strncpy(right, buff + splitAt, len - splitAt);
}
// Replaces every ";" in `ret` with an entry picked at random from `list`; a
// fresh entry is drawn for each occurrence.
//
// list  pool of replacement strings. An entry that appears several times is
//       proportionally more likely to be picked. When the pool is empty the
//       string is left unchanged.
// ret   string edited in place. Scanning resumes after the inserted text, so
//       a replacement that itself contains ";" is not replaced again.
void RandReplaceStr(std::vector<std::string> list, std::string &ret)
{
    if (list.empty())
    {
        // Nothing to pick from; this also avoids a modulo by zero below.
        return;
    }

    // Seed the C random generator once. Reseeding from time() on every call
    // made all calls within the same second replay the same sequence.
    static const bool seeded = (srand(static_cast<unsigned>(time(NULL))), true);
    (void)seeded;

    const std::string to_replaced = ";";
    std::string::size_type pos = 0;
    while ((pos = ret.find(to_replaced, pos)) != std::string::npos)
    {
        const std::string &newchars = list[rand() % list.size()];
        ret.replace(pos, to_replaced.length(), newchars);
        pos += newchars.length();
    }
}
- void AssignWordsFromString(std::string strContent, int maxTwoWith, int maxTreeWith, int maxFourWith, std::vector<std::string> &rePlaceList,
- std::vector<std::string> &twoList, std::vector<std::string> &threeList, std::vector<std::string> &fourList)
- {
- cv::Size WordsWith = GetTextSize(strContent.c_str(), font_size, font_family);
- if (WordsWith.width < maxFourWith)
- {
- if (rePlaceList.size() > 0)
- {
- RandReplaceStr(rePlaceList, strContent);
- }
- fourList.push_back(strContent);
- }
- else if (WordsWith.width < maxTreeWith)
- {
- if (rePlaceList.size() > 0)
- {
- RandReplaceStr(rePlaceList, strContent);
- }
- threeList.push_back(strContent);
- }
- else if (WordsWith.width < maxTwoWith)
- {
- if (rePlaceList.size() > 0)
- {
- RandReplaceStr(rePlaceList, strContent);
- }
- twoList.push_back(strContent);
- }
- else
- {
- char left[2048] = { 0 }, right[2048] = { 0 };
- DivideEquallyStr(strContent.c_str(), strContent.length(), left, right);
- AssignWordsFromString(left, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
- AssignWordsFromString(right, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
- }
- }
- int AssignWordsFromTest(std::string pathName, std::vector<std::string> &twoList, std::vector<std::string> &threeList, std::vector<std::string> &fourList)
- {
- // 词性处理
- HMODULE module = GetModuleHandle(0);
- TCHAR pFileName[MAX_PATH + 2] = { 0 };
- GetModuleFileName(module, pFileName, MAX_PATH);
- CString csFullPath(pFileName);
- int nPos = csFullPath.ReverseFind(_T('\\'));
- CString filepath = csFullPath.Left(nPos);
- CString FilePath1 = filepath + L"\\config.ini";
- CString FilePath2 = filepath + L"\\words.txt";
- WCHAR wflagWord[10],wFourColScaleParam[10],wThreeColScaleParam[10],wTwoColScaleParam[10];
- GetPrivateProfileString(L"USER", L"flagWord", L"1", wflagWord, 10, FilePath1);
- GetPrivateProfileString(L"USER", L"TwoColScaleParam", L"6", wTwoColScaleParam, 10, FilePath1);
- GetPrivateProfileString(L"USER", L"ThreeColScaleParam", L"9", wThreeColScaleParam, 10, FilePath1);
- GetPrivateProfileString(L"USER", L"FourColScaleParam", L"16", wFourColScaleParam, 10, FilePath1);
- int nFourColScaleParam = 0, nThreeColScaleParam = 0, nTwoColScaleParam = 0;
- nFourColScaleParam = _wtoi(wFourColScaleParam);
- nFourColScaleParam = nFourColScaleParam > 0 ? nFourColScaleParam : 16;
- nThreeColScaleParam = _wtoi(wThreeColScaleParam);
- nThreeColScaleParam = nThreeColScaleParam > 0 ? nThreeColScaleParam : 9;
- nTwoColScaleParam = _wtoi(wTwoColScaleParam);
- nTwoColScaleParam = nTwoColScaleParam > 0 ? nTwoColScaleParam : 6;
- CString lpszflagWord(wflagWord);
- std::vector<std::string> WordList;
- WordList.push_back("vt.");
- WordList.push_back("vi.");
- WordList.push_back("n.");
- WordList.push_back("pron.");
- WordList.push_back("adj.");
- WordList.push_back("num.");
- WordList.push_back("v.");
- WordList.push_back("adv.");
- WordList.push_back("art.");
- WordList.push_back("prep.");
- WordList.push_back("conj.");
- WordList.push_back("int.");
- // 随机替换规则
- std::vector<std::string> rePlaceList;
- std::string strFilePath2 = CT2A(FilePath2);
- if (_access(strFilePath2.c_str(), 00) != -1)
- {
- char line[100] = { 0 };
- std::ifstream finfile(strFilePath2.c_str(), std::ios::in);
- while (finfile.getline(line, sizeof(line)))
- {
- std::vector<std::string> retlist;
- std::string strTemp = line;
- UTF8toANSI(strTemp);
- split(strTemp, "@", &retlist);
- if (retlist.size() == 2)
- {
- int x = atoi(retlist[1].c_str());
- if (x > 0)
- {
- for (int i = 0; i < x; i++)
- {
- rePlaceList.push_back(retlist[0]);
- }
- }
- }
- }
- memset(line, 0, 100);
- }
- int cout = 0;
- if (_access(pathName.c_str(), 00) != -1)
- {
- int maxTwoWith = main_wdith / 2 - main_wdith / nTwoColScaleParam - chk_width;
- int maxTreeWith = main_wdith / 3 - main_wdith / nThreeColScaleParam - chk_width;
- int maxFourWith = main_wdith / 4 - main_wdith/ nFourColScaleParam - chk_width;
- std::ifstream fin(pathName.c_str(), std::ios::in);
- char line[4096] = { 0 };
- while (fin.getline(line, sizeof(line)))
- {
- std::string strTemp = line;
- if(isUtf8((unsigned char*)strTemp.c_str(),strTemp.length()))
- UTF8toANSI(strTemp);
- if (lpszflagWord == "0")
- {
- ReplaceFlagWord(WordList, strTemp);
- }
- if (strTemp == "")
- continue;
-
- cv::Size WordsWith = GetTextSize(strTemp.c_str(), font_size, font_family);
- if (WordsWith.width < maxFourWith)
- {
- if (rePlaceList.size() > 0)
- {
- RandReplaceStr(rePlaceList, strTemp);
- }
- fourList.push_back(strTemp);
- }
- else if (WordsWith.width < maxTreeWith)
- {
- if (rePlaceList.size() > 0)
- {
- RandReplaceStr(rePlaceList, strTemp);
- }
- threeList.push_back(strTemp);
- }
- else if (WordsWith.width < maxTwoWith)
- {
- if (rePlaceList.size() > 0)
- {
- RandReplaceStr(rePlaceList, strTemp);
- }
- twoList.push_back(strTemp);
- }
- else
- {
- char left[2048] = { 0 }, right[2048] = { 0 };
- DivideEquallyStr(strTemp.c_str(), strTemp.length(), left,right);
- AssignWordsFromString(left, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
- AssignWordsFromString(right, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
- }
- memset(line, 0, 4096);
- }
-
- }
- return cout;
- }
|