AssignWords.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. #include "pch.h"
  2. #include "AssignWords.h"
  3. #include "CvxText.h"
  4. #include <iostream>
  5. #include <sstream>
  6. #include <fstream>
  7. #include <io.h>
  8. #include "BaseUtility.h"
  9. /*
  10. string& replace_str(string& str, const string& to_replaced, const string& newchars)
  11. {
  12. for (string::size_type pos(0); pos != string::npos; pos += newchars.length())
  13. {
  14. pos = str.find(to_replaced, pos);
  15. if (pos != string::npos)
  16. str.replace(pos, to_replaced.length(), newchars);
  17. else
  18. break;
  19. }
  20. return str;
  21. }
  22. */
  /// Counts the consecutive 1 bits at the most-significant end of `byte`.
  ///
  /// For a UTF-8 lead byte this count equals the number of bytes the encoded
  /// character occupies (e.g. 1110_XXXX -> 3).
  ///
  /// @param byte  The byte to inspect.
  /// @return      Number of leading 1 bits, in the range 0..8.
  int preNUm(unsigned char byte) {
      int leadingOnes = 0;
      // Slide a one-bit probe down from bit 7; stop at the first 0 bit or once
      // the probe has been shifted out of the byte.
      for (unsigned char probe = 0x80; probe != 0 && (byte & probe) != 0; probe >>= 1) {
          ++leadingOnes;
      }
      return leadingOnes;
  }
  37. bool isUtf8(unsigned char* data, int len) {
  38. int num = 0;
  39. int i = 0;
  40. while (i < len) {
  41. if ((data[i] & 0x80) == 0x00) {
  42. // 0XXX_XXXX
  43. i++;
  44. continue;
  45. }
  46. else if ((num = preNUm(data[i])) > 2) {
  47. // 110X_XXXX 10XX_XXXX
  48. // 1110_XXXX 10XX_XXXX 10XX_XXXX
  49. // 1111_0XXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
  50. // 1111_10XX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
  51. // 1111_110X 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
  52. // preNUm() 返回首个字节8个bits中首 ?0bit前面1bit的个数,该数量也是该字符所使用的字节数
  53. i++;
  54. for (int j = 0; j < num - 1; j++) {
  55. //判断后面num - 1 个字节是不是都是10开
  56. if ((data[i] & 0xc0) != 0x80) {
  57. return false;
  58. }
  59. i++;
  60. }
  61. }
  62. else {
  63. //其他情况说明不是utf-8
  64. return false;
  65. }
  66. }
  67. return true;
  68. }
  /// Splits `s` at every character contained in `delim` and appends the pieces
  /// to `*ret`.
  ///
  /// `delim` is treated as a set of single-character separators, not as one
  /// multi-character separator. Empty pieces between two adjacent separators
  /// are kept; an empty piece after the last separator (or an entirely empty
  /// input) is not appended.
  ///
  /// @param s      Text to split.
  /// @param delim  Set of separator characters.
  /// @param ret    Output vector; pieces are appended, existing content is kept.
  void split(const std::string& s, const std::string& delim, std::vector< std::string >* ret)
  {
      std::size_t last = 0;
      for (std::size_t index = s.find_first_of(delim, last);
           index != std::string::npos;
           index = s.find_first_of(delim, last))
      {
          ret->push_back(s.substr(last, index - last));
          last = index + 1;
      }

      // Only append the tail if there is one. The previous test
      // `index - last > 0` was always true here because `index` is npos after
      // the loop, so an empty trailing token was always produced.
      if (last < s.size())
      {
          ret->push_back(s.substr(last));
      }
  }
  /// Removes `to_replaced` from the front of `str` if `str` starts with it.
  ///
  /// Occurrences anywhere else in the string are left untouched.
  ///
  /// @param str          String to modify in place.
  /// @param to_replaced  Prefix to strip.
  void deleteFlagWord(std::string& str, const std::string& to_replaced)
  {
      // Compare only the leading bytes instead of searching the whole string
      // with find() just to learn whether the match sits at position 0.
      if (str.compare(0, to_replaced.length(), to_replaced) == 0)
      {
          str.erase(0, to_replaced.length());
      }
  }
  90. void ReplaceFlagWord(std::vector<std::string> &list, std::string &strRet)
  91. {
  92. for (std::vector<std::string>::iterator it = list.begin(); it != list.end(); it++)
  93. {
  94. deleteFlagWord(strRet, (*it));
  95. }
  96. }
  /// Splits the first `len` bytes of `buff` into two halves of roughly equal
  /// byte length.
  ///
  /// The split point is moved one byte to the right when it would otherwise fall
  /// directly after a byte >= 0x80, so that a two-byte character is not cut in
  /// the middle. (Assumes the text uses a double-byte ANSI code page where such
  /// a byte starts a two-byte character -- TODO confirm against callers.)
  ///
  /// An earlier strategy that preferred to split at ';' or ' ' was already
  /// disabled by an unconditional early return; that unreachable code has been
  /// removed.
  ///
  /// @param buff   Source bytes; at least `len` bytes must be readable.
  /// @param len    Number of bytes to split. Nothing is written if `len` <= 0.
  /// @param left   Receives the first part. No terminating NUL is written, so the
  ///               caller must pass a zero-initialised buffer of at least
  ///               len / 2 + 2 bytes.
  /// @param right  Receives the remainder, under the same conditions as `left`.
  void DivideEquallyStr(const char *buff, int len, char left[], char right[])
  {
      // A negative length would turn into a huge size_t in strncpy below.
      if (buff == nullptr || left == nullptr || right == nullptr || len <= 0)
      {
          return;
      }

      int i = 0;
      for (i = 0; i < len / 2; i++)
      {
          // Non-ASCII byte: skip the byte that follows it as well, so both
          // bytes of the character end up on the same side of the split.
          if (static_cast<unsigned char>(buff[i]) >= 0x80)
          {
              i++;
          }
      }

      strncpy(left, buff, i);
      strncpy(right, buff + i, len - i);
  }
  /// Replaces every ";" in `ret` with an entry picked at random from `list`.
  ///
  /// A fresh entry is drawn for each occurrence. Inserted text is not scanned
  /// again, so a replacement that itself contains ";" is left as is.
  ///
  /// @param list  Candidate replacement strings. If empty, `ret` is unchanged.
  /// @param ret   String to modify in place.
  void RandReplaceStr(std::vector<std::string> list, std::string &ret)
  {
      // Nothing to choose from; also avoids `rand() % 0`.
      if (list.empty())
      {
          return;
      }

      // Seed the generator once only. Re-seeding with time() on every call made
      // all calls within the same second replay the same "random" sequence.
      static const bool seeded = (srand(static_cast<unsigned>(time(NULL))), true);
      (void)seeded;

      const std::string to_replaced = ";";
      std::string::size_type pos = 0;
      while ((pos = ret.find(to_replaced, pos)) != std::string::npos)
      {
          const std::string &newchars = list[rand() % list.size()];
          ret.replace(pos, to_replaced.length(), newchars);
          // Continue after the inserted text so it is never re-examined.
          pos += newchars.length();
      }
  }
  189. void AssignWordsFromString(std::string strContent, int maxTwoWith, int maxTreeWith, int maxFourWith, std::vector<std::string> &rePlaceList,
  190. std::vector<std::string> &twoList, std::vector<std::string> &threeList, std::vector<std::string> &fourList)
  191. {
  192. cv::Size WordsWith = GetTextSize(strContent.c_str(), font_size, font_family);
  193. if (WordsWith.width < maxFourWith)
  194. {
  195. if (rePlaceList.size() > 0)
  196. {
  197. RandReplaceStr(rePlaceList, strContent);
  198. }
  199. fourList.push_back(strContent);
  200. }
  201. else if (WordsWith.width < maxTreeWith)
  202. {
  203. if (rePlaceList.size() > 0)
  204. {
  205. RandReplaceStr(rePlaceList, strContent);
  206. }
  207. threeList.push_back(strContent);
  208. }
  209. else if (WordsWith.width < maxTwoWith)
  210. {
  211. if (rePlaceList.size() > 0)
  212. {
  213. RandReplaceStr(rePlaceList, strContent);
  214. }
  215. twoList.push_back(strContent);
  216. }
  217. else
  218. {
  219. char left[2048] = { 0 }, right[2048] = { 0 };
  220. DivideEquallyStr(strContent.c_str(), strContent.length(), left, right);
  221. AssignWordsFromString(left, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
  222. AssignWordsFromString(right, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
  223. }
  224. }
  225. int AssignWordsFromTest(std::string pathName, std::vector<std::string> &twoList, std::vector<std::string> &threeList, std::vector<std::string> &fourList)
  226. {
  227. // 词性处理
  228. HMODULE module = GetModuleHandle(0);
  229. TCHAR pFileName[MAX_PATH + 2] = { 0 };
  230. GetModuleFileName(module, pFileName, MAX_PATH);
  231. CString csFullPath(pFileName);
  232. int nPos = csFullPath.ReverseFind(_T('\\'));
  233. CString filepath = csFullPath.Left(nPos);
  234. CString FilePath1 = filepath + L"\\config.ini";
  235. CString FilePath2 = filepath + L"\\words.txt";
  236. WCHAR wflagWord[10],wFourColScaleParam[10],wThreeColScaleParam[10],wTwoColScaleParam[10];
  237. GetPrivateProfileString(L"USER", L"flagWord", L"1", wflagWord, 10, FilePath1);
  238. GetPrivateProfileString(L"USER", L"TwoColScaleParam", L"6", wTwoColScaleParam, 10, FilePath1);
  239. GetPrivateProfileString(L"USER", L"ThreeColScaleParam", L"9", wThreeColScaleParam, 10, FilePath1);
  240. GetPrivateProfileString(L"USER", L"FourColScaleParam", L"16", wFourColScaleParam, 10, FilePath1);
  241. int nFourColScaleParam = 0, nThreeColScaleParam = 0, nTwoColScaleParam = 0;
  242. nFourColScaleParam = _wtoi(wFourColScaleParam);
  243. nFourColScaleParam = nFourColScaleParam > 0 ? nFourColScaleParam : 16;
  244. nThreeColScaleParam = _wtoi(wThreeColScaleParam);
  245. nThreeColScaleParam = nThreeColScaleParam > 0 ? nThreeColScaleParam : 9;
  246. nTwoColScaleParam = _wtoi(wTwoColScaleParam);
  247. nTwoColScaleParam = nTwoColScaleParam > 0 ? nTwoColScaleParam : 6;
  248. CString lpszflagWord(wflagWord);
  249. std::vector<std::string> WordList;
  250. WordList.push_back("vt.");
  251. WordList.push_back("vi.");
  252. WordList.push_back("n.");
  253. WordList.push_back("pron.");
  254. WordList.push_back("adj.");
  255. WordList.push_back("num.");
  256. WordList.push_back("v.");
  257. WordList.push_back("adv.");
  258. WordList.push_back("art.");
  259. WordList.push_back("prep.");
  260. WordList.push_back("conj.");
  261. WordList.push_back("int.");
  262. // 随机替换规则
  263. std::vector<std::string> rePlaceList;
  264. std::string strFilePath2 = CT2A(FilePath2);
  265. if (_access(strFilePath2.c_str(), 00) != -1)
  266. {
  267. char line[100] = { 0 };
  268. std::ifstream finfile(strFilePath2.c_str(), std::ios::in);
  269. while (finfile.getline(line, sizeof(line)))
  270. {
  271. std::vector<std::string> retlist;
  272. std::string strTemp = line;
  273. UTF8toANSI(strTemp);
  274. split(strTemp, "@", &retlist);
  275. if (retlist.size() == 2)
  276. {
  277. int x = atoi(retlist[1].c_str());
  278. if (x > 0)
  279. {
  280. for (int i = 0; i < x; i++)
  281. {
  282. rePlaceList.push_back(retlist[0]);
  283. }
  284. }
  285. }
  286. }
  287. memset(line, 0, 100);
  288. }
  289. int cout = 0;
  290. if (_access(pathName.c_str(), 00) != -1)
  291. {
  292. int maxTwoWith = main_wdith / 2 - main_wdith / nTwoColScaleParam - chk_width;
  293. int maxTreeWith = main_wdith / 3 - main_wdith / nThreeColScaleParam - chk_width;
  294. int maxFourWith = main_wdith / 4 - main_wdith/ nFourColScaleParam - chk_width;
  295. std::ifstream fin(pathName.c_str(), std::ios::in);
  296. char line[4096] = { 0 };
  297. while (fin.getline(line, sizeof(line)))
  298. {
  299. std::string strTemp = line;
  300. if(isUtf8((unsigned char*)strTemp.c_str(),strTemp.length()))
  301. UTF8toANSI(strTemp);
  302. if (lpszflagWord == "0")
  303. {
  304. ReplaceFlagWord(WordList, strTemp);
  305. }
  306. if (strTemp == "")
  307. continue;
  308. cv::Size WordsWith = GetTextSize(strTemp.c_str(), font_size, font_family);
  309. if (WordsWith.width < maxFourWith)
  310. {
  311. if (rePlaceList.size() > 0)
  312. {
  313. RandReplaceStr(rePlaceList, strTemp);
  314. }
  315. fourList.push_back(strTemp);
  316. }
  317. else if (WordsWith.width < maxTreeWith)
  318. {
  319. if (rePlaceList.size() > 0)
  320. {
  321. RandReplaceStr(rePlaceList, strTemp);
  322. }
  323. threeList.push_back(strTemp);
  324. }
  325. else if (WordsWith.width < maxTwoWith)
  326. {
  327. if (rePlaceList.size() > 0)
  328. {
  329. RandReplaceStr(rePlaceList, strTemp);
  330. }
  331. twoList.push_back(strTemp);
  332. }
  333. else
  334. {
  335. char left[2048] = { 0 }, right[2048] = { 0 };
  336. DivideEquallyStr(strTemp.c_str(), strTemp.length(), left,right);
  337. AssignWordsFromString(left, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
  338. AssignWordsFromString(right, maxTwoWith, maxTreeWith, maxFourWith, rePlaceList, twoList, threeList, fourList);
  339. }
  340. memset(line, 0, 4096);
  341. }
  342. }
  343. return cout;
  344. }