data_config.json 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452
  1. [
  2. {
  3. "name": "stackexchange_title_body/skeptics.stackexchange.com.jsonl.gz",
  4. "lines": 10009,
  5. "weight": 1
  6. },
  7. {
  8. "name": "stackexchange_TitleBody_Answer/islam.stackexchange.com.jsonl.gz",
  9. "lines": 10052,
  10. "weight": 1
  11. },
  12. {
  13. "name": "stackexchange_Title_Answer/islam.stackexchange.com.jsonl.gz",
  14. "lines": 10052,
  15. "weight": 1
  16. },
  17. {
  18. "name": "stackexchange_TitleBody_Answer/anime.stackexchange.com.jsonl.gz",
  19. "lines": 10131,
  20. "weight": 1
  21. },
  22. {
  23. "name": "stackexchange_Title_Answer/anime.stackexchange.com.jsonl.gz",
  24. "lines": 10131,
  25. "weight": 1
  26. },
  27. {
  28. "name": "stackexchange_title_body/writers.stackexchange.com.jsonl.gz",
  29. "lines": 10157,
  30. "weight": 1
  31. },
  32. {
  33. "name": "stackexchange_title_body/astronomy.stackexchange.com.jsonl.gz",
  34. "lines": 10462,
  35. "weight": 1
  36. },
  37. {
  38. "name": "stackexchange_title_body/vi.stackexchange.com.jsonl.gz",
  39. "lines": 10551,
  40. "weight": 1
  41. },
  42. {
  43. "name": "stackexchange_TitleBody_Answer/french.stackexchange.com.jsonl.gz",
  44. "lines": 10578,
  45. "weight": 1
  46. },
  47. {
  48. "name": "stackexchange_Title_Answer/french.stackexchange.com.jsonl.gz",
  49. "lines": 10578,
  50. "weight": 1
  51. },
  52. {
  53. "name": "stackexchange_title_body/cstheory.stackexchange.com.jsonl.gz",
  54. "lines": 10642,
  55. "weight": 1
  56. },
  57. {
  58. "name": "stackexchange_TitleBody_Answer/civicrm.stackexchange.com.jsonl.gz",
  59. "lines": 10648,
  60. "weight": 1
  61. },
  62. {
  63. "name": "stackexchange_Title_Answer/civicrm.stackexchange.com.jsonl.gz",
  64. "lines": 10648,
  65. "weight": 1
  66. },
  67. {
  68. "name": "stackexchange_TitleBody_Answer/expressionengine.stackexchange.com.jsonl.gz",
  69. "lines": 10742,
  70. "weight": 1
  71. },
  72. {
  73. "name": "stackexchange_Title_Answer/expressionengine.stackexchange.com.jsonl.gz",
  74. "lines": 10742,
  75. "weight": 1
  76. },
  77. {
  78. "name": "stackexchange_title_body/engineering.stackexchange.com.jsonl.gz",
  79. "lines": 10753,
  80. "weight": 1
  81. },
  82. {
  83. "name": "stackexchange_TitleBody_Answer/history.stackexchange.com.jsonl.gz",
  84. "lines": 10766,
  85. "weight": 1
  86. },
  87. {
  88. "name": "stackexchange_Title_Answer/history.stackexchange.com.jsonl.gz",
  89. "lines": 10766,
  90. "weight": 1
  91. },
  92. {
  93. "name": "stackexchange_title_body/french.stackexchange.com.jsonl.gz",
  94. "lines": 10794,
  95. "weight": 1
  96. },
  97. {
  98. "name": "stackexchange_TitleBody_Answer/politics.stackexchange.com.jsonl.gz",
  99. "lines": 11047,
  100. "weight": 1
  101. },
  102. {
  103. "name": "stackexchange_Title_Answer/politics.stackexchange.com.jsonl.gz",
  104. "lines": 11047,
  105. "weight": 1
  106. },
  107. {
  108. "name": "stackexchange_title_body/economics.stackexchange.com.jsonl.gz",
  109. "lines": 11115,
  110. "weight": 1
  111. },
  112. {
  113. "name": "stackexchange_TitleBody_Answer/craftcms.stackexchange.com.jsonl.gz",
  114. "lines": 11236,
  115. "weight": 1
  116. },
  117. {
  118. "name": "stackexchange_Title_Answer/craftcms.stackexchange.com.jsonl.gz",
  119. "lines": 11236,
  120. "weight": 1
  121. },
  122. {
  123. "name": "stackexchange_title_body/anime.stackexchange.com.jsonl.gz",
  124. "lines": 11444,
  125. "weight": 1
  126. },
  127. {
  128. "name": "stackexchange_TitleBody_Answer/christianity.stackexchange.com.jsonl.gz",
  129. "lines": 11498,
  130. "weight": 1
  131. },
  132. {
  133. "name": "stackexchange_Title_Answer/christianity.stackexchange.com.jsonl.gz",
  134. "lines": 11498,
  135. "weight": 1
  136. },
  137. {
  138. "name": "stackexchange_TitleBody_Answer/softwarerecs.stackexchange.com.jsonl.gz",
  139. "lines": 11761,
  140. "weight": 1
  141. },
  142. {
  143. "name": "stackexchange_Title_Answer/softwarerecs.stackexchange.com.jsonl.gz",
  144. "lines": 11761,
  145. "weight": 1
  146. },
  147. {
  148. "name": "stackexchange_TitleBody_Answer/boardgames.stackexchange.com.jsonl.gz",
  149. "lines": 11805,
  150. "weight": 1
  151. },
  152. {
  153. "name": "stackexchange_Title_Answer/boardgames.stackexchange.com.jsonl.gz",
  154. "lines": 11805,
  155. "weight": 1
  156. },
  157. {
  158. "name": "stackexchange_title_body/islam.stackexchange.com.jsonl.gz",
  159. "lines": 11853,
  160. "weight": 1
  161. },
  162. {
  163. "name": "stackexchange_title_body/expressionengine.stackexchange.com.jsonl.gz",
  164. "lines": 11866,
  165. "weight": 1
  166. },
  167. {
  168. "name": "stackexchange_title_body/politics.stackexchange.com.jsonl.gz",
  169. "lines": 11894,
  170. "weight": 1
  171. },
  172. {
  173. "name": "stackexchange_title_body/history.stackexchange.com.jsonl.gz",
  174. "lines": 12021,
  175. "weight": 1
  176. },
  177. {
  178. "name": "stackexchange_title_body/christianity.stackexchange.com.jsonl.gz",
  179. "lines": 12108,
  180. "weight": 1
  181. },
  182. {
  183. "name": "stackexchange_title_body/boardgames.stackexchange.com.jsonl.gz",
  184. "lines": 12149,
  185. "weight": 1
  186. },
  187. {
  188. "name": "flickr30k_captions.jsonl.gz",
  189. "lines": 317695,
  190. "weight": 1
  191. },
  192. {
  193. "name": "coco_captions.jsonl.gz",
  194. "lines": 828395,
  195. "weight": 1
  196. },
  197. {
  198. "name": "codesearchnet.jsonl.gz",
  199. "lines": 1151414,
  200. "weight": 1
  201. },
  202. {
  203. "name": "stackexchange_title_body/civicrm.stackexchange.com.jsonl.gz",
  204. "lines": 12543,
  205. "weight": 2
  206. },
  207. {
  208. "name": "stackexchange_title_body/craftcms.stackexchange.com.jsonl.gz",
  209. "lines": 12574,
  210. "weight": 2
  211. },
  212. {
  213. "name": "stackexchange_TitleBody_Answer/networkengineering.stackexchange.com.jsonl.gz",
  214. "lines": 12590,
  215. "weight": 2
  216. },
  217. {
  218. "name": "stackexchange_Title_Answer/networkengineering.stackexchange.com.jsonl.gz",
  219. "lines": 12590,
  220. "weight": 2
  221. },
  222. {
  223. "name": "stackexchange_TitleBody_Answer/space.stackexchange.com.jsonl.gz",
  224. "lines": 12893,
  225. "weight": 2
  226. },
  227. {
  228. "name": "stackexchange_Title_Answer/space.stackexchange.com.jsonl.gz",
  229. "lines": 12893,
  230. "weight": 2
  231. },
  232. {
  233. "name": "stackexchange_TitleBody_Answer/quant.stackexchange.com.jsonl.gz",
  234. "lines": 12933,
  235. "weight": 2
  236. },
  237. {
  238. "name": "stackexchange_Title_Answer/quant.stackexchange.com.jsonl.gz",
  239. "lines": 12933,
  240. "weight": 2
  241. },
  242. {
  243. "name": "stackexchange_TitleBody_Answer/philosophy.stackexchange.com.jsonl.gz",
  244. "lines": 13114,
  245. "weight": 2
  246. },
  247. {
  248. "name": "stackexchange_Title_Answer/philosophy.stackexchange.com.jsonl.gz",
  249. "lines": 13114,
  250. "weight": 2
  251. },
  252. {
  253. "name": "stackexchange_TitleBody_Answer/gardening.stackexchange.com.jsonl.gz",
  254. "lines": 13246,
  255. "weight": 2
  256. },
  257. {
  258. "name": "stackexchange_Title_Answer/gardening.stackexchange.com.jsonl.gz",
  259. "lines": 13246,
  260. "weight": 2
  261. },
  262. {
  263. "name": "stackexchange_title_body/hinduism.stackexchange.com.jsonl.gz",
  264. "lines": 13450,
  265. "weight": 2
  266. },
  267. {
  268. "name": "stackexchange_title_body/networkengineering.stackexchange.com.jsonl.gz",
  269. "lines": 13454,
  270. "weight": 2
  271. },
  272. {
  273. "name": "stackexchange_TitleBody_Answer/german.stackexchange.com.jsonl.gz",
  274. "lines": 13733,
  275. "weight": 2
  276. },
  277. {
  278. "name": "stackexchange_Title_Answer/german.stackexchange.com.jsonl.gz",
  279. "lines": 13733,
  280. "weight": 2
  281. },
  282. {
  283. "name": "stackexchange_title_body/german.stackexchange.com.jsonl.gz",
  284. "lines": 13950,
  285. "weight": 2
  286. },
  287. {
  288. "name": "stackexchange_title_body/philosophy.stackexchange.com.jsonl.gz",
  289. "lines": 14829,
  290. "weight": 2
  291. },
  292. {
  293. "name": "stackexchange_title_body/gardening.stackexchange.com.jsonl.gz",
  294. "lines": 15136,
  295. "weight": 2
  296. },
  297. {
  298. "name": "stackexchange_title_body/space.stackexchange.com.jsonl.gz",
  299. "lines": 15142,
  300. "weight": 2
  301. },
  302. {
  303. "name": "stackexchange_TitleBody_Answer/bicycles.stackexchange.com.jsonl.gz",
  304. "lines": 15708,
  305. "weight": 2
  306. },
  307. {
  308. "name": "stackexchange_Title_Answer/bicycles.stackexchange.com.jsonl.gz",
  309. "lines": 15708,
  310. "weight": 2
  311. },
  312. {
  313. "name": "stackexchange_TitleBody_Answer/law.stackexchange.com.jsonl.gz",
  314. "lines": 16133,
  315. "weight": 2
  316. },
  317. {
  318. "name": "stackexchange_Title_Answer/law.stackexchange.com.jsonl.gz",
  319. "lines": 16133,
  320. "weight": 2
  321. },
  322. {
  323. "name": "stackexchange_TitleBody_Answer/arduino.stackexchange.com.jsonl.gz",
  324. "lines": 16281,
  325. "weight": 2
  326. },
  327. {
  328. "name": "stackexchange_Title_Answer/arduino.stackexchange.com.jsonl.gz",
  329. "lines": 16281,
  330. "weight": 2
  331. },
  332. {
  333. "name": "stackexchange_title_body/bicycles.stackexchange.com.jsonl.gz",
  334. "lines": 16353,
  335. "weight": 2
  336. },
  337. {
  338. "name": "stackexchange_TitleBody_Answer/emacs.stackexchange.com.jsonl.gz",
  339. "lines": 16830,
  340. "weight": 2
  341. },
  342. {
  343. "name": "stackexchange_Title_Answer/emacs.stackexchange.com.jsonl.gz",
  344. "lines": 16830,
  345. "weight": 2
  346. },
  347. {
  348. "name": "stackexchange_title_body/quant.stackexchange.com.jsonl.gz",
  349. "lines": 17261,
  350. "weight": 2
  351. },
  352. {
  353. "name": "stackexchange_TitleBody_Answer/dsp.stackexchange.com.jsonl.gz",
  354. "lines": 17430,
  355. "weight": 2
  356. },
  357. {
  358. "name": "stackexchange_Title_Answer/dsp.stackexchange.com.jsonl.gz",
  359. "lines": 17430,
  360. "weight": 2
  361. },
  362. {
  363. "name": "stackexchange_TitleBody_Answer/puzzling.stackexchange.com.jsonl.gz",
  364. "lines": 17448,
  365. "weight": 2
  366. },
  367. {
  368. "name": "stackexchange_Title_Answer/puzzling.stackexchange.com.jsonl.gz",
  369. "lines": 17448,
  370. "weight": 2
  371. },
  372. {
  373. "name": "stackexchange_title_body/puzzling.stackexchange.com.jsonl.gz",
  374. "lines": 17851,
  375. "weight": 2
  376. },
  377. {
  378. "name": "stackexchange_title_body/law.stackexchange.com.jsonl.gz",
  379. "lines": 17941,
  380. "weight": 2
  381. },
  382. {
  383. "name": "stackexchange_TitleBody_Answer/movies.stackexchange.com.jsonl.gz",
  384. "lines": 18243,
  385. "weight": 2
  386. },
  387. {
  388. "name": "stackexchange_Title_Answer/movies.stackexchange.com.jsonl.gz",
  389. "lines": 18243,
  390. "weight": 2
  391. },
  392. {
  393. "name": "stackexchange_TitleBody_Answer/mechanics.stackexchange.com.jsonl.gz",
  394. "lines": 18613,
  395. "weight": 2
  396. },
  397. {
  398. "name": "stackexchange_Title_Answer/mechanics.stackexchange.com.jsonl.gz",
  399. "lines": 18613,
  400. "weight": 2
  401. },
  402. {
  403. "name": "stackexchange_TitleBody_Answer/aviation.stackexchange.com.jsonl.gz",
  404. "lines": 18755,
  405. "weight": 2
  406. },
  407. {
  408. "name": "stackexchange_Title_Answer/aviation.stackexchange.com.jsonl.gz",
  409. "lines": 18755,
  410. "weight": 2
  411. },
  412. {
  413. "name": "stackexchange_TitleBody_Answer/biology.stackexchange.com.jsonl.gz",
  414. "lines": 19277,
  415. "weight": 2
  416. },
  417. {
  418. "name": "stackexchange_Title_Answer/biology.stackexchange.com.jsonl.gz",
  419. "lines": 19277,
  420. "weight": 2
  421. },
  422. {
  423. "name": "stackexchange_TitleBody_Answer/crypto.stackexchange.com.jsonl.gz",
  424. "lines": 19404,
  425. "weight": 2
  426. },
  427. {
  428. "name": "stackexchange_Title_Answer/crypto.stackexchange.com.jsonl.gz",
  429. "lines": 19404,
  430. "weight": 2
  431. },
  432. {
  433. "name": "stackexchange_title_body/arduino.stackexchange.com.jsonl.gz",
  434. "lines": 19553,
  435. "weight": 2
  436. },
  437. {
  438. "name": "stackexchange_TitleBody_Answer/music.stackexchange.com.jsonl.gz",
  439. "lines": 19936,
  440. "weight": 2
  441. },
  442. {
  443. "name": "stackexchange_Title_Answer/music.stackexchange.com.jsonl.gz",
  444. "lines": 19936,
  445. "weight": 2
  446. },
  447. {
  448. "name": "stackexchange_title_body/aviation.stackexchange.com.jsonl.gz",
  449. "lines": 20139,
  450. "weight": 2
  451. },
  452. {
  453. "name": "stackexchange_title_body/softwarerecs.stackexchange.com.jsonl.gz",
  454. "lines": 20142,
  455. "weight": 2
  456. },
  457. {
  458. "name": "stackexchange_title_body/movies.stackexchange.com.jsonl.gz",
  459. "lines": 20181,
  460. "weight": 2
  461. },
  462. {
  463. "name": "stackexchange_TitleBody_Answer/datascience.stackexchange.com.jsonl.gz",
  464. "lines": 20503,
  465. "weight": 2
  466. },
  467. {
  468. "name": "stackexchange_Title_Answer/datascience.stackexchange.com.jsonl.gz",
  469. "lines": 20503,
  470. "weight": 2
  471. },
  472. {
  473. "name": "stackexchange_title_body/music.stackexchange.com.jsonl.gz",
  474. "lines": 20636,
  475. "weight": 2
  476. },
  477. {
  478. "name": "stackexchange_TitleBody_Answer/japanese.stackexchange.com.jsonl.gz",
  479. "lines": 20948,
  480. "weight": 2
  481. },
  482. {
  483. "name": "stackexchange_Title_Answer/japanese.stackexchange.com.jsonl.gz",
  484. "lines": 20948,
  485. "weight": 2
  486. },
  487. {
  488. "name": "stackexchange_title_body/emacs.stackexchange.com.jsonl.gz",
  489. "lines": 21055,
  490. "weight": 2
  491. },
  492. {
  493. "name": "stackexchange_title_body/dsp.stackexchange.com.jsonl.gz",
  494. "lines": 21252,
  495. "weight": 2
  496. },
  497. {
  498. "name": "stackexchange_title_body/japanese.stackexchange.com.jsonl.gz",
  499. "lines": 22056,
  500. "weight": 2
  501. },
  502. {
  503. "name": "stackexchange_TitleBody_Answer/bitcoin.stackexchange.com.jsonl.gz",
  504. "lines": 22474,
  505. "weight": 2
  506. },
  507. {
  508. "name": "stackexchange_Title_Answer/bitcoin.stackexchange.com.jsonl.gz",
  509. "lines": 22474,
  510. "weight": 2
  511. },
  512. {
  513. "name": "stackexchange_TitleBody_Answer/cooking.stackexchange.com.jsonl.gz",
  514. "lines": 22641,
  515. "weight": 2
  516. },
  517. {
  518. "name": "stackexchange_Title_Answer/cooking.stackexchange.com.jsonl.gz",
  519. "lines": 22641,
  520. "weight": 2
  521. },
  522. {
  523. "name": "stackexchange_title_body/mechanics.stackexchange.com.jsonl.gz",
  524. "lines": 22868,
  525. "weight": 2
  526. },
  527. {
  528. "name": "stackexchange_TitleBody_Answer/photo.stackexchange.com.jsonl.gz",
  529. "lines": 23204,
  530. "weight": 2
  531. },
  532. {
  533. "name": "stackexchange_Title_Answer/photo.stackexchange.com.jsonl.gz",
  534. "lines": 23204,
  535. "weight": 2
  536. },
  537. {
  538. "name": "stackexchange_title_body/crypto.stackexchange.com.jsonl.gz",
  539. "lines": 23231,
  540. "weight": 2
  541. },
  542. {
  543. "name": "stackexchange_title_body/cooking.stackexchange.com.jsonl.gz",
  544. "lines": 23705,
  545. "weight": 2
  546. },
  547. {
  548. "name": "stackexchange_title_body/photo.stackexchange.com.jsonl.gz",
  549. "lines": 23753,
  550. "weight": 2
  551. },
  552. {
  553. "name": "stackexchange_TitleBody_Answer/workplace.stackexchange.com.jsonl.gz",
  554. "lines": 24012,
  555. "weight": 2
  556. },
  557. {
  558. "name": "stackexchange_Title_Answer/workplace.stackexchange.com.jsonl.gz",
  559. "lines": 24012,
  560. "weight": 2
  561. },
  562. {
  563. "name": "stackexchange_TitleBody_Answer/meta.stackoverflow.com.jsonl.gz",
  564. "lines": 24044,
  565. "weight": 2
  566. },
  567. {
  568. "name": "stackexchange_Title_Answer/meta.stackoverflow.com.jsonl.gz",
  569. "lines": 24044,
  570. "weight": 2
  571. },
  572. {
  573. "name": "stackexchange_TitleBody_Answer/raspberrypi.stackexchange.com.jsonl.gz",
  574. "lines": 24143,
  575. "weight": 2
  576. },
  577. {
  578. "name": "stackexchange_Title_Answer/raspberrypi.stackexchange.com.jsonl.gz",
  579. "lines": 24143,
  580. "weight": 2
  581. },
  582. {
  583. "name": "stackexchange_title_body/workplace.stackexchange.com.jsonl.gz",
  584. "lines": 24189,
  585. "weight": 2
  586. },
  587. {
  588. "name": "stackexchange_title_body/biology.stackexchange.com.jsonl.gz",
  589. "lines": 24447,
  590. "weight": 3
  591. },
  592. {
  593. "name": "stackexchange_TitleBody_Answer/webapps.stackexchange.com.jsonl.gz",
  594. "lines": 24867,
  595. "weight": 3
  596. },
  597. {
  598. "name": "stackexchange_Title_Answer/webapps.stackexchange.com.jsonl.gz",
  599. "lines": 24867,
  600. "weight": 3
  601. },
  602. {
  603. "name": "stackexchange_title_body/bitcoin.stackexchange.com.jsonl.gz",
  604. "lines": 25374,
  605. "weight": 3
  606. },
  607. {
  608. "name": "stackexchange_TitleBody_Answer/judaism.stackexchange.com.jsonl.gz",
  609. "lines": 26085,
  610. "weight": 3
  611. },
  612. {
  613. "name": "stackexchange_Title_Answer/judaism.stackexchange.com.jsonl.gz",
  614. "lines": 26085,
  615. "weight": 3
  616. },
  617. {
  618. "name": "stackexchange_TitleBody_Answer/ethereum.stackexchange.com.jsonl.gz",
  619. "lines": 26124,
  620. "weight": 3
  621. },
  622. {
  623. "name": "stackexchange_Title_Answer/ethereum.stackexchange.com.jsonl.gz",
  624. "lines": 26124,
  625. "weight": 3
  626. },
  627. {
  628. "name": "stackexchange_TitleBody_Answer/worldbuilding.stackexchange.com.jsonl.gz",
  629. "lines": 26210,
  630. "weight": 3
  631. },
  632. {
  633. "name": "stackexchange_Title_Answer/worldbuilding.stackexchange.com.jsonl.gz",
  634. "lines": 26210,
  635. "weight": 3
  636. },
  637. {
  638. "name": "stackexchange_title_body/worldbuilding.stackexchange.com.jsonl.gz",
  639. "lines": 26763,
  640. "weight": 3
  641. },
  642. {
  643. "name": "stackexchange_TitleBody_Answer/chemistry.stackexchange.com.jsonl.gz",
  644. "lines": 27061,
  645. "weight": 3
  646. },
  647. {
  648. "name": "stackexchange_Title_Answer/chemistry.stackexchange.com.jsonl.gz",
  649. "lines": 27061,
  650. "weight": 3
  651. },
  652. {
  653. "name": "stackexchange_title_body/datascience.stackexchange.com.jsonl.gz",
  654. "lines": 27397,
  655. "weight": 3
  656. },
  657. {
  658. "name": "stackexchange_TitleBody_Answer/graphicdesign.stackexchange.com.jsonl.gz",
  659. "lines": 28083,
  660. "weight": 3
  661. },
  662. {
  663. "name": "stackexchange_Title_Answer/graphicdesign.stackexchange.com.jsonl.gz",
  664. "lines": 28083,
  665. "weight": 3
  666. },
  667. {
  668. "name": "stackexchange_TitleBody_Answer/ux.stackexchange.com.jsonl.gz",
  669. "lines": 28901,
  670. "weight": 3
  671. },
  672. {
  673. "name": "stackexchange_Title_Answer/ux.stackexchange.com.jsonl.gz",
  674. "lines": 28901,
  675. "weight": 3
  676. },
  677. {
  678. "name": "stackexchange_title_body/ux.stackexchange.com.jsonl.gz",
  679. "lines": 29403,
  680. "weight": 3
  681. },
  682. {
  683. "name": "stackexchange_TitleBody_Answer/money.stackexchange.com.jsonl.gz",
  684. "lines": 29404,
  685. "weight": 3
  686. },
  687. {
  688. "name": "stackexchange_Title_Answer/money.stackexchange.com.jsonl.gz",
  689. "lines": 29404,
  690. "weight": 3
  691. },
  692. {
  693. "name": "stackexchange_title_body/webapps.stackexchange.com.jsonl.gz",
  694. "lines": 29697,
  695. "weight": 3
  696. },
  697. {
  698. "name": "stackexchange_TitleBody_Answer/cs.stackexchange.com.jsonl.gz",
  699. "lines": 30010,
  700. "weight": 3
  701. },
  702. {
  703. "name": "stackexchange_Title_Answer/cs.stackexchange.com.jsonl.gz",
  704. "lines": 30010,
  705. "weight": 3
  706. },
  707. {
  708. "name": "stackexchange_title_body/graphicdesign.stackexchange.com.jsonl.gz",
  709. "lines": 30233,
  710. "weight": 3
  711. },
  712. {
  713. "name": "stackexchange_TitleBody_Answer/webmasters.stackexchange.com.jsonl.gz",
  714. "lines": 30370,
  715. "weight": 3
  716. },
  717. {
  718. "name": "stackexchange_Title_Answer/webmasters.stackexchange.com.jsonl.gz",
  719. "lines": 30370,
  720. "weight": 3
  721. },
  722. {
  723. "name": "stackexchange_title_body/raspberrypi.stackexchange.com.jsonl.gz",
  724. "lines": 30625,
  725. "weight": 3
  726. },
  727. {
  728. "name": "stackexchange_title_body/money.stackexchange.com.jsonl.gz",
  729. "lines": 32021,
  730. "weight": 3
  731. },
  732. {
  733. "name": "stackexchange_title_body/judaism.stackexchange.com.jsonl.gz",
  734. "lines": 32028,
  735. "weight": 3
  736. },
  737. {
  738. "name": "stackexchange_TitleBody_Answer/academia.stackexchange.com.jsonl.gz",
  739. "lines": 32137,
  740. "weight": 3
  741. },
  742. {
  743. "name": "stackexchange_Title_Answer/academia.stackexchange.com.jsonl.gz",
  744. "lines": 32137,
  745. "weight": 3
  746. },
  747. {
  748. "name": "stackexchange_title_body/ethereum.stackexchange.com.jsonl.gz",
  749. "lines": 32760,
  750. "weight": 3
  751. },
  752. {
  753. "name": "stackexchange_title_body/academia.stackexchange.com.jsonl.gz",
  754. "lines": 34331,
  755. "weight": 3
  756. },
  757. {
  758. "name": "stackexchange_title_body/chemistry.stackexchange.com.jsonl.gz",
  759. "lines": 34506,
  760. "weight": 3
  761. },
  762. {
  763. "name": "stackexchange_title_body/webmasters.stackexchange.com.jsonl.gz",
  764. "lines": 34559,
  765. "weight": 3
  766. },
  767. {
  768. "name": "stackexchange_title_body/meta.stackoverflow.com.jsonl.gz",
  769. "lines": 36456,
  770. "weight": 3
  771. },
  772. {
  773. "name": "stackexchange_TitleBody_Answer/travel.stackexchange.com.jsonl.gz",
  774. "lines": 36533,
  775. "weight": 4
  776. },
  777. {
  778. "name": "stackexchange_Title_Answer/travel.stackexchange.com.jsonl.gz",
  779. "lines": 36533,
  780. "weight": 4
  781. },
  782. {
  783. "name": "stackexchange_TitleBody_Answer/android.stackexchange.com.jsonl.gz",
  784. "lines": 38077,
  785. "weight": 4
  786. },
  787. {
  788. "name": "stackexchange_Title_Answer/android.stackexchange.com.jsonl.gz",
  789. "lines": 38077,
  790. "weight": 4
  791. },
  792. {
  793. "name": "stackexchange_title_body/cs.stackexchange.com.jsonl.gz",
  794. "lines": 38314,
  795. "weight": 4
  796. },
  797. {
  798. "name": "stackexchange_TitleBody_Answer/gamedev.stackexchange.com.jsonl.gz",
  799. "lines": 40154,
  800. "weight": 4
  801. },
  802. {
  803. "name": "stackexchange_Title_Answer/gamedev.stackexchange.com.jsonl.gz",
  804. "lines": 40154,
  805. "weight": 4
  806. },
  807. {
  808. "name": "stackexchange_TitleBody_Answer/rpg.stackexchange.com.jsonl.gz",
  809. "lines": 40435,
  810. "weight": 4
  811. },
  812. {
  813. "name": "stackexchange_Title_Answer/rpg.stackexchange.com.jsonl.gz",
  814. "lines": 40435,
  815. "weight": 4
  816. },
  817. {
  818. "name": "stackexchange_title_body/travel.stackexchange.com.jsonl.gz",
  819. "lines": 41227,
  820. "weight": 4
  821. },
  822. {
  823. "name": "stackexchange_TitleBody_Answer/codereview.stackexchange.com.jsonl.gz",
  824. "lines": 41748,
  825. "weight": 4
  826. },
  827. {
  828. "name": "stackexchange_Title_Answer/codereview.stackexchange.com.jsonl.gz",
  829. "lines": 41748,
  830. "weight": 4
  831. },
  832. {
  833. "name": "stackexchange_title_body/rpg.stackexchange.com.jsonl.gz",
  834. "lines": 42303,
  835. "weight": 4
  836. },
  837. {
  838. "name": "stackexchange_title_body/codereview.stackexchange.com.jsonl.gz",
  839. "lines": 45765,
  840. "weight": 4
  841. },
  842. {
  843. "name": "stackexchange_title_body/gamedev.stackexchange.com.jsonl.gz",
  844. "lines": 46485,
  845. "weight": 4
  846. },
  847. {
  848. "name": "stackexchange_TitleBody_Answer/softwareengineering.stackexchange.com.jsonl.gz",
  849. "lines": 51326,
  850. "weight": 5
  851. },
  852. {
  853. "name": "stackexchange_Title_Answer/softwareengineering.stackexchange.com.jsonl.gz",
  854. "lines": 51326,
  855. "weight": 5
  856. },
  857. {
  858. "name": "stackexchange_TitleBody_Answer/security.stackexchange.com.jsonl.gz",
  859. "lines": 51355,
  860. "weight": 5
  861. },
  862. {
  863. "name": "stackexchange_Title_Answer/security.stackexchange.com.jsonl.gz",
  864. "lines": 51355,
  865. "weight": 5
  866. },
  867. {
  868. "name": "stackexchange_title_body/android.stackexchange.com.jsonl.gz",
  869. "lines": 51608,
  870. "weight": 5
  871. },
  872. {
  873. "name": "stackexchange_TitleBody_Answer/diy.stackexchange.com.jsonl.gz",
  874. "lines": 52896,
  875. "weight": 5
  876. },
  877. {
  878. "name": "stackexchange_Title_Answer/diy.stackexchange.com.jsonl.gz",
  879. "lines": 52896,
  880. "weight": 5
  881. },
  882. {
  883. "name": "stackexchange_title_body/softwareengineering.stackexchange.com.jsonl.gz",
  884. "lines": 53942,
  885. "weight": 5
  886. },
  887. {
  888. "name": "stackexchange_TitleBody_Answer/blender.stackexchange.com.jsonl.gz",
  889. "lines": 54153,
  890. "weight": 5
  891. },
  892. {
  893. "name": "stackexchange_Title_Answer/blender.stackexchange.com.jsonl.gz",
  894. "lines": 54153,
  895. "weight": 5
  896. },
  897. {
  898. "name": "stackexchange_TitleBody_Answer/scifi.stackexchange.com.jsonl.gz",
  899. "lines": 54805,
  900. "weight": 5
  901. },
  902. {
  903. "name": "stackexchange_Title_Answer/scifi.stackexchange.com.jsonl.gz",
  904. "lines": 54805,
  905. "weight": 5
  906. },
  907. {
  908. "name": "stackexchange_title_body/security.stackexchange.com.jsonl.gz",
  909. "lines": 58000,
  910. "weight": 5
  911. },
  912. {
  913. "name": "stackexchange_TitleBody_Answer/mathematica.stackexchange.com.jsonl.gz",
  914. "lines": 59895,
  915. "weight": 5
  916. },
  917. {
  918. "name": "stackexchange_Title_Answer/mathematica.stackexchange.com.jsonl.gz",
  919. "lines": 59895,
  920. "weight": 5
  921. },
  922. {
  923. "name": "stackexchange_title_body/diy.stackexchange.com.jsonl.gz",
  924. "lines": 60083,
  925. "weight": 5
  926. },
  927. {
  928. "name": "stackexchange_TitleBody_Answer/meta.stackexchange.com.jsonl.gz",
  929. "lines": 60744,
  930. "weight": 5
  931. },
  932. {
  933. "name": "stackexchange_Title_Answer/meta.stackexchange.com.jsonl.gz",
  934. "lines": 60744,
  935. "weight": 5
  936. },
  937. {
  938. "name": "stackexchange_title_body/scifi.stackexchange.com.jsonl.gz",
  939. "lines": 61528,
  940. "weight": 6
  941. },
  942. {
  943. "name": "stackexchange_TitleBody_Answer/drupal.stackexchange.com.jsonl.gz",
  944. "lines": 67817,
  945. "weight": 6
  946. },
  947. {
  948. "name": "stackexchange_Title_Answer/drupal.stackexchange.com.jsonl.gz",
  949. "lines": 67817,
  950. "weight": 6
  951. },
  952. {
  953. "name": "stackexchange_TitleBody_Answer/dba.stackexchange.com.jsonl.gz",
  954. "lines": 71449,
  955. "weight": 6
  956. },
  957. {
  958. "name": "stackexchange_Title_Answer/dba.stackexchange.com.jsonl.gz",
  959. "lines": 71449,
  960. "weight": 6
  961. },
  962. {
  963. "name": "stackexchange_title_body/mathematica.stackexchange.com.jsonl.gz",
  964. "lines": 73131,
  965. "weight": 7
  966. },
  967. {
  968. "name": "stackexchange_TitleBody_Answer/ell.stackexchange.com.jsonl.gz",
  969. "lines": 77892,
  970. "weight": 7
  971. },
  972. {
  973. "name": "stackexchange_Title_Answer/ell.stackexchange.com.jsonl.gz",
  974. "lines": 77892,
  975. "weight": 7
  976. },
  977. {
  978. "name": "stackexchange_TitleBody_Answer/magento.stackexchange.com.jsonl.gz",
  979. "lines": 79241,
  980. "weight": 7
  981. },
  982. {
  983. "name": "stackexchange_Title_Answer/magento.stackexchange.com.jsonl.gz",
  984. "lines": 79241,
  985. "weight": 7
  986. },
  987. {
  988. "name": "stackexchange_title_body/drupal.stackexchange.com.jsonl.gz",
  989. "lines": 79717,
  990. "weight": 7
  991. },
  992. {
  993. "name": "stackexchange_TitleBody_Answer/sharepoint.stackexchange.com.jsonl.gz",
  994. "lines": 80420,
  995. "weight": 7
  996. },
  997. {
  998. "name": "stackexchange_Title_Answer/sharepoint.stackexchange.com.jsonl.gz",
  999. "lines": 80420,
  1000. "weight": 7
  1001. },
  1002. {
  1003. "name": "stackexchange_title_body/blender.stackexchange.com.jsonl.gz",
  1004. "lines": 80766,
  1005. "weight": 7
  1006. },
  1007. {
  1008. "name": "stackexchange_title_body/dba.stackexchange.com.jsonl.gz",
  1009. "lines": 81871,
  1010. "weight": 7
  1011. },
  1012. {
  1013. "name": "stackexchange_TitleBody_Answer/gaming.stackexchange.com.jsonl.gz",
  1014. "lines": 82887,
  1015. "weight": 7
  1016. },
  1017. {
  1018. "name": "stackexchange_Title_Answer/gaming.stackexchange.com.jsonl.gz",
  1019. "lines": 82887,
  1020. "weight": 7
  1021. },
  1022. {
  1023. "name": "stackexchange_title_body/ell.stackexchange.com.jsonl.gz",
  1024. "lines": 83271,
  1025. "weight": 7
  1026. },
  1027. {
  1028. "name": "stackexchange_title_body/meta.stackexchange.com.jsonl.gz",
  1029. "lines": 83510,
  1030. "weight": 7
  1031. },
  1032. {
  1033. "name": "stackexchange_TitleBody_Answer/wordpress.stackexchange.com.jsonl.gz",
  1034. "lines": 83621,
  1035. "weight": 7
  1036. },
  1037. {
  1038. "name": "stackexchange_Title_Answer/wordpress.stackexchange.com.jsonl.gz",
  1039. "lines": 83621,
  1040. "weight": 7
  1041. },
  1042. {
  1043. "name": "stackexchange_TitleBody_Answer/mathoverflow.net.jsonl.gz",
  1044. "lines": 85289,
  1045. "weight": 8
  1046. },
  1047. {
  1048. "name": "stackexchange_Title_Answer/mathoverflow.net.jsonl.gz",
  1049. "lines": 85289,
  1050. "weight": 8
  1051. },
  1052. {
  1053. "name": "stackexchange_TitleBody_Answer/salesforce.stackexchange.com.jsonl.gz",
  1054. "lines": 87272,
  1055. "weight": 8
  1056. },
  1057. {
  1058. "name": "stackexchange_Title_Answer/salesforce.stackexchange.com.jsonl.gz",
  1059. "lines": 87272,
  1060. "weight": 8
  1061. },
  1062. {
  1063. "name": "stackexchange_title_body/gaming.stackexchange.com.jsonl.gz",
  1064. "lines": 88912,
  1065. "weight": 8
  1066. },
  1067. {
  1068. "name": "stackexchange_TitleBody_Answer/apple.stackexchange.com.jsonl.gz",
  1069. "lines": 92487,
  1070. "weight": 8
  1071. },
  1072. {
  1073. "name": "stackexchange_Title_Answer/apple.stackexchange.com.jsonl.gz",
  1074. "lines": 92487,
  1075. "weight": 8
  1076. },
  1077. {
  1078. "name": "stackexchange_title_body/sharepoint.stackexchange.com.jsonl.gz",
  1079. "lines": 94011,
  1080. "weight": 8
  1081. },
  1082. {
  1083. "name": "stackexchange_title_body/magento.stackexchange.com.jsonl.gz",
  1084. "lines": 99991,
  1085. "weight": 9
  1086. },
  1087. {
  1088. "name": "stackexchange_TitleBody_Answer/gis.stackexchange.com.jsonl.gz",
  1089. "lines": 100254,
  1090. "weight": 9
  1091. },
  1092. {
  1093. "name": "stackexchange_Title_Answer/gis.stackexchange.com.jsonl.gz",
  1094. "lines": 100254,
  1095. "weight": 9
  1096. },
  1097. {
  1098. "name": "stackexchange_title_body/wordpress.stackexchange.com.jsonl.gz",
  1099. "lines": 100474,
  1100. "weight": 9
  1101. },
  1102. {
  1103. "name": "stackexchange_TitleBody_Answer/english.stackexchange.com.jsonl.gz",
  1104. "lines": 100640,
  1105. "weight": 9
  1106. },
  1107. {
  1108. "name": "stackexchange_Title_Answer/english.stackexchange.com.jsonl.gz",
  1109. "lines": 100640,
  1110. "weight": 9
  1111. },
  1112. {
  1113. "name": "stackexchange_title_body/salesforce.stackexchange.com.jsonl.gz",
  1114. "lines": 105260,
  1115. "weight": 9
  1116. },
  1117. {
  1118. "name": "stackexchange_title_body/english.stackexchange.com.jsonl.gz",
  1119. "lines": 109522,
  1120. "weight": 10
  1121. },
  1122. {
  1123. "name": "stackexchange_title_body/apple.stackexchange.com.jsonl.gz",
  1124. "lines": 110622,
  1125. "weight": 10
  1126. },
  1127. {
  1128. "name": "stackexchange_TitleBody_Answer/stats.stackexchange.com.jsonl.gz",
  1129. "lines": 115679,
  1130. "weight": 10
  1131. },
  1132. {
  1133. "name": "stackexchange_Title_Answer/stats.stackexchange.com.jsonl.gz",
  1134. "lines": 115679,
  1135. "weight": 10
  1136. },
  1137. {
  1138. "name": "stackexchange_title_body/mathoverflow.net.jsonl.gz",
  1139. "lines": 120851,
  1140. "weight": 10
  1141. },
  1142. {
  1143. "name": "stackexchange_TitleBody_Answer/electronics.stackexchange.com.jsonl.gz",
  1144. "lines": 129494,
  1145. "weight": 11
  1146. },
  1147. {
  1148. "name": "stackexchange_Title_Answer/electronics.stackexchange.com.jsonl.gz",
  1149. "lines": 129494,
  1150. "weight": 11
  1151. },
  1152. {
  1153. "name": "stackexchange_title_body/gis.stackexchange.com.jsonl.gz",
  1154. "lines": 131000,
  1155. "weight": 11
  1156. },
  1157. {
  1158. "name": "stackexchange_TitleBody_Answer/physics.stackexchange.com.jsonl.gz",
  1159. "lines": 141230,
  1160. "weight": 12
  1161. },
  1162. {
  1163. "name": "stackexchange_Title_Answer/physics.stackexchange.com.jsonl.gz",
  1164. "lines": 141230,
  1165. "weight": 12
  1166. },
  1167. {
  1168. "name": "stackexchange_title_body/electronics.stackexchange.com.jsonl.gz",
  1169. "lines": 143582,
  1170. "weight": 12
  1171. },
  1172. {
  1173. "name": "stackexchange_TitleBody_Answer/unix.stackexchange.com.jsonl.gz",
  1174. "lines": 155414,
  1175. "weight": 13
  1176. },
  1177. {
  1178. "name": "stackexchange_Title_Answer/unix.stackexchange.com.jsonl.gz",
  1179. "lines": 155414,
  1180. "weight": 13
  1181. },
  1182. {
  1183. "name": "stackexchange_TitleBody_Answer/tex.stackexchange.com.jsonl.gz",
  1184. "lines": 171628,
  1185. "weight": 15
  1186. },
  1187. {
  1188. "name": "stackexchange_Title_Answer/tex.stackexchange.com.jsonl.gz",
  1189. "lines": 171628,
  1190. "weight": 15
  1191. },
  1192. {
  1193. "name": "stackexchange_title_body/physics.stackexchange.com.jsonl.gz",
  1194. "lines": 173307,
  1195. "weight": 15
  1196. },
  1197. {
  1198. "name": "stackexchange_title_body/stats.stackexchange.com.jsonl.gz",
  1199. "lines": 173466,
  1200. "weight": 15
  1201. },
  1202. {
  1203. "name": "stackexchange_title_body/unix.stackexchange.com.jsonl.gz",
  1204. "lines": 185997,
  1205. "weight": 16
  1206. },
  1207. {
  1208. "name": "stackexchange_title_body/tex.stackexchange.com.jsonl.gz",
  1209. "lines": 202954,
  1210. "weight": 17
  1211. },
  1212. {
  1213. "name": "TriviaQA_pairs.jsonl.gz",
  1214. "lines": 73346,
  1215. "weight": 19
  1216. },
  1217. {
  1218. "name": "stackexchange_TitleBody_Answer/serverfault.com.jsonl.gz",
  1219. "lines": 238507,
  1220. "weight": 20
  1221. },
  1222. {
  1223. "name": "stackexchange_Title_Answer/serverfault.com.jsonl.gz",
  1224. "lines": 238507,
  1225. "weight": 20
  1226. },
  1227. {
  1228. "name": "stackexchange_duplicate_questions_title-body_title-body.jsonl.gz",
  1229. "lines": 250460,
  1230. "weight": 21
  1231. },
  1232. {
  1233. "name": "stackexchange_duplicate_questions_body_body.jsonl.gz",
  1234. "lines": 250519,
  1235. "weight": 21
  1236. },
  1237. {
  1238. "name": "squad_pairs.jsonl.gz",
  1239. "lines": 87599,
  1240. "weight": 22
  1241. },
  1242. {
  1243. "name": "stackexchange_TitleBody_Answer/askubuntu.com.jsonl.gz",
  1244. "lines": 267135,
  1245. "weight": 22
  1246. },
  1247. {
  1248. "name": "stackexchange_Title_Answer/askubuntu.com.jsonl.gz",
  1249. "lines": 267135,
  1250. "weight": 22
  1251. },
  1252. {
  1253. "name": "stackexchange_title_body/serverfault.com.jsonl.gz",
  1254. "lines": 270904,
  1255. "weight": 23
  1256. },
  1257. {
  1258. "name": "NQ-train_pairs.jsonl.gz",
  1259. "lines": 100231,
  1260. "weight": 25
  1261. },
  1262. {
  1263. "name": "SimpleWiki.jsonl.gz",
  1264. "lines": 102225,
  1265. "weight": 26
  1266. },
  1267. {
  1268. "name": "quora_duplicates_triplets.jsonl.gz",
  1269. "lines": 103663,
  1270. "weight": 26
  1271. },
  1272. {
  1273. "name": "stackexchange_duplicate_questions_title_title.jsonl.gz",
  1274. "lines": 304525,
  1275. "weight": 26
  1276. },
  1277. {
  1278. "name": "altlex.jsonl.gz",
  1279. "lines": 112696,
  1280. "weight": 28
  1281. },
  1282. {
  1283. "name": "stackexchange_title_body/askubuntu.com.jsonl.gz",
  1284. "lines": 347925,
  1285. "weight": 29
  1286. },
  1287. {
  1288. "name": "stackexchange_TitleBody_Answer/superuser.com.jsonl.gz",
  1289. "lines": 352610,
  1290. "weight": 30
  1291. },
  1292. {
  1293. "name": "stackexchange_Title_Answer/superuser.com.jsonl.gz",
  1294. "lines": 352610,
  1295. "weight": 30
  1296. },
  1297. {
  1298. "name": "wikihow.jsonl.gz",
  1299. "lines": 128542,
  1300. "weight": 32
  1301. },
  1302. {
  1303. "name": "stackexchange_title_body/superuser.com.jsonl.gz",
  1304. "lines": 435463,
  1305. "weight": 36
  1306. },
  1307. {
  1308. "name": "stackexchange_title_body/small_stackexchanges.jsonl.gz",
  1309. "lines": 448146,
  1310. "weight": 37
  1311. },
  1312. {
  1313. "name": "stackexchange_TitleBody_Answer/small_stackexchanges.jsonl.gz",
  1314. "lines": 460256,
  1315. "weight": 38
  1316. },
  1317. {
  1318. "name": "stackexchange_Title_Answer/small_stackexchanges.jsonl.gz",
  1319. "lines": 460256,
  1320. "weight": 38
  1321. },
  1322. {
  1323. "name": "sentence-compression.jsonl.gz",
  1324. "lines": 180000,
  1325. "weight": 45
  1326. },
  1327. {
  1328. "name": "AllNLI.jsonl.gz",
  1329. "lines": 277230,
  1330. "weight": 69
  1331. },
  1332. {
  1333. "name": "eli5_question_answer.jsonl.gz",
  1334. "lines": 325475,
  1335. "weight": 81
  1336. },
  1337. {
  1338. "name": "reddit/reddit_2015.jsonl.gz",
  1339. "lines": 135108166,
  1340. "weight": 82
  1341. },
  1342. {
  1343. "name": "reddit/reddit_2016.jsonl.gz",
  1344. "lines": 159164386,
  1345. "weight": 82
  1346. },
  1347. {
  1348. "name": "reddit/reddit_2017.jsonl.gz",
  1349. "lines": 191485219,
  1350. "weight": 82
  1351. },
  1352. {
  1353. "name": "reddit/reddit_2018.jsonl.gz",
  1354. "lines": 240726659,
  1355. "weight": 82
  1356. },
  1357. {
  1358. "name": "stackexchange_TitleBody_Answer/math.stackexchange.com.jsonl.gz",
  1359. "lines": 1100953,
  1360. "weight": 83
  1361. },
  1362. {
  1363. "name": "stackexchange_Title_Answer/math.stackexchange.com.jsonl.gz",
  1364. "lines": 1100953,
  1365. "weight": 83
  1366. },
  1367. {
  1368. "name": "stackexchange_title_body/math.stackexchange.com.jsonl.gz",
  1369. "lines": 1338443,
  1370. "weight": 83
  1371. },
  1372. {
  1373. "name": "stackexchange_TitleBody_Answer/stackoverflow.com-Posts.jsonl.gz",
  1374. "lines": 15768211,
  1375. "weight": 83
  1376. },
  1377. {
  1378. "name": "stackexchange_Title_Answer/stackoverflow.com-Posts.jsonl.gz",
  1379. "lines": 15768211,
  1380. "weight": 83
  1381. },
  1382. {
  1383. "name": "stackexchange_title_body/stackoverflow.com-Posts.jsonl.gz",
  1384. "lines": 18562443,
  1385. "weight": 83
  1386. },
  1387. {
  1388. "name": "specter_train_triples.jsonl.gz",
  1389. "lines": 684100,
  1390. "weight": 84
  1391. },
  1392. {
  1393. "name": "S2ORC_title_abstract.jsonl.gz",
  1394. "lines": 41769185,
  1395. "weight": 123
  1396. },
  1397. {
  1398. "name": "S2ORC_citation_pairs.jsonl.gz",
  1399. "lines": 52603982,
  1400. "weight": 123
  1401. },
  1402. {
  1403. "name": "PAQ_pairs.jsonl.gz",
  1404. "lines": 64371441,
  1405. "weight": 123
  1406. },
  1407. {
  1408. "name": "WikiAnswers_pairs.jsonl.gz",
  1409. "lines": 77427422,
  1410. "weight": 123
  1411. },
  1412. {
  1413. "name": "S2ORC_citation_pairs_abstract.jsonl.gz",
  1414. "lines": 116288806,
  1415. "weight": 123
  1416. },
  1417. {
  1418. "name": "searchQA_question_top5_snippets_merged.jsonl.gz",
  1419. "lines": 582261,
  1420. "weight": 144
  1421. },
  1422. {
  1423. "name": "yahoo_answers_title_question.jsonl.gz",
  1424. "lines": 659896,
  1425. "weight": 163
  1426. },
  1427. {
  1428. "name": "yahoo_answers_question_answer.jsonl.gz",
  1429. "lines": 681164,
  1430. "weight": 169
  1431. },
  1432. {
  1433. "name": "yahoo_answers_title_answer.jsonl.gz",
  1434. "lines": 1198260,
  1435. "weight": 247
  1436. },
  1437. {
  1438. "name": "amazon-qa-train-pairs.jsonl.gz",
  1439. "lines": 2448839,
  1440. "weight": 247
  1441. },
  1442. {
  1443. "name": "gooaq_pairs.jsonl.gz",
  1444. "lines": 3012496,
  1445. "weight": 247
  1446. },
  1447. {
  1448. "name": "msmarco-query_passage_negative.jsonl.gz",
  1449. "lines": 9144553,
  1450. "weight": 247
  1451. }
  1452. ]