# main.py
import sys
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, RandomSampler
from transformers import BertTokenizer, AdamW, get_linear_schedule_with_warmup
# from torch.nn import DataParallel
import matplotlib.pyplot as plt
from PointerNet.model import UIEModel
from PointerNet.config import NerArgs
from PointerNet.data_loader import NerDataset, NerCollate, load_and_split_dataset, get_paper_for_predict
from PointerNet.utils.decode import ner_decode, topic_ner_decode
from PointerNet.utils.metrics import calculate_metric, classification_report, get_p_r_f
from pprint import pprint
from PointerNet import config

logger = config.myLog(__name__, log_cate="train_log").getlog()
sigmoid = torch.nn.Sigmoid()


class NerPipeline:
    def __init__(self, model, args):
        self.model = model
        self.args = args
        if hasattr(self.args, "train_path"):
            self.train_data, self.valid_data, self.test_data = load_and_split_dataset(
                self.args.train_path, train_ratio=0.995, valid_ratio=0.003)

    def save_model(self):
        torch.save(self.model.state_dict(), self.args.save_dir)
        torch.save(self.optimizer.state_dict(), self.args.optimizer_save_dir)

    def load_model(self):
        # Load a checkpoint trained on GPU onto the CPU first.
        self.model.load_state_dict(torch.load(self.args.save_dir, map_location="cpu"))
        # self.model.load_state_dict(torch.load(self.args.save_dir))
        self.model.to(self.args.device)  # memory-intensive

    def build_optimizer_and_scheduler(self, t_total):
        module = (
            self.model.module if hasattr(self.model, "module") else self.model
        )
        # Differential learning rates: the BERT encoder and the other modules
        # are optimized with different learning rates.
        no_decay = ["bias", "LayerNorm.weight"]
        model_param = list(module.named_parameters())
        bert_param_optimizer = []
        other_param_optimizer = []
        for name, para in model_param:
            space = name.split('.')
            if "bert" in space[0]:
                bert_param_optimizer.append((name, para))
            else:
                other_param_optimizer.append((name, para))
        optimizer_grouped_parameters = [
            # BERT modules
            {"params": [p for n, p in bert_param_optimizer if not any(nd in n for nd in no_decay)],
             "weight_decay": self.args.weight_decay, 'lr': self.args.lr},
            {"params": [p for n, p in bert_param_optimizer if any(nd in n for nd in no_decay)],
             "weight_decay": 0.0, 'lr': self.args.lr},
            # other modules, with their own learning rate
            {"params": [p for n, p in other_param_optimizer if not any(nd in n for nd in no_decay)],
             "weight_decay": self.args.weight_decay, 'lr': self.args.other_lr},
            {"params": [p for n, p in other_param_optimizer if any(nd in n for nd in no_decay)],
             "weight_decay": 0.0, 'lr': self.args.other_lr},
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.lr, eps=self.args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=int(self.args.warmup_proportion * t_total), num_training_steps=t_total
        )
        return optimizer, scheduler
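
    # Illustrative only (the head name below is hypothetical): given parameters
    #   "bert.encoder.layer.0.attention.self.query.weight" -> BERT group, weight decay
    #   "bert.encoder.layer.0.output.LayerNorm.weight"     -> BERT group, no weight decay
    #   "start_fc.bias"                                    -> other group, no weight decay
    # the grouping above yields the four AdamW parameter groups, so the task
    # heads can train with args.other_lr while the pretrained encoder uses args.lr.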

    def eval_forward(self, data_loader):
        # Unused: superseded by half_batch_eval_forward (see the commented-out
        # call sites in train() and test()).
        s_logits, e_logits = [], []
        self.model.eval()
        with torch.no_grad():
            for eval_step, batch_data in enumerate(data_loader):
                # for key in batch_data.keys():
                #     batch_data[key] = batch_data[key].to(self.args.device)
                output = self.model(batch_data['input_ids'],
                                    batch_data['attention_mask'],
                                    )
                start_logits = output["ner_output"]["ner_start_logits"]
                end_logits = output["ner_output"]["ner_end_logits"]
                for i in range(len(start_logits)):
                    s_logits.append([start_logits[i]])
                    e_logits.append([end_logits[i]])
        return s_logits, e_logits

    def half_batch_eval_forward(self, data_loader):
        """
        In this project batch_size = 1, so one batch is one document. When the
        document contains too many sentences, it is truncated into chunks that
        are fed forward one at a time. No backprop is needed here, so GPU
        memory use stays low and each chunk can hold more sentences than in
        training.
        """
        s_logits, e_logits, con_logits = [], [], []
        self.model.eval()
        with torch.no_grad():
            for eval_step, batch_data in enumerate(data_loader):
                max_input_len = self.args.max_input_sent_num
                # Because of GPU memory limits batch_size = 1, so batch_n is always 0.
                one_data_s_logit, one_data_e_logit, one_data_con_logit = [], [], []
                for batch_n in range(len(batch_data['ner_end_labels'])):
                    batch_num = len(batch_data['input_ids'][batch_n]) // max_input_len
                    if batch_num > 0:
                        for i in range(batch_num):
                            left, right = i * max_input_len, (i + 1) * max_input_len
                            if i == batch_num - 1 and len(batch_data['input_ids'][batch_n]) - (i + 1) * max_input_len < 3:
                                # A tail of fewer than 3 sentences is folded into the remainder chunk below.
                                batch_num -= 1
                                break
                            output = self.model(batch_data['input_ids'][batch_n][left:right].to(self.args.device),
                                                batch_data['attention_mask'][batch_n][left:right].to(self.args.device))
                            start_logits = output["ner_output"]["ner_start_logits"]
                            end_logits = output["ner_output"]["ner_end_logits"]
                            content_logits = output["ner_output"]["ner_content_logits"]
                            one_data_s_logit.extend(start_logits)
                            one_data_e_logit.extend(end_logits)
                            one_data_con_logit.extend(content_logits)
                    if len(batch_data['input_ids'][batch_n]) - batch_num * max_input_len > 0:
                        left = batch_num * max_input_len
                        output = self.model(batch_data['input_ids'][batch_n][left:].to(self.args.device),
                                            batch_data['attention_mask'][batch_n][left:].to(self.args.device))
                        start_logits = output["ner_output"]["ner_start_logits"]
                        end_logits = output["ner_output"]["ner_end_logits"]
                        content_logits = output["ner_output"]["ner_content_logits"]
                        one_data_s_logit.extend(start_logits)
                        one_data_e_logit.extend(end_logits)
                        one_data_con_logit.extend(content_logits)
                s_logits.append(torch.cat(one_data_s_logit))
                e_logits.append(torch.cat(one_data_e_logit))
                con_logits.append(torch.cat(one_data_con_logit))
        return s_logits, e_logits, con_logits
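
    # Worked example of the chunking used by half_batch_eval_forward (above)
    # and half_batch_train (below), illustrative, assuming
    # max_input_sent_num = 100: a 205-sentence document gives batch_num = 2,
    # i.e. chunks [0:100] and [100:200], plus the remainder [200:205].
    # With 202 sentences the tail after the second chunk would be 2 (< 3), so
    # the second full chunk is skipped and the remainder covers [100:202].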

    def get_metric(self, s_logits, e_logits, con_logits, callback):
        total_count = [0 for _ in range(len(self.args.id2label))]
        role_metric = np.zeros([len(self.args.id2label), 3])
        for s_logit, e_logit, con_logit, tmp_callback in zip(s_logits, e_logits, con_logits, callback):
            text_list, gt_entities = tmp_callback
            pred_entities = topic_ner_decode(sigmoid(s_logit), sigmoid(e_logit), sigmoid(con_logit),
                                             text_list, self.args.id2label)
            # print("gold start positions and item lengths:", [i[1] for i in gt_entities['TOPIC']], [len(i[0]) for i in gt_entities['TOPIC']])
            # print("predicted start positions and item lengths:", [i[1] for i in pred_entities['TOPIC']], [len(i[0]) for i in pred_entities['TOPIC']])
            # print("predicted:", pred_entities)
            # print("ground truth:", gt_entities)
            for idx, _type in enumerate(self.args.labels):  # there is only one label type
                if _type not in pred_entities:
                    pred_entities[_type] = []
                total_count[idx] += len(gt_entities[_type])
                role_metric[idx] += calculate_metric(pred_entities[_type], gt_entities[_type])
        return role_metric, total_count

    def half_batch_train(self, batch_data, epoch, global_step, t_total):
        """
        In this project batch_size = 1, so one batch is one document. When the
        document contains too many sentences, it is truncated into chunks that
        are trained on one at a time.
        batch_data: the batch data (dict of lists, one entry per document)
        """
        y_loss = []
        max_input_len = self.args.max_input_sent_num
        for batch_n in range(len(batch_data['ner_end_labels'])):  # batch_size = 1 because of GPU memory limits
            batch_num = len(batch_data['input_ids'][batch_n]) // max_input_len
            print("batch_num:::", batch_num, batch_n, len(batch_data['input_ids'][batch_n]))
            global_step += 1
            if batch_num > 0:
                for i in range(batch_num):
                    left, right = i * max_input_len, (i + 1) * max_input_len
                    if i == batch_num - 1 and len(batch_data['input_ids'][batch_n]) - (i + 1) * max_input_len < 3:
                        # A tail of fewer than 3 sentences is folded into the remainder chunk below.
                        batch_num -= 1
                        break
                    output = self.model(batch_data['input_ids'][batch_n][left:right].to(self.args.device),
                                        batch_data['attention_mask'][batch_n][left:right].to(self.args.device),
                                        batch_data['ner_start_labels'][batch_n][left:right].to(self.args.device),
                                        batch_data['ner_end_labels'][batch_n][left:right].to(self.args.device),
                                        batch_data['ner_content_labels'][batch_n][left:right].to(self.args.device),
                                        )
                    loss = output["ner_output"]["ner_loss"]
                    y_loss.append(loss.item())
                    loss.backward()  # the memory-intensive step
                    # Clip after backward() so the freshly computed gradients are clipped.
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)
                    self.optimizer.step()
                    self.scheduler.step()
                    self.model.zero_grad()
                    print('【train】 Epoch: %d/%d Step: %d/%d loss: %.5f' % (
                        epoch, self.args.train_epoch, global_step, t_total, loss.item()))
            if len(batch_data['input_ids'][batch_n]) - batch_num * max_input_len > 0:
                left = batch_num * max_input_len
                output = self.model(batch_data['input_ids'][batch_n][left:].to(self.args.device),
                                    batch_data['attention_mask'][batch_n][left:].to(self.args.device),
                                    batch_data['ner_start_labels'][batch_n][left:].to(self.args.device),
                                    batch_data['ner_end_labels'][batch_n][left:].to(self.args.device),
                                    batch_data['ner_content_labels'][batch_n][left:].to(self.args.device),
                                    )
                loss = output["ner_output"]["ner_loss"]
                y_loss.append(loss.item())
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)
                self.optimizer.step()
                self.scheduler.step()
                self.model.zero_grad()
                print('【train】 Epoch: %d/%d Step: %d/%d loss: %.5f' % (
                    epoch, self.args.train_epoch, global_step, t_total, loss.item()))
        return global_step, sum(y_loss) / len(y_loss)

    def train(self, dev=True):
        train_dataset, train_callback = NerDataset(data=self.train_data,
                                                   tokenizer=self.args.tokenizer,
                                                   max_len=self.args.max_seq_len,
                                                   label_list=self.args.labels)
        collate = NerCollate(max_len=self.args.max_seq_len, label2id=self.args.label2id)
        train_sampler = RandomSampler(train_dataset)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=self.args.train_batch_size,
                                  sampler=train_sampler,
                                  num_workers=10,  # multi-process data loading
                                  collate_fn=collate.collate_fn)
        dev_loader = None
        dev_callback = None
        if dev:
            dev_dataset, dev_callback = NerDataset(data=self.valid_data,
                                                   tokenizer=self.args.tokenizer,
                                                   max_len=self.args.max_seq_len,
                                                   label_list=self.args.labels)
            dev_loader = DataLoader(dataset=dev_dataset,
                                    batch_size=self.args.eval_batch_size,
                                    shuffle=False,
                                    num_workers=10,
                                    collate_fn=collate.collate_fn)
        t_total = len(train_loader) * self.args.train_epoch
        self.optimizer, self.scheduler = self.build_optimizer_and_scheduler(t_total)
        global_step = 0
        self.model.zero_grad()
        self.model.to(self.args.device)
        # For multi-GPU training the data must also be moved to the GPUs before model() is called:
        # if torch.cuda.device_count() > 1:
        #     self.model = DataParallel(self.model, device_ids=[0, 1])
        eval_step = self.args.eval_step
        best_f1 = 0.
        y_loss = []
        for epoch in range(1, self.args.train_epoch + 1):
            y_loss_bt = []  # mean losses of the current epoch
            for batch_data in tqdm(train_loader):
                self.model.train()
                global_step, mean_loss = self.half_batch_train(batch_data, epoch, global_step, t_total)
                y_loss_bt.append(mean_loss)
                if dev and global_step % eval_step == 0:
                    # s_logits, e_logits = self.eval_forward(dev_loader)
                    s_logits, e_logits, con_logits = self.half_batch_eval_forward(dev_loader)
                    role_metric, _ = self.get_metric(s_logits, e_logits, con_logits, dev_callback)
                    micro_metrics = np.sum(role_metric, axis=0)
                    micro_metrics = get_p_r_f(micro_metrics[0], micro_metrics[1], micro_metrics[2])
                    print('【eval】 precision={:.4f} recall={:.4f} f1_score={:.4f}'.format(
                        micro_metrics[0], micro_metrics[1], micro_metrics[2]))
                    logger.info('【eval】 precision={:.4f} recall={:.4f} f1_score={:.4f}'.format(
                        micro_metrics[0], micro_metrics[1], micro_metrics[2]))
                    y_loss.append(sum(y_loss_bt) / len(y_loss_bt))
                    y_loss_bt = []
                    if micro_metrics[2] > best_f1:
                        best_f1 = micro_metrics[2]
                        print("best_f1:{}".format(best_f1))
                        logger.info("best_f1:{}".format(best_f1))
                        self.save_model()
            # Plot the loss curve after every epoch (one epoch already takes a long time).
            x = [i for i in range(len(y_loss))]
            plt.figure()
            plt.plot(x, y_loss)
            plt.savefig(f'loss_curve_{epoch}_epoch.png')  # save the figure
            plt.show()

    def test(self):
        test_dataset, test_callback = NerDataset(data=self.test_data,
                                                 tokenizer=self.args.tokenizer,
                                                 max_len=self.args.max_seq_len,
                                                 label_list=self.args.labels)
        collate = NerCollate(max_len=self.args.max_seq_len, label2id=self.args.label2id)
        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=self.args.eval_batch_size,
                                 shuffle=False,
                                 num_workers=2,
                                 collate_fn=collate.collate_fn)
        self.load_model()
        self.model.to(self.args.device)
        with torch.no_grad():
            # s_logits, e_logits = self.eval_forward(test_loader)
            s_logits, e_logits, con_logits = self.half_batch_eval_forward(test_loader)
            role_metric, total_count = self.get_metric(s_logits, e_logits, con_logits, test_callback)
            micro_metrics = np.sum(role_metric, axis=0)
            micro_metrics = get_p_r_f(micro_metrics[0], micro_metrics[1], micro_metrics[2])
            print('[eval] precision={:.4f} recall={:.4f} f1_score={:.4f}'.format(
                micro_metrics[0], micro_metrics[1], micro_metrics[2]))
            print(classification_report(role_metric, self.args.labels, self.args.id2label, total_count))

    def predict(self, sentences_list):
        with torch.no_grad():  # no gradients are needed for inference
            inputs = self.args.tokenizer(sentences_list, padding='max_length', truncation=True,
                                         max_length=self.args.max_seq_len, return_tensors='pt')
            token_ids = inputs['input_ids'].to(self.args.device)
            attention_mask = inputs['attention_mask'].to(self.args.device)
            # tokens = ['[CLS]'] + tokens + ['[SEP]']
            # token_ids = torch.from_numpy(np.array(encode_dict['input_ids'])).unsqueeze(0).to(self.args.device)
            # attention_mask = torch.from_numpy(np.array(encode_dict['attention_mask'])).unsqueeze(0).to(
            #     self.args.device)
            # token_type_ids = torch.from_numpy(np.array(encode_dict['token_type_ids'])).unsqueeze(0).to(self.args.device)
            output = self.model(token_ids, attention_mask)
            start_logits = output["ner_output"]["ner_start_logits"]
            end_logits = output["ner_output"]["ner_end_logits"]
            content_logits = output["ner_output"]["ner_content_logits"]
            start_logits = sigmoid(start_logits[0])
            end_logits = sigmoid(end_logits[0])
            con_logits = sigmoid(content_logits[0])
            return start_logits, end_logits, con_logits
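
    # Note (illustrative, not the project's own decode): the probabilities
    # returned above are turned into spans by topic_ner_decode
    # (PointerNet/utils/decode.py). A typical pointer-network decode pairs each
    # position whose start probability exceeds 0.5 with the nearest following
    # position whose end probability exceeds 0.5, e.g.:
    #   starts = [i for i, p in enumerate(start_probs) if p > 0.5]
    #   ends = [j for j, p in enumerate(end_probs) if p > 0.5]
    #   spans = [(s, min(j for j in ends if j >= s))
    #            for s in starts if any(j >= s for j in ends)]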

    def half_batch_predict(self, **kwargs):
        """
        Truncate one document into chunks and predict chunk by chunk
        (see the illustrative _demo_half_batch_windows sketch after this class).
        """
        # self.load_model()
        # self.model.eval()
        # self.model.to(self.args.device)
        with torch.no_grad():
            print("\n********************************************\n")
            if kwargs:
                sentences = kwargs["text_list"]
                labels = kwargs["labels"] if "labels" in kwargs else []
            else:
                sentences, labels = get_paper_for_predict()
                print(sentences)
                print(labels)
            # For a single sentence:
            # encode_dict = self.args.tokenizer.encode_plus(text=[i for i in text],
            #                                               max_length=self.args.max_seq_len,
            #                                               padding="max_length",
            #                                               truncation="only_first",
            #                                               return_token_type_ids=True,
            #                                               return_attention_mask=True)
            # The document has to be processed a fixed number of sentences at a
            # time; the chunks cannot be too long because GPU memory is limited.
            print("number of sentences in the document:", len(sentences))
            max_input_len = 100
            batch_num = len(sentences) // max_input_len
            start_logits, end_logits, con_logits = [], [], []
            if batch_num > 0:
                for i in range(batch_num):
                    left, right = i * max_input_len, (i + 1) * max_input_len
                    if i == batch_num - 1 and len(sentences) - (i + 1) * max_input_len < 28:
                        # A short tail is folded into the remainder chunk below.
                        batch_num -= 1
                        break
                    l_edge = 10 if left > 0 else 0
                    r_edge = 10  # a few extra sentences of shared context on each side
                    start_logit, end_logit, con_logit = self.predict(sentences[left - l_edge: right + r_edge])
                    start_logits.append(start_logit[l_edge:-r_edge])
                    end_logits.append(end_logit[l_edge:-r_edge])
                    con_logits.append(con_logit[l_edge:-r_edge])
            if len(sentences) - batch_num * max_input_len > 0:
                left = batch_num * max_input_len
                l_edge = 10 if left > 0 else 0
                start_logit, end_logit, con_logit = self.predict(sentences[left - l_edge:])
                start_logits.append(start_logit[l_edge:])
                end_logits.append(end_logit[l_edge:])
                con_logits.append(con_logit[l_edge:])
            start_logits = torch.cat(start_logits, dim=0)
            end_logits = torch.cat(end_logits, dim=0)
            con_logits = torch.cat(con_logits, dim=0)
            # Predict without chunking:
            # start_logits, end_logits, con_logits = self.predict(sentences)
            pred_entities, topic_item_pred = topic_ner_decode(start_logits, end_logits, con_logits,
                                                              sentences, self.args.id2label)
            # pprint(dict(pred_entities))
            split_topic_idx = []
            for i in pred_entities['TOPIC']:
                split_topic_idx.append((i[-1], i[-1] + len(i[0])))
            print("split_topic_idx:", split_topic_idx)
            return dict(pred_entities), split_topic_idx, topic_item_pred
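

# Illustrative only, not used by the pipeline: a self-contained sketch of the
# windowing in NerPipeline.half_batch_predict. Each full window is padded with
# `edge` context sentences on each side, and the padding is sliced off the
# returned logits so the stitched result covers every sentence exactly once.
def _demo_half_batch_windows(num_sentences=230, max_input_len=100, edge=10):
    """Return (model input range, kept output slice) pairs for one document."""
    windows = []
    batch_num = num_sentences // max_input_len
    if batch_num > 0:
        for i in range(batch_num):
            left, right = i * max_input_len, (i + 1) * max_input_len
            if i == batch_num - 1 and num_sentences - (i + 1) * max_input_len < 28:
                batch_num -= 1  # fold a short tail into the remainder window
                break
            l_edge = edge if left > 0 else 0
            windows.append(((left - l_edge, right + edge), (l_edge, -edge)))
    if num_sentences - batch_num * max_input_len > 0:
        left = batch_num * max_input_len
        l_edge = edge if left > 0 else 0
        windows.append(((left - l_edge, num_sentences), (l_edge, None)))
    return windows


# _demo_half_batch_windows(230) ->
#   input [0:110],   keep output [0:-10]  -> sentences   0..99
#   input [90:210],  keep output [10:-10] -> sentences 100..199
#   input [190:230], keep output [10:]    -> sentences 200..229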


if __name__ == '__main__':
    args = NerArgs()
    model = UIEModel(args)
    ner_pipeline = NerPipeline(model, args)
    ner_pipeline.train(dev=True)
    # batch testing
    ner_pipeline.test()
    # single-document prediction
    # txts = [
    #     '七、解答(本题共8小题,每小题4分,共32分,在给出的四个选项中,', '每小题中只有一个选项符合题目要求。)', '11、如图所示,在$x>0$的空间中,存在沿$x$轴方向的匀强电场$E$;在$x<0$的空间中,存在沿$x$轴负方向的匀强电场,场强大小也为$E$。一电子$(-e,m)$在$x=d$处的$P$点以沿$y$轴正方向的初速度$v__0$', '开始运动,不计电子重力。求:(1)电子的$x$方向分运动的周期。(2)电子运动的轨迹与$y$轴的各个交点中,任意两个交点的距离。【图片】', '【答案】(1)$4sqrt(((2m*d)/(eE)))$;(2)$2nv__0sqrt(((2m*d)/(eE)))(n=1,2,3…)$。', '如图,一导热性能良好、内壁光滑的汽缸水平放置,横截面积$S=1.010^^-3m^2$、质量$m=2kg$、厚度不计的活', '塞与汽缸底部之', '间封闭了一部分理想气体,此时活塞与汽缸底部之间的距离$l=36cm$,在活塞的右侧距离其$d=14cm$处有一对与汽缸固定连接的卡环。气体的温度$t=27*℃$,外界大气压强$p__0=1.010^5Pa$。现将汽缸开口向上竖直放置($g$取$10m/s^2$)。 (1)求此时活塞与汽缸底部之间的距离$h$; (2)如果将缸内气体加热到$600K$,求此时气体的压强$p$。 【图片】', '【解析】(1)汽缸水平放置时:封闭气体的压强$p__1=p__0=1.010^5Pa$,温度$T__1=300K$,体积$V__1=lS$;汽缸竖直放置时:封闭气体的压强$p__2=p__0+((mg)/S)=1.210^5Pa$,温度$T__2=T__1=300K$,体', '积$V__2=hS$;由玻意耳定律$p__1V__1=p__2V__2$,$h=((p__1V__1)/(p__2S))$,解得$h=0.3m$ (2)温度升高,活塞刚达到卡环,气体做等压变化,此时$p__3=p__2$,$V__2=hS$,$V__3=(l+d)S$,$T__2=300K$,$((V__2)/(T__2))=((V__3)/(T__3))$,$T__3=500K$;汽缸内气体温度继续升高,气体做等容变化$p__3=1.210^5Pa$,$T__3=500K$,$T__4=600K$,$((p__3)/(T__3))=(p/(T__4))$,$p=1.4410^5Pa$ 答案:(1)$0.3m$; (2)$1.4410^5Pa$', '5、中国首艘航空母舰“辽宁”号正式交接入列和歼$-15$舰载战斗机成功起降,对于有效维护国家主权、促进世界和平与共同发展,具有重要意义.据了解,该航母长$304m$,宽$70.5m$,', '满载时吃水深度$11m$;若歼$-15$战斗机的质量为$3.310^4$kg,轮胎与甲板总接触面积$6000cm^2$,战斗机在约为$2.510^5N$推力作用下滑跑$20m$用时$5s$.求:$(g=10N/kg$,$ρ__海水=1.0310^3kg/m^3)$ (1)战斗机停在甲板上对甲板的压强; (2)推力做功的功率; (3)满载时海水对舰底的压强.', '答案:(1)战斗机停在甲板上对甲板', '的压强为$3.310^5N$', '; (2)推力做功的功率为$10^6W$; (3)满载时海水对舰底的压强$1.13310^5Pa$.', '解析:已知:$h=11m$,$m=3.310^4kg$,$S=6000cm^2=0.6m^2$,$F=2.510^5N$,$s=20m$,$t=5s$,$g=10N/kg$,$ρ__海水=1.0310^3kg/m^3$ 求:(1)战', '斗机停在甲板上对甲板的压强$p__1$; (2)推力做功的功率P; (3)满载时海水对舰底的压强$p__2$. 解:(1)战斗机对甲板的压力: $F__1=G=mg=3.310^4kg10N/kg=3.310^5N$, 对甲板的压强: $p__1=F__1S=3.310^5N0.6m^2=5.510^5Pa$; (2)推力做的功: $W=Fs=2.510^5N20m=510^6J$, 推力所做的功率: $P=Wt=510^6J5s=10^6W$; (3)满载时海水对舰底的压强: $p__2=ρ__海水gh=1.0310^3kg/m^310N/kg11m=1.13310^5Pa$. 答:(1)战斗机', '停在甲板上对甲板的压强为$3.310^5N$; (2)推力做功的功率为$10^6W$; (3)满载时海水对舰底的压强$1.13310^5Pa$.', '64、已知地球半径为R,地面的重力加度为g,将地球视为均第匀球体,求: (1)距地面$12R$高度处的力加速度 (2)地球的第一宇宙速度 (3)地球的自转周期为T,', '地球同步卫星离地面物的高度', '[答案](1)距地面$12R$高度处的重力加速度是$49g$ (2)地球的步第一宇宙速度是$sqrt(gR)$ (3)若地球的自转周期为T,地球同', '步卫离地面的高度是$root3*(gR^2T^24pi^2)-R$', '解析:(1)由于地球表面物体知随地球转而做圆运动的向心加速度体很小,地面上物体所受重力近似等于地球的万有引力,对地面上的', "物体有$GMmR^2=mg$,同理,对距地高度$12R$处的物体有$GMm*(RR2)^2=mg'$,解得$g'=49g$ (2)对地卫星环绕地心的匀速圆周运动有$GMmR^2=mv^2R$,解地球宇的第速一宇速度$v=sqrt(gR)$ $GMm*(Rh)^2=m*(Rh)*(2*πT)^2$,$GMmR^2=mg$,解得$h=root3*(gR^2T^24pi^2)-R$", '25、重力的方向总是垂直于接触面向下。( )', 'A.正确', 'B,错误', '在实验中,补偿阻力时要把物悬挂在细绳的一端。 A:正确', 'B,错误', '[答案]T', '4、任何情况下,物体的加速度方向始终与它所受的合力方向一致。( ) A 正确 B.错误', '【答案】不对的', '10.如果物体处于超重状态,它必然有向上的加速度。( )', 'A.正确', 'B、错误', '7.通过打出的纸带可以确定物体运动的时间和物体运动的位移。( ) A,正确', 'B.错误', '答案:错的', '六:多空(本题共11小题,总分48分。其中,1-7题为单选,每小题4分;8-11题为多选,每小题5分,全部选对的得5', '分,选对但不全的得2分,有选错得0分)', '【图片】用烧杯盛某种液体,测得液体体积V和液体与烧杯的共同质量m的关系如图所示.请观察图象并根据图象,求:烧杯的质量__________g;液体的', '密度$ρ__液=$________$kg/m^3$;其物理意义是_____.', '汤现姆孙在研究________的实验中发现了', '电子,电子的发现打破了___________的旧观。', '答案:阴极射;原子不可再分 【解析】汤姆孙在研究阴极射线的实验中发现了电子,电子的发现打破了子不可现再分的旧观念。', '1.我国载人登月已完成关键技术攻关,可送3人', '环月、2人登月。某实验小组在完成用单摆测量重力加速度实验后,讨论在月球上用单摆测量月球表面重力加速度的实验方案。用与地球上相同的实验设备,在月球上进行相同的实验,会得到更精确的结果,试写出其中的两个原因:__', '_____。', '【解析】1.周期变长,累积法测量周期的相对误差减小;2.月球上没有空气阻力,消除了空气阻尼引起的系统误差答案:周期变长,累积法测量周期的相对误差减小;月球上没有空气阻力,消除了空气阻尼引起的系统误差', '26.【图片】电阻A和B的电流与其两端电压的关系如图所示.由图可知,电阻A的阻', '值为________$Ω$;将A和B并联后接在$2.5V$的电源上,电路中的总电流为_A.', '10.教室中未搬入桌凳前说话常有嗡嗡的尾声,摆了桌凳坐满了学生后这种现象减轻到似乎听不到了,这是因为', '___________。', '答案:搬入桌凳和坐满学生后吸声面积增大,混响时间变短。'
    # ]
    # labels = [(2, 5), (5, 10), (10, 18), (18, 24), (24, 27), (27, 30), (30, 32), (32, 35), (35, 38), (40, 42), (42, 45), (45, 49), (49, 51), (51, 54)]
    # ner_pipeline.half_batch_predict(text_list=txts, labels=labels)
    # ner_pipeline.half_batch_predict()
    # print()
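

# A minimal sketch of the micro-averaged scoring used in train()/test(),
# assuming the conventional layout: calculate_metric returns per-type
# [tp, fp, fn] counts, np.sum(role_metric, axis=0) pools them over types, and
# get_p_r_f maps the pooled counts to precision/recall/F1. _micro_p_r_f is a
# hypothetical stand-in, not the implementation in PointerNet.utils.metrics.
def _micro_p_r_f(tp, fp, fn):
    p = tp / (tp + fp) if tp + fp > 0 else 0.0
    r = tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * p * r / (p + r) if p + r > 0 else 0.0
    return p, r, f1


# Example: pooled counts [tp, fp, fn] = [8, 2, 4] give
# _micro_p_r_f(8, 2, 4) -> (0.8, 0.666..., 0.727...)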