BD_OCR.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. from selenium import webdriver
  2. from selenium.webdriver.support.wait import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. import time
  6. import traceback
  7. import re
  8. def bd_ocr_file(pictures):
  9. browser = webdriver.Chrome()
  10. browser.implicitly_wait(5)
  11. browser.maximize_window()
  12. browser.get('http://ai.baidu.com/tech/ocr/general')
  13. browser.execute_script("window.scrollTo(0, 850)")
  14. texts = []
  15. wait = WebDriverWait(browser, 5)
  16. wait.until(EC.presence_of_element_located((By.ID, "demo-photo-upload"))) # 等待id为table的元素被加载出来
  17. for picture in pictures:
  18. time.sleep(2)
  19. print("开始传文件")
  20. try:
  21. browser.find_element_by_css_selector('input[type="file"]').send_keys(picture)
  22. time.sleep(3)
  23. html = browser.find_element_by_id("demo-json").text
  24. res = re.compile(r'"words": "(.*)?"').findall(html)
  25. res = ' '.join(res)
  26. # print("开始图片识别")
  27. if not res:
  28. res = '空白'
  29. # print(res)
  30. texts.append(res)
  31. # print("完成图片识别\n")
  32. except Exception as e:
  33. print(e)
  34. traceback.print_exc()
  35. browser.quit()
  36. browser.quit()
  37. return texts