12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- from selenium import webdriver
- from selenium.webdriver.support.wait import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- import time
- import traceback
- import re
def bd_ocr_file(pictures):
    """Run Baidu's online OCR demo page over image files and collect the text.

    Starts Chrome through Selenium, loads the general-OCR demo page, uploads
    each entry of ``pictures`` through the page's file input, and scrapes the
    recognised ``"words"`` values out of the JSON result panel
    (element id ``demo-json``).

    Args:
        pictures: Iterable of image file paths. Each one is typed into the
            page's ``input[type="file"]`` element. (Presumably these must be
            absolute local paths, as browser file uploads normally require;
            confirm against callers.)

    Returns:
        A list of strings in input order, one per picture that was processed
        successfully. Each entry is the recognised ``"words"`` values joined
        by single spaces, or ``'空白'`` when no words were found. Processing
        stops at the first picture that fails, so the list may be shorter
        than ``pictures``.

    Raises:
        Any exception raised while starting the browser, loading the page, or
        waiting for the upload widget (e.g. Selenium's ``TimeoutException``).
        The browser is closed before the exception propagates.
    """
    # Compiled once; the pattern does not change between pictures.
    words_pattern = re.compile(r'"words": "(.*)?"')
    texts = []

    browser = webdriver.Chrome()
    try:
        browser.implicitly_wait(5)
        browser.maximize_window()
        browser.get('http://ai.baidu.com/tech/ocr/general')
        # Scroll down so the demo area is in view.
        browser.execute_script("window.scrollTo(0, 850)")

        # Wait until the element with id "demo-photo-upload" is present.
        WebDriverWait(browser, 5).until(
            EC.presence_of_element_located((By.ID, "demo-photo-upload"))
        )

        for picture in pictures:
            time.sleep(2)
            print("开始传文件")
            try:
                upload_input = browser.find_element(
                    By.CSS_SELECTOR, 'input[type="file"]'
                )
                upload_input.send_keys(picture)
                # Fixed pause to let the page finish recognition before the
                # result panel is read.
                # NOTE(review): a fixed sleep may read a stale result on a
                # slow connection; confirm whether an explicit wait is needed.
                time.sleep(3)
                html = browser.find_element(By.ID, "demo-json").text
            except Exception as e:
                print(e)
                traceback.print_exc()
                # The session is not trusted after a failure. Stop here
                # instead of retrying every remaining picture against a
                # broken browser; the ``finally`` below closes it.
                break

            res = ' '.join(words_pattern.findall(html))
            texts.append(res if res else '空白')
    finally:
        # Always release the browser, including on setup failures.
        browser.quit()

    return texts
|