Initial commit
This commit is contained in:
32
scraper.py
Normal file
32
scraper.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import base64
|
||||
import pickle
|
||||
from time import sleep
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.support import expected_conditions
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
|
||||
# Download every page shown by the folio viewer as a JPEG file.
#
# Each visible page is read from an <img> element whose ``src`` carries the
# image as base64 text after a "base64," marker.  The payload is decoded and
# written to "<n>-stran.jpeg" in the current working directory, then the
# ".next-page" control is clicked to advance to the following page.
driver = webdriver.Chrome()
try:
    driver.get("https://folio.rokus-klett.si/?credit=NPBPLUS4SDZ&layout=single&pages=i")
    print("Page title is: " + driver.title)

    # Block until the viewer container exists before touching its controls.
    WebDriverWait(driver, 30).until(
        expected_conditions.presence_of_element_located((By.CSS_SELECTOR, ".pdf-wrap"))
    )

    # The leaf currently on screen is the one styled with "opacity: 1".
    visible_leaf_xpath = "//fl-single-leaf[contains(@style,'opacity: 1')]"

    # Page counter used in the output file names; it starts at -1 exactly as
    # before so existing file names do not shift.
    # NOTE(review): presumably the first leaves are cover pages - confirm.
    i = -1

    while True:
        # NOTE(review): right after a click this wait may be satisfied by the
        # previously visible leaf; if duplicate images appear, additionally
        # wait for the <img> src to change before saving.
        WebDriverWait(driver, 30).until(
            expected_conditions.presence_of_element_located((By.XPATH, visible_leaf_xpath))
        )
        src = driver.find_element(
            By.XPATH, visible_leaf_xpath + "/fl-page/fl-page-pdf/img"
        ).get_attribute("src")
        print("Številka strani: " + str(i))

        # Fail with a clear message instead of an opaque IndexError or
        # AttributeError when the image is not an inline base64 data URI.
        if not src or "base64," not in src:
            raise ValueError("page " + str(i) + ": img src is not a base64 data URI")
        strImage = src.split("base64,", 1)[1]

        ime = str(i) + "-stran.jpeg"
        # Context manager guarantees the file is closed even if decoding or
        # writing raises.
        with open(ime, "wb") as file:
            file.write(base64.b64decode(strImage))

        # Check the button only AFTER saving, so the last page (where the
        # button is no longer enabled) is written as well.
        next_button = driver.find_element(By.CSS_SELECTOR, ".next-page")
        if not next_button.is_enabled():
            break
        next_button.click()
        i = i + 1
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and running it in ``finally`` avoids leaving Chrome open on errors.
    driver.quit()
|
||||
Reference in New Issue
Block a user