Initial commit

This commit is contained in:
Nik Rozman
2022-05-21 22:50:49 +02:00
parent 4e0dec0c2d
commit a36d8f3e63

32
scraper.py Normal file
View File

@@ -0,0 +1,32 @@
import base64
import pickle
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Scrape every page image of the folio viewer and save each one as
# "<page number>-stran.jpeg" in the current working directory.
#
# The viewer exposes the currently visible page as an <img> whose `src` is a
# base64 data URI; the script decodes that payload to disk, clicks the
# ".next-page" control, and repeats until that control is no longer enabled.
driver = webdriver.Chrome()
try:
    driver.get("https://folio.rokus-klett.si/?credit=NPBPLUS4SDZ&layout=single&pages=i")
    print("Page title is: " + driver.title)

    # One waiter is enough; it is reused for every explicit wait below.
    wait = WebDriverWait(driver, 30)
    wait.until(expected_conditions.presence_of_element_located((By.CSS_SELECTOR, ".pdf-wrap")))

    visible_leaf_xpath = "//fl-single-leaf[contains(@style,'opacity: 1')]"

    # NOTE(review): numbering deliberately starts at -1, as in the original
    # script — presumably to offset front-matter pages; confirm.
    i = -1
    while True:
        # Wait until a leaf with "opacity: 1" in its style is present.
        wait.until(expected_conditions.presence_of_element_located((By.XPATH, visible_leaf_xpath)))
        src = driver.find_element(By.XPATH, visible_leaf_xpath + "/fl-page/fl-page-pdf/img").get_attribute("src")
        print("Številka strani: " + str(i))

        # Keep only the payload after the "base64," marker of the data URI.
        # Fail loudly with a clear message instead of an opaque
        # IndexError/AttributeError when the image source is missing or is
        # not an inline base64 image.
        _, marker, strImage = (src or "").partition("base64,")
        if not marker:
            raise ValueError("Page " + str(i) + ": image src is not a base64 data URI")

        ime = str(i) + "-stran.jpeg"
        # `with` guarantees the file handle is closed even if decoding or
        # writing raises.
        with open(ime, "wb") as out:
            out.write(base64.b64decode(strImage))

        # Save first, then decide whether to advance: checking the button
        # before saving (as the loop originally did) skips the final page,
        # because the "next" control is already disabled there.
        element = driver.find_element(By.CSS_SELECTOR, ".next-page")
        if not element.is_enabled():
            break
        element.click()
        i = i + 1
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and running it in `finally` prevents orphaned browsers when a wait
    # times out or an element is missing.
    driver.quit()