"""Save every page of an online "folio" e-book viewer as a local image file.

The script opens the viewer in Chrome, reads the base64 data URI from the
``<img>`` of the currently visible page, decodes it, writes it to
``<page number>-stran.jpeg`` in the current working directory, and then
clicks the viewer's "next page" button. It stops once that button reports
itself as disabled.
"""

import base64
import pickle
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait

BOOK_URL = "https://folio.rokus-klett.si/?credit=NPBPLUS4SDZ&layout=single&pages=i"

# Upper bound, in seconds, for every explicit wait in this script.
WAIT_SECONDS = 30

VIEWER_SELECTOR = ".pdf-wrap"
NEXT_PAGE_SELECTOR = ".next-page"

# The <img> inside the leaf whose inline style contains "opacity: 1".
# NOTE(review): presumably only the page currently shown has that opacity;
# confirm against the viewer's markup.
VISIBLE_PAGE_IMG_XPATH = (
    "//fl-single-leaf[contains(@style,'opacity: 1')]/fl-page/fl-page-pdf/img"
)

# Everything after this marker in the image's ``src`` is the base64 payload.
DATA_URI_MARKER = "base64,"

# Number used in the file name of the first saved page.
# NOTE(review): -1 is kept from the original script; presumably the book has
# unnumbered leading pages -- confirm before changing.
FIRST_PAGE_NUMBER = -1


def _wait_for_page_data_uri(driver):
    """Return the visible page image's ``src`` once it is a base64 data URI.

    Polls for up to ``WAIT_SECONDS``. A missing element or a stale element
    reference is treated as "not ready yet" and retried.

    Raises:
        selenium.common.exceptions.TimeoutException: if no base64 data URI
            shows up within ``WAIT_SECONDS``.
    """

    def _current_data_uri(drv):
        src = drv.find_element(By.XPATH, VISIBLE_PAGE_IMG_XPATH).get_attribute("src")
        # ``src`` may be None or a non-data URL; a falsy return makes
        # WebDriverWait poll again instead of failing later on the split.
        return src if src and DATA_URI_MARKER in src else False

    wait = WebDriverWait(
        driver,
        WAIT_SECONDS,
        ignored_exceptions=(StaleElementReferenceException,),
    )
    return wait.until(_current_data_uri)


def _save_page(data_uri, page_number):
    """Decode the base64 payload of ``data_uri`` into ``<page_number>-stran.jpeg``."""
    encoded_image = data_uri.split(DATA_URI_MARKER, 1)[1]
    file_name = f"{page_number}-stran.jpeg"
    # ``with`` closes the handle even if decoding or writing raises.
    with open(file_name, "wb") as image_file:
        image_file.write(base64.b64decode(encoded_image))


def main():
    """Walk through the book page by page and save each page image to disk."""
    driver = webdriver.Chrome()
    try:
        driver.get(BOOK_URL)
        print("Page title is: " + driver.title)
        WebDriverWait(driver, WAIT_SECONDS).until(
            expected_conditions.presence_of_element_located(
                (By.CSS_SELECTOR, VIEWER_SELECTOR)
            )
        )

        page_number = FIRST_PAGE_NUMBER
        while True:
            data_uri = _wait_for_page_data_uri(driver)
            print("Številka strani: " + str(page_number))
            _save_page(data_uri, page_number)

            # Check the button only AFTER saving: testing it first would
            # skip the page that is shown when the button becomes disabled.
            next_button = driver.find_element(By.CSS_SELECTOR, NEXT_PAGE_SELECTOR)
            if not next_button.is_enabled():
                break
            next_button.click()
            page_number += 1
            # NOTE(review): if the viewer swaps pages asynchronously, the
            # next wait could still see the previous page's image; verify
            # against the live site and add a "src changed" wait if needed.
    finally:
        # quit() ends the whole WebDriver session, also when a step above
        # raised.
        driver.quit()


if __name__ == "__main__":
    main()