Initial commit
This commit is contained in:
32
scraper.py
Normal file
32
scraper.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import base64
|
||||||
|
import pickle
|
||||||
|
from time import sleep
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
|
from selenium.webdriver.common.keys import Keys
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
|
from selenium.webdriver.support import expected_conditions
|
||||||
|
from selenium.webdriver.common.keys import Keys
|
||||||
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
|
|
||||||
|
# Walk through the folio viewer page by page and save each page image to the
# current working directory as "<index>-stran.jpeg".
driver = webdriver.Chrome()
try:
    driver.get("https://folio.rokus-klett.si/?credit=NPBPLUS4SDZ&layout=single&pages=i")
    print("Page title is: " + driver.title)

    wait = WebDriverWait(driver, 30)

    # Do not touch the navigation controls until the viewer container exists.
    wait.until(expected_conditions.presence_of_element_located((By.CSS_SELECTOR, ".pdf-wrap")))

    # XPath of the <img> inside the leaf that is currently shown (opacity: 1).
    visible_page_img = "//fl-single-leaf[contains(@style,'opacity: 1')]/fl-page/fl-page-pdf/img"

    element = driver.find_element(By.CSS_SELECTOR, ".next-page")
    # Page counter used in the output file names; starts at -1
    # (presumably to account for unnumbered front pages - TODO confirm).
    i = -1

    # NOTE(review): the loop stops as soon as the "next" control reports
    # disabled, *before* saving the page shown at that moment - so the final
    # page may never be written. Confirm against the site before changing.
    # NOTE(review): after a click the previous leaf may still match the
    # 'opacity: 1' XPath for a moment, so the same image could be captured
    # twice - verify whether an extra wait for the page change is needed.
    while element.is_enabled():
        # Wait for the image node itself (not just its leaf) so the lookup
        # cannot fail when the <img> is attached slightly after the leaf.
        img = wait.until(
            expected_conditions.presence_of_element_located((By.XPATH, visible_page_img))
        )
        src = img.get_attribute("src")
        print("Številka strani: " + str(i))

        # The src is expected to be a data URI; keep only the base64 payload.
        strImage = src.split("base64,")[1]
        ime = str(i) + "-stran.jpeg"

        # Context manager guarantees the file is closed even if decoding fails.
        with open(ime, "wb") as file:
            file.write(base64.b64decode(strImage))

        # Advance to the next page, then re-locate the control so the loop
        # condition checks its state after the click.
        element.click()
        element = driver.find_element(By.CSS_SELECTOR, ".next-page")
        i = i + 1
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even when a wait times out or an element is missing.
    driver.quit()
|
||||||
Reference in New Issue
Block a user