You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

38 lines
1.1 KiB

import bs4
import requests
from time import sleep
from arango_client import arango
# Substring that identifies a webb-tv video link among all anchors on a page.
VIDEO_URL_MARKER = "https://www.riksdagen.se/sv/webb-tv/"

# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

# Pause between consecutive result pages.
PAGE_DELAY_SECONDS = 2


def video_key(link):
    """Return the part of *link* after its last underscore, slashes removed.

    Returns an empty string when *link* contains no underscore, or when
    nothing usable follows the last one.
    """
    _, separator, tail = link.rpartition("_")
    if not separator:
        return ""
    return tail.replace("/", "")


def build_docs(video_links):
    """Build the documents to store for one page of video links.

    Each document has the shape ``{"_key": <video_key(link)>, "url": <link>}``.
    Links that yield no key are skipped. When several links on the page share
    a key, only the last one is kept, which is the same end state an
    overwriting insert of all of them would produce.
    """
    docs_by_key = {}
    for link in video_links:
        key = video_key(link)
        if key:
            docs_by_key[key] = {"_key": key, "url": link}
    return list(docs_by_key.values())


def fetch_video_links(url):
    """Fetch *url* and return the hrefs of all anchors that look like videos.

    An HTTP error status is reported on stdout and yields an empty list, so
    the caller stops paging instead of mistaking an error page for results.
    Connection errors and timeouts propagate as ``requests`` exceptions.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    if not response.ok:
        print(f"Request failed with HTTP {response.status_code}: {url}")
        return []
    soup = bs4.BeautifulSoup(response.content, "html.parser")
    return [
        anchor["href"]
        for anchor in soup.find_all("a")
        if VIDEO_URL_MARKER in anchor.get("href", "")
    ]


def main():
    """Walk the webb-tv search results and store every video link found.

    Iterates the ``riksmote`` values 2000/01 through 2025/26, paging through
    each until a page has no video links, and inserts the links of every page
    into the ``webb_tv`` collection.
    """
    for y in range(0, 26):
        # The riksmote parameter is built from two zero-padded two-digit years.
        y1 = str(y).zfill(2)
        y2 = str(y + 1).zfill(2)
        page = 1
        while True:
            print(f"Fetching page {page}...")
            url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&riksmote=20{y1}%2F{y2}&p={page}"
            video_links = fetch_video_links(url)
            if not video_links:
                # No video links: past the last page for this riksmote.
                break
            docs = build_docs(video_links)
            if docs:
                arango.db.collection("webb_tv").insert_many(docs, overwrite=True)
            sleep(PAGE_DELAY_SECONDS)
            page += 1
    print("Done.")


if __name__ == "__main__":
    main()