"""Collect webb-tv video links from the riksdagen.se search result pages.

For every ``riksmote`` value from ``2000/01`` through ``2025/26`` the script
walks the paginated search results, picks out the anchors whose ``href``
contains the webb-tv URL marker, and stores them in the ``webb_tv``
collection through the project's ``arango`` client.
"""

from time import sleep

import bs4
import requests

from arango_client import arango

# Substring that identifies a link to a video page.
VIDEO_URL_MARKER = "https://www.riksdagen.se/sv/webb-tv/"
# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30
# Seconds to pause between result pages.
PAGE_DELAY = 2


def _video_docs(video_links):
    """Build the documents to store for a list of video URLs.

    The document key is the text after the last ``_`` in the URL with every
    ``/`` removed. URLs that contain no ``_`` are skipped.

    Args:
        video_links: Iterable of URL strings.

    Returns:
        A list of ``{"_key": ..., "url": ...}`` dicts, in input order.
    """
    docs = []
    for link in video_links:
        if "_" not in link:
            continue
        key = link.split("_")[-1].replace("/", "")
        docs.append({"_key": key, "url": link})
    return docs


def _scrape_session(year):
    """Fetch every search result page for one ``riksmote`` value and store the links.

    Args:
        year: Two-digit start year offset (``0`` means ``2000/01``).

    Raises:
        requests.RequestException: If a request times out, fails, or returns
            an HTTP error status.
    """
    # The search expects zero-padded two-digit years, e.g. "2005%2F06".
    start = str(year).zfill(2)
    end = str(year + 1).zfill(2)

    page = 1
    while True:
        print(f"Fetching page {page}...")
        url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&riksmote=20{start}%2F{end}&p={page}"
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # An error page contains no video links and would otherwise be
        # mistaken for the end of the results, silently dropping pages.
        response.raise_for_status()

        soup = bs4.BeautifulSoup(response.content, "html.parser")
        video_links = [
            anchor["href"]
            for anchor in soup.find_all("a")
            if VIDEO_URL_MARKER in anchor.get("href", "")
        ]
        # A page without any video link marks the end of this session.
        if not video_links:
            break

        docs = _video_docs(video_links)
        # Skip the database round trip when no link produced a key.
        if docs:
            arango.db.collection("webb_tv").insert_many(docs, overwrite=True)

        sleep(PAGE_DELAY)
        page += 1


for year in range(0, 26):
    _scrape_session(year)
print("Done.")