You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

64 lines
2.3 KiB

import requests
from bs4 import BeautifulSoup
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
import os
from time import sleep
import talks2db
def _archive_url(year):
    """Return the URL of the speech ("anforande") archive for the session starting in *year*."""
    first_part = str(year)
    if first_part == '1999':
        # The 1999/2000 session is the one archive whose name spells out
        # both years in full instead of using a two-digit suffix.
        return 'https://data.riksdagen.se/dataset/anforande/anforande-19992000.json.zip'
    second_part = str(year + 1)[2:]
    return f"https://data.riksdagen.se/dataset/anforande/anforande-{first_part}{second_part}.json.zip"


def _fetch_session(year):
    """Download and unpack the archive for one session unless it is already on disk."""
    url = _archive_url(year)
    print(url)

    # Ensure the 'talks' directory exists
    talks_dir = "talks"
    os.makedirs(talks_dir, exist_ok=True)

    # One subdirectory per session. The "20" prefix in front of the full
    # year is kept exactly as before so that directories created by earlier
    # runs are still recognised and not downloaded a second time.
    first_part = str(year)
    second_part = str(year + 1)[2:]
    dir_name = os.path.join(talks_dir, f"anforande-20{first_part}{second_part}")
    if os.path.exists(dir_name) and os.listdir(dir_name):
        print(f"Skipping {dir_name}, already exists and is not empty.")
        return
    os.makedirs(dir_name, exist_ok=True)

    # Read the whole zip into memory and extract it straight into the
    # session directory; nothing is written to disk besides the contents.
    with urlopen(url) as zipresp, ZipFile(BytesIO(zipresp.read())) as zfile:
        zfile.extractall(dir_name)


def download(all=False, year=None):
    """Download speech archives from data.riksdagen.se into ``talks/``.

    Args:
        all: When true, fetch every session starting in 1999 through 2025.
            Takes precedence over *year*. (The name shadows the builtin but
            is part of the public signature, so it is left unchanged.)
        year: Starting year (int) of a single session to fetch. Ignored
            when *all* is true; a falsy value means "nothing to do".

    Returns:
        None. Sessions whose target directory already exists and is
        non-empty are skipped with a message on stdout.

    Raises:
        Whatever ``urlopen`` or ``ZipFile`` raise on network or archive
        errors; these are not caught here.
    """
    if all:
        for session_year in range(1999, 2026):
            _fetch_session(session_year)
    elif year:
        # Goes through the same helper as the bulk path, so a single-year
        # request for 1999 now resolves to the 19992000 archive as well.
        _fetch_session(year)
if __name__ == "__main__":
while True:
new_files = download()