You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
2.3 KiB
64 lines
2.3 KiB
import requests |
|
from bs4 import BeautifulSoup |
|
from io import BytesIO |
|
from urllib.request import urlopen |
|
from zipfile import ZipFile |
|
import os |
|
from time import sleep |
|
import talks2db |
|
|
|
|
|
# Start year of the first season handled by ``download(all=True)`` and the
# exclusive upper bound of that range.
_FIRST_SEASON_YEAR = 1999
_END_SEASON_YEAR = 2026

_DATASET_BASE_URL = "https://data.riksdagen.se/dataset/anforande"
_TALKS_DIR = "talks"


def _season_parts(year):
    """Return the season's name fragments, e.g. 2005 -> ('2005', '06')."""
    return str(year), str(year + 1)[2:]


def _season_url(year):
    """Return the zip archive URL for the season starting in *year*."""
    if year == 1999:
        # The 1999/2000 archive spells out both years in full instead of
        # using the two-digit suffix every other season uses.
        return f"{_DATASET_BASE_URL}/anforande-19992000.json.zip"
    first_part, second_part = _season_parts(year)
    return f"{_DATASET_BASE_URL}/anforande-{first_part}{second_part}.json.zip"


def _season_dir(year):
    """Return the local extraction directory for the season starting in *year*."""
    first_part, second_part = _season_parts(year)
    # NOTE: the literal "20" prefix yields names such as "anforande-20200506".
    # It is kept unchanged so directories created by earlier runs are still
    # recognised and anything else reading these paths keeps working.
    return os.path.join(_TALKS_DIR, f"anforande-20{first_part}{second_part}")


def _download_season(year):
    """Download and unpack one season's archive unless it is already present."""
    url = _season_url(year)
    print(url)

    dir_name = _season_dir(year)
    if os.path.exists(dir_name) and os.listdir(dir_name):
        print(f"Skipping {dir_name}, already exists and is not empty.")
        return

    # Also creates the parent 'talks' directory when it is missing.
    os.makedirs(dir_name, exist_ok=True)

    # The whole archive is read into memory and extracted straight into the
    # season directory; no zip file is written to disk.
    with urlopen(url) as zipresp:
        with ZipFile(BytesIO(zipresp.read())) as zfile:
            zfile.extractall(dir_name)


def download(all=False, year=None):
    """Download anforande (speech) dataset archives from data.riksdagen.se.

    Each season's zip archive is extracted into its own sub-directory of
    ``talks/`` (relative to the current working directory). A season whose
    directory already exists and is non-empty is skipped.

    Args:
        all: When truthy, process every season starting in 1999 through 2025
            and ignore *year*.
        year: Start year (int) of a single season to process, e.g. ``2005``
            for the 2005/06 season. Only used when *all* is falsy.

    Returns:
        None. When *all* is falsy and *year* is falsy, nothing is done.

    Raises:
        urllib.error.URLError: If an archive cannot be fetched.
        zipfile.BadZipFile: If the fetched payload is not a valid zip file.
    """
    if all:
        for season_year in range(_FIRST_SEASON_YEAR, _END_SEASON_YEAR):
            _download_season(season_year)
    elif year:
        _download_season(year)
|
|
|
if __name__ == "__main__":
    # Pause between polling passes. The value is a conservative default (one
    # day); adjust it to how often new archives are expected.
    poll_interval_seconds = 24 * 60 * 60

    while True:
        # download() with its default arguments matches neither branch and
        # does nothing, so every season is requested explicitly. Seasons whose
        # directory is already populated are skipped, which keeps repeat
        # passes cheap.
        download(all=True)
        # Without a pause this loop would spin at full CPU.
        sleep(poll_interval_seconds)
|
|
|
|