Add files via upload

2023-06-06 09:06:26 +02:00 · 2023-06-06 09:06:26 +02:00 · fb69412423
commit fb69412423
parent 917efca97a
3 changed files with 999 additions and 0 deletions
--- a/app.py
+++ b/app.py
@ -0,0 +1,807 @@
+import traceback
+from datetime import datetime
+
+import altair as alt
+import matplotlib.pyplot as plt
+import pandas as pd
+import requests
+import sqlalchemy
+import streamlit as st
+
+from config import db_name
+from config import db_user as user
+from config import ip_server as ip
+from config import pwd_postgres as pwd
+from info import (
+    explainer,
+    limit_warning,
+    months_conversion,
+    party_colors,
+    party_colors_lighten,
+    select_columns,
+    css,
+)
+
+
+class Params:
+    """Search settings mirrored in the query string of the page URL.
+
+    Args:
+        params (dict): Mapping of key -> list of strings, as returned by
+            ``st.experimental_get_query_params()``.
+
+    Attributes:
+        q (str): The search string ("" when absent).
+        parties, persons, debates (list): Selected filter values.
+        from_year, to_year: Year range. The URL value is kept as the string
+            it arrived as; the catch-all defaults are ints.
+    """
+
+    def __init__(self, params):
+        self.params = params
+        # Set parameters.
+        self._load()
+
+    def _load(self):
+        """(Re)derive every attribute from ``self.params``."""
+        self.q = self.set_param("q")
+        self.parties = self.set_param("parties")
+        self.persons = self.set_param("persons")
+        self.from_year = self.set_param("from_year")
+        self.to_year = self.set_param("to_year")
+        self.debates = self.set_param("debates")
+
+    def set_param(self, key):
+        """Return the value for ``key``, or its default if missing or blank."""
+        # A key that is present but holds an empty list (as after reset())
+        # is treated as missing instead of raising IndexError.
+        if key in self.params and self.params[key]:
+            raw = self.params[key][0]
+            if key in ["parties", "persons", "debates"]:
+                # update() writes an empty selection as "", which would
+                # otherwise come back as [""] instead of [].
+                return [item for item in raw.split(",") if item]
+            return raw
+
+        if key == "q":
+            return ""
+        if key == "from_year":
+            return 1993  # Catch all.
+        if key == "to_year":
+            return 2030  # Catch all.
+        return []
+
+    def update(self):
+        """Write the current attribute values to the URL query string."""
+        st.experimental_set_query_params(
+            q=self.q,
+            from_year=self.from_year,
+            to_year=self.to_year,
+            parties=",".join(self.parties),
+            debates=",".join(self.debates),
+            persons=",".join(self.persons),
+        )
+
+    def reset(self, q=False):
+        """Drop all stored filters and fall back to the defaults.
+
+        Args:
+            q: New search string. When falsy, the current ``q`` is kept.
+        """
+        for key in self.params:
+            self.params[key] = []
+        current_q = self.q
+        # Previously only the raw dict was cleared, so the attributes that
+        # update() writes back to the URL were never actually reset.
+        self._load()
+        self.q = q if q else current_q
+
+
+def datestring_to_date(x):
+    """Convert a "day month year" string to "year-month-day".
+
+    Args:
+        x (str): Space-separated date whose second part is a key of
+            ``months_conversion``.
+
+    Returns:
+        str: The date as "YYYY-MM-DD".
+
+    Raises:
+        IndexError: If ``x`` has fewer than three space-separated parts.
+        KeyError: If the month name is not in ``months_conversion``.
+    """
+    date_list = x.split(" ")
+    day, month, year = date_list[0], date_list[1], date_list[2]
+    # Zero-pad the day so the result sorts correctly as a string.
+    return f"{year}-{months_conversion[month]}-{day.zfill(2)}"
+
+
+def make_snippet(text, search_terms, long=False):
+    """Return an extract of ``text`` showing the search terms in context.
+
+    Args:
+        text (str): The full text of a speech.
+        search_terms (list | str): The search terms (``*`` wildcards are
+            ignored here), or the string "speaker", in which case the
+            extract is simply the beginning of the text.
+        long (bool): Return a longer extract.
+
+    Returns:
+        str: For "speaker", the first 80 (300 if ``long``) characters with
+        "..." appended when truncated. Otherwise one context window per term
+        found, joined with "|" and wrapped in "...".
+    """
+    text = text.replace("Fru talman! ", "").replace("Herr talman! ", "")
+
+    if search_terms == "speaker":
+        limit = 300 if long else 80
+        snippet = text[:limit]
+        if len(text) > limit:  # Mark truncation once, and only when it happened.
+            snippet += "..."
+        return snippet
+
+    # Words of context per hit; the budget is shared between the terms.
+    # max() guards against an empty term list and against a zero-width
+    # window when there are more than eight terms.
+    context = max(1, 8 // max(len(search_terms), 1))
+    if long:
+        context *= 4
+
+    # NOTE(review): indexes found in text_lower are applied to text, which
+    # assumes lower() does not change the length of the string.
+    text_lower = text.lower()
+    words = text.split(" ")
+    words_lower = text_lower.split(" ")
+
+    parts = []
+    for term in search_terms:
+        term = term.replace("*", "").strip().lower()
+        if not term:
+            continue
+
+        if term in words_lower:
+            # The term is a whole word: cut the window on word boundaries.
+            position = words_lower.index(term)
+            start = max(position - context, 0)
+            # Always include the hit itself, and clamp to the end of the
+            # text (slice ends are exclusive, so len(words) is correct).
+            end = min(position + max(context // 2, 1), len(words))
+            parts.append(" ".join(words[start:end]))
+
+        elif term in text_lower:
+            # The term is a phrase or part of a word: cut on characters and
+            # then widen to the nearest space so no word is split.
+            position = text_lower.find(term)
+
+            window_start = position - context * 5
+            if window_start <= 0:
+                start = 0
+            else:
+                space = text_lower.find(" ", window_start, position)
+                start = space + 1 if space != -1 else window_start
+
+            window_end = position + len(term) + context * 4
+            if window_end >= len(text):
+                end = len(text)
+            else:
+                space = text_lower.find(" ", window_end)
+                end = space if space != -1 else len(text)
+
+            # Keep ``text`` intact so later terms search the whole speech.
+            parts.append(text[start:end])
+
+        # A term that is not a substring of the text cannot be part of any
+        # single word either, so there is nothing more to look for.
+
+    return f"...{'|'.join(parts)}..."
+
+
+def build_style_parties(parties):
+    """Return a <style> element colouring each party tag in a multiselect.
+
+    Each entry in ``parties`` must be a key of ``party_colors``.
+    """
+    rules = [
+        f' span[data-baseweb="tag"][aria-label="{code}, close by backspace"]{{ background-color: {party_colors[code]}}} .st-eg {{min-width: 14px;}} '
+        for code in parties
+    ]
+    return "<style> " + "".join(rules) + "</style>"
+
+
+def build_style_mps(mps):
+    """Return a <style> element colouring each speaker tag by party.
+
+    The party code is taken from between the parentheses of each option and
+    normalised with ``fix_party``. Unknown codes get the colour stored under
+    "-" in ``party_colors``.
+    """
+    pieces = ["<style> "]
+    for option in mps:
+        code = fix_party(option[option.find("(") + 1 : option.find(")")].upper())
+        if code in party_colors:
+            color = party_colors[code]
+        else:
+            color = party_colors["-"]
+        pieces.append(
+            f' span[data-baseweb="tag"][aria-label="{option}, close by backspace"]{{ background-color: {color};}} .st-eg {{min-width: 14px;}} '
+        )
+    pieces.append("</style>")
+    return "".join(pieces)
+
+
+def fix_party(party):
+    """Return ``party`` upper-cased, with old party codes replaced by new ones."""
+    code = party.upper()
+    for old, new in (("KDS", "KD"), ("FP", "L")):
+        code = code.replace(old, new)
+    return code
+
+
+def build_style_debate_types(debates):
+    """Return a <style> element giving every debate-type tag a grey background."""
+    rules = "".join(
+        f' span[data-baseweb="tag"][aria-label="{label}, close by backspace"]{{ background-color: #767676;}} .st-eg {{min-width: 14px;}}'
+        for label in debates
+    )
+    return f"<style> {rules}</style>"
+
+
+def highlight_cells(party):
+    """Return the CSS for a table cell: party colour if the value is a party code.
+
+    Args:
+        party: The cell value.
+
+    Returns:
+        str: A CSS declaration string, or "" for cells that are not a key of
+        ``party_colors``.
+    """
+    color = party_colors.get(party)
+    if color is None:
+        return ""
+    # font-weight takes a keyword; the quoted 'bold' was invalid CSS.
+    return f"background-color: {color}; font-weight: bold"
+
+
+@st.cache_data
+def options_persons(df):
+    """Return one "<speaker> - <number of rows>" option per value of "Talare"."""
+    counts = {speaker: len(rows) for speaker, rows in df.groupby("Talare")}
+    return [f"{speaker} - {count}" for speaker, count in counts.items()]
+
+
+@st.cache_data
+def get_data(sql):
+    """Get data from SQL database.
+
+    Reads the module-level ``engine``, ``return_limit``, ``parties`` and
+    ``search_terms``.
+
+    Args:
+        sql (str): A SQL query string.
+
+    Returns:
+        DataFrame: Dataframe with some adjustments to the data fetched from the DB.
+        The result is returned uncleaned when it is empty or hit the row
+        limit, because the caller stops in both cases.
+    """
+    df = pd.read_sql(sql, engine)
+
+    if df.shape[0] not in [0, return_limit]:
+        # Clean the data and change some column names.
+        # "KDS" must become "KD" (not "Kd"): the party filter below only
+        # keeps codes that are keys of party_colors.
+        df["Parti"] = df["Parti"].replace({"FP": "L", "KDS": "KD"})
+        df["debatetype"] = df["debatetype"].replace(
+            {"": "inte angiven debattyp", "-": "inte angiven debattyp"}
+        )
+        # Strip the paragraph tags: opening tags vanish, closing tags become
+        # a space so that paragraphs do not run together.
+        df["Anförande"] = df["Text"].apply(
+            lambda x: x.replace("<p>", "").replace("</p>", " ").replace("-\n", " ")
+        )
+        # copy() so the assignments below do not write to a view.
+        df = df.loc[df["Parti"].isin(parties)].copy()
+        df["url_session"] = df["url_session"].apply(
+            lambda x: "https://riksdagen.se" + str(x)
+        )  # Add domain to url.
+
+        df = df.sort_values(["Datum", "number"], axis=0, ascending=True)
+
+        # Make snippets from the text field (short and long).
+        df["Utdrag"] = df["Text"].apply(lambda x: make_snippet(x, search_terms))
+        df["Utdrag_long"] = df["Text"].apply(
+            lambda x: make_snippet(x, search_terms, long=True)
+        )
+
+    df = df.drop_duplicates(ignore_index=True)
+
+    return df
+
+
+@st.cache_data
+def define_search_terms(user_input):
+    """Takes user input and make them into search terms for SQL.
+
+    Args:
+        user_input (str): The string resulting from user input (input()).
+
+    Returns:
+        list: Lower-cased search terms; quoted phrases first (each as one
+        term), then the remaining words.
+    """
+    # Search for quoted phrases.
+    search_terms = []
+    while '"' in user_input:
+        q1 = user_input.find('"')
+        q2 = user_input.find('"', q1 + 1)
+        if q2 == -1:
+            # Unmatched quote: drop it. Without this the loop never ends,
+            # because nothing is removed from the input.
+            user_input = user_input[:q1] + user_input[q1 + 1 :]
+            break
+        quoted_term = user_input[q1 + 1 : q2]
+        if quoted_term.strip():  # Ignore empty quotes.
+            search_terms.append(quoted_term.lower())
+        user_input = user_input.replace(f'"{quoted_term}"', "")
+
+    # Add non-quoted terms; split() also drops repeated and trailing blanks.
+    search_terms += [i.lower() for i in user_input.split()]
+    return search_terms
+
+
+def user_input_to_db(user_input, engine):
+    """Writes user input to db for debugging.
+
+    Args:
+        user_input (str): The search string as typed by the user.
+        engine: SQLAlchemy engine for the database holding ``searches``.
+    """
+    # Bound parameters instead of string formatting: the value is untrusted.
+    sql = sqlalchemy.text("INSERT INTO searches (id, search) VALUES (:id, :search)")
+
+    # begin() commits on success; a plain connect() leaves the INSERT
+    # uncommitted under SQLAlchemy 2.0.
+    with engine.begin() as conn:
+        conn.execute(sql, {"id": datetime.now().timestamp(), "search": user_input})
+
+
+def _like_condition(word):
+    """Return the SQL condition for one search term, or None if it is unusable."""
+    # Only a leading minus negates; hyphens inside a word are part of the word.
+    negated = word.startswith("-")
+    if negated:
+        word = word[1:]
+
+    core = word.replace("*", "")
+    if not core.strip():
+        return None
+
+    # A blank next to the word anchors that side to a word boundary; an
+    # asterisk leaves the side open.
+    if "*" not in word:  # Searching for the exact word.
+        pattern = f" {core} "
+    elif word[0] == "*" and word[-1] == "*":
+        pattern = core
+    elif word[0] == "*":
+        pattern = f"{core} "
+    elif word[-1] == "*":
+        pattern = f" {core}"
+    else:
+        return None  # An asterisk inside a word is not supported.
+
+    pattern = pattern.replace("'", "''")  # Keep the SQL string literal intact.
+    operator = "NOT LIKE" if negated else "LIKE"
+    return f"text_lower {operator} '%%{pattern}%%'"
+
+
+def create_sql_query(search_terms):
+    """Returns a valid sql query.
+
+    Reads the module-level ``select_columns``, ``db_name`` and ``return_limit``.
+
+    Args:
+        search_terms (list): Lower-cased terms. Besides plain words and
+            phrases: ``or`` joins its two neighbours, a leading ``-``
+            excludes a term, ``*`` at either end is a wildcard, and
+            ``år:yyyy-yyyy`` (or ``år:yyyy``) restricts the years.
+
+    Returns:
+        str: The SELECT statement. If no usable term remains, the WHERE
+        clause is FALSE so the query is valid and returns no rows.
+
+    Raises:
+        ValueError: If a year in an ``år:`` term is not a number.
+    """
+    years = []
+    groups = []  # Each group is a list of conditions joined with OR.
+    join_next = False  # True right after an "or".
+
+    for word in search_terms:
+        # Check if years are specified.
+        if word.startswith("år:"):
+            first, _, last = word[3:].partition("-")
+            start = int(first)
+            end = int(last) if last else start
+            low, high = sorted((start, end))
+            years = [str(year) for year in range(low, high + 1)]
+            continue
+
+        if word == "or":
+            join_next = bool(groups)  # A leading "or" has nothing to join.
+            continue
+
+        condition = _like_condition(word)
+        if condition is None:
+            continue
+        if join_next:
+            groups[-1].append(condition)
+        else:
+            groups.append([condition])
+        join_next = False
+
+    # Create SQL query.
+    conditions = [
+        group[0] if len(group) == 1 else f"({' OR '.join(group)})" for group in groups
+    ]
+    if years:  # Search for years.
+        conditions.append(f"year IN ({', '.join(years)})")
+
+    search_sql = " AND ".join(conditions) if conditions else "FALSE"
+    sql = f"SELECT {select_columns} FROM {db_name} WHERE {search_sql} LIMIT {return_limit}"
+
+    return sql
+
+
+def protocol_url(id):
+    """Returns the url of the protocol.
+
+    Args:
+        id (str): The document id (``dok_id``).
+
+    Returns:
+        str: The url of the attached protocol file. If the lookup fails or no
+        such file is listed, the url of the document itself.
+    """
+    fallback = f"https://data.riksdagen.se/dokument/{id}"
+    url = fallback
+    try:
+        # The timeout keeps a slow API from blocking the page.
+        response = requests.get(f"{fallback}.json", timeout=10)
+        documents = response.json()["dokumentlista"]["dokument"]
+        for document in documents:
+            if document["dok_id"] == id:
+                for file in document["filbilaga"]["fil"]:
+                    if "prot" in file["namn"]:
+                        url = file["url"]
+    except (requests.RequestException, KeyError, TypeError, ValueError):
+        # Network error, invalid JSON or an unexpected response layout.
+        url = fallback
+
+    return url
+
+
+def error2db(error, user_input, engine):
+    """Append one row (error text, today's date, user input) to the "errors" table."""
+    record = {
+        "error": error,
+        "time": datetime.now().date(),
+        "user_input": str(user_input),
+    }
+    row = pd.DataFrame(record, index=[0])
+    row.to_sql("errors", engine, if_exists="append", index=False)
+
+
+@st.cache_data
+def get_speakers():
+    """Return the whole "persons" table as a DataFrame (read via the global engine)."""
+    query = "select * from persons"
+    return pd.read_sql(query, engine)
+
+
+def search_person(user_input, df_persons):
+    """Ask whether the user means a speaker, and build the matching SQL query.
+
+    Shows a selectbox with every speaker whose name equals the input. The
+    script run is stopped until the user has picked an alternative.
+
+    Args:
+        user_input (str): The string resulting from user input (input()).
+        df_persons (DataFrame): Speakers, with the columns "name" and "speaker".
+
+    Returns:
+        str: SQL query for everything the chosen speaker has said, or the
+        ordinary word-search query if the user declined.
+    """
+    # List all alternatives.
+    matches = df_persons.loc[df_persons["name"] == user_input.lower()][
+        "speaker"
+    ].tolist()
+    options = [f"Ja, sök på {i.title()}" for i in matches]
+    no_option = f"Nej, jag vill söka på vad soms sagts om {user_input.title()}."
+    placeholder = "Välj ett alternativ"
+    options += [no_option, placeholder]
+    # Let the user select a person or no_alternative.
+    speaker = st.selectbox(
+        ":red[Vill du söka efter vad en specifik ledamot sagt?]",
+        options,
+        index=len(options) - 1,  # Preselect the placeholder.
+    )
+
+    if speaker == placeholder:
+        st.stop()
+    if speaker == no_option:
+        search_terms = define_search_terms(user_input)  # Return "normal" query if no_alternative.
+        return create_sql_query(search_terms)
+
+    speaker = speaker.replace("Ja, sök på ", "").title()
+    # Double any apostrophe in the name so it cannot end the SQL literal.
+    speaker = speaker.replace("'", "''")
+    return f"SELECT {select_columns} FROM {db_name} WHERE talare = '{speaker}' LIMIT {return_limit}"
+
+
+# Page configuration, title and the CSS from info.py. The statements in this
+# section run on every script run, in this order.
+st.set_page_config(
+    page_title="Rixdagen",
+    page_icon="favicon.png",
+    initial_sidebar_state="auto",
+)
+st.title("Vad säger de i Riksdagen?")
+st.markdown(css, unsafe_allow_html=True)
+# Get params from url.
+params = Params(st.experimental_get_query_params())
+
+# The party codes that have a colour defined; get_data() drops every row
+# whose party is not in this list.
+parties = list(party_colors.keys())  # List of partycodes
+
+# Max hits returned by db.
+return_limit = 10000
+
+# Ask for word to search for. The field is prefilled with "q" from the url.
+user_input = st.text_input(
+    " ",
+    value=params.q,
+    placeholder="Sök ett ord, vilket som helst",
+    # label_visibility="hidden",
+    help='Du kan använda asterix (*), minus (-), citattecken ("") och OR.',
+)
+params.q = user_input
+
+# Main flow: search, let the user filter the hits, then render tables and
+# charts. Streamlit draws widgets in statement order, so the order matters.
+if len(user_input) > 2:
+    # Set by the speaker lookup below; stays None for an ordinary word search.
+    sql = None
+    try:
+        engine = sqlalchemy.create_engine(
+            f"postgresql://{user}:{pwd}@{ip}:5432/riksdagen"
+        )
+        user_input = user_input.replace("'", '"')
+
+        # Put user input in session state (first run).
+        if "user_input" not in st.session_state:
+            st.session_state["user_input"] = user_input
+            user_input_to_db(user_input, engine)
+        else:
+            if st.session_state["user_input"] != user_input:
+                # Write user input to DB.
+                st.session_state["user_input"] = user_input
+                user_input_to_db(user_input, engine)
+                # Reset url parameters.
+                params.reset(q=user_input)
+
+        params.update()
+
+        # Check if user has searched for a specific politician.
+        if len(user_input.split(" ")) in [2, 3, 4]:  # TODO Better way of telling if name?
+            df_persons = get_speakers()  # TODO Get only unique values.
+            list_persons = df_persons["name"].tolist()
+            if user_input.lower() in list_persons:
+                sql = search_person(user_input, df_persons)
+                search_terms = "speaker"
+
+        if sql is None:
+            search_terms = define_search_terms(user_input)
+            sql = create_sql_query(search_terms)
+
+        # Fetch data from DB.
+        df = get_data(sql)
+
+        if len(df) == 0:  # If no hits.
+            st.write("Inga träffar. Försök igen!")
+            st.stop()
+        elif df.shape[0] == return_limit:
+            st.write(limit_warning)
+            st.stop()
+
+        # List with active parties, in alphabetical order. (The old check
+        # compared a type with the string "list", so it never sorted.)
+        party_labels = sorted(df["Parti"].value_counts().index.to_list())
+
+        if search_terms != "speaker":
+            # Let the user select parties to be included.
+            container_parties = st.container()
+            with container_parties:
+                style_parties = build_style_parties(
+                    party_labels
+                )  # Make the options the right colors.
+                st.markdown(style_parties, unsafe_allow_html=True)
+                params.parties = st.multiselect(
+                    label="Välj vilka partier som ska ingå",
+                    options=party_labels,
+                    default=party_labels,
+                )
+            if params.parties != []:
+                df = df.loc[df["Parti"].isin(params.parties)]
+                if len(df) == 0:
+                    st.stop()
+
+        # Let the user select type of debate.
+        container_debate = st.container()
+        with container_debate:
+            debates = df["debatetype"].unique().tolist()
+            debates.sort()
+
+            style = build_style_debate_types(debates)
+            st.markdown(style, unsafe_allow_html=True)
+            params.debates = st.multiselect(
+                label="Välj typ av debatt",
+                options=debates,
+                default=debates,
+            )
+        if params.debates != []:
+            df = df.loc[df["debatetype"].isin(params.debates)]
+            if len(df) == 0:
+                st.stop()
+        params.update()
+
+        # Let the user select a range of years.
+        years = list(range(int(df["År"].min()), int(df["År"].max()) + 1))
+        if len(years) > 1:
+            params.from_year, params.to_year = st.select_slider(
+                "Välj tidsspann",
+                years,
+                value=(years[0], years[-1]),
+            )
+            df = df.loc[
+                df["År"].isin(list(range(params.from_year, params.to_year + 1)))
+            ]
+        elif len(years) == 1:
+            df = df.loc[df["År"] == years[0]]
+
+        params.update()
+
+        if search_terms != "speaker":
+            # Let the user select talkers.
+            options = options_persons(df)
+            style_mps = build_style_mps(options)  # Make the options the right colors.
+            st.markdown(style_mps, unsafe_allow_html=True)
+            col1_persons, col2_persons = st.columns([5, 2])
+            # Sort alternatives in column to the right.
+            with col2_persons:
+                sort = st.selectbox(
+                    "Sortera på", options=["Bokstavsordning", "Flest anföranden"]
+                )
+                if sort == "Flest anföranden":
+                    # The count is the last number in "<speaker> - <count>".
+                    options = sorted(
+                        options,
+                        key=lambda x: [int(i) for i in x.split() if i.isdigit()][-1],
+                        reverse=True,
+                    )
+                else:
+                    options.sort()
+            # Present options in column to the left.
+            with col1_persons:
+                expand_persons = st.container()
+                with expand_persons:
+                    params.persons = st.multiselect(
+                        label="Filtrera på personer",
+                        options=options,
+                        default=[],
+                    )
+            # Filter df.
+            if params.persons != []:
+                # Cut " - <count>" off the option to get the speaker back.
+                params.persons = [i[: i.find(")") + 1] for i in params.persons]
+                df = df.loc[df["Talare"].isin(params.persons)]
+        params.update()
+
+        # Give df an index.
+        df.index = range(1, df.shape[0] + 1)
+
+        ##* Start render. *##
+
+        st.markdown("---")  # Draw line after filtering.
+        st.write(f"**Träffar: {df.shape[0]}**")
+
+        ## Short snippets,
+        expand_short = st.expander("Visa tabell med korta utdrag", expanded=False)
+        with expand_short:
+            st.dataframe(df[["Utdrag", "Parti"]].style.applymap(highlight_cells))
+
+        ## Long snippets.
+        expand_long = st.expander(
+            "Visa tabell med längre utdrag (kan ta lång tid om många träffar).",
+            expanded=False,
+        )
+        with expand_long:
+            # st.markdown(style, unsafe_allow_html=True)
+            # df["date"] = df["Datum"].apply(lambda x: datestring_to_date(x))
+            df = df.sort_values(["Datum", "dok_id", "number"], axis=0)
+            dok_id = None
+
+            # n numbers the rows from 1 and gives every button a unique key.
+            for n, (_, row) in enumerate(df.iterrows(), start=1):
+                # Find out if it's a new debate.
+                new_debate = row["dok_id"] != dok_id
+                dok_id = row["dok_id"]
+
+                # Remove title for ministers. #TODO Remove "statsråd" etc.
+                if "minister" in row["Talare"]:
+                    row["Talare"] = row["Talare"][
+                        row["Talare"].find("minister") + len("minister") :
+                    ]
+
+                # Write to table.
+
+                if new_debate:
+                    # st.write("---", unsafe_allow_html=True)
+                    st.markdown(
+                        f""" <span style="font-weight: bold;">{row['Datum']}</span> """,
+                        unsafe_allow_html=True,
+                    )
+                col1, col2, col3 = st.columns([2, 7, 2])
+                with col1:
+                    st.write(f"{row['Talare']}", unsafe_allow_html=True)
+                with col2:
+                    # The colon is escaped with a backslash before the text
+                    # goes to st.markdown.
+                    snippet = (
+                        row["Utdrag_long"]
+                        .replace(":", "\\:")
+                        .replace("<p>", "")
+                        .replace("</p>", "")
+                    )
+                    st.markdown(
+                        f""" <span style="background-color:{party_colors_lighten[row['Parti']]}; color:black;">{snippet}</span> """,
+                        unsafe_allow_html=True,
+                    )
+                with col3:
+                    full_text = st.button("Fulltext", key=n)
+                    if full_text:
+                        with st.sidebar:
+                            data_person = requests.get(
+                                f'https://data.riksdagen.se/personlista/?iid={row["intressent_id"]}&utformat=json',
+                                timeout=10,
+                            ).json()["personlista"]["person"]
+                            name_person = data_person["sorteringsnamn"].lower().replace(",", "-").replace(' ', '-')
+                            url_person = f'https://www.riksdagen.se/sv/ledamoter-partier/ledamot/{name_person}_{row["intressent_id"]}'
+                            st.markdown(
+                                f""" <span class="{row['Parti']}" style="font-weight: bold;">[ {row['Talare']} ]({url_person})</span> """,
+                                unsafe_allow_html=True,
+                            )
+                            st.markdown(
+                                f""" <span style="font-style: italic;">{row["Datum"]} - {row['debatetype']}</span> """,
+                                unsafe_allow_html=True,
+                            )
+                            st.write(
+                                row["Text"].replace(":", "\\:"), unsafe_allow_html=True
+                            )
+                            if row["url_session"] != "https://riksdagen.se":
+                                st.markdown(
+                                    f'📺 [Se debatten i Riksdagen]({row["url_session"]})'
+                                )
+                            if row["url_audio"] != "":
+                                # Start position in seconds -> [h:]mm:ss.
+                                hours, rest = divmod(int(row["start"]), 3600)
+                                minutes, seconds = divmod(rest, 60)
+                                start_time = f"{minutes:02d}:{seconds:02d}"
+                                if hours:
+                                    start_time = f"{hours}:{start_time}"
+                                st.markdown(
+                                    f'💬 [Ladda ner ljudet]({row["url_audio"]}) (Anförandet börjar vid {start_time})'
+                                )
+
+                            url_protocol = protocol_url(dok_id)
+                            st.markdown(f"📝 [Ladda ner protokollet]({url_protocol})")
+
+        # Download all data in df.
+        st.download_button(
+            "Ladda ner datan som CSV",
+            data=df.to_csv(
+                index=False,
+                sep=";",
+                columns=[
+                    "talk_id",
+                    "Anförande",
+                    "Parti",
+                    "Talare",
+                    "Datum",
+                    "url_session",
+                ],
+            ).encode("utf-8"),
+            file_name=f"{user_input}.csv",
+            mime="text/csv",
+        )
+
+        # Remove talks from same party within the same session to make the
+        # statistics more representative.
+        df_ = df[["talk_id", "Parti", "År"]].drop_duplicates()
+
+        if search_terms != "speaker":
+            ## Make pie chart.
+            party_talks = pd.DataFrame(df_["Parti"].value_counts())
+            party_labels = party_talks.index.to_list()
+            fig, ax1 = plt.subplots()
+            total = party_talks["Parti"].sum()
+            mentions = party_talks["Parti"]
+            ax1.pie(
+                mentions,
+                labels=party_labels,
+                # Show the absolute count instead of the percentage.
+                autopct=lambda p: "{:.0f}".format(p * total / 100),
+                colors=[party_colors[key] for key in party_labels],
+                startangle=90,
+            )
+
+        # Make bars per year.
+        df_years = pd.DataFrame(columns=["Parti", "År"])
+        for i in df.groupby("År"):
+            dff = pd.DataFrame(data=i[1]["Parti"].value_counts())
+            dff["År"] = str(i[0])
+            df_years = pd.concat([df_years, dff])
+        df_years["party_code"] = df_years.index
+        df_years["color"] = df_years["party_code"].apply(lambda x: party_colors[x])
+        df_years.rename(columns={"Parti": "Antal", "party_code": "Parti"}, inplace=True)
+
+        chart = (
+            alt.Chart(df_years)
+            .mark_bar()
+            .encode(
+                x="År",
+                y="Antal",
+                color=alt.Color("color", scale=None),
+                tooltip=["Parti", "Antal"],
+            )
+        )
+
+        if search_terms == "speaker":
+            st.altair_chart(chart, use_container_width=True)
+
+        else:
+            # Put the charts in a table.
+            fig1, fig2 = st.columns(2)
+            with fig1:
+                st.pyplot(fig)
+            with fig2:
+                st.altair_chart(chart, use_container_width=True)
+
+        # Get feedback.
+        st.empty()
+        feedback_container = st.empty()
+
+        with feedback_container.container():
+            feedback = st.text_area(
+                "*Skriv gärna förslag på funktioner och förbättringar här!*"
+            )
+            send = st.button("Skicka")
+            if len(feedback) > 2 and send:
+                df_feedback = pd.DataFrame(
+                    {"feedback": feedback, "time": datetime.date(datetime.now())},
+                    index=[0],
+                )
+                df_feedback.to_sql("feedback", engine, if_exists="append", index=False)
+                feedback_container.write("*Tack!*")
+        params.update()
+        # st.markdown("##")
+
+    except Exception:
+        # Log the error and tell the user. The old test compared the
+        # exception object with a string and could never be true, so it is
+        # removed without changing what is handled here.
+        # NOTE(review): st.stop() presumably signals with an exception outside
+        # the Exception hierarchy and so never reaches this handler - confirm.
+        print(traceback.format_exc())
+        error2db(traceback.format_exc(), user_input, engine)
+        st.markdown(
+            ":red[Något har blivit fel, jag försöker lösa det så snart som möjligt. Testa gärna att söka på något annat.]"
+        )
+
+expand_explainer = st.expander("*Vad är det här? Var kommer datan ifrån? Hur gör jag?*")
+with expand_explainer:
+    st.markdown(explainer)
--- a/info.py
+++ b/info.py
@ -0,0 +1,135 @@
+""" Information and constants are put here and imported into app.py. """
+
+# Colour per party code. The keys double as the list of party codes that
+# app.py accepts; '' and '-' are placeholders for a missing party.
+party_colors = {
+    "MP": "#83CF39",
+    "V": "#b51a0e",
+    "S": "#E8112d",
+    "C": "#009933",
+    "M": "#52BDEC",
+    "KD": "#000077",
+    "SD": "#DDDD00",
+    "L": "#006AB3",
+    "NYD": "#ffff2b",
+    '': 'white',
+    '-': 'white'
+}
+
+# Column list for the SELECT statements built in app.py. The aliases are the
+# column names app.py uses on the resulting DataFrame.
+select_columns = '''
+                talk_id,
+                dok_id,
+                "anforandetext" AS "Text", 
+                anforande_nummer AS number, 
+                kammaraktivitet as debatetype, 
+                talare AS "Talare", 
+                datum AS "Datum", 
+                year AS År, 
+                debateurl AS url_session, 
+                parti AS "Parti",
+                audiofileurl as url_audio,
+                startpos as start,
+                intressent_id
+                '''
+
+
+
+
+
+
+# 70 % lighter party colors. Same keys as party_colors; app.py uses them as
+# background for the long snippets.
+party_colors_lighten = {
+    "MP": '#daf1c4',
+    "V": '#f8ada7',
+    "S": '#fab6bf',
+    "C": '#94ffb8',
+    "M": '#cbebf9',
+    "KD": "#b1b1ff", # 80 %
+    "SD": "#ffffa8",
+    "L": "#9cd6ff",
+    "NYD": "#ffffbf",
+    '': 'white',
+    '-': 'white'
+}
+
+# Stylesheet injected by app.py: black links by default, plus one rule per
+# party so a link inside an element with the party code as class gets the
+# party colour.
+css = """ <style>
+a:link {
+  color: black;
+}
+a:visited {
+  color: black;
+}
+a:hover {
+  color: grey;
+}
+"""
+# Link colours used instead of the party colour for these two parties.
+_link_color_overrides = {'NYD': '#FFC000', 'SD': '#E5AC00'}
+for p, c in party_colors.items():
+    if p in ['', '-']:
+        # No rule for the placeholder codes: ". a" and ".- a" are not valid
+        # selectors, and these links are black through a:link anyway.
+        continue
+    c = _link_color_overrides.get(p, c)
+    css += f"\n.{p} a{{color: {c};}}"
+css += '\n</style>'
+
+# css = '''
+#     <style>
+#       .C a{
+#         color: green;
+#    }
+#     </style>
+# '''
+
+# Swedish month name -> two-digit month number.
+months_conversion = {
+    'januari': '01',
+    'februari': '02',
+    'mars': '03',
+    'april': '04',
+    'maj': '05',
+    'juni': '06',
+    'juli': '07',
+    'augusti': '08',
+    'september': '09',
+    'oktober': '10',
+    'november': '11',
+    'december': '12'
+    }
+# Help text (markdown) shown in the expander at the bottom of the page.
+# Backslashes are doubled so that "\:" is no longer an invalid escape
+# sequence; the resulting text is unchanged.
+explainer = """Det här är en databas över vad svenska riksdagspolitiker har sagt i olika debatter i Riksdagen sedan 1993.
+Datan kommer dels från data.riksdagen.se och dels från transkriberingar av vad som sänts i Riksdagens videotjänst (från år 2000).  
+- Börja med att skriva ett eller flera sökord nedan. Du kan använda asterix (*), minus(-), citattecken (""), OR och år\\:yyyy-yyyy. Sökningen    
+`energikris* baskraft OR kärnkraft "fossilfria energikällor" -vindkraft år:2015-2022` söker anföranden som\\:  
+    - nämner "energikris" (inkl. ex. "energikris*en*")  
+    - nämner antingen "baskraft" *eller* "kärnkraft"  
+    - nämner den *exakta frasen* "fossilfria energikällor"  
+    - *inte* nämner "vindkraft"  
+    - återfinns under åren 2015-2022  
+- När du fått dina resultat kan sedan klicka bort parier eller ändra vilka år och debatttyper du är intresserad av.
+- Under "Längre utdrag" kan du välja att se hela anförandet i text, och under texten finns länkar till Riksdagens Webb-TV och nedladdningsbart ljud (i de fall
+där debatten har sänts).  
+
+Berätta gärna hur du skulle vilja använda datan och om sånt som inte funkar. [Mejla mig](mailto:lasse@edfast.se) eller [skriv till mig på Twitter](https://twitter.com/lasseedfast).  
+Jag som gjort den här sidan heter [Lasse Edfast och är journalist](https://lasseedfast.se).
+"""
+
+# Debate type code -> Swedish display name.
+debate_types = {
+            "kam-vo": "Beslut",
+            "bet": "Debatt om beslut",
+            "kam-fs": "Frågestund",
+            "kam-ar": "Information från regeringen",
+            "ip": "Interpellationsdebatt",
+            "kam-sf": "Statsministerns frågestund",
+            "sam-ou": "Öppen utfrågning",
+            "kam-ad": "Aktuell debatt",
+            "kam-al": "Allmänpolitisk debatt",
+            "kam-bu": "Budgetdebatt",
+            'kam-bp': 'Bordläggning',
+            'kam-pd': 'Partiledardebatt',
+            'kam-dv': 'Debatt med anledning av vårpropositionen',
+            'sam-se': 'Öppet seminarium',
+            'kam-ud': 'Utrikespolitisk debatt'
+        }
+
+# Shown when a search hits the row limit (return_limit in app.py).
+limit_warning = '''
+        Din sökning ger fler än 10 000 träffar. Försök gör den mer specifik, exempelvis genom att
+        använda minustecken eller specificera årtal genom att skriva år\\:yyyy-yyyy (ex. år:2019-2020, utan mellanrum efter kolon).
+        Gränsen på 10 000 träffar finns för att servern inte ska krascha och kommer att höjas när jag har en starkare server.
+        '''
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,57 @@
+altair==4.2.2
+attrs==22.2.0
+backports.zoneinfo==0.2.1
+blinker==1.5
+cachetools==5.3.0
+certifi==2022.12.7
+charset-normalizer==3.0.1
+click==8.1.3
+contourpy==1.0.7
+cycler==0.11.0
+decorator==5.1.1
+entrypoints==0.4
+fonttools==4.38.0
+gitdb==4.0.10
+GitPython==3.1.31
+greenlet==2.0.2
+idna==3.4
+importlib-metadata==6.0.0
+importlib-resources==5.12.0
+Jinja2==3.1.2
+jsonschema==4.17.3
+kiwisolver==1.4.4
+markdown-it-py==2.1.0
+MarkupSafe==2.1.2
+matplotlib==3.7.0
+mdurl==0.1.2
+numpy==1.24.2
+packaging==23.0
+pandas==1.5.3
+Pillow==9.4.0
+pkgutil_resolve_name==1.3.10
+protobuf==3.20.3
+pyarrow==11.0.0
+pydeck==0.8.0
+Pygments==2.14.0
+Pympler==1.0.1
+pyparsing==3.0.9
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+pytz==2022.7.1
+pytz-deprecation-shim==0.1.0.post0
+requests==2.28.2
+rich==13.3.1
+semver==2.13.0
+six==1.16.0
+smmap==5.0.0
+SQLAlchemy==2.0.4
+streamlit==1.18.1
+toml==0.10.2
+toolz==0.12.0
+tornado==6.2
+typing_extensions==4.5.0
+tzdata==2022.7
+tzlocal==4.2
+urllib3==1.26.14
+validators==0.20.0
+zipp==3.14.0