commit
9fa9572987
4 changed files with 401 additions and 0 deletions
@ -0,0 +1,5 @@ |
|||||||
|
* |
||||||
|
!app.py |
||||||
|
!requirements.txt |
||||||
|
!create_credentials.py |
||||||
|
!.gitignore |
||||||
@ -0,0 +1,263 @@ |
|||||||
|
import streamlit as st |
||||||
|
import pandas as pd |
||||||
|
from sqlalchemy import create_engine |
||||||
|
import streamlit_authenticator as stauth |
||||||
|
import yaml |
||||||
|
from yaml.loader import SafeLoader |
||||||
|
|
||||||
|
class Params:
    """Mirror of the Streamlit query parameters (``q`` and ``category``).

    Wraps the dict returned by ``st.experimental_get_query_params`` as
    attributes and pushes every change back into the URL so a search
    survives page reloads.
    """

    def __init__(self, d):
        # Expose every incoming query parameter as an attribute.
        for key, value in d.items():
            setattr(self, key, value)

        # Guarantee that the two parameters the app relies on always exist.
        for required in ('q', 'category'):
            if required not in d:
                setattr(self, required, '')

        self.set_params()

    def set_params(self):
        """Write the current q/category values back into the URL."""
        st.experimental_set_query_params(q=self.q, category=self.category)

    def update(self, param, value):
        """Set attribute *param* to *value* and sync the URL."""
        setattr(self, param, value)
        self.set_params()
|
def reset_q():
    """Clear the ``q`` query parameter (used when the category changes)."""
    st.experimental_set_query_params(q='')
|
def download(df):
    """Offer *df* as a semicolon-separated, UTF-8 CSV download in the UI."""
    csv_bytes = df.to_csv(index=False, sep=';').encode('utf-8')
    st.download_button(
        "CSV",
        csv_bytes,
        "file.csv",
        "text/csv",
        key='download-csv')
|
def define_search_terms(user_input):
    """Turn raw user input into a list of lower-cased search terms.

    Double-quoted phrases are kept together as single terms; everything
    else is split on whitespace.  Quoted terms come first in the result.

    Args:
        user_input (str): The raw search string typed by the user.

    Returns:
        list: Lower-cased search terms (possibly empty).
    """
    search_terms = []

    # Extract quoted phrases first.
    while '"' in user_input:
        q1 = user_input.find('"')
        q2 = user_input.find('"', q1 + 1)
        if q2 == -1:
            # BUG FIX: an unmatched quote (e.g. 'foo " bar') made the old
            # loop spin forever, because find() returned -1 and the
            # replace() below never removed the stray quote.  Drop the
            # quote and treat the rest as plain words instead.
            user_input = user_input.replace('"', ' ', 1)
            break
        quoted_term = user_input[q1 + 1:q2]
        search_terms.append(quoted_term.lower())
        user_input = user_input.replace(f'"{quoted_term}"', "")

    # str.split() with no argument collapses runs of whitespace and
    # ignores leading/trailing blanks, replacing the old manual
    # double-blank removal loop.
    search_terms += [term.lower() for term in user_input.split()]
    return search_terms
|
def create_sql_query(search_terms, table):
    """Build a SELECT matching *search_terms* against ``body_lower``.

    Supported term syntax:
      * ``word``                     - whole-word match (padded with blanks)
      * ``word*`` / ``*word`` / ``*word*`` - prefix / suffix / substring match
      * ``or``                       - combine the neighbouring terms with OR
      * ``-word``                    - exclude messages containing the word

    NOTE(review): the query is built by string interpolation; *table* and
    the terms are NOT escaped.  Safe only for trusted, internal input.

    Args:
        search_terms (list): Lower-cased terms from define_search_terms().
        table (str): Table name to select from.

    Returns:
        str: A complete SELECT statement (LIMIT 1000).
    """
    select_columns = 'body, "to" as m2, "from" as m1, senddate_str'
    return_limit = 1000

    # Translate the wildcard syntax into LIKE fragments.
    word_list = []
    for word in search_terms:
        if "*" not in word:  # Searching for the exact word.
            word_list.append(f" {word} ")
        elif word[0] == "*" and word[-1] == "*":
            word_list.append(word.replace("*", ""))
        elif word[0] == "*":
            word_list.append(f"{word.replace('*', '')} ")
        elif word[-1] == "*":
            word_list.append(f" {word.replace('*', '')}")
        # A '*' in the middle of a word is unsupported and is dropped.

    # Format for SQL.  '%%' instead of '%' because pandas/SQLAlchemy treat
    # a bare '%' in raw query strings as a parameter marker.
    search_list = [f"'%%{i}%%'" for i in word_list]

    # Mark the stand-alone term 'or' as an OR operator.  BUG FIX: the old
    # substring test (" or " in i) also destroyed quoted phrases such as
    # "this or that"; match the exact formatted term instead.
    search_list = ["OR" if i == "'%% or %%'" else i for i in search_list]

    # Handle searches with OR: collect the terms adjacent to each marker.
    or_terms = []
    or_sql = ''
    while "OR" in search_list:
        n_or = search_list.count("OR")
        or_terms.append(search_list.pop(search_list.index("OR") - 1))
        if n_or == 1:
            or_terms.append(search_list.pop(search_list.index("OR") + 1))
        search_list.remove("OR")
    if or_terms:
        or_sql = f"( body_lower LIKE {' OR body_lower LIKE '.join(or_terms)})"

    # Handle searches with -.  BUG FIX: the old code popped items from
    # search_list while iterating it, silently skipping every second
    # negated term; filter into two new lists instead.
    # TODO Make this not include words with hyphen.
    not_terms = [term.replace("-", "") for term in search_list if "-" in term]
    search_list = [term for term in search_list if "-" not in term]

    # Create SQL query.
    search_sql = ''
    if search_list != []:
        search_sql = f'(body_lower LIKE {" AND body_lower LIKE ".join(search_list)}) '

    if or_terms != []:
        search_sql = or_sql if search_sql == '' else search_sql + " AND " + or_sql

    if len(not_terms) > 0:
        search_sql += (
            f' AND (body_lower NOT LIKE {" AND body_lower NOT LIKE ".join(not_terms)})'
        )

    return f"SELECT {select_columns} FROM {table} WHERE {search_sql} LIMIT {return_limit}"
|
def search_messages(search_for, engine, user=False):
    """Search messages and render the matching conversations in Streamlit.

    Args:
        search_for (str): Search string, or a username when *user* is True.
        engine: SQLAlchemy engine connected to the message database.
        user (bool): If True, fetch every message sent to/from *search_for*
            instead of doing a keyword search.
    """
    select_columns = 'body, "to" as m2, "from" as m1, senddate_str'

    if user:  # Search for all messages from/to a single user.
        # SECURITY FIX: the username is bound as a parameter instead of
        # being interpolated into the SQL string (injectable before).
        sql = f'select {select_columns} from messages where "to" = :u or "from" = :u'
        df = pd.read_sql(sql, engine, params={'u': search_for})
    else:  # Search for keywords.
        sql = create_sql_query(define_search_terms(search_for), 'messages')
        df = pd.read_sql(sql, engine)

    download(df)
    st.write(f'Träffar: {len(df)}')

    # Everyone who occurs as sender or receiver in the result set.
    talkers = list(set(df['m1'].tolist() + df['m2'].tolist()))
    if 'admin' in talkers:  # Remove admin from conversations.
        talkers.remove('admin')

    conversations = {}
    for talker in talkers:
        talker_df = df.query(f'm1=="{talker}" | m2=="{talker}" ').copy()
        others = list(set(talker_df['m1'].tolist() + talker_df['m2'].tolist()))
        others.remove(talker)

        for other in others:
            parts_list = sorted([str(talker), str(other)])
            parts = '_'.join(parts_list)

            if parts not in conversations:
                # BUG FIX: filter from talker_df each time.  The old code
                # reused one variable, so every conversation after the
                # first for a given talker was filtered from an already
                # narrowed frame and came out wrong or empty.
                pair_df = talker_df.query(f'm1=="{other}" | m2=="{other}" ').copy()
                pair_df.sort_values('senddate_str', inplace=True)
                conversations[parts] = {
                    'df': pair_df[['m1', 'm2', 'body', 'senddate_str']],
                    'parts': parts_list,
                }

    for _, data in conversations.items():
        st.write(' - '.join(data['parts']))
        conv_df = data['df']
        conv_df.rename(
            {'m1': 'från', 'm2': 'till', 'body': 'meddelande', 'senddate_str': 'datum'},
            inplace=True, axis=1,
        )
        st.dataframe(conv_df, hide_index=True)
|
def search_user(search_for, engine):
    """Look a user up by email or username and show their messages.

    Args:
        search_for (str): Email address (contains '@') or username.
        engine: SQLAlchemy engine connected to the database.
    """
    search_for = search_for.lower()

    select = 'member_id, email, username'
    # An '@' means the user typed an email address.
    search_column = 'email_lower' if '@' in search_for else 'username_lower'

    # SECURITY FIX: bind the search value as a parameter instead of
    # pasting it into the SQL string (the old query was injectable).
    df = pd.read_sql(
        f'select {select} from members_expanded where {search_column} = :term',
        engine,
        params={'term': search_for},
    )

    st.dataframe(df, use_container_width=True)

    # Exactly one match: jump straight to that user's conversations.
    if df.shape[0] == 1:
        search_messages(df.username[0], engine, user=True)
|
def main():
    """Render the search UI (runs only after a successful login)."""
    engine = create_engine('sqlite:///db.sqlite')

    # Query parameters persist the current search across reruns/reloads.
    params = Params(st.experimental_get_query_params())

    categories = ['Meddelanden', 'Användare']
    if params.category != '':
        # Query-parameter values arrive as lists of strings.
        index_category = categories.index(params.category[0])
    else:
        index_category = 0
    search_category = st.selectbox(
        'Vad vill du söka efter?', categories, index_category, on_change=reset_q
    )
    params.update('category', search_category)

    placeholder = params.q[0] if params.q != '' else 'Skriv här'
    search_for = st.text_input('Vad vill du söka efter?', placeholder=placeholder)

    # Fall back to the remembered query when the input box is empty.
    if search_for == '' and params.q != '':
        search_for = params.q[0]

    if search_for != '':
        params.update('q', search_for)
        # NOTE(review): the original called search_for.replace('å', '?')...
        # and discarded the result (str.replace returns a new string), so
        # it never had any effect.  Removed as dead code; if å/ä/ö really
        # should become wildcards, assign the result instead.

        #* Sök meddelanden
        if search_category == 'Meddelanden':
            search_messages(search_for, engine)

        #* Sök användare
        elif search_category == 'Användare':
            search_user(search_for, engine)
|
# --- Entry point: authenticate the visitor before running the app. ---
#main()

# Load the streamlit-authenticator configuration (users, cookie settings).
# SafeLoader prevents arbitrary-object construction from the YAML file.
with open('credentials.yaml') as file:
    config = yaml.load(file, Loader=SafeLoader)

authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized']
)

# login() renders the form and returns (display name, auth status,
# username); status is True on success, False on bad credentials, and
# None before any attempt has been made.
name, authentication_status, username = authenticator.login('Login', 'main')

if authentication_status:
    main()

elif authentication_status is False:
    st.error('Username/password is incorrect')
elif authentication_status is None:
    st.warning('Please enter your username and password')
||||||
@ -0,0 +1,76 @@ |
|||||||
|
import streamlit_authenticator as stauth |
||||||
|
from sys import argv |
||||||
|
import yaml |
||||||
|
|
||||||
|
def load_credentials():
    """Read and return the parsed contents of ``credentials.yaml``."""
    with open('credentials.yaml', 'r') as fh:
        parsed = yaml.safe_load(fh)
    return parsed
|
def credentials(username, name, pwd):
    """Hash *pwd* and build a credentials entry for *username*.

    Args:
        username (str): Login name; must not already exist in the file.
        name (str): Display name.
        pwd (str): Plain-text password (hashed before storage).

    Returns:
        tuple: (entry, True) on success, where entry holds a human-readable
        'text' preview and the 'data' dict to merge into the YAML file;
        (error message, False) if the username is already taken.
    """
    pwd = stauth.Hasher([pwd]).generate()[0]

    # 'in dict' checks keys directly; no need for list(d.keys()).
    if username in load_credentials()['credentials']['usernames']:
        print('Användarnamnet finns redan.')
        return 'Användarnamnet finns redan.', False

    # Renamed from 'credentials' - the old local shadowed the function's
    # own name.  The preview text is now valid YAML (the old version had a
    # stray trailing comma after email and no key indentation).
    entry = {
        'text': f'''
{username}:
  email: ''
  name: {name}
  password: '***'
''',
        'data': {username:
                 {'email': '',
                  'name': name,
                  'password': pwd}
                 }
    }
    for k, v in entry.items():
        print(k, v)
    return entry, True
|
def update(c, print_out=True, update=False):
    """Optionally show a credentials entry and merge it into the YAML file.

    Args:
        c (dict): Entry from credentials(): {'text': ..., 'data': ...}.
        print_out (bool): Print the human-readable preview first.
        update (bool): Write without asking; otherwise prompt interactively.

    Returns:
        tuple: (preview text, bool telling whether the file was written).
    """
    if print_out:
        print(c['text'])

    if not update:
        if input('Update credentials file? (y/n)') in ['y', 'yes']:
            update = True

    written = False
    if update:
        try:
            cur_yaml = load_credentials()
            cur_yaml['credentials']['usernames'].update(c['data'])

            if cur_yaml:
                with open('credentials.yaml', 'w') as f:
                    yaml.safe_dump(cur_yaml, f)
                written = True

        except FileNotFoundError:
            print('Found no yaml file')

    # BUG FIX: the old code always returned False, even after a
    # successful write.
    return c['text'], written
|
if __name__ == '__main__':
    if len(argv) == 2:
        # Single argument: either 'help' or a password to hash and print.
        if argv[1] == 'help':
            print('username, name, pwd')
            exit()
        pwd = argv[1]
        print(stauth.Hasher([pwd]).generate()[0])

    elif len(argv) == 4:
        # username, name and password given on the command line.
        username = argv[1]
        name = argv[2]
        pwd = argv[3]
        # BUG FIX: the old code passed the whole (entry, flag) tuple to
        # update(), which crashed on c['text'].  Unpack and check the flag.
        c, ok = credentials(username, name, pwd)
        if ok:
            update(c)

    else:
        # Interactive mode: prompt for everything.
        pwd = input('Password: ').strip()
        username = input('Username: ')
        if username != '':
            name = input('Name: ')
            # BUG FIX: the old code called update(c[0]) unconditionally -
            # for an existing username c[0] is an error string, and
            # update() crashed on it.  Only write when creation succeeded.
            c, ok = credentials(username, name, pwd)
            if ok:
                update(c)
        else:
            print(stauth.Hasher([pwd]).generate()[0])
||||||
@ -0,0 +1,57 @@ |
|||||||
|
altair==5.0.1 |
||||||
|
attrs==23.1.0 |
||||||
|
backports.zoneinfo==0.2.1 |
||||||
|
bcrypt==4.0.1 |
||||||
|
blinker==1.6.2 |
||||||
|
cachetools==5.3.1 |
||||||
|
certifi==2023.5.7 |
||||||
|
charset-normalizer==3.2.0 |
||||||
|
click==8.1.5 |
||||||
|
decorator==5.1.1 |
||||||
|
extra-streamlit-components==0.1.56 |
||||||
|
gitdb==4.0.10 |
||||||
|
GitPython==3.1.32 |
||||||
|
greenlet==2.0.2 |
||||||
|
idna==3.4 |
||||||
|
importlib-metadata==6.8.0 |
||||||
|
importlib-resources==6.0.0 |
||||||
|
Jinja2==3.1.2 |
||||||
|
jsonschema==4.18.4 |
||||||
|
jsonschema-specifications==2023.6.1 |
||||||
|
markdown-it-py==3.0.0 |
||||||
|
MarkupSafe==2.1.3 |
||||||
|
mdurl==0.1.2 |
||||||
|
numpy==1.24.4 |
||||||
|
packaging==23.1 |
||||||
|
pandas==2.0.3 |
||||||
|
Pillow==9.5.0 |
||||||
|
pkgutil_resolve_name==1.3.10 |
||||||
|
protobuf==4.23.4 |
||||||
|
pyarrow==12.0.1 |
||||||
|
pydeck==0.8.1b0 |
||||||
|
Pygments==2.15.1 |
||||||
|
PyJWT==2.7.0 |
||||||
|
Pympler==1.0.1 |
||||||
|
python-dateutil==2.8.2 |
||||||
|
pytz==2023.3 |
||||||
|
pytz-deprecation-shim==0.1.0.post0 |
||||||
|
PyYAML==6.0.1 |
||||||
|
referencing==0.29.3 |
||||||
|
requests==2.31.0 |
||||||
|
rich==13.4.2 |
||||||
|
rpds-py==0.9.2 |
||||||
|
six==1.16.0 |
||||||
|
smmap==5.0.0 |
||||||
|
SQLAlchemy==2.0.19 |
||||||
|
streamlit==1.24.1 |
||||||
|
streamlit-authenticator==0.2.2 |
||||||
|
tenacity==8.2.2 |
||||||
|
toml==0.10.2 |
||||||
|
toolz==0.12.0 |
||||||
|
tornado==6.3.2 |
||||||
|
typing_extensions==4.7.1 |
||||||
|
tzdata==2023.3 |
||||||
|
tzlocal==4.3.1 |
||||||
|
urllib3==2.0.3 |
||||||
|
validators==0.20.0 |
||||||
|
zipp==3.16.2 |
||||||
Loading…
Reference in new issue