From aed352bd47aa14da8e526bd4d639e240a6b7922f Mon Sep 17 00:00:00 2001 From: Lasse Studion Date: Wed, 3 May 2023 19:39:16 +0200 Subject: [PATCH] first commit --- .gitignore | 5 ++ app.py | 146 +++++++++++++++++++++++++++++++++++++++++++++++ info.py | 20 +++++++ requirements.txt | 50 ++++++++++++++++ 4 files changed, 221 insertions(+) create mode 100644 .gitignore create mode 100644 app.py create mode 100644 info.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5102592 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +* +!app.py +!info.py +!.gitignore +!requirements.txt \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000..526a9e7 --- /dev/null +++ b/app.py @@ -0,0 +1,146 @@ +import networkx as nx +import pandas as pd +import streamlit as st + +import info + +def add_edges(G, df, source, target, chosen_columns): + # Iterate over each row in the DataFrame and add an edge to the graph. + attrs = {} + for key, row in df.iterrows(): + # Add edge with key. + G.add_edge(row[source], row[target], key) + + # Set attributes for edge. + d_attrs = {} + for column in chosen_columns: + try: + d_attrs[column] = int(row[column]) + except ValueError: + d_attrs[column] = row[column] + attrs[(row[source], row[target], key)] = d_attrs + + + # Add the attributes to the edges. + nx.set_edge_attributes(G, attrs) + + return G + + +def add_nodes(G, df): + """Add nodes to the graph.""" + d = df.to_dict(orient="index") + nodes = [(k, v) for k, v in d.items()] + G.add_nodes_from(nodes) + return G + +# Set CSS. +st.markdown(info.css, unsafe_allow_html=True) + +# Print title. +st.title("Make :green[GEXF] from :red[CSV]") + +# Print tagline. +st.markdown( + """*Upload your data as CSV to make it into a gexf-file compatible + with Gephi and [Gephi Light](https://gephi.org/gephi-lite/).*""" +) + +# Print explainer. +expl = st.expander(label="More info") +with expl: + st.write(info.explainer) + +# Ask for nodes file. +csv_nodes = st.file_uploader( + label="Upload file with **nodes** (if you have one).", key="nodes", help=f'[Example]({info.node_example})' +) + +# Ask for relations file. +csv_edges = st.file_uploader(label="Upload file with **relations**.", key="relations", help=f'[Example]({info.relations_example})') + +if csv_edges is not None: + df = pd.read_csv(csv_edges) + df.rename({'type': 'relation_type'}, inplace=True, axis=1) # 'type' can't be used as attribute. + df.columns = [i.lower() for i in df.columns] # Remove capital letters from column names. + columns = df.columns.tolist() + + # Find and store target column. + if "target" not in st.session_state: + if "target" in columns: + preselected_target = "target" + else: + columns.append("") + preselected_target = len(columns) - 1 + + st.session_state["target"] = st.selectbox( + label="Which one is the target column?", + options=columns, + index=columns.index(preselected_target), + ) + + # Find and store source column. + if "source" not in st.session_state: + if "source" in columns: + preselected_source = "source" + else: + columns.append("") + preselected_source = len(columns) - 1 + st.session_state["source"] = st.selectbox( + label="Which one is the source column?", + options=columns, + index=columns.index(preselected_source), + ) + + # Remove source and target columns from list of options. + columns.remove(st.session_state["target"]) + columns.remove(st.session_state["source"]) + + if all([st.session_state["source"] != "", st.session_state["target"] != ""]): + source = st.session_state["source"] + target = st.session_state["target"] + chosen_columns = st.multiselect( + label="Chose other columns to include.", options=columns, default=columns + ) + + if csv_nodes != None: # When a nodes file is uploaded. + df_nodes = pd.read_csv(csv_nodes, sep=";") + df_nodes.columns = [i.lower() for i in df_nodes.columns] # Remove capital letters from column names. + columns = df_nodes.columns.tolist() + if "label" in columns: + preselected_label = "label" + else: + columns.append("") + preselected_label = len(columns) - 1 + label_column = st.selectbox( + label="Which one is the label column in the nodes file?", + options=columns, + index=columns.index(preselected_label), + ) + df_nodes.set_index(label_column, inplace=True) + + else: # If no node file provided. + nodes = list(set(df[source].tolist() + df[target].tolist())) + df_nodes = pd.DataFrame( + nodes, index=range(0, len(nodes)), columns=["labels"] + ) + df_nodes.set_index("labels", inplace=True) + + gexf_file = "output.gexf" + with open(gexf_file) as f: + # Make empty graph. + G = nx.MultiDiGraph() + # Add nodes. + G = add_nodes(G, df_nodes) + # Add edges. + G = add_edges( + G, df, source=source, target=target, chosen_columns=chosen_columns + ) + + # Turn the graph into text. + graph_text = "\n".join([line for line in nx.generate_gexf(G)]) + + # Download gexf-file. + st.download_button( + "Download gexf-file", graph_text, file_name=gexf_file + ) diff --git a/info.py b/info.py new file mode 100644 index 0000000..59546c8 --- /dev/null +++ b/info.py @@ -0,0 +1,20 @@ +# URL to gist files. +node_example = 'https://gist.githubusercontent.com/lasseedfast/1d43f23da417d64c34d304048ff58b70/raw/64b68b32318b18cfb69a509f9285f6fd0cfc4af0/nodes_example.csv' +relations_example = 'https://gist.githubusercontent.com/lasseedfast/c8548d13202d434e6801c3eed70d0586/raw/71da6a4d5a2129c5a55b8a656d53d7dddebb7ae4/relations_example.csv' + +# Make links grey/black. +css = """ +""" + +explainer = f""" + If you upload only a file with relations the nodes will be created from that one and will only have a label. + You can also upload a separate file with nodes and more information about them. If so make sure the nodes correlate + to the source/target column in the relations file. + If you use *source*, *target* and *label* in your files the process here is almost automatic (see examples). + Examples for [relation file]({relations_example}) and [node file]({node_example}). + *No data is stored.* Made by [Lasse Edfast](https://lasseedfast.se). + """ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..60a9d01 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,50 @@ +altair==4.2.2 +attrs==23.1.0 +backports.zoneinfo==0.2.1 +blinker==1.6.2 +cachetools==5.3.0 +certifi==2022.12.7 +charset-normalizer==3.1.0 +click==8.1.3 +decorator==5.1.1 +entrypoints==0.4 +gitdb==4.0.10 +GitPython==3.1.31 +idna==3.4 +importlib-metadata==6.6.0 +importlib-resources==5.12.0 +Jinja2==3.1.2 +jsonschema==4.17.3 +markdown-it-py==2.2.0 +MarkupSafe==2.1.2 +mdurl==0.1.2 +networkx==3.1 +numpy==1.24.3 +packaging==23.1 +pandas==2.0.1 +Pillow==9.5.0 +pkgutil_resolve_name==1.3.10 +protobuf==3.20.3 +pyarrow==11.0.0 +pydeck==0.8.1b0 +Pygments==2.15.1 +Pympler==1.0.1 +pyrsistent==0.19.3 +python-dateutil==2.8.2 +pytz==2023.3 +pytz-deprecation-shim==0.1.0.post0 +requests==2.29.0 +rich==13.3.5 +six==1.16.0 +smmap==5.0.0 +streamlit==1.22.0 +tenacity==8.2.2 +toml==0.10.2 +toolz==0.12.0 +tornado==6.3.1 +typing_extensions==4.5.0 +tzdata==2023.3 +tzlocal==4.3 +urllib3==1.26.15 +validators==0.20.0 +zipp==3.15.0