first commit

3 years ago · aed352bd47
commit aed352bd47
4 changed files with 221 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
+*
+!app.py
+!info.py
+!.gitignore
+!requirements.txt
--- a/app.py
+++ b/app.py
@ -0,0 +1,146 @@
+import networkx as nx
+import pandas as pd
+import streamlit as st
+
+import info
+
+def _coerce_attribute(value):
+    """Return ``value`` in the form it should be stored on an edge.
+
+    Whole numbers (``7``, ``7.0`` or the text ``"7"``) become a plain
+    ``int``. Everything else is returned unchanged, so a fractional number
+    such as ``0.5`` is not truncated to ``0`` and missing or infinite
+    values do not raise.
+    """
+    try:
+        as_int = int(value)
+    except (ValueError, TypeError, OverflowError):
+        # Text, NaN, None or infinity: keep the original value.
+        return value
+    # int() truncates fractional numbers; only use it when nothing is lost.
+    if isinstance(value, str) or as_int == value:
+        return as_int
+    return value
+
+
+def add_edges(G, df, source, target, chosen_columns):
+    """Add one edge per row of ``df`` to ``G`` and return ``G``.
+
+    Args:
+        G: Graph to add to. ``add_edge`` is called with an edge key, so this
+            has to be a networkx multigraph (the app passes a MultiDiGraph).
+        df: DataFrame with one relation per row.
+        source: Name of the column holding the start node of each edge.
+        target: Name of the column holding the end node of each edge.
+        chosen_columns: Names of the columns stored as edge attributes.
+
+    Returns:
+        The same graph object ``G`` with edges and edge attributes added.
+    """
+    attrs = {}
+    for key, row in df.iterrows():
+        # The row index is used as edge key so that several rows between the
+        # same pair of nodes end up as separate (parallel) edges.
+        edge = (row[source], row[target], key)
+        G.add_edge(*edge)
+        attrs[edge] = {
+            column: _coerce_attribute(row[column]) for column in chosen_columns
+        }
+
+    # Attach all attributes in one go.
+    nx.set_edge_attributes(G, attrs)
+
+    return G
+
+
+def add_nodes(G, df):
+    """Register every row of ``df`` as a node in ``G`` and return ``G``.
+
+    The index value of a row becomes the node, and the values in the row's
+    columns are stored as that node's attributes.
+    """
+    attributes_by_node = df.to_dict(orient="index")
+    G.add_nodes_from(attributes_by_node.items())
+    return G
+
+def _select_column(label, columns, preferred):
+    """Let the user pick one of ``columns`` in a select box.
+
+    ``preferred`` is preselected when it is one of the columns. Otherwise an
+    empty choice ("") is added and preselected, so the user has to choose
+    actively. Returns the chosen column name, or "" while nothing is chosen.
+    """
+    options = list(columns)  # Copy, so the caller's list is left untouched.
+    if preferred not in options:
+        options.append("")
+        preferred = ""
+    return st.selectbox(
+        label=label, options=options, index=options.index(preferred)
+    )
+
+
+# Set CSS.
+st.markdown(info.css, unsafe_allow_html=True)
+
+# Print title.
+st.title("Make :green[GEXF] from :red[CSV]")
+
+# Print tagline.
+st.markdown(
+    """*Upload your data as CSV to make it into a gexf-file compatible 
+    with Gephi and [Gephi Light](https://gephi.org/gephi-lite/).*"""
+)
+
+# Print explainer.
+expl = st.expander(label="More info")
+with expl:
+    st.write(info.explainer)
+
+# Ask for nodes file.
+csv_nodes = st.file_uploader(
+    label="Upload file with **nodes** (if you have one).",
+    key="nodes",
+    help=f"[Example]({info.node_example})",
+)
+
+# Ask for relations file.
+csv_edges = st.file_uploader(
+    label="Upload file with **relations**.",
+    key="relations",
+    help=f"[Example]({info.relations_example})",
+)
+
+if csv_edges is not None:
+    df = pd.read_csv(csv_edges)
+    # Lower-case the column names first so that 'Type'/'TYPE' are caught by
+    # the rename below as well.
+    df.columns = [i.lower() for i in df.columns]
+    # 'type' can't be used as attribute.
+    df.rename({"type": "relation_type"}, inplace=True, axis=1)
+    columns = df.columns.tolist()
+
+    # Ask for the target and source columns on every run. The widgets keep
+    # their own state, and the choices always match the uploaded file.
+    target = _select_column("Which one is the target column?", columns, "target")
+    source = _select_column("Which one is the source column?", columns, "source")
+
+    if source != "" and target != "":
+        # Every column that is not an endpoint can become an edge attribute.
+        # Filtering (instead of list.remove) also works when source and
+        # target are the same column.
+        other_columns = [c for c in columns if c not in (source, target)]
+        chosen_columns = st.multiselect(
+            label="Chose other columns to include.",
+            options=other_columns,
+            default=other_columns,
+        )
+
+        df_nodes = None
+        if csv_nodes is not None:  # When a nodes file is uploaded.
+            # NOTE(review): the nodes file is read with ';' as separator while
+            # the relations file uses the default ',' - confirm this is intended.
+            nodes_file = pd.read_csv(csv_nodes, sep=";")
+            # Remove capital letters from column names.
+            nodes_file.columns = [i.lower() for i in nodes_file.columns]
+            label_column = _select_column(
+                "Which one is the label column in the nodes file?",
+                nodes_file.columns.tolist(),
+                "label",
+            )
+            # Wait with building the graph until a label column is chosen.
+            if label_column != "":
+                df_nodes = nodes_file.set_index(label_column)
+
+        else:  # If no node file provided.
+            # Unique endpoints in order of first appearance. This keeps the
+            # output stable between runs and collapses missing values into
+            # a single entry.
+            nodes = pd.concat([df[source], df[target]]).unique().tolist()
+            df_nodes = pd.DataFrame(
+                nodes, index=range(0, len(nodes)), columns=["labels"]
+            )
+            df_nodes.set_index("labels", inplace=True)
+
+        if df_nodes is not None:
+            # Only used as the suggested download name; nothing is written
+            # to (or read from) disk.
+            gexf_file = "output.gexf"
+
+            # Make empty graph.
+            G = nx.MultiDiGraph()
+            # Add nodes.
+            G = add_nodes(G, df_nodes)
+            # Add edges.
+            G = add_edges(
+                G, df, source=source, target=target, chosen_columns=chosen_columns
+            )
+
+            # Turn the graph into text.
+            graph_text = "\n".join(nx.generate_gexf(G))
+
+            # Download gexf-file.
+            st.download_button(
+                "Download gexf-file", graph_text, file_name=gexf_file
+            )
--- a/info.py
+++ b/info.py
@ -0,0 +1,20 @@
+# Static texts and links used by app.py.
+
+# URLs to the example CSV files (GitHub gists). app.py links to them from the
+# help texts of the two file uploaders, and they are linked in the explainer
+# below.
+node_example = 'https://gist.githubusercontent.com/lasseedfast/1d43f23da417d64c34d304048ff58b70/raw/64b68b32318b18cfb69a509f9285f6fd0cfc4af0/nodes_example.csv'
+relations_example = 'https://gist.githubusercontent.com/lasseedfast/c8548d13202d434e6801c3eed70d0586/raw/71da6a4d5a2129c5a55b8a656d53d7dddebb7ae4/relations_example.csv'
+
+# Make links grey/black. app.py injects this <style> block into the page with
+# st.markdown(..., unsafe_allow_html=True).
+css = """<style>
+a:link {color: black;}
+a:visited {color: black;}
+a:hover {color: grey;}
+</style>
+"""
+
+# Text shown in the "More info" expander in app.py. The two trailing spaces at
+# the end of some lines are presumably Markdown hard line breaks - keep them
+# when editing.
+explainer = f"""
+    If you upload only a file with relations the nodes will be created from that one and will only have a label.  
+    You can also upload a separate file with nodes and more information about them. If so make sure the nodes correlate 
+    to the source/target column in the relations file.  
+    If you use *source*, *target* and *label* in your files the process here is almost automatic (see examples).  
+    Examples for [relation file]({relations_example}) and [node file]({node_example}).  
+    *No data is stored.* Made by [Lasse Edfast](https://lasseedfast.se). 
+    """
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,50 @@
+altair==4.2.2
+attrs==23.1.0
+backports.zoneinfo==0.2.1
+blinker==1.6.2
+cachetools==5.3.0
+certifi==2022.12.7
+charset-normalizer==3.1.0
+click==8.1.3
+decorator==5.1.1
+entrypoints==0.4
+gitdb==4.0.10
+GitPython==3.1.31
+idna==3.4
+importlib-metadata==6.6.0
+importlib-resources==5.12.0
+Jinja2==3.1.2
+jsonschema==4.17.3
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+mdurl==0.1.2
+networkx==3.1
+numpy==1.24.3
+packaging==23.1
+pandas==2.0.1
+Pillow==9.5.0
+pkgutil_resolve_name==1.3.10
+protobuf==3.20.3
+pyarrow==11.0.0
+pydeck==0.8.1b0
+Pygments==2.15.1
+Pympler==1.0.1
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+pytz==2023.3
+pytz-deprecation-shim==0.1.0.post0
+requests==2.29.0
+rich==13.3.5
+six==1.16.0
+smmap==5.0.0
+streamlit==1.22.0
+tenacity==8.2.2
+toml==0.10.2
+toolz==0.12.0
+tornado==6.3.1
+typing_extensions==4.5.0
+tzdata==2023.3
+tzlocal==4.3
+urllib3==1.26.15
+validators==0.20.0
+zipp==3.15.0