# csv2gexf/app.py

import networkx as nx
import pandas as pd
import streamlit as st

import info

def _is_fractional(value):
    """Return True if *value* is a finite float that is not a whole number."""
    if not isinstance(value, float):
        return False
    try:
        return bool(int(value) != value)
    except (ValueError, OverflowError):
        # int() rejects NaN (ValueError) and infinity (OverflowError);
        # neither has a fractional part that truncation could lose.
        return False


def _coerce_attribute(value, keep_float):
    """Return *value* as an ``int`` where possible, otherwise unchanged.

    Floats are passed through untouched when *keep_float* is true.
    """
    if keep_float and isinstance(value, float):
        return value
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # Not integer-like (text, NaN, infinity, None, ...): keep as is.
        return value


def add_edges(G, df, source, target, chosen_columns):
    """Add one edge per DataFrame row to the graph, including attributes.

    The row's index label is used as the edge key: ``add_edge`` is called
    with the key as third positional argument and the edge data is reached
    through ``G[u][v][key]``, so *G* has to be a multigraph.

    Attribute values are converted to ``int`` when ``int()`` accepts them.
    A column that contains at least one float with a fractional part is
    left as floats, so such values are no longer truncated.

    Args:
        G: Graph to add the edges to; modified in place.
        df: DataFrame with one relation per row.
        source: Name of the column holding the start node of each edge.
        target: Name of the column holding the end node of each edge.
        chosen_columns: Names of the columns to store as edge attributes.

    Returns:
        The graph *G* that was passed in.
    """
    # Decide per column (not per value) whether floats must be preserved, so
    # that one attribute does not end up with a mix of ints and floats.
    float_columns = {
        column
        for column in chosen_columns
        if column in df and any(_is_fractional(value) for value in df[column])
    }

    for key, row in df.iterrows():
        u, v = row[source], row[target]
        G.add_edge(u, v, key)
        # Update the edge's data dict directly rather than passing the
        # attributes as keyword arguments: a column named "key" would
        # otherwise clash with add_edge's own ``key`` parameter.
        G[u][v][key].update(
            {
                column: _coerce_attribute(row[column], column in float_columns)
                for column in chosen_columns
            }
        )

    return G


def add_nodes(G, df):
    """Add one node per DataFrame row to the graph.

    The index label of each row becomes the node, and the row's column
    values become that node's attributes.

    Args:
        G: Graph to add the nodes to; modified in place.
        df: DataFrame indexed by node label.

    Returns:
        The graph *G* that was passed in.
    """
    attributes_by_label = df.to_dict(orient="index")
    # add_nodes_from takes (node, attribute-dict) pairs.
    G.add_nodes_from(list(attributes_by_label.items()))
    return G

# Apply the CSS defined in the info module.
st.markdown(info.css, unsafe_allow_html=True)

# Page title.
st.title("Make :green[GEXF] from :red[CSV]")

# Tagline shown under the title.
st.markdown(
    """*Upload your data as CSV to make it into a gexf-file compatible
    with Gephi and [Gephi Light](https://gephi.org/gephi-lite/).*"""
)

# Expandable section with the longer explanation.
expl = st.expander(label="More info")
with expl:
    st.write(info.explainer)

# Uploader for the nodes file; the help text links to an example.
nodes_help = f'[Example]({info.node_example})'
csv_nodes = st.file_uploader(
    label="Upload file with **nodes** (if you have one).",
    key="nodes",
    help=nodes_help,
)

# Uploader for the relations file; the help text links to an example.
relations_help = f'[Example]({info.relations_example})'
csv_edges = st.file_uploader(
    label="Upload file with **relations**.",
    key="relations",
    help=relations_help,
)

def _select_column(label, columns, preferred):
    """Show a selectbox for picking one column and return the choice.

    *preferred* is preselected when it is among *columns*. Otherwise an
    empty option is appended and preselected, so "" means "nothing chosen".
    """
    options = list(columns)
    if preferred not in options:
        options.append("")
        preferred = ""
    return st.selectbox(
        label=label, options=options, index=options.index(preferred)
    )


if csv_edges is not None:
    df = pd.read_csv(csv_edges)
    # Lower-case the column names first so that "Type"/"TYPE" are also caught
    # by the rename below.
    df.columns = [i.lower() for i in df.columns]
    # 'type' can't be used as attribute.
    df.rename(columns={"type": "relation_type"}, inplace=True)
    columns = df.columns.tolist()

    # The selectboxes are rendered on every run; a widget that is only
    # created while its value is missing from the session state would
    # disappear on the next rerun and could never be changed.
    target = _select_column("Which one is the target column?", columns, "target")
    source = _select_column("Which one is the source column?", columns, "source")

    # Keep the choices in the session state as before.
    st.session_state["target"] = target
    st.session_state["source"] = source

    if source != "" and target != "":
        # Everything except the source and target columns can be an attribute.
        # Filtering (instead of list.remove) also works when the same column
        # was picked for both.
        other_columns = [c for c in columns if c not in (source, target)]
        chosen_columns = st.multiselect(
            label="Chose other columns to include.",
            options=other_columns,
            default=other_columns,
        )

        if csv_nodes is not None:  # When a nodes file is uploaded.
            # NOTE(review): the nodes file is parsed with ";" as separator
            # while the relations file uses the pandas default "," - confirm
            # that this difference is intended.
            df_nodes = pd.read_csv(csv_nodes, sep=";")
            # Remove capital letters from column names.
            df_nodes.columns = [i.lower() for i in df_nodes.columns]
            label_column = _select_column(
                "Which one is the label column in the nodes file?",
                df_nodes.columns.tolist(),
                "label",
            )
            if label_column == "":
                # No label column chosen yet: wait for the user's choice.
                st.stop()
            df_nodes.set_index(label_column, inplace=True)

        else:  # If no node file provided.
            # Every distinct value in the source and target columns becomes a
            # node without attributes. unique() keeps a missing value only
            # once, so the index stays unique.
            labels = pd.concat([df[source], df[target]], ignore_index=True).unique()
            df_nodes = pd.DataFrame(index=pd.Index(labels, name="labels"))

        gexf_file = "output.gexf"

        # Make empty graph.
        G = nx.MultiDiGraph()
        # Add nodes.
        G = add_nodes(G, df_nodes)
        # Add edges.
        G = add_edges(
            G, df, source=source, target=target, chosen_columns=chosen_columns
        )

        # Turn the graph into text. Nothing is written to disk; the text is
        # handed to the download button directly.
        graph_text = "\n".join(nx.generate_gexf(G))

        # Download gexf-file.
        st.download_button("Download gexf-file", graph_text, file_name=gexf_file)