"""Streamlit app that turns CSV files into a GraphML file for Gephi.

The user uploads an edges/relations CSV (and optionally a nodes CSV), picks
the separator and the source/target columns, and downloads the resulting
graph as GraphML.
"""

import networkx as nx
import pandas as pd
import streamlit as st

import info


def _read_uploaded_csv(uploaded_file, sep):
    """Read an uploaded CSV file from its beginning.

    The same uploaded file object can be read more than once during a single
    script run (preview and graph building), and a second read of an already
    consumed buffer yields no data, so the buffer is rewound first.
    """
    uploaded_file.seek(0)
    return pd.read_csv(uploaded_file, sep=sep)


def _edge_value(value, keep_float):
    """Convert one DataFrame cell into the value stored on an edge.

    Args:
        value: The raw cell value taken from a row.
        keep_float: True when the cell comes from a float column.

    Returns:
        A ``float`` for float columns, an ``int`` when the value converts to
        one, otherwise the value unchanged.
    """
    if keep_float:
        # Converting with int() here would silently drop the fraction
        # (0.5 -> 0), so float columns stay floats (NaN included).
        return float(value)
    try:
        return int(value)
    except (TypeError, ValueError):
        return value


def add_edges(G, df, source, target, chosen_columns):
    """Add one edge per row of ``df`` to ``G`` and attach its attributes.

    Args:
        G: Graph to add the edges to; edges are added with the row's index
            label as edge key.
        df: DataFrame with one relation per row.
        source: Name of the column holding the source node.
        target: Name of the column holding the target node.
        chosen_columns: Columns whose values are stored as edge attributes.

    Returns:
        The same graph ``G``, with edges and edge attributes added.
    """
    # Decide "float or not" per column rather than per value, so that all
    # values of one attribute end up with the same type.
    float_columns = {
        column
        for column in chosen_columns
        if pd.api.types.is_float_dtype(df[column])
    }

    attrs = {}
    for key, row in df.iterrows():
        edge = (row[source], row[target], key)
        G.add_edge(*edge)
        attrs[edge] = {
            column: _edge_value(row[column], column in float_columns)
            for column in chosen_columns
        }

    # Add the attributes to the edges.
    nx.set_edge_attributes(G, attrs)
    return G


def add_nodes(G, df):
    """Add every row of ``df`` to ``G`` as a node.

    The index label of each row becomes the node and the remaining columns
    become that node's attributes.

    Returns:
        The same graph ``G``, with the nodes added.
    """
    G.add_nodes_from(df.to_dict(orient="index").items())
    return G


def find_columns(column, columns):
    """Let the user pick which of ``columns`` is the ``column`` column.

    If a column named exactly ``column`` exists it is pre-selected; otherwise
    an empty placeholder option (shown as 'Select an option') is added and
    pre-selected. The widget uses ``column`` as its key, so the choice is
    also available as ``st.session_state[column]``.

    Args:
        column: Role being asked for, e.g. ``'source'`` or ``'target'``.
        columns: Available column names. The list is not modified.

    Returns:
        The selected column name, or ``''`` while nothing is selected.
    """
    options = list(columns)
    if column in options:
        preselected = column
    else:
        options.append('')
        preselected = ''

    return st.selectbox(
        label=f"Which one is the {column} column?",
        options=options,
        format_func=lambda x: 'Select an option' if x == '' else x,
        index=options.index(preselected),
        key=column,
    )


# Set page config and CSS.
st.set_page_config(page_title='CSV→Gephi', page_icon='🎭')
st.markdown(info.css, unsafe_allow_html=True)

# Print title.
st.title("Make :green[Gephi] from :red[CSV]")

# Print tagline.
st.markdown(
    """*Upload your data as CSV to make it into a GraphML-file compatible with Gephi and [Gephi Light](https://gephi.org/gephi-lite/).*"""
)

# Print explainer.
expl = st.expander(label="More info")
with expl:
    st.write(info.explainer)

# Ask for nodes file.
csv_nodes = st.file_uploader(
    label="Upload file with **nodes** (if you have one).",
    key="nodes",
    help=f'[Example]({info.node_example})',
)

# Ask for relations file.
csv_edges = st.file_uploader(
    label="Upload file with **edges/relations**.",
    key="relations",
    help=f'[Example]({info.relations_example})',
)

col1, col2 = st.columns([1, 2])

# Choose separator.
with col1:
    separators = {
        'comma ( , )': ',',
        'semicolon ( ; )': ';',
        'tab ( \u21E5 )': '\t',
        'pipe (|)': '|',
        'space ( )': ' ',
    }
    sep = st.radio(
        'Separator in your files:',
        options=[*separators, 'custom'],
        help='What are the values in your files separated with?',
    )
    if sep == 'custom':
        custom_sep = st.text_input('Custom delimiter:')
        # An empty delimiter is not usable for parsing, so keep the standard
        # comma until the user has typed one.
        st.session_state["sep"] = custom_sep or ','
    else:
        st.session_state["sep"] = separators[sep]

# Preview file.
with col2:
    preview = st.button('Preview file.')
    if preview:
        if csv_edges is None:
            st.markdown(':red[You need to upload a file with relations.]')
        else:
            try:
                st.dataframe(
                    _read_uploaded_csv(csv_edges, st.session_state["sep"]),
                    use_container_width=True,
                )
            except (pd.errors.ParserError, pd.errors.EmptyDataError):
                st.markdown(':red[Have you selected a correct separator?]')

files_uploaded = st.button('Done', 'files_uploaded')

# Buttons are only "pressed" during the run they were clicked in, so remember
# in the session state that the user has already confirmed the upload.
if files_uploaded or 'files_already_uploaded' in st.session_state:
    st.session_state['files_already_uploaded'] = True

    if csv_edges is None:
        st.markdown(':red[You need to upload a file with relations.]')
        st.stop()

    try:
        df = _read_uploaded_csv(csv_edges, st.session_state["sep"])
    except (pd.errors.ParserError, pd.errors.EmptyDataError):
        st.markdown(':red[Have you chosen the right kind of separator?]')
        st.stop()

    # Remove capital letters from column names. This has to happen before the
    # rename below, otherwise a column called e.g. 'Type' would slip through.
    df.columns = [str(name).lower() for name in df.columns]
    # 'type' can't be used as attribute.
    df = df.rename(columns={'type': 'relation_type'})

    # Find and store target column.
    target = find_columns('target', df.columns.tolist())
    # Find and store source column.
    source = find_columns('source', df.columns.tolist())

    # Remove source and target columns from list of options. Filtering (rather
    # than list.remove) also works while a choice is still empty or when the
    # same column was picked for both roles.
    columns = [name for name in df.columns if name not in (source, target)]

    if source != "" and target != "":
        # Let the user choose which columns should be included.
        chosen_columns = st.multiselect(
            label="Chose other columns to include.",
            options=columns,
            default=columns,
        )

        if csv_nodes is not None:
            # When a nodes file is uploaded.
            df_nodes = _read_uploaded_csv(csv_nodes, st.session_state["sep"])
            # Remove capital letters from column names.
            df_nodes.columns = [str(name).lower() for name in df_nodes.columns]
            label_column = find_columns('label', df_nodes.columns.tolist())
        else:
            # If no node file provided, every distinct source/target value
            # becomes a node.
            nodes = list(set(df[source].tolist() + df[target].tolist()))
            df_nodes = pd.DataFrame(
                nodes, index=range(0, len(nodes)), columns=["labels"]
            )
            label_column = 'labels'
        st.session_state['label_column'] = label_column

        # Use the label column as index so the labels become the node names.
        if label_column != '' and df_nodes.index.name != label_column:
            df_nodes.set_index(label_column, inplace=True)

        # Make empty graph.
        G = nx.MultiDiGraph()
        # Add nodes.
        G = add_nodes(G, df_nodes)
        # Add edges.
        G = add_edges(
            G, df, source=source, target=target, chosen_columns=chosen_columns
        )

        # Turn the graph into a string.
        graph_text = "\n".join(nx.generate_graphml(G))

        # Download graphml-file.
        graphml_file = "output.graphml"
        st.download_button(
            "Download grampml-file", graph_text, file_name=graphml_file
        )
        st.write('Import the file to Gephi/Gephi Light, or try [Gephisto](https://jacomyma.github.io/gephisto/) to get an idea of the network.')