Now works with example file

This commit is contained in:
Lasse Studion 2023-05-09 08:08:14 +02:00
parent 82a240f7ab
commit 6c88e12121

228
app.py
View File

@ -34,6 +34,26 @@ def add_nodes(G, df):
G.add_nodes_from(nodes)
return G
def find_columns(column, columns):
    """Ask the user which column of a file plays the role named by ``column``.

    Renders a Streamlit selectbox listing ``columns``. If a column whose name
    equals ``column`` exists it is preselected; otherwise an empty placeholder
    option (displayed as 'Select an option') is added and preselected.

    Args:
        column: Role name to look for, e.g. 'target', 'source' or 'label'.
            It is also used as the widget key, so the current choice is
            stored under ``st.session_state[column]``.
        columns: Column names to offer as options. The caller's sequence is
            not modified.

    Returns:
        The option selected in the widget; '' while the placeholder is
        still selected.
    """
    # Work on a copy so the placeholder never leaks into the caller's list.
    options = list(columns)
    if column in options:
        preselected = column
    else:
        options.append('')
        preselected = ''

    # Let the user confirm or change the preselected column.
    return st.selectbox(
        label=f"Which one is the {column} column?",
        options=options,
        format_func=lambda x: 'Select an option' if x == '' else x,
        index=options.index(preselected),
        key=column,
    )
# Set page config and CSS.
st.set_page_config(page_title='CSV→GEXF', page_icon='🎭')
st.markdown(info.css, unsafe_allow_html=True)
@ -46,12 +66,14 @@ st.markdown(
with Gephi and [Gephi Light](https://gephi.org/gephi-lite/).*"""
)
# Print explainer.
expl = st.expander(label="More info")
with expl:
st.write(info.explainer)
try:
# Print explainer.
expl = st.expander(label="More info")
with expl:
st.write(info.explainer)
with st.form("files"):
# Ask for nodes file.
csv_nodes = st.file_uploader(
label="Upload file with **nodes** (if you have one).", key="nodes", help=f'[Example]({info.node_example})'
@ -60,102 +82,108 @@ with st.form("files"):
# Ask for relations file.
csv_edges = st.file_uploader(label="Upload file with **relations**.", key="relations", help=f'[Example]({info.relations_example})')
sep = st.radio('Separator in your files:', options=['comma ( , )', 'semicolon ( ; )', 'tab ( \u21E5 )'], help='What are the values in your files separated with?')
col1, col2 = st.columns([1,2])
files_uploaded = st.form_submit_button("Done")
# Choose separator
with col1:
# Set standard separator.
st.session_state["sep"] = ','
if files_uploaded:
separators = {'comma ( , )': ',', 'semicolon ( ; )': ';', 'tab ( \u21E5 )': '\t'}
if 'sep' not in st.session_state:
st.session_state["sep"] = separators[sep]
if csv_edges == None:
st.markdown(':red[You need to upload a file with relations.]')
st.stop()
df = pd.read_csv(csv_edges, sep=st.session_state["sep"])
df.rename({'type': 'relation_type'}, inplace=True, axis=1) # 'type' can't be used as attribute.
df.columns = [i.lower() for i in df.columns] # Remove capital letters from column names.
columns = df.columns.tolist()
# Find and store target column.
if "target" not in st.session_state:
if "target" in columns:
preselected_target = "target"
else:
columns.append("")
preselected_target = len(columns) - 1
st.session_state["target"] = st.selectbox(
label="Which one is the target column?",
options=columns,
index=columns.index(preselected_target),
)
# Find and store source column.
if "source" not in st.session_state:
if "source" in columns:
preselected_source = "source"
else:
columns.append("")
preselected_source = len(columns) - 1
st.session_state["source"] = st.selectbox(
label="Which one is the source column?",
options=columns,
index=columns.index(preselected_source),
)
# Remove source and target columns from list of options.
columns.remove(st.session_state["target"])
columns.remove(st.session_state["source"])
if all([st.session_state["source"] != "", st.session_state["target"] != ""]):
source = st.session_state["source"]
target = st.session_state["target"]
chosen_columns = st.multiselect(
label="Chose other columns to include.", options=columns, default=columns
)
if csv_nodes != None: # When a nodes file is uploaded.
df_nodes = pd.read_csv(csv_nodes, sep=st.session_state["sep"])
df_nodes.columns = [i.lower() for i in df_nodes.columns] # Remove capital letters from column names.
columns = df_nodes.columns.tolist()
if "label" in columns:
preselected_label = "label"
else:
columns.append("")
preselected_label = len(columns) - 1
label_column = st.selectbox(
label="Which one is the label column in the nodes file?",
options=columns,
index=columns.index(preselected_label),
# Ask for separator.
sep = st.radio(
'Separator in your files:',
options=['comma ( , )', 'semicolon ( ; )', 'tab ( \u21E5 )', 'pipe (|)', 'space ( )'],
help='What are the values in your files separated with?'
)
df_nodes.set_index(label_column, inplace=True)
else: # If no node file provided.
nodes = list(set(df[source].tolist() + df[target].tolist()))
df_nodes = pd.DataFrame(
nodes, index=range(0, len(nodes)), columns=["labels"]
)
df_nodes.set_index("labels", inplace=True)
# Make empty graph.
G = nx.MultiDiGraph()
# Add nodes.
G = add_nodes(G, df_nodes)
# Add edges.
G = add_edges(
G, df, source=source, target=target, chosen_columns=chosen_columns
)
# Turn the graph into a string.
graph_text = "\n".join([line for line in nx.generate_gexf(G)])
# Download gexf-file.
gexf_file = "output.gexf"
st.download_button(
"Download gexf-file", graph_text, file_name=gexf_file
)
separators = {'comma ( , )': ',', 'semicolon ( ; )': ';', 'tab ( \u21E5 )': '\t', 'pipe (|)': '|', 'space ( )': ' '}
st.session_state["sep"] = separators[sep]
# Preview file
with col2:
preview = st.button('Preview file.')
if preview:
st.dataframe(pd.read_csv(csv_edges, sep=st.session_state["sep"]))
files_uploaded = st.button('Done', 'files_uploaded')
print(st.session_state['files_uploaded'])
if files_uploaded or 'files_already_uploaded' in st.session_state:
st.session_state['files_already_uploaded'] = True
if csv_edges == None:
st.markdown(':red[You need to upload a file with relations.]')
st.stop()
print('SEP', st.session_state["sep"])
try:
df = pd.read_csv(csv_edges, sep=st.session_state["sep"])
except pd.errors.EmptyDataError:
st.markdown(':red[Have you chosen the right kind of separator?]')
st.stop()
df.rename({'type': 'relation_type'}, inplace=True, axis=1) # 'type' can't be used as attribute.
df.columns = [i.lower() for i in df.columns] # Remove capital letters from column names.
with st.form('select_columns'):
# Find and store target column.
target = find_columns('target', df.columns.tolist())
# Find and store source column.
source = find_columns('source', df.columns.tolist())
# Confirm choice.
columns_selected = st.form_submit_button("Done")
if columns_selected:
# Remove source and target columns from list of options.
columns = df.columns.tolist()
columns.remove(st.session_state["target"])
columns.remove(st.session_state["source"])
if all([st.session_state["source"] != "", st.session_state["target"] != ""]):
source = st.session_state["source"]
target = st.session_state["target"]
# Let the user choose which columns should be included.
chosen_columns = st.multiselect(
label="Chose other columns to include.", options=columns, default=columns
)
if csv_nodes != None: # When a nodes file is uploaded.
df_nodes = pd.read_csv(csv_nodes, sep=st.session_state["sep"])
df_nodes.columns = [i.lower() for i in df_nodes.columns] # Remove capital letters from column names.
label_column = find_columns('label', df_nodes.columns.tolist())
df_nodes.set_index(label_column, inplace=True)
else: # If no node file provided.
nodes = list(set(df[source].tolist() + df[target].tolist()))
df_nodes = pd.DataFrame(
nodes, index=range(0, len(nodes)), columns=["labels"]
)
df_nodes.set_index("labels", inplace=True)
# Make empty graph.
G = nx.MultiDiGraph()
# Add nodes.
G = add_nodes(G, df_nodes)
# Add edges.
G = add_edges(
G, df, source=source, target=target, chosen_columns=chosen_columns
)
# Turn the graph into a string.
graph_text = "\n".join([line for line in nx.generate_gexf(G)])
# Download gexf-file.
gexf_file = "output.gexf"
st.download_button(
"Download gexf-file", graph_text, file_name=gexf_file
)
st.write('Import the file to Gephi/Gephi Light, or try [Gephisto](https://jacomyma.github.io/gephisto/) to get an idea of the network.')
except:
st.markdown(':red[Something went wrong, please try again or [write to me](https://twitter.com/lasseedfast).]')