first commit
This commit is contained in:
commit
aed352bd47
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
*
|
||||
!app.py
|
||||
!info.py
|
||||
!.gitignore
|
||||
!requirements.txt
|
146
app.py
Normal file
146
app.py
Normal file
@ -0,0 +1,146 @@
|
||||
import networkx as nx
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
import info
|
||||
|
||||
def _coerce_attribute(value):
    """Return *value* as an int when that loses no information, else unchanged."""
    try:
        as_int = int(value)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, NaN, infinity or a non-numeric object:
        # keep the value exactly as it was read.
        return value
    if isinstance(value, float) and as_int != value:
        # Fractional number: int() would silently truncate it (3.7 -> 3).
        return value
    return as_int


def add_edges(G, df, source, target, chosen_columns):
    """Add one keyed edge per row of *df* to the multigraph *G*.

    Args:
        G: Graph exposing the multigraph ``add_edges_from`` API, i.e. one
            that accepts ``(u, v, key, data)`` 4-tuples.
        df: DataFrame with one relation per row.
        source: Name of the column holding the edge's start node.
        target: Name of the column holding the edge's end node.
        chosen_columns: Names of the columns stored as edge attributes.

    Returns:
        The same graph object *G*, with the edges added.

    The row's index label is used as the edge key. Attribute values are
    converted to ``int`` when that is lossless (numeric strings, integral
    floats); everything else is stored unchanged.
    """
    edges = [
        (
            row[source],
            row[target],
            key,
            {column: _coerce_attribute(row[column]) for column in chosen_columns},
        )
        for key, row in df.iterrows()
    ]
    # Edge and attributes are added in one step, so no second pass is needed.
    G.add_edges_from(edges)
    return G
|
||||
|
||||
|
||||
def add_nodes(G, df):
    """Add every row of *df* to *G* as a node and return *G*.

    The row's index label becomes the node, and the remaining columns of
    the row become that node's attribute dict.
    """
    attributes_by_node = df.to_dict(orient="index")
    node_bunch = list(attributes_by_node.items())
    G.add_nodes_from(node_bunch)
    return G
|
||||
|
||||
# Streamlit page: upload a relations CSV (and optionally a nodes CSV), pick
# the relevant columns and download the resulting graph as a GEXF file.


def _select_column(label, columns, preferred):
    """Render a selectbox for picking one of *columns* and return the choice.

    *preferred* is preselected when it is one of the columns. Otherwise an
    empty placeholder option is appended and preselected, and "" is returned
    until the user picks a real column.
    """
    options = list(columns)
    if preferred not in options:
        options.append("")
        preferred = ""
    return st.selectbox(
        label=label, options=options, index=options.index(preferred)
    )


# Set CSS.
st.markdown(info.css, unsafe_allow_html=True)

# Print title.
st.title("Make :green[GEXF] from :red[CSV]")

# Print tagline.
st.markdown(
    """*Upload your data as CSV to make it into a gexf-file compatible
with Gephi and [Gephi Lite](https://gephi.org/gephi-lite/).*"""
)

# Print explainer.
expl = st.expander(label="More info")
with expl:
    st.write(info.explainer)

# Ask for nodes file.
csv_nodes = st.file_uploader(
    label="Upload file with **nodes** (if you have one).",
    key="nodes",
    help=f"[Example]({info.node_example})",
)

# Ask for relations file.
csv_edges = st.file_uploader(
    label="Upload file with **relations**.",
    key="relations",
    help=f"[Example]({info.relations_example})",
)

if csv_edges is not None:
    df = pd.read_csv(csv_edges)
    # Remove capital letters from column names. Done before the rename below
    # so that a column called e.g. "Type" is caught as well.
    df.columns = [name.lower() for name in df.columns]
    # 'type' can't be used as attribute.
    df.rename(columns={"type": "relation_type"}, inplace=True)
    columns = df.columns.tolist()

    # Find and store target and source columns. The selectboxes are rendered
    # on every rerun so the user can always change the choice.
    target = _select_column("Which one is the target column?", columns, "target")
    source = _select_column("Which one is the source column?", columns, "source")
    st.session_state["target"] = target
    st.session_state["source"] = source

    # Nothing more can be done until both columns are chosen.
    if source != "" and target != "":
        # Remove source and target columns from list of options.
        other_columns = [c for c in columns if c not in (source, target)]
        chosen_columns = st.multiselect(
            label="Choose other columns to include.",
            options=other_columns,
            default=other_columns,
        )

        if csv_nodes is not None:  # When a nodes file is uploaded.
            # NOTE(review): the nodes file is read with ';' as separator while
            # the relations file uses the default ',' -- confirm this is intended.
            df_nodes = pd.read_csv(csv_nodes, sep=";")
            # Remove capital letters from column names.
            df_nodes.columns = [name.lower() for name in df_nodes.columns]
            label_column = _select_column(
                "Which one is the label column in the nodes file?",
                df_nodes.columns.tolist(),
                "label",
            )
            if label_column != "":
                df_nodes.set_index(label_column, inplace=True)
            else:
                # No label column chosen yet: wait instead of crashing.
                df_nodes = None

        else:  # If no node file provided.
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the node order in the output is the same on every run.
            nodes = list(dict.fromkeys(df[source].tolist() + df[target].tolist()))
            df_nodes = pd.DataFrame(
                nodes, index=range(0, len(nodes)), columns=["labels"]
            )
            df_nodes.set_index("labels", inplace=True)

        if df_nodes is not None:
            # Only used as the suggested download name; nothing is written to
            # or read from disk.
            gexf_file = "output.gexf"

            # Make empty graph.
            G = nx.MultiDiGraph()
            # Add nodes.
            G = add_nodes(G, df_nodes)
            # Add edges.
            G = add_edges(
                G, df, source=source, target=target, chosen_columns=chosen_columns
            )

            # Turn the graph into text.
            graph_text = "\n".join(nx.generate_gexf(G))

            # Download gexf-file.
            st.download_button(
                "Download gexf-file", graph_text, file_name=gexf_file
            )
|
20
info.py
Normal file
20
info.py
Normal file
@ -0,0 +1,20 @@
|
||||
# URL to gist files.
# Raw gist URLs of the example CSV files. app.py links to them from the
# file-uploader help texts, and they are interpolated into `explainer` below.
node_example = 'https://gist.githubusercontent.com/lasseedfast/1d43f23da417d64c34d304048ff58b70/raw/64b68b32318b18cfb69a509f9285f6fd0cfc4af0/nodes_example.csv'
relations_example = 'https://gist.githubusercontent.com/lasseedfast/c8548d13202d434e6801c3eed70d0586/raw/71da6a4d5a2129c5a55b8a656d53d7dddebb7ae4/relations_example.csv'

# Make links grey/black.
# Raw <style> block; app.py passes it to st.markdown with
# unsafe_allow_html=True.
css = """<style>
a:link {color: black;}
a:visited {color: black;}
a:hover {color: grey;}
</style>
"""

# Text that app.py writes inside the "More info" expander. It is an f-string
# so that the two example URLs defined above are embedded as links.
explainer = f"""
If you upload only a file with relations the nodes will be created from that one and will only have a label.
You can also upload a separate file with nodes and more information about them. If so make sure the nodes correlate
to the source/target column in the relations file.
If you use *source*, *target* and *label* in your files the process here is almost automatic (see examples).
Examples for [relation file]({relations_example}) and [node file]({node_example}).
*No data is stored.* Made by [Lasse Edfast](https://lasseedfast.se).
"""
|
50
requirements.txt
Normal file
50
requirements.txt
Normal file
@ -0,0 +1,50 @@
|
||||
altair==4.2.2
|
||||
attrs==23.1.0
|
||||
backports.zoneinfo==0.2.1
|
||||
blinker==1.6.2
|
||||
cachetools==5.3.0
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.1.0
|
||||
click==8.1.3
|
||||
decorator==5.1.1
|
||||
entrypoints==0.4
|
||||
gitdb==4.0.10
|
||||
GitPython==3.1.31
|
||||
idna==3.4
|
||||
importlib-metadata==6.6.0
|
||||
importlib-resources==5.12.0
|
||||
Jinja2==3.1.2
|
||||
jsonschema==4.17.3
|
||||
markdown-it-py==2.2.0
|
||||
MarkupSafe==2.1.2
|
||||
mdurl==0.1.2
|
||||
networkx==3.1
|
||||
numpy==1.24.3
|
||||
packaging==23.1
|
||||
pandas==2.0.1
|
||||
Pillow==9.5.0
|
||||
pkgutil_resolve_name==1.3.10
|
||||
protobuf==3.20.3
|
||||
pyarrow==11.0.0
|
||||
pydeck==0.8.1b0
|
||||
Pygments==2.15.1
|
||||
Pympler==1.0.1
|
||||
pyrsistent==0.19.3
|
||||
python-dateutil==2.8.2
|
||||
pytz==2023.3
|
||||
pytz-deprecation-shim==0.1.0.post0
|
||||
requests==2.29.0
|
||||
rich==13.3.5
|
||||
six==1.16.0
|
||||
smmap==5.0.0
|
||||
streamlit==1.22.0
|
||||
tenacity==8.2.2
|
||||
toml==0.10.2
|
||||
toolz==0.12.0
|
||||
tornado==6.3.1
|
||||
typing_extensions==4.5.0
|
||||
tzdata==2023.3
|
||||
tzlocal==4.3
|
||||
urllib3==1.26.15
|
||||
validators==0.20.0
|
||||
zipp==3.15.0
|
Loading…
x
Reference in New Issue
Block a user