commit
aed352bd47
4 changed files with 221 additions and 0 deletions
@ -0,0 +1,5 @@ |
|||||||
|
* |
||||||
|
!app.py |
||||||
|
!info.py |
||||||
|
!.gitignore |
||||||
|
!requirements.txt |
||||||
@ -0,0 +1,146 @@ |
|||||||
|
import networkx as nx |
||||||
|
import pandas as pd |
||||||
|
import streamlit as st |
||||||
|
|
||||||
|
import info |
||||||
|
|
||||||
|
def _coerce_attribute(value):
    """Return *value* as an ``int`` when nothing is lost, otherwise keep it usable.

    * Whole numbers (``3``, ``"3"``, ``2.0``) are returned as ``int``.
    * Floats with a fractional part are returned as plain ``float`` instead of
      being truncated.
    * Anything ``int()`` rejects (free text, NaN, infinity, ``None``) is
      returned unchanged.
    """
    try:
        as_int = int(value)
    except (TypeError, ValueError, OverflowError):
        return value
    if isinstance(value, float) and value != as_int:
        # int() truncates towards zero; keep the fractional part instead.
        return float(value)
    return as_int


def add_edges(G, df, source, target, chosen_columns):
    """Add one keyed edge per row of *df* to *G*.

    Args:
        G: Multigraph to extend. It must accept ``(u, v, key, data)`` tuples
            in ``add_edges_from``.
        df: DataFrame with one relation per row.
        source: Name of the column holding the start node of each edge.
        target: Name of the column holding the end node of each edge.
        chosen_columns: Names of the columns copied onto each edge as
            attributes.

    Returns:
        The graph that was passed in, now containing the edges.
    """
    edges = []
    for key, row in df.iterrows():
        attributes = {
            column: _coerce_attribute(row[column]) for column in chosen_columns
        }
        # The row's index label doubles as the edge key, so rows that share
        # the same endpoints stay separate edges as long as labels are unique.
        edges.append((row[source], row[target], key, attributes))

    # Edges and their attributes are added in a single call.
    G.add_edges_from(edges)

    return G
||||||
|
|
||||||
|
|
||||||
|
def add_nodes(G, df):
    """Add every row of *df* to *G* as a node and return *G*.

    The index label of a row is used as the node, and the row's columns are
    passed along as that node's attribute dict.
    """
    attributes_by_label = df.to_dict(orient="index")
    G.add_nodes_from(list(attributes_by_label.items()))
    return G
||||||
|
|
||||||
|
def _pick_column(label, candidates, preferred):
    """Let the user pick one column, preselecting *preferred* when it exists.

    If *preferred* is not among *candidates*, an empty placeholder option is
    appended and preselected instead, so the user has to choose explicitly.

    Returns:
        The selected column name; "" means no column has been chosen yet.
    """
    options = list(candidates)
    if preferred not in options:
        options.append("")
        preferred = ""
    return st.selectbox(
        label=label, options=options, index=options.index(preferred)
    )


# Set CSS.
st.markdown(info.css, unsafe_allow_html=True)

# Print title.
st.title("Make :green[GEXF] from :red[CSV]")

# Print tagline.
st.markdown(
    "*Upload your data as CSV to make it into a gexf-file compatible\n"
    "with Gephi and [Gephi Lite](https://gephi.org/gephi-lite/).*"
)

# Print explainer.
expl = st.expander(label="More info")
with expl:
    st.write(info.explainer)

# Ask for nodes file.
csv_nodes = st.file_uploader(
    label="Upload file with **nodes** (if you have one).",
    key="nodes",
    help=f"[Example]({info.node_example})",
)

# Ask for relations file.
csv_edges = st.file_uploader(
    label="Upload file with **relations**.",
    key="relations",
    help=f"[Example]({info.relations_example})",
)

if csv_edges is not None:
    df = pd.read_csv(csv_edges)
    # Lower-case the column names first, so "Type"/"TYPE" are also caught by
    # the rename below.
    df.columns = [name.lower() for name in df.columns]
    df.rename(columns={"type": "relation_type"}, inplace=True)  # 'type' can't be used as attribute.
    columns = df.columns.tolist()

    # The selectboxes are rendered on every run, so the choice can be changed
    # and always refers to the file that is currently uploaded.
    target = _pick_column("Which one is the target column?", columns, "target")
    source = _pick_column("Which one is the source column?", columns, "source")

    if source != "" and target != "":
        # Offer every column except the two endpoint columns as attributes.
        other_columns = [name for name in columns if name not in (source, target)]
        chosen_columns = st.multiselect(
            label="Choose other columns to include.",
            options=other_columns,
            default=other_columns,
        )

        if csv_nodes is not None:  # When a nodes file is uploaded.
            # NOTE(review): the nodes file is read with ";" as separator while
            # the relations file uses pandas' default -- confirm this is intended.
            df_nodes = pd.read_csv(csv_nodes, sep=";")
            df_nodes.columns = [name.lower() for name in df_nodes.columns]
            label_column = _pick_column(
                "Which one is the label column in the nodes file?",
                df_nodes.columns.tolist(),
                "label",
            )
            if label_column == "":
                # No label column chosen yet: nothing to key the nodes on.
                df_nodes = None
            else:
                df_nodes.set_index(label_column, inplace=True)

        else:  # If no node file provided.
            # Every distinct source/target value becomes a node without
            # attributes, in order of first appearance.
            labels = pd.concat([df[source], df[target]]).unique()
            df_nodes = pd.DataFrame({"labels": labels}).set_index("labels")

        if df_nodes is not None:
            # Only used as the suggested download name; nothing is written
            # to disk.
            gexf_file = "output.gexf"

            # Make empty graph.
            G = nx.MultiDiGraph()
            # Add nodes.
            G = add_nodes(G, df_nodes)
            # Add edges.
            G = add_edges(
                G, df, source=source, target=target, chosen_columns=chosen_columns
            )

            # Turn the graph into text.
            graph_text = "\n".join(nx.generate_gexf(G))

            # Download gexf-file.
            st.download_button(
                "Download gexf-file", graph_text, file_name=gexf_file
            )
||||||
@ -0,0 +1,20 @@ |
|||||||
|
# Example CSV files, hosted as GitHub gists and linked from the UI.
_GIST_HOST = "https://gist.githubusercontent.com/lasseedfast"

node_example = (
    f"{_GIST_HOST}/1d43f23da417d64c34d304048ff58b70"
    "/raw/64b68b32318b18cfb69a509f9285f6fd0cfc4af0/nodes_example.csv"
)
relations_example = (
    f"{_GIST_HOST}/c8548d13202d434e6801c3eed70d0586"
    "/raw/71da6a4d5a2129c5a55b8a656d53d7dddebb7ae4/relations_example.csv"
)

# Style sheet that renders links in black, and grey while hovered.
css = "\n".join(
    [
        "<style>",
        "a:link {color: black;}",
        "a:visited {color: black;}",
        "a:hover {color: grey;}",
        "</style>",
        "",
    ]
)

# Markdown text for the "More info" section; it links to both example files.
explainer = (
    "\n"
    "If you upload only a file with relations the nodes will be created from that one and will only have a label.\n"
    "You can also upload a separate file with nodes and more information about them. If so make sure the nodes correlate\n"
    "to the source/target column in the relations file.\n"
    "If you use *source*, *target* and *label* in your files the process here is almost automatic (see examples).\n"
    f"Examples for [relation file]({relations_example}) and [node file]({node_example}).\n"
    "*No data is stored.* Made by [Lasse Edfast](https://lasseedfast.se).\n"
)
||||||
@ -0,0 +1,50 @@ |
|||||||
|
altair==4.2.2 |
||||||
|
attrs==23.1.0 |
||||||
|
backports.zoneinfo==0.2.1 |
||||||
|
blinker==1.6.2 |
||||||
|
cachetools==5.3.0 |
||||||
|
certifi==2022.12.7 |
||||||
|
charset-normalizer==3.1.0 |
||||||
|
click==8.1.3 |
||||||
|
decorator==5.1.1 |
||||||
|
entrypoints==0.4 |
||||||
|
gitdb==4.0.10 |
||||||
|
GitPython==3.1.31 |
||||||
|
idna==3.4 |
||||||
|
importlib-metadata==6.6.0 |
||||||
|
importlib-resources==5.12.0 |
||||||
|
Jinja2==3.1.2 |
||||||
|
jsonschema==4.17.3 |
||||||
|
markdown-it-py==2.2.0 |
||||||
|
MarkupSafe==2.1.2 |
||||||
|
mdurl==0.1.2 |
||||||
|
networkx==3.1 |
||||||
|
numpy==1.24.3 |
||||||
|
packaging==23.1 |
||||||
|
pandas==2.0.1 |
||||||
|
Pillow==9.5.0 |
||||||
|
pkgutil_resolve_name==1.3.10 |
||||||
|
protobuf==3.20.3 |
||||||
|
pyarrow==11.0.0 |
||||||
|
pydeck==0.8.1b0 |
||||||
|
Pygments==2.15.1 |
||||||
|
Pympler==1.0.1 |
||||||
|
pyrsistent==0.19.3 |
||||||
|
python-dateutil==2.8.2 |
||||||
|
pytz==2023.3 |
||||||
|
pytz-deprecation-shim==0.1.0.post0 |
||||||
|
requests==2.29.0 |
||||||
|
rich==13.3.5 |
||||||
|
six==1.16.0 |
||||||
|
smmap==5.0.0 |
||||||
|
streamlit==1.22.0 |
||||||
|
tenacity==8.2.2 |
||||||
|
toml==0.10.2 |
||||||
|
toolz==0.12.0 |
||||||
|
tornado==6.3.1 |
||||||
|
typing_extensions==4.5.0 |
||||||
|
tzdata==2023.3 |
||||||
|
tzlocal==4.3 |
||||||
|
urllib3==1.26.15 |
||||||
|
validators==0.20.0 |
||||||
|
zipp==3.15.0 |
||||||
Loading…
Reference in new issue