Added module for export to Gephi

pull/5/head
Lasse Edfast 5 years ago
parent fbde804f06
commit 54657ef3d7
  1. 181
      facebook/gephi.py

@ -0,0 +1,181 @@
import locale
import re
from datetime import datetime
import networkx as nx
import pandas as pd
from numpy.core.numeric import NaN
# The date strings parsed by this module use English month names, so LC_TIME
# must be an English locale for strptime's %b/%B to match them.
# The bare name "en_US" is not installed on every system (many only provide
# "en_US.UTF-8"), and setlocale raises locale.Error for an unknown locale.
# Try the candidates in order; "C" is always available and also uses English
# month names, so it is a safe last resort.
for _locale_name in ("en_US", "en_US.UTF-8", "C"):
    try:
        locale.setlocale(locale.LC_TIME, _locale_name)
    except locale.Error:
        continue
    break
from arangodb import db
def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """
    Fetch the documents of a collection whose ``_id`` is in ``nodes``.

    Args:
        nodes (list): Document ids to look up (matched against ``doc._id``).
        collection (str, optional): Collection to query. Defaults to "members".
        return_fields (str, optional): AQL expression placed after RETURN. It is
            interpolated verbatim into the query text, so it must be trusted
            AQL. Defaults to an object holding ``_key`` and ``name``.

    Returns:
        list: One item per matching document, shaped by ``return_fields``.
    """
    # The collection name and the id list travel as bind variables; only the
    # RETURN projection is formatted into the query string.
    query = f"""
FOR doc IN @@ecollection
FILTER doc._id IN @nodes
RETURN {return_fields}
"""
    bindings = {"@ecollection": collection, "nodes": nodes}
    result = db.aql.execute(query, bind_vars=bindings)
    return list(result)
def edges_from_nodes(
    nodes, edge_collections=("picture_reactions",), simple=True, mode="or"
):
    """
    Return the edges that touch the given nodes in the given edge collections.

    Args:
        nodes (list): Node ids matched against each edge's ``_from`` and ``_to``.
        edge_collections (iterable of str, optional): Edge collections to query,
            in order. A single collection name given as a plain string is
            accepted too. Defaults to ("picture_reactions",).
        simple (bool, optional): If True, every edge is reduced to ``_to``,
            ``_from``, ``_id`` and ``_key``; otherwise the whole document is
            returned. Defaults to True.
        mode (str, optional): Boolean operator joining the two filters. "or"
            keeps edges with at least one end in ``nodes``, "and" keeps edges
            with both ends in ``nodes``. Defaults to "or".

    Returns:
        list: The matching edges from all collections, concatenated in
        collection order.

    Raises:
        ValueError: If ``mode`` is not a supported boolean operator.
    """
    # An operator cannot be passed as a bind variable, so `mode` ends up in the
    # query text. Whitelist it so nothing else can be smuggled into the query.
    allowed_modes = ("or", "and", "||", "&&")
    if not isinstance(mode, str) or mode.strip().lower() not in allowed_modes:
        raise ValueError(f"mode must be one of {allowed_modes}, got {mode!r}")

    # Iterating a bare string would query one "collection" per character.
    if isinstance(edge_collections, str):
        edge_collections = [edge_collections]

    if simple:
        return_fields = (
            "{'_to': doc._to, '_from': doc._from, '_id':doc._id, '_key':doc._key}"
        )
    else:
        return_fields = "doc"

    # The query text is the same for every collection; only the bind
    # variables differ, so build it once outside the loop.
    aql_edges = f"""
FOR doc IN @@edge_collection
FILTER doc._from IN @nodes {mode} doc._to IN @nodes
RETURN {return_fields}
"""

    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            aql_edges,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
        )
        edges.extend(cursor)
    return edges
def convert_date(date, default_year=2021):
    """
    Convert a human-readable date string to ISO format (``YYYY-MM-DD``).

    The following layouts are tried in order: "5 Mar 2020", "5 March 2020",
    "Mar 5, 2020" and "March 5, 2020". If none of them matches, the string is
    assumed to lack a year ("5 Mar") and is parsed again with ``default_year``
    appended. Month names are matched according to the current LC_TIME locale.

    Args:
        date (str): The date text to convert.
        default_year (int, optional): Year assumed when the text has none.
            Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or an empty string when the input is
        not a string or matches none of the known layouts.
    """
    # A missing date can arrive as None; treat it like any unparseable value.
    if not isinstance(date, str):
        return ""
    text = date.strip()

    attempts = [
        (text, "%d %b %Y"),
        (text, "%d %B %Y"),
        (text, "%b %d, %Y"),
        (text, "%B %d, %Y"),
        # Last resort: a date without a year, completed with the default year.
        (f"{text} {default_year}", "%d %b %Y"),
    ]
    for candidate, layout in attempts:
        try:
            parsed = datetime.strptime(candidate, layout)
        except ValueError:
            continue
        # Year-month-day; the month directive is %m (the previous format
        # string repeated %d and so printed the day twice).
        return parsed.strftime("%Y-%m-%d")
    return ""
def _picture_id(value):
    """Return the first run of digits in ``value``, or None if there is none."""
    if not isinstance(value, str):
        return None
    match = re.search(r"\d+", value)
    return match.group() if match else None


def export_network(member, n=2, path="data/network_test.gexf"):
    """
    Export a GEXF file with the reaction network around one member.

    Steps:
        1. Collect everyone who shares an edge with ``member`` (the member
           itself ends up in that set too, since both edge ends are added).
        2. Fetch all edges touching that set and keep the nodes that have
           interacted with at least ``n`` of its members.
        3. Fetch the full edges running between the kept nodes, add the date
           of the picture each edge refers to, and build a directed multigraph.
        4. Write the graph to ``path``.

    Args:
        member (str): Key of the member document (without the "members/" prefix).
        n (int, optional): Minimum number of the member's contacts a node must
            have interacted with to be included. Defaults to 2.
        path (str, optional): Output file. Defaults to "data/network_test.gexf".

    Returns:
        None
    """
    member = f"members/{member}"

    # Fetch the edges connected to the member from the database.
    member_friends = set()
    for edge in edges_from_nodes([member]):
        member_friends.add(edge["_from"])
        member_friends.add(edge["_to"])

    friend_edges = edges_from_nodes(list(member_friends))

    # Map every node to the set of nodes it has interacted with, in either
    # direction.
    contacts = {}
    for edge in friend_edges:
        contacts.setdefault(edge["_to"], set()).add(edge["_from"])
        contacts.setdefault(edge["_from"], set()).add(edge["_to"])

    # Keep only nodes that interacted with at least n of the member's contacts.
    members = [
        node
        for node, partners in contacts.items()
        if len(partners & member_friends) >= n
    ]

    # Build a DataFrame with the edges that run between the kept nodes.
    edges = pd.DataFrame(
        edges_from_nodes(members, mode="and", simple=False),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)

    # The members that actually appear in an edge become the graph's nodes.
    members = list(set(edges["_from"].unique()) | set(edges["_to"].unique()))
    nodes = [(doc["_key"], doc) for doc in nodes_from_list(members)]

    # Strip the "members/" collection prefix so edge ends match the node keys.
    prefix_length = len("members/")
    edges["_from"] = edges["_from"].apply(lambda node_id: node_id[prefix_length:])
    edges["_to"] = edges["_to"].apply(lambda node_id: node_id[prefix_length:])

    # Strip URL residue in the cases where the picture was stored with a wrong
    # id. Values without any digits become None instead of raising.
    edges["picture"] = edges["picture"].apply(_picture_id)

    # Fetch the pictures so that a date can be attached to every edge.
    picture_ids = [
        "pictures/" + picture_id
        for picture_id in edges["picture"].dropna().unique().tolist()
    ]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    picture_dates = {}
    for picture in pictures:
        raw_date = picture["date"]
        # A picture without a stored date gets a blank date.
        picture_dates[picture["id"]] = (
            convert_date(raw_date) if isinstance(raw_date, str) else ""
        )
    # Edges whose picture was not found in the collection get a blank date.
    edges["date"] = edges["picture"].apply(
        lambda picture_id: picture_dates.get(picture_id, "")
    )

    # Build the graph from the edges. Only columns that exist may be listed:
    # the previous attribute list also named "now", which is never created,
    # and from_pandas_edgelist raises NetworkXError for an unknown column.
    graph = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Add the nodes, with their attributes, to the graph.
    graph.add_nodes_from(nodes)

    # Export to file.
    nx.write_gexf(graph, path)
if __name__ == "__main__":
    # Script entry point: export the network of one fixed member.
    # Interactive alternative: export_network(input('Member: '))
    target_member = "maria.hansson.botin"
    export_network(target_member)
Loading…
Cancel
Save