parent
fbde804f06
commit
54657ef3d7
1 changed files with 181 additions and 0 deletions
@ -0,0 +1,181 @@ |
||||
import locale |
||||
import re |
||||
from datetime import datetime |
||||
|
||||
import networkx as nx |
||||
import pandas as pd |
||||
from numpy.core.numeric import NaN |
||||
|
||||
# convert_date() parses English month names with %b / %B, which follow the
# LC_TIME locale. The bare name "en_US" is not installed on every system
# (many only provide "en_US.UTF-8"), and an unknown name raises locale.Error at
# import time. Try the original name first, then fall back; the "C" locale
# always exists and also uses English month names.
for _time_locale in ("en_US", "en_US.UTF-8", "C"):
    try:
        locale.setlocale(locale.LC_TIME, _time_locale)
    except locale.Error:
        continue
    break
||||
|
||||
from arangodb import db |
||||
|
||||
|
||||
def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """Fetch the documents of one collection whose ``_id`` is in ``nodes``.

    Args:
        nodes (list): Document ids (``_id`` values) to look up.
        collection (str, optional): Collection to query. Defaults to "members".
        return_fields (str, optional): AQL expression placed after ``RETURN``.
            It is interpolated into the query text as-is (not bound), so it
            must come from trusted code.

    Returns:
        list: One item per matching document, shaped by ``return_fields``.
    """
    query = f"""
    FOR doc IN @@ecollection
        FILTER doc._id IN @nodes
        RETURN {return_fields}
    """
    bind_vars = {"@ecollection": collection, "nodes": nodes}
    result_cursor = db.aql.execute(query, bind_vars=bind_vars)
    # Consume the cursor by plain iteration, exactly as before.
    return [document for document in result_cursor]
||||
|
||||
|
||||
def edges_from_nodes(
    nodes, edge_collections=("picture_reactions",), simple=True, mode="or"
):
    """Return the edges touching the given nodes in the chosen edge collections.

    Args:
        nodes (list): Document ids (``_id`` values) of the nodes of interest.
        edge_collections (iterable of str, optional): Edge collections to
            query, in order. A single collection name given as a plain string
            is also accepted. Defaults to ("picture_reactions",).
        simple (bool, optional): When True only ``_to``, ``_from``, ``_id`` and
            ``_key`` are returned for each edge; when False the whole edge
            document is returned. Defaults to True.
        mode (str, optional): Logical operator joining the two endpoint
            filters. "or" keeps edges where either endpoint is in ``nodes``,
            "and" keeps edges where both are. Defaults to "or".

    Returns:
        list: Edge documents from all collections, concatenated in the order
        of ``edge_collections``.

    Raises:
        ValueError: If ``mode`` is not a supported logical operator.
    """
    # `mode` ends up in the query text itself (it cannot be a bind variable),
    # so only let known operators through.
    operator = str(mode).strip().lower()
    if operator not in ("or", "and", "||", "&&"):
        raise ValueError(f"mode must be 'or' or 'and', got {mode!r}")

    # A bare string would otherwise be iterated character by character.
    if isinstance(edge_collections, str):
        edge_collections = [edge_collections]

    if simple:
        return_fields = (
            "{'_to': doc._to, '_from': doc._from, '_id':doc._id, '_key':doc._key}"
        )
    else:
        return_fields = "doc"

    # The query text is the same for every collection; only the bind
    # variables change, so build it once.
    aql_edges = f"""
    FOR doc IN @@edge_collection
        FILTER doc._from IN @nodes {operator} doc._to IN @nodes
        RETURN {return_fields}
    """

    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            aql_edges,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
        )
        # Append by plain iteration: extend(cursor) / list(cursor) would first
        # probe len(cursor), which some Arango cursors do not support.
        for doc in cursor:
            edges.append(doc)

    return edges
||||
|
||||
|
||||
def convert_date(date):
    """Convert a human-readable date string to ISO ``YYYY-MM-DD``.

    The accepted layouts are tried in order:
    "5 Mar 2020", "5 March 2020", "Mar 5, 2020", "March 5, 2020" and finally
    a year-less "5 Mar", which is assumed to belong to 2021.
    Month names are matched according to the current LC_TIME locale.

    Args:
        date (str): The date text to convert.

    Returns:
        str: The date as ``YYYY-MM-DD``, or "" when ``date`` is not a string
        or matches none of the layouts.
    """
    # A missing date (e.g. None) cannot be parsed; treat it like any other
    # unrecognised value instead of raising TypeError.
    if not isinstance(date, str):
        return ""

    text = date.strip()
    attempts = [
        (text, "%d %b %Y"),
        (text, "%d %B %Y"),
        (text, "%b %d, %Y"),
        (text, "%B %d, %Y"),
        # Year-less dates ("5 Mar"): assume 2021.
        (text + " 2021", "%d %b %Y"),
    ]
    for candidate, layout in attempts:
        try:
            parsed = datetime.strptime(candidate, layout)
        except ValueError:
            continue
        # %m (month) — the previous "%Y-%d-%d" wrote the day twice.
        return parsed.strftime("%Y-%m-%d")
    return ""
||||
|
||||
|
||||
def export_network(member, n=2, output_path="data/network_test.gexf"):
    """Export a GEXF file with the reaction network around one member.

    Steps:
      1. Collect everyone who shares an edge with ``member`` (the "friends";
         the member itself is part of this set).
      2. Fetch all edges touching those friends and keep only the nodes that
         have interacted with at least ``n`` of the friends.
      3. Fetch the full edges running between the kept nodes, attach the date
         of the picture each reaction belongs to, and build a directed
         multigraph from them.
      4. Write the graph to ``output_path``.

    Args:
        member (str): Key of the member (without the "members/" prefix).
        n (int, optional): Minimum number of the member's friends a node must
            have interacted with to be included. Defaults to 2.
        output_path (str, optional): Where the GEXF file is written.
            Defaults to "data/network_test.gexf".
    """

    def _strip_collection(doc_id):
        # "members/some.key" -> "some.key"
        return doc_id.split("/", 1)[-1]

    def _picture_id(value):
        # Some picture references contain URL residue around the numeric id;
        # keep only the first run of digits. Fall back to the raw text when
        # there are no digits at all rather than failing on a None match.
        text = str(value)
        match = re.search(r"\d+", text)
        return match.group() if match else text

    member = f"members/{member}"

    # Fetch the edges connected to the member from the database.
    member_friends = set()
    for edge in edges_from_nodes([member]):
        member_friends.add(edge["_from"])
        member_friends.add(edge["_to"])
    edges = edges_from_nodes(list(member_friends))

    # Map every node to the set of nodes it has interacted with.
    interactions = {}
    for edge in edges:
        _to = edge["_to"]
        _from = edge["_from"]
        interactions.setdefault(_to, set()).add(_from)
        interactions.setdefault(_from, set()).add(_to)

    # Keep only nodes that have interacted with at least n of the friends.
    members = [
        node
        for node, partners in interactions.items()
        if len(partners & member_friends) >= n
    ]

    # Build a DataFrame with the full edges running between the kept nodes.
    edges = pd.DataFrame(
        edges_from_nodes(members, mode="and", simple=False),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)

    # The users that actually appear on those edges.
    members = list(set(edges["_from"].unique()) | set(edges["_to"].unique()))

    # Create the nodes for the network as (key, attribute dict) pairs.
    nodes = [(doc["_key"], doc) for doc in nodes_from_list(members)]

    # Normalise the edge columns: bare member keys and clean picture ids.
    edges["_from"] = edges["_from"].apply(_strip_collection)
    edges["_to"] = edges["_to"].apply(_strip_collection)
    edges["picture"] = edges["picture"].apply(_picture_id)

    # Fetch the pictures so that each edge can carry the picture's date.
    picture_ids = ["pictures/" + pid for pid in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    dates = {picture["id"]: convert_date(picture["date"]) for picture in pictures}

    # Pictures missing from the database get an empty date instead of raising
    # KeyError.
    edges["date"] = edges["picture"].apply(lambda pid: dates.get(pid, ""))

    # Build the graph from the edges. Only columns that exist in the frame
    # may be listed in edge_attr; the previously listed "now" column was never
    # created, which made networkx reject the call.
    G = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Add the nodes (with their attributes) to the graph.
    G.add_nodes_from(nodes)

    # Export to file.
    nx.write_gexf(G, output_path)
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, export the network for one hard-coded member.
    export_network("maria.hansson.botin")
    # Interactive alternative (disabled): ask for the member key instead.
    # export_network(input('Member: '))
||||
Loading…
Reference in new issue