You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

185 lines
5.3 KiB

import locale
import re
from datetime import datetime
import networkx as nx
import pandas as pd
from numpy.core.numeric import NaN
locale.setlocale(locale.LC_TIME, "en_US")
from arangodb import db
def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """Fetch the documents in ``collection`` whose ``_id`` is listed in ``nodes``.

    Args:
        nodes (list): Document ids to look up (compared against ``doc._id``).
        collection (str, optional): Collection to read from. Defaults to
            "members".
        return_fields (str, optional): AQL expression placed after ``RETURN``.
            It is inserted verbatim into the query text, so it must come
            from trusted code. Defaults to a projection of ``_key`` and
            ``name``.

    Returns:
        list: One entry per matching document, shaped by ``return_fields``.
    """
    # The collection name and the id list travel as bind variables; only the
    # RETURN projection is spliced into the query text.
    query = (
        "\n"
        "FOR doc IN @@ecollection\n"
        "FILTER doc._id IN @nodes\n"
        f"RETURN {return_fields}\n"
    )
    bind_vars = {"@ecollection": collection, "nodes": nodes}
    result = db.aql.execute(query, bind_vars=bind_vars)
    return list(result)
def edges_from_nodes(
    nodes, edge_collections=("picture_reactions",), simple=True, mode="or"
):
    """Return the edges connected to the given nodes.

    Every collection in ``edge_collections`` is queried for edges whose
    ``_from`` and/or ``_to`` is listed in ``nodes``.

    Args:
        nodes (list): Document ids matched against ``_from`` and ``_to``.
        edge_collections (iterable of str, optional): Edge collections to
            query. Defaults to ("picture_reactions",).
        simple (bool, optional): When true, each result only carries
            ``_to``, ``_from``, ``_id`` and ``_key``; otherwise the whole
            edge document is returned. Defaults to True.
        mode (str, optional): Boolean operator joining the ``_from`` and
            ``_to`` conditions: "or" keeps edges with at least one endpoint
            in ``nodes``, "and" keeps edges with both endpoints in ``nodes``.
            The value is inserted verbatim into the query text, so it must
            come from trusted code. Defaults to "or".

    Returns:
        list: The matching edges of all collections, in the order the
        collections were given. Empty when ``edge_collections`` is empty.
    """
    if simple:
        return_fields = (
            "{'_to': doc._to, '_from': doc._from, '_id':doc._id, '_key':doc._key}"
        )
    else:
        return_fields = "doc"

    # The query text is the same for every collection (the collection name is
    # a bind variable), so it is built once outside the loop.
    aql = (
        "\n"
        "FOR doc IN @@edge_collection\n"
        f"FILTER doc._from IN @nodes {mode} doc._to IN @nodes\n"
        f"RETURN {return_fields}\n"
    )

    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            aql,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
        )
        # extend() appends in place instead of copying the accumulated list
        # for every collection.
        edges.extend(cursor)
    return edges
def convert_date(date, default_year=2021):
    """Convert a human-readable date string to ``YYYY-MM-DD``.

    The formats tried, in order, are ``"5 Mar 2020"``, ``"5 March 2020"``,
    ``"Mar 5, 2020"`` and ``"March 5, 2020"``. If none match, the input is
    assumed to lack a year (``"5 Mar"``) and is parsed again with
    ``default_year`` appended. Month names are interpreted according to the
    active ``LC_TIME`` locale.

    Args:
        date (str): The date text to convert.
        default_year (int, optional): Year assumed for inputs without a
            year. Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or an empty string when ``date`` is
        not a string or matches none of the known formats.
    """
    # A missing value (e.g. None) cannot be parsed; treat it like any other
    # unparseable input instead of raising TypeError.
    if not isinstance(date, str):
        return ""

    candidates = [
        (date, "%d %b %Y"),
        (date, "%d %B %Y"),
        (date, "%b %d, %Y"),
        (date, "%B %d, %Y"),
        # Last resort: input without a year.
        (f"{date} {default_year}", "%d %b %Y"),
    ]
    for text, fmt in candidates:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # Year-month-day; the previous "%Y-%d-%d" emitted the day twice and
        # dropped the month.
        return parsed.strftime("%Y-%m-%d")
    return ""
def export_network(members, n=2):
    """Export a GEXF file with the reaction network around a list of members.

    Starting from ``members``, the function collects everyone who shares an
    edge with them, widens the selection to members who have interacted with
    at least ``n`` of those contacts, and writes the edges between the
    selected members (with reaction type and picture date) as a directed
    multigraph to ``data/<member keys joined by '-'>-old.gexf``.

    Args:
        members (list of str | str): Member keys without the ``members/``
            prefix. A single key may be passed as a plain string.
        n (int, optional): Minimum number of the seed members' contacts a
            member must have interacted with to be included. Defaults to 2.

    Returns:
        None. The result is the file written to disk.
    """
    member_prefix = "members/"

    # A bare string would otherwise be iterated character by character.
    if isinstance(members, str):
        members = [members]

    # The previous f-string wrote the literal text "-.join([...])" into the
    # file name instead of joining the member keys.
    filename = "data/{}-old.gexf".format("-".join(members))

    seed_ids = [member_prefix + member for member in members]

    # Fetch the edges touching the seed members; both endpoints of each edge
    # (seed members included) form the contact set.
    friends = set()
    for edge in edges_from_nodes(seed_ids):
        friends.add(edge["_from"])
        friends.add(edge["_to"])

    # Map every node to the set of nodes it has interacted with.
    contacts = {}
    for edge in edges_from_nodes(list(friends)):
        source, target = edge["_from"], edge["_to"]
        contacts.setdefault(target, set()).add(source)
        contacts.setdefault(source, set()).add(target)

    # Keep the contacts themselves plus anyone who has interacted with at
    # least n of them.
    selected = [
        node
        for node, others in contacts.items()
        if len(others & friends) >= n or node in friends
    ]

    # Edges whose endpoints are both among the selected members.
    edges = pd.DataFrame(
        edges_from_nodes(selected, mode="and", simple=False),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)

    # Members that actually appear in an edge become the nodes of the graph.
    network_members = list(
        set(edges["_from"].unique()) | set(edges["_to"].unique())
    )
    nodes = [(doc["_key"], doc) for doc in nodes_from_list(network_members)]

    # Strip the "members/" prefix so edge endpoints match the node keys.
    edges["_from"] = edges["_from"].apply(lambda x: x[len(member_prefix):])
    edges["_to"] = edges["_to"].apply(lambda x: x[len(member_prefix):])

    # Some picture references carry URL residue around the id; keep only the
    # first run of digits. A value without digits is left unchanged rather
    # than raising AttributeError on the missing match.
    digits = re.compile(r"\d+")

    def picture_id(value):
        found = digits.search(value)
        return found.group() if found else value

    edges["picture"] = edges["picture"].apply(picture_id)

    # Fetch the pictures so each edge can be given the picture's date.
    picture_ids = ["pictures/" + pid for pid in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    dates = {picture["id"]: convert_date(picture["date"]) for picture in pictures}
    # A picture missing from the database yields an empty date, the same
    # value convert_date uses for unparseable dates, instead of a KeyError.
    edges["date"] = edges["picture"].apply(lambda pid: dates.get(pid, ""))

    # Build the graph from the edge table.
    graph = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )
    # Attach the node documents as node attributes.
    graph.add_nodes_from(nodes)

    # Write the graph to disk.
    nx.write_gexf(graph, filename)
if __name__ == "__main__":
    # Script entry point: export the network around one hard-coded member.
    # Interactive alternative: export_network(input('Member: '))
    seed_members = ["linda.kakuli"]
    export_network(seed_members)