import locale
import re
from datetime import datetime

import networkx as nx
import pandas as pd

# NOTE(review): NaN is not referenced anywhere in the visible file, and
# `numpy.core` is a private module path in recent NumPy releases -- confirm
# whether this import can be dropped.
from numpy.core.numeric import NaN

# The %b / %B directives used by convert_date() are locale dependent; LC_TIME
# is pinned to en_US so that English month names are the ones recognised.
locale.setlocale(locale.LC_TIME, "en_US")

from arangodb import db

# Date layouts tried by convert_date(), in priority order.
_DATE_FORMATS = ("%d %b %Y", "%d %B %Y", "%b %d, %Y", "%B %d, %Y")


def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """Fetch the documents of a collection whose ``_id`` is in *nodes*.

    Args:
        nodes (iterable): Document ids to look up (``"<collection>/<key>"``).
        collection (str, optional): Collection to query. Defaults to
            "members".
        return_fields (str, optional): AQL expression placed after ``RETURN``;
            ``doc`` refers to the current document. Defaults to an object
            with ``_key`` and ``name``.

    Returns:
        list: One item per matching document, shaped by *return_fields*.
    """
    # NOTE: return_fields is interpolated into the query text (an AQL
    # expression cannot be passed as a bind variable), so it must only ever
    # come from trusted code, never from user input.
    aql = f"""
        FOR doc IN @@ecollection
            FILTER doc._id IN @nodes
            RETURN {return_fields}
    """
    cursor = db.aql.execute(
        aql, bind_vars={"@ecollection": collection, "nodes": list(nodes)}
    )
    return list(cursor)


def edges_from_nodes(nodes, edge_collections=None, simple=True, mode="or"):
    """Return the edges that touch the given nodes.

    Args:
        nodes (iterable): Document ids of the nodes whose edges are wanted.
        edge_collections (list, optional): Edge collections to read from. A
            single collection name may be given as a plain string. Defaults
            to ["picture_reactions"].
        simple (bool, optional): If True only ``_to``, ``_from``, ``_id`` and
            ``_key`` are returned for each edge, otherwise the whole edge
            document. Defaults to True.
        mode (str, optional): AQL boolean operator joining the two endpoint
            tests: "or" keeps edges with at least one endpoint in *nodes*,
            "and" keeps edges with both endpoints in *nodes*. Defaults to
            "or".

    Returns:
        list: Edge documents (dicts) from all collections, concatenated in
        the order of *edge_collections*.
    """
    if edge_collections is None:
        # None sentinel instead of a shared mutable default list.
        edge_collections = ["picture_reactions"]
    elif isinstance(edge_collections, str):
        # A bare string would otherwise be iterated character by character.
        edge_collections = [edge_collections]

    if simple:
        return_fields = (
            "{'_to': doc._to, '_from': doc._from, '_id':doc._id, '_key':doc._key}"
        )
    else:
        return_fields = "doc"

    # NOTE: mode and return_fields are interpolated into the query text, so
    # they must come from trusted code only.
    aql = f"""
        FOR doc IN @@edge_collection
            FILTER doc._from IN @nodes {mode} doc._to IN @nodes
            RETURN {return_fields}
    """
    node_ids = list(nodes)

    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            aql,
            bind_vars={
                "@edge_collection": collection,
                "nodes": node_ids,
            },
        )
        edges.extend(cursor)
    return edges


def convert_date(date, default_year=2021):
    """Convert a human readable date to ``YYYY-MM-DD``.

    Recognised layouts, tried in this order: ``3 Jan 2020``,
    ``3 January 2020``, ``Jan 3, 2020``, ``January 3, 2020`` and finally a
    year-less ``3 Jan``, which is completed with *default_year*.

    Args:
        date (str): The date text to convert.
        default_year (int, optional): Year assumed when *date* has none.
            Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or "" when *date* is not a string or
        matches none of the layouts.
    """
    if not isinstance(date, str):
        # A missing date (e.g. None) used to raise TypeError; treat it like
        # any other unparseable value.
        return ""

    attempts = [(date, fmt) for fmt in _DATE_FORMATS]
    attempts.append((f"{date} {default_year}", "%d %b %Y"))

    for text, fmt in attempts:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # Fix: the format used to be "%Y-%d-%d", which wrote the day twice
        # and never the month.
        return parsed.strftime("%Y-%m-%d")
    return ""


def export_network(members, n=2):
    """Export a GEXF file with the interaction network around *members*.

    The network is built in these steps:

    1. Collect everyone who shares an edge with one of *members* (the
       "friends"; this includes the members themselves when they have edges).
    2. Fetch every edge touching a friend and record, for each endpoint,
       which other endpoints it is connected to.
    3. Keep the friends plus everyone connected to at least *n* friends.
    4. Fetch the full edges running between the kept members, attach the
       date of the picture each edge refers to, and write the graph to
       ``data/<members joined by "-">-old.gexf``.

    Args:
        members (list): Member keys (without the "members/" prefix) to build
            the network around. A single key may be given as a plain string.
        n (int, optional): Minimum number of friends a non-friend must be
            connected to in order to be included. Defaults to 2.

    Returns:
        None. The graph is written to disk.
    """
    if isinstance(members, str):
        # A bare string would otherwise be treated as a list of characters.
        members = [members]

    # Fix: the original f-string contained the literal text
    # "-.join([...])" instead of actually joining the member names.
    filename = f"data/{'-'.join(members)}-old.gexf"

    seed_ids = [f"members/{member}" for member in members]

    # Step 1: everyone appearing on an edge together with a seed member.
    friends = set()
    for edge in edges_from_nodes(seed_ids):
        friends.add(edge["_from"])
        friends.add(edge["_to"])

    # Step 2: for every endpoint of an edge touching a friend, the set of
    # endpoints it has interacted with (in either direction).
    contacts = {}
    for edge in edges_from_nodes(list(friends)):
        source, target = edge["_from"], edge["_to"]
        contacts.setdefault(target, set()).add(source)
        contacts.setdefault(source, set()).add(target)

    # Step 3: keep friends, plus anyone connected to at least n friends.
    selected = [
        member_id
        for member_id, peers in contacts.items()
        if member_id in friends or len(peers & friends) >= n
    ]

    # Step 4: full edge documents where both endpoints are selected.
    edges = pd.DataFrame(
        edges_from_nodes(selected, mode="and", simple=False),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)

    # Members that actually occur on an edge become the nodes of the graph.
    member_ids = list(set(edges["_from"].unique()) | set(edges["_to"].unique()))
    nodes = [(doc["_key"], doc) for doc in nodes_from_list(member_ids)]

    # Strip the "members/" collection prefix so edge endpoints match the
    # node keys used above.
    prefix_length = len("members/")
    edges["_from"] = edges["_from"].apply(lambda doc_id: doc_id[prefix_length:])
    edges["_to"] = edges["_to"].apply(lambda doc_id: doc_id[prefix_length:])

    # Keep only the first run of digits of the picture reference; this drops
    # URL fragments in the cases where the picture was stored with a wrong id.
    edges["picture"] = edges["picture"].apply(
        lambda ref: re.search(r"\d+", ref).group()
    )

    # Look up the pictures to be able to attach a date to every edge.
    picture_ids = ["pictures/" + key for key in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    dates = {picture["id"]: convert_date(picture["date"]) for picture in pictures}
    # A picture missing from the database gets an empty date, the same value
    # convert_date() produces for a date it cannot parse.
    edges["date"] = edges["picture"].apply(lambda key: dates.get(key, ""))

    # Build the graph from the edges ...
    graph = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )
    # ... add the node attributes ...
    graph.add_nodes_from(nodes)

    # ... and export it.
    nx.write_gexf(graph, filename)


if __name__ == "__main__":
    export_network(["linda.kakuli"])
    # export_network(input('Member: '))