"""Build GEXF interaction networks from picture reactions stored in ArangoDB."""

import locale
import re
from datetime import datetime
from getpass import getpass
from sys import argv

import networkx as nx
import pandas as pd
from numpy.core.numeric import NaN

import arangodb

# Month names handled by %b / %B in convert_date() are locale dependent.
locale.setlocale(locale.LC_TIME, "en_US")

# Date layouts tried, in order, by convert_date().
_DATE_FORMATS = ("%d %b %Y", "%d %B %Y", "%b %d, %Y", "%B %d, %Y")


def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """
    Fetch the documents whose ``_id`` is listed in ``nodes``.

    Args:
        nodes (list): Document ids (e.g. ``"members/<key>"``) to fetch.
        collection (str, optional): Collection to read from.
            Defaults to "members".
        return_fields (str, optional): AQL expression placed after RETURN.
            It is interpolated into the query text as-is, so it must come
            from trusted code, never from user input.

    Returns:
        list: One item per matching document, shaped by ``return_fields``.
    """
    aql_nodes = f"""
            FOR doc IN @@ecollection
            FILTER doc._id IN @nodes
            RETURN {return_fields}
            """
    cursor = db.aql.execute(
        aql_nodes, bind_vars={"@ecollection": collection, "nodes": nodes}
    )
    return list(cursor)


def edges_from_nodes(nodes, edge_collections=("picture_reactions",), mode="or"):
    """
    Return the edges that touch the given nodes in the given edge collections.

    Args:
        nodes (list): Node ids that the edges should involve.
        edge_collections (iterable, optional): Edge collections to query.
            Defaults to ("picture_reactions",).
        mode (str, optional): Boolean operator joining the ``_from`` and
            ``_to`` conditions: "or" keeps edges with at least one end in
            ``nodes``, "and" keeps edges with both ends in ``nodes``.
            It is interpolated into the query text as-is, so it must come
            from trusted code. Defaults to "or".

    Returns:
        list: Edge documents from all queried collections, concatenated.
    """
    edges = []
    for collection in edge_collections:
        aql_edges = f"""
            FOR doc IN @@edge_collection
            FILTER doc._from IN @nodes {mode} doc._to IN @nodes
            RETURN doc
            """
        cursor = db.aql.execute(
            aql_edges,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
            stream=True,
        )
        edges.extend(cursor)
    return edges


def convert_date(date, default_year=2021):
    """
    Convert a textual date to ``YYYY-MM-DD``.

    The layouts "%d %b %Y", "%d %B %Y", "%b %d, %Y" and "%B %d, %Y" are
    tried in that order. If none matches, the text is assumed to lack a
    year and is parsed as "%d %b" with ``default_year`` appended.

    Args:
        date (str): Date text to parse.
        default_year (int, optional): Year assumed when the text has none.
            Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or "" if no layout matches.
    """
    for fmt in _DATE_FORMATS:
        try:
            parsed = datetime.strptime(date, fmt)
        except ValueError:
            continue
        break
    else:
        try:
            parsed = datetime.strptime(f"{date} {default_year}", "%d %b %Y")
        except ValueError:
            return ""
    # %m is the month; the previous "%Y-%d-%d" emitted the day twice.
    return parsed.strftime("%Y-%m-%d")


def get_edges(member, n=2, lookups=(), common=True):
    """
    Return a DataFrame with the edges of the network around ``member``.

    Args:
        member (str): Username (document key) of the member.
        n (int, optional): Minimum number of the member's contacts that
            another node must share an edge with to be kept. Defaults to 2.
        lookups (iterable, optional): Usernames of the users a common
            network is being built for. Defaults to ().
        common (bool, optional): When False, the users in ``lookups`` are
            first removed from the member's direct contacts. Defaults to True.

    Returns:
        DataFrame: Edges indexed by ``_key`` with the columns ``_to``,
        ``_from``, ``reaction`` and ``picture``.
    """
    member_id = f"members/{member}"
    lookup_ids = [f"members/{name}" for name in lookups]

    # Everyone who shares at least one edge with the member.
    direct_contacts = set()
    for edge in edges_from_nodes([member_id]):
        direct_contacts.add(edge["_from"])
        direct_contacts.add(edge["_to"])
    member_friends = list(direct_contacts)

    if not common:
        # Drop the other lookups so they do not get friends of friends.
        excluded = set(lookup_ids)
        member_friends = [
            friend for friend in member_friends if friend not in excluded
        ]

    # NOTE(review): the original author questioned whether this step should
    # stay ("Ska den här vara kvar?"), and the source formatting was lost, so
    # it is not certain whether it originally sat inside the `if` above.
    # With common=False or empty lookups the result is the same either way.
    member_friends.extend(lookup_ids)

    edges = edges_from_nodes(member_friends)

    # Map every node to the set of nodes it shares an edge with.
    contacts = {}
    for edge in edges:
        source, target = edge["_from"], edge["_to"]
        contacts.setdefault(target, set()).add(source)
        contacts.setdefault(source, set()).add(target)

    # Keep the member's friends themselves plus anyone connected to at
    # least n of them.
    friend_set = set(member_friends)
    members = [
        node
        for node, neighbours in contacts.items()
        if len(neighbours & friend_set) >= n or node in friend_set
    ]

    # Only edges with both ends among the selected members.
    edges = pd.DataFrame(
        edges_from_nodes(members, mode="and"),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)
    return edges


def members_from_edges(edges):
    """
    List the members to include in the network.

    Args:
        edges (DataFrame): Edges with ``_from`` and ``_to`` columns.

    Returns:
        list: Unique values found in ``_from`` or ``_to``.
    """
    return list(set(edges["_from"].unique()) | set(edges["_to"].unique()))


def edges_for_network(edges):
    """
    Prepare edges for the network.

    The DataFrame is modified in place: the first eight characters (the
    "members/" prefix) are stripped from ``_from`` and ``_to``, ``picture``
    is reduced to its first run of digits, and a ``date`` column is added
    from the matching picture documents.

    Args:
        edges (DataFrame): Edges with ``_from``, ``_to`` and ``picture``.

    Returns:
        DataFrame: The same DataFrame, prepared for the network.

    Raises:
        AttributeError: If a ``picture`` value contains no digits.
        KeyError: If a picture id is not returned by the database.
    """
    prefix_length = len("members/")
    edges["_from"] = edges["_from"].apply(lambda node: node[prefix_length:])
    edges["_to"] = edges["_to"].apply(lambda node: node[prefix_length:])

    # Strip url info for the cases where the picture got the wrong id.
    edges["picture"] = edges["picture"].apply(
        lambda value: re.search(r"\d+", value).group()
    )

    # Fetch the pictures so a date can be attached to every edge.
    picture_ids = ["pictures/" + key for key in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    date_by_picture = {
        picture["id"]: convert_date(picture["date"]) for picture in pictures
    }
    edges["date"] = edges["picture"].apply(lambda key: date_by_picture[key])

    return edges


def _export_graph(edges, filename):
    """Build a MultiDiGraph from raw edges and write it to ``filename`` as GEXF."""
    # Collect the member ids first: edges_for_network() strips the
    # "members/" prefix in place, and nodes_from_list() filters on full ids.
    members = members_from_edges(edges)

    graph = nx.from_pandas_edgelist(
        edges_for_network(edges),
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Attach the member documents as node attributes, keyed by _key.
    nodes = nodes_from_list(members)
    graph.add_nodes_from([(node["_key"], node) for node in nodes])

    nx.write_gexf(graph, filename)


def export_network(member):
    """
    Export a GEXF file with the network around one member.

    Args:
        member (str): Username of the member. The file is written to
            ``data/<member>_.gexf``.
    """
    filename = f"data/{member}_.gexf"
    _export_graph(get_edges(member, n=3), filename)


def common_friends(d, n=2):
    """
    Filter out the friends shared by several networks.

    Args:
        d (dict): Maps each looked-up member to a collection of friends.
        n (int, optional): Minimum number of networks a friend must appear
            in to be kept. Defaults to 2.

    Returns:
        list: Friends that appear in at least ``n`` of the networks.
        If there are none, a message is printed and the program exits.
    """
    occurrences = {}
    for friends in d.values():
        # set() so a friend counts at most once per network.
        for friend in set(friends):
            occurrences[friend] = occurrences.get(friend, 0) + 1

    shared = [friend for friend, count in occurrences.items() if count >= n]
    if not shared:
        print('Inga gemensamma i nätverken.')
        exit()

    return shared


# The connection is created at import time because the query helpers above
# read the module-level ``db``.
pwd = getpass('Password for Lasse: ')
db = arangodb.arango_connect(pwd)


def main():
    """Export the common network for the hard-coded list of users."""
    lookups = [
        'katherine.zimmerman.754',
        'boogiesaman.bakhtiari',
        'lena.tidestromsagstrom',
        'bibi.rodoo',
        'mina.benaissa',
        'henrik.johnsson.73',
        'fabian.asserback',
        '100005696055822',
        'fia.wiren',
        'daniel.kjellander.5'
    ]

    print('Samlar data för:')
    for name in lookups:
        print(name)
    print(f'({len(lookups)} stycken\n')

    # How many of a member's friends a friend of a friend has to know.
    if len(lookups) == 1:
        n = 1
    elif len(argv) > 1:
        n = int(argv[1])
    else:
        n = round(len(lookups) / 2.2 + 1)
    print(f'n = {n}')

    if len(lookups) <= 3:
        filename = f"../data/{'-'.join(lookups).replace('.','')}.gexf"
    else:
        filename = f"../data/{datetime.now()}.gexf"

    if len(lookups) == 1:
        # NOTE: export_network() uses its own n=3 and its own file name;
        # the values computed above are not passed on.
        export_network(lookups[0])
        return

    d = {}
    for member in lookups:
        edges = get_edges(member, lookups=lookups, common=False, n=n)
        friends = members_from_edges(edges)
        d[member] = friends
        print(member, len(friends))

    # Filter out the common friends.
    common = common_friends(d)

    print('Common friends: ', len(common))

    # "and" when both nodes must be among the common friends, otherwise "or".
    edges = pd.DataFrame(edges_from_nodes(common, mode='and'))

    _export_graph(edges, filename)


if __name__ == "__main__":
    main()