"""Build member networks from ArangoDB edge collections and export them as GEXF.

When run as a script, an optional first command-line argument overrides the
threshold ``n`` that decides which members are included in the network.
"""
import locale
import re
from collections import Counter
from datetime import datetime
from sys import argv

import networkx as nx
import pandas as pd
from numpy.core.numeric import NaN  # NOTE(review): appears unused in this file

# %b / %B in strptime() are locale dependent; presumably the stored dates use
# English month names -- confirm against the data.
locale.setlocale(locale.LC_TIME, "en_US")

from arangodb import db

# Prefix that ArangoDB puts in front of the key for documents in "members".
MEMBER_PREFIX = "members/"

_DIGITS = re.compile(r"\d+")

# Date layouts tried in order by convert_date().
_DATE_FORMATS = ("%d %b %Y", "%d %B %Y", "%b %d, %Y", "%B %d, %Y")


def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """
    Fetch the documents whose ``_id`` is in ``nodes``.

    Args:
        nodes (list): Document ids (``"<collection>/<key>"``) to fetch.
        collection (str, optional): Collection to query. Defaults to 'members'.
        return_fields (str, optional): AQL object expression describing what to
            return per document. It is interpolated into the query text as-is,
            so it must come from trusted code.

    Returns:
        list: One entry per matching document, shaped by ``return_fields``.
    """
    aql_nodes = f"""
        FOR doc IN @@ecollection
            FILTER doc._id IN @nodes
            RETURN {return_fields}
    """
    cursor = db.aql.execute(
        aql_nodes, bind_vars={"@ecollection": collection, "nodes": nodes}
    )
    return list(cursor)


def edges_from_nodes(nodes, edge_collections=None, mode="or"):
    """
    Return the edges touching the given nodes in the given edge collections.

    Args:
        nodes (list): Node ids the edges should be connected to.
        edge_collections (list, optional): Edge collections to read from.
            Defaults to ['picture_reactions'].
        mode (str, optional): AQL operator joining the ``_from`` and ``_to``
            conditions: 'or' keeps edges with at least one end in ``nodes``,
            'and' keeps edges with both ends in ``nodes``. It is interpolated
            into the query text as-is, so it must come from trusted code.

    Returns:
        list: Edge documents from all collections, in query order.
    """
    if edge_collections is None:
        edge_collections = ["picture_reactions"]

    edges = []
    for collection in edge_collections:
        aql_edges = f"""
            FOR doc IN @@edge_collection
                FILTER doc._from IN @nodes {mode} doc._to IN @nodes
                RETURN doc
        """
        cursor = db.aql.execute(
            aql_edges,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
            stream=True,
        )
        edges.extend(cursor)
    return edges


def convert_date(date, default_year=2021):
    """
    Normalise a textual date to ``YYYY-MM-DD``.

    The layouts '%d %b %Y', '%d %B %Y', '%b %d, %Y' and '%B %d, %Y' are tried
    in that order. If none matches, ``default_year`` is appended and the
    result is parsed as '%d %b %Y' (covers dates written without a year).

    Args:
        date (str): Date text to parse.
        default_year (int, optional): Year assumed for dates without one.
            Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or '' when ``date`` is not a string
        or matches none of the layouts.
    """
    if not isinstance(date, str):
        return ""

    candidates = [(date, fmt) for fmt in _DATE_FORMATS]
    candidates.append((f"{date} {default_year}", "%d %b %Y"))

    for text, fmt in candidates:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # Year-month-day; the earlier "%Y-%d-%d" emitted the day twice.
        return parsed.strftime("%Y-%m-%d")
    return ""


def get_edges(member, n=2, lookups=None, common=True):
    """
    Return a DataFrame with the edges of the network around ``member``.

    Args:
        member (str): Key (username) of the member.
        n (int, optional): A node outside the member's direct contacts is kept
            only if it is connected to at least ``n`` of those contacts.
            Defaults to 2.
        lookups (list, optional): Keys of the members a shared network is
            being built for. Defaults to no lookups.
        common (bool, optional): When False, the lookups are first removed
            from the member's direct contacts and then all lookups are
            appended to them. Defaults to True.

    Returns:
        df: Edges with both ends among the selected members, indexed by
        ``_key`` with columns ``_to``, ``_from``, ``reaction``, ``picture``.
    """
    member_id = f"{MEMBER_PREFIX}{member}"
    lookup_ids = [f"{MEMBER_PREFIX}{key}" for key in (lookups or [])]

    # Everyone on either end of an edge touching the member.
    contacts = set()
    for edge in edges_from_nodes([member_id]):
        contacts.add(edge["_from"])
        contacts.add(edge["_to"])
    member_friends = list(contacts)

    if not common:
        # Drop the other lookups so they do not pull in friends of friends.
        # NOTE(review): the original formatting was lost; it is not certain
        # that appending the lookups belongs inside this branch -- confirm.
        lookup_set = set(lookup_ids)
        member_friends = [
            friend for friend in member_friends if friend not in lookup_set
        ]
        member_friends.extend(lookup_ids)

    edges = edges_from_nodes(member_friends)

    # Map every node to the set of nodes it shares an edge with.
    neighbours = {}
    for edge in edges:
        _to = edge["_to"]
        _from = edge["_from"]
        neighbours.setdefault(_to, set()).add(_from)
        neighbours.setdefault(_from, set()).add(_to)

    # Keep direct contacts, plus nodes connected to at least n of them.
    friend_set = set(member_friends)
    members = [
        node
        for node, linked in neighbours.items()
        if len(linked & friend_set) >= n or node in friend_set
    ]

    edges = pd.DataFrame(
        edges_from_nodes(members, mode="and"),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)
    return edges


def members_from_edges(edges):
    """
    List the members to include in the network.

    Args:
        edges (df): Dataframe with edges.

    Returns:
        list: Unique values found in the ``_from`` and ``_to`` columns.
    """
    senders = set(edges["_from"].unique())
    receivers = set(edges["_to"].unique())
    return list(senders | receivers)


def _strip_member_prefix(node_id):
    """Return ``node_id`` without a leading 'members/' prefix."""
    if node_id.startswith(MEMBER_PREFIX):
        return node_id[len(MEMBER_PREFIX):]
    return node_id


def _picture_id(value):
    """Return the first run of digits in ``value``, or ``value`` as text if none."""
    text = str(value)
    match = _DIGITS.search(text)
    return match.group() if match else text


def edges_for_network(edges):
    """
    Prepare edges for the network.

    The frame is modified in place: the 'members/' prefix is removed from
    ``_from`` and ``_to``, ``picture`` is reduced to its numeric id, and a
    ``date`` column is added from the matching picture documents.

    Args:
        edges (df): Dataframe with edges.

    Returns:
        df: The same dataframe, prepared for the network.
    """
    edges["_from"] = edges["_from"].apply(_strip_member_prefix)
    edges["_to"] = edges["_to"].apply(_strip_member_prefix)
    # Strip URL residue in the cases where the picture got a malformed id.
    edges["picture"] = edges["picture"].apply(_picture_id)

    # Fetch the pictures so that a date can be attached to every edge.
    picture_ids = [f"pictures/{key}" for key in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    dates = {picture["id"]: convert_date(picture["date"]) for picture in pictures}

    # Pictures missing from the database get an empty date instead of a KeyError.
    edges["date"] = edges["picture"].apply(lambda key: dates.get(key, ""))
    return edges


def _write_network(edges, filename):
    """Build a MultiDiGraph from raw edges, add member nodes, write it as GEXF."""
    # Must run before edges_for_network(): it strips the "members/" prefix
    # in place, and nodes_from_list() filters on full document ids.
    members = members_from_edges(edges)

    graph = nx.from_pandas_edgelist(
        edges_for_network(edges),
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    nodes = nodes_from_list(members)
    graph.add_nodes_from([(node["_key"], node) for node in nodes])

    nx.write_gexf(graph, filename)


def export_network(member):
    """
    Export a GEXF file with the network around one member.

    Args:
        member (str): Key (username) of the member. The file is written to
            ``data/<member>_.gexf`` relative to the working directory.
    """
    filename = f"data/{member}_.gexf"
    _write_network(get_edges(member, n=3), filename)


def common_friends(d, n=2):
    """
    Filter out the friends that several networks have in common.

    Args:
        d (dict): Maps each member to an iterable with the members of its network.
        n (int, optional): Minimum number of networks a friend has to appear
            in. Defaults to 2.

    Returns:
        list: Friends present in at least ``n`` of the networks.

    Raises:
        SystemExit: After printing a message, when no friend qualifies.
    """
    counts = Counter()
    for friends in d.values():
        # set() so that a friend counts once per network.
        counts.update(set(friends))

    shared = [friend for friend, count in counts.items() if count >= n]
    if not shared:
        print('Inga gemensamma i nätverken.')
        raise SystemExit
    return shared


def main():
    """Collect the networks of the configured members and export them as GEXF."""
    lookups = [
        'katherine.zimmerman.754',
        'boogiesaman.bakhtiari',
        'lena.tidestromsagstrom',
        'bibi.rodoo',
        'mina.benaissa',
        'henrik.johnsson.73',
        'fabian.asserback',
        '100005696055822',
        'fia.wiren',
        'daniel.kjellander.5'
    ]

    print('Samlar data för:')
    for i in lookups:
        print(i)
    print(f'({len(lookups)} stycken\n')

    # How many of the friends a friend of a friend has to know.
    if len(lookups) == 1:
        n = 1
    elif len(argv) > 1:
        n = int(argv[1])
    else:
        n = round(len(lookups) / 2.2 + 1)
    print(f'n = {n}')

    if len(lookups) <= 3:
        filename = f"../data/{'-'.join(lookups).replace('.','')}.gexf"
    else:
        filename = f"../data/{datetime.now()}.gexf"

    if len(lookups) == 1:
        # NOTE(review): export_network() picks its own file name and n=3.
        export_network(lookups[0])
        return

    d = {}
    for member in lookups:
        edges = get_edges(member, lookups=lookups, common=False, n=n)
        friends = members_from_edges(edges)
        d[member] = friends
        print(member, len(friends))

    # Filter out the common friends.
    common = common_friends(d)
    print('Common friends: ', len(common))

    # 'and' requires both ends of an edge to be common friends; use 'or' otherwise.
    edges = pd.DataFrame(edges_from_nodes(common, mode='and'))
    _write_network(edges, filename)


if __name__ == "__main__":
    main()