"""Export a member's interaction network from ArangoDB to a GEXF file.

Originally added as ``facebook/gephi.py``. The module reads member and
picture documents plus reaction edges through the project's ``arangodb.db``
handle, builds a ``networkx.MultiDiGraph`` and writes it in GEXF format.
"""

import locale
import re
from collections import defaultdict
from datetime import datetime

import networkx as nx
import pandas as pd

# ``numpy.core`` is a private namespace and the ``NaN`` alias was removed in
# NumPy 2.0; ``numpy.nan`` is the same value and exists in every release.
# The name is not used below but is kept importable from this module.
from numpy import nan as NaN  # noqa: F401,N812

from arangodb import db

# strptime's %b/%B are locale dependent; presumably the stored dates use
# English month names -- TODO confirm against the pictures collection.
locale.setlocale(locale.LC_TIME, "en_US")

# Boolean operators that may be spliced into the edge filter.
_AQL_BOOLEAN_OPERATORS = frozenset({"or", "and", "||", "&&"})

# Date layouts that include a year, tried in this order by convert_date().
_DATE_FORMATS_WITH_YEAR = ("%d %b %Y", "%d %B %Y", "%b %d, %Y", "%B %d, %Y")

# Layout used after a year has been appended to a year-less date.
_DATE_FORMAT_YEAR_APPENDED = "%d %b %Y"

# Length of the "members/" collection prefix stripped from edge endpoints.
_MEMBER_PREFIX_LENGTH = len("members/")

_DIGITS = re.compile(r"\d+")


def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """Return the documents in ``collection`` whose ``_id`` is in ``nodes``.

    Args:
        nodes (iterable): Document ids (``"<collection>/<key>"``) to fetch.
        collection (str, optional): Collection to read. Defaults to
            ``"members"``.
        return_fields (str, optional): AQL expression placed after ``RETURN``.
            It is inserted into the query text verbatim, so it must come from
            trusted code, never from user input.

    Returns:
        list: One entry per matching document, shaped by ``return_fields``.
    """
    aql_nodes = f"""
        FOR doc IN @@ecollection
            FILTER doc._id IN @nodes
            RETURN {return_fields}
    """
    cursor = db.aql.execute(
        aql_nodes, bind_vars={"@ecollection": collection, "nodes": list(nodes)}
    )
    return list(cursor)


def edges_from_nodes(
    nodes, edge_collections=("picture_reactions",), simple=True, mode="or"
):
    """Return the edges that touch the given nodes.

    Args:
        nodes (list): Node ids the edges are matched against.
        edge_collections (iterable, optional): Edge collections to read.
            A single collection name is accepted as well. Defaults to
            ``("picture_reactions",)``.
        simple (bool, optional): When true only ``_to``, ``_from``, ``_id``
            and ``_key`` are returned; otherwise the whole edge document.
            Defaults to True.
        mode (str, optional): Boolean operator joining the two endpoint
            tests: ``"or"`` keeps edges with at least one endpoint in
            ``nodes``, ``"and"`` keeps edges with both. Defaults to ``"or"``.

    Returns:
        list: Edge documents (dicts) from all collections, in query order.

    Raises:
        ValueError: If ``mode`` is not a boolean operator.
    """
    # ``mode`` becomes part of the query text, so only accept real operators.
    if str(mode).strip().lower() not in _AQL_BOOLEAN_OPERATORS:
        raise ValueError(f"mode must be 'or' or 'and', got {mode!r}")

    if isinstance(edge_collections, str):
        edge_collections = (edge_collections,)

    if simple:
        return_fields = (
            "{'_to': doc._to, '_from': doc._from, '_id':doc._id, '_key':doc._key}"
        )
    else:
        return_fields = "doc"

    aql_edges = f"""
        FOR doc IN @@edge_collection
            FILTER doc._from IN @nodes {mode} doc._to IN @nodes
            RETURN {return_fields}
    """

    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            aql_edges,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
        )
        edges.extend(cursor)

    return edges


def convert_date(date, default_year=2021):
    """Convert a textual date to ``YYYY-MM-DD``.

    The layouts ``"%d %b %Y"``, ``"%d %B %Y"``, ``"%b %d, %Y"`` and
    ``"%B %d, %Y"`` are tried in that order. If none matches, the date is
    assumed to lack a year: ``default_year`` is appended and ``"%d %b %Y"``
    is tried once more.

    Args:
        date (str): Date text to parse.
        default_year (int, optional): Year assumed for year-less dates.
            Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or ``""`` when ``date`` is not a
        string or matches none of the layouts.
    """
    if not isinstance(date, str):
        # Documents without a date come back as None; treat as unparseable.
        return ""

    attempts = [(date, layout) for layout in _DATE_FORMATS_WITH_YEAR]
    attempts.append((f"{date} {default_year}", _DATE_FORMAT_YEAR_APPENDED))

    for text, layout in attempts:
        try:
            parsed = datetime.strptime(text, layout)
        except ValueError:
            continue
        return parsed.strftime("%Y-%m-%d")
    return ""


def _picture_key(picture):
    """Return the first run of digits in a picture reference.

    Some edges carry URL fragments instead of the bare picture id. When no
    digits are present the reference is returned unchanged (as text), which
    simply yields no date later on instead of raising.
    """
    text = str(picture)
    match = _DIGITS.search(text)
    return match.group() if match else text


def export_network(member, n=2, path="data/network_test.gexf"):
    """Export a GEXF file with the network around one member.

    Args:
        member (str): Key of the member (without the ``members/`` prefix).
        n (int, optional): Minimum number of the member's contacts a node
            must have interacted with to be included. Defaults to 2.
        path (str, optional): Output file. Defaults to
            ``"data/network_test.gexf"``.
    """
    member = f"members/{member}"

    # Everyone directly connected to the member (the member id included).
    member_friends = set()
    for edge in edges_from_nodes([member]):
        member_friends.add(edge["_from"])
        member_friends.add(edge["_to"])
    edges = edges_from_nodes(list(member_friends))

    # For every node, the set of nodes it has interacted with.
    neighbours = defaultdict(set)
    for edge in edges:
        neighbours[edge["_to"]].add(edge["_from"])
        neighbours[edge["_from"]].add(edge["_to"])

    # Keep only nodes that interacted with at least ``n`` of the contacts.
    members = [
        node
        for node, peers in neighbours.items()
        if len(peers & member_friends) >= n
    ]

    # Edges whose both endpoints are among the selected nodes.
    edges = pd.DataFrame(
        edges_from_nodes(members, mode="and", simple=False),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)

    # The nodes that actually occur in those edges.
    members = list(set(edges["_from"].unique()) | set(edges["_to"].unique()))

    # networkx expects (node_id, attribute_dict) pairs.
    nodes = [(doc["_key"], doc) for doc in nodes_from_list(members)]

    # Edge endpoints are stored as "members/<key>"; the graph uses bare keys.
    edges["_from"] = edges["_from"].map(lambda x: x[_MEMBER_PREFIX_LENGTH:])
    edges["_to"] = edges["_to"].map(lambda x: x[_MEMBER_PREFIX_LENGTH:])
    edges["picture"] = edges["picture"].map(_picture_key)

    # Look up the pictures so each edge can carry the picture's date.
    picture_ids = ["pictures/" + key for key in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    picture_dates = {
        picture["id"]: convert_date(picture["date"]) for picture in pictures
    }
    # Pictures missing from the database get the same "" as unparseable dates.
    edges["date"] = edges["picture"].map(lambda key: picture_dates.get(key, ""))

    # NOTE(review): the edge attributes used to include "now", but no such
    # column is ever created, which makes from_pandas_edgelist raise. Add the
    # column before this call if a timestamp attribute was intended.
    G = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Attach the node attributes (_key, name).
    G.add_nodes_from(nodes)

    nx.write_gexf(G, path)


if __name__ == "__main__":
    export_network("maria.hansson.botin")
    # export_network(input('Member: '))