You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
333 lines
8.9 KiB
333 lines
8.9 KiB
import locale |
|
import re |
|
from datetime import datetime |
|
from sys import argv |
|
|
|
import networkx as nx |
|
import pandas as pd |
|
from numpy.core.numeric import NaN |
|
from getpass import getpass |
|
|
|
import arangodb |
|
# Use English month names for date parsing: convert_date() relies on
# strptime's %b / %B directives, which follow the LC_TIME locale.
# NOTE(review): the bare "en_US" locale name is not accepted on every
# platform (some require e.g. "en_US.UTF-8") - confirm for the target system.
locale.setlocale(locale.LC_TIME, "en_US")
|
|
|
|
|
|
|
|
|
def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """Fetch the documents whose ``_id`` is listed in ``nodes``.

    Args:
        nodes (list): Document ids (``collection/key``) to look up.
        collection (str, optional): Collection to read from. Defaults to "members".
        return_fields (str, optional): AQL expression placed after ``RETURN``.
            It is spliced into the query text (not bound), so it must come
            from trusted code.

    Returns:
        list: One entry per matching document, shaped by ``return_fields``.
    """
    # Collection name and id list are bind parameters; only the RETURN
    # projection is interpolated into the query string.
    query = f"""
        FOR doc IN @@ecollection
            FILTER doc._id IN @nodes
            RETURN {return_fields}
        """
    bind_vars = {"@ecollection": collection, "nodes": nodes}
    return list(db.aql.execute(query, bind_vars=bind_vars))
|
|
|
|
|
|
|
def edges_from_nodes(nodes, edge_collections=None, mode="or"):
    """Return the edges that touch the given nodes in the chosen edge collections.

    Args:
        nodes (list): Node ids matched against each edge's ``_from`` / ``_to``.
        edge_collections (list, optional): Edge collections to query.
            Defaults to ["picture_reactions"].
        mode (str, optional): AQL boolean operator joining the two filters.
            "or" keeps edges where either endpoint is in ``nodes``; "and"
            keeps edges where both are. It is spliced into the query text
            (not bound), so it must come from trusted code. Defaults to "or".

    Returns:
        list: Edge documents from all queried collections, in query order.
    """
    if edge_collections is None:
        edge_collections = ["picture_reactions"]

    # The query text is the same for every collection; only the bind
    # variables change, so build it once.
    aql_edges = f"""
        FOR doc IN @@edge_collection
            FILTER doc._from IN @nodes {mode} doc._to IN @nodes
            RETURN doc
        """

    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            aql_edges,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
            stream=True,
        )
        # extend() appends in place instead of re-copying the accumulated list.
        edges.extend(cursor)

    return edges
|
|
|
|
|
def convert_date(date, default_year=2021):
    """Convert a scraped date string to ISO ``YYYY-MM-DD``.

    The following layouts are tried in order: ``"5 Jan 2020"``,
    ``"5 January 2020"``, ``"Jan 5, 2020"``, ``"January 5, 2020"`` and finally
    a year-less ``"5 Jan"``, which is completed with ``default_year``.

    Args:
        date (str): Date text to parse. Month names are matched according to
            the active LC_TIME locale.
        default_year (int, optional): Year assumed when ``date`` has none.
            Defaults to 2021.

    Returns:
        str: The date as ``YYYY-MM-DD``, or "" when ``date`` is not a string
        or matches none of the layouts.
    """
    if not isinstance(date, str):
        return ""

    candidates = (
        (date, "%d %b %Y"),
        (date, "%d %B %Y"),
        (date, "%b %d, %Y"),
        (date, "%B %d, %Y"),
        (f"{date} {default_year}", "%d %b %Y"),
    )
    for text, fmt in candidates:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # Year-month-day; the month directive is %m (not a second %d).
        return parsed.strftime("%Y-%m-%d")
    return ""
|
|
|
|
|
def get_edges(member, n=2, lookups=[], common=True):
    """Return a DataFrame with the edges of the network around ``member``.

    Args:
        member (str): Username of the member the network is built around.
        n (int, optional): Minimum number of the member's contacts somebody
            must share an edge with to be kept. The contacts themselves are
            always kept. Defaults to 2.
        lookups (list, optional): Usernames to find a shared network for.
            Defaults to [].
        common (bool, optional): When False, the other ``lookups`` are first
            dropped from the member's contacts and every lookup is then
            appended to the contact list. Defaults to True.

    Returns:
        df: DataFrame indexed by ``_key`` with the columns ``_to``, ``_from``,
        ``reaction`` and ``picture``.
    """
    member_id = f"members/{member}"
    lookup_ids = [f"members/{name}" for name in lookups]

    # First-degree contacts: both endpoints of every edge touching the member.
    contacts = set()
    for edge in edges_from_nodes([member_id]):
        contacts.update((edge["_from"], edge["_to"]))
    contacts = list(contacts)

    if not common:
        # Drop the other lookups so that they do not pull in friends of friends.
        contacts = [contact for contact in contacts if contact not in lookup_ids]
        # NOTE(review): the source indentation was not recoverable here; this
        # step is assumed to belong to the "not common" branch - confirm.
        contacts.extend(lookup_ids)
    edges = edges_from_nodes(contacts)

    # Map every node to the set of nodes it has interacted with.
    neighbours = {}
    for edge in edges:
        target, source = edge["_to"], edge["_from"]
        neighbours.setdefault(target, set()).add(source)
        neighbours.setdefault(source, set()).add(target)

    # Keep the contacts themselves plus anybody linked to at least n of them.
    contact_set = set(contacts)
    members = [
        node
        for node, linked in neighbours.items()
        if len(linked & contact_set) >= n or node in contact_set
    ]

    # Build the edge table from edges whose endpoints are both kept members.
    frame = pd.DataFrame(
        edges_from_nodes(members, mode="and"),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    return frame.set_index("_key")
|
|
|
|
|
def members_from_edges(edges):
    """List every member that appears in an edge table.

    Args:
        edges (df): DataFrame with ``_from`` and ``_to`` columns.

    Returns:
        list: Unique values found in either column, in no particular order.
    """
    senders = set(edges["_from"].unique())
    receivers = set(edges["_to"].unique())
    return list(senders | receivers)
|
|
|
|
|
def edges_for_network(edges):
    """Prepare an edge table for export as a network.

    Strips the ``members/`` prefix from ``_from`` and ``_to``, reduces each
    ``picture`` value to its first run of digits and adds a ``date`` column
    holding the date of the picture the edge refers to.

    Args:
        edges (df): DataFrame with ``_from``, ``_to`` and ``picture`` columns.
            It is modified in place.

    Returns:
        df: The same DataFrame, with the cleaned columns and the added
        ``date`` column ("" when the picture or its date is unknown).
    """
    prefix_length = len("members/")
    edges["_from"] = edges["_from"].apply(lambda node_id: node_id[prefix_length:])
    edges["_to"] = edges["_to"].apply(lambda node_id: node_id[prefix_length:])

    # Some picture ids were stored with URL residue around them; keep only the
    # first run of digits. Ids without any digits are left untouched.
    digits = re.compile(r"\d+")

    def clean_picture_id(raw_id):
        found = digits.search(raw_id)
        return found.group() if found else raw_id

    edges["picture"] = edges["picture"].apply(clean_picture_id)

    # Fetch the pictures so that each edge can be given a date.
    picture_ids = ["pictures/" + key for key in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    dates = {picture["id"]: convert_date(picture["date"]) for picture in pictures}

    # A picture missing from the collection gets the same "" that
    # convert_date() uses for an unparseable date.
    edges["date"] = edges["picture"].apply(lambda key: dates.get(key, ""))

    return edges
|
|
|
|
|
def export_network(member):
    """Export a GEXF file with the network built around one member.

    Args:
        member (str): Username of the member; also used in the output path
            ``data/<member>_.gexf``.
    """
    edges = get_edges(member, n=3)
    # Collect the member ids before edges_for_network() rewrites the
    # _from/_to columns of this same DataFrame.
    members = members_from_edges(edges)

    # Build the graph from the relations.
    graph = nx.from_pandas_edgelist(
        edges_for_network(edges),
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Add the member documents as nodes, keyed by _key, with the whole
    # document as node attributes.
    member_docs = nodes_from_list(members)
    graph.add_nodes_from([(doc["_key"], doc) for doc in member_docs])

    # Write the graph to file.
    nx.write_gexf(graph, f"data/{member}_.gexf")
|
|
|
|
|
def common_friends(d, n=2):
    """Return the members that occur in at least ``n`` of the given networks.

    Args:
        d (dict): Maps each looked-up member to an iterable of the members in
            its network. Duplicates within one network count once.
        n (int, optional): Minimum number of networks a member must occur in.
            Defaults to 2.

    Returns:
        list: Members found in at least ``n`` networks.

    Raises:
        SystemExit: When no member qualifies; a message is printed first.
    """
    from collections import Counter

    occurrences = Counter(
        friend for friends in d.values() for friend in set(friends)
    )
    shared = [friend for friend, count in occurrences.items() if count >= n]

    if not shared:
        print('Inga gemensamma i nätverken.')
        # Same effect as exit(), without relying on the site-provided helper.
        raise SystemExit

    return shared
|
|
|
# Prompt for the database password and open the connection. These statements
# run at module level, i.e. also when the file is imported.
# `db` is the handle the query helpers in this file call `db.aql.execute` on.
pwd = getpass('Password for Lasse: ')
db = arangodb.arango_connect(pwd)
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the shared network of the members in
    # `lookups` and export it as a GEXF file.

    lookups = [
        'katherine.zimmerman.754',
        'boogiesaman.bakhtiari',
        'lena.tidestromsagstrom',
        'bibi.rodoo',
        'mina.benaissa',
        'henrik.johnsson.73',
        'fabian.asserback',
        '100005696055822',
        'fia.wiren',
        'daniel.kjellander.5'
    ]

    print('Samlar data för:')
    for i in lookups:
        print(i)
    print(f'({len(lookups)} stycken)\n')

    # Threshold passed to get_edges(): how many of a member's contacts
    # somebody has to be linked to. It can be overridden by the first
    # command-line argument.
    if len(lookups) == 1:
        n = 1
    elif len(argv) > 1:
        n = int(argv[1])
    else:
        n = round(len(lookups)/2.2 + 1)
    # NOTE(review): the source indentation was not recoverable here; the
    # threshold is reported for every branch.
    print(f'n = {n}')

    # Few lookups: name the file after them. Otherwise use a timestamp.
    if len(lookups) <= 3:
        filename = f"../data/{'-'.join(lookups).replace('.','')}.gexf"
    else:
        filename = f"../data/{datetime.now()}.gexf"

    # A single lookup is exported as that member's own network.
    if len(lookups) == 1:
        export_network(lookups[0])
        raise SystemExit

    # Collect the network members of every lookup.
    d = {}
    for member in lookups:
        edges = get_edges(member, lookups=lookups, common=False, n=n)
        friends = members_from_edges(edges)
        d[member] = friends
        print(member, len(friends))

    # Filter out the common friends.
    # NOTE(review): `n` is not forwarded, so common_friends() uses its own
    # default of 2 - confirm that this is intended.
    common = common_friends(d)

    print('Common friends: ', len(common))

    # 'and' requires both endpoints to be among the common friends; use 'or'
    # to keep edges with only one endpoint among them.
    edges = pd.DataFrame(edges_from_nodes(common, mode='and'))

    # Collect the member ids before edges_for_network() strips their prefix.
    members = members_from_edges(edges)

    edges = edges_for_network(edges)

    # Build the graph from the relations.
    G = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Fetch the member documents and add them to the graph as nodes.
    nodes = nodes_from_list(members)
    G.add_nodes_from([(i["_key"], i) for i in nodes])

    # Export to file.
    nx.write_gexf(G, filename)
|
|
|
|
|
|
|
|
|
#export_network("asifasghar") |
|
# export_network(input('Member: '))
|
|
|