You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

333 lines
8.9 KiB

import locale
import re
from datetime import datetime
from sys import argv
import networkx as nx
import pandas as pd
from numpy.core.numeric import NaN
from getpass import getpass
import arangodb
# strptime's %b/%B directives match month names in the current LC_TIME locale,
# so pin it to US English for the date parsing done in convert_date().
locale.setlocale(locale.LC_TIME, "en_US")
def nodes_from_list(
    nodes, collection="members", return_fields="{'_key': doc._key, 'name': doc.name}"
):
    """Fetch the documents of one collection whose ``_id`` is in ``nodes``.

    Args:
        nodes (list): Document ids, matched against ``doc._id``.
        collection (str, optional): Collection to query. Defaults to "members".
        return_fields (str, optional): AQL expression placed after ``RETURN``.
            It is interpolated into the query text as-is (not bound), so it
            must come from trusted code.

    Returns:
        list: One entry per matching document, shaped by ``return_fields``.
    """
    query = (
        "\n"
        "FOR doc IN @@ecollection\n"
        "FILTER doc._id IN @nodes\n"
        f"RETURN {return_fields}\n"
    )
    bindings = {"@ecollection": collection, "nodes": nodes}
    # Walk the cursor explicitly; list(cursor) would first ask it for a length.
    documents = []
    for document in db.aql.execute(query, bind_vars=bindings):
        documents.append(document)
    return documents
def edges_from_nodes(
    nodes, edge_collections=None, mode="or"
):
    """Return the edges connected to the given nodes.

    Args:
        nodes (list): Node ids matched against each edge's ``_from``/``_to``.
        edge_collections (list, optional): Edge collections to read from.
            Defaults to ``["picture_reactions"]``.
        mode (str, optional): AQL boolean operator joining the two endpoint
            tests: "or" keeps edges touching any of ``nodes``, "and" keeps
            only edges with both endpoints in ``nodes``. It is interpolated
            into the query text as-is, so it must come from trusted code.

    Returns:
        list: Edge documents from all requested collections, in query order.
    """
    if edge_collections is None:
        edge_collections = ["picture_reactions"]
    # The query text depends only on ``mode``; the collection is a bind variable.
    query = (
        "\n"
        "FOR doc IN @@edge_collection\n"
        f"FILTER doc._from IN @nodes {mode} doc._to IN @nodes\n"
        "RETURN doc\n"
    )
    edges = []
    for collection in edge_collections:
        cursor = db.aql.execute(
            query,
            bind_vars={
                "@edge_collection": collection,
                "nodes": nodes,
            },
            stream=True,
        )
        # Grow the list in place instead of re-copying it for every collection.
        edges.extend(doc for doc in cursor)
    return edges
def convert_date(date):
    """Convert a human-readable date string to ``YYYY-MM-DD``.

    Accepted layouts, tried in order: "5 Mar 2020", "5 March 2020",
    "Mar 5, 2020", "March 5, 2020" and the year-less "5 Mar", which is
    assumed to belong to 2021. Month names are matched in the current
    LC_TIME locale.

    Args:
        date (str): Date text to parse.

    Returns:
        str: The date as ``YYYY-MM-DD``, or "" when ``date`` is not a string
        or matches none of the layouts.
    """
    if not isinstance(date, str):
        return ""
    # (text, format) pairs; the last one appends the assumed year.
    attempts = (
        (date, "%d %b %Y"),
        (date, "%d %B %Y"),
        (date, "%b %d, %Y"),
        (date, "%B %d, %Y"),
        (date + " 2021", "%d %b %Y"),
    )
    for text, fmt in attempts:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # %m is the month; the previous "%Y-%d-%d" emitted the day twice.
        return parsed.strftime("%Y-%m-%d")
    return ""
def get_edges(member, n=2, lookups=None, common=True):
    """Return a DataFrame with the edges of the network around a member.

    Args:
        member (str): Username (document key) of the member.
        n (int, optional): Minimum number of seed nodes (the member's direct
            contacts plus ``lookups``) a node must share an edge with to be
            included. Seed nodes themselves are included when they have at
            least one edge. Defaults to 2.
        lookups (list, optional): Usernames to find a common network for.
            Defaults to no extra users.
        common (bool, optional): When False, the users in ``lookups`` are
            filtered out of the member's direct contacts before the lookups
            are added as seeds. Defaults to True.

    Returns:
        DataFrame: Edges whose both endpoints are selected nodes, indexed by
        ``_key`` with columns ``_to``, ``_from``, ``reaction``, ``picture``.
    """
    if lookups is None:
        lookups = ()
    member_id = f"members/{member}"
    lookup_ids = [f"members/{name}" for name in lookups]

    # Direct contacts: every endpoint of an edge touching the member.
    member_friends = set()
    for edge in edges_from_nodes([member_id]):
        member_friends.add(edge["_from"])
        member_friends.add(edge["_to"])
    member_friends = list(member_friends)

    if not common:
        # Remove the other lookups so they do not get friends of friends.
        lookup_set = set(lookup_ids)
        member_friends = [
            friend for friend in member_friends if friend not in lookup_set
        ]
    # NOTE(review): the lookups are appended here regardless of ``common``, so
    # as a set of seeds the filter above changes nothing. The nesting of this
    # step was ambiguous in the reviewed source - confirm it is not meant to
    # sit inside the ``if not common`` branch.
    member_friends.extend(lookup_ids)

    edges = edges_from_nodes(member_friends)

    # Map every node to the set of nodes it shares an edge with.
    contacts = {}
    for edge in edges:
        _to = edge["_to"]
        _from = edge["_from"]
        contacts.setdefault(_to, set()).add(_from)
        contacts.setdefault(_from, set()).add(_to)

    # Keep nodes linked to at least n seed nodes, plus the seed nodes.
    seeds = set(member_friends)
    members = [
        node
        for node, partners in contacts.items()
        if len(partners & seeds) >= n or node in seeds
    ]

    # Build the DataFrame from edges running between selected nodes only.
    edges = pd.DataFrame(
        edges_from_nodes(members, mode="and"),
        columns=["_key", "_to", "_from", "reaction", "picture"],
    )
    edges.set_index("_key", inplace=True)
    return edges
def members_from_edges(edges):
    """List the members that appear in an edge table.

    Args:
        edges (df): Dataframe with ``_from`` and ``_to`` columns.

    Returns:
        list: Each member found in either column, once, in no particular order.
    """
    senders = set(edges["_from"].unique())
    receivers = set(edges["_to"].unique())
    return list(senders.union(receivers))
def edges_for_network(edges):
    """Prepare an edge DataFrame for building the network graph.

    The frame is modified in place: the "members/" prefix is stripped from
    ``_from`` and ``_to``, ``picture`` is reduced to its first run of digits,
    and a ``date`` column is added from the matching picture documents.

    Args:
        edges (df): Dataframe with ``_from``, ``_to`` and ``picture`` columns.

    Returns:
        df: The same dataframe, prepared for the network.

    Raises:
        AttributeError: If a ``picture`` value contains no digits.
        KeyError: If a picture id is not returned by the pictures lookup.
    """
    # Bracket indexing is used because attribute-style column access is
    # fragile for underscore-prefixed names.
    edges["_from"] = edges["_from"].apply(lambda x: x[8:])  # Drop "members/"
    edges["_to"] = edges["_to"].apply(lambda x: x[8:])  # Drop "members/"
    # Strip url info in the cases where the picture was stored with a wrong id.
    edges["picture"] = edges["picture"].apply(
        lambda x: re.search(r"\d+", x).group()
    )

    # Fetch the pictures so each edge can carry the date of its picture.
    picture_ids = ["pictures/" + i for i in edges["picture"].unique().tolist()]
    pictures = nodes_from_list(
        picture_ids,
        collection="pictures",
        return_fields="{'id': doc._key, 'date':doc.date}",
    )
    dates = {
        picture["id"]: convert_date(picture["date"]) for picture in pictures
    }
    edges["date"] = edges["picture"].apply(lambda x: dates[x])
    return edges
def export_network(member):
    """Export a gexf file with the network built around one member."""
    edges = get_edges(member, n=3)
    # Collect the member ids first: edges_for_network() rewrites the endpoint
    # columns of this same frame.
    members = members_from_edges(edges)
    network_edges = edges_for_network(edges)

    # Build the graph from the relations.
    graph = nx.from_pandas_edgelist(
        network_edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Attach the member documents as node attributes, keyed by ``_key``.
    node_docs = nodes_from_list(members)
    graph.add_nodes_from([(doc["_key"], doc) for doc in node_docs])

    # Write the graph to file.
    nx.write_gexf(graph, f"data/{member}_.gexf")
def common_friends(d, n=2):
    """Filter out the friends shared by several networks.

    Args:
        d (dict): Maps each member to an iterable of that member's friends.
        n (int, optional): Number of networks a friend must appear in to be
            kept. Defaults to 2.

    Returns:
        list: Friends found in at least ``n`` of the networks. If there are
        none, a message is printed and the program exits.
    """
    tally = {}
    for friends in d.values():
        # Each network counts a friend once, however often it lists them.
        for friend in set(friends):
            tally[friend] = tally.get(friend, 0) + 1
    shared = [friend for friend, hits in tally.items() if hits >= n]
    if not shared:
        print('Inga gemensamma i nätverken.')
        exit()
    return shared
# The query helpers in this file read the module-level ``db``, so the
# connection is opened when the module is loaded.
pwd = getpass('Password for Lasse: ')
db = arangodb.arango_connect(pwd)

if __name__ == "__main__":
    lookups = [
        'katherine.zimmerman.754',
        'boogiesaman.bakhtiari',
        'lena.tidestromsagstrom',
        'bibi.rodoo',
        'mina.benaissa',
        'henrik.johnsson.73',
        'fabian.asserback',
        '100005696055822',
        'fia.wiren',
        'daniel.kjellander.5'
    ]
    print('Samlar data för:')
    for i in lookups:
        print(i)
    print(f'({len(lookups)} stycken)\n')

    # How many seed nodes a friend of a friend has to be connected to:
    # 1 for a single lookup, else the first command-line argument if given,
    # else a value derived from the number of lookups.
    if len(lookups) == 1:
        n = 1
    elif len(argv) > 1:
        n = int(argv[1])
    else:
        n = round(len(lookups)/2.2 + 1)
    print(f'n = {n}')

    # Name the file after the lookups when they are few, else after the time.
    if len(lookups) <= 3:
        filename = f"../data/{'-'.join(lookups).replace('.','')}.gexf"
    else:
        filename = f"../data/{datetime.now()}.gexf"

    # A single lookup is exported by export_network(), which picks its own
    # file name; ``filename`` above is not used in that case.
    if len(lookups) == 1:
        export_network(lookups[0])
        exit()

    d = {}
    for member in lookups:
        edges = get_edges(member, lookups=lookups, common=False, n=n)
        friends = members_from_edges(edges)
        d[member] = friends
        print(member, len(friends))

    # Filter out the common friends.
    # NOTE(review): this uses common_friends' default n=2, not the n computed
    # above - confirm that is intended.
    common = common_friends(d)
    print('Common friends: ', len(common))

    # "and" when both endpoints must be among the common friends, else "or".
    edges = pd.DataFrame(edges_from_nodes(common, mode='and'))
    # Collect the member ids before edges_for_network() rewrites the columns.
    members = members_from_edges(edges)
    edges = edges_for_network(edges)

    # Build the graph from the relations.
    G = nx.from_pandas_edgelist(
        edges,
        source="_from",
        target="_to",
        edge_attr=["reaction", "date"],
        create_using=nx.MultiDiGraph,
    )

    # Create the nodes for the network and add them to the graph.
    nodes = nodes_from_list(members)
    G.add_nodes_from([(i["_key"], i) for i in nodes])

    # Export to file.
    nx.write_gexf(
        G,
        filename
    )
#export_network("asifasghar")
# export_network(input('Member: '))