You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

67 lines
1.9 KiB

import networkx as nx
from _arango import arango
import random
from print_color import *
import json
import datetime
# Build a directed graph of rumors. Each rumor becomes one edge running from
# the person it was heard from ("heard_from") to the person who heard it
# ("heard_person"), carrying the rumor's key, summary and class as attributes.
#
# NOTE(review): nx.DiGraph stores a single edge per (source, target) pair, so
# a later rumor between the same two people replaces the attributes of an
# earlier one. Confirm that is intended.
G = nx.DiGraph()
q = "for doc in rumors filter doc.sexual_content != null return doc"
rumors = list(arango.db.aql.execute(q))
# Look the collection handle up once instead of once per rumor.
interrogations = arango.db.collection("interrogations")
not_heard_from = 0  # number of rumors without a recorded source
for rumor in rumors:
    # The interrogation is fetched by the rumor's own key. A missing document
    # comes back as None, so fall back to an empty mapping.
    interrogation = interrogations.get(rumor["_key"]) or {}
    # The query filters on sexual_content only, so sexual_summary may be
    # absent as well as empty.
    if not rumor.get("sexual_summary"):
        rumor["sexual_summary"] = ""
    # From person
    if not rumor.get("heard_from"):
        not_heard_from += 1
        # Unique placeholder so unknown sources do not collapse into one node.
        rumor["heard_from"] = "Unknown_" + str(random.randint(1, 1000000))
    # To person
    # NOTE(review): the presence check uses "person_id" but the value is read
    # from "person" - confirm which field is authoritative.
    if "person_id" in interrogation and interrogation.get("person"):
        rumor["heard_person"] = interrogation["person"]
    if not rumor.get("heard_person"):
        # Without a target the edge cannot be added; use the same placeholder
        # scheme as for unknown sources.
        rumor["heard_person"] = "Unknown_" + str(random.randint(1, 1000000))
    rumor.setdefault("class", "Unknown")
    rumor.setdefault("class_description", "Unknown")
    # Add an edge to the graph with 'sexual_summary' as an attribute
    G.add_edge(
        rumor["heard_from"],
        rumor["heard_person"],
        label=rumor["_key"],
        content=rumor["sexual_summary"],
        class_=rumor["class"],
        class_description=rumor["class_description"],
    )
# Gather every name that appears at either end of a rumor edge.
heards_froms = {entry["heard_from"] for entry in rumors}
heard_persons = {entry["heard_person"] for entry in rumors}
all_nodes = list(heards_froms | heard_persons)

# Fetch the person documents whose name matches one of those endpoints.
q = "for doc in persons filter doc.name in @all_nodes return doc"
cursor = arango.db.aql.execute(q, bind_vars={"all_nodes": all_nodes})
persons = list(cursor)

# Attach each person's key and info to the node carrying their name. Both
# values are stored as JSON-encoded strings.
node_entries = []
for person in persons:
    attributes = {
        "_key": json.dumps(person["_key"]),
        "info": json.dumps(person["info"]),
    }
    node_entries.append((person["name"], attributes))
G.add_nodes_from(node_entries)
# Write the graph to a GEXF file
import os

# Writing fails if the target folder is missing, so create it up front.
os.makedirs("output_files", exist_ok=True)
# NOTE(review): the file name carries only the time of day, so a run at the
# same second on another day would overwrite an earlier export.
current_time = datetime.datetime.now().strftime("%H-%M-%S")
filename = f"output_files/rumors_{current_time}.gexf"
nx.write_gexf(G, filename)
# Summary: total rumors processed, then how many had no recorded source.
print(len(rumors))
print(not_heard_from)