diff --git a/facebook/__main__.py b/facebook/__main__.py index 68a173b..13ab25d 100644 --- a/facebook/__main__.py +++ b/facebook/__main__.py @@ -20,6 +20,7 @@ if __name__ == "__main__": write = True mode = 'all' pwd = None + proxieservers = 'mullvad' argv = argv[1:] @@ -60,6 +61,8 @@ if __name__ == "__main__": mode_nr = 1.7 elif mode == "few": mode_nr = 1.4 + elif mode == "solo": + mode_nr = 1.4 elif mode == "force": mode_nr = 1 @@ -120,117 +123,105 @@ if __name__ == "__main__": # Gå igenom de användare som efterfrågats - while True: - - if lookups == "leak_lookups": - id = user.username - check_profile_status(profile, user) - if profile.blocked: - profile = blocked_profile(profile, proxieservers=proxieservers) - profile.open(url_bas + "/" + user.username) - url = profile.browser.state.url.strip("/").strip("?_rdr") - if "php?" not in url: - user = User(str(url[url.rfind("/") + 1 :]).strip(), mode) - user.id = id - sleep_(4) - container = str(user.username) - profile.container = container - - if "container" not in globals(): - container = str(user.username) - profile.container = container - - profile.users_checked += 1 - - # Hämta reaktioner för den första användaren - if any([not check_for_user(user.username, mode=mode), mode == "force"]): - try: - while True: - # Uppdatera in_use - profile.update_time() - profile = profile_picture_reactions( - profile, user, first_user=True, mode=mode - ) - if profile.blocked: - profile = blocked_profile(profile, proxieservers=proxieservers) - else: - break - except: - _print(profile, user, traceback.format_exc()) - - friends = friends_of_user(user.username) - _print(profile, user, f"\nKlar med, {user.username}\n") - _print(profile, user, f"Vänner som reagerat: {len(friends)}") - _print(profile, user, "\nVänner att kolla:") - - friends_unchecked = [] - for friend in friends: - if not check_for_user(friend): - print(friend) - friends_unchecked.append(friend) - - _print(profile, user, [friends_unchecked], silent=True) - 
_print(profile, user, f'Totalt: {len(friends_unchecked)}') - print() - - # Hämta reaktioner för users vänner (som reagerat) - count_friends = 0 - for friend in friends_unchecked: - if datetime.now().strftime("%H") == '03' and int(datetime.now().strftime("%M")) < 30: # Sov för att kunna säkerhetskopieraa - sleep(1800) - count_friends += 1 - user = User(str(friend), mode, other_pictures=[]) - sleep_(2) - - # Uppdatera in_use - profile.update_time() - try: - if not check_for_user(user.username): - p = profile_picture_reactions(profile, user, mode=mode) - if isinstance(p, Profile): - profile = p - - except Exception as e: # Fel4 - write_error( - 4, - profile, - e=e, - user=user, - traceback=traceback.format_exc(), - soup=profile.viewing(), + if lookups == "leak_lookups": + id = user.username + check_profile_status(profile, user) + if profile.blocked: + profile = blocked_profile(profile, proxieservers=proxieservers) + profile.open(url_bas + "/" + user.username) + url = profile.browser.state.url.strip("/").strip("?_rdr") + if "php?" 
not in url: + user = User(str(url[url.rfind("/") + 1 :]).strip(), mode) + user.id = id + sleep_(4) + container = str(user.username) + profile.container = container + + if "container" not in globals(): + container = str(user.username) + profile.container = container + + profile.users_checked += 1 + + # Hämta reaktioner för den första användaren + if any([not check_for_user(user.username, mode=mode), mode == "force"]): + try: + while True: + # Uppdatera in_use + profile.update_time() + profile = profile_picture_reactions( + profile, user, first_user=True, mode=mode ) - _print(profile, user, f"\nFel: {str(user.username)}\n") - sleep_(15) - - if not profile.blocked: - _print(profile, user, f"Klar med {user.username} \n") - - # Rotera fb-profiler - if count_friends > 2 * mode_nr: - if random.randrange(0, 2, 1) == 1: - profile = new_profile(container, proxieservers=proxieservers) - count_friends = 0 - _print(profile, user, f"Växlar till {profile.name}") - elif count_friends > 4 * mode_nr: + if profile.blocked: + profile = blocked_profile(profile, proxieservers=proxieservers) + else: + break + except: + _print(profile, user, traceback.format_exc()) + + if mode == 'solo': + exit() + + friends = friends_of_user(user.username) + _print(profile, user, f"\nKlar med, {user.username}\n") + _print(profile, user, f"Vänner som reagerat: {len(friends)}") + _print(profile, user, "\nVänner att kolla:") + + friends_unchecked = [] + for friend in friends: + if not check_for_user(friend): + print(friend) + friends_unchecked.append(friend) + + _print(profile, user, [friends_unchecked], silent=True) + _print(profile, user, f'Totalt: {len(friends_unchecked)}') + print() + + # Hämta reaktioner för users vänner (som reagerat) + count_friends = 0 + for friend in friends_unchecked: + if datetime.now().strftime("%H") == '03' and int(datetime.now().strftime("%M")) < 30: # Sov för att kunna säkerhetskopieraa + sleep(1800) + count_friends += 1 + user = User(str(friend), mode, other_pictures=[]) + 
sleep_(2) + + # Uppdatera in_use + profile.update_time() + try: + if not check_for_user(user.username): + p = profile_picture_reactions(profile, user, mode=mode) + if isinstance(p, Profile): + profile = p + + except Exception as e: # Fel4 + write_error( + 4, + profile, + e=e, + user=user, + traceback=traceback.format_exc(), + soup=profile.viewing(), + ) + _print(profile, user, f"\nFel: {str(user.username)}\n") + sleep_(15) + + if not profile.blocked: + _print(profile, user, f"Klar med {user.username} \n") + + # Rotera fb-profiler + if count_friends > 2 * mode_nr: + if random.randrange(0, 2, 1) == 1: profile = new_profile(container, proxieservers=proxieservers) count_friends = 0 _print(profile, user, f"Växlar till {profile.name}") + elif count_friends > 4 * mode_nr: + profile = new_profile(container, proxieservers=proxieservers) + count_friends = 0 + _print(profile, user, f"Växlar till {profile.name}") - elif profile.blocked: - profile = blocked_profile(profile, proxieservers=proxieservers) - - _print(profile, None, f"Klar med alla vänner.") - sleep(3) + elif profile.blocked: + profile = blocked_profile(profile, proxieservers=proxieservers) - # Hämta ny användare från databasen när alla är genomgångna - while True: - new_user = get_user(collection=lookups) - print(new_user) - _print(profile, None, f"Ny user hämtad") - if new_user == None: - sleep(300) - _print(profile, None, "Väntar på ny user.") - else: - user = User(str(new_user["_key"]), mode) - _print(profile, user, f"Förberett ny user: {user.username}") - break + _print(profile, None, f"Klar med alla vänner.") + diff --git a/facebook/arangodb.py b/facebook/arangodb.py index 9e119a5..e1fa92a 100644 --- a/facebook/arangodb.py +++ b/facebook/arangodb.py @@ -3,6 +3,7 @@ from random import randint from time import sleep import json from datetime import datetime +from json2html import json2html from arango import ArangoClient @@ -17,7 +18,7 @@ for i in range(0, 6, 1): with open("../password_arango.txt") as f: pwd = 
f.readline() except FileNotFoundError: - if pwd == None: + if 'pwd' not in globals(): pwd = getpass(f'Lösenord för {user_arango}: ') try: @@ -74,12 +75,10 @@ def report_blocked(profile): _print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.') -def get_profile(db=db, proxieservers='mullvad', collection='profiles'): +def get_profile(db=db, collection='mullvad'): """ Hämtar profil från profiles """ - if proxieservers != 'mullvad': - collection = f'profiles_{proxieservers}' #TODO Byt namn på profiles till profiles_mullvad i DB - + while True: cursor = db.aql.execute( """ @@ -87,14 +86,12 @@ def get_profile(db=db, proxieservers='mullvad', collection='profiles'): FILTER doc.in_use < @inuse RETURN doc """, - bind_vars={"inuse": nowstamp() - 1200, '@col': collection} + bind_vars={"inuse": nowstamp() - 1200, '@col': f'profiles_{collection}'} ) profiles = [profile for profile in cursor] if profiles == []: sleep(180) - if proxieservers=='test': # Om det är ett test - profile = profiles[0] else: profile = profiles[randint(0, len(profiles) - 1)] return profile @@ -113,10 +110,11 @@ def friends_of_user(user): return [doc[8:] for doc in cursor] -def remove_profile(profile): +def remove_profile(profile, proxieservers='mullvad'): """ Tar bort en blockerad profil från databasen. 
""" _print(profile, None, f"Tar bort {profile.name}.") - db.collection("profiles").delete( + + db.collection(f'profiles_{proxieservers}').delete( profile.doc["_key"], silent=True, ignore_missing=True ) _print(profile, profile.container, f"{profile.name} blockerad och borttagen {now()}.") @@ -124,7 +122,6 @@ def remove_profile(profile): # TODO #2 Bättre funktion för backup av databasen - def arango_connect(pwd): return ArangoClient(hosts=host_arango).db( db_arango, username=user_arango, password=pwd @@ -244,6 +241,15 @@ def write_stats(continuous=False): d['_key'] = now()[:13] db.insert_document( "stats", d, overwrite=True) + + # Skriv en html-fil + with open('webbapp/templates/stats.html', 'a+') as html: + html.truncate(0) + html.write('
') + + html.write(json2html.convert(json = d)) + + # Sov för att fortsätta senare if continuous: sleep(86400) else: @@ -254,7 +260,7 @@ def blocked_profile(profile, proxieservers): _print(profile, None, f'Rapporterar att {profile.name} blockats.') report_blocked(profile) _print(profile, None, f'Tar bort {profile.name} från databasen.') - remove_profile(profile) + remove_profile(profile, proxieservers) _print(profile, None, f'Hämtar en ny profil.') profile = new_profile(profile.container, proxieservers) return profile diff --git a/facebook/config.py b/facebook/config.py index 7e018b6..c84e19e 100644 --- a/facebook/config.py +++ b/facebook/config.py @@ -1,6 +1,8 @@ - -def set_pwd(_pwd): +from getpass import getpass +def set_pwd(_pwd=None): global pwd + if _pwd == None: + _pwd = getpass('Lösenord för Arango-användaren:') pwd = _pwd # Info för arangodb diff --git a/facebook/images_pi.py b/facebook/images_pi.py index e7bec63..660773d 100644 --- a/facebook/images_pi.py +++ b/facebook/images_pi.py @@ -32,24 +32,24 @@ def download_image(url, user, id): elif r.status_code == 403: exit() - image_name = f"profile_pictures/{user}/{id}.jpg" + image_name = f"/ssd/profile_pictures/{user}/{id}.jpg" img_data = r.content with open(image_name, "wb") as handler: handler.write(img_data) - nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg" + #nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg" - headers = {"Content-type": "image/jpeg", "Slug": "heart"} - while True: - try: - r = requests.put( - nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False - ) - break + # headers = {"Content-type": "image/jpeg", "Slug": "heart"} + # while True: + # try: + # r = requests.put( + # nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False + # ) + # break - except: - print('Kunde inte ladda upp', nc_path) - sleep(5) + # except: + # print('Kunde inte ladda upp', 
nc_path) + # sleep(5) print(f"{user}\t{id}\t{r.status_code}") @@ -66,22 +66,26 @@ def get_pictures(day): bind_vars={"date": str(day)}, ) + # Skapa en lista med bilder att gå igenom. + images = [] for doc in cursor: + images.append(doc) + for doc in images: user = doc["member"] - # Skapa mapp för användarens bilder på NC... - nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}" - while True: - try: - requests.request("MKCOL", nc_path, verify=False, auth=auth) - break - except: - print('Kunde inte skapa', nc_path) - sleep(5) + # # Skapa mapp för användarens bilder på NC... + # nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}" + # while True: + # try: + # requests.request("MKCOL", nc_path, verify=False, auth=auth) + # break + # except: + # print('Kunde inte skapa', nc_path) + # sleep(5) # ...och på datorn (för backup) - if not os.path.isdir(f"profile_pictures/{user}"): - os.mkdir(f"profile_pictures/{user}") + if not os.path.isdir(f"/ssd/profile_pictures/{user}"): + os.mkdir(f"/ssd/profile_pictures/{user}") pictures = [] for picture in doc["pictures"]: @@ -121,14 +125,14 @@ def get_pictures(day): if __name__ == '__main__': # Info för arangodb - user_arango = "Pi" + user_arango = "Lasse" db_arango = "facebook" - host_arango = "http://192.168.0.3:8529" + host_arango = "http://192.168.0.4:8529" # Starta koppling till arangodb # Avkryptera lösen till arango - pwd = getpass("Arangolösenord för Pi: ") + pwd = getpass(f"Arangolösenord för {user_arango}: ") db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd) @@ -156,9 +160,6 @@ if __name__ == '__main__': "se27-wg.socks5.mullvad.net:1080", "se28-wg.socks5.mullvad.net:1080", ] - - if not os.path.isdir("profile_pictures"): - os.mkdir("profile_pictures") while True: today = date.today().strftime('%Y%m%d') diff --git a/facebook/scrapers.py b/facebook/scrapers.py index 3795536..0f46bfc 100644 --- a/facebook/scrapers.py +++ 
b/facebook/scrapers.py @@ -157,7 +157,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): if first_user == False: if mode == "single" and user.reactions > 30: break - elif all([mode == "few", user.reactions > 80, pic != url_pics[-1]]): + elif all([any([mode == "few", mode == "solo"]), user.reactions > 80, pic != url_pics[-1]]): # Kolla den sista bilder check_picture(url_bas + url_pics[-1], user, profile) user.checked_pictures.append(url_bas + pic) diff --git a/facebook/stats.py b/facebook/stats.py new file mode 100644 index 0000000..bcf653f --- /dev/null +++ b/facebook/stats.py @@ -0,0 +1,74 @@ +from datetime import datetime +from getpass import getpass +from time import sleep + +from arango import ArangoClient +from json2html import json2html + + +def now(): + """ Returns current date and time as string""" + return datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + +def write_stats(db, continuous=False): + while True: + d = {} + for col in db.collections(): + if not col['system']: + d[col['name']] = db.collection(col['name']).count() + del d['stats'] + #d['time'] = now() + cursor = db.aql.execute( + """ + FOR doc IN members + FILTER doc.checked == true + COLLECT WITH COUNT INTO length + RETURN length + """ + ) + d['checked_members'] = cursor.next() + + + # Hur många konton per säljare som finns kvar + cursor = db.aql.execute( + ''' + for doc in profiles + filter has(doc, "vendor") + COLLECT vendor = doc.vendor WITH COUNT INTO length + RETURN { + "vendor" : vendor, + "active" : length + } + ''') + d['active_vendors'] = [doc for doc in cursor] + + d['_key'] = now()[:13] + db.insert_document( "stats", d, overwrite=True) + + # Skriv en html-fil + with open('website/fb-webbapp/stats.html', 'a+') as html: + html.truncate(0) + html.write('
') + + html.write(json2html.convert(json = d)) + + # Sov för att fortsätta senare + if continuous: + sleep(86400) + else: + break + +# Info för arangodb +user_arango = "Lasse" +db_arango = "facebook" +host_arango = "http://192.168.0.4:8529" + +# Starta koppling till arangodb +# Avkryptera lösen till arango +pwd = getpass(f'Arangolösenord för {user_arango}:').strip() + +db = ArangoClient(hosts=host_arango).db( + db_arango, username=user_arango, password=pwd +) + +write_stats(db, continuous=True) diff --git a/fb-webbapp/main.py b/fb-webbapp/main.py new file mode 100644 index 0000000..8a562e5 --- /dev/null +++ b/fb-webbapp/main.py @@ -0,0 +1,14 @@ +from flask import Flask, render_template +import json +from json2html import json2html + +app = Flask(__name__) + + +@app.route("/") +def stats(): + return render_template("stats.html") + +if __name__ == "__main__": + app.run(debug=True) + diff --git a/requirements.txt b/requirements.txt index d58913b..47443c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,4 @@ soupsieve==2.2 toml==0.10.2 urllib3==1.26.3 Werkzeug==1.0.1 +json2html diff --git a/workspace.code-workspace b/workspace.code-workspace index 0495218..189cad0 100644 --- a/workspace.code-workspace +++ b/workspace.code-workspace @@ -1,16 +1,17 @@ { - "folders": [ - { - "path": "." - }, - { - "path": "facebook" - }, - { - "path": "../mrkoll" - } - ], - "settings": { - "python.pythonPath": "/Users/Lasse/.pyenv/versions/3.9.5/bin/python" - } + "folders": [ + { + "path": "." + }, + { + "path": "../mrkoll" + }, + { + "path": "facebook" + } + ], + "settings": { + "python.pythonPath": "/Users/Lasse/Datorgemensamt/Programmeringsprojekt/Facebook/fb-scraper/.venv/bin/python" + }, + } \ No newline at end of file