diff --git a/facebook/__main__.py b/facebook/__main__.py index 1539d12..3af1d13 100644 --- a/facebook/__main__.py +++ b/facebook/__main__.py @@ -3,21 +3,37 @@ import traceback from getopt import GetoptError, getopt from sys import argv, exit from time import sleep +from subprocess import check_output +from re import split +from socket import gethostname -import arangodb -from arangodb import db, write_report, backup, report_blocked +from arangodb import db, write_report, backup, report_blocked, get_profile, remove_profile, checked_members, friends_of_user from classes import Profile, User -from helpers import sleep_, write_error +from helpers import sleep_, write_error, _print from scrapers import profile_picture_reactions -# import werkzeug -# werkzeug.cached_property = werkzeug.utils.cached_property -# from arango import ArangoClient +def finish(): + """ Avslutar: skriver rapport och gör profilerna oanvända """ + for profile in profiles: + profile.unused() + write_report(users, list(all_pictures.difference(all_pictures_start))) + exit() if __name__ == "__main__": print() + + if gethostname() not in ['macbook.local']: # Lägg till för studiodatorn + # Hämta namn för containern där skriptet körs + try: + containers = check_output(['docker', 'container', 'ls']).decode() + container = split('\W\W+', containers.split('\n')[1])[-1] + except FileNotFoundError: + pass + else: + container_name = 'macbook' + # Argument och alternativ argv = argv[1:] try: @@ -30,16 +46,21 @@ if __name__ == "__main__": mode_nr = 1.7 elif mode == 'few': mode_nr = 1.4 + elif mode == 'force': + mode_nr = 1 else: mode = 'all' mode_nr = 1 for o, a in opts: if o in ["-u", "--user"]: - users = [ - User(str(i).strip(), mode) - for i in [(str(i).strip()) for i in a.split(",")] - ] + try: + users = [ + User(str(i).strip(), mode) + for i in [(str(i).strip()) for i in a.split(",")] + ] + except StopIteration: + raise Exception if o in ["-o", "--other"]: url_other_picture = a if o in ['-b', '--backup']: @@ -72,14 +93,20 @@ if __name__ == "__main__": print("-", user.username) print() + if 'container' not in globals(): + usernames = [user.username for user in users] + if len(usernames) == 1: + container = usernames[0] + else: + container = '-'.join(usernames) + # Skapa tre olika profiler att besöka Facebook med profiles = [] for i in range(0, 3): - doc = arangodb.get_profile() - profile = Profile(doc) + doc = get_profile() + profile = Profile(doc, container) profile.browser.open("https://api.ipify.org") - print( - f"Profil {profile.name} använder IP-adress {profile.viewing().text}." + print(f"Profil {profile.name} använder IP-adress {profile.viewing().text}." ) if profile.logged_in == False: profile.accept_cookies() @@ -92,35 +119,37 @@ if __name__ == "__main__": profile_nr = 1 profile = profiles[profile_nr] - print("Börjar med profilen", profile.name) + _print(profile.container, user.username, f"Börjar med profilen {profile.name}") # Gå igenom de användare som efterfrågats - try: + try: while True: for user in users: # Set för kollade bilder och kollade medlemmar all_pictures = set([doc["_key"] for doc in db.collection("pictures").all()]) all_pictures_start = all_pictures.copy() - members_checked = arangodb.checked_members() + members_checked = checked_members() + profile.container = user.username - if user.username not in members_checked:# Hämta reaktioner för den första användaren LÄGG TILL NOT IN MEMBERS_CHECKED + # Hämta reaktioner för den första användaren + if any([user.username not in members_checked, mode == 'force']): try: profile_picture_reactions(profile, user, all_pictures, first_user=True, mode=mode) except: - print(traceback.format_exc()) + _print(profile.container, user.username, traceback.format_exc()) if len(users) == 1: for profile in profiles: profile.unused() - friends = arangodb.friends_of_user(user.username) + friends = friends_of_user(user.username) friends_unchecked = list(set(friends) - set(members_checked)) - # Här följer cookien med så att vi fortfarnade är inloggade - print("\nKlar med", user.username, "\n") - print("Vänner som reagerat:", len(friends)) - print("Vänner att kolla:") + _print(profile.container, user.username, f"\nKlar med, {user.username}\n") + _print(profile.container, user.username, f"Vänner som reagerat: {len(friends)}") + _print(profile.container, user.username, "\nVänner att kolla:") for friend in friends_unchecked: print(friend) + _print(profile.container, user.username, ', '.join([friend for friend in friends_unchecked]), silent=True) print() # Hämta reaktioner för users vänner (som reagerat) @@ -139,25 +168,26 @@ if __name__ == "__main__": 4, e=e, user=user.username, + profile=profile.container, traceback=traceback.format_exc(), soup=profile.viewing(), ) - print("\nFel: ", str(user.username), "\n") + _print(profile.container, user.username, f"\nFel: {str(user.username)}\n") sleep_(15) if profile.blocked == False: - print("Klar med", user.username, "\n") + _print(profile.container, user.username, f"Klar med {user.username} \n") # Rotera fb-profiler if count_friends > 5 * mode_nr: if random.randrange(0, 2, 1) == 1: profile_nr += 1 count_friends = 0 - print("Växlar till", profiles[profile_nr].name) + _print(profile.container, user.username, f"Växlar till {profiles[profile_nr].name}") elif count_friends > 9 * mode_nr: profile_nr += 1 count_friends = 0 - print("Växlar till", profiles[profile_nr].name) + _print(profile.container, user.username, f"Växlar till {profiles[profile_nr].name}") if profile_nr > len(profiles) - 1: profile_nr = 0 @@ -165,31 +195,27 @@ if __name__ == "__main__": elif profile.blocked == True: # Ta bort profilen ur databasen report_blocked(profile, users) - arangodb.remove_profile(profile.doc) + remove_profile(profile.doc) # Ta bort från listan på fb-profiler som används profiles.remove(profile) # Försök lägga till en ny fb-profil (om det finns en skapad och ledig i databasen) try: - profiles[profile_nr] = Profile(new=True) - print("Laddat ny profil:", profiles[profile_nr].name) + doc = get_profile() + profiles[profile_nr] = Profile(doc, container) + _print(profile.container, user.username, f"Laddat ny profil: {profiles[profile_nr].name}") sleep(3) except e: - print("Det behövs nya profiler...") + _print(profile.container, user.username, "Det behövs nya profiler...") if len(profiles) == 0: break for s in range(0, 1600 / len(profiles)): - print(f"Sover {600-s} sekunder till... ", end="\r") + print(user, f"Sover {600-s} sekunder till... ", end="\r") profile_nr += 1 - print(f"Försöker med {profiles[profile_nr].name}.") + _print(profile.container, user.username, f"Försöker med {profiles[profile_nr].name}.") profile = profiles[profile_nr] except: - pass - - # Gör profilerna oanvända - for profile in profiles: - profile.unused() - write_report(users, list(all_pictures.difference(all_pictures_start))) - exit() \ No newline at end of file + finish() + \ No newline at end of file diff --git a/facebook/arangodb.py b/facebook/arangodb.py index 2f2a6d2..5e24e6d 100644 --- a/facebook/arangodb.py +++ b/facebook/arangodb.py @@ -32,7 +32,7 @@ except FileNotFoundError: db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd) -from helpers import now +from helpers import now, _print def checked_members(): @@ -106,7 +106,7 @@ def friends_of_user(user): def remove_profile(profile): db.collection("profiles").delete(profile['_key'], silent=True, ignore_missing=True) - _print(profile.name, f'{profile.name} blockerad och borttagen {now()}.' + _print(profile.container, f'{profile.name} blockerad och borttagen {now()}.' ) # TODO #2 Bättre funktion för backup av databasen diff --git a/facebook/scrapers.py b/facebook/scrapers.py index 0f0c5d0..185d0b2 100644 --- a/facebook/scrapers.py +++ b/facebook/scrapers.py @@ -4,10 +4,12 @@ import traceback from arangodb import db from classes import Friend, Picture, Reaction from config import * -from helpers import sleep_, update_cookie, write_error +from helpers import sleep_, update_cookie, write_error, _print -def profile_picture_reactions(profile, user, all_pictures, first_user=False, mode = 'all'): +def profile_picture_reactions( + profile, user, all_pictures, first_user=False, mode="all" +): # Fixa url:er osv if user.username.isnumeric(): @@ -22,20 +24,20 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod profile.browser.open(user.url_photos) sleep_(4) - + if ( """You can't use Facebook because your account, or activity on it, doesn't follow our Community Standards.""" in profile.viewing().text - ): - print("{} blocked\n".format(profile.name).upper()) + ): + _print(profile.container, user.username, f"{profile.name} blocked\n".upper()) profile.blocked = True - return None + return "blocked" - elif 'accept all' in profile.viewing().text.lower(): + elif "accept all" in profile.viewing().text.lower(): profile.accept_cookies() profile.browser.open(user.url_photos) - - user.name = user.username # Om inte namnet hittas senare + + user.name = user.username # Om inte namnet hittas senare try: for i in profile.viewing().find_all("strong"): if "Notifications" in str(i): @@ -47,21 +49,18 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod 6, e=e, traceback=traceback.format_exc(), + profile=profile.container, soup=profile.viewing(), - user=user.username, + user=user, url=user.url_photos, ) if first_user == True: - print(profile.viewing().prettify()) + _print(profile.container, user.username, profile.viewing().prettify()) exit() - print( - "Hämtar reaktioner på profilbilder för {name} ({user})".format( - name=user.name, user=user.username - ) - ) + _print(profile.container, user.username, f"Hämtar reaktioner på profilbilder för {user.name} ({user.username})") # Hitta länk till olika saker hos användarem, inkl facebook-id - + for a in profile.viewing().find_all("a", href=True): if "Profile pictures" in a.text: user.url_album = url_bas + a["href"] # Länk till album för profilbulder @@ -80,35 +79,42 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod user.url_timeline = url_bas + a["href"] if "Cover photos" in a.text: user.url_coverphotos = url_bas + a["href"] - + # Om det inte finns något profilalbum # Testa ta bort mellanrum och små bokstäver if not hasattr(user, "url_album"): for a in profile.viewing().find_all("a", href=True): - if "profilepictures" in a.text.lower().replace(' ', ''): + if "profilepictures" in a.text.lower().replace(" ", ""): user.url_album = url_bas + a["href"] - + user.add_to_db() # Gå till profilbilden (den första som kommer upp när man går till profilen) - - if not hasattr(user, "url_album"): # Om profilen inte har profilalbum - write_error(9, soup=profile.viewing(), user=user.username) - if user.url_other_picture != '': + + if not hasattr(user, "url_album"): # Om profilen inte har profilalbum + write_error(9, soup=profile.viewing(), user=user, profile=profile.container) + if user.url_other_picture != "": # Använd eventuell extrabild och ta bort den från användaren url_pics = [user.url_other_picture] - user.url_other_picture = '' + user.url_other_picture = "" else: # Spara ner profilen till databasen och avsluta sökningen på användaren user.url_album = False if first_user == False: user.checked() user.add_to_db() - print('Hittar inget album för profilbilder.') - write_error(7, soup=profile.viewing(), user=user.username, url=user.url_album, url_name='user.url_album') + _print(profile.container, user.username, "Hittar inget album för profilbilder.") + write_error(#fel7 + 7, + soup=profile.viewing(), + profile=profile.container, + user=user, + url=user.url_album, + url_name="user.url_album", + ) return None # ATT GÖRA Här kan andra bilder väljas istället - else: # Normalfallet där användaren har profilbildsalbum + else: # Normalfallet där användaren har profilbildsalbum profile.browser.open(user.url_album) # Samla alla profilbilder i en lista @@ -117,10 +123,10 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod for i in pics.find_all("a"): a = i["href"] url_pics.append(a[: a.find("&id")]) - if user.url_other_picture != '': + if user.url_other_picture != "": # Lägg till eventuell extrabild och ta bort den från användaren url_pics.append(user.url_other_picture) - user.url_other_picture = '' + user.url_other_picture = "" try: user.profile_pictures = len(url_pics) except: @@ -132,156 +138,198 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod user.add_to_db() # Välj vilja bilder som ska kollas. - if first_user== False: - if mode == 'single': + if first_user == False: + if mode == "single": url_pics = url_pics[:1] - elif mode == 'few' and len(url_pics) > 1: + elif mode == "few" and len(url_pics) > 1: url_pics = url_pics[:1] + url_pics[-1:] # Gå igenom valda bilder. for pic in url_pics: # Skriv ut vilken bild som behandlas. - print(f"Bild {url_pics.index(pic) + 1} av {user.profile_pictures}", end="\r",) + _print(profile.container, user.username, + f"Bild {url_pics.index(pic) + 1} av {user.profile_pictures}", + end="\r", + ) + check_picture(url_bas + pic, user, profile) - picture = Picture(user.username) - picture.url = url_bas + pic - picture.id = str(picture.url[picture.url.find("fbid=") + 5 :]) - try: - picture.id = str(re.search('\d+', picture.id).group()) - except: - pass - # if picture.id in all_pictures: - # continue - sleep_(5) + # Välj vilja bilder som ska kollas. + if first_user == False: + if mode == "single" and user.reactions > 30: + break + elif all([mode == "few", user.reactions > 50, pic != url_pics[-1]]): + # Kolla den sista bilder + check_picture(url_bas + url_pics[-1], user, profile) + break + user.checked() - try: - profile.browser.open(picture.url) - except Exception as e: # Fel3 - write_error( - 3, - e=e, - soup=profile.viewing(), - user=user.username, - url=picture.url, - url_name="url_pic", - traceback=traceback.format_exc(), - ) - update_cookie(profile.browser.session.cookies, profile.name) +def check_picture(url_picture, user, profile): + """ Hämtar reaktioner för en bildprint """ - # Hitta info om bilden - try: - picture.date = profile.viewing().find("abbr").text - except Exception as e: # Fel8 - write_error(8, e=e, soup=profile.viewing(), url=pic, url_name='picture url', user=user.name, traceback=traceback.format_exc()) - # TODO #3 lägg till fler bilder som kan gås igenom om det är få profilbilder. + picture = Picture(user.username) + picture.url = url_picture + picture.id = str(picture.url[picture.url.find("fbid=") + 5 :]) + try: + picture.id = str(re.search("\d+", picture.id).group()) + except: + pass + # if picture.id in all_pictures: + # continue + sleep_(5) + + try: + profile.browser.open(picture.url) + except Exception as e: # Fel3 + write_error( + 3, + e=e, + profile=profile.container, + soup=profile.viewing(), + user=user, + url=picture.url, + url_name="url_pic", + traceback=traceback.format_exc(), + ) + + update_cookie(profile.browser.session.cookies, profile.name) + + # Hitta info om bilden + try: + picture.date = profile.viewing().find("abbr").text + except Exception as e: # Fel8 + write_error( + 8, + e=e, + soup=profile.viewing(), + profile=profile.container, + url=picture.url, + url_name="picture url", + user=user, + traceback=traceback.format_exc(), + ) + # TODO #3 lägg till fler bilder som kan gås igenom om det är få profilbilder. - # Hämta länkar för bilden att anvrända sen - #print(profile.viewing().prettify()) + # Hämta länkar för bilden att anvrända sen + # _print(profile.container, user.username, profile.viewing().prettify()) + for a in profile.viewing().find_all("a", href=True): + if all( + [ + "reaction" in a["href"], + "reactions" not in a["href"], + "=R" not in a["href"], + ] + ): + url_reactions = url_bas + str( + a["href"] + ) # Länk till reaktionerna för bilden + elif a.text == "View full size": + pic = url_bas + a["href"] + picture.url_full = pic[ + : pic.find("&") + ] # Den fullständiga adressen till bilden, används som _key i pictures + if "url_reactions" not in locals(): for a in profile.viewing().find_all("a", href=True): - if all( - [ - "reaction" in a["href"], - "reactions" not in a["href"], - "=R" not in a["href"], - ] - ): - url_reactions = url_bas + str(a["href"]) # Länk till reaktionerna för bilden - elif a.text == "View full size": - pic = url_bas + a["href"] - picture.url_full = pic[ - : pic.find("&") - ] # Den fullständiga adressen till bilden, används som _key i pictures - if 'url_reactions' not in locals(): - for a in profile.viewing().find_all("a", href=True): - if '/likes/' in a["href"]: - url_reactions = url_bas + str(a["href"]) - if 'url_reactions' not in locals(): - for div in profile.viewing().find_all("div", href=True): - if 'like this' in div.text: - url_reactions = url_bas + str(div["href"]) - - # Hämta reaktioner för bilden - sleep_(3) - - profile.browser.open(url_reactions) + if "/likes/" in a["href"]: + url_reactions = url_bas + str(a["href"]) + if "url_reactions" not in locals(): + for div in profile.viewing().find_all("div", href=True): + if "like this" in div.text: + url_reactions = url_bas + str(div["href"]) - update_cookie(profile.browser.session.cookies, profile.name) + # Hämta reaktioner för bilden + sleep_(3) - try: - for a in profile.viewing().find_all("a", {"class": "z ba"}, href=True): - url_limit = a["href"] + profile.browser.open(url_reactions) - picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) - limit = re.search(r"limit=(\d+)", url_limit).group(1) # TODO Fortfarande problem med det här - except UnboundLocalError: #fel9 - write_error(9, soup=profile.viewing(), traceback=traceback.format_exc(), url=url_reactions, url_name='url_reactions') - # Bilder med väldigt många likes går inte att visa så här? - continue + update_cookie(profile.browser.session.cookies, profile.name) - # Addera bilden till arrango - picture.add_to_db() + try: + for a in profile.viewing().find_all("a", {"class": "z ba"}, href=True): + url_limit = a["href"] - url_limit = url_bas + url_limit.replace( - "limit=" + str(limit), "limit=" + str(picture.no_reactions) + picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) + limit = re.search(r"limit=(\d+)", url_limit).group( + 1 + ) # TODO Fortfarande problem med det här + except UnboundLocalError: # fel9 + write_error( + 9, + user=user, + profile=profile.container, + soup=profile.viewing(), + traceback=traceback.format_exc(), + url=url_reactions, + url_name="url_reactions", ) + # Bilder med väldigt många likes går inte att visa så här? + return None - try: - sleep_(4) - profile.browser.open(url_limit) - url_limit = '' - update_cookie(profile.browser.session.cookies, profile.name) - - - # Gå igenom alla som reagerat och för in i arango - for li in profile.viewing().find_all("li"): - friend = Friend(user.username, mode) - if "see more" in li.text.lower(): - continue - try: - friend_html = li.find("h3").find("a") - friend.name = friend_html.text - friend.url = friend_html["href"] - if "profile.php" in friend.url: - friend.username = friend.url[friend.url.find("id=") + 3 :] - else: - friend.username = friend.url[friend.url.find("/") + 1 :] - - reaction = Reaction(user.username, friend.username, picture.id) - for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: - if type in str(li): - reaction.type = type - picture.reactions.append(reaction.get_dict()) - # Lägg till vännens profil till arrango - friend.add_to_db() - - except AttributeError as e: # Fel1 - write_error( - 1, - e=e, - soup=str(li), - user=user.username, - traceback=traceback.format_exc(), - ) - pass - - # Lägg till reaktion till databasen - db.collection("picture_reactions").insert_many( - picture.reactions, silent=True, overwrite=True - ) - db.collection("picture_reactions").insert_many(picture.reactions, silent=True, overwrite=True) - except Exception as e: # Fel2 - write_error( - 2, - e=e, - soup=profile.viewing(), - user=user.username, - url=url_limit, - url_name="url_limit", - traceback=traceback.format_exc(), - ) - pass + # Addera bilden till arrango + picture.add_to_db() - user.checked() + url_limit = url_bas + url_limit.replace( + "limit=" + str(limit), "limit=" + str(picture.no_reactions) + ) + + try: + sleep_(4) + profile.browser.open(url_limit) + url_limit = "" + update_cookie(profile.browser.session.cookies, profile.name) - \ No newline at end of file + # Gå igenom alla som reagerat och för in i arango + for li in profile.viewing().find_all("li"): + friend = Friend(user.username) + if "see more" in li.text.lower(): + continue + try: + friend_html = li.find("h3").find("a") + friend.name = friend_html.text + friend.url = friend_html["href"] + if "profile.php" in friend.url: + friend.username = friend.url[friend.url.find("id=") + 3 :] + else: + friend.username = friend.url[friend.url.find("/") + 1 :] + + reaction = Reaction(user.username, friend.username, picture.id) + for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: + if type in str(li): + reaction.type = type + picture.reactions.append(reaction.get_dict()) + # Lägg till vännens profil till arrango + friend.add_to_db() + + except AttributeError as e: # Fel1 + write_error( + 1, + e=e, + soup=str(li), + user=user, + profile=profile.container, + traceback=traceback.format_exc(), + ) + pass + + # Lägg till reaktioner till databasen + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True + ) + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True + ) + + # Uppdatera antalet reaktioner användaren fått + user.reactions += len(picture.reactions) + except Exception as e: # Fel2 + write_error( + 2, + e=e, + soup=profile.viewing(), + profile=profile.container, + user=user, + url=url_limit, + url_name="url_limit", + traceback=traceback.format_exc(), + ) + pass \ No newline at end of file