import json
import os
import pickle
import random
import re
import traceback
from datetime import datetime
from getopt import GetoptError, getopt
from getpass import getpass
from sys import argv
from time import sleep

import nacl.encoding
import nacl.secret
import nacl.utils
import requests
import werkzeug

# The alias must exist before robobrowser is imported (presumably because
# robobrowser still looks up `werkzeug.cached_property` -- TODO confirm).
werkzeug.cached_property = werkzeug.utils.cached_property
import robobrowser
from arango import ArangoClient
from bs4 import BeautifulSoup

# import other_pictures  # Needs to be updated


def sleep_(t):
    """Sleep for roughly ``t * 4`` seconds with random jitter.

    The jitter keeps consecutive requests from looking too regular. About one
    call in sixty additionally sleeps for 300 seconds with a countdown.

    Args:
        t: Base sleep time; multiplied by the ``variation`` factor below.
    """
    variation = 4  # Tune this to try different sleep lengths and avoid getting blocked
    sleep(t * variation * random.randrange(85, 115, 1) / 100)
    if random.randrange(0, 60, 1) == 1:
        for s in range(0, 300):
            print(f"Sover {300 - s} sekunder till... ", end="\r")
            sleep(1)
        print()
    sleep(random.randrange(0, 10, 1) / 4)


def update_cookie(cookies, profile_name):
    """Pickle the browser's cookies to ``data/cookie_<profile_name>.pkl``."""
    with open("data/cookie_{}.pkl".format(profile_name), "wb") as f:
        pickle.dump(cookies, f)


def write_error(nr, e="", traceback="", soup="", user="", url="", count=0, url_name=""):
    """Print an error and store it in the ``errors`` collection in ArangoDB.

    Args:
        nr: Error number identifying the call site.
        e (str, optional): The error. Defaults to "".
        traceback (str, optional): The traceback from traceback.format_exc().
            Defaults to "". (Shadows the module of the same name inside this
            function; kept because callers pass it by keyword.)
        soup (str, optional): The parsed page, if any. Defaults to "".
        user (str, optional): The user being processed. Defaults to "".
        url (str, optional): Url, if any. Defaults to "".
        count (int, optional): Unused; kept for backward compatibility.
        url_name (str, optional): Description of the url, if any. Defaults to "".
    """
    if url == "":
        url = "ingen url"
        url_name = "ingen url"
    if soup != "":
        soup = str(soup.prettify())
    print(e)  # DEBUGGING

    # The key has one-second resolution: two errors within the same second
    # are merged into one document by overwrite_mode="update".
    key = datetime.now().strftime("%Y%m%d_%H:%M:%S")
    doc = {
        "_key": key,
        "number": nr,
        # The original literal also had `"error": nr`, which this entry
        # silently overwrote; the number is already stored under "number".
        "error": str(e),
        "user": str(user),
        "url": str(url),
        "url_name": url_name,
        "soup": soup,
        "traceback": str(traceback),
    }

    try:
        db.insert_document(
            "errors",
            doc,
            overwrite_mode="update",
            silent=True,
        )
    except Exception as insert_error:
        # Logging is best effort: never let a failed log write stop the run.
        print(insert_error)


def facebook_reactions(user, first=False):
    """Fetch the reactions on a user's profile pictures and store them.

    Relies on module-level names assigned by the script entry point:
    ``url_bas``, ``fb_profile``, ``members_checked``, ``all_pictures``,
    ``single`` and ``db``.

    Args:
        user: A ``User`` whose ``username`` is set; url attributes are filled
            in here.
        first (bool, optional): True for a user entered by the operator. In
            that case the friends dict is returned at the end, and a failure
            while reading the name prints the page and exits.

    Returns:
        ``"blocked"`` if the active Facebook profile has been blocked,
        ``{"friends": [...]}`` if the user was already checked or ``first``
        is true, otherwise ``None``.
    """
    # Build the urls
    if user.username.isnumeric():
        user.url = url_bas + "/profile.php?id=" + str(user.username)
        user.url_photos = user.url + "&v=photos"
    else:
        user.username = user.username.replace("/", "")
        user.url = url_bas + "/" + user.username
        user.url_photos = user.url + "/photos"

    if user.username in members_checked:
        print('Redan kollat', user.username)
        return {"friends": friends_of_user(user.username)}

    # Go to the page with the user's photos
    fb_profile.browser.open(user.url_photos)
    sleep_(4)
    soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")

    blocked_text = (
        "You can't use Facebook because your account, or activity on it, "
        "doesn't follow our Community Standards."
    )
    if blocked_text in soup.text:
        print("{} blocked\n".format(fb_profile.name).upper())
        return "blocked"
    elif 'accept all' in soup.text.lower():
        fb_profile.accept_cookies()
        fb_profile.browser.open(user.url_photos)
        soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")

    user.name = user.username  # Fallback if the name is not found below
    try:
        for strong in soup.find_all("strong"):
            if "Notifications" not in str(strong):
                user.name = strong.text.strip()
    except Exception as e:
        write_error(
            6,
            e=e,
            traceback=traceback.format_exc(),
            soup=soup,
            user=user.username,
            # Was `user.url_profil_photos`, an attribute that is never set,
            # so the handler itself raised AttributeError.
            url=user.url_photos,
        )
        if first:
            print(soup.prettify())
            raise SystemExit

    print(
        "Hämtar reaktioner på profilbilder för {name} ({user})".format(
            name=user.name, user=user.username
        )
    )

    # Find links to the user's different pages, including the Facebook id
    user.id = ""
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if "Profile pictures" in a.text:
            user.url_album = url_bas + href  # Link to the profile picture album
        if "profile_id" in href:
            id_match = re.search(r"\d+", href[href.find("id=") + 3 :])
            if id_match:
                user.id = id_match.group(0)
        if "Likes" in a.text:
            user.url_likes = url_bas + href
        if "About" in a.text:
            user.url_about = url_bas + href
        if "Timeline" in a.text:
            user.url_timeline = url_bas + href
        if "Cover photos" in a.text:
            user.url_coverphotos = url_bas + href

    # Without an album link there is nothing more to do for this user
    if not hasattr(user, "url_album"):
        user.url_album = ''
        user.add_to_db()
        print('Hittar inget album för profilbilder.')
        write_error(7, soup=soup, user=user.username, url=user.url_album, url_name='user.url_album')
        return None

    # TODO: other pictures could be selected here instead
    fb_profile.browser.open(user.url_album)
    soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")

    # Collect all profile pictures in a list
    url_pics = []
    thumbnail_area = soup.find("div", {"id": "thumbnail_area"})
    if thumbnail_area is not None:
        for anchor in thumbnail_area.find_all("a", href=True):
            # partition() keeps the whole href when "&id" is missing; slicing
            # with find() == -1 would drop the last character.
            url_pics.append(anchor["href"].partition("&id")[0])
    user.profile_pictures = len(url_pics)

    # Add the profile to ArangoDB
    user.add_to_db()

    # Go through the profile pictures up to a maximum number
    count = 0
    max_pic = 1 if single else 15
    reaction_types = ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]
    for pic in url_pics:
        if count == max_pic:
            break
        count += 1

        picture = Picture(user.username)
        picture.url = url_bas + pic
        picture.id = picture.url.partition("fbid=")[2]
        if not picture.id:
            # No usable id means no valid document key; skip the picture.
            continue
        if picture.id in all_pictures:
            print('Redan kollat bild', picture.id)
            continue

        sleep_(5)
        try:
            fb_profile.browser.open(picture.url)
        except Exception as e:  # Error 3
            write_error(
                3,
                e=e,
                soup=soup,
                user=user.username,
                url=picture.url,
                url_name="url_pic",
                traceback=traceback.format_exc(),
            )
            # The browser still holds the previous page; do not parse it as
            # if it belonged to this picture.
            continue

        update_cookie(fb_profile.browser.session.cookies, fb_profile.name)

        # Find info about the picture
        soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")
        date_tag = soup.find("abbr")
        picture.date = date_tag.text if date_tag is not None else ''
        # More info to store?

        # Collect the links needed later for this picture. Reset per picture
        # so a link from the previous picture is never reused.
        url_reactions = None
        for a in soup.find_all("a", href=True):
            href = a["href"]
            if (
                "reaction" in href
                and "reactions" not in href
                and "=R" not in href
            ):
                url_reactions = url_bas + str(href)  # Link to the picture's reactions
            elif a.text == "Visa i fullständig storlek" or a.text == "View full size":
                # Address of the full-size picture, without query parameters
                picture.url_full = (url_bas + href).partition("&")[0]

        # Print which picture is being processed
        print(
            "Bild {count} av {total}".format(count=count, total=user.profile_pictures),
            end="\r",
        )

        if url_reactions is None:
            write_error(
                5,
                e="Hittar ingen länk till reaktioner",
                soup=soup,
                user=user.username,
                url=picture.url,
                url_name="url_pic",
            )
            continue

        # Fetch the reactions for the picture
        sleep_(3)
        fb_profile.browser.open(url_reactions)
        update_cookie(fb_profile.browser.session.cookies, fb_profile.name)
        soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")

        # The last matching anchor carries the paging link with the counts
        url_limit = None
        for a in soup.find_all("a", {"class": "z ba"}, href=True):
            url_limit = a["href"]
        total_match = re.search(r"total_count=(\d+)", url_limit) if url_limit else None
        limit_match = re.search(r"limit=(\d+)", url_limit) if url_limit else None
        if total_match:
            picture.no_reactions = total_match.group(1)

        # Add the picture to ArangoDB
        picture.add_to_db()

        if not (total_match and limit_match):
            # No paging link: there is no reaction list to fetch.
            continue

        # Ask for all reactions on a single page
        url_limit = url_bas + url_limit.replace(
            "limit=" + limit_match.group(1), "limit=" + str(picture.no_reactions)
        )

        try:
            sleep_(4)
            fb_profile.browser.open(url_limit)
            update_cookie(fb_profile.browser.session.cookies, fb_profile.name)
            soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")

            # Go through everyone who reacted and store them in ArangoDB
            for li in soup.find_all("li"):
                if "See more" in li.text:
                    continue
                friend = Friend(user.username)
                if single:
                    friend.single = True
                try:
                    profile = li.find("h3").find("a")
                    friend.name = profile.text
                    friend.url = profile["href"]
                    if "profile.php" in friend.url:
                        friend.username = friend.url[friend.url.find("id=") + 3 :]
                    else:
                        friend.username = friend.url[friend.url.find("/") + 1 :]

                    reaction = Reaction(user.username, friend.username, picture.id)
                    li_html = str(li)
                    # When several names match, the last one in the list wins
                    for reaction_type in reaction_types:
                        if reaction_type in li_html:
                            reaction.type = reaction_type
                    picture.reactions.append(reaction.get_dict())
                    # Add the friend's profile to ArangoDB
                    friend.add_to_db()
                except (AttributeError, KeyError) as e:  # Error 1
                    # List items without a profile link end up here
                    write_error(
                        1,
                        e=e,
                        soup=soup,
                        user=user.username,
                        traceback=traceback.format_exc(),
                    )

            # Store all reactions for the picture in one batch
            if picture.reactions:
                db.collection("picture_reactions").insert_many(
                    picture.reactions, silent=True, overwrite=True
                )
        except Exception as e:  # Error 2
            write_error(
                2,
                e=e,
                soup=soup,
                user=user.username,
                url=url_limit,
                url_name="url_limit",
                traceback=traceback.format_exc(),
            )

    # TODO: fetch reactions from other pictures when the profile pictures
    # have few reactions (needs to be updated)
    print()

    db.update_document(
        {
            "_id": "members/" + str(user.username),
            "checked": True,
            "pictures_checked": user.profile_pictures,
        }
    )

    if first:
        # Was `friends`, a name never assigned in this function.
        return {"friends": friends_of_user(user.username)}
    return None


def friends_of_user(user):
    """Return the usernames of everyone who reacted to ``user``'s pictures.

    Args:
        user (str): Username (document key in ``members``).
    """
    cursor = db.aql.execute(
        """
        FOR doc IN @@col
            FILTER doc._to == @user
            RETURN DISTINCT doc._from
        """,
        bind_vars={"@col": "picture_reactions", "user": "members/" + user},
    )
    # Drop the 8-character "members/" prefix from each document id
    return [doc[8:] for doc in cursor]


def checked_members():
    """Return the set of keys of all members marked as checked."""
    cursor = db.aql.execute(
        """
        FOR doc IN @@col
            FILTER doc.checked == @bool
            RETURN doc._key
        """,
        bind_vars={"@col": "members", "bool": True},
    )
    return set(cursor)


def get_profile(nr):
    """Reserve an unused Facebook profile from the ``profiles`` collection.

    The profile is written to ``data/profile<nr>.json`` and marked as in use
    in the database.

    Args:
        nr: Slot number used in the local file name.

    Returns:
        The profile document.

    Raises:
        StopIteration: presumably raised by ``cursor.next()`` when no unused
            profile is left -- TODO confirm against python-arango.
    """
    cursor = db.aql.execute(
        """
        FOR doc IN @@col
            FILTER doc.in_use == @bool
            RETURN doc
        """,
        bind_vars={"@col": "profiles", "bool": False}
    )
    profile = cursor.next()

    # Write to file for later use
    with open('data/profile{}.json'.format(nr), 'w') as outfile:
        json.dump(profile, outfile)

    # Update the document in ArangoDB
    profile['in_use'] = True
    db.update_document(profile, check_rev=False)
    return profile


class Proxies:
    """A pool of SOCKS5 proxy addresses handed out at random."""

    def __init__(self):
        self.proxies = [
            'gb25-wg.socks5.mullvad.net:1080',
            'gb26-wg.socks5.mullvad.net:1080',
            'gb27-wg.socks5.mullvad.net:1080',
            'gb28-wg.socks5.mullvad.net:1080',
            'gb29-wg.socks5.mullvad.net:1080'
        ]

    def get_proxie(self):
        """Remove and return a random proxy; fails when the pool is empty."""
        return self.proxies.pop(random.randrange(0, len(self.proxies), 1))


class Friend:
    """Someone who reacted to a picture; stored in ``members``."""

    def __init__(self, user):
        self.collection = "members"
        self.user = user  # The user whose picture this friend reacted to
        self.username = ''
        self.url = ''
        self.name = ''
        self.single = ''

    def add_to_db(self):
        """Insert or update the friend in ArangoDB (uses global ``url_bas``)."""
        db.insert_document(
            self.collection,
            {
                "_key": self.username,
                "url": url_bas + self.url,
                "name": self.name,
            },
            overwrite_mode="update",
            silent=True,
        )


class Reaction:
    """One reaction by a friend on one of a user's pictures."""

    def __init__(self, user, friend_username, picture_id):
        self.collection = "picture_reactions"
        self.user = user
        self.picture_id = picture_id
        self.user_name_friend = friend_username
        self.type = False  # Stays False when no reaction type is recognised

    def get_dict(self):
        """Return the reaction as a document pointing from friend to user."""
        key = str(self.picture_id) + "_" + str(self.user_name_friend)
        return {
            "_to": "members/" + str(self.user),
            "_from": "members/" + str(self.user_name_friend),
            "_key": key,
            "_id": "picture_reactions/" + key,
            "picture": self.picture_id,
            "reaction": self.type,
        }


class User:
    """A Facebook user whose profile pictures are examined."""

    def __init__(self, username):
        self.collection = "members"
        self.username = str(username)
        self.fetched = datetime.now().strftime("%Y%m%d_%H:%M:%S")
        self.url_coverphotos = ''
        self.id = ''
        self.url_likes = ''
        self.url_about = ''
        self.url_timeline = ''
        self.profile_pictures = ''
        self.url = ''
        self.name = ''
        # `url_album` is deliberately not set here: facebook_reactions uses
        # hasattr() to detect whether an album link was found.

    def add_to_db(self):
        """Insert or update the profile in ArangoDB."""
        db.insert_document(
            self.collection,
            {
                "_key": self.username,
                "url": self.url,
                "name": self.name,
                "profile_pictures": self.profile_pictures,
                "facebook_id": self.id,
                "timeline": self.url_timeline,
                "likes": self.url_likes,
                "about": self.url_about,
                "cover photos": self.url_coverphotos,
                "fetched": self.fetched
            },
            overwrite_mode="update",
            silent=True,
            keep_none=False
        )


class Picture:
    """A profile picture and the reactions collected for it."""

    def __init__(self, user):
        self.collection = "pictures"
        self.user = user
        self.id = ''
        self.url_full = ''
        self.date = ''
        self.url = ''
        self.no_reactions = ''
        self.reactions = []

    def add_to_db(self):
        """Insert or update the picture in ArangoDB."""
        db.insert_document(
            self.collection,
            {
                "_key": self.id,
                # The original literal used the key "url" twice, so the
                # full-size address was discarded. It now has its own key
                # and "url" keeps the value it always ended up with.
                "url_full": self.url_full,
                "date": self.date,
                "url": self.url,
                "no_reactions": self.no_reactions,
                "user": self.user,
            },
            overwrite_mode="update",
            silent=True,
            keep_none=False
        )


class Profile:
    """A Facebook account together with its proxied browser session."""

    def __init__(self, nr, new=False):
        """Load profile ``nr`` from disk or reserve one from the database.

        Args:
            nr: Slot number; selects ``data/profile<nr>.json``.
            new (bool, optional): Force reserving a fresh profile.
        """
        try:
            with open("data/profile{}.json".format(nr)) as f:
                self.doc = json.load(f)
        except (OSError, ValueError):
            # Missing or unreadable file: reserve a profile from the database
            self.doc = get_profile(nr)
        if 'blocked' in self.doc or new:
            self.doc = get_profile(nr)

        # Account details
        self.name = self.doc["name"].strip()
        self.email = self.doc["email"]
        self.pwd = self.doc["pwd"]
        self.server = self.doc["server"]
        self.nr = nr

        # Set proxies
        # NOTE(review): a proxy credential is hard-coded here; consider
        # moving it to configuration.
        session = requests.Session()
        session.proxies = {
            "https": "socks5://'8155249667566524'@{}".format(self.server),
            "http": "socks5://'8155249667566524'@{}".format(self.server),
        }

        # Start the browser
        user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1"
        self.browser = robobrowser.RoboBrowser(
            session=session, user_agent=user_agent, history=False, parser="lxml"
        )
        try:
            # NOTE(review): unpickling executes arbitrary code; only load
            # cookie files this script wrote itself.
            with open("data/cookie_{}.pkl".format(self.name), "rb") as f:
                self.browser.session.cookies = pickle.load(f)
            self.logged_in = True
        except Exception:
            # No usable cookie file: a fresh login is needed
            self.logged_in = False

    def accept_cookies(self):
        """Accept Facebook's cookie dialog and save the session cookies."""
        self.browser.open("https://mbasic.facebook.com")
        soup = BeautifulSoup(str(self.browser.parsed), "lxml")
        if 'accept all' not in soup.text.lower():
            # The dialog is not on the start page; open the consent page
            sleep_(2)
            cookie_accept_url = "https://mbasic.facebook.com/cookie/consent-page"
            self.browser.open(cookie_accept_url)
            sleep_(2)
        try:
            form = self.browser.get_form()
            self.browser.submit_form(form)
            print(f"Accepterade cookies för {self.name}")
            sleep_(2)
            update_cookie(self.browser.session.cookies, self.name)
        except Exception:
            # Best effort: the caller continues with the login regardless
            print(f"\nAccepterade inte cookies för {self.name}\n")

    def login(self):
        """Log in to Facebook unless the session is already logged in."""
        print("Loggar in {}\n".format(self.name))

        # Go to the login page
        self.browser.open("https://mbasic.facebook.com/login")

        # Check whether the browser is already logged in
        soup = BeautifulSoup(str(self.browser.parsed), "lxml")
        if 'log out' in soup.text.lower():
            print("Redan inloggad.")
            # The original went on to fill in the login form anyway
            self.logged_in = True
            return

        # Find and fill in the form
        form = self.browser.get_form(id="login_form")
        form["email"].value = self.email
        form["pass"].value = self.pwd
        self.browser.submit_form(form, submit=form["login"])
        # Wait a little and save the cookie (the original comment promised
        # this but never did it)
        print("\nLoggade in\n")
        sleep_(2)
        update_cookie(self.browser.session.cookies, self.name)

    def block(self):
        """Mark the profile as blocked in the database and the local file."""
        if "blocked" not in self.doc:
            self.doc["blocked"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        db.update_document(self.doc, silent=True, check_rev=False)
        with open("data/profile{}.json".format(self.nr), "w") as outfile:
            json.dump(self.doc, outfile)


def _parse_users(text):
    """Turn a comma-separated string of usernames into ``User`` objects."""
    return [User(name.strip()) for name in text.split(",") if name.strip()]


def _ensure_logged_in(profile):
    """Accept cookies and log in when no saved session was loaded."""
    if not profile.logged_in:
        profile.accept_cookies()
        sleep_(2)
        profile.login()
        sleep(3)


if __name__ == "__main__":
    print()

    # Make the working directory the one the script lives in. abspath()
    # avoids chdir('') when the script is started from that directory.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    # Connection details for ArangoDB
    user_arango = "Lasse"
    pwd_arango = "4c071768bedc259288361c07aafd8535fca546086fada4e7b5de4e2bb26b0e70fa8d348c998b90d032a5b8f3fdbae1881b843021e3475198e6fb45f58d8dc450bd52f77d"
    db_arango = "facebook"
    host_arango = "http://arango.lasseedfast.se"

    # Decrypt the ArangoDB password; give up after five attempts
    for _attempt in range(5):
        try:
            key = "sssladnnklja" + getpass()
            pwd = (
                nacl.secret.SecretBox(key.encode())
                .decrypt(pwd_arango, encoder=nacl.encoding.HexEncoder)
                .decode("utf-8")
            )
            break
        except Exception:
            # Was a bare `except:`, which also swallowed Ctrl-C
            print("Fel lösenord.")
            sleep(1)
    else:
        raise SystemExit

    client = ArangoClient(hosts=host_arango)
    db = client.db(db_arango, username=user_arango, password=pwd)
    members = db.collection("members")
    pictures = db.collection("pictures")

    # Read users and options from the command line, or ask for them
    cli_args = argv[1:]
    users = None
    try:
        opts, args = getopt(cli_args, "su:", ["single", "user="])
        # "--single" is declared above but was never checked before
        single = any(o in ("-s", "--single") for o, _ in opts)
        for o, a in opts:
            if o in ["-u", "--user"]:
                users = _parse_users(a)
        if users is None:
            users = _parse_users(input("Vem/vilka vill du kolla bilder för? "))
    except GetoptError:
        users = _parse_users(input("Vem/vilka vill du kolla bilder för? "))
        # Was `in ["ja, yes, j, y"]`: a one-element list that never matched
        single = input("Söka bara en bild (single)?").strip().lower() in (
            "ja",
            "yes",
            "j",
            "y",
        )

    print("Kollar profilbilder för:")
    for user in users:
        print("-", user.username)
    print()

    # Create three different profiles to visit Facebook with
    fb_profiles = {}
    extra_proxies = Proxies()
    for nr in range(1, 4):
        fb_profiles[nr] = Profile(nr)
        # Print the outward-facing IP address of the profile's session
        fb_profiles[nr].browser.open('https://api.ipify.org')
        soup = BeautifulSoup(str(fb_profiles[nr].browser.parsed), "lxml")
        print(soup.text)
        _ensure_logged_in(fb_profiles[nr])

    fb_profile_nr = 1
    fb_profile = fb_profiles[fb_profile_nr]
    print("Börjar med profilen", fb_profile.name)
    url_bas = "https://mbasic.facebook.com"

    while True:
        for user in users:
            # Sets of already checked pictures and members
            all_pictures = set([doc["_key"] for doc in pictures.all()])
            members_checked = checked_members()

            # Fetch reactions for the user entered by the operator
            facebook_reactions(user, first=True)
            friends = friends_of_user(user.username)
            friends_unchecked = list(set(friends) - set(members_checked))

            # The cookie is carried along, so we are still logged in
            print("\nKlar med", user.username, "\n")

            print("Vänner som reagerat:", len(friends))
            print("Vänner att kolla:")
            for friend in friends_unchecked:
                print(friend)
            print()

            # Fetch reactions for the user's friends (those who reacted)
            count_friends = 0
            for f in friends:
                count_friends += 1
                friend_user = User(str(f))
                sleep_(2)
                try:
                    out = facebook_reactions(friend_user)
                    if out == "blocked":
                        # Remove the blocked profile from the database
                        db.collection('profiles').delete(
                            fb_profile.doc['_key'], silent=True, ignore_missing=True
                        )
                        stamp = datetime.now().strftime("%Y%m%d_%H:%M:%S")
                        print(f'{fb_profile.name} blockerad och borttagen {stamp}.')
                        try:
                            # Replace the blocked profile in its own slot. The
                            # original used the stale loop variable `nr` and
                            # called .remove() on the dict.
                            fb_profiles[fb_profile_nr] = Profile(fb_profile_nr, new=True)
                        except Exception:
                            # Was `except e:`, itself a NameError
                            print("Det behövs nya profiler...")
                            wait_seconds = 1600 // len(fb_profiles)
                            for s in range(wait_seconds):
                                print(f'Sover {wait_seconds - s} sekunder till... ', end='\r')
                                sleep(1)
                            fb_profile_nr = fb_profile_nr % len(fb_profiles) + 1
                            print(f"Försöker med {fb_profiles[fb_profile_nr].name}.")
                        else:
                            print("Laddat ny profil:", fb_profiles[fb_profile_nr].name)
                            _ensure_logged_in(fb_profiles[fb_profile_nr])
                    else:
                        print("Klar med", friend_user.username, "\n")

                    # Rotate profiles: maybe after 6 friends, always after 10
                    rotate = (
                        count_friends == 6 and random.randrange(0, 2, 1) == 1
                    ) or count_friends == 10
                    if rotate:
                        # Wrap around before indexing; slots are numbered from 1
                        fb_profile_nr = fb_profile_nr % len(fb_profiles) + 1
                        count_friends = 0
                        print("Växlar till", fb_profiles[fb_profile_nr].name)

                    fb_profile = fb_profiles[fb_profile_nr]

                except Exception as e:  # Error 4
                    soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")
                    write_error(
                        4,
                        e=e,
                        user=friend_user.username,
                        traceback=traceback.format_exc(),
                        soup=soup,
                    )
                    print("\nFel: ", str(friend_user.username), "\n")
                    sleep_(15)

        # Ask for new users to check
        print("\nVem vill du kolla upp?")
        users = _parse_users(input(">>> "))