From 5e60067d6b1fe71cf99afbd58b5def5c5222935d Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 16 Mar 2021 21:21:28 +0100 Subject: [PATCH] first commit --- .gitignore | 4 + Dockerfile | 21 ++ facebook_reactions.py | 787 +++++++++++++++++++++++++++++++++++++++ requirements.txt | 20 + workspace.code-workspace | 10 + 5 files changed, 842 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 facebook_reactions.py create mode 100644 requirements.txt create mode 100644 workspace.code-workspace diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3dc1d2d --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/.DS_Store +/.venv +/.vscode +/__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..423adad --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ + +FROM python:3.8 + +WORKDIR / + +COPY requirements.txt . + +RUN pip install -r requirements.txt + +#RUN apt-get install build-essential libssl-dev libffi-dev python-dev + +COPY facebook_reactions.py . + +CMD [ "python", "./facebook_reactions.py" ] + +# BUILD: +# docker buildx create --use +#docker buildx build --platform linux/arm -t l3224/fb-reactions:pi --push . + +# START +# docker run -it --name fb1 -v vol1:/data l3224/fb-reactions:latest \ No newline at end of file diff --git a/facebook_reactions.py b/facebook_reactions.py new file mode 100644 index 0000000..14f939b --- /dev/null +++ b/facebook_reactions.py @@ -0,0 +1,787 @@ +import json +import os +import pickle +import random +import re +import traceback +from datetime import datetime +from getopt import GetoptError, getopt +from getpass import getpass +from sys import argv +from time import sleep + +import nacl.secret +import nacl.utils +import requests +import werkzeug + +werkzeug.cached_property = werkzeug.utils.cached_property +import robobrowser +from arango import ArangoClient +from bs4 import BeautifulSoup + +# import other_pictures # Måste uppdateras + + +def sleep_(t): + """ + Sover en tid nära den angivna (för att inte sökningarna ska bli för lika varandra) + """ + variation = 4 # Testa olika sovlängder för att inte få användaren blockerad + sleep(t * variation * random.randrange(85, 115, 1) / 100) + if random.randrange(0, 60, 1) == 1: + for s in range(0, 300): + print(f"Sover {300 - s} sekunder till... ", end="\r") + sleep(1) + print() + sleep(random.randrange(0, 10, 1) / 4) + + +def update_cookie(cookies, profile_name): + """ Uppdaterar cookie för browser """ + with open("data/cookie_{}.pkl".format(profile_name), "wb") as f: + pickle.dump(cookies, f) + + +def write_error(nr, e="", traceback="", soup="", user="", url="", count=0, url_name=""): + """Skriver info efter error till arango + + Args: + nr ([type]): error number + e (str, optional): error. Defaults to "". + traceback (str, optional): The traceback from traceback.format_exc(). Defaults to "". + soup (str, optional): Soup. Defaults to "". + user (str, optional): The user. Defaults to "". + url (str, optional): Url, if any. Defaults to "". + count (int, optional): Count, if any. Defaults to 0. + url_name (str, optional): The description of the url, if any. Defaults to "". 
+ """ + if url == "": + url = "ingen url" + url_name = "ingen url" + + if soup != "": + soup = str(soup.prettify()) + + print(e) # FELSÖKNING + + key = datetime.now().strftime("%Y%m%d_%H:%M:%S") + doc = { + "_key": key, + "number": nr, + "error": nr, + "user": str(user), + "error": str(e), + "url": str(url), + "url_name": url_name, + "soup": soup, + "traceback": str(traceback), + } + + try: + db.insert_document( + "errors", + doc, + overwrite_mode="update", + silent=True, + ) + except Exception as e: + print(e) + + +def facebook_reactions(user, first=False): + + # Fixa url:er osv + if user.username.isnumeric(): + user.url = url_bas + "/profile.php?id=" + str(user.username) + user.url_photos = user.url + "&v=photos" + else: + user.username = user.username.replace("/", "") + user.url = url_bas + "/" + user.username + user.url_photos = user.url + "/photos" + + if user.username in members_checked: + print('Redan kollat', user.username) + return {"friends": friends_of_user(user.username)} + + # Gå till sidan för profilbilder + fb_profile.browser.open(user.url_photos) + + sleep_(4) + + soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") + + if ( + """You can't use Facebook because your account, or activity on it, doesn't follow our Community Standards.""" + in soup.text + ): + print("{} blocked\n".format(fb_profile.name).upper()) + return "blocked" + elif 'accept all' in soup.text.lower(): + fb_profile.accept_cookies() + fb_profile.browser.open(user.url_photos) + soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") + + user.name = user.username # Om inte namnet hittas senare + try: + for i in soup.find_all("strong"): + if "Notifications" in str(i): + continue + else: + user.name = i.text.strip() + except Exception as e: + write_error( + 6, + e=e, + traceback=traceback.format_exc(), + soup=soup, + user=user.username, + url=user.url_profil_photos, + ) + if first == True: + print(soup.prettify()) + exit() + print( + "Hämtar reaktioner på profilbilder för {name} ({user})".format( + name=user.name, user=user.username + ) + ) + + # Hitta länk till olika saker hos användarem, inkl facebook-id + + user.id = "" + for a in soup.find_all("a", href=True): + if "Profile pictures" in a.text: + user.url_album = url_bas + a["href"] # Länk till album för profilbulder + if "profile_id" in a["href"]: + l = a["href"] + user.id = re.search("\d+", l[l.find("id=") + 3 :]).group(0) + if "Likes" in a.text: + user.url_likes = url_bas + a["href"] + if "About" in a.text: + user.url_about = url_bas + a["href"] + if "Timeline" in a.text: + user.url_timeline = url_bas + a["href"] + if "Cover photos" in a.text: + user.url_coverphotos = url_bas + a["href"] + + # Gå till profilbilden (den första som kommer upp när man går till profilen) + if not hasattr(user, "url_album"): + user.url_album = '' + user.add_to_db() + print('Hittar inget album för profilbilder.') + write_error(7, soup=soup, user=user.username, url=user.url_album, url_name='user.url_album') + return None + # ATT GÖRA Här kan andra bilder väljas istället + + fb_profile.browser.open(user.url_album) + soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") + + # Samla alla profilbilder i en lista + url_pics = [] + pics = soup.find("div", {"id": "thumbnail_area"}) + for i in pics.find_all("a"): + a = i["href"] + url_pics.append(a[: a.find("&id")]) + + try: + user.profile_pictures = len(url_pics) + except: + user.profile_pictures = 0 + + # Lägg till profilen till arrango + user.add_to_db() + + # Gå igenom alla profilbilder upp till ett maximalt 
antal + count = 0 + if single == True: + max_pic = 1 + else: + max_pic = 15 + for pic in url_pics: + picture = Picture(user.username) + if count == max_pic: + break + else: + count += 1 + picture.url = url_bas + pic + picture.id = str(picture.url[picture.url.find("fbid=") + 5 :]) + if picture.id in all_pictures: + print('Redan kollat bild', picture.id) + continue + sleep_(5) + + try: + fb_profile.browser.open(picture.url) + except Exception as e: # Fel3 + write_error( + 3, + e=e, + soup=soup, + user=user.username, + url=picture.url, + url_name="url_pic", + traceback=traceback.format_exc(), + ) + + update_cookie(fb_profile.browser.session.cookies, fb_profile.name) + + # Hitta info om bilden + soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") + picture.date = soup.find("abbr").text + # Mer info att lägga in? + + # Hämta länkar för bilden att userända sen + for a in soup.find_all("a", href=True): + if all( + [ + "reaction" in a["href"], + "reactions" not in a["href"], + "=R" not in a["href"], + ] + ): + url_reactions = url_bas + str( + a["href"] + ) # Länk till reaktionerna för bilden + elif a.text == "Visa i fullständig storlek" or a.text == "View full size": + pic = url_bas + a["href"] + picture.url_full = pic[ + : pic.find("&") + ] # Den fullständiga adressen till bilden, används som _key i pictures + + # Skriv ut vilken bild som behandlas + print( + "Bild {count} av {total}".format(count=count, total=user.profile_pictures), + end="\r", + ) + + # Hämta reaktioner för bilden + sleep_(3) + fb_profile.browser.open(url_reactions) + update_cookie(fb_profile.browser.session.cookies, fb_profile.name) + + soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") + + try: + for a in soup.find_all("a", {"class": "z ba"}, href=True): + url_limit = a["href"] + + picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) + limit = re.search(r"limit=(\d+)", url_limit).group(1) + except UnboundLocalError: + limit = 999 + + # Addera bilden till arrango + picture.add_to_db() + + url_limit = url_bas + url_limit.replace( + "limit=" + str(limit), "limit=" + str(picture.no_reactions) + ) + + try: + sleep_(4) + fb_profile.browser.open(url_limit) + update_cookie(fb_profile.browser.session.cookies, fb_profile.name) + soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") + + # Gå igenom alla som reagerat och för in i arango + for li in soup.find_all("li"): + friend = Friend(user.username) + if single == True: + friend.single = True + if "See more" in li.text: + continue + try: + profile = li.find("h3").find("a") + friend.name = profile.text + friend.url = profile["href"] + if "profile.php" in friend.url: + friend.username = friend.url[friend.url.find("id=") + 3 :] + else: + friend.username = friend.url[friend.url.find("/") + 1 :] + + reaction = Reaction(user.username, friend.username, picture.id) + for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: + if type in str(li): + reaction.type = type + picture.reactions.append(reaction.get_dict()) + # Lägg till vännens profil till arrango + friend.add_to_db() + + # Lägg till reaktion till arrango + + except AttributeError as e: # Fel1 + write_error( + 1, + e=e, + soup=soup, + user=user.username, + traceback=traceback.format_exc(), + ) + pass + + if count == max_pic: + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True + ) + db.collection("picture_reactions").insert_many(picture.reactions, silent=True, overwrite=True) + except Exception as e: # Fel2 + write_error( + 2, + 
e=e, + soup=soup, + user=user.username, + url=url_limit, + url_name="url_limit", + traceback=traceback.format_exc(), + ) + pass + + ## ATT GÖRA För att lägga till fler reaktioner om det är få reaktioner på profilbilderna (måste uppdateras) + + print() + + db.update_document( + { + "_id": "members/" + str(user.username), + "checked": True, + "pictures_checked": user.profile_pictures, + } + ) + + + if first == True: + return {"friends": friends} + + else: + pass + + +def friends_of_user(user): + """Returnernar userändare som reagerat på user:s bilder""" + + cursor = db.aql.execute( + """ + FOR doc IN @@col + FILTER doc._to == @user + RETURN DISTINCT doc._from + """, + bind_vars={"@col": "picture_reactions", "user": "members/" + user}, + ) + + return [doc[8:] for doc in cursor] + + +def checked_members(): + cursor = db.aql.execute( + """ + FOR doc IN @@col + FILTER doc.checked == @bool + RETURN doc._key + """, + bind_vars={"@col": "members", "bool": True}, + ) + + members_checked = set([doc for doc in cursor]) + return members_checked + + +def get_profile(nr): + """ Hämtar profil om det inte gjorts förut """ + cursor = db.aql.execute( + """ + FOR doc IN @@col + FILTER doc.in_use == @bool + RETURN doc + """, + bind_vars={"@col": "profiles", "bool": False} + ) + profile = cursor.next() + + # Skriv till fil att använda sen + with open('data/profile{}.json'.format(nr), 'w') as outfile: + json.dump(profile, outfile) + + # Uppdatera dokumentet i arango + profile['in_use'] = True + db.update_document(profile, check_rev=False) + + return profile + +class Proxies: + def __init__(self): + self.proxies = [ + 'gb25-wg.socks5.mullvad.net:1080', + 'gb26-wg.socks5.mullvad.net:1080', + 'gb27-wg.socks5.mullvad.net:1080', + 'gb28-wg.socks5.mullvad.net:1080', + 'gb29-wg.socks5.mullvad.net:1080' + ] + def get_proxie(self): + return self.proxies.pop(random.randrange(0, len(self.proxies), 1)) + +class Friend: + def __init__(self, user): + self.collection = "members" + self.user = user # The friends friend + self.username = '' + self.url = '' + self.name = '' + self.single = '' + + def add_to_db(self): + db.insert_document( + self.collection, + { + "_key": self.username, + "url": url_bas + self.url, + "name": self.name, + }, + overwrite_mode="update", + silent=True, + ) + + +class Reaction: + def __init__(self, user, friend_username, picture_id): + self.collection = "picture_reactions" + self.user = user + self.picture_id = picture_id + self.user_name_friend = friend_username + self.type = False + + def get_dict(self): + key = str(self.picture_id) + "_" + str(self.user_name_friend) + return { + "_to": "members/" + str(self.user), + "_from": "members/" + str(self.user_name_friend), + "_key": key, + "_id": "picture_reactions/" + key, + "picture": self.picture_id, + "reaction": self.type, + } + + +class User: + def __init__(self, username): + self.collection = "members" + self.username = str(username) + self.fetched = datetime.now().strftime("%Y%m%d_%H:%M:%S") + self.url_coverphotos = '' + self.id = '' + self.url_likes = '' + self.url_about = '' + self.url_timeline = '' + self.profile_pictures = '' + self.url = '' + self.name = '' + + def add_to_db(self): + # Lägg till profilen till arrango + db.insert_document( + self.collection, + { + "_key": self.username, + "url": self.url, + "name": self.name, + "profile_pictures": self.profile_pictures, + "facebook_id": self.id, + "timeline": self.url_timeline, + "likes": self.url_likes, + "about": self.url_about, + "cover photos": self.url_coverphotos, + "fetched": self.fetched 
+ }, + overwrite_mode="update", + silent=True, + keep_none=False + ) + + +class Picture: + def __init__(self, user): + self.collection = "pictures" + self.user = user + self.id = '' + self.url_full = '' + self.date = '' + self.url = '' + self.no_reactions = '' + self.reactions = [] + + def add_to_db(self): + db.insert_document( + self.collection, + { + "_key": self.id, + "url": self.url_full, + "date": self.date, + "url": self.url, + "no_reactions": self.no_reactions, + "user": self.user, + }, + overwrite_mode="update", + silent=True, + keep_none=False + ) + +class Profile: + def __init__(self, nr, new=False): + + try: + with open("data/profile{}.json".format(nr)) as f: + self.doc = json.load(f) + except: + self.doc = get_profile(nr) + + if 'blocked' in self.doc or new == True: + self.doc = get_profile(nr) + + # Användaruppgifter + self.name = self.doc["name"].strip() + self.email = self.doc["email"] + self.pwd = self.doc["pwd"] + self.server = self.doc["server"] + self.nr = nr + + # Ange proxies + session = requests.Session() + session.proxies = { + "https": "socks5://'8155249667566524'@{}".format(self.server), + "http": "socks5://'8155249667566524'@{}".format(self.server), + } + + # Starta browser + user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1" + self.browser = robobrowser.RoboBrowser( + session=session, user_agent=user_agent, history=False, parser="lxml" + ) + try: + self.browser.session.cookies = pickle.load( + open("data/cookie_{}.pkl".format(self.name), "rb") + ) + self.logged_in = True + except: + self.logged_in = False + + def accept_cookies(self): + """ Accepterar cookies """ + self.browser.open("https://mbasic.facebook.com") + soup = BeautifulSoup(str(self.browser.parsed), "lxml") + if 'accept all' not in soup.text.lower(): + sleep_(2) + cookie_accept_url = "https://mbasic.facebook.com/cookie/consent-page" + self.browser.open(cookie_accept_url) + sleep_(2) + try: + form = self.browser.get_form() + self.browser.submit_form(form) + print(f"Accepterade cookies för {self.name}") + sleep_(2) + update_cookie(self.browser.session.cookies, self.name) + except Exception as e: + print(f"\nAccepterade inte cookies för {self.name}\n") + + def login(self): + """ Loggar in på Facebook. 
""" + + print("Loggar in {}\n".format(self.name)) + + # Gå till log in-sidan + self.browser.open("https://mbasic.facebook.com/login") + + # Kolla om browser redan är inloggad + soup = BeautifulSoup(str(self.browser.parsed), "lxml") + if 'log out' in soup.text.lower(): + print("Redan inloggad.") + + # Hitta och fyll i formulär + form = self.browser.get_form(id="login_form") + form["email"].value = self.email + form["pass"].value = self.pwd + self.browser.submit_form(form, submit=form["login"]) + # Vänta lite och uppdatera cookie + print("\nLoggade in\n") + sleep_(2) + + def block(self): + """ Blockerar profilen """ + if "blocked" not in self.doc: + self.doc["blocked"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + db.update_document(self.doc, silent=True, check_rev=False) + with open("data/profile{}.json".format(self.nr), "w") as outfile: + json.dump(self.doc, outfile) + + + +if __name__ == "__main__": + print() + + # Säkerställ att arbetsmappen är samma som den där scriptet ligger + os.chdir(os.path.dirname(__file__)) + + # Starta koppling till arangodb + # Info för arangodb + user_arango = "Lasse" + pwd_arango = "4c071768bedc259288361c07aafd8535fca546086fada4e7b5de4e2bb26b0e70fa8d348c998b90d032a5b8f3fdbae1881b843021e3475198e6fb45f58d8dc450bd52f77d" + db_arango = "facebook" + host_arango = "http://arango.lasseedfast.se" + + # Avkryptera lösen till arango + for i in range(0, 6, 1): + if i == 5: + exit() + try: + key = "sssladnnklja" + getpass() + pwd = ( + nacl.secret.SecretBox(key.encode()) + .decrypt(pwd_arango, encoder=nacl.encoding.HexEncoder) + .decode("utf-8") + ) + break + except: + print("Fel lösenord.") + sleep(1) + client = ArangoClient(hosts=host_arango) + db = client.db(db_arango, username=user_arango, password=pwd) + + members = db.collection("members") + pictures = db.collection("pictures") + + argv = argv[1:] + + try: + opts, args = getopt(argv, "su:", ["single", "user="]) + single = True if "-s" in [o[0] for o in opts] else False + for o, a in opts: + if o in ["-u", "--user"]: + users = [ + User(str(i).strip()) + for i in [(str(i).strip()) for i in a.split(",")] + ] + if "users" not in globals(): + users = [ + User(str(i).strip()) + for i in input("Vem/vilka vill du kolla bilder för? ").split(",") + ] + + except GetoptError: + users = [ + User(str(i).strip()) + for i in input("Vem/vilka vill du kolla bilder för? 
").split(",") + ] + single = ( + True + if input("Söka bara en bild (single)?").lower() in ["ja, yes, j, y"] + else False + ) + + print("Kollar profilbilder för:") + for user in users: + print("-", user.username) + print() + + # Skapa tre olika profiler att besöka Facebook med + fb_profiles = {} + extra_proxies = Proxies() + for nr in range(1, 4): + fb_profiles[nr] = Profile(nr) + fb_profiles[nr].browser.open('https://api.ipify.org') + soup = BeautifulSoup(str(fb_profiles[nr].browser.parsed), "lxml") + print(soup.text) + if fb_profiles[nr].logged_in == False: + fb_profiles[nr].accept_cookies() + sleep_(2) + fb_profiles[nr].login() + sleep(3) + + fb_profile_nr = 1 + fb_profile = fb_profiles[fb_profile_nr] + + print("Börjar med profilen", fb_profile.name) + + url_bas = "https://mbasic.facebook.com" + + while True: + for user in users: + # Set för kollade bilder och kollade medlemmar + all_pictures = set([doc["_key"] for doc in pictures.all()]) + members_checked = checked_members() + + # Hämta reaktioner för den första användaren + facebook_reactions(user, first=True) + friends = friends_of_user(user.username) + friends_unchecked = list(set(friends) - set(members_checked)) + # Här följer cookien med så att vi fortfarnade är inloggade + print("\nKlar med", user.username, "\n") + + print("Vänner som reagerat:", len(friends)) + print("Vänner att kolla:") + + for friend in friends_unchecked: + print(friend) + print() + + # Hämta reaktioner för den första användarens vänner (som reagerat) + count_friends = 0 + for f in friends: + count_friends += 1 + user = User(str(f)) + sleep_(2) + try: + out = facebook_reactions(user) + if out == "blocked": + # Ta bort profilen ur databasen + db.collection('profiles').delete(fb_profile.doc['_key'], silent=True, ignore_missing=True) + print( + f'{fb_profile.name} blockerad och borttagen {datetime.now().strftime("%Y%m%d_%H:%M:%S")}.' + ) + fb_profiles.remove(fb_profile) + try: + # l = [p['nr'] for p in fb_profiles] + # l.sort() + # nr = int(l[-1]+1) + fb_profiles[fb_profile_nr] = Profile(nr, new=True) + print("Laddat ny profil:", fb_profiles[fb_profile_nr].name) + sleep(3) + except e: + print("Det behövs nya profiler...") + for s in range(0, 1600/len(fb_profiles)): + print(f'Sover {600-s} sekunder till... 
', end='\r')
+                            fb_profile_nr += 1
+                            print(f"Försöker med {fb_profiles[fb_profile_nr].name}.")
+
+                    else:
+                        print("Klar med", user.username, "\n")
+
+                    # Rotera fb-profiler
+                    if count_friends == 6:
+                        if random.randrange(0, 2, 1) == 1:
+                            fb_profile_nr += 1
+                            count_friends = 0
+                            print("Växlar till", fb_profiles[fb_profile_nr].name)
+                    elif count_friends == 10:
+                        fb_profile_nr += 1
+                        count_friends = 0
+                        print("Växlar till", fb_profiles[fb_profile_nr].name)
+
+                    if fb_profile_nr > len(fb_profiles):
+                        fb_profile_nr = 1
+                    fb_profile = fb_profiles[fb_profile_nr]
+
+                except Exception as e: # Fel4
+                    soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml")
+                    write_error(
+                        4,
+                        e=e,
+                        user=user.username,
+                        traceback=traceback.format_exc(),
+                        soup=soup,
+                    )
+                    print("\nFel: ", str(user.username), "\n")
+                    sleep_(15)
+                    pass
+
+        # Ladda in nya användare att kolla
+        print("\nVem vill du kolla upp?")
+        users = [User(str(i).strip()) for i in input(">>> ").split(",")]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..974731b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,20 @@
+beautifulsoup4==4.9.3
+certifi==2020.12.5
+cffi==1.14.5
+chardet==4.0.0
+idna==2.10
+lxml==4.6.2
+pycparser==2.20
+PyJWT==2.0.1
+PyNaCl==1.4.0
+PySocks==1.7.1
+python-arango==7.1.0
+requests==2.25.1
+requests-toolbelt==0.9.1
+robobrowser==0.5.3
+setuptools-scm==5.0.2
+six==1.15.0
+soupsieve==2.2
+toml==0.10.2
+urllib3==1.26.3
+Werkzeug==1.0.1
diff --git a/workspace.code-workspace b/workspace.code-workspace
new file mode 100644
index 0000000..8725783
--- /dev/null
+++ b/workspace.code-workspace
@@ -0,0 +1,10 @@
+{
+    "folders": [
+        {
+            "path": "."
+        }
+    ],
+    "settings": {
+        "python.pythonPath": ".venv/bin/python"
+    }
+}
\ No newline at end of file
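
The scraping loop paces every page load through sleep_(), which stretches a base delay by a constant factor, adds roughly ±15% random jitter, and occasionally inserts a much longer pause. A minimal standalone sketch of that jittered-delay pattern, assuming the same factor of 4 and 85–115% window as the patch (the helper name is illustrative):

import random
from time import sleep

def jittered_sleep(base_seconds, factor=4):
    """Sleep roughly base_seconds * factor, varied by +/-15 percent,
    so request timing does not form an obvious fixed pattern."""
    sleep(base_seconds * factor * random.randrange(85, 115) / 100)
    # Roughly one call in sixty, take a much longer break as well.
    if random.randrange(0, 60) == 1:
        sleep(300)

jittered_sleep(4)  # pauses somewhere around 13-18 seconds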
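
User.add_to_db(), Friend.add_to_db() and Picture.add_to_db() all write through python-arango with overwrite_mode="update", so re-scraping a profile merges new fields into the existing document instead of failing on a duplicate _key. A sketch of that upsert pattern against a hypothetical local ArangoDB (host and credentials are placeholders):

from arango import ArangoClient

client = ArangoClient(hosts="http://localhost:8529")  # placeholder host
db = client.db("facebook", username="user", password="secret")  # placeholder credentials
members = db.collection("members")

doc = {"_key": "some.username", "name": "Some Name", "profile_pictures": 3}

# overwrite_mode="update" merges into an existing document with the same _key
# instead of raising a unique-constraint error; keep_none=False drops fields
# whose value is None rather than storing them.
members.insert(doc, overwrite_mode="update", keep_none=False, silent=True)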
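
friends_of_user() works because picture_reactions is stored as an edge collection: each reaction document points _from the reacting member _to the profile owner, so "who reacted to this user's pictures" is a single AQL pass over the edges. A sketch of that query with bind variables, using the same collection names as the patch:

def friends_of_user(db, username):
    """Return usernames of members who reacted to `username`'s pictures."""
    cursor = db.aql.execute(
        """
        FOR doc IN @@col
            FILTER doc._to == @user
            RETURN DISTINCT doc._from
        """,
        bind_vars={"@col": "picture_reactions", "user": "members/" + username},
    )
    # _from values look like "members/<username>"; strip the collection prefix.
    return [doc.split("/", 1)[1] for doc in cursor]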
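
Each Profile routes its browser traffic through a Mullvad SOCKS5 proxy by assigning session.proxies before constructing the RoboBrowser; PySocks in requirements.txt is what gives requests the socks5:// scheme. A sketch of that setup, assuming a reachable proxy (the host and user agent are examples, and the werkzeug shim mirrors the one in the patch for newer Werkzeug releases):

import requests
import werkzeug

werkzeug.cached_property = werkzeug.utils.cached_property  # shim so robobrowser imports
import robobrowser

session = requests.Session()
session.proxies = {
    "http": "socks5://gb25-wg.socks5.mullvad.net:1080",   # example proxy from the patch
    "https": "socks5://gb25-wg.socks5.mullvad.net:1080",
}

browser = robobrowser.RoboBrowser(
    session=session,
    user_agent="Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X)",  # example UA
    history=False,
    parser="lxml",
)
browser.open("https://api.ipify.org")
print(browser.parsed.text)  # should show the proxy's address, not the local one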
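
The ArangoDB password is stored as a PyNaCl SecretBox ciphertext and only decrypted at startup with a key assembled from a fixed prefix plus getpass() input; SecretBox requires the key to be exactly 32 bytes. A round-trip sketch of that scheme (passphrase padding and plaintext are illustrative, not the patch's actual values):

import nacl.encoding
import nacl.secret

# SecretBox keys must be exactly 32 bytes; here a passphrase is padded to fit.
passphrase = "sssladnnklja" + "rest-of-the-passphrase"
key = passphrase.encode().ljust(32, b"0")[:32]
box = nacl.secret.SecretBox(key)

# Encrypt once and keep the hex string (a pwd_arango-style value):
ciphertext_hex = box.encrypt(b"arango-password", encoder=nacl.encoding.HexEncoder)

# Decrypt at startup, as the script does:
password = box.decrypt(ciphertext_hex, encoder=nacl.encoding.HexEncoder).decode("utf-8")
assert password == "arango-password"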