From 2ee1d93d48417cb2a9a6e13679a300c1b594919f Mon Sep 17 00:00:00 2001 From: Lasse Date: Thu, 18 Mar 2021 09:48:12 +0100 Subject: [PATCH] Divided into modules --- .gitignore | 6 +- Dockerfile | 10 +- facebook/__main__.py | 173 +++++++++ facebook/arangodb.py | 76 ++++ facebook/classes.py | 225 ++++++++++++ facebook/config.py | 9 + facebook/helpers.py | 70 ++++ facebook_reactions.py | 787 ---------------------------------------- htmlerror.html | 824 ++++++++++++++++++++++++++++++++++++++++++ scrapers.py | 267 ++++++++++++++ 10 files changed, 1655 insertions(+), 792 deletions(-) create mode 100644 facebook/__main__.py create mode 100644 facebook/arangodb.py create mode 100644 facebook/classes.py create mode 100644 facebook/config.py create mode 100644 facebook/helpers.py delete mode 100644 facebook_reactions.py create mode 100644 htmlerror.html create mode 100644 scrapers.py diff --git a/.gitignore b/.gitignore index 3dc1d2d..599d36e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,8 @@ /.DS_Store /.venv /.vscode -/__pycache__ \ No newline at end of file +/__pycache__ +*.json +*.pkl +/facebook/test.py +/data/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 423adad..5ba3576 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,15 +7,17 @@ COPY requirements.txt . RUN pip install -r requirements.txt -#RUN apt-get install build-essential libssl-dev libffi-dev python-dev +ADD data /data -COPY facebook_reactions.py . +COPY main.py . -CMD [ "python", "./facebook_reactions.py" ] +ENTRYPOINT [ "python", "./main.py" ] + +CMD ["",""] # BUILD: # docker buildx create --use #docker buildx build --platform linux/arm -t l3224/fb-reactions:pi --push . # START -# docker run -it --name fb1 -v vol1:/data l3224/fb-reactions:latest \ No newline at end of file +# docker run -it --name fb1 -v vol1:/data l3224/fb-reactions:latest [-s -u user1,user2] \ No newline at end of file diff --git a/facebook/__main__.py b/facebook/__main__.py new file mode 100644 index 0000000..edd1eec --- /dev/null +++ b/facebook/__main__.py @@ -0,0 +1,173 @@ +import os +import random +import traceback +from datetime import datetime +from getopt import GetoptError, getopt +from sys import argv +from time import sleep + +import arangodb +from arangodb import db +from classes import Profile, User +from helpers import sleep_, write_error +from scrapers import profile_picture_reactions + +# import werkzeug +# werkzeug.cached_property = werkzeug.utils.cached_property +# from arango import ArangoClient + + +if __name__ == "__main__": + print() + + # Säkerställ att arbetsmappen är samma som den där scriptet ligger + os.chdir(os.path.dirname(__file__)) + + # Argument och alternativ + argv = argv[1:] + try: + opts, args = getopt(argv, "su:o:", ["single", "users=", "other="]) + single = True if "-s" in [o[0] for o in opts] else False + for o, a in opts: + if o in ["-u", "--user"]: + users = [ + User(str(i).strip()) + for i in [(str(i).strip()) for i in a.split(",")] + ] + if o in ["-o", "--other"]: + url_other_picture = a + + if "users" not in globals(): + users = [ + User(str(i).strip()) + for i in input("Vem/vilka vill du kolla bilder för? ").split(",") + ] + + except GetoptError: + users = [ + User(str(i).strip()) + for i in input("Vem/vilka vill du kolla bilder för? ").split(",") + ] + single = ( + True + if input("Söka bara en bild (single)?").lower() in ["ja, yes, j, y"] + else False + ) + + if "url_other_picture" in globals(): + users[0].url_other_picture = url_other_picture[url_other_picture.find('facebook.com') + 12:] + + print("Kollar profilbilder för:") + for user in users: + print("-", user.username) + print() + + # Skapa tre olika profiler att besöka Facebook med + profiles = [] + for i in range(0, 3): + doc = arangodb.get_profile() + profile = Profile(doc) + profile.browser.open("https://api.ipify.org") + print( + f"Profil {profile.name} använder IP-adress {profile.viewing().text}." + ) + if profile.logged_in == False: + profile.accept_cookies() + sleep_(2) + profile.login() + profiles.append(profile) + print() + sleep(3) + + profile_nr = 1 + profile = profiles[profile_nr] + + print("Börjar med profilen", profile.name) + + # Gå igenom de användare som efterfrågats + while True: + for user in users: + # Set för kollade bilder och kollade medlemmar + all_pictures = set([doc["_key"] for doc in db.collection("pictures").all()]) + members_checked = arangodb.checked_members() + + if user.username not in members_checked:# Hämta reaktioner för den första användaren LÄGG TILL NOT IN MEMBERS_CHECKED + profile_picture_reactions(profile, user, all_pictures, first=True, single=single) + friends = arangodb.friends_of_user(user.username) + friends_unchecked = list(set(friends) - set(members_checked)) + # Här följer cookien med så att vi fortfarnade är inloggade + print("\nKlar med", user.username, "\n") + + print("Vänner som reagerat:", len(friends)) + print("Vänner att kolla:") + + for friend in friends_unchecked: + print(friend) + print() + + # Hämta reaktioner för den första användarens vänner (som reagerat) + count_friends = 0 + for friend in friends_unchecked: + count_friends += 1 + user = User(str(friend)) + sleep_(2) + try: + profile_picture_reactions( + profile, user, members_checked, all_pictures + ) + if profile.blocked == True: + # Ta bort profilen ur databasen + arangodb.remove_profile(profile.doc["_key"]) + # Ta bort från listan på fb-profiler som används + profiles.remove(profile) + # Försök lägga till en ny fb-profil (om det finns en skapad och ledig i databasen) + try: + profiles[profile_nr] = Profile(new=True) + print("Laddat ny profil:", profiles[profile_nr].name) + sleep(3) + except e: + print("Det behövs nya profiler...") + for s in range(0, 1600 / len(profiles)): + print(f"Sover {600-s} sekunder till... ", end="\r") + profile_nr += 1 + print(f"Försöker med {profiles[profile_nr].name}.") + + else: + print("Klar med", user.username, "\n") + + # Rotera fb-profiler + if count_friends == 6: + if random.randrange(0, 2, 1) == 1: + profile_nr += 1 + count_friends = 0 + print("Växlar till", profiles[profile_nr].name) + elif count_friends == 10: + profile_nr += 1 + count_friends = 0 + print("Växlar till", profiles[profile_nr].name) + + if profile_nr > len(profiles) - 1: + profile_nr = 0 + profile = profiles[profile_nr] + + except Exception as e: # Fel4 + write_error( + 4, + e=e, + user=user.username, + traceback=traceback.format_exc(), + soup=profile.viewing(), + ) + print("\nFel: ", str(user.username), "\n") + sleep_(15) + pass + + # Ladda in nya användare att kolla + print("\nVem vill du kolla upp?") + user_input = input(">>> ") + if user_input in ['exit', '', 'ingen']: + for profile in profiles: + profile.unused() + break + else: + users = [User(str(i).strip()) for i in user_input.split(",")] diff --git a/facebook/arangodb.py b/facebook/arangodb.py new file mode 100644 index 0000000..ae5c009 --- /dev/null +++ b/facebook/arangodb.py @@ -0,0 +1,76 @@ +from time import sleep +from arango import ArangoClient +from getpass import getpass +from sys import argv +from config import * +from datetime import datetime + +import nacl.secret +import nacl.utils + + +def checked_members(): + cursor = db.aql.execute( + """ + FOR doc IN members + FILTER doc.checked == true + RETURN doc._key + """ + ) + members_checked = set([doc for doc in cursor]) + return members_checked + + +def get_profile(): + """ Hämtar profil om det inte gjorts förut """ + cursor = db.aql.execute( + """ + FOR doc IN profiles + FILTER doc.in_use == false + FILTER doc.created == true + RETURN doc + """ + ) + return cursor.next() + + +def friends_of_user(user): + """Returnernar användare som reagerat på user:s bilder""" + cursor = db.aql.execute( + """ + FOR doc IN picture_reactions + FILTER doc._to == @user + RETURN DISTINCT doc._from + """, + bind_vars={"user": "members/" + user}, + ) + return [doc[8:] for doc in cursor] + + +def remove_profile(profile): + db.collection("profiles").delete(profile['_key'], silent=True, ignore_missing=True) + print( + f'{profile} blockerad och borttagen {datetime.now().strftime("%Y%m%d_%H:%M:%S")}.' + ) + + +# Starta koppling till arangodb + +# Avkryptera lösen till arango +for i in range(0, 6, 1): + if i == 5: + exit() + try: + key = "sssladnnklja" + getpass() + pwd = ( + nacl.secret.SecretBox(key.encode()) + .decrypt(pwd_arango, encoder=nacl.encoding.HexEncoder) + .decode("utf-8") + ) + break + except: + print("Fel lösenord.") + sleep(1) + + +db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd) diff --git a/facebook/classes.py b/facebook/classes.py new file mode 100644 index 0000000..09f0e05 --- /dev/null +++ b/facebook/classes.py @@ -0,0 +1,225 @@ +from datetime import datetime +import json +import pickle +from bs4 import BeautifulSoup +import requests +import werkzeug +import random + +werkzeug.cached_property = werkzeug.utils.cached_property +from robobrowser import RoboBrowser + +from arangodb import db +from helpers import sleep_, update_cookie +from config import * + + +class User: + def __init__(self, username): + self.collection = "members" + self.username = str(username) + self.fetched = datetime.now().strftime("%Y%m%d_%H:%M:%S") + self.url_coverphotos = '' + self.id = '' + self.url_likes = '' + self.url_about = '' + self.url_timeline = '' + self.profile_pictures = '' + self.url = '' + self.name = '' + self.url_other_picture = '' + + def add_to_db(self): + # Lägg till profilen till arrango + db.insert_document( + self.collection, + { + "_key": self.username, + "url": self.url, + "name": self.name, + "profile_pictures": self.profile_pictures, + "facebook_id": self.id, + "timeline": self.url_timeline, + "likes": self.url_likes, + "about": self.url_about, + "cover photos": self.url_coverphotos, + "fetched": self.fetched + }, + overwrite_mode="update", + silent=True, + keep_none=False + ) + + def checked(self): + db.update_document( + { + "_id": "members/" + str(self.username), + "checked": True, + "pictures_checked": self.profile_pictures, + }) + + +class Picture: + def __init__(self, user): + self.collection = "pictures" + self.user = user + self.id = '' + self.url_full = '' + self.date = '' + self.url = '' + self.no_reactions = '' + self.reactions = [] + + def add_to_db(self): + db.insert_document( + self.collection, + { + "_key": self.id, + "url": self.url_full, + "date": self.date, + "url": self.url, + "no_reactions": self.no_reactions, + "user": self.user, + }, + overwrite_mode="update", + silent=True, + keep_none=False + ) + +class Profile: + def __init__(self, profile): + # Uppdatera dokumentet i arango + self.doc = profile + self.doc['in_use'] = True + db.update_document(self.doc, check_rev=False) + + # Användaruppgifter + self.name = self.doc["name"].strip() + self.email = self.doc["email"] + self.pwd = self.doc["pwd"] + self.server = self.doc["server"] + + self.blocked = False + + # Ange proxies + session = requests.Session() + session.proxies = { + "https": "socks5://'8155249667566524'@{}".format(self.server), + "http": "socks5://'8155249667566524'@{}".format(self.server), + } + + # Starta browser + user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1" + self.browser = RoboBrowser( + session=session, user_agent=user_agent, history=False, parser="lxml" + ) + try: + self.browser.session.cookies = pickle.load( + open("data/cookie_{}.pkl".format(self.name), "rb") + ) + self.logged_in = True + except: + self.logged_in = False + + def viewing(self): + """ Returnerar browser i html-format """ + return self.browser.parsed + + + def accept_cookies(self): + """ Accepterar cookies """ + self.browser.open("https://mbasic.facebook.com") + soup = BeautifulSoup(str(self.browser.parsed), "lxml") + if 'accept all' not in soup.text.lower(): + sleep_(2) + cookie_accept_url = "https://mbasic.facebook.com/cookie/consent-page" + self.browser.open(cookie_accept_url) + sleep_(2) + try: + form = self.browser.get_form() + self.browser.submit_form(form) + print(f"Accepterade cookies för {self.name}") + sleep_(2) + update_cookie(self.browser.session.cookies, self.name) + except Exception as e: + print(f"Accepterade inte cookies för {self.name}") + + def login(self): + """ Loggar in på Facebook. """ + + print("Loggar in {}".format(self.name)) + + # Gå till log in-sidan + self.browser.open("https://mbasic.facebook.com/login") + + # Kolla om browser redan är inloggad + soup = BeautifulSoup(str(self.browser.parsed), "lxml") + if 'log out' in soup.text.lower(): + print("Redan inloggad.") + + # Hitta och fyll i formulär + form = self.browser.get_form(id="login_form") + form["email"].value = self.email + form["pass"].value = self.pwd + self.browser.submit_form(form, submit=form["login"]) + # Vänta lite och uppdatera cookie + print("Loggade in.") + sleep_(2) + + def unused(self): + """ Sätter user till False för valda profiler """ + self.doc["in_use"] = False + db.update_document(self.doc['_key'], silent=True) + +class Proxies: + def __init__(self): + self.proxies = [ + 'gb25-wg.socks5.mullvad.net:1080', + 'gb26-wg.socks5.mullvad.net:1080', + 'gb27-wg.socks5.mullvad.net:1080', + 'gb28-wg.socks5.mullvad.net:1080', + 'gb29-wg.socks5.mullvad.net:1080' + ] + def get_proxie(self): + return self.proxies.pop(random.randrange(0, len(self.proxies), 1)) + +class Friend: + def __init__(self, user): + self.collection = "members" + self.user = user # The friends friend + self.username = '' + self.url = '' + self.name = '' + self.single = '' + + def add_to_db(self): + db.insert_document( + self.collection, + { + "_key": self.username, + "url": url_bas + self.url, + "name": self.name, + }, + overwrite_mode="update", + silent=True, + ) + + +class Reaction: + def __init__(self, user, friend_username, picture_id): + self.collection = "picture_reactions" + self.user = user + self.picture_id = picture_id + self.user_name_friend = friend_username + self.type = False + + def get_dict(self): + key = str(self.picture_id) + "_" + str(self.user_name_friend) + return { + "_to": "members/" + str(self.user), + "_from": "members/" + str(self.user_name_friend), + "_key": key, + "_id": "picture_reactions/" + key, + "picture": self.picture_id, + "reaction": self.type, + } diff --git a/facebook/config.py b/facebook/config.py new file mode 100644 index 0000000..da2f82e --- /dev/null +++ b/facebook/config.py @@ -0,0 +1,9 @@ + +# Info för arangodb +user_arango = "Lasse" +pwd_arango = "4c071768bedc259288361c07aafd8535fca546086fada4e7b5de4e2bb26b0e70fa8d348c998b90d032a5b8f3fdbae1881b843021e3475198e6fb45f58d8dc450bd52f77d" +db_arango = "facebook" +host_arango = "http://arango.lasseedfast.se" + +# Andra uppgifter +url_bas = "https://mbasic.facebook.com" \ No newline at end of file diff --git a/facebook/helpers.py b/facebook/helpers.py new file mode 100644 index 0000000..437ccb3 --- /dev/null +++ b/facebook/helpers.py @@ -0,0 +1,70 @@ +from time import sleep +import random +import pickle +from datetime import datetime +from arangodb import db + +def sleep_(t): + """ + Sover en tid nära den angivna (för att inte sökningarna ska bli för lika varandra) + """ + variation = 4 # Testa olika sovlängder för att inte få användaren blockerad + sleep(t * variation * random.randrange(85, 115, 1) / 100) + if random.randrange(0, 60, 1) == 1: + for s in range(0, 300): + print(f"Sover {300 - s} sekunder till... ", end="\r") + sleep(1) + print() + sleep(random.randrange(0, 10, 1) / 4) + + +def update_cookie(cookies, profile_name): + """ Uppdaterar cookie för browser """ + with open("./data/cookie_{}.pkl".format(profile_name), "wb") as f: + pickle.dump(cookies, f) + + +def write_error(nr, e="", traceback="", soup="", user="", url="", url_name=""): + """Skriver info efter error till arango + + Args: + nr ([type]): error number + e (str, optional): error. Defaults to "". + traceback (str, optional): The traceback from traceback.format_exc(). Defaults to "". + soup (str, optional): Soup. Defaults to "". + user (str, optional): The user. Defaults to "". + url (str, optional): Url, if any. Defaults to "". + count (int, optional): Count, if any. Defaults to 0. + url_name (str, optional): The description of the url, if any. Defaults to "". + """ + if url == "": + url = "ingen url" + url_name = "ingen url" + + if soup != "": + soup = str(soup.prettify()) + + print(e) # FELSÖKNING + + key = datetime.now().strftime("%Y%m%d_%H:%M:%S") + doc = { + "_key": key, + "number": nr, + "error": nr, + "user": str(user), + "error": str(e), + "url": str(url), + "url_name": url_name, + "soup": soup, + "traceback": str(traceback), + } + + try: + db.insert_document( + "errors", + doc, + overwrite_mode="update", + silent=True, + ) + except Exception as e: + print(e) \ No newline at end of file diff --git a/facebook_reactions.py b/facebook_reactions.py deleted file mode 100644 index 14f939b..0000000 --- a/facebook_reactions.py +++ /dev/null @@ -1,787 +0,0 @@ -import json -import os -import pickle -import random -import re -import traceback -from datetime import datetime -from getopt import GetoptError, getopt -from getpass import getpass -from sys import argv -from time import sleep - -import nacl.secret -import nacl.utils -import requests -import werkzeug - -werkzeug.cached_property = werkzeug.utils.cached_property -import robobrowser -from arango import ArangoClient -from bs4 import BeautifulSoup - -# import other_pictures # Måste uppdateras - - -def sleep_(t): - """ - Sover en tid nära den angivna (för att inte sökningarna ska bli för lika varandra) - """ - variation = 4 # Testa olika sovlängder för att inte få användaren blockerad - sleep(t * variation * random.randrange(85, 115, 1) / 100) - if random.randrange(0, 60, 1) == 1: - for s in range(0, 300): - print(f"Sover {300 - s} sekunder till... ", end="\r") - sleep(1) - print() - sleep(random.randrange(0, 10, 1) / 4) - - -def update_cookie(cookies, profile_name): - """ Uppdaterar cookie för browser """ - with open("data/cookie_{}.pkl".format(profile_name), "wb") as f: - pickle.dump(cookies, f) - - -def write_error(nr, e="", traceback="", soup="", user="", url="", count=0, url_name=""): - """Skriver info efter error till arango - - Args: - nr ([type]): error number - e (str, optional): error. Defaults to "". - traceback (str, optional): The traceback from traceback.format_exc(). Defaults to "". - soup (str, optional): Soup. Defaults to "". - user (str, optional): The user. Defaults to "". - url (str, optional): Url, if any. Defaults to "". - count (int, optional): Count, if any. Defaults to 0. - url_name (str, optional): The description of the url, if any. Defaults to "". - """ - if url == "": - url = "ingen url" - url_name = "ingen url" - - if soup != "": - soup = str(soup.prettify()) - - print(e) # FELSÖKNING - - key = datetime.now().strftime("%Y%m%d_%H:%M:%S") - doc = { - "_key": key, - "number": nr, - "error": nr, - "user": str(user), - "error": str(e), - "url": str(url), - "url_name": url_name, - "soup": soup, - "traceback": str(traceback), - } - - try: - db.insert_document( - "errors", - doc, - overwrite_mode="update", - silent=True, - ) - except Exception as e: - print(e) - - -def facebook_reactions(user, first=False): - - # Fixa url:er osv - if user.username.isnumeric(): - user.url = url_bas + "/profile.php?id=" + str(user.username) - user.url_photos = user.url + "&v=photos" - else: - user.username = user.username.replace("/", "") - user.url = url_bas + "/" + user.username - user.url_photos = user.url + "/photos" - - if user.username in members_checked: - print('Redan kollat', user.username) - return {"friends": friends_of_user(user.username)} - - # Gå till sidan för profilbilder - fb_profile.browser.open(user.url_photos) - - sleep_(4) - - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - - if ( - """You can't use Facebook because your account, or activity on it, doesn't follow our Community Standards.""" - in soup.text - ): - print("{} blocked\n".format(fb_profile.name).upper()) - return "blocked" - elif 'accept all' in soup.text.lower(): - fb_profile.accept_cookies() - fb_profile.browser.open(user.url_photos) - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - - user.name = user.username # Om inte namnet hittas senare - try: - for i in soup.find_all("strong"): - if "Notifications" in str(i): - continue - else: - user.name = i.text.strip() - except Exception as e: - write_error( - 6, - e=e, - traceback=traceback.format_exc(), - soup=soup, - user=user.username, - url=user.url_profil_photos, - ) - if first == True: - print(soup.prettify()) - exit() - print( - "Hämtar reaktioner på profilbilder för {name} ({user})".format( - name=user.name, user=user.username - ) - ) - - # Hitta länk till olika saker hos användarem, inkl facebook-id - - user.id = "" - for a in soup.find_all("a", href=True): - if "Profile pictures" in a.text: - user.url_album = url_bas + a["href"] # Länk till album för profilbulder - if "profile_id" in a["href"]: - l = a["href"] - user.id = re.search("\d+", l[l.find("id=") + 3 :]).group(0) - if "Likes" in a.text: - user.url_likes = url_bas + a["href"] - if "About" in a.text: - user.url_about = url_bas + a["href"] - if "Timeline" in a.text: - user.url_timeline = url_bas + a["href"] - if "Cover photos" in a.text: - user.url_coverphotos = url_bas + a["href"] - - # Gå till profilbilden (den första som kommer upp när man går till profilen) - if not hasattr(user, "url_album"): - user.url_album = '' - user.add_to_db() - print('Hittar inget album för profilbilder.') - write_error(7, soup=soup, user=user.username, url=user.url_album, url_name='user.url_album') - return None - # ATT GÖRA Här kan andra bilder väljas istället - - fb_profile.browser.open(user.url_album) - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - - # Samla alla profilbilder i en lista - url_pics = [] - pics = soup.find("div", {"id": "thumbnail_area"}) - for i in pics.find_all("a"): - a = i["href"] - url_pics.append(a[: a.find("&id")]) - - try: - user.profile_pictures = len(url_pics) - except: - user.profile_pictures = 0 - - # Lägg till profilen till arrango - user.add_to_db() - - # Gå igenom alla profilbilder upp till ett maximalt antal - count = 0 - if single == True: - max_pic = 1 - else: - max_pic = 15 - for pic in url_pics: - picture = Picture(user.username) - if count == max_pic: - break - else: - count += 1 - picture.url = url_bas + pic - picture.id = str(picture.url[picture.url.find("fbid=") + 5 :]) - if picture.id in all_pictures: - print('Redan kollat bild', picture.id) - continue - sleep_(5) - - try: - fb_profile.browser.open(picture.url) - except Exception as e: # Fel3 - write_error( - 3, - e=e, - soup=soup, - user=user.username, - url=picture.url, - url_name="url_pic", - traceback=traceback.format_exc(), - ) - - update_cookie(fb_profile.browser.session.cookies, fb_profile.name) - - # Hitta info om bilden - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - picture.date = soup.find("abbr").text - # Mer info att lägga in? - - # Hämta länkar för bilden att userända sen - for a in soup.find_all("a", href=True): - if all( - [ - "reaction" in a["href"], - "reactions" not in a["href"], - "=R" not in a["href"], - ] - ): - url_reactions = url_bas + str( - a["href"] - ) # Länk till reaktionerna för bilden - elif a.text == "Visa i fullständig storlek" or a.text == "View full size": - pic = url_bas + a["href"] - picture.url_full = pic[ - : pic.find("&") - ] # Den fullständiga adressen till bilden, används som _key i pictures - - # Skriv ut vilken bild som behandlas - print( - "Bild {count} av {total}".format(count=count, total=user.profile_pictures), - end="\r", - ) - - # Hämta reaktioner för bilden - sleep_(3) - fb_profile.browser.open(url_reactions) - update_cookie(fb_profile.browser.session.cookies, fb_profile.name) - - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - - try: - for a in soup.find_all("a", {"class": "z ba"}, href=True): - url_limit = a["href"] - - picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) - limit = re.search(r"limit=(\d+)", url_limit).group(1) - except UnboundLocalError: - limit = 999 - - # Addera bilden till arrango - picture.add_to_db() - - url_limit = url_bas + url_limit.replace( - "limit=" + str(limit), "limit=" + str(picture.no_reactions) - ) - - try: - sleep_(4) - fb_profile.browser.open(url_limit) - update_cookie(fb_profile.browser.session.cookies, fb_profile.name) - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - - # Gå igenom alla som reagerat och för in i arango - for li in soup.find_all("li"): - friend = Friend(user.username) - if single == True: - friend.single = True - if "See more" in li.text: - continue - try: - profile = li.find("h3").find("a") - friend.name = profile.text - friend.url = profile["href"] - if "profile.php" in friend.url: - friend.username = friend.url[friend.url.find("id=") + 3 :] - else: - friend.username = friend.url[friend.url.find("/") + 1 :] - - reaction = Reaction(user.username, friend.username, picture.id) - for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: - if type in str(li): - reaction.type = type - picture.reactions.append(reaction.get_dict()) - # Lägg till vännens profil till arrango - friend.add_to_db() - - # Lägg till reaktion till arrango - - except AttributeError as e: # Fel1 - write_error( - 1, - e=e, - soup=soup, - user=user.username, - traceback=traceback.format_exc(), - ) - pass - - if count == max_pic: - db.collection("picture_reactions").insert_many( - picture.reactions, silent=True, overwrite=True - ) - db.collection("picture_reactions").insert_many(picture.reactions, silent=True, overwrite=True) - except Exception as e: # Fel2 - write_error( - 2, - e=e, - soup=soup, - user=user.username, - url=url_limit, - url_name="url_limit", - traceback=traceback.format_exc(), - ) - pass - - ## ATT GÖRA För att lägga till fler reaktioner om det är få reaktioner på profilbilderna (måste uppdateras) - - print() - - db.update_document( - { - "_id": "members/" + str(user.username), - "checked": True, - "pictures_checked": user.profile_pictures, - } - ) - - - if first == True: - return {"friends": friends} - - else: - pass - - -def friends_of_user(user): - """Returnernar userändare som reagerat på user:s bilder""" - - cursor = db.aql.execute( - """ - FOR doc IN @@col - FILTER doc._to == @user - RETURN DISTINCT doc._from - """, - bind_vars={"@col": "picture_reactions", "user": "members/" + user}, - ) - - return [doc[8:] for doc in cursor] - - -def checked_members(): - cursor = db.aql.execute( - """ - FOR doc IN @@col - FILTER doc.checked == @bool - RETURN doc._key - """, - bind_vars={"@col": "members", "bool": True}, - ) - - members_checked = set([doc for doc in cursor]) - return members_checked - - -def get_profile(nr): - """ Hämtar profil om det inte gjorts förut """ - cursor = db.aql.execute( - """ - FOR doc IN @@col - FILTER doc.in_use == @bool - RETURN doc - """, - bind_vars={"@col": "profiles", "bool": False} - ) - profile = cursor.next() - - # Skriv till fil att använda sen - with open('data/profile{}.json'.format(nr), 'w') as outfile: - json.dump(profile, outfile) - - # Uppdatera dokumentet i arango - profile['in_use'] = True - db.update_document(profile, check_rev=False) - - return profile - -class Proxies: - def __init__(self): - self.proxies = [ - 'gb25-wg.socks5.mullvad.net:1080', - 'gb26-wg.socks5.mullvad.net:1080', - 'gb27-wg.socks5.mullvad.net:1080', - 'gb28-wg.socks5.mullvad.net:1080', - 'gb29-wg.socks5.mullvad.net:1080' - ] - def get_proxie(self): - return self.proxies.pop(random.randrange(0, len(self.proxies), 1)) - -class Friend: - def __init__(self, user): - self.collection = "members" - self.user = user # The friends friend - self.username = '' - self.url = '' - self.name = '' - self.single = '' - - def add_to_db(self): - db.insert_document( - self.collection, - { - "_key": self.username, - "url": url_bas + self.url, - "name": self.name, - }, - overwrite_mode="update", - silent=True, - ) - - -class Reaction: - def __init__(self, user, friend_username, picture_id): - self.collection = "picture_reactions" - self.user = user - self.picture_id = picture_id - self.user_name_friend = friend_username - self.type = False - - def get_dict(self): - key = str(self.picture_id) + "_" + str(self.user_name_friend) - return { - "_to": "members/" + str(self.user), - "_from": "members/" + str(self.user_name_friend), - "_key": key, - "_id": "picture_reactions/" + key, - "picture": self.picture_id, - "reaction": self.type, - } - - -class User: - def __init__(self, username): - self.collection = "members" - self.username = str(username) - self.fetched = datetime.now().strftime("%Y%m%d_%H:%M:%S") - self.url_coverphotos = '' - self.id = '' - self.url_likes = '' - self.url_about = '' - self.url_timeline = '' - self.profile_pictures = '' - self.url = '' - self.name = '' - - def add_to_db(self): - # Lägg till profilen till arrango - db.insert_document( - self.collection, - { - "_key": self.username, - "url": self.url, - "name": self.name, - "profile_pictures": self.profile_pictures, - "facebook_id": self.id, - "timeline": self.url_timeline, - "likes": self.url_likes, - "about": self.url_about, - "cover photos": self.url_coverphotos, - "fetched": self.fetched - }, - overwrite_mode="update", - silent=True, - keep_none=False - ) - - -class Picture: - def __init__(self, user): - self.collection = "pictures" - self.user = user - self.id = '' - self.url_full = '' - self.date = '' - self.url = '' - self.no_reactions = '' - self.reactions = [] - - def add_to_db(self): - db.insert_document( - self.collection, - { - "_key": self.id, - "url": self.url_full, - "date": self.date, - "url": self.url, - "no_reactions": self.no_reactions, - "user": self.user, - }, - overwrite_mode="update", - silent=True, - keep_none=False - ) - -class Profile: - def __init__(self, nr, new=False): - - try: - with open("data/profile{}.json".format(nr)) as f: - self.doc = json.load(f) - except: - self.doc = get_profile(nr) - - if 'blocked' in self.doc or new == True: - self.doc = get_profile(nr) - - # Användaruppgifter - self.name = self.doc["name"].strip() - self.email = self.doc["email"] - self.pwd = self.doc["pwd"] - self.server = self.doc["server"] - self.nr = nr - - # Ange proxies - session = requests.Session() - session.proxies = { - "https": "socks5://'8155249667566524'@{}".format(self.server), - "http": "socks5://'8155249667566524'@{}".format(self.server), - } - - # Starta browser - user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1" - self.browser = robobrowser.RoboBrowser( - session=session, user_agent=user_agent, history=False, parser="lxml" - ) - try: - self.browser.session.cookies = pickle.load( - open("data/cookie_{}.pkl".format(self.name), "rb") - ) - self.logged_in = True - except: - self.logged_in = False - - def accept_cookies(self): - """ Accepterar cookies """ - self.browser.open("https://mbasic.facebook.com") - soup = BeautifulSoup(str(self.browser.parsed), "lxml") - if 'accept all' not in soup.text.lower(): - sleep_(2) - cookie_accept_url = "https://mbasic.facebook.com/cookie/consent-page" - self.browser.open(cookie_accept_url) - sleep_(2) - try: - form = self.browser.get_form() - self.browser.submit_form(form) - print(f"Accepterade cookies för {self.name}") - sleep_(2) - update_cookie(self.browser.session.cookies, self.name) - except Exception as e: - print(f"\nAccepterade inte cookies för {self.name}\n") - - def login(self): - """ Loggar in på Facebook. """ - - print("Loggar in {}\n".format(self.name)) - - # Gå till log in-sidan - self.browser.open("https://mbasic.facebook.com/login") - - # Kolla om browser redan är inloggad - soup = BeautifulSoup(str(self.browser.parsed), "lxml") - if 'log out' in soup.text.lower(): - print("Redan inloggad.") - - # Hitta och fyll i formulär - form = self.browser.get_form(id="login_form") - form["email"].value = self.email - form["pass"].value = self.pwd - self.browser.submit_form(form, submit=form["login"]) - # Vänta lite och uppdatera cookie - print("\nLoggade in\n") - sleep_(2) - - def block(self): - """ Blockerar profilen """ - if "blocked" not in self.doc: - self.doc["blocked"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - db.update_document(self.doc, silent=True, check_rev=False) - with open("data/profile{}.json".format(self.nr), "w") as outfile: - json.dump(self.doc, outfile) - - - -if __name__ == "__main__": - print() - - # Säkerställ att arbetsmappen är samma som den där scriptet ligger - os.chdir(os.path.dirname(__file__)) - - # Starta koppling till arangodb - # Info för arangodb - user_arango = "Lasse" - pwd_arango = "4c071768bedc259288361c07aafd8535fca546086fada4e7b5de4e2bb26b0e70fa8d348c998b90d032a5b8f3fdbae1881b843021e3475198e6fb45f58d8dc450bd52f77d" - db_arango = "facebook" - host_arango = "http://arango.lasseedfast.se" - - # Avkryptera lösen till arango - for i in range(0, 6, 1): - if i == 5: - exit() - try: - key = "sssladnnklja" + getpass() - pwd = ( - nacl.secret.SecretBox(key.encode()) - .decrypt(pwd_arango, encoder=nacl.encoding.HexEncoder) - .decode("utf-8") - ) - break - except: - print("Fel lösenord.") - sleep(1) - client = ArangoClient(hosts=host_arango) - db = client.db(db_arango, username=user_arango, password=pwd) - - members = db.collection("members") - pictures = db.collection("pictures") - - argv = argv[1:] - - try: - opts, args = getopt(argv, "su:", ["single", "user="]) - single = True if "-s" in [o[0] for o in opts] else False - for o, a in opts: - if o in ["-u", "--user"]: - users = [ - User(str(i).strip()) - for i in [(str(i).strip()) for i in a.split(",")] - ] - if "users" not in globals(): - users = [ - User(str(i).strip()) - for i in input("Vem/vilka vill du kolla bilder för? ").split(",") - ] - - except GetoptError: - users = [ - User(str(i).strip()) - for i in input("Vem/vilka vill du kolla bilder för? ").split(",") - ] - single = ( - True - if input("Söka bara en bild (single)?").lower() in ["ja, yes, j, y"] - else False - ) - - print("Kollar profilbilder för:") - for user in users: - print("-", user.username) - print() - - # Skapa tre olika profiler att besöka Facebook med - fb_profiles = {} - extra_proxies = Proxies() - for nr in range(1, 4): - fb_profiles[nr] = Profile(nr) - fb_profiles[nr].browser.open('https://api.ipify.org') - soup = BeautifulSoup(str(fb_profiles[nr].browser.parsed), "lxml") - print(soup.text) - if fb_profiles[nr].logged_in == False: - fb_profiles[nr].accept_cookies() - sleep_(2) - fb_profiles[nr].login() - sleep(3) - - fb_profile_nr = 1 - fb_profile = fb_profiles[fb_profile_nr] - - print("Börjar med profilen", fb_profile.name) - - url_bas = "https://mbasic.facebook.com" - - while True: - for user in users: - # Set för kollade bilder och kollade medlemmar - all_pictures = set([doc["_key"] for doc in pictures.all()]) - members_checked = checked_members() - - # Hämta reaktioner för den första användaren - facebook_reactions(user, first=True) - friends = friends_of_user(user.username) - friends_unchecked = list(set(friends) - set(members_checked)) - # Här följer cookien med så att vi fortfarnade är inloggade - print("\nKlar med", user.username, "\n") - - print("Vänner som reagerat:", len(friends)) - print("Vänner att kolla:") - - for friend in friends_unchecked: - print(friend) - print() - - # Hämta reaktioner för den första användarens vänner (som reagerat) - count_friends = 0 - for f in friends: - count_friends += 1 - user = User(str(f)) - sleep_(2) - try: - out = facebook_reactions(user) - if out == "blocked": - # Ta bort profilen ur databasen - db.collection('profiles').delete(fb_profile.doc['_key'], silent=True, ignore_missing=True) - print( - f'{fb_profile.name} blockerad och borttagen {datetime.now().strftime("%Y%m%d_%H:%M:%S")}.' - ) - fb_profiles.remove(fb_profile) - try: - # l = [p['nr'] for p in fb_profiles] - # l.sort() - # nr = int(l[-1]+1) - fb_profiles[fb_profile_nr] = Profile(nr, new=True) - print("Laddat ny profil:", fb_profiles[fb_profile_nr].name) - sleep(3) - except e: - print("Det behövs nya profiler...") - for s in range(0, 1600/len(fb_profiles)): - print(f'Sover {600-s} sekunder till... ', end='\r') - fb_profile_nr += 1 - print(f"Försöker med {fb_profiles[fb_profile_nr].name}.") - - else: - print("Klar med", user.username, "\n") - - # Rotera fb-profiler - if count_friends == 6: - if random.randrange(0, 2, 1) == 1: - fb_profile_nr += 1 - count_friends = 0 - print("Växlar till", fb_profiles[fb_profile_nr].name) - elif count_friends == 10: - fb_profile_nr += 1 - count_friends = 0 - print("Växlar till", fb_profiles[fb_profile_nr].name) - - if fb_profile_nr > len(fb_profiles): - fb_profile_nr = 1 - fb_profile = fb_profiles[fb_profile_nr] - - except Exception as e: # Fel4 - soup = BeautifulSoup(str(fb_profile.browser.parsed), "lxml") - write_error( - 4, - e=e, - user=user.username, - traceback=traceback.format_exc(), - soup=soup, - ) - print("\nFel: ", str(user.username), "\n") - sleep_(15) - pass - - # Ladda in nya användare att kolla - print("\nVem vill du kolla upp?") - users = [User(str(i).strip()) for i in input(">>> ").split(",")] diff --git a/htmlerror.html b/htmlerror.html new file mode 100644 index 0000000..4661f64 --- /dev/null +++ b/htmlerror.html @@ -0,0 +1,824 @@ +"soup": " +\n +\n +\n + +\n \n Nils Edfast\n \n + \n + \n \n + \n +\n + +\n
\n
\n
\n
\n
\n \n + \n \n \n \n \n \n \n \n \n
\n \n + \"Facebook\n \n \n \n + \n \n
\n
\n \n
\n
\n
\n
\n
\n
\n \n \"Nils\n \n
\n \n \n Nils + Edfast\n \n \n \n
\n
\n \n + \n \n \n \n + \n
\n \n + Add Friend\n \n \n \n + Message\n \n \n \n More\n \n
\n
\n
\n
\n
\n
\n \n
\n
\n
\n

\n Uploads\n

\n
\n \n \n \n \n \n \n \n \n
\n + \n + \n \n \n \n + \n \n \n \n + \n \n
\n +
\n \n
\n
\n

\n + Albums\n

\n
\n \n
\n \n
\n
\n
\n
\n \n \n
\n
\n
\n
+ \n
\n
\n
\n
\n
\n
\n
\n \n
\n
\n \n \n \n \n \n \n \n
\n \n English (UK)\n \n \n + Polski\n \n \n + Português (Brasil)\n \n + \n \n + English (US)\n \n \n + Español\n \n \n +
\n +\n
\n
\n
+ \n
\n
\n
\n
\n
\n \n \n
\n \n \n \n \n \n \n \n \n
\n \n \n + \n \n + \n +
\n
\n
\n
\n \n \n \n \n \n \n + \n
\n + \n + Create Page\n \n \n + Help\n \n \n + Settings & privacy\n \n + \n \n + Report a Problem\n \n \n + Terms & Policies\n \n \n Log Out (Tina Shiawi)\n \n
\n \n Back to Top\n \n
\n
\n +
\n
\n \n + +", \ No newline at end of file diff --git a/scrapers.py b/scrapers.py new file mode 100644 index 0000000..447ac77 --- /dev/null +++ b/scrapers.py @@ -0,0 +1,267 @@ +from classes import Picture, Friend, Reaction +from helpers import sleep_, write_error, update_cookie +from config import * +import traceback +import re +from arangodb import db + +def profile_picture_reactions(profile, user, all_pictures, first=False, single = False): + + # Fixa url:er osv + if user.username.isnumeric(): + user.url = url_bas + "/profile.php?id=" + str(user.username) + user.url_photos = user.url + "&v=photos" + else: + user.username = user.username.replace("/", "") + user.url = url_bas + "/" + user.username + user.url_photos = user.url + "/photos" + + # Gå till sidan för profilbilder + profile.browser.open(user.url_photos) + + sleep_(4) + + if ( + """You can't use Facebook because your account, or activity on it, doesn't follow our Community Standards.""" + in profile.viewing().text + ): + print("{} blocked\n".format(profile.name).upper()) + profile.blocked = True + return None + + elif 'accept all' in profile.viewing().text.lower(): + profile.accept_cookies() + profile.browser.open(user.url_photos) + + user.name = user.username # Om inte namnet hittas senare + try: + for i in profile.viewing().find_all("strong"): + if "Notifications" in str(i): + continue + else: + user.name = i.text.strip() + except Exception as e: + write_error( + 6, + e=e, + traceback=traceback.format_exc(), + soup=profile.viewing(), + user=user.username, + url=user.url_photos, + ) + if first == True: + print(profile.viewing().prettify()) + exit() + print( + "Hämtar reaktioner på profilbilder för {name} ({user})".format( + name=user.name, user=user.username + ) + ) + + # Hitta länk till olika saker hos användarem, inkl facebook-id + user.id = "" + for a in profile.viewing().find_all("a", href=True): + if "Profile pictures" in a.text: + user.url_album = url_bas + a["href"] # Länk till album för profilbulder + if "profile_id" in a["href"]: + l = a["href"] + user.id = re.search("\d+", l[l.find("id=") + 3 :]).group(0) + if "Likes" in a.text: + user.url_likes = url_bas + a["href"] + if "About" in a.text: + user.url_about = url_bas + a["href"] + if "Timeline" in a.text: + user.url_timeline = url_bas + a["href"] + if "Cover photos" in a.text: + user.url_coverphotos = url_bas + a["href"] + + user.add_to_db() + # Gå till profilbilden (den första som kommer upp när man går till profilen) + if not hasattr(user, "url_album"): + write_error(9, soup=profile.viewing(), user=user.username) + if user.url_other_picture != '': + # Använd eventuell extrabild och ta bort den från användaren + url_pics = [user.url_other_picture] + user.url_other_picture = '' + else: + # Spara ner profilen till databasen och avsluta sökningen på användaren + user.url_album = False + if first == False: + user.doc['checked'] = True + user.add_to_db() + print('Hittar inget album för profilbilder.') + write_error(7, soup=profile.viewing(), user=user.username, url=user.url_album, url_name='user.url_album') + return None + # ATT GÖRA Här kan andra bilder väljas istället + + else: + profile.browser.open(user.url_album) + + # Samla alla profilbilder i en lista + url_pics = [] + pics = profile.viewing().find("div", {"id": "thumbnail_area"}) + for i in pics.find_all("a"): + a = i["href"] + url_pics.append(a[: a.find("&id")]) + if user.url_other_picture != '': + # Lägg till eventuell extrabild och ta bort den från användaren + url_pics.append(user.url_other_picture) + user.url_other_picture = '' + try: + user.profile_pictures = len(url_pics) + except: + user.profile_pictures = 0 + user.doc['checked'] = True + user.add_to_db() + return + # Lägg till profilen till arrango + user.add_to_db() + + # Gå igenom alla profilbilder + if single == True and first == False: + url_pics = url_pics[0] + for pic in url_pics: + # Skriv ut vilken bild som behandlas + print(f"Bild {url_pics.index(pic) + 1} av {user.profile_pictures}", end="\r",) + + picture = Picture(user.username) + picture.url = url_bas + pic + picture.id = str(picture.url[picture.url.find("fbid=") + 5 :]) + picture.id = str(re.search('\d+', picture.id).group()) + # if picture.id in all_pictures: + # print('Redan kollat bild', picture.id) + # continue + sleep_(5) + + try: + profile.browser.open(picture.url) + except Exception as e: # Fel3 + write_error( + 3, + e=e, + soup=profile.viewing(), + user=user.username, + url=picture.url, + url_name="url_pic", + traceback=traceback.format_exc(), + ) + + update_cookie(profile.browser.session.cookies, profile.name) + + # Hitta info om bilden + try: + picture.date = profile.viewing().find("abbr").text + except Exception as e: # Fel8 + write_error(8, e=e, soup=profile.viewing(), url=pic, url_name='picture url', user=user.name, traceback=traceback.format_exc()) + # ATT GÖRA Mer info att lägga in? + + # Hämta länkar för bilden att userända sen + #print(profile.viewing().prettify()) + for a in profile.viewing().find_all("a", href=True): + if all( + [ + "reaction" in a["href"], + "reactions" not in a["href"], + "=R" not in a["href"], + ] + ): + url_reactions = url_bas + str(a["href"]) # Länk till reaktionerna för bilden + elif a.text == "View full size": + pic = url_bas + a["href"] + picture.url_full = pic[ + : pic.find("&") + ] # Den fullständiga adressen till bilden, används som _key i pictures + if 'url_reactions' not in globals(): + for a in profile.viewing().find_all("a", href=True): + if '/likes/' in a["href"]: + url_reactions = url_bas + str(a["href"]) + if 'url_reactions' not in globals(): + for div in profile.viewing().find_all("div", href=True): + if 'like this' in div.text: + url_reactions = url_bas + str(div["href"]) + + + # Hämta reaktioner för bilden + sleep_(3) + profile.browser.open(url_reactions) + update_cookie(profile.browser.session.cookies, profile.name) + + try: + for a in profile.viewing().find_all("a", {"class": "z ba"}, href=True): + url_limit = a["href"] + + picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) + limit = re.search(r"limit=(\d+)", url_limit).group(1) + except UnboundLocalError: + limit = 999 + + # Addera bilden till arrango + picture.add_to_db() + + url_limit = url_bas + url_limit.replace( + "limit=" + str(limit), "limit=" + str(picture.no_reactions) + ) + + try: + sleep_(4) + profile.browser.open(url_limit) + update_cookie(profile.browser.session.cookies, profile.name) + + # Gå igenom alla som reagerat och för in i arango + for li in profile.viewing().find_all("li"): + friend = Friend(user.username) + if single == True: + friend.single = True + if "See more" in li.text: + continue + try: + friend_html = li.find("h3").find("a") + friend.name = friend_html.text + friend.url = friend_html["href"] + if "profile.php" in friend.url: + friend.username = friend.url[friend.url.find("id=") + 3 :] + else: + friend.username = friend.url[friend.url.find("/") + 1 :] + + reaction = Reaction(user.username, friend.username, picture.id) + for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: + if type in str(li): + reaction.type = type + picture.reactions.append(reaction.get_dict()) + # Lägg till vännens profil till arrango + friend.add_to_db() + + # Lägg till reaktion till arrango + + except AttributeError as e: # Fel1 + write_error( + 1, + e=e, + soup=profile.viewing(), + user=user.username, + traceback=traceback.format_exc(), + ) + pass + + if count == max_pic: + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True + ) + db.collection("picture_reactions").insert_many(picture.reactions, silent=True, overwrite=True) + except Exception as e: # Fel2 + write_error( + 2, + e=e, + soup=profile.viewing(), + user=user.username, + url=url_limit, + url_name="url_limit", + traceback=traceback.format_exc(), + ) + pass + + ## ATT GÖRA För att lägga till fler reaktioner om det är få reaktioner på profilbilderna (måste uppdateras) + + user.checked() + + \ No newline at end of file