diff --git a/.gitignore b/.gitignore
index a9f76ee..8d545d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,18 +1,28 @@
+
+# Miscellaneous
 /.DS_Store
-/.venv
+*.venv
 /.vscode
 /__pycache__
 *.json
 *.pkl
-facebook/test.py
 /data/*
 *.html
 *.code-workspace
 workspace.code-workspace
 password_arango.txt
 *.gexf
-facebook/mrkoll.
 *.pyc
+*.sqlite3
+
+# facebook
 /facebook
 !/facebook/*.py
-*.sqlite3
\ No newline at end of file
+facebook/test.py
+facebook/mrkoll.
+
+# docker
+/stats/*
+!/stats/*.py
+
+requirements2.txt
diff --git a/Dockerfile b/Dockerfile
index c69a275..74dbd33 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,5 +17,5 @@ CMD ["",""]
 
 # BUILD:
 # docker buildx create --use
-#docker buildx build --platform linux/arm64,linux/arm64,linux/amd64 -t l3224/fb-scraper:pi --push .
+#docker buildx build --platform linux/arm,linux/arm64,linux/amd64 -t l3224/fb-scraper:VERSION --push .
diff --git a/docker/free/Dockerfile b/docker/free/Dockerfile
deleted file mode 100644
index c289f9d..0000000
--- a/docker/free/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-
-FROM python:3.8
-
-WORKDIR /
-
-COPY requirements.txt .
-
-RUN pip install -r requirements.txt
-
-ADD . .
-
-ENTRYPOINT [ "python", "facebook/__main__.py", "-p free" ]
-
-CMD ["",""]
-
-# BUILD:
-# docker buildx create --use
-#docker buildx build --file docker/free/Dockerfile --platform linux/arm -t l3224/fb-scraper:free --push .
-
diff --git a/docker/mrkoll/Dockerfile b/docker/mrkoll/Dockerfile
index d61b23a..27234c4 100644
--- a/docker/mrkoll/Dockerfile
+++ b/docker/mrkoll/Dockerfile
@@ -1,14 +1,15 @@
+# syntax=docker/dockerfile:1
 
-FROM python:3.8
+FROM python:3.8-slim-buster
 
-WORKDIR /
+COPY requirements.txt requirements.txt
 
-COPY requirements.txt .
+RUN pip3 install -r requirements.txt
 
-RUN pip install -r requirements.txt
+COPY . .
 
-ADD . .
+ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ]
 
-ENTRYPOINT [ "python", "facebook/mrkoll.py" ]
+CMD [""]
 
 # docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push .
\ No newline at end of file
diff --git a/docker/mrkoll/mrkoll_scraperapi.py b/docker/mrkoll/mrkoll_scraperapi.py
new file mode 100644
index 0000000..3d9b5ab
--- /dev/null
+++ b/docker/mrkoll/mrkoll_scraperapi.py
@@ -0,0 +1,194 @@
+import re
+import requests
+from sys import argv
+from time import sleep
+from bs4 import BeautifulSoup
+from arango import ArangoClient
+
+
+def find_person(number):
+    """
+    Look up a person's details from a phone number.
+    """
+
+    sleep(2)
+
+    url = f'https://mrkoll.se/resultat?n={number}'
+
+    api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
+    payload = {'api_key': api_key, 'url': url, 'country_code': 'se', 'device_type': 'desktop'}
+
+    # Fetch the page through ScraperAPI
+    response = requests.get('http://api.scraperapi.com', params=payload)
+    r = response.text
+
+    soup = BeautifulSoup(r, 'html.parser')
+
+    if (
+        "Du har gjort för många anrop" in soup.text
+        or response.url == "https://mrkoll.se/om/limit/"
+    ):
+        sleep(10)
+        return None
+
+    # Collect the data in a dictionary
+    d = {}
+
+    d["url_via_telefonnummer"] = response.url
+    try:
+        for a in soup.find_all("a", href=True):
+            if "boende-med-" in a["href"]:
+                d["lives_with_url"] = a["href"]
+            if "-hushall" in a["href"]:
+                d["lives_with"] = a.text
+    except:
+        pass
+
+    if "Sökningen gav 0 träffar..." in soup.text:
+        return {}
+
+    info = soup.find("div", {"class": "block_col1"})
+
+    try:
+        d["first_name"] = info.find(
+            "span", {"title": "Detta är personens tilltalsnamn"}
+        ).text
+    except:
+        pass
+    try:
+        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
+    except:
+        pass
+    try:
+        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
+    except:
+        pass
+    try:
+        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
+        d["adress_line1"] = adress[0].text
+        if len(adress) > 1:
+            d["adress_line2"] = adress[1].text
+    except:
+        pass
+
+    try:
+        d["history"] = info.find("div", {"class": "history_container"}).text
+    except:
+        pass
+
+    # Personal identity number
+    ## Date of birth
+    for i in soup.find_all("div", {"class": "col_block1"}):
+        if "Personnummer" in i.text:
+            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
+                "-XXXX", ""
+            )
+    ## Last four digits
+    try:
+        start = "showPersnr"
+        end = ">Jag godkänner"
+        t = str(soup)
+        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
+        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
+        sleep(2)  # Wait a little
+        four_last = requests.get("http://mrkoll.se" + url_ajax).text
+        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
+    except:
+        pass
+
+    try:
+        neighbours = {}
+        for div in soup.find_all("div", {"class": "peoplecont"}):
+            persons = div.find_all("a", href=True)
+            for person in persons:
+                neighbours[person.find("strong").text] = {
+                    "link": person["href"],
+                    "lived_years": re.search(
+                        r"\d+", person.find("span", {"class": "flyttclass"}).text
+                    ).group(),
+                }
+        d["neighbours"] = neighbours
+    except:
+        pass
+
+    try:
+        d["name_change"] = [
+            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
+        ]
+    except:
+        pass
+
+    try:
+        prosecuted = {}
+        prosecuted["brottsmål"] = soup.find("div", {"class": "resmark res_b"}) is not None
+        prosecuted["tvistemål"] = soup.find("div", {"class": "resmark res_t"}) is not None
+        prosecuted["straffföreläggande"] = soup.find("div", {"class": "resmark res_s"}) is not None
+        d["prosecuted"] = prosecuted
+    except:
+        pass
+
+    return d
+
+
+if __name__ == "__main__":
+
+    ip = 'scraperapi'
+
+    if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12':
+        print('\nMULLVAD INTE AKTIV\n')
+        exit()
+
+    # ArangoDB connection details
+    user_arango = "Phone"
+    db_arango = "facebook"
+    host_arango = "http://192.168.1.10:8529"
+
+    # Open the ArangoDB connection
+    db = ArangoClient(hosts=host_arango).db(
+        db_arango, username=user_arango, password=argv[1]
+    )
+    leak = db.collection("phoneleak")
+
+    count = 0
+    scraper_count = 0
+    errors = 0
+
+    while True:
+        count += 1
+
+        # Fetch a random person
+        doc = leak.random()
+
+        # Run the lookup on mrkoll.se
+        d = find_person(doc["phone"])
+
+        try:
+            name = d["first_name"] + ' '
+        except:
+            name = ' '
+        print(f'{count} - {errors} {name}', end="\r")
+
+        if d is None:  # The IP is blocked, or something else went wrong
+            continue
+
+        d["_key"] = doc["_key"]
+        d["_id"] = "phone/" + str(d["_key"])
+        d["phone"] = doc["phone"]
+        d["checked_from_ip"] = f'{ip} - cache'
+        try:
+            db.collection("phone").insert(d)
+            leak.delete(doc["_key"])
+        except:
+            pass
diff --git a/docker/mrkoll/requirements.txt b/docker/mrkoll/requirements.txt
new file mode 100644
index 0000000..6a7859b
--- /dev/null
+++ b/docker/mrkoll/requirements.txt
@@ -0,0 +1,14 @@
+beautifulsoup4==4.9.3
+bs4==0.0.1
+certifi==2021.5.30
+charset-normalizer==2.0.4
+idna==3.2
+PyJWT==2.1.0
+python-arango==7.2.0
+requests==2.26.0
+requests-toolbelt==0.9.1
+setuptools-scm==6.0.1
+soupsieve==2.2.1
+toml==0.10.2
+urllib3==1.26.6
+requests_cache==0.7.4
diff --git a/docker/profile_pictures/images.py b/docker/profile_pictures/images.py
new file mode 100644
index 0000000..352f27b
--- /dev/null
+++ b/docker/profile_pictures/images.py
@@ -0,0 +1,70 @@
+import requests
+import os
+from datetime import date, timedelta
+from time import sleep
+
+from arangodb import db
+
+
+def download_image(url, user, id):
+
+    # Make sure the user's folder exists
+    if not os.path.isdir(f'../profile_pictures/{user}'):
+        os.mkdir(f'../profile_pictures/{user}')
+
+    # Download the image
+    r = requests.get(url)
+    if r.text == 'URL signature expired':
+        print('För gammal länk.')
+        exit()
+    elif r.status_code == 403:
+        exit()
+    img_data = r.content
+    with open(f'../profile_pictures/{user}/{id}.jpg', 'wb') as handler:
+        handler.write(img_data)
+
+
+def get_pictures(day):
+    cursor = db.aql.execute(
+        """
+        for doc in members
+            filter doc.fetched == @date
+            filter has(doc, "checked_pictures")
+            filter not has(doc, "pictures_downloaded")
+            return {'member': doc._key, 'pictures': doc.checked_pictures}
+        """,
+        bind_vars={'date': day}
+    )
+
+    for doc in cursor:
+        pictures = []
+        for picture in doc['pictures']:
+            pictures.append(picture[picture.find('fbid=') + 5:])
+
+        # Use a separate cursor so the outer iteration is not disturbed
+        pic_cursor = db.aql.execute(
+            """
+            for doc in pictures
+                filter doc._key in @list
+                limit 10
+                return {'_key': doc._key, 'user': doc.user, 'url': doc.src}
+            """,
+            bind_vars={"list": pictures},
+        )
+
+        for picture in pic_cursor:
+            download_image(picture['url'], picture['user'], picture['_key'])
+            print(picture['_key'])
+            sleep(2)
+
+        db.update_document({'_id': 'members/' + str(doc['member']), 'pictures_downloaded': True}, silent=True, check_rev=False)
+
+
+def old_pics():
+    if not os.path.isdir('../profile_pictures'):
+        os.mkdir('../profile_pictures')
+    start = date.today()
+    for i in range(1, 60):
+        d = start - timedelta(days=i)
+        get_pictures(d.strftime('%Y%m%d'))
diff --git a/docker/stats/Dockerfile b/docker/stats/Dockerfile
new file mode 100644
index 0000000..f90f8ac
--- /dev/null
+++ b/docker/stats/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:alpine
+
+WORKDIR /
+
+RUN apk add --update --no-cache g++ gcc libxslt-dev
+
+COPY requirements.txt .
+
+RUN pip install -r requirements.txt
+
+ADD . .
+
+ENTRYPOINT [ "python", "stats.py" ]
+
+# docker buildx build --file docker/stats/Dockerfile --platform linux/arm64,linux/amd64 -t mrkoll .
\ No newline at end of file
diff --git a/docker/stats/requirements.txt b/docker/stats/requirements.txt
new file mode 100644
index 0000000..e392166
--- /dev/null
+++ b/docker/stats/requirements.txt
@@ -0,0 +1,27 @@
+black==21.8b0
+certifi==2020.6.20
+chardet==4.0.0
+click==8.0.1
+httplib2==0.18.1
+idna==2.10
+mypy-extensions==0.4.3
+packaging==21.0
+pathspec==0.9.0
+platformdirs==2.3.0
+#pycurl==7.43.0.6
+PyJWT==2.1.0
+pyparsing==2.4.7
+PySimpleSOAP==1.16.2
+#python-apt==2.2.1
+python-arango==7.2.0
+python-debian==0.1.39
+python-debianbts==3.1.0
+regex==2021.8.28
+#reportbug==7.10.3
+requests==2.25.1
+requests-toolbelt==0.9.1
+setuptools-scm==6.3.1
+six==1.16.0
+tomli==1.2.1
+typing-extensions==3.10.0.2
+urllib3==1.26.5
diff --git a/facebook/stats.py b/docker/stats/stats.py
similarity index 53%
rename from facebook/stats.py
rename to docker/stats/stats.py
index 60764f2..d4b8f92 100644
--- a/facebook/stats.py
+++ b/docker/stats/stats.py
@@ -3,21 +3,20 @@ from getpass import getpass
 from time import sleep
 
 from arango import ArangoClient
-from json2html import json2html
 
 
 def now():
-    """ Returns current date and time as string"""
+    """Returns the current date and time as a string."""
     return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
 
 
 def write_stats(db, continuous=False):
     while True:
         d = {}
         for col in db.collections():
-            if not col['system']:
-                d[col['name']] = db.collection(col['name']).count()
-        del d['stats']
-        #d['time'] = now()
+            if not col["system"]:
+                d[col["name"]] = db.collection(col["name"]).count()
+        del d["stats"]
+        # d['time'] = now()
         cursor = db.aql.execute(
             """
             FOR doc IN members
@@ -25,50 +24,43 @@ def write_stats(db, continuous=False):
             COLLECT WITH COUNT INTO length
             RETURN length
             """
-            )
-        d['checked_members'] = cursor.next()
-
+        )
+        d["checked_members"] = cursor.next()
 
         # How many accounts per vendor remain
         cursor = db.aql.execute(
-            '''
-            for doc in profiles
+            """
+            for doc in profiles_webshare
                 filter has(doc, "vendor")
                 COLLECT vendor = doc.vendor WITH COUNT INTO length
                 RETURN { "vendor" : vendor, "active" : length }
-            ''')
-        d['active_vendors'] = [doc for doc in cursor]
-
-        d['_key'] = now()[:13]
-        db.insert_document( "stats", d, overwrite=True)
+            """
+        )
+        d["active_vendors"] = [doc for doc in cursor]
 
-        # Write an html file
-        with open('website/fb-webbapp/stats.html', 'a+') as html:
-            html.truncate(0)
-            html.write('')
+        d["_key"] = now()[:13]
+        db.insert_document("stats", d, overwrite=True)
 
-            html.write(json2html.convert(json = d))
-
         # Sleep, then continue later
         if continuous:
+            print(now())
            sleep(86400)
         else:
            break
-
+
+
 # ArangoDB connection details
 user_arango = "Stats"
 db_arango = "facebook"
-host_arango = "http://192.168.0.4:8529"
+host_arango = "http://192.168.1.10:8529"
 
 # Open the connection to ArangoDB
 
 # Prompt for the Arango password
-pwd = getpass(f'Arangolösenord för {user_arango}:').strip()
+pwd = getpass(f"Arangolösenord för {user_arango}:").strip()
 
-db = ArangoClient(hosts=host_arango).db(
-    db_arango, username=user_arango, password=pwd
-)
+db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)
 
 write_stats(db, continuous=True)
diff --git a/facebook/__main__.py b/facebook/__main__.py
index 6904fdd..b7a24da 100644
--- a/facebook/__main__.py
+++ b/facebook/__main__.py
@@ -160,6 +160,7 @@ if __name__ == "__main__":
             )
             if profile.blocked:
                 profile = blocked_profile(profile, proxieservers=proxieservers)
+                user = User(str(userdoc['_key']).strip(), mode, other_pictures=url_other_pictures)
             else:
                 break
         except:
@@ -175,7 +176,7 @@ if __name__ == "__main__":
 
     friends_unchecked = []
     for friend in friends:
-        if not check_for_user(friend):
+        if not check_for_user(friend) and friend not in friends_unchecked:
             print(friend)
             friends_unchecked.append(friend)
diff --git a/facebook/accs_to_db.py b/facebook/accs_to_db.py
index acb3f07..db3d0f4 100644
--- a/facebook/accs_to_db.py
+++ b/facebook/accs_to_db.py
@@ -6,12 +6,10 @@ from time import sleep
 import base64
 import json
 import requests
-from sshtunnel import open_tunnel
-import paramiko
-from getpass import getpass
-import arangodb
+
 import config
 from helpers import now
+import dbViaSSH
 
 # Make fb-scraper the working directory
 chdir(dirname(dirname(abspath(__file__))))
@@ -86,18 +84,23 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
 
     n0 = 0
     n1 = 0
 
     for profile in data:
+
         if len(profile) < 3:
             continue
         doc = {}
         doc["vendor"] = vendor
         doc["created"] = now()
+
         if "email" in info:
             doc["email"] = profile[info.index("email")]
+
         elif "login" in info:
             doc["email"] = profile[info.index("login")]
+
         if doc["email"] in used_accs or doc["email"] in used_profiles:
             n1 += 1
             continue
@@ -120,37 +123,45 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
             for c in cookies.split(";"):
                 cookie[c[: c.find("=")].strip()] = c[c.find("=") + 1 :].strip()
         else:
-            try:
-                cookies_base64 = cookies.strip()  # .strip('=')
-                # print()
-                # print(cookies_base64)
-                # print()
-                cookies64_bytes = cookies_base64.encode("ascii")
-                cookies_bytes = base64.b64decode(cookies64_bytes)
-                # exit()
-                cookies_str = (
-                    cookies_bytes.decode("ascii")
-                    .replace("'", '"')
-                    .replace("False", "false")
-                    .replace("True", "true")
-                )
-                cookies = json.loads(cookies_str)
-
-                cookie = {}
-                if vendor == "159":
-                    for c in cookies["cookies"]:
-                        cookie[c["name"]] = c["value"]
-                else:
-                    for c in cookies:
-                        name = c["name"]
-                        del c["name"]
-                        cookie[name] = c["value"]
-                doc["cookie"] = cookie
-            except Exception as e:
-                print('\n\nFel på cookie.\n', e, '\n')
-                for i in profile:
-                    print(i)
-                continue
+            cookies_base64 = cookies.strip()  # .strip('=')
+
+            cookies64_bytes = cookies_base64.encode("ascii")
+            cookies_bytes = base64.b64decode(cookies64_bytes)
+            cookies_str = (
+                cookies_bytes.decode("ascii")
+                .replace("'", '"')
+                .replace("False", "false")
+                .replace("True", "true")
+            )
+            # Vendor 827 delivers the cookie as a plain "k=v; k=v" string
+            if vendor in ["827"]:
+                cookies = {}
+                for c in cookies_str.split(';'):
+                    cookies[c[:c.find('=')]] = c[c.find('=') + 1:]
+            else:
+                cookies = json.loads(cookies_str)
+
+            cookie = {}
+            if vendor in ["159"]:
+                for c in cookies["cookies"]:
+                    cookie[c["name"]] = c["value"]
+            elif vendor in ["827"]:
+                cookie = cookies
+            else:
+                for c in cookies:
+                    name = c["name"]
+                    del c["name"]
+                    cookie[name] = c["value"]
+            doc["cookie"] = cookie
     else:
         cookie = {}
     if "birthday" in info:
@@ -163,7 +174,6 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
 
     print(f'\nInlagda profiler: {n0}\nProfiler redan i db: {n1}')
 
-
 def used_servers(profiles="profiles"):
     cursor = db.aql.execute(
         """
@@ -231,7 +241,9 @@ if __name__ == "__main__":
         },
         #'1113': {'info': 'login:mail:password:emailpassword:birthday:useragent:token:cookie', 'sep': '|'},
         "159": {"info": "login:password:mail:email password:birthday:id", "sep": ":"},
-        #'159': {'info': 'login:password:birthday:id:cookie', 'sep':':'
+        #'159': {'info': 'login:password:birthday:id:cookie', 'sep':':',
+        "827": {"info": "login:password:mail:email password:birthday:useragent:token:cookie",
+                "sep": "|"}
     }
 
     ###############################
@@ -254,26 +266,15 @@ if __name__ == "__main__":
             row = row.replace("https:", "https;")
             data.append(row.split(sep))
 
-    # Insert into accs
-    # Open an SSH tunnel to the RPi/db.
-    pwd_key = getpass("Password for rsa-key: ")
-    with open_tunnel(
-        ("studio-garda.asuscomm.com", 2200),
-        ssh_username="Lasse",
-        ssh_pkey=paramiko.RSAKey.from_private_key_file(
-            "/Users/Lasse/.ssh/id_rsa", password=pwd_key
-        ),
-        ssh_private_key_password=pwd_key,
-        remote_bind_address=("127.0.0.1", 8529),
-    ) as server:
-        port_arango = server.local_bind_port
-
-        db = arangodb.arango_connect(
-            "concert-hangar-mirth-salk-DECAL",
-            username="Accs",
-            host_arango="http://127.0.0.1",
-            port_arango=port_arango,
-        )
-        #webshare_proxies()
-
-        to_accs(db, data, info, profiles, vendor)
+    # Insert into accs
+    db = dbViaSSH.db_over_tunnel('Accs')
+
+    #webshare_proxies()
+
+    to_accs(db, data, info, profiles, vendor)
+
+    dbViaSSH.stop_server()
diff --git a/facebook/accs_to_profiles.py b/facebook/accs_to_profiles.py
index 3ffbf09..1f50c0b 100644
--- a/facebook/accs_to_profiles.py
+++ b/facebook/accs_to_profiles.py
@@ -5,16 +5,14 @@ from getpass import getpass
 from os.path import abspath, dirname
 from random import randint
 from time import sleep
-import base64
-import json
-import requests
+
 
 # Make fb-scraper the working directory
 chdir(dirname(dirname(abspath(__file__))))
 
 from arangodb import arango_connect
 import config
-from helpers import now, nowstamp
+from helpers import nowstamp
 
 def used_servers(profiles='profiles'):
     cursor = db.aql.execute(
diff --git a/facebook/arangodb.py b/facebook/arangodb.py
index 29e634e..5e24531 100644
--- a/facebook/arangodb.py
+++ b/facebook/arangodb.py
@@ -19,6 +19,9 @@ if __name__ != '__main__.py':
             exit('Fel lösenord, kunde inte logga in i DB.')
         if 'pwd' not in globals():
             pwd = getpass(f'Lösenord för {user_arango}: ')
+            if pwd == '':
+                db = None
+                break
 
         try:
             db = ArangoClient(hosts=f'{host_arango}:{port_arango}').db(db_arango, username=user_arango, password=pwd)
@@ -79,7 +82,8 @@ def report_blocked(profile):
             },
             overwrite=True,
         )
-    except:
+    except Exception as e:
+        print(e)
         _print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.')
diff --git a/facebook/classes.py b/facebook/classes.py
index 4b64e5a..4c03070 100644
--- a/facebook/classes.py
+++ b/facebook/classes.py
@@ -25,6 +25,7 @@ class User:
         self.url_likes = ""
self.url_about = "" self.url_timeline = "" + self.url_album = "" self.url_profilepictures = "" self.profile_pictures = 0 self.pictures = [] @@ -270,6 +271,7 @@ class Friend: self.username = "" self.url = "" self.name = "" + self.id = "" def add_to_db(self): db.insert_document( @@ -278,6 +280,7 @@ class Friend: "_key": str(self.username), "url": url_bas + self.url, "name": self.name, + 'id_from_seemore_url': self.id }, overwrite_mode="update", silent=True, diff --git a/facebook/config.py b/facebook/config.py index 91c4e90..713e569 100644 --- a/facebook/config.py +++ b/facebook/config.py @@ -11,6 +11,7 @@ user_arango = "Lasse" db_arango = "facebook" host_arango = 'http://192.168.1.10' port_arango = '8529' +host_adress = "studio-garda.asuscomm.com" #IP/adress till där db finns # Andra uppgifter url_bas = "https://mbasic.facebook.com" diff --git a/facebook/gephi.py b/facebook/gephi.py index 05803f4..98584ad 100644 --- a/facebook/gephi.py +++ b/facebook/gephi.py @@ -10,8 +10,7 @@ from getpass import getpass import arangodb locale.setlocale(locale.LC_TIME, "en_US") - - +import dbViaSSH def nodes_from_list( @@ -244,6 +243,12 @@ def common_friends(d, n=2): pwd = getpass('Password for Lasse: ') db = arangodb.arango_connect(pwd) +db.collecion('members').random() +try: + db.collecion('members').random() + +except: + pass if __name__ == "__main__": diff --git a/facebook/scrapers.py b/facebook/scrapers.py index 5260666..d76e013 100644 --- a/facebook/scrapers.py +++ b/facebook/scrapers.py @@ -82,7 +82,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): # Om det inte finns något profilalbum # Testa ta bort mellanrum och små bokstäver - if not hasattr(user, "url_album"): + if user.url_album == "": for a in profile.viewing().find_all("a", href=True): if "profilepictures" in a.text.lower().replace(" ", ""): user.url_album = url_bas + a["href"] @@ -91,7 +91,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): # Gå till profilbilden (den första som kommer upp när man går till profilen) # Om profilen inte har profilalbum - if not hasattr(user, "url_album"): + if user.url_album == "": write_error(9, profile, soup=profile.viewing(), user=user) if user.url_other_pictures != []: # Använd eventuella extrabilder och ta bort den från användaren @@ -158,7 +158,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): if mode == "single" and user.reactions > 30: break elif all([any([mode == "few", mode == "solo"]), user.reactions > 80, pic != url_pics[-1]]): - # Kolla den sista bilder + # Kolla den sista bilden check_picture(url_bas + url_pics[-1], user, profile) user.checked_pictures.append(url_bas + pic) break @@ -171,7 +171,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): def check_picture(url_picture, user, profile): - """ Hämtar reaktioner för en bildprint """ + """ Hämtar reaktioner för en bild. 
""" picture = Picture(user.username) picture.url = url_picture @@ -180,8 +180,7 @@ def check_picture(url_picture, user, profile): picture.id = str(re.search("\d+", picture.id).group()) except: pass - # if picture.id in all_pictures: - # continue + sleep_(5) try: @@ -249,8 +248,9 @@ def check_picture(url_picture, user, profile): for div in profile.viewing().find_all("div", href=True): if "like this" in div.text: url_reactions = url_bas + str(div["href"]) - + # Hämta reaktioner för bilden + sleep_(3) profile.open(url_reactions) @@ -264,7 +264,7 @@ def check_picture(url_picture, user, profile): picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) limit = re.search(r"limit=(\d+)", url_limit).group( 1 - ) # TODO Fortfarande problem med det här + ) except UnboundLocalError: # fel9 write_error( 9, @@ -281,82 +281,149 @@ def check_picture(url_picture, user, profile): # Addera bilden till arrango picture.add_to_db() + # Begränsa limit till 50 då Facebook inte ger fler (för första "klick"). + try: + if int(picture.no_reactions) > 50: + no_reactions = 50 + elif int(picture.no_reactions) == 0: + no_reactions = 0 + else: + no_reactions = int(picture.no_reactions) - 1 + except TypeError: + #print(picture.no_reactions, type(picture.no_reactions)) + no_reactions = picture.no_reactions + + #print('\nANTAL REAKTIONER TOTALT PÅ BILDEN:', picture.no_reactions) + url_limit = url_bas + url_limit.replace( - "limit=" + str(limit), "limit=" + str(picture.no_reactions) + "limit=" + str(limit), "limit=" + str(no_reactions) ) + + list_ids = [] - try: + while True: + #try: sleep_(4) + #print('\nurl_limit'.upper(), url_limit, '\n') profile.open(url_limit) - url_limit = "" - update_cookie(profile.browser.session.cookies, profile) + #url_limit = "" # Vad gjorde den här? + update_cookie(profile.browser.session.cookies, profile) + + # Hämta länk för "See more" för att se vilka ID:s som visas + url_see_more = None + #print('\nVARJE LÄNK PÅ SIDAN') + for a in profile.viewing().find_all("a"): + #print(a) + if "See More" in a.text: # Om det finns fler reaktioner att hämta + #print('\nHITTADE "SEE MORE"\n') + + url_see_more = a['href'] + ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')] + list_ids_from_url = ids_url.split('%2C') # Alla IDs hittills + + #print('\nlist_pictures_from_url\n'.upper(), list_ids_from_url) # Lista från länk med profiler kollade hittills(?) 
+
+                list_ids_page = list_ids_from_url[len(list_ids):]  # The profiles on this page
+
+                list_ids.extend(list_ids_page)  # Add this page's IDs to the running list
+
+                # Set the right limit for the next page
+                limit_next_page = int(picture.no_reactions) - len(list_ids_from_url)
+                if limit_next_page > 50:
+                    limit_next_page = 50
+                url_limit = url_bas + url_see_more.replace('limit=10', f'limit={limit_next_page}')  # The link to more profiles
 
         # Go through everyone who reacted and insert them into Arango
-        for li in profile.viewing().find_all("li"):
-            friend = Friend(user.username)
-            if "see more" in li.text.lower():
-                continue
-            try:
-                friend_html = li.find("h3").find("a")
-                friend.name = friend_html.text
-                friend.url = friend_html["href"]
-                if "profile.php" in friend.url:
-                    if "&paipv" in friend.url:
-                        friend.username = friend.url[
-                            friend.url.find("=") + 1 : friend.url.find("&")
-                        ]
-                    else:
-                        friend.username = friend.url[friend.url.find("id=") + 3 :]
-                else:
-                    if "?" in friend.url:
-                        friend.username = friend.url[
-                            friend.url.find("/") + 1 : friend.url.find("?")
-                        ]
-                    else:
-                        friend.username = friend.url[friend.url.find("/") + 1 :]
-
-                reaction = Reaction(user.username, friend.username, picture.id)
-                for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
-                    if type in str(li):
-                        reaction.type = type
-                picture.reactions.append(reaction.get_dict())
-                # Add the friend's profile to Arango
-                try:
-                    friend.add_to_db()
-                except:
-                    _print(profile, user, f"Kunde inte lägga till vän {friend.url}")
-
-            except AttributeError as e:  # Fel1
-                write_error(
-                    1,
-                    profile,
-                    e=e,
-                    soup=str(li),
-                    user=user,
-                    traceback=traceback.format_exc(),
-                )
-                pass
-
-        # Insert the reactions into the database
-        db.collection("picture_reactions").insert_many(
-            picture.reactions, silent=True, overwrite=True
-        )
-        db.collection("picture_reactions").insert_many(
-            picture.reactions, silent=True, overwrite=True
-        )
-
-        # Update the number of reactions the user has received
-        user.reactions += len(picture.reactions)
-    except Exception as e:  # Fel2
-        write_error(
-            2,
-            profile,
-            e=e,
-            soup=profile.viewing(),
-            user=user,
-            url=url_limit,
-            url_name="url_limit",
-            traceback=traceback.format_exc(),
-        )
-        pass
+        get_reactions(profile, user, picture, list_ids_page)
+
+        if url_see_more is None:  # No more reactions to fetch
+            break
+
+    # Insert the reactions into the database
+    db.collection("picture_reactions").insert_many(
+        picture.reactions, silent=True, overwrite=True
+    )
+
+    # Update the number of reactions the user has received
+    user.reactions += len(picture.reactions)
+
+
+def get_reactions(profile, user, picture, list_ids_page):
+    """Gather the reactions on a picture.
+
+    Args:
+        profile (class): The active profile.
+        user (class): The user being scraped.
+        picture (class): The picture.
+        list_ids_page (list): List of IDs parsed from the "See More" url.
+    """
+
+    # Go through everyone who reacted and insert them into Arango
+    list_ids = list_ids_page.copy()
+    for li in profile.viewing().find_all("li"):
+        friend = Friend(user.username)
+        if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''):
+            continue
+        try:
+            friend_html = li.find("h3").find("a")
+            friend.name = friend_html.text
+            friend.url = friend_html["href"]
+            # Guard: pages without a "See More" link provide no IDs
+            friend.id = list_ids.pop(0) if list_ids else ""
+            if "profile.php" in friend.url:
+                if "&paipv" in friend.url:
+                    friend.username = friend.url[
+                        friend.url.find("=") + 1 : friend.url.find("&")
+                    ]
+                else:
+                    friend.username = friend.url[friend.url.find("id=") + 3 :]
+            else:
+                if "?" in friend.url:
+                    friend.username = friend.url[
+                        friend.url.find("/") + 1 : friend.url.find("?")
+                    ]
+                else:
+                    friend.username = friend.url[friend.url.find("/") + 1 :]
+
+            reaction = Reaction(user.username, friend.username, picture.id)
+
+            for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
+                if type in str(li):
+                    reaction.type = type
+
+            picture.reactions.append(reaction.get_dict())
+            # Add the friend's profile to Arango
+            try:
+                friend.add_to_db()
+            except:
+                _print(profile, user, f"Kunde inte lägga till vän {friend.url}")
+        except AttributeError as e:  # Fel1
+            write_error(
+                1,
+                profile,
+                e=e,
+                soup=str(li),
+                user=user,
+                traceback=traceback.format_exc(),
+            )
+            pass
diff --git a/facebook/search_leak.py b/facebook/search_leak.py
index a433fa6..bc479e9 100644
--- a/facebook/search_leak.py
+++ b/facebook/search_leak.py
@@ -3,22 +3,21 @@
 Script for searching the FB leak.
 """
 import re
-import paramiko
-import arangodb
-from getpass import getpass
-from sshtunnel import open_tunnel
 from termcolor import cprint
 
+import dbViaSSH
+
 
 def search(db, attribute, value):
     """
     Search for attribute in db.
 
     Returns a list of matching documents.
     """
-    if '%' in value or '_' in value:
-        match = 'like'
+    if "%" in value or "_" in value:
+        match = "like"
     else:
-        match = '=='
+        match = "=="
 
     cursor = db.aql.execute(
         f"""
@@ -30,74 +29,60 @@ def search(db, attribute, value):
     )
     return [doc for doc in cursor]
 
-pwd_key = getpass(f"Password key: ")
-
-with open_tunnel(
-    ("studio-garda.asuscomm.com", 2200),
-    ssh_username="Lasse",
-    ssh_pkey=paramiko.RSAKey.from_private_key_file(
-        "/Users/Lasse/.ssh/id_rsa", password=pwd_key
-    ),
-    ssh_private_key_password=pwd_key,
-    remote_bind_address=("127.0.0.1", 8529),
-) as server:
-    # server.start()
-    port_arango = server.local_bind_port
-
-    db = arangodb.arango_connect(
-        "gruel-ADOBE-foolish-winy-borax",
-        username="Leak",
-        host_arango="http://127.0.0.1",
-        port_arango=port_arango,
-    )
-
-    cprint("\n\nVad vill du söka efter?", attrs=['bold'])
-    print("1 - Telefonnummer")
-    print("2 - Facebook-ID")
-    print('3 - Namn')
-    print("4 - Arbete")
-    print('5 - Bostadsort')
-    print("6 - Födelseort")
-    print("7 - Epost")
-
-    # Get input for the attribute
-    attribute = input("\n>>> ")
-    attributes = {
-        "1": ("telefonnummer", "phone"),
-        "2": ("Facebook-ID", "_key"),
-        "3": ("namn", "full_name"),
-        "4": ("arbete", "work"),
-        "5": ('bostadsort', "lives_in"),
-        "6": ('födelseort', 'from'),
-        "7": ('epost', 'email')
-    }
-
-    # Pick the Swedish n- or t-gender form and get input for the value.
-    if attribute in ['5', '6', '7']:
-        genus = 'n'
-    else:
-        genus = 't'
-
-    cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=['bold'])
-    cprint('Använd % för att ersätta flera okända tecken, _ för att ersätta ett.', attrs=['dark'])
-    value = input('\n>>> ')
-
-    if attribute == '1':  # phone number
-        value = ''.join(re.findall(r'\d+', value))
-        if value[0] == '0':
-            value = f'46{value[1:]}'
-    elif attribute == '3':  # name
-        value = value.upper()
-
-    # Search the database.
-    result = search(db, attributes[attribute][1], value)
-
-    # Present the results  # TODO: how should they be delivered? Saved to disk?
-    for i in result:
-        print('\n', i['full_name'])
-        for key, value in i.items():
-            print(f'{key}: {value}')
-        print(f'https://facebook.com/{i["_key"]}')
-
-    print(f'\nAntal träffar: {len(result)}\n')
+db = dbViaSSH.db_over_tunnel("Leak")
+
+cprint("\n\nVad vill du söka efter?", attrs=["bold"])
+print("1 - Telefonnummer")
+print("2 - Facebook-ID")
+print("3 - Namn")
+print("4 - Arbete")
+print("5 - Bostadsort")
+print("6 - Födelseort")
+print("7 - Epost")
+
+# Get input for the attribute
+attribute = input("\n>>> ")
+attributes = {
+    "1": ("telefonnummer", "phone"),
+    "2": ("Facebook-ID", "_key"),
+    "3": ("namn", "full_name"),
+    "4": ("arbete", "work"),
+    "5": ("bostadsort", "lives_in"),
+    "6": ("födelseort", "from"),
+    "7": ("epost", "email"),
+}
+
+# Pick the Swedish n- or t-gender form and get input for the value.
+if attribute in ["5", "6", "7"]:
+    genus = "n"
+else:
+    genus = "t"
+
+cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=["bold"])
+cprint(
+    "Använd % för att ersätta flera okända tecken, _ för att ersätta ett.",
+    attrs=["dark"],
+)
+value = input("\n>>> ")
+
+if attribute == "1":  # phone number
+    value = "".join(re.findall(r"\d+", value))
+    if value[0] == "0":
+        value = f"46{value[1:]}"
+elif attribute == "3":  # name
+    value = value.upper()
+
+# Search the database.
+result = search(db, attributes[attribute][1], value)
+
+# Present the results  # TODO: how should they be delivered? Saved to disk?
+for i in result:
+    print("\n", i["full_name"])
+    for key, value in i.items():
+        print(f"{key}: {value}")
+    print(f'https://facebook.com/{i["_key"]}')
+
+print(f"\nAntal träffar: {len(result)}\n")
+
+dbViaSSH.stop_server()
diff --git a/requirements.txt b/requirements.txt
index 574069e..327ba58 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,6 @@ idna==2.10
 lxml
 pycparser==2.20
 PyJWT==2.0.1
-#PyNaCl==1.4.0
 PySocks==1.7.1
 python-arango==7.1.0
 requests==2.25.1
@@ -18,4 +17,4 @@ soupsieve==2.2
 toml==0.10.2
 urllib3==1.26.3
 Werkzeug==1.0.1
-json2html
+json2html
\ No newline at end of file
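
Note on the new reaction paging in facebook/scrapers.py: mbasic returns at most 50 reactions per request, and each "See More" link carries an ids= list of everything shown so far plus a total_count, so the next request's limit is simply the remaining count capped at 50. A minimal standalone sketch of that bookkeeping, assuming total_count and the accumulated IDs are already parsed from the URL (next_page_limit and seen_ids are illustrative names, not from the codebase):

    # Sketch of the limit computation used in the "See More" paging loop.
    def next_page_limit(total_count: int, seen_ids: list) -> int:
        """Reactions still to fetch, capped at the 50-per-request ceiling."""
        remaining = total_count - len(seen_ids)
        # Clamp to [0, 50]; the diff's version only caps the upper bound.
        return min(max(remaining, 0), 50)

    # 130 reactions in total, 50 already listed -> ask for 50 more.
    assert next_page_limit(130, ["id"] * 50) == 50
    # 130 in total, 120 seen -> only 10 left on the last page.
    assert next_page_limit(130, ["id"] * 120) == 10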