Lasse Studion 4 years ago
commit 6698ab6faa
20 changed files:

 18  .gitignore
  2  Dockerfile
 19  docker/free/Dockerfile
 13  docker/mrkoll/Dockerfile
194  docker/mrkoll/mrkoll_scraperapi.py
 14  docker/mrkoll/requirements.txt
 70  docker/profile_pictures/images.py
 15  docker/stats/Dockerfile
 27  docker/stats/requirements.txt
 48  docker/stats/stats.py
  3  facebook/__main__.py
121  facebook/accs_to_db.py
  6  facebook/accs_to_profiles.py
  6  facebook/arangodb.py
  3  facebook/classes.py
  1  facebook/config.py
  9  facebook/gephi.py
221  facebook/scrapers.py
139  facebook/search_leak.py
  3  requirements.txt

.gitignore (18 changes, vendored)

@@ -1,18 +1,28 @@
# Miscellaneous
/.DS_Store
/.venv
*.venv
/.vscode
/__pycache__
*.json
*.pkl
facebook/test.py
/data/*
*.html
*.code-workspace
workspace.code-workspace
password_arango.txt
*.gexf
facebook/mrkoll.
*.pyc
*.sqlite3
#facebook
/facebook
!/facebook/*.py
*.sqlite3
facebook/test.py
facebook/mrkoll.
# docker
/stats/*
!/stats/*.py
requirements2.txt

Dockerfile (2 changes)

@@ -17,5 +17,5 @@ CMD ["",""]
# BUILD:
# docker buildx create --use
#docker buildx build --platform linux/arm64,linux/arm64,linux/amd64 -t l3224/fb-scraper:pi --push .
#docker buildx build --platform linux/arm,linux/arm64,linux/amd64 -t l3224/fb-scraper:VERSION --push .

docker/free/Dockerfile (deleted)

@@ -1,19 +0,0 @@
FROM python:3.8
WORKDIR /
COPY requirements.txt .
RUN pip install -r requirements.txt
ADD . .
ENTRYPOINT [ "python", "facebook/__main__.py", "-p free" ]
CMD ["",""]
# BUILD:
# docker buildx create --use
#docker buildx build --file docker/free/Dockerfile --platform linux/arm -t l3224/fb-scraper:free --push .

docker/mrkoll/Dockerfile (13 changes)

@@ -1,14 +1,15 @@
# syntax=docker/dockerfile:1
FROM python:3.8
FROM python:3.8-slim-buster
WORKDIR /
COPY requirements.txt requirements.txt
COPY requirements.txt .
RUN pip3 install -r requirements.txt
RUN pip install -r requirements.txt
COPY . .
ADD . .
ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ]
ENTRYPOINT [ "python", "facebook/mrkoll.py" ]
CMD [""]
# docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push .

docker/mrkoll/mrkoll_scraperapi.py (new file)

@@ -0,0 +1,194 @@
import re
import subprocess
import requests
from sys import argv
from time import sleep
from bs4 import BeautifulSoup
from arango import ArangoClient
def find_person(number):
"""
Söker personuppgifter utifrån telefonnummer.
"""
sleep(2)
url = f'https://mrkoll.se/resultat?n={number}'
api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
payload = {'api_key': api_key, 'url': url, 'country_code': 'se', 'device_type':'desktop'}
response = requests.get('http://api.scraperapi.com', params=payload)
r = response.text
# Fetch the page
soup = BeautifulSoup(r, 'html.parser')
if (
"Du har gjort för många anrop" in soup.text
or response.url == "https://mrkoll.se/om/limit/"
):
sleep(10)
return None
# Collect the data into a dictionary
d = {}
d["url_via_telefonnummer"] = response.url
try:
for a in soup.find_all("a", href=True):
if "boende-med-" in a["href"]:
d["lives_with_url"] = a["href"]
if "-hushall" in a["href"]:
d["lives_with"] = a.text
except:
pass
if "Sökningen gav 0 träffar..." in soup.text:
return {}
info = soup.find("div", {"class": "block_col1"})
try:
d["first_name"] = info.find(
"span", {"title": "Detta är personens tilltalsnamn"}
).text
except:
pass
try:
d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
except:
pass
try:
d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
except:
pass
try:
adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
d["adress_line1"] = adress[0].text
if len(adress) > 1:
d["adress_line2"] = adress[1].text
except:
pass
try:
d["history"] = info.find("div", {"class": "history_container"}).text
except:
pass
# Personal identity number
## Date of birth
for i in soup.find_all("div", {"class": "col_block1"}):
if "Personnummer" in i.text:
d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
"-XXXX", ""
)
## Last four digits
try:
start = "showPersnr"
end = ">Jag godkänner</span>"
t = str(soup)
v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
sleep(2) # Wait a moment
four_last = requests.get("http://mrkoll.se" + url_ajax).text
d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
except:
pass
try:
neighbours = {}
for div in soup.find_all("div", {"class": "peoplecont"}):
persons = div.find_all("a", href=True)
for person in persons:
neighbours[person.find("strong").text] = {
"link": person["href"],
"lived_years": re.search(
"\d+", person.find("span", {"class": "flyttclass"}).text
).group()[0],
}
d["neighbours"] = neighbours
except:
pass
try:
d["name_change"] = [
div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
]
except:
pass
try:
prosecuted = {}
prosecuted["brottsmål"] = (
True if soup.find("div", {"class": "resmark res_b"}) != None else False
)
prosecuted["tvistemål"] = (
True if soup.find("div", {"class": "resmark res_t"}) != None else False
)
prosecuted["straffföreläggande"] = (
True if soup.find("div", {"class": "resmark res_s"}) != None else False
)
d["prosecuted"] = prosecuted
except:
pass
return d
if __name__ == "__main__":
ip = 'scraperapi'
if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12':
print('\nMULLVAD NOT ACTIVE\n')
exit()
# ArangoDB connection info
user_arango = "Phone"
db_arango = "facebook"
host_arango = "http://192.168.1.10:8529"
# Open the connection to arangodb
db = ArangoClient(hosts=host_arango).db(
db_arango, username=user_arango, password=argv[1]
)
leak = db.collection("phoneleak")
count = 0
scraper_count = 0
errors = 0
while True:
count += 1
# Fetch a random person
doc = leak.random()
# Run the search on mrkoll.se
d = find_person(doc["phone"])
try:
name = d["first_name"] + ' '
except:
name = ' '
print(f'{count} - {errors} {name}', end="\r")
if d is None: # If the IP address is blocked or something happened
continue
d["_key"] = doc["_key"]
d["_id"] = "phone/" + str(d["_key"])
d["phone"] = doc["phone"]
d["checked_from_ip"] = f'{ip} - cache'
try:
db.collection("phone").insert(d)
leak.delete(doc["_key"])
except:
pass
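For orientation, a minimal sketch of calling find_person directly. The phone number is hypothetical, and the 46 country-prefix convention is taken from search_leak.py further down:

# Usage sketch (hypothetical number; assumes the module above is importable as mrkoll_scraperapi).
from mrkoll_scraperapi import find_person

person = find_person("46701234567")  # leak numbers are stored with a 46 prefix
if person is None:
    print("Blocked or rate limited, try again later.")
elif not person:
    print("No hits.")
else:
    print(person.get("first_name"), person.get("last_name"))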

docker/mrkoll/requirements.txt (new file)

@@ -0,0 +1,14 @@
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2021.5.30
charset-normalizer==2.0.4
idna==3.2
PyJWT==2.1.0
python-arango==7.2.0
requests==2.26.0
requests-toolbelt==0.9.1
setuptools-scm==6.0.1
soupsieve==2.2.1
toml==0.10.2
urllib3==1.26.6
requests_cache==0.7.4

docker/profile_pictures/images.py (new file)

@@ -0,0 +1,70 @@
import requests
import os
from datetime import date, datetime, timedelta
from time import sleep
from arangodb import db
def download_image(url, user, id):
# Make sure the user folder exists
if not os.path.isdir(f'../profile_pictures/{user}'):
os.mkdir(f'../profile_pictures/{user}')
# Download the image
r = requests.get(url)
if r.text == 'URL signature expired':
print('Link too old.')
exit()
elif r.status_code == 403:
exit()
img_data = r.content
with open(f'../profile_pictures/{user}/{id}.jpg', 'wb') as handler:
handler.write(img_data)
def get_pictures(day):
cursor = db.aql.execute(
"""
for doc in members
filter doc.fetched == @date
filter has(doc, "checked_pictures")
filter not has(doc, "pictures_downloaded")
return {'member': doc._key, 'pictures':doc.checked_pictures}
""",
bind_vars={'date': day}
)
for doc in cursor:
pictures = []
for picture in doc['pictures']:
pictures.append(picture[picture.find('fbid=')+5:])
pic_cursor = db.aql.execute(
"""
for doc in pictures
filter doc._key in @list
limit 10
return {'_key': doc._key, 'user':doc.user, 'url': doc.src}
""",
bind_vars={"list": pictures},
)
for picture in pic_cursor:
download_image(picture['url'], picture['user'], picture['_key'])
print(picture['_key'])
sleep(2)
db.update_document({'_id': 'members/' + str(doc['member']), 'pictures_downloaded': True}, silent=True, check_rev=False)
def old_pics():
if not os.path.isdir('../profile_pictures'):
os.mkdir('../profile_pictures')
start = date.today()
for i in range(1,60):
d = start - timedelta(days=i)
get_pictures(d.strftime('%Y%m%d'))
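The header says the file is 70 lines, but no entry point is visible in this view; a minimal invocation, if one were needed, could look like this sketch:

# Hypothetical entry point; the actual one (if any) is not visible in this diff view.
if __name__ == '__main__':
    old_pics()  # walk the last 60 days and download pictures flagged as checked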

docker/stats/Dockerfile (new file)

@@ -0,0 +1,15 @@
FROM python:alpine
WORKDIR /
RUN apk add --update --no-cache g++ gcc libxslt-dev
COPY requirements.txt .
RUN pip install -r requirements.txt
ADD . .
ENTRYPOINT [ "python", "stats.py" ]
# docker buildx build --file docker/stats/Dockerfile --platform linux/arm64,linux/amd64 -t mrkoll .

docker/stats/requirements.txt (new file)

@@ -0,0 +1,27 @@
black==21.8b0
certifi==2020.6.20
chardet==4.0.0
click==8.0.1
httplib2==0.18.1
idna==2.10
mypy-extensions==0.4.3
packaging==21.0
pathspec==0.9.0
platformdirs==2.3.0
#pycurl==7.43.0.6
PyJWT==2.1.0
pyparsing==2.4.7
PySimpleSOAP==1.16.2
#python-apt==2.2.1
python-arango==7.2.0
python-debian==0.1.39
python-debianbts==3.1.0
regex==2021.8.28
#reportbug==7.10.3
requests==2.25.1
requests-toolbelt==0.9.1
setuptools-scm==6.3.1
six==1.16.0
tomli==1.2.1
typing-extensions==3.10.0.2
urllib3==1.26.5

docker/stats/stats.py (48 changes)

@@ -3,21 +3,20 @@ from getpass import getpass
from time import sleep
from arango import ArangoClient
from json2html import json2html
def now():
""" Returns current date and time as string"""
"""Returns current date and time as string"""
return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
def write_stats(db, continuous=False):
while True:
d = {}
for col in db.collections():
if not col['system']:
d[col['name']] = db.collection(col['name']).count()
del d['stats']
#d['time'] = now()
if not col["system"]:
d[col["name"]] = db.collection(col["name"]).count()
del d["stats"]
# d['time'] = now()
cursor = db.aql.execute(
"""
FOR doc IN members
@@ -25,50 +24,43 @@ def write_stats(db, continuous=False):
COLLECT WITH COUNT INTO length
RETURN length
"""
)
d['checked_members'] = cursor.next()
)
d["checked_members"] = cursor.next()
# How many accounts per vendor remain
cursor = db.aql.execute(
'''
for doc in profiles
"""
for doc in profiles_webshare
filter has(doc, "vendor")
COLLECT vendor = doc.vendor WITH COUNT INTO length
RETURN {
"vendor" : vendor,
"active" : length
}
''')
d['active_vendors'] = [doc for doc in cursor]
d['_key'] = now()[:13]
db.insert_document( "stats", d, overwrite=True)
"""
)
d["active_vendors"] = [doc for doc in cursor]
# Write an HTML file
with open('website/fb-webbapp/stats.html', 'a+') as html:
html.truncate(0)
html.write('<!DOCTYPE html> <br>')
d["_key"] = now()[:13]
db.insert_document("stats", d, overwrite=True)
html.write(json2html.convert(json = d))
# Sleep, to continue later
if continuous:
print(now())
sleep(86400)
else:
break
# ArangoDB connection info
user_arango = "Stats"
db_arango = "facebook"
host_arango = "http://192.168.0.4:8529"
host_arango = "http://192.168.1.10:8529"
# Open the connection to arangodb
# Prompt for the Arango password
pwd = getpass(f'Arango password for {user_arango}:').strip()
pwd = getpass(f"Arango password for {user_arango}:").strip()
db = ArangoClient(hosts=host_arango).db(
db_arango, username=user_arango, password=pwd
)
db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)
write_stats(db, continuous=True)
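Each run writes one document keyed by date and hour. Given the code above, its shape is roughly as follows (field names from the code, values made up):

# Illustrative shape of one stats document (values are invented).
stats_doc = {
    "_key": "2021-09-04_12",  # now()[:13] -> date plus hour
    "members": 120000,        # one count per non-system collection
    "checked_members": 45000, # members matching the AQL count above
    "active_vendors": [{"vendor": "827", "active": 12}],
}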

facebook/__main__.py (3 changes)

@@ -160,6 +160,7 @@ if __name__ == "__main__":
)
if profile.blocked:
profile = blocked_profile(profile, proxieservers=proxieservers)
user = User(str(userdoc['_key']).strip(), mode, other_pictures=url_other_pictures)
else:
break
except:
@@ -175,7 +176,7 @@
friends_unchecked = []
for friend in friends:
if not check_for_user(friend):
if not check_for_user(friend) and friend not in friends_unchecked:
print(friend)
friends_unchecked.append(friend)
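Membership tests on a list are O(n) per friend; a set alongside the list would do the same dedup in O(1). A sketch, not part of the commit:

# Equivalent dedup with a set (sketch; the commit keeps the list-membership test).
seen = set()
friends_unchecked = []
for friend in friends:
    if not check_for_user(friend) and friend not in seen:
        print(friend)
        seen.add(friend)
        friends_unchecked.append(friend)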

facebook/accs_to_db.py (121 changes)

@@ -6,12 +6,10 @@ from time import sleep
import base64
import json
import requests
from sshtunnel import open_tunnel
import paramiko
from getpass import getpass
import arangodb
import config
from helpers import now
import dbViaSSH
# Make fb-scraper the working directory
chdir(dirname(dirname(abspath(__file__))))
@@ -86,18 +84,23 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
n0 = 0
n1 = 0
for profile in data:
if len(profile) < 3:
continue
doc = {}
doc["vendor"] = vendor
doc["created"] = now()
if "email" in info:
doc["email"] = profile[info.index("email")]
elif "login" in info:
doc["email"] = profile[info.index("login")]
if doc["email"] in used_accs or doc["email"] in used_profiles:
n1 += 1
continue
@@ -120,37 +123,45 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
for c in cookies.split(";"):
cookie[c[: c.find("=")].strip()] = c[c.find("=") + 1 :].strip()
else:
try:
cookies_base64 = cookies.strip() # .strip('=')
# print()
# print(cookies_base64)
# print()
cookies64_bytes = cookies_base64.encode("ascii")
cookies_bytes = base64.b64decode(cookies64_bytes)
# exit()
cookies_str = (
cookies_bytes.decode("ascii")
.replace("'", '"')
.replace("False", "false")
.replace("True", "true")
)
#try:
cookies_base64 = cookies.strip() # .strip('=')
cookies64_bytes = cookies_base64.encode("ascii")
cookies_bytes = base64.b64decode(cookies64_bytes)
# exit()
cookies_str = (
cookies_bytes.decode("ascii")
.replace("'", '"')
.replace("False", "false")
.replace("True", "true")
)
if vendor in ["827"]:
cookies = {}
for c in cookies_str.split(';'):
cookies[c[:c.find('=')]] = c[c.find('=')+1:]
else:
cookies = json.loads(cookies_str)
cookie = {}
if vendor == "159":
for c in cookies["cookies"]:
cookie[c["name"]] = c["value"]
else:
for c in cookies:
name = c["name"]
del c["name"]
cookie[name] = c["value"]
doc["cookie"] = cookie
except Exception as e:
print('\n\nCookie error.\n', e, '\n')
for i in profile:
print(i)
continue
cookie = {}
if vendor in ["159"]:
for c in cookies["cookies"]:
cookie[c["name"]] = c["value"]
elif vendor in ["827"]:
cookie = cookies
else:
for c in cookies:
name = c["name"]
del c["name"]
cookie[name] = c["value"]
doc["cookie"] = cookie
# except Exception as e:
# print('\n\nCookie error.\n', e, '\n')
# for i in profile:
# print(i)
# exit()
else:
cookie = {}
if "birthday" in info:
@@ -163,7 +174,6 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
print(f'\nProfiles added: {n0}\nProfiles already in db: {n1}')
def used_servers(profiles="profiles"):
cursor = db.aql.execute(
"""
@@ -231,7 +241,9 @@ if __name__ == "__main__":
},
#'1113': {'info': 'login:mail:password:emailpassword:birthday:useragent:token:cookie', 'sep': '|'},
"159": {"info": "login:password:mail:email password:birthday:id", "sep": ":"},
#'159': {'info': 'login:password:birthday:id:cookie', 'sep':':'
#'159': {'info': 'login:password:birthday:id:cookie', 'sep':':',
"827": {"info": "login:password:mail:email password:birthday:useragent:token:cookie",
"sep": "|"}
}
###############################
@@ -254,26 +266,15 @@ if __name__ == "__main__":
row = row.replace("https:", "https;") # presumably so URLs survive the split on sep below
data.append(row.split(sep))
# Insert into accs
# Open an SSH tunnel to the RBP/db.
pwd_key = getpass("Password for rsa-key: ")
with open_tunnel(
("studio-garda.asuscomm.com", 2200),
ssh_username="Lasse",
ssh_pkey=paramiko.RSAKey.from_private_key_file(
"/Users/Lasse/.ssh/id_rsa", password=pwd_key
),
ssh_private_key_password=pwd_key,
remote_bind_address=("127.0.0.1", 8529),
) as server:
port_arango = server.local_bind_port
db = arangodb.arango_connect(
"concert-hangar-mirth-salk-DECAL",
username="Accs",
host_arango="http://127.0.0.1",
port_arango=port_arango,
)
#webshare_proxies()
to_accs(db, data, info, profiles, vendor)
# # Insert into accs
db = dbViaSSH.db_over_tunnel('Accs')
#webshare_proxies()
to_accs(db, data, info, profiles, vendor)
dbViaSSH.stop_server()
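dbViaSSH itself is not part of this commit. Judging from the open_tunnel block it replaces, the helper plausibly looks something like this sketch (module shape, names, and prompts are assumptions):

# Hypothetical sketch of dbViaSSH, reconstructed from the removed open_tunnel code.
from getpass import getpass
import paramiko
from sshtunnel import open_tunnel
import arangodb

_server = None

def db_over_tunnel(username):
    """Open an SSH tunnel to the db host and return an ArangoDB handle."""
    global _server
    pwd_key = getpass("Password for rsa-key: ")
    _server = open_tunnel(
        ("studio-garda.asuscomm.com", 2200),
        ssh_username="Lasse",
        ssh_pkey=paramiko.RSAKey.from_private_key_file(
            "/Users/Lasse/.ssh/id_rsa", password=pwd_key
        ),
        remote_bind_address=("127.0.0.1", 8529),
    )
    _server.start()
    return arangodb.arango_connect(
        getpass(f"Password for {username}: "),
        username=username,
        host_arango="http://127.0.0.1",
        port_arango=_server.local_bind_port,
    )

def stop_server():
    if _server is not None:
        _server.stop()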

facebook/accs_to_profiles.py (6 changes)

@@ -5,16 +5,14 @@ from getpass import getpass
from os.path import abspath, dirname
from random import randint
from time import sleep
import base64
import json
import requests
# Make fb-scraper the working directory
chdir(dirname(dirname(abspath(__file__))))
from arangodb import arango_connect
import config
from helpers import now, nowstamp
from helpers import nowstamp
def used_servers(profiles='profiles'):
cursor = db.aql.execute(

facebook/arangodb.py (6 changes)

@@ -19,6 +19,9 @@ if __name__ != '__main__.py':
exit('Wrong password, could not log in to the DB.')
if 'pwd' not in globals():
pwd = getpass(f'Password for {user_arango}: ')
if pwd == '':
db = None
break
try:
db = ArangoClient(hosts=f'{host_arango}:{port_arango}').db(db_arango, username=user_arango, password=pwd)
@@ -79,7 +82,8 @@ def report_blocked(profile):
},
overwrite=True,
)
except:
except Exception as e:
print(e)
_print(profile, profile.container, f'Could not report blocked: {profile.name}.')

facebook/classes.py (3 changes)

@@ -25,6 +25,7 @@ class User:
self.url_likes = ""
self.url_about = ""
self.url_timeline = ""
self.url_album = ""
self.url_profilepictures = ""
self.profile_pictures = 0
self.pictures = []
@@ -270,6 +271,7 @@ class Friend:
self.username = ""
self.url = ""
self.name = ""
self.id = ""
def add_to_db(self):
db.insert_document(
@@ -278,6 +280,7 @@ class Friend:
"_key": str(self.username),
"url": url_bas + self.url,
"name": self.name,
"id_from_seemore_url": self.id
},
overwrite_mode="update",
silent=True,

facebook/config.py (1 change)

@@ -11,6 +11,7 @@ user_arango = "Lasse"
db_arango = "facebook"
host_arango = 'http://192.168.1.10'
port_arango = '8529'
host_adress = "studio-garda.asuscomm.com" # IP/address of the host where the db lives
# Other settings
url_bas = "https://mbasic.facebook.com"

facebook/gephi.py (9 changes)

@@ -10,8 +10,7 @@ from getpass import getpass
import arangodb
locale.setlocale(locale.LC_TIME, "en_US")
import dbViaSSH
def nodes_from_list(
@@ -244,6 +243,12 @@ def common_friends(d, n=2):
pwd = getpass('Password for Lasse: ')
db = arangodb.arango_connect(pwd)
db.collecion('members').random()
try:
db.collection('members').random()
except:
pass
if __name__ == "__main__":

facebook/scrapers.py (221 changes)

@@ -82,7 +82,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
# If there is no profile album
# Try removing spaces and lowercasing
if not hasattr(user, "url_album"):
if user.url_album == "":
for a in profile.viewing().find_all("a", href=True):
if "profilepictures" in a.text.lower().replace(" ", ""):
user.url_album = url_bas + a["href"]
@@ -91,7 +91,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
# Go to the profile picture (the first one shown when opening the profile)
# If the profile has no profile album
if not hasattr(user, "url_album"):
if user.url_album == "":
write_error(9, profile, soup=profile.viewing(), user=user)
if user.url_other_pictures != []:
# Use any extra pictures and remove the picture from the user
@@ -158,7 +158,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
if mode == "single" and user.reactions > 30:
break
elif all([any([mode == "few", mode == "solo"]), user.reactions > 80, pic != url_pics[-1]]):
# Check the last pictures
# Check the last picture
check_picture(url_bas + url_pics[-1], user, profile)
user.checked_pictures.append(url_bas + pic)
break
@@ -171,7 +171,7 @@
def check_picture(url_picture, user, profile):
""" Hämtar reaktioner för en bildprint """
""" Hämtar reaktioner för en bild. """
picture = Picture(user.username)
picture.url = url_picture
@@ -180,8 +180,7 @@ def check_picture(url_picture, user, profile):
picture.id = str(re.search(r"\d+", picture.id).group())
except:
pass
# if picture.id in all_pictures:
# continue
sleep_(5)
try:
@@ -249,8 +248,9 @@ def check_picture(url_picture, user, profile):
for div in profile.viewing().find_all("div", href=True):
if "like this" in div.text:
url_reactions = url_bas + str(div["href"])
# Fetch reactions for the picture
sleep_(3)
profile.open(url_reactions)
@@ -264,7 +264,7 @@ def check_picture(url_picture, user, profile):
picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1)
limit = re.search(r"limit=(\d+)", url_limit).group(
1
) # TODO Still problems with this
)
except UnboundLocalError: # fel9
write_error(
9,
@@ -281,82 +281,149 @@ def check_picture(url_picture, user, profile):
# Add the picture to arango
picture.add_to_db()
# Cap limit at 50, since Facebook will not give more (for the first "click").
try:
if int(picture.no_reactions) > 50:
no_reactions = 50
elif int(picture.no_reactions) == 0:
no_reactions = 0
else:
no_reactions = int(picture.no_reactions) - 1
except TypeError:
#print(picture.no_reactions, type(picture.no_reactions))
no_reactions = picture.no_reactions
#print('\nTOTAL NUMBER OF REACTIONS ON THE PICTURE:', picture.no_reactions)
url_limit = url_bas + url_limit.replace(
"limit=" + str(limit), "limit=" + str(picture.no_reactions)
"limit=" + str(limit), "limit=" + str(no_reactions)
)
list_ids = []
try:
while True:
#try:
sleep_(4)
#print('\nurl_limit'.upper(), url_limit, '\n')
profile.open(url_limit)
url_limit = ""
update_cookie(profile.browser.session.cookies, profile)
#url_limit = "" # Vad gjorde den här?
update_cookie(profile.browser.session.cookies, profile)
# Fetch the "See more" link to see which IDs are shown
url_see_more = None
#print('\nEVERY LINK ON THE PAGE')
for a in profile.viewing().find_all("a"):
#print(a)
if "See More" in a.text: # Om det finns fler reaktioner att hämta
#print('\nFOUND "SEE MORE"\n')
url_see_more = a['href']
ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')]
list_ids_from_url = ids_url.split('%2C') # All IDs so far
#print('\nlist_pictures_from_url\n'.upper(), list_ids_from_url) # List from the link, profiles checked so far(?)
list_ids_page = list_ids_from_url[len(list_ids):] # The profiles on this page
#print('\nlist_ids_picture\n'.upper(), list_ids_page)
list_ids.extend(list_ids_page) # Add the next page's IDs to the list of all IDs so far
# Set the right limit for the next page
limit_next_page = int(picture.no_reactions) - len(list_ids_from_url)
if limit_next_page > 50:
limit_next_page = 50
url_limit = url_bas + url_see_more.replace('limit=10', f'limit={limit_next_page}') # The link to more profiles
#print('\nurl_limit', url_limit, '\n')
# Go through everyone who reacted and insert into arango
for li in profile.viewing().find_all("li"):
friend = Friend(user.username)
if "see more" in li.text.lower():
get_reactions(profile, user, picture, list_ids_page)
if url_see_more is None: # When there are no more reactions
break
# except Exception as e: # Fel2
# write_error(
# 2,
# profile,
# e=e,
# soup=profile.viewing(),
# user=user,
# url=url_limit,
# url_name="url_limit",
# traceback=traceback.format_exc(),
# )
# pass
# Add the reactions to the database
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
# Update the number of reactions the user has received
user.reactions += len(picture.reactions)
def get_reactions(profile, user, picture, list_ids_page):
""" Gather the reactions on the picture.
Args:
profile (class): The active profile.
user (class): The user being scraped.
picture (class): The picture.
list_ids_page (list): List of IDs fetched from the "See more" url
"""
# Go through everyone who reacted and insert into arango
#print('list_ids_picture: ', list_ids_page)
list_ids = list_ids_page.copy()
for li in profile.viewing().find_all("li"):
friend = Friend(user.username)
if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''):
continue
try:
friend_html = li.find("h3").find("a")
friend.name = friend_html.text
friend.url = friend_html["href"]
if "profile.php" in friend.url:
if "&paipv" in friend.url:
friend.username = friend.url[
friend.url.find("=") + 1 : friend.url.find("&")
]
else:
friend.username = friend.url[friend.url.find("id=") + 3 :]
try:
friend_html = li.find("h3").find("a")
friend.name = friend_html.text
friend.url = friend_html["href"]
friend.id = list_ids.pop(0)
if "profile.php" in friend.url:
if "&paipv" in friend.url:
friend.username = friend.url[
friend.url.find("=") + 1 : friend.url.find("&")
]
else:
if "?" in friend.url:
friend.username = friend.url[
friend.url.find("/") + 1 : friend.url.find("?")
]
else:
friend.username = friend.url[friend.url.find("/") + 1 :]
reaction = Reaction(user.username, friend.username, picture.id)
for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
if type in str(li):
reaction.type = type
picture.reactions.append(reaction.get_dict())
# Add the friend's profile to arango
try:
friend.add_to_db()
except:
_print(profile, user, f"Kunde inte lägga till vän {friend.url}")
except AttributeError as e: # Fel1
write_error(
1,
profile,
e=e,
soup=str(li),
user=user,
traceback=traceback.format_exc(),
)
pass
# Add the reactions to the database
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
# Update the number of reactions the user has received
user.reactions += len(picture.reactions)
except Exception as e: # Fel2
write_error(
2,
profile,
e=e,
soup=profile.viewing(),
user=user,
url=url_limit,
url_name="url_limit",
traceback=traceback.format_exc(),
)
pass
friend.username = friend.url[friend.url.find("id=") + 3 :]
else:
if "?" in friend.url:
friend.username = friend.url[
friend.url.find("/") + 1 : friend.url.find("?")
]
else:
friend.username = friend.url[friend.url.find("/") + 1 :]
reaction = Reaction(user.username, friend.username, picture.id)
for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
if type in str(li):
reaction.type = type
picture.reactions.append(reaction.get_dict())
# Add the friend's profile to arango
try:
friend.add_to_db()
except:
_print(profile, user, f"Kunde inte lägga till vän {friend.url}")
except AttributeError as e: # Fel1
write_error(
1,
profile,
e=e,
soup=str(li),
user=user,
traceback=traceback.format_exc(),
)
pass
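The ids= bookkeeping in get_reactions is the subtle part: each "See More" link carries every ID shown so far, so the page's new IDs are the tail beyond what was already collected. A toy walk-through, with an assumed URL shape:

# Toy walk-through of the "See More" ID bookkeeping (URL shape is an assumption).
url_see_more = "/ufi/reaction/profile/browser/?ids=111%2C222%2C333&total_count=120&limit=10"
ids_url = url_see_more[url_see_more.find('ids=') + 4 : url_see_more.find('&total')]
list_ids_from_url = ids_url.split('%2C')          # ['111', '222', '333'] - all IDs so far
list_ids = ['111']                                # IDs handled on earlier pages
list_ids_page = list_ids_from_url[len(list_ids):] # ['222', '333'] - only this page
list_ids.extend(list_ids_page)                    # now covers all three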

facebook/search_leak.py (139 changes)

@@ -3,22 +3,21 @@ Script for searching the FB leak.
"""
import re
import paramiko
import arangodb
from getpass import getpass
from sshtunnel import open_tunnel
from termcolor import cprint
import dbViaSSH
def search(db, attribute, value):
"""
Search for attribute in db.
Returns list of matching documents.
"""
if '%' in value or '_' in value:
match = 'like'
if "%" in value or "_" in value:
match = "like"
else:
match = '=='
match = "=="
cursor = db.aql.execute(
f"""
@@ -30,74 +29,60 @@ def search(db, attribute, value):
)
return [doc for doc in cursor]
pwd_key = getpass(f"Password key: ")
with open_tunnel(
("studio-garda.asuscomm.com", 2200),
ssh_username="Lasse",
ssh_pkey=paramiko.RSAKey.from_private_key_file(
"/Users/Lasse/.ssh/id_rsa", password=pwd_key
),
ssh_private_key_password=pwd_key,
remote_bind_address=("127.0.0.1", 8529),
) as server:
# server.start()
port_arango = server.local_bind_port
db = arangodb.arango_connect(
"gruel-ADOBE-foolish-winy-borax",
username="Leak",
host_arango="http://127.0.0.1",
port_arango=port_arango,
)
cprint("\n\nVad vill du söka efter?", attrs=['bold'])
print("1 - Telefonnummer")
print("2 - Facebook-ID")
print('3 - Namn')
print("4 - Arbete")
print('5 - Bostadsort')
print("6 - Födelseort")
print("7 - Epost")
# Get input for the attribute
attribute = input("\n>>> ")
attributes = {
"1": ("telefonnummer", "phone"),
"2": ("Facebook-ID", "_key"),
"3": ("namn", "full_name"),
"4": ("arbete", "work"),
"5": ('bostadsort', "lives_in"),
"6": ('födelseort', 'from'),
"7": ('epost', 'email')
}
# Decide the Swedish n- or t-form and get input for the value.
if attribute in ['5', '6', '7']:
genus = 'n'
else:
genus = 't'
cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=['bold'])
cprint('Använd % för att ersätta flera okända tecken, _ för att ersätta ett.', attrs=['dark'])
value = input('\n>>> ')
if attribute == '1': # phone number
value = ''.join(re.findall(r'\d+', value))
if value[0] == '0':
value = f'46{value[1:]}'
elif attribute == '3': # name
value = value.upper()
# Search the database.
result = search(db, attributes[attribute][1], value)
# Present the results #TODO how should they be delivered? Saved to disk?
for i in result:
print('\n', i['full_name'])
for key, value in i.items():
print(f'{key}: {value}')
print(f'https://facebook.com/{i["_key"]}')
print(f'\nAntal träffar: {len(result)}\n')
db = dbViaSSH.db_over_tunnel("Leak")
cprint("\n\nVad vill du söka efter?", attrs=["bold"])
print("1 - Telefonnummer")
print("2 - Facebook-ID")
print("3 - Namn")
print("4 - Arbete")
print("5 - Bostadsort")
print("6 - Födelseort")
print("7 - Epost")
# Get input for the attribute
attribute = input("\n>>> ")
attributes = {
"1": ("telefonnummer", "phone"),
"2": ("Facebook-ID", "_key"),
"3": ("namn", "full_name"),
"4": ("arbete", "work"),
"5": ("bostadsort", "lives_in"),
"6": ("födelseort", "from"),
"7": ("epost", "email"),
}
# Decide the Swedish n- or t-form and get input for the value.
if attribute in ["5", "6", "7"]:
genus = "n"
else:
genus = "t"
cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=["bold"])
cprint(
"Använd % för att ersätta flera okända tecken, _ för att ersätta ett.",
attrs=["dark"],
)
value = input("\n>>> ")
if attribute == "1": # telefonnummer
value = "".join(re.findall(r"\d+", value))
if value[0] == "0":
value = f"46{value[1:]}"
elif attribute == "3": # namn
value = value.upper()
# Search the database.
result = search(db, attributes[attribute][1], value)
# Present the results #TODO how should they be delivered? Saved to disk?
for i in result:
print("\n", i["full_name"])
for key, value in i.items():
print(f"{key}: {value}")
print(f'https://facebook.com/{i["_key"]}')
print(f"\nAntal träffar: {len(result)}\n")
dbViaSSH.stop_server()

requirements.txt (3 changes)

@@ -6,7 +6,6 @@ idna==2.10
lxml
pycparser==2.20
PyJWT==2.0.1
#PyNaCl==1.4.0
PySocks==1.7.1
python-arango==7.1.0
requests==2.25.1
@@ -18,4 +17,4 @@ soupsieve==2.2
toml==0.10.2
urllib3==1.26.3
Werkzeug==1.0.1
json2html
json2html