Lasse Studion 4 years ago
commit 6698ab6faa
20 changed files (changed line counts in parentheses):

1. .gitignore (18)
2. Dockerfile (2)
3. docker/free/Dockerfile (19)
4. docker/mrkoll/Dockerfile (13)
5. docker/mrkoll/mrkoll_scraperapi.py (194)
6. docker/mrkoll/requirements.txt (14)
7. docker/profile_pictures/images.py (70)
8. docker/stats/Dockerfile (15)
9. docker/stats/requirements.txt (27)
10. docker/stats/stats.py (38)
11. facebook/__main__.py (3)
12. facebook/accs_to_db.py (73)
13. facebook/accs_to_profiles.py (6)
14. facebook/arangodb.py (6)
15. facebook/classes.py (3)
16. facebook/config.py (1)
17. facebook/gephi.py (9)
18. facebook/scrapers.py (137)
19. facebook/search_leak.py (79)
20. requirements.txt (1)

.gitignore
@@ -1,18 +1,28 @@
+# Blandat
 /.DS_Store
-/.venv
+*.venv
 /.vscode
 /__pycache__
 *.json
 *.pkl
-facebook/test.py
 /data/*
 *.html
 *.code-workspace
 workspace.code-workspace
 password_arango.txt
 *.gexf
-facebook/mrkoll.
 *.pyc
+*.sqlite3
+#facebook
 /facebook
 !/facebook/*.py
-*.sqlite3
+facebook/test.py
+facebook/mrkoll.
+# docker
+/stats/*
+!/stats/*.py
+requirements2.txt

Dockerfile
@@ -17,5 +17,5 @@ CMD ["",""]
 # BUILD:
 # docker buildx create --use
-#docker buildx build --platform linux/arm64,linux/arm64,linux/amd64 -t l3224/fb-scraper:pi --push .
+#docker buildx build --platform linux/arm,linux/arm64,linux/amd64 -t l3224/fb-scraper:VERSION --push .

docker/free/Dockerfile (deleted)
@@ -1,19 +0,0 @@
FROM python:3.8
WORKDIR /
COPY requirements.txt .
RUN pip install -r requirements.txt
ADD . .
ENTRYPOINT [ "python", "facebook/__main__.py", "-p free" ]
CMD ["",""]
# BUILD:
# docker buildx create --use
#docker buildx build --file docker/free/Dockerfile --platform linux/arm -t l3224/fb-scraper:free --push .

docker/mrkoll/Dockerfile
@@ -1,14 +1,15 @@
-FROM python:3.8
-WORKDIR /
-COPY requirements.txt .
-RUN pip install -r requirements.txt
-ADD . .
-ENTRYPOINT [ "python", "facebook/mrkoll.py" ]
+# syntax=docker/dockerfile:1
+FROM python:3.8-slim-buster
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+COPY . .
+ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ]
+CMD [""]
 # docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push .

docker/mrkoll/mrkoll_scraperapi.py (new file)
@@ -0,0 +1,194 @@
import re
import subprocess
import requests
from sys import argv
from time import sleep
from bs4 import BeautifulSoup
from arango import ArangoClient


def find_person(number):
    """
    Looks up personal details from a phone number.
    """
    sleep(2)
    url = f'https://mrkoll.se/resultat?n={number}'
    api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
    payload = {'api_key': api_key, 'url': url, 'country_code': 'se', 'device_type': 'desktop'}
    response = requests.get('http://api.scraperapi.com', params=payload)
    r = response.text
    # Parse the page
    soup = BeautifulSoup(r, 'html.parser')
    if (
        "Du har gjort för många anrop" in soup.text
        or response.url == "https://mrkoll.se/om/limit/"
    ):
        sleep(10)
        return None
    # Collect the data into a dictionary
    d = {}
    d["url_via_telefonnummer"] = response.url
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except:
        pass
    if "Sökningen gav 0 träffar..." in soup.text:
        return {}
    info = soup.find("div", {"class": "block_col1"})
    try:
        d["first_name"] = info.find(
            "span", {"title": "Detta är personens tilltalsnamn"}
        ).text
    except:
        pass
    try:
        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
    except:
        pass
    try:
        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
    except:
        pass
    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except:
        pass
    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except:
        pass
    # Personal identity number
    ## Date of birth
    for i in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in i.text:
            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
                "-XXXX", ""
            )
    ## Last four digits
    try:
        start = "showPersnr"
        end = ">Jag godkänner</span>"
        t = str(soup)
        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        sleep(2)  # Wait a little
        four_last = requests.get("http://mrkoll.se" + url_ajax).text
        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
    except:
        pass
    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            persons = div.find_all("a", href=True)
            for person in persons:
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    "lived_years": re.search(
                        "\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group()[0],
                }
        d["neighbours"] = neighbours
    except:
        pass
    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except:
        pass
    try:
        prosecuted = {}
        prosecuted["brottsmål"] = (
            True if soup.find("div", {"class": "resmark res_b"}) != None else False
        )
        prosecuted["tvistemål"] = (
            True if soup.find("div", {"class": "resmark res_t"}) != None else False
        )
        prosecuted["straffföreläggande"] = (
            True if soup.find("div", {"class": "resmark res_s"}) != None else False
        )
        d["prosecuted"] = prosecuted
    except:
        pass
    return d


if __name__ == "__main__":
    ip = 'scraperapi'
    if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12':
        print('\nMULLVAD INTE AKTIV\n')
        exit()
    # ArangoDB connection info
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.1.10:8529"
    # Open the connection to ArangoDB
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=argv[1]
    )
    leak = db.collection("phoneleak")
    count = 0
    scraper_count = 0
    global errors
    errors = 0
    while True:
        count += 1
        # Fetch a random person
        doc = leak.random()
        # Run the lookup on mrkoll.se
        d = find_person(doc["phone"])
        try:
            name = d["first_name"] + ' '
        except:
            name = ' '
        print(f'{count} - {errors} {name}', end="\r")
        if d == None:  # If the IP address is blocked or something went wrong
            continue
        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = f'{ip} - cache'
        try:
            db.collection("phone").insert(d)
            leak.delete(doc["_key"])
        except:
            pass
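The least obvious step in find_person is how the personal number gets completed: the result page embeds a showPersnr(p, k) JavaScript call, and those two arguments are passed to the /ajax/lastDigits/ endpoint, whose response body is the last four digits. A minimal, self-contained sketch of just that extraction, with an invented HTML snippet and a hypothetical helper name (the real scraper slices the raw HTML with str.find instead of a regex):

import re

def last_digits_url(page_html):
    """Build the /ajax/lastDigits/ URL from a showPersnr('<p>', '<k>') call in the page.
    Returns None if the call is not present."""
    m = re.search(r"showPersnr\('([^']+)',\s*'([^']+)'\)", page_html)
    if not m:
        return None
    p, k = m.group(1), m.group(2)
    return f"https://mrkoll.se/ajax/lastDigits/?p={p}&k={k}"

# Invented example snippet; the real page puts the call in an onclick attribute.
snippet = "<span onclick=\"showPersnr('12345', 'abcde')\">Jag godkänner</span>"
url = last_digits_url(snippet)
print(url)  # https://mrkoll.se/ajax/lastDigits/?p=12345&k=abcde
# In the scraper, the body of the GET response for this URL is the four last digits:
# four_last = requests.get(url).text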

docker/mrkoll/requirements.txt (new file)
@@ -0,0 +1,14 @@
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2021.5.30
charset-normalizer==2.0.4
idna==3.2
PyJWT==2.1.0
python-arango==7.2.0
requests==2.26.0
requests-toolbelt==0.9.1
setuptools-scm==6.0.1
soupsieve==2.2.1
toml==0.10.2
urllib3==1.26.6
requests_cache==0.7.4

docker/profile_pictures/images.py (new file)
@@ -0,0 +1,70 @@
import requests
import os
from datetime import date, datetime, timedelta
from time import sleep
from arangodb import db


def download_image(url, user, id):
    # Make sure the user's directory exists
    if not os.path.isdir(f'../profile_pictures/{user}'):
        os.mkdir(f'../profile_pictures/{user}')
    # Download the image
    r = requests.get(url)
    if r.text == 'URL signature expired':
        print('För gammal länk.')
        exit()
    elif r.status_code == 403:
        exit()
    img_data = r.content
    with open(f'../profile_pictures/{user}/{id}.jpg', 'wb') as handler:
        handler.write(img_data)


def get_pictures(day):
    cursor = db.aql.execute(
        """
        for doc in members
        filter doc.fetched == @date
        filter has(doc, "checked_pictures")
        filter not has(doc, "pictures_downloaded")
        return {'member': doc._key, 'pictures': doc.checked_pictures}
        """,
        bind_vars={'date': day}
    )
    for doc in cursor:
        pictures = []
        for picture in doc['pictures']:
            pictures.append(picture[picture.find('fbid=') + 5:])
        cursor = db.aql.execute(
            """
            for doc in pictures
            filter doc._key in @list
            limit 10
            return {'_key': doc._key, 'user': doc.user, 'url': doc.src}
            """,
            bind_vars={"list": pictures},
        )
        for picture in cursor:
            download_image(picture['url'], picture['user'], picture['_key'])
            print(picture['_key'])
            sleep(2)
        db.update_document({'_id': 'members/' + str(doc['member']), 'pictures_downloaded': True}, silent=True, check_rev=False)


def old_pics():
    if not os.path.isdir(f'../profile_pictures'):
        os.mkdir(f'../profile_pictures')
    start = date.today()
    for i in range(1, 60):
        d = start - timedelta(days=i)
        get_pictures(d.strftime('%Y%m%d'))

docker/stats/Dockerfile (new file)
@@ -0,0 +1,15 @@
FROM python:alpine
WORKDIR /
RUN apk add --update --no-cache g++ gcc libxslt-dev
COPY requirements.txt .
RUN pip install -r requirements.txt
ADD . .
ENTRYPOINT [ "python", "stats.py" ]
# docker buildx build --file docker/stats/Dockerfile --platform linux/arm64,linux/amd64 -t mrkoll .

docker/stats/requirements.txt (new file)
@@ -0,0 +1,27 @@
black==21.8b0
certifi==2020.6.20
chardet==4.0.0
click==8.0.1
httplib2==0.18.1
idna==2.10
mypy-extensions==0.4.3
packaging==21.0
pathspec==0.9.0
platformdirs==2.3.0
#pycurl==7.43.0.6
PyJWT==2.1.0
pyparsing==2.4.7
PySimpleSOAP==1.16.2
#python-apt==2.2.1
python-arango==7.2.0
python-debian==0.1.39
python-debianbts==3.1.0
regex==2021.8.28
#reportbug==7.10.3
requests==2.25.1
requests-toolbelt==0.9.1
setuptools-scm==6.3.1
six==1.16.0
tomli==1.2.1
typing-extensions==3.10.0.2
urllib3==1.26.5

docker/stats/stats.py
@@ -3,7 +3,6 @@ from getpass import getpass
 from time import sleep
 from arango import ArangoClient
-from json2html import json2html

 def now():
@@ -14,9 +13,9 @@ def write_stats(db, continuous=False):
     while True:
         d = {}
         for col in db.collections():
-            if not col['system']:
-                d[col['name']] = db.collection(col['name']).count()
-        del d['stats']
+            if not col["system"]:
+                d[col["name"]] = db.collection(col["name"]).count()
+        del d["stats"]
         # d['time'] = now()
         cursor = db.aql.execute(
             """
@@ -26,49 +25,42 @@ def write_stats(db, continuous=False):
             RETURN length
             """
         )
-        d['checked_members'] = cursor.next()
+        d["checked_members"] = cursor.next()
         # How many accounts per vendor remain
         cursor = db.aql.execute(
-            '''
-            for doc in profiles
+            """
+            for doc in profiles_webshare
             filter has(doc, "vendor")
             COLLECT vendor = doc.vendor WITH COUNT INTO length
             RETURN {
                 "vendor" : vendor,
                 "active" : length
             }
-            ''')
-        d['active_vendors'] = [doc for doc in cursor]
+            """
+        )
+        d["active_vendors"] = [doc for doc in cursor]
-        d['_key'] = now()[:13]
+        d["_key"] = now()[:13]
         db.insert_document("stats", d, overwrite=True)
-        # Write an HTML file
-        with open('website/fb-webbapp/stats.html', 'a+') as html:
-            html.truncate(0)
-            html.write('<!DOCTYPE html> <br>')
-            html.write(json2html.convert(json = d))
         # Sleep before continuing later
         if continuous:
-            print(now())
             sleep(86400)
         else:
             break

 # ArangoDB connection info
 user_arango = "Stats"
 db_arango = "facebook"
-host_arango = "http://192.168.0.4:8529"
+host_arango = "http://192.168.1.10:8529"
 # Open the connection to ArangoDB
 # Decrypt the Arango password
-pwd = getpass(f'Arangolösenord för {user_arango}:').strip()
-db = ArangoClient(hosts=host_arango).db(
-    db_arango, username=user_arango, password=pwd
-)
+pwd = getpass(f"Arangolösenord för {user_arango}:").strip()
+db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)
 write_stats(db, continuous=True)
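The per-vendor count above relies on AQL's COLLECT ... WITH COUNT INTO aggregation. A minimal, standalone sketch of running just that query with python-arango, with placeholder host and password (the real script prompts via getpass):

from arango import ArangoClient

# Placeholder connection details for illustration only.
db = ArangoClient(hosts="http://localhost:8529").db(
    "facebook", username="Stats", password="..."
)
cursor = db.aql.execute(
    """
    FOR doc IN profiles_webshare
      FILTER HAS(doc, "vendor")
      COLLECT vendor = doc.vendor WITH COUNT INTO length
      RETURN { "vendor": vendor, "active": length }
    """
)
print([row for row in cursor])  # e.g. [{"vendor": "159", "active": 12}, ...]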

facebook/__main__.py
@@ -160,6 +160,7 @@ if __name__ == "__main__":
             )
             if profile.blocked:
                 profile = blocked_profile(profile, proxieservers=proxieservers)
+                user = User(str(userdoc['_key']).strip(), mode, other_pictures=url_other_pictures)
             else:
                 break
         except:
@@ -175,7 +176,7 @@ if __name__ == "__main__":
     friends_unchecked = []
     for friend in friends:
-        if not check_for_user(friend):
+        if not check_for_user(friend) and friend not in friends_unchecked:
             print(friend)
             friends_unchecked.append(friend)

facebook/accs_to_db.py
@@ -6,12 +6,10 @@ from time import sleep
 import base64
 import json
 import requests
-from sshtunnel import open_tunnel
-import paramiko
-from getpass import getpass
-import arangodb
 import config
 from helpers import now
+import dbViaSSH

 # Make fb-scraper the working directory
 chdir(dirname(dirname(abspath(__file__))))
@@ -88,16 +86,21 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
     n1 = 0
     for profile in data:
         if len(profile) < 3:
             continue
         doc = {}
         doc["vendor"] = vendor
         doc["created"] = now()
         if "email" in info:
             doc["email"] = profile[info.index("email")]
         elif "login" in info:
             doc["email"] = profile[info.index("login")]
         if doc["email"] in used_accs or doc["email"] in used_profiles:
             n1 += 1
             continue
@@ -120,11 +123,9 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
             for c in cookies.split(";"):
                 cookie[c[: c.find("=")].strip()] = c[c.find("=") + 1 :].strip()
         else:
-            try:
+            #try:
             cookies_base64 = cookies.strip()  # .strip('=')
-            # print()
-            # print(cookies_base64)
-            # print()
             cookies64_bytes = cookies_base64.encode("ascii")
             cookies_bytes = base64.b64decode(cookies64_bytes)
             # exit()
@@ -134,23 +135,33 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
.replace("False", "false") .replace("False", "false")
.replace("True", "true") .replace("True", "true")
) )
if vendor in ["827"]:
cookies = {}
for c in cookies_str.split(';'):
cookies[c[:c.find('=')]] = c[c.find('=')+1:]
else:
cookies = json.loads(cookies_str) cookies = json.loads(cookies_str)
cookie = {} cookie = {}
if vendor == "159": if vendor in ["159"]:
for c in cookies["cookies"]: for c in cookies["cookies"]:
cookie[c["name"]] = c["value"] cookie[c["name"]] = c["value"]
elif vendor in ["827"]:
cookie = cookies
else: else:
for c in cookies: for c in cookies:
name = c["name"] name = c["name"]
del c["name"] del c["name"]
cookie[name] = c["value"] cookie[name] = c["value"]
doc["cookie"] = cookie doc["cookie"] = cookie
except Exception as e: # except Exception as e:
print('\n\nFel på cookie.\n', e, '\n') # print('\n\nFel på cookie.\n', e, '\n')
for i in profile: # for i in profile:
print(i) # print(i)
continue
# exit()
else: else:
cookie = {} cookie = {}
if "birthday" in info: if "birthday" in info:
@@ -163,7 +174,6 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"):
     print(f'\nInlagda profiler: {n0}\nProfiler redan i db: {n1}')

 def used_servers(profiles="profiles"):
     cursor = db.aql.execute(
         """
@@ -231,7 +241,9 @@ if __name__ == "__main__":
         },
         #'1113': {'info': 'login:mail:password:emailpassword:birthday:useragent:token:cookie', 'sep': '|'},
         "159": {"info": "login:password:mail:email password:birthday:id", "sep": ":"},
-        #'159': {'info': 'login:password:birthday:id:cookie', 'sep':':'
+        #'159': {'info': 'login:password:birthday:id:cookie', 'sep':':',
+        "827": {"info": "login:password:mail:email password:birthday:useragent:token:cookie",
+                "sep": "|"}
     }
############################### ###############################
@@ -254,26 +266,15 @@ if __name__ == "__main__":
         row = row.replace("https:", "https;")
         data.append(row.split(sep))

     # Insert into accs
-    # Open an SSH tunnel to the RBP/db.
-    pwd_key = getpass("Password for rsa-key: ")
-    with open_tunnel(
-        ("studio-garda.asuscomm.com", 2200),
-        ssh_username="Lasse",
-        ssh_pkey=paramiko.RSAKey.from_private_key_file(
-            "/Users/Lasse/.ssh/id_rsa", password=pwd_key
-        ),
-        ssh_private_key_password=pwd_key,
-        remote_bind_address=("127.0.0.1", 8529),
-    ) as server:
-        port_arango = server.local_bind_port
-        db = arangodb.arango_connect(
-            "concert-hangar-mirth-salk-DECAL",
-            username="Accs",
-            host_arango="http://127.0.0.1",
-            port_arango=port_arango,
-        )
+    db = dbViaSSH.db_over_tunnel('Accs')

     #webshare_proxies()
     to_accs(db, data, info, profiles, vendor)
+    dbViaSSH.stop_server()
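to_accs above ingests purchased account lists and, for most vendors, decodes a session cookie that arrives as a base64-encoded JSON blob before storing it. A minimal, self-contained sketch of that decode-and-flatten step; the function name and example blob are invented for illustration, and real vendor exports vary in shape, which is what the vendor-specific branches handle:

import base64
import json

def decode_cookie_blob(cookies_b64):
    """Decode a base64 JSON cookie export into a flat {name: value} dict."""
    raw = base64.b64decode(cookies_b64.strip().encode("ascii")).decode("ascii")
    # Some exports use Python-style booleans; normalise them before json.loads,
    # mirroring the .replace("True", "true") calls in to_accs.
    raw = raw.replace("False", "false").replace("True", "true")
    cookies = json.loads(raw)
    return {c["name"]: c["value"] for c in cookies}

# Invented example blob: a list of cookie objects, the way browser extensions export them.
blob = base64.b64encode(json.dumps(
    [{"name": "c_user", "value": "100001234567890"}, {"name": "xs", "value": "abc123"}]
).encode("ascii")).decode("ascii")
print(decode_cookie_blob(blob))  # {'c_user': '100001234567890', 'xs': 'abc123'}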

facebook/accs_to_profiles.py
@@ -5,16 +5,14 @@ from getpass import getpass
 from os.path import abspath, dirname
 from random import randint
 from time import sleep
-import base64
-import json
-import requests

 # Make fb-scraper the working directory
 chdir(dirname(dirname(abspath(__file__))))

 from arangodb import arango_connect
 import config
-from helpers import now, nowstamp
+from helpers import nowstamp

 def used_servers(profiles='profiles'):
     cursor = db.aql.execute(

facebook/arangodb.py
@@ -19,6 +19,9 @@ if __name__ != '__main__.py':
         exit('Fel lösenord, kunde inte logga in i DB.')
     if 'pwd' not in globals():
         pwd = getpass(f'Lösenord för {user_arango}: ')
+        if pwd == '':
+            db = None
+            break
     try:
         db = ArangoClient(hosts=f'{host_arango}:{port_arango}').db(db_arango, username=user_arango, password=pwd)
@@ -79,7 +82,8 @@ def report_blocked(profile):
             },
             overwrite=True,
         )
-    except:
+    except Exception as e:
+        print(e)
         _print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.')

facebook/classes.py
@@ -25,6 +25,7 @@ class User:
         self.url_likes = ""
         self.url_about = ""
         self.url_timeline = ""
+        self.url_album = ""
         self.url_profilepictures = ""
         self.profile_pictures = 0
         self.pictures = []
@@ -270,6 +271,7 @@ class Friend:
         self.username = ""
         self.url = ""
         self.name = ""
+        self.id = ""

     def add_to_db(self):
         db.insert_document(
@@ -278,6 +280,7 @@ class Friend:
                 "_key": str(self.username),
                 "url": url_bas + self.url,
                 "name": self.name,
+                'id_from_seemore_url': self.id
             },
             overwrite_mode="update",
             silent=True,

facebook/config.py
@@ -11,6 +11,7 @@ user_arango = "Lasse"
db_arango = "facebook" db_arango = "facebook"
host_arango = 'http://192.168.1.10' host_arango = 'http://192.168.1.10'
port_arango = '8529' port_arango = '8529'
host_adress = "studio-garda.asuscomm.com" #IP/adress till där db finns
# Andra uppgifter # Andra uppgifter
url_bas = "https://mbasic.facebook.com" url_bas = "https://mbasic.facebook.com"

facebook/gephi.py
@@ -10,8 +10,7 @@ from getpass import getpass
 import arangodb

 locale.setlocale(locale.LC_TIME, "en_US")
-import dbViaSSH

 def nodes_from_list(
@@ -244,6 +243,12 @@ def common_friends(d, n=2):

 pwd = getpass('Password for Lasse: ')
 db = arangodb.arango_connect(pwd)
-db.collecion('members').random()
+try:
+    db.collecion('members').random()
+except:
+    pass

 if __name__ == "__main__":

facebook/scrapers.py
@@ -82,7 +82,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
     # If there is no profile album
     # Try removing spaces and lower-case letters
-    if not hasattr(user, "url_album"):
+    if user.url_album == "":
         for a in profile.viewing().find_all("a", href=True):
             if "profilepictures" in a.text.lower().replace(" ", ""):
                 user.url_album = url_bas + a["href"]
@@ -91,7 +91,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
     # Go to the profile picture (the first one shown when opening the profile)
     # If the profile has no profile album
-    if not hasattr(user, "url_album"):
+    if user.url_album == "":
         write_error(9, profile, soup=profile.viewing(), user=user)
         if user.url_other_pictures != []:
             # Use any extra pictures and remove it from the user
@@ -158,7 +158,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
         if mode == "single" and user.reactions > 30:
             break
         elif all([any([mode == "few", mode == "solo"]), user.reactions > 80, pic != url_pics[-1]]):
             # Check the last picture
             check_picture(url_bas + url_pics[-1], user, profile)
             user.checked_pictures.append(url_bas + pic)
             break
@@ -171,7 +171,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):

 def check_picture(url_picture, user, profile):
-    """ Fetches the reactions for a picture print """
+    """ Fetches the reactions for a picture. """
     picture = Picture(user.username)
     picture.url = url_picture
@@ -180,8 +180,7 @@ def check_picture(url_picture, user, profile):
         picture.id = str(re.search("\d+", picture.id).group())
     except:
         pass
-    # if picture.id in all_pictures:
-    #     continue
     sleep_(5)

     try:
@@ -251,6 +250,7 @@ def check_picture(url_picture, user, profile):
             url_reactions = url_bas + str(div["href"])

     # Fetch the reactions for the picture
     sleep_(3)
     profile.open(url_reactions)
@@ -264,7 +264,7 @@ def check_picture(url_picture, user, profile):
         picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1)
         limit = re.search(r"limit=(\d+)", url_limit).group(
             1
-        )  # TODO Still having problems with this
+        )
     except UnboundLocalError:  # fel9
         write_error(
             9,
@@ -281,25 +281,114 @@ def check_picture(url_picture, user, profile):
     # Add the picture to Arango
     picture.add_to_db()

+    # Cap limit at 50, since Facebook does not return more (for the first "click").
+    try:
+        if int(picture.no_reactions) > 50:
+            no_reactions = 50
+        elif int(picture.no_reactions) == 0:
+            no_reactions = 0
+        else:
+            no_reactions = int(picture.no_reactions) - 1
+    except TypeError:
+        #print(picture.no_reactions, type(picture.no_reactions))
+        no_reactions = picture.no_reactions
+    #print('\nANTAL REAKTIONER TOTALT PÅ BILDEN:', picture.no_reactions)
     url_limit = url_bas + url_limit.replace(
-        "limit=" + str(limit), "limit=" + str(picture.no_reactions)
+        "limit=" + str(limit), "limit=" + str(no_reactions)
     )
-    try:
+    list_ids = []
+    while True:
+        #try:
         sleep_(4)
+        #print('\nurl_limit'.upper(), url_limit, '\n')
         profile.open(url_limit)
-        url_limit = ""
+        #url_limit = ""  # What did this do?
         update_cookie(profile.browser.session.cookies, profile)
+        # Fetch the "See more" link to see which IDs are shown
+        url_see_more = None
+        #print('\nVARJE LÄNK PÅ SIDAN')
         for a in profile.viewing().find_all("a"):
+            #print(a)
+            if "See More" in a.text:  # If there are more reactions to fetch
+                #print('\nHITTADE "SEE MORE"\n')
+                url_see_more = a['href']
+                ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')]
+                list_ids_from_url = ids_url.split('%2C')  # All IDs so far
+                #print('\nlist_pictures_from_url\n'.upper(), list_ids_from_url)  # List from the link of profiles checked so far(?)
+                list_ids_page = list_ids_from_url[len(list_ids):]  # The profiles on this page
+                #print('\nlist_ids_picture\n'.upper(), list_ids_page)
+                list_ids.extend(list_ids_page)  # Add the next page's IDs to the list of all IDs so far
+                # Set the right limit for the next page
+                limit_next_page = int(picture.no_reactions) - len(list_ids_from_url)
+                if limit_next_page > 50:
+                    limit_next_page = 50
+                url_limit = url_bas + url_see_more.replace('limit=10', f'limit={limit_next_page}')  # The link to more profiles
+                #print('\nurl_limit', url_limit, '\n')
+        # Go through everyone who reacted and insert them into Arango
+        get_reactions(profile, user, picture, list_ids_page)
+        if url_see_more == None:  # When there are no more reactions
+            break
+        # except Exception as e:  # Fel2
+        #     write_error(
+        #         2,
+        #         profile,
+        #         e=e,
+        #         soup=profile.viewing(),
+        #         user=user,
+        #         url=url_limit,
+        #         url_name="url_limit",
+        #         traceback=traceback.format_exc(),
+        #     )
+        #     pass
+    # Add the reactions to the database
+    db.collection("picture_reactions").insert_many(
+        picture.reactions, silent=True, overwrite=True
+    )
+    db.collection("picture_reactions").insert_many(
+        picture.reactions, silent=True, overwrite=True
+    )
+    # Update the number of reactions the user has received
+    user.reactions += len(picture.reactions)

+def get_reactions(profile, user, picture, list_ids_page):
+    """ Gather the reactions on the picture.
+
+    Args:
+        profile (class): The active profile.
+        user (class): The user being scraped.
+        picture (class): The picture.
+        list_ids_picture (list): List of ID:s fetched from the "See more" url
+    """
     # Go through everyone who reacted and insert them into Arango
+    #print('list_ids_picture: ', list_ids_page)
+    list_ids = list_ids_page.copy()
     for li in profile.viewing().find_all("li"):
         friend = Friend(user.username)
-        if "see more" in li.text.lower():
+        if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''):
             continue
         try:
             friend_html = li.find("h3").find("a")
             friend.name = friend_html.text
             friend.url = friend_html["href"]
+            friend.id = list_ids.pop(0)
             if "profile.php" in friend.url:
                 if "&paipv" in friend.url:
                     friend.username = friend.url[
@@ -316,9 +405,11 @@ def check_picture(url_picture, user, profile):
             friend.username = friend.url[friend.url.find("/") + 1 :]
             reaction = Reaction(user.username, friend.username, picture.id)
             for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
                 if type in str(li):
                     reaction.type = type
             picture.reactions.append(reaction.get_dict())

             # Add the friend's profile to Arango
             try:
@@ -336,27 +427,3 @@ traceback=traceback.format_exc(),
                     traceback=traceback.format_exc(),
                 )
                 pass
-        # Add the reactions to the database
-        db.collection("picture_reactions").insert_many(
-            picture.reactions, silent=True, overwrite=True
-        )
-        db.collection("picture_reactions").insert_many(
-            picture.reactions, silent=True, overwrite=True
-        )
-        # Update the number of reactions the user has received
-        user.reactions += len(picture.reactions)
-    except Exception as e:  # Fel2
-        write_error(
-            2,
-            profile,
-            e=e,
-            soup=profile.viewing(),
-            user=user,
-            url=url_limit,
-            url_name="url_limit",
-            traceback=traceback.format_exc(),
-        )
-        pass

facebook/search_leak.py
@@ -3,22 +3,21 @@ Skript för att söka i FB-läckan.
""" """
import re import re
import paramiko
import arangodb
from getpass import getpass
from sshtunnel import open_tunnel
from termcolor import cprint from termcolor import cprint
import dbViaSSH
def search(db, attribute, value): def search(db, attribute, value):
""" """
Search for attribute in db. Search for attribute in db.
Returns list of matching documents. Returns list of matching documents.
""" """
if '%' in value or '_' in value: if "%" in value or "_" in value:
match = 'like' match = "like"
else: else:
match = '==' match = "=="
cursor = db.aql.execute( cursor = db.aql.execute(
f""" f"""
@@ -30,33 +29,15 @@ def search(db, attribute, value):
     )
     return [doc for doc in cursor]

-pwd_key = getpass(f"Password key: ")
-with open_tunnel(
-    ("studio-garda.asuscomm.com", 2200),
-    ssh_username="Lasse",
-    ssh_pkey=paramiko.RSAKey.from_private_key_file(
-        "/Users/Lasse/.ssh/id_rsa", password=pwd_key
-    ),
-    ssh_private_key_password=pwd_key,
-    remote_bind_address=("127.0.0.1", 8529),
-) as server:
-    # server.start()
-    port_arango = server.local_bind_port
-    db = arangodb.arango_connect(
-        "gruel-ADOBE-foolish-winy-borax",
-        username="Leak",
-        host_arango="http://127.0.0.1",
-        port_arango=port_arango,
-    )
-    cprint("\n\nVad vill du söka efter?", attrs=['bold'])
+db = dbViaSSH.db_over_tunnel("Leak")

+cprint("\n\nVad vill du söka efter?", attrs=["bold"])
 print("1 - Telefonnummer")
 print("2 - Facebook-ID")
-print('3 - Namn')
+print("3 - Namn")
 print("4 - Arbete")
-print('5 - Bostadsort')
+print("5 - Bostadsort")
 print("6 - Födelseort")
 print("7 - Epost")
@@ -67,26 +48,29 @@ with open_tunnel(
"2": ("Facebook-ID", "_key"), "2": ("Facebook-ID", "_key"),
"3": ("namn", "full_name"), "3": ("namn", "full_name"),
"4": ("arbete", "work"), "4": ("arbete", "work"),
"5": ('bostadsort', "lives_in"), "5": ("bostadsort", "lives_in"),
"6": ('födelseort', 'from'), "6": ("födelseort", "from"),
"7": ('epost', 'email') "7": ("epost", "email"),
} }
# Bestäm n- eller t-form och få input för värde. # Bestäm n- eller t-form och få input för värde.
if attribute in ['5', '6', '7']: if attribute in ["5", "6", "7"]:
genus = 'n' genus = "n"
else: else:
genus = 't' genus = "t"
cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=['bold']) cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=["bold"])
cprint('Använd % för att ersätta flera okända tecken, _ för att ersätta ett.', attrs=['dark']) cprint(
value = input('\n>>> ') "Använd % för att ersätta flera okända tecken, _ för att ersätta ett.",
attrs=["dark"],
)
value = input("\n>>> ")
if attribute == '1': # telefonnummer if attribute == "1": # telefonnummer
value = ''.join(re.findall(r'\d+', value)) value = "".join(re.findall(r"\d+", value))
if value[0] == '0': if value[0] == "0":
value = f'46{value[1:]}' value = f"46{value[1:]}"
elif attribute == '3': # namn elif attribute == "3": # namn
value = value.upper() value = value.upper()
# Sök i databasen. # Sök i databasen.
@@ -94,10 +78,11 @@ with open_tunnel(
 # Present the results  # TODO how should they be delivered? Save them to disk?
 for i in result:
-    print('\n', i['full_name'])
+    print("\n", i["full_name"])
     for key, value in i.items():
-        print(f'{key}: {value}')
+        print(f"{key}: {value}")
     print(f'https://facebook.com/{i["_key"]}')
-print(f'\nAntal träffar: {len(result)}\n')
+print(f"\nAntal träffar: {len(result)}\n")
+dbViaSSH.stop_server()

requirements.txt
@@ -6,7 +6,6 @@ idna==2.10
 lxml
 pycparser==2.20
 PyJWT==2.0.1
-#PyNaCl==1.4.0
 PySocks==1.7.1
 python-arango==7.1.0
 requests==2.25.1
