From 0ea4ea529b1584e9135e53e96d6de7c91583b303 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 7 Sep 2021 20:30:59 +0200 Subject: [PATCH 01/24] Added 827 as vendor --- facebook/accs_to_db.py | 84 ++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/facebook/accs_to_db.py b/facebook/accs_to_db.py index acb3f07..01887ea 100644 --- a/facebook/accs_to_db.py +++ b/facebook/accs_to_db.py @@ -86,18 +86,23 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"): n0 = 0 n1 = 0 - + for profile in data: + if len(profile) < 3: continue doc = {} doc["vendor"] = vendor doc["created"] = now() + if "email" in info: doc["email"] = profile[info.index("email")] + elif "login" in info: doc["email"] = profile[info.index("login")] + + if doc["email"] in used_accs or doc["email"] in used_profiles: n1 += 1 continue @@ -120,37 +125,45 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"): for c in cookies.split(";"): cookie[c[: c.find("=")].strip()] = c[c.find("=") + 1 :].strip() else: - try: - cookies_base64 = cookies.strip() # .strip('=') - # print() - # print(cookies_base64) - # print() - cookies64_bytes = cookies_base64.encode("ascii") - cookies_bytes = base64.b64decode(cookies64_bytes) - # exit() - cookies_str = ( - cookies_bytes.decode("ascii") - .replace("'", '"') - .replace("False", "false") - .replace("True", "true") - ) + #try: + cookies_base64 = cookies.strip() # .strip('=') + + cookies64_bytes = cookies_base64.encode("ascii") + cookies_bytes = base64.b64decode(cookies64_bytes) + # exit() + cookies_str = ( + cookies_bytes.decode("ascii") + .replace("'", '"') + .replace("False", "false") + .replace("True", "true") + ) + if vendor in ["827"]: + cookies = {} + for c in cookies_str.split(';'): + cookies[c[:c.find('=')]] = c[c.find('=')+1:] + + else: cookies = json.loads(cookies_str) - - cookie = {} - if vendor == "159": - for c in cookies["cookies"]: - cookie[c["name"]] = c["value"] - else: - for c in cookies: - name = c["name"] - del c["name"] - cookie[name] = c["value"] - doc["cookie"] = cookie - except Exception as e: - print('\n\nFel på cookie.\n', e, '\n') - for i in profile: - print(i) - continue + + cookie = {} + if vendor in ["159"]: + for c in cookies["cookies"]: + cookie[c["name"]] = c["value"] + elif vendor in ["827"]: + cookie = cookies + else: + for c in cookies: + name = c["name"] + del c["name"] + cookie[name] = c["value"] + doc["cookie"] = cookie + # except Exception as e: + # print('\n\nFel på cookie.\n', e, '\n') + # for i in profile: + # print(i) + + # exit() + else: cookie = {} if "birthday" in info: @@ -163,7 +176,6 @@ def to_accs(db, data, info, profiles, vendor, accs="accs"): print(f'\nInlagda profiler: {n0}\nProfiler redan i db: {n1}') - def used_servers(profiles="profiles"): cursor = db.aql.execute( """ @@ -231,7 +243,9 @@ if __name__ == "__main__": }, #'1113': {'info': 'login:mail:password:emailpassword:birthday:useragent:token:cookie', 'sep': '|'}, "159": {"info": "login:password:mail:email password:birthday:id", "sep": ":"}, - #'159': {'info': 'login:password:birthday:id:cookie', 'sep':':' + #'159': {'info': 'login:password:birthday:id:cookie', 'sep':':', + "827": {"info": "login:password:mail:email password:birthday:useragent:token:cookie", + "sep": "|"} } ############################### @@ -258,8 +272,8 @@ if __name__ == "__main__": # Öppna SSH-tunnel till RBP/db. 
pwd_key = getpass("Password for rsa-key: ") with open_tunnel( - ("studio-garda.asuscomm.com", 2200), - ssh_username="Lasse", + ("studio-garda.asuscomm.com", 2210), + ssh_username="lasse", ssh_pkey=paramiko.RSAKey.from_private_key_file( "/Users/Lasse/.ssh/id_rsa", password=pwd_key ), From e1aee2158158c06f25244915793996ead00575d2 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 7 Sep 2021 20:31:18 +0200 Subject: [PATCH 02/24] Changed how password is fetched --- facebook/arangodb.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/facebook/arangodb.py b/facebook/arangodb.py index 29e634e..fce5886 100644 --- a/facebook/arangodb.py +++ b/facebook/arangodb.py @@ -19,6 +19,9 @@ if __name__ != '__main__.py': exit('Fel lösenord, kunde inte logga in i DB.') if 'pwd' not in globals(): pwd = getpass(f'Lösenord för {user_arango}: ') + if pwd == '': + db = None + break try: db = ArangoClient(hosts=f'{host_arango}:{port_arango}').db(db_arango, username=user_arango, password=pwd) From 3501763a6df8a2224c5802a9b2aa84f70b36ad24 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 7 Sep 2021 20:32:04 +0200 Subject: [PATCH 03/24] . --- docker/free/Dockerfile | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 docker/free/Dockerfile diff --git a/docker/free/Dockerfile b/docker/free/Dockerfile deleted file mode 100644 index c289f9d..0000000 --- a/docker/free/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ - -FROM python:3.8 - -WORKDIR / - -COPY requirements.txt . - -RUN pip install -r requirements.txt - -ADD . . - -ENTRYPOINT [ "python", "facebook/__main__.py", "-p free" ] - -CMD ["",""] - -# BUILD: -# docker buildx create --use -#docker buildx build --file docker/free/Dockerfile --platform linux/arm -t l3224/fb-scraper:free --push . - From 9f21a1abaea0126537362d79d59d39e3997b5c70 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 8 Sep 2021 14:47:23 +0200 Subject: [PATCH 04/24] . --- .gitignore | 2 ++ docker/free/Dockerfile | 19 ----------- facebook/stats.py | 74 ------------------------------------------ requirements.txt | 31 +++++++----------- 4 files changed, 13 insertions(+), 113 deletions(-) delete mode 100644 docker/free/Dockerfile delete mode 100644 facebook/stats.py diff --git a/.gitignore b/.gitignore index a9f76ee..6fb1199 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,7 @@ password_arango.txt facebook/mrkoll. *.pyc /facebook +/docker +!/docker/*.py !/facebook/*.py *.sqlite3 \ No newline at end of file diff --git a/docker/free/Dockerfile b/docker/free/Dockerfile deleted file mode 100644 index c289f9d..0000000 --- a/docker/free/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ - -FROM python:3.8 - -WORKDIR / - -COPY requirements.txt . - -RUN pip install -r requirements.txt - -ADD . . - -ENTRYPOINT [ "python", "facebook/__main__.py", "-p free" ] - -CMD ["",""] - -# BUILD: -# docker buildx create --use -#docker buildx build --file docker/free/Dockerfile --platform linux/arm -t l3224/fb-scraper:free --push . 
- diff --git a/facebook/stats.py b/facebook/stats.py deleted file mode 100644 index 60764f2..0000000 --- a/facebook/stats.py +++ /dev/null @@ -1,74 +0,0 @@ -from datetime import datetime -from getpass import getpass -from time import sleep - -from arango import ArangoClient -from json2html import json2html - - -def now(): - """ Returns current date and time as string""" - return datetime.now().strftime("%Y-%m-%d_%H:%M:%S") - -def write_stats(db, continuous=False): - while True: - d = {} - for col in db.collections(): - if not col['system']: - d[col['name']] = db.collection(col['name']).count() - del d['stats'] - #d['time'] = now() - cursor = db.aql.execute( - """ - FOR doc IN members - FILTER doc.checked == true - COLLECT WITH COUNT INTO length - RETURN length - """ - ) - d['checked_members'] = cursor.next() - - - # Hur många konton per säljare som finns kvar - cursor = db.aql.execute( - ''' - for doc in profiles - filter has(doc, "vendor") - COLLECT vendor = doc.vendor WITH COUNT INTO length - RETURN { - "vendor" : vendor, - "active" : length - } - ''') - d['active_vendors'] = [doc for doc in cursor] - - d['_key'] = now()[:13] - db.insert_document( "stats", d, overwrite=True) - - # Skriv en html-fil - with open('website/fb-webbapp/stats.html', 'a+') as html: - html.truncate(0) - html.write('
') - - html.write(json2html.convert(json = d)) - - # Sov för att fortsätta senare - if continuous: - sleep(86400) - else: - break - -# Info för arangodb -user_arango = "Stats" -db_arango = "facebook" -host_arango = "http://192.168.0.4:8529" - -# Starta koppling till arangodb -# Avkryptera lösen till arango -pwd = getpass(f'Arangolösenord för {user_arango}:').strip() - -db = ArangoClient(hosts=host_arango).db( - db_arango, username=user_arango, password=pwd -) - -write_stats(db, continuous=True) diff --git a/requirements.txt b/requirements.txt index 574069e..c610c7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,12 @@ -beautifulsoup4==4.9.3 -certifi==2020.12.5 -cffi==1.14.5 -chardet==4.0.0 -idna==2.10 -lxml -pycparser==2.20 -PyJWT==2.0.1 -#PyNaCl==1.4.0 -PySocks==1.7.1 -python-arango==7.1.0 -requests==2.25.1 +certifi==2021.5.30 +charset-normalizer==2.0.4 +idna==3.2 +packaging==21.0 +PyJWT==2.1.0 +pyparsing==2.4.7 +python-arango==7.2.0 +requests==2.26.0 requests-toolbelt==0.9.1 -robobrowser==0.5.3 -setuptools-scm==5.0.2 -six==1.15.0 -soupsieve==2.2 -toml==0.10.2 -urllib3==1.26.3 -Werkzeug==1.0.1 -json2html +setuptools-scm==6.3.1 +tomli==1.2.1 +urllib3==1.26.6 From 8bb207ebfcd25a783b8ca1b264910161fb1c012c Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 8 Sep 2021 14:48:30 +0200 Subject: [PATCH 05/24] . --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6fb1199..8324173 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ password_arango.txt facebook/mrkoll. *.pyc /facebook -/docker +/docker/* !/docker/*.py !/facebook/*.py *.sqlite3 \ No newline at end of file From 64eb32245b05138e39a397c21dff8ac7062d7819 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 8 Sep 2021 14:53:43 +0200 Subject: [PATCH 06/24] Made it cleaner --- .gitignore | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 8324173..e25c77b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,20 +1,27 @@ + +# Blandat /.DS_Store -/.venv +*.venv /.vscode /__pycache__ *.json *.pkl -facebook/test.py /data/* *.html *.code-workspace workspace.code-workspace password_arango.txt *.gexf -facebook/mrkoll. *.pyc +*.sqlite3 + +#facebook /facebook +!/facebook/*.py +facebook/test.py +facebook/mrkoll. + +# docker /docker/* !/docker/*.py -!/facebook/*.py -*.sqlite3 \ No newline at end of file + From 91046da67fd333c09668a3ebe6dd6f9421b703a7 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 8 Sep 2021 14:55:49 +0200 Subject: [PATCH 07/24] . --- .gitignore | 4 +-- docker/stats/Dockerfile | 13 +++++++ docker/stats/requirements.txt | 27 ++++++++++++++ docker/stats/stats.py | 66 +++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 docker/stats/Dockerfile create mode 100644 docker/stats/requirements.txt create mode 100644 docker/stats/stats.py diff --git a/.gitignore b/.gitignore index e25c77b..5169577 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,6 @@ facebook/test.py facebook/mrkoll. # docker -/docker/* -!/docker/*.py +/stats/* +!/stats/*.py diff --git a/docker/stats/Dockerfile b/docker/stats/Dockerfile new file mode 100644 index 0000000..8cd6c9a --- /dev/null +++ b/docker/stats/Dockerfile @@ -0,0 +1,13 @@ +FROM python:alpine + +WORKDIR / + +COPY requirements.txt . + +RUN pip install -r requirements.txt + +ADD docker/stats/. . 
+ +ENTRYPOINT [ "python", "facebook/mrkoll.py" ] + +# docker buildx build --file docker/stats/Dockerfile --platform linux/arm64,linux/amd64 -t mrkoll . \ No newline at end of file diff --git a/docker/stats/requirements.txt b/docker/stats/requirements.txt new file mode 100644 index 0000000..2ba3112 --- /dev/null +++ b/docker/stats/requirements.txt @@ -0,0 +1,27 @@ +black==21.8b0 +certifi==2020.6.20 +chardet==4.0.0 +click==8.0.1 +httplib2==0.18.1 +idna==2.10 +mypy-extensions==0.4.3 +packaging==21.0 +pathspec==0.9.0 +platformdirs==2.3.0 +pycurl==7.43.0.6 +PyJWT==2.1.0 +pyparsing==2.4.7 +PySimpleSOAP==1.16.2 +python-apt==2.2.1 +python-arango==7.2.0 +python-debian==0.1.39 +python-debianbts==3.1.0 +regex==2021.8.28 +reportbug==7.10.3 +requests==2.25.1 +requests-toolbelt==0.9.1 +setuptools-scm==6.3.1 +six==1.16.0 +tomli==1.2.1 +typing-extensions==3.10.0.2 +urllib3==1.26.5 diff --git a/docker/stats/stats.py b/docker/stats/stats.py new file mode 100644 index 0000000..d4b8f92 --- /dev/null +++ b/docker/stats/stats.py @@ -0,0 +1,66 @@ +from datetime import datetime +from getpass import getpass +from time import sleep + +from arango import ArangoClient + + +def now(): + """Returns current date and time as string""" + return datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + +def write_stats(db, continuous=False): + while True: + d = {} + for col in db.collections(): + if not col["system"]: + d[col["name"]] = db.collection(col["name"]).count() + del d["stats"] + # d['time'] = now() + cursor = db.aql.execute( + """ + FOR doc IN members + FILTER doc.checked == true + COLLECT WITH COUNT INTO length + RETURN length + """ + ) + d["checked_members"] = cursor.next() + + # Hur många konton per säljare som finns kvar + cursor = db.aql.execute( + """ + for doc in profiles_webshare + filter has(doc, "vendor") + COLLECT vendor = doc.vendor WITH COUNT INTO length + RETURN { + "vendor" : vendor, + "active" : length + } + """ + ) + d["active_vendors"] = [doc for doc in cursor] + + d["_key"] = now()[:13] + db.insert_document("stats", d, overwrite=True) + + # Sov för att fortsätta senare + if continuous: + print(now()) + sleep(86400) + else: + break + + +# Info för arangodb +user_arango = "Stats" +db_arango = "facebook" +host_arango = "http://192.168.1.10:8529" + +# Starta koppling till arangodb +# Avkryptera lösen till arango +pwd = getpass(f"Arangolösenord för {user_arango}:").strip() + +db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd) + +write_stats(db, continuous=True) From 6d24628bb967c6e0acba818f7214f857d99b903d Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 8 Sep 2021 15:15:36 +0200 Subject: [PATCH 08/24] Removed requirements and added apk add --- docker/stats/Dockerfile | 6 ++++-- docker/stats/requirements.txt | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/stats/Dockerfile b/docker/stats/Dockerfile index 8cd6c9a..f90f8ac 100644 --- a/docker/stats/Dockerfile +++ b/docker/stats/Dockerfile @@ -2,12 +2,14 @@ FROM python:alpine WORKDIR / +RUN apk add --update --no-cache g++ gcc libxslt-dev + COPY requirements.txt . RUN pip install -r requirements.txt -ADD docker/stats/. . +ADD . . -ENTRYPOINT [ "python", "facebook/mrkoll.py" ] +ENTRYPOINT [ "python", "stats.py" ] # docker buildx build --file docker/stats/Dockerfile --platform linux/arm64,linux/amd64 -t mrkoll . 
\ No newline at end of file diff --git a/docker/stats/requirements.txt b/docker/stats/requirements.txt index 2ba3112..e392166 100644 --- a/docker/stats/requirements.txt +++ b/docker/stats/requirements.txt @@ -8,16 +8,16 @@ mypy-extensions==0.4.3 packaging==21.0 pathspec==0.9.0 platformdirs==2.3.0 -pycurl==7.43.0.6 +#pycurl==7.43.0.6 PyJWT==2.1.0 pyparsing==2.4.7 PySimpleSOAP==1.16.2 -python-apt==2.2.1 +#python-apt==2.2.1 python-arango==7.2.0 python-debian==0.1.39 python-debianbts==3.1.0 regex==2021.8.28 -reportbug==7.10.3 +#reportbug==7.10.3 requests==2.25.1 requests-toolbelt==0.9.1 setuptools-scm==6.3.1 From df39ff447bd62323c9e1c89ec27afb728c539108 Mon Sep 17 00:00:00 2001 From: Lasse Date: Thu, 9 Sep 2021 13:08:01 +0200 Subject: [PATCH 09/24] Put ssh_tunnel in dbViaSSH --- facebook/accs_to_db.py | 41 ++++++++++++++--------------------------- facebook/config.py | 1 + 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/facebook/accs_to_db.py b/facebook/accs_to_db.py index 01887ea..db3d0f4 100644 --- a/facebook/accs_to_db.py +++ b/facebook/accs_to_db.py @@ -6,12 +6,10 @@ from time import sleep import base64 import json import requests -from sshtunnel import open_tunnel -import paramiko -from getpass import getpass -import arangodb + import config from helpers import now +import dbViaSSH # Gör fb-scraper till arbetsmapp chdir(dirname(dirname(abspath(__file__)))) @@ -268,26 +266,15 @@ if __name__ == "__main__": row = row.replace("https:", "https;") data.append(row.split(sep)) - # Lägg in i accs - # Öppna SSH-tunnel till RBP/db. - pwd_key = getpass("Password for rsa-key: ") - with open_tunnel( - ("studio-garda.asuscomm.com", 2210), - ssh_username="lasse", - ssh_pkey=paramiko.RSAKey.from_private_key_file( - "/Users/Lasse/.ssh/id_rsa", password=pwd_key - ), - ssh_private_key_password=pwd_key, - remote_bind_address=("127.0.0.1", 8529), - ) as server: - port_arango = server.local_bind_port - - db = arangodb.arango_connect( - "concert-hangar-mirth-salk-DECAL", - username="Accs", - host_arango="http://127.0.0.1", - port_arango=port_arango, - ) - #webshare_proxies() - - to_accs(db, data, info, profiles, vendor) + # # Lägg in i accs + + db = dbViaSSH.db_over_tunnel('Accs') + + #webshare_proxies() + + to_accs(db, data, info, profiles, vendor) + + dbViaSSH.stop_server() + + + diff --git a/facebook/config.py b/facebook/config.py index 91c4e90..713e569 100644 --- a/facebook/config.py +++ b/facebook/config.py @@ -11,6 +11,7 @@ user_arango = "Lasse" db_arango = "facebook" host_arango = 'http://192.168.1.10' port_arango = '8529' +host_adress = "studio-garda.asuscomm.com" #IP/adress till där db finns # Andra uppgifter url_bas = "https://mbasic.facebook.com" From 48a35f6e221948ec35be482d291d2ac633e7d182 Mon Sep 17 00:00:00 2001 From: Lasse Date: Thu, 9 Sep 2021 13:39:33 +0200 Subject: [PATCH 10/24] Implemented dbViaSSH (or started) --- facebook/gephi.py | 9 ++- facebook/search_leak.py | 139 ++++++++++++++++++---------------------- 2 files changed, 69 insertions(+), 79 deletions(-) diff --git a/facebook/gephi.py b/facebook/gephi.py index 05803f4..98584ad 100644 --- a/facebook/gephi.py +++ b/facebook/gephi.py @@ -10,8 +10,7 @@ from getpass import getpass import arangodb locale.setlocale(locale.LC_TIME, "en_US") - - +import dbViaSSH def nodes_from_list( @@ -244,6 +243,12 @@ def common_friends(d, n=2): pwd = getpass('Password for Lasse: ') db = arangodb.arango_connect(pwd) +db.collecion('members').random() +try: + db.collecion('members').random() + +except: + pass if __name__ == "__main__": diff --git 
a/facebook/search_leak.py b/facebook/search_leak.py index a433fa6..bc479e9 100644 --- a/facebook/search_leak.py +++ b/facebook/search_leak.py @@ -3,22 +3,21 @@ Skript för att söka i FB-läckan. """ import re -import paramiko -import arangodb -from getpass import getpass -from sshtunnel import open_tunnel from termcolor import cprint +import dbViaSSH + + def search(db, attribute, value): """ Search for attribute in db. Returns list of matching documents. """ - if '%' in value or '_' in value: - match = 'like' + if "%" in value or "_" in value: + match = "like" else: - match = '==' + match = "==" cursor = db.aql.execute( f""" @@ -30,74 +29,60 @@ def search(db, attribute, value): ) return [doc for doc in cursor] -pwd_key = getpass(f"Password key: ") - -with open_tunnel( - ("studio-garda.asuscomm.com", 2200), - ssh_username="Lasse", - ssh_pkey=paramiko.RSAKey.from_private_key_file( - "/Users/Lasse/.ssh/id_rsa", password=pwd_key - ), - ssh_private_key_password=pwd_key, - remote_bind_address=("127.0.0.1", 8529), -) as server: - # server.start() - port_arango = server.local_bind_port - - db = arangodb.arango_connect( - "gruel-ADOBE-foolish-winy-borax", - username="Leak", - host_arango="http://127.0.0.1", - port_arango=port_arango, - ) - - cprint("\n\nVad vill du söka efter?", attrs=['bold']) - print("1 - Telefonnummer") - print("2 - Facebook-ID") - print('3 - Namn') - print("4 - Arbete") - print('5 - Bostadsort') - print("6 - Födelseort") - print("7 - Epost") - - # Få input för attribut - attribute = input("\n>>> ") - attributes = { - "1": ("telefonnummer", "phone"), - "2": ("Facebook-ID", "_key"), - "3": ("namn", "full_name"), - "4": ("arbete", "work"), - "5": ('bostadsort', "lives_in"), - "6": ('födelseort', 'from'), - "7": ('epost', 'email') - } - - # Bestäm n- eller t-form och få input för värde. - if attribute in ['5', '6', '7']: - genus = 'n' - else: - genus = 't' - - cprint(f"\nVilke{genus} {attributes[attribute][0]}? ", attrs=['bold']) - cprint('Använd % för att ersätta flera okända tecken, _ för att ersätta ett.', attrs=['dark']) - value = input('\n>>> ') - - if attribute == '1': # telefonnummer - value = ''.join(re.findall(r'\d+', value)) - if value[0] == '0': - value = f'46{value[1:]}' - elif attribute == '3': # namn - value = value.upper() - - # Sök i databasen. - result = search(db, attributes[attribute][1], value) - - # Presentera reultaten #TODO hur vill man få dem? Spara ner? - for i in result: - print('\n', i['full_name']) - for key, value in i.items(): - print(f'{key}: {value}') - print(f'https://facebook.com/{i["_key"]}') - - print(f'\nAntal träffar: {len(result)}\n') +db = dbViaSSH.db_over_tunnel("Leak") + +cprint("\n\nVad vill du söka efter?", attrs=["bold"]) +print("1 - Telefonnummer") +print("2 - Facebook-ID") +print("3 - Namn") +print("4 - Arbete") +print("5 - Bostadsort") +print("6 - Födelseort") +print("7 - Epost") + +# Få input för attribut +attribute = input("\n>>> ") +attributes = { + "1": ("telefonnummer", "phone"), + "2": ("Facebook-ID", "_key"), + "3": ("namn", "full_name"), + "4": ("arbete", "work"), + "5": ("bostadsort", "lives_in"), + "6": ("födelseort", "from"), + "7": ("epost", "email"), +} + +# Bestäm n- eller t-form och få input för värde. +if attribute in ["5", "6", "7"]: + genus = "n" +else: + genus = "t" + +cprint(f"\nVilke{genus} {attributes[attribute][0]}? 
", attrs=["bold"]) +cprint( + "Använd % för att ersätta flera okända tecken, _ för att ersätta ett.", + attrs=["dark"], +) +value = input("\n>>> ") + +if attribute == "1": # telefonnummer + value = "".join(re.findall(r"\d+", value)) + if value[0] == "0": + value = f"46{value[1:]}" +elif attribute == "3": # namn + value = value.upper() + +# Sök i databasen. +result = search(db, attributes[attribute][1], value) + +# Presentera reultaten #TODO hur vill man få dem? Spara ner? +for i in result: + print("\n", i["full_name"]) + for key, value in i.items(): + print(f"{key}: {value}") + print(f'https://facebook.com/{i["_key"]}') + +print(f"\nAntal träffar: {len(result)}\n") + +dbViaSSH.stop_server() From cb5821905a41c7fac36e3f63a85f234b8c06c55c Mon Sep 17 00:00:00 2001 From: Lasse Date: Sun, 12 Sep 2021 00:12:18 +0200 Subject: [PATCH 11/24] Set limit to max 50, better work around? --- facebook/scrapers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/facebook/scrapers.py b/facebook/scrapers.py index 5260666..750fa1f 100644 --- a/facebook/scrapers.py +++ b/facebook/scrapers.py @@ -281,8 +281,13 @@ def check_picture(url_picture, user, profile): # Addera bilden till arrango picture.add_to_db() + if picture.no_reactions > 50: # TODO Går det här att komma runt? + no_reactions = 50 + else: + no_reactions = picture.no_reactions + url_limit = url_bas + url_limit.replace( - "limit=" + str(limit), "limit=" + str(picture.no_reactions) + "limit=" + str(limit), "limit=" + str(no_reactions) ) try: From 3e5b1d13084f5bafb7eafbec68994580e10c0df6 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:29:20 +0200 Subject: [PATCH 12/24] Solved the limit 50 issue --- facebook/scrapers.py | 197 ++++++++++++++++++++++++++----------------- 1 file changed, 119 insertions(+), 78 deletions(-) diff --git a/facebook/scrapers.py b/facebook/scrapers.py index 5260666..a588b5d 100644 --- a/facebook/scrapers.py +++ b/facebook/scrapers.py @@ -158,7 +158,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): if mode == "single" and user.reactions > 30: break elif all([any([mode == "few", mode == "solo"]), user.reactions > 80, pic != url_pics[-1]]): - # Kolla den sista bilder + # Kolla den sista bilden check_picture(url_bas + url_pics[-1], user, profile) user.checked_pictures.append(url_bas + pic) break @@ -171,7 +171,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): def check_picture(url_picture, user, profile): - """ Hämtar reaktioner för en bildprint """ + """ Hämtar reaktioner för en bild. """ picture = Picture(user.username) picture.url = url_picture @@ -180,8 +180,7 @@ def check_picture(url_picture, user, profile): picture.id = str(re.search("\d+", picture.id).group()) except: pass - # if picture.id in all_pictures: - # continue + sleep_(5) try: @@ -249,8 +248,9 @@ def check_picture(url_picture, user, profile): for div in profile.viewing().find_all("div", href=True): if "like this" in div.text: url_reactions = url_bas + str(div["href"]) - + # Hämta reaktioner för bilden + sleep_(3) profile.open(url_reactions) @@ -281,82 +281,123 @@ def check_picture(url_picture, user, profile): # Addera bilden till arrango picture.add_to_db() + # Begränsa limit till 50 då Facebook inte ger fler (för första "klick"). 
+ try: + if int(picture.no_reactions) > 50: + no_reactions = 50 + else: + no_reactions = int(picture.no_reactions) - 1 + except TypeError: + no_reactions = picture.no_reactions + url_limit = url_bas + url_limit.replace( - "limit=" + str(limit), "limit=" + str(picture.no_reactions) + "limit=" + str(limit), "limit=" + str(no_reactions) + ) + + list_ids = [] + while True: + try: + sleep_(4) + profile.open(url_limit) + url_limit = "" + update_cookie(profile.browser.session.cookies, profile) + + # Hämta länk för "See more" för att se vilka ID:s som visas + url_see_more = None + for li in profile.viewing().find_all("li"): + if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''): # Om det finns fler reaktioner att hämta + url_see_more = li.find('a')['href'] + ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')] + list_ids_picture = ids_url.split('%2C') + list_ids_picture = list_ids_picture[len(list_ids):] # Profilerna på den här sidan + list_ids.extend(list_ids_picture) # Alla profiler hittills + url_limit = url_bas + url_see_more.replace('limit=10', 'limit=50') # Länken till fler profiler + # Gå igenom alla som reagerat och för in i arango + + get_reactions(profile, user, picture, list_ids) + + if url_see_more == None: # När det inte finns fler reaktioner + break + + except Exception as e: # Fel2 + write_error( + 2, + profile, + e=e, + soup=profile.viewing(), + user=user, + url=url_limit, + url_name="url_limit", + traceback=traceback.format_exc(), + ) + pass + + # Lägg till reaktioner till databasen + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True + ) + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True ) - try: - sleep_(4) - profile.open(url_limit) - url_limit = "" - update_cookie(profile.browser.session.cookies, profile) - - # Gå igenom alla som reagerat och för in i arango - for li in profile.viewing().find_all("li"): - friend = Friend(user.username) - if "see more" in li.text.lower(): + # Uppdatera antalet reaktioner användaren fått + user.reactions += len(picture.reactions) + +def get_reactions(profile, user, picture, list_ids_picture): + """ Gather the reactions on the picture. + + Args: + profile (class): The active profile. + user (class): The user being scraped. + picture (class): The picture. + list_ids_picture (list): List of ID:s fetched from "See more"-url + """ + + # Gå igenom alla som reagerat och för in i arango + for li in profile.viewing().find_all("li"): + friend = Friend(user.username) + if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''): continue - try: - friend_html = li.find("h3").find("a") - friend.name = friend_html.text - friend.url = friend_html["href"] - if "profile.php" in friend.url: - if "&paipv" in friend.url: - friend.username = friend.url[ - friend.url.find("=") + 1 : friend.url.find("&") - ] - else: - friend.username = friend.url[friend.url.find("id=") + 3 :] + try: + friend_html = li.find("h3").find("a") + friend.name = friend_html.text + friend.url = friend_html["href"] + friend.id = list_ids_picture.pop(0) + if "profile.php" in friend.url: + if "&paipv" in friend.url: + friend.username = friend.url[ + friend.url.find("=") + 1 : friend.url.find("&") + ] else: - if "?" 
in friend.url: - friend.username = friend.url[ - friend.url.find("/") + 1 : friend.url.find("?") - ] - else: - friend.username = friend.url[friend.url.find("/") + 1 :] - - reaction = Reaction(user.username, friend.username, picture.id) - for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: - if type in str(li): - reaction.type = type - picture.reactions.append(reaction.get_dict()) - # Lägg till vännens profil till arrango - try: - friend.add_to_db() - except: - _print(profile, user, f"Kunde inte lägga till vän {friend.url}") - - except AttributeError as e: # Fel1 - write_error( - 1, - profile, - e=e, - soup=str(li), - user=user, - traceback=traceback.format_exc(), - ) - pass - - # Lägg till reaktioner till databasen - db.collection("picture_reactions").insert_many( - picture.reactions, silent=True, overwrite=True - ) - db.collection("picture_reactions").insert_many( - picture.reactions, silent=True, overwrite=True - ) - - # Uppdatera antalet reaktioner användaren fått - user.reactions += len(picture.reactions) - except Exception as e: # Fel2 - write_error( - 2, - profile, - e=e, - soup=profile.viewing(), - user=user, - url=url_limit, - url_name="url_limit", - traceback=traceback.format_exc(), - ) - pass + friend.username = friend.url[friend.url.find("id=") + 3 :] + else: + if "?" in friend.url: + friend.username = friend.url[ + friend.url.find("/") + 1 : friend.url.find("?") + ] + else: + friend.username = friend.url[friend.url.find("/") + 1 :] + + reaction = Reaction(user.username, friend.username, picture.id) + + for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: + if type in str(li): + reaction.type = type + + picture.reactions.append(reaction.get_dict()) + # Lägg till vännens profil till arrango + try: + friend.add_to_db() + except: + _print(profile, user, f"Kunde inte lägga till vän {friend.url}") + except AttributeError as e: # Fel1 + write_error( + 1, + profile, + e=e, + soup=str(li), + user=user, + traceback=traceback.format_exc(), + ) + pass From fcfb2bb8465104850ab24609fd4472e17b8f4f86 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:29:39 +0200 Subject: [PATCH 13/24] Added this to repo --- docker/profile_pictures/images.py | 70 +++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 docker/profile_pictures/images.py diff --git a/docker/profile_pictures/images.py b/docker/profile_pictures/images.py new file mode 100644 index 0000000..352f27b --- /dev/null +++ b/docker/profile_pictures/images.py @@ -0,0 +1,70 @@ +import requests +import os +from datetime import date, datetime, timedelta +from time import sleep + +from arangodb import db + + +def download_image(url, user, id): + + # Kolla så användarmappen finns + if not os.path.isdir(f'../profile_pictures/{user}'): + os.mkdir(f'../profile_pictures/{user}') + + # Ladda ner bilden + r = requests.get(url) + if r.text == 'URL signature expired': + print('För gammal länk.') + exit() + elif r.status_code == 403: + exit() + img_data = r.content + with open(f'../profile_pictures/{user}/{id}.jpg', 'wb') as handler: + handler.write(img_data) + + +def get_pictures(day): + cursor = db.aql.execute( + """ + for doc in members + filter doc.fetched == @date + filter has(doc, "checked_pictures") + filter not has(doc, "pictures_downloaded") + return {'member': doc._key, 'pictures':doc.checked_pictures} + """, + bind_vars={'date': day} + ) + + for doc in cursor: + pictures = [] + for picture in doc['pictures']: + pictures.append(picture[picture.find('fbid=')+5:]) + + 
+ cursor = db.aql.execute( + """ + for doc in pictures + filter doc._key in @list + limit 10 + return {'_key': doc._key, 'user':doc.user, 'url': doc.src} + """, + bind_vars={"list": pictures}, + ) + + for picture in cursor: + download_image(picture['url'], picture['user'], picture['_key']) + print(picture['_key']) + sleep(2) + + db.update_document({'_id': 'members/' + str(doc['member']), 'pictures_downloaded': True}, silent=True, check_rev=False) + +def old_pics(): + if not os.path.isdir(f'../profile_pictures'): + os.mkdir(f'../profile_pictures') + start = date.today() + for i in range(1,60): + d = start - timedelta(days=i) + get_pictures(d.strftime('%Y%m%d')) + + From 317f19f81489f5f260a95b188a14bca82b794ddc Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:30:10 +0200 Subject: [PATCH 14/24] To work with arm/arm64/amd/64 --- requirements.txt | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index c610c7a..327ba58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,20 @@ -certifi==2021.5.30 -charset-normalizer==2.0.4 -idna==3.2 -packaging==21.0 -PyJWT==2.1.0 -pyparsing==2.4.7 -python-arango==7.2.0 -requests==2.26.0 +beautifulsoup4==4.9.3 +certifi==2020.12.5 +cffi==1.14.5 +chardet==4.0.0 +idna==2.10 +lxml +pycparser==2.20 +PyJWT==2.0.1 +PySocks==1.7.1 +python-arango==7.1.0 +requests==2.25.1 requests-toolbelt==0.9.1 -setuptools-scm==6.3.1 -tomli==1.2.1 -urllib3==1.26.6 +robobrowser==0.5.3 +setuptools-scm==5.0.2 +six==1.15.0 +soupsieve==2.2 +toml==0.10.2 +urllib3==1.26.3 +Werkzeug==1.0.1 +json2html \ No newline at end of file From cde54f570a1297e383a487f72c06d9f6b7043f7d Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:30:35 +0200 Subject: [PATCH 15/24] Made the build command clearer --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c69a275..74dbd33 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,5 +17,5 @@ CMD ["",""] # BUILD: # docker buildx create --use -#docker buildx build --platform linux/arm64,linux/arm64,linux/amd64 -t l3224/fb-scraper:pi --push . +#docker buildx build --platform linux/arm,linux/arm64,linux/amd64 -t l3224/fb-scraper:VERSION --push . 
From 153299a646cd1da1e8d88ee2822782811ad62fb5 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:31:12 +0200 Subject: [PATCH 16/24] Added id_from_seemore_url to Friend class --- facebook/classes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/facebook/classes.py b/facebook/classes.py index 4b64e5a..12f23ef 100644 --- a/facebook/classes.py +++ b/facebook/classes.py @@ -270,6 +270,7 @@ class Friend: self.username = "" self.url = "" self.name = "" + self.id = "" def add_to_db(self): db.insert_document( @@ -278,6 +279,7 @@ class Friend: "_key": str(self.username), "url": url_bas + self.url, "name": self.name, + 'id_from_seemore_url': self.id }, overwrite_mode="update", silent=True, From d08cf196b1c9a3a62e64afd3021f9325881a5b49 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:31:35 +0200 Subject: [PATCH 17/24] Changes in imports --- facebook/accs_to_profiles.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/facebook/accs_to_profiles.py b/facebook/accs_to_profiles.py index 3ffbf09..1f50c0b 100644 --- a/facebook/accs_to_profiles.py +++ b/facebook/accs_to_profiles.py @@ -5,16 +5,14 @@ from getpass import getpass from os.path import abspath, dirname from random import randint from time import sleep -import base64 -import json -import requests + # Gör fb-scraper till arbetsmapp chdir(dirname(dirname(abspath(__file__)))) from arangodb import arango_connect import config -from helpers import now, nowstamp +from helpers import nowstamp def used_servers(profiles='profiles'): cursor = db.aql.execute( From da94a2828896406e23757c866f7042798b673615 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:32:05 +0200 Subject: [PATCH 18/24] . --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5169577..8d545d4 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ facebook/mrkoll. 
/stats/* !/stats/*.py +requirements2.txt From ef43210a7e7f58195394d4fa77e968b197a112c6 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Tue, 21 Sep 2021 22:16:09 +0200 Subject: [PATCH 19/24] Reser user if profile is blocked --- facebook/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/facebook/__main__.py b/facebook/__main__.py index 6904fdd..b7a24da 100644 --- a/facebook/__main__.py +++ b/facebook/__main__.py @@ -160,6 +160,7 @@ if __name__ == "__main__": ) if profile.blocked: profile = blocked_profile(profile, proxieservers=proxieservers) + user = User(str(userdoc['_key']).strip(), mode, other_pictures=url_other_pictures) else: break except: @@ -175,7 +176,7 @@ if __name__ == "__main__": friends_unchecked = [] for friend in friends: - if not check_for_user(friend): + if not check_for_user(friend) and friend not in friends_unchecked: print(friend) friends_unchecked.append(friend) From 472440e2488736537282366676ef9e6fd11c9b20 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Tue, 21 Sep 2021 22:16:42 +0200 Subject: [PATCH 20/24] Print exception if report_blocked fail --- facebook/arangodb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/facebook/arangodb.py b/facebook/arangodb.py index fce5886..5e24531 100644 --- a/facebook/arangodb.py +++ b/facebook/arangodb.py @@ -82,7 +82,8 @@ def report_blocked(profile): }, overwrite=True, ) - except: + except Exception as e: + print(e) _print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.') From 1e8c3b76880ce04aa30c5550bff1683eca052b1a Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Tue, 21 Sep 2021 22:17:43 +0200 Subject: [PATCH 21/24] Added self.url_album for User --- facebook/classes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/facebook/classes.py b/facebook/classes.py index 12f23ef..4c03070 100644 --- a/facebook/classes.py +++ b/facebook/classes.py @@ -25,6 +25,7 @@ class User: self.url_likes = "" self.url_about = "" self.url_timeline = "" + self.url_album = "" self.url_profilepictures = "" self.profile_pictures = 0 self.pictures = [] From b80f39bd907c58fd4f80c446fecb004c2a1bc707 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Tue, 21 Sep 2021 22:17:58 +0200 Subject: [PATCH 22/24] Trying to solf the limit problem --- facebook/scrapers.py | 104 +++++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 39 deletions(-) diff --git a/facebook/scrapers.py b/facebook/scrapers.py index a588b5d..d76e013 100644 --- a/facebook/scrapers.py +++ b/facebook/scrapers.py @@ -82,7 +82,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): # Om det inte finns något profilalbum # Testa ta bort mellanrum och små bokstäver - if not hasattr(user, "url_album"): + if user.url_album == "": for a in profile.viewing().find_all("a", href=True): if "profilepictures" in a.text.lower().replace(" ", ""): user.url_album = url_bas + a["href"] @@ -91,7 +91,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): # Gå till profilbilden (den första som kommer upp när man går till profilen) # Om profilen inte har profilalbum - if not hasattr(user, "url_album"): + if user.url_album == "": write_error(9, profile, soup=profile.viewing(), user=user) if user.url_other_pictures != []: # Använd eventuella extrabilder och ta bort den från användaren @@ -264,7 +264,7 @@ def check_picture(url_picture, user, profile): picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1) limit = re.search(r"limit=(\d+)", 
url_limit).group( 1 - ) # TODO Fortfarande problem med det här + ) except UnboundLocalError: # fel9 write_error( 9, @@ -285,52 +285,75 @@ def check_picture(url_picture, user, profile): try: if int(picture.no_reactions) > 50: no_reactions = 50 + elif int(picture.no_reactions) == 0: + no_reactions = 0 else: no_reactions = int(picture.no_reactions) - 1 except TypeError: + #print(picture.no_reactions, type(picture.no_reactions)) no_reactions = picture.no_reactions + #print('\nANTAL REAKTIONER TOTALT PÅ BILDEN:', picture.no_reactions) + url_limit = url_bas + url_limit.replace( "limit=" + str(limit), "limit=" + str(no_reactions) ) list_ids = [] + while True: - try: - sleep_(4) - profile.open(url_limit) - url_limit = "" - update_cookie(profile.browser.session.cookies, profile) - - # Hämta länk för "See more" för att se vilka ID:s som visas - url_see_more = None - for li in profile.viewing().find_all("li"): - if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''): # Om det finns fler reaktioner att hämta - url_see_more = li.find('a')['href'] - ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')] - list_ids_picture = ids_url.split('%2C') - list_ids_picture = list_ids_picture[len(list_ids):] # Profilerna på den här sidan - list_ids.extend(list_ids_picture) # Alla profiler hittills - url_limit = url_bas + url_see_more.replace('limit=10', 'limit=50') # Länken till fler profiler - # Gå igenom alla som reagerat och för in i arango - - get_reactions(profile, user, picture, list_ids) - - if url_see_more == None: # När det inte finns fler reaktioner - break + #try: + sleep_(4) + #print('\nurl_limit'.upper(), url_limit, '\n') + profile.open(url_limit) + #url_limit = "" # Vad gjorde den här? + + update_cookie(profile.browser.session.cookies, profile) + + # Hämta länk för "See more" för att se vilka ID:s som visas + url_see_more = None + #print('\nVARJE LÄNK PÅ SIDAN') + for a in profile.viewing().find_all("a"): + #print(a) + if "See More" in a.text: # Om det finns fler reaktioner att hämta + #print('\nHITTADE "SEE MORE"\n') + + url_see_more = a['href'] + ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')] + list_ids_from_url = ids_url.split('%2C') # Alla IDs hittills + + #print('\nlist_pictures_from_url\n'.upper(), list_ids_from_url) # Lista från länk med profiler kollade hittills(?) 
+ + list_ids_page = list_ids_from_url[len(list_ids):] # Profilerna på den här sidan + #print('\nlist_ids_picture\n'.upper(), list_ids_page) + + list_ids.extend(list_ids_page) #Lägg nästa sidas IDs till listan på alla IDs hittills + + # Sätt rätt limit för nästa sida + limit_next_page = int(picture.no_reactions) - len(list_ids_from_url) + if limit_next_page > 50: + limit_next_page = 50 + url_limit = url_bas + url_see_more.replace('limit=10', f'limit={limit_next_page}') # Länken till fler profiler + #print('\nurl_limit', url_limit, '\n') + + # Gå igenom alla som reagerat och för in i arango + get_reactions(profile, user, picture, list_ids_page) + + if url_see_more == None: # När det inte finns fler reaktioner + break - except Exception as e: # Fel2 - write_error( - 2, - profile, - e=e, - soup=profile.viewing(), - user=user, - url=url_limit, - url_name="url_limit", - traceback=traceback.format_exc(), - ) - pass + # except Exception as e: # Fel2 + # write_error( + # 2, + # profile, + # e=e, + # soup=profile.viewing(), + # user=user, + # url=url_limit, + # url_name="url_limit", + # traceback=traceback.format_exc(), + # ) + # pass # Lägg till reaktioner till databasen db.collection("picture_reactions").insert_many( @@ -343,7 +366,8 @@ def check_picture(url_picture, user, profile): # Uppdatera antalet reaktioner användaren fått user.reactions += len(picture.reactions) -def get_reactions(profile, user, picture, list_ids_picture): + +def get_reactions(profile, user, picture, list_ids_page): """ Gather the reactions on the picture. Args: @@ -354,6 +378,8 @@ def get_reactions(profile, user, picture, list_ids_picture): """ # Gå igenom alla som reagerat och för in i arango + #print('list_ids_picture: ', list_ids_page) + list_ids = list_ids_page.copy() for li in profile.viewing().find_all("li"): friend = Friend(user.username) if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''): @@ -362,7 +388,7 @@ def get_reactions(profile, user, picture, list_ids_picture): friend_html = li.find("h3").find("a") friend.name = friend_html.text friend.url = friend_html["href"] - friend.id = list_ids_picture.pop(0) + friend.id = list_ids.pop(0) if "profile.php" in friend.url: if "&paipv" in friend.url: friend.username = friend.url[ From dca839a7431afd43a89eb5ea5db85ddf5ea11b32 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 22 Sep 2021 15:10:44 +0200 Subject: [PATCH 23/24] Added from RBP3 --- docker/mrkoll/Dockerfile | 13 +- docker/mrkoll/mrkoll_scraperapi.py | 194 +++++++++++++++++++++++++++++ docker/mrkoll/requirements.txt | 14 +++ 3 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 docker/mrkoll/mrkoll_scraperapi.py create mode 100644 docker/mrkoll/requirements.txt diff --git a/docker/mrkoll/Dockerfile b/docker/mrkoll/Dockerfile index d61b23a..27234c4 100644 --- a/docker/mrkoll/Dockerfile +++ b/docker/mrkoll/Dockerfile @@ -1,14 +1,15 @@ +# syntax=docker/dockerfile:1 -FROM python:3.8 +FROM python:3.8-slim-buster -WORKDIR / +COPY requirements.txt requirements.txt -COPY requirements.txt . +RUN pip3 install -r requirements.txt -RUN pip install -r requirements.txt +COPY . . -ADD . . +ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ] -ENTRYPOINT [ "python", "facebook/mrkoll.py" ] +CMD [""] # docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push . 
\ No newline at end of file diff --git a/docker/mrkoll/mrkoll_scraperapi.py b/docker/mrkoll/mrkoll_scraperapi.py new file mode 100644 index 0000000..c73867a --- /dev/null +++ b/docker/mrkoll/mrkoll_scraperapi.py @@ -0,0 +1,194 @@ +import re +import subprocess +import requests +from sys import argv +from time import sleep +from bs4 import BeautifulSoup +from arango import ArangoClient + + + + +def find_person(number): + """ + Söker personuppgifter utifrån telefonnummer. + """ + + sleep(2) + + url = f'https://mrkoll.se/resultat?n={number}' + + api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9' + payload = {'api_key': api_key, 'url': url, 'country_code': 'se', 'device_type':'desktop'} + + response = requests.get('http://api.scraperapi.com', params=payload) + r = response.text + # Hämta sidan + + soup = BeautifulSoup(r, 'html.parser') + + if ( + "Du har gjort för många anrop" in soup.text + or response.url == "https://mrkoll.se/om/limit/" + ): + sleep(10) + return None + + # Lägg in data i dictionary + d = {} + + d["url_via_telefonnummer"] = response.url + try: + for a in soup.find_all("a", href=True): + if "boende-med-" in a["href"]: + d["lives_with_url"] = a["href"] + if "-hushall" in a["href"]: + d["lives_with"] = a.text + except: + pass + + if "Sökningen gav 0 träffar..." in soup.text: + return {} + + info = soup.find("div", {"class": "block_col1"}) + + try: + d["first_name"] = info.find( + "span", {"title": "Detta är personens tilltalsnamn"} + ).text + except: + pass + try: + d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text + except: + pass + try: + d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text + except: + pass + try: + adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"}) + d["adress_line1"] = adress[0].text + if len(adress) > 1: + d["adress_line2"] = adress[1].text + except: + pass + + try: + d["history"] = info.find("div", {"class": "history_container"}).text + except: + pass + + # Personnummer + ## Födelsedatum + for i in soup.find_all("div", {"class": "col_block1"}): + if "Personnummer" in i.text: + d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace( + "-XXXX", "" + ) + ## Fyra sista + try: + start = "showPersnr" + end = ">Jag godkänner" + t = str(soup) + v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",") + url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1] + sleep(2) # Vänta lite + four_last = requests.get("http://mrkoll.se" + url_ajax).text + d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last) + except: + pass + + try: + neighbours = {} + for div in soup.find_all("div", {"class": "peoplecont"}): + persons = div.find_all("a", href=True) + for person in persons: + neighbours[person.find("strong").text] = { + "link": person["href"], + "lived_years": re.search( + "\d+", person.find("span", {"class": "flyttclass"}).text + ).group()[0], + } + d["neighbours"] = neighbours + except: + pass + + try: + d["name_change"] = [ + div.text.strip() for div in soup.find_all("div", {"class": "name_change"}) + ] + except: + pass + + try: + prosecuted = {} + prosecuted["brottsmål"] = ( + True if soup.find("div", {"class": "resmark res_b"}) != None else False + ) + prosecuted["tvistemål"] = ( + True if soup.find("div", {"class": "resmark res_t"}) != None else False + ) + prosecuted["straffföreläggande"] = ( + True if soup.find("div", {"class": "resmark res_s"}) != None else False + ) + d["prosecuted"] = prosecuted + except: + pass + + return d + + +if __name__ 
== "__main__": + + ip = 'scraperapi' + + if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12': + subprocess.call(['wg-quick', 'up', 'mullvad-se4']) + + exit() + # Info för arangodb + user_arango = "Phone" + db_arango = "facebook" + host_arango = "http://192.168.1.10:8529" + + # Starta koppling till arangodb + + db = ArangoClient(hosts=host_arango).db( + db_arango, username=user_arango, password=argv[1] + ) + leak = db.collection("phoneleak") + + count = 0 + scraper_count = 0 + + global errors + errors = 0 + + while True: + count += 1 + + # Hämta en random person + doc = leak.random() + + # Gör sökningen på mrkoll.se + d = find_person(doc["phone"]) + + try: + name = d["first_name"] + ' ' + except: + name = ' ' + print(f'{count} - {errors} {name}', end="\r") + + if d == None: # Om ip-adressen är blockad eller något hänt + continue + + d["_key"] = doc["_key"] + d["_id"] = "phone/" + str(d["_key"]) + d["phone"] = doc["phone"] + d["checked_from_ip"] = f'{ip} - cache' + try: + db.collection("phone").insert(d) + leak.delete(doc["_key"]) + except: + pass diff --git a/docker/mrkoll/requirements.txt b/docker/mrkoll/requirements.txt new file mode 100644 index 0000000..6a7859b --- /dev/null +++ b/docker/mrkoll/requirements.txt @@ -0,0 +1,14 @@ +beautifulsoup4==4.9.3 +bs4==0.0.1 +certifi==2021.5.30 +charset-normalizer==2.0.4 +idna==3.2 +PyJWT==2.1.0 +python-arango==7.2.0 +requests==2.26.0 +requests-toolbelt==0.9.1 +setuptools-scm==6.0.1 +soupsieve==2.2.1 +toml==0.10.2 +urllib3==1.26.6 +requests_cache==0.7.4 From 46ed5e38b1b0a6fa451390e281415a4fef7a8f1e Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Wed, 22 Sep 2021 15:32:42 +0200 Subject: [PATCH 24/24] Made exit of Mullvad not connected --- docker/mrkoll/mrkoll_scraperapi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/mrkoll/mrkoll_scraperapi.py b/docker/mrkoll/mrkoll_scraperapi.py index c73867a..3d9b5ab 100644 --- a/docker/mrkoll/mrkoll_scraperapi.py +++ b/docker/mrkoll/mrkoll_scraperapi.py @@ -144,9 +144,9 @@ if __name__ == "__main__": ip = 'scraperapi' if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12': - subprocess.call(['wg-quick', 'up', 'mullvad-se4']) + print('\nMULLVAD INTE AKTIV\n') + exit() - exit() # Info för arangodb user_arango = "Phone" db_arango = "facebook"