parent
b80f39bd90
commit
dca839a743
3 changed files with 215 additions and 6 deletions
@ -1,14 +1,15 @@ |
||||
# syntax=docker/dockerfile:1 |
||||
|
||||
FROM python:3.8 |
||||
FROM python:3.8-slim-buster |
||||
|
||||
WORKDIR / |
||||
COPY requirements.txt requirements.txt |
||||
|
||||
COPY requirements.txt . |
||||
RUN pip3 install -r requirements.txt |
||||
|
||||
RUN pip install -r requirements.txt |
||||
COPY . . |
||||
|
||||
ADD . . |
||||
ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ] |
||||
|
||||
ENTRYPOINT [ "python", "facebook/mrkoll.py" ] |
||||
CMD [""] |
||||
|
||||
# docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push . |
||||
@ -0,0 +1,194 @@ |
||||
import re |
||||
import subprocess |
||||
import requests |
||||
from sys import argv |
||||
from time import sleep |
||||
from bs4 import BeautifulSoup |
||||
from arango import ArangoClient |
||||
|
||||
|
||||
|
||||
|
||||
def find_person(number):
    """Look up a person on mrkoll.se by phone number.

    The mrkoll.se result page is fetched through the ScraperAPI proxy and
    scraped for name, address, birth date, personal number, neighbours,
    name changes and court-record markers.

    Args:
        number: Phone number to search for.

    Returns:
        None  -- the request was rate limited / blocked (caller should retry),
        {}    -- the search gave zero hits,
        dict  -- otherwise, whatever fields could be scraped.
    """
    sleep(2)  # throttle between searches

    url = f'https://mrkoll.se/resultat?n={number}'

    # NOTE(review): ScraperAPI key is hard-coded in source; move to an
    # environment variable or config file.
    api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
    payload = {
        'api_key': api_key,
        'url': url,
        'country_code': 'se',
        'device_type': 'desktop',
    }

    # Fetch the page through the proxy service.
    response = requests.get('http://api.scraperapi.com', params=payload)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Rate limited: mrkoll either shows a "too many requests" message or
    # redirects to its limit page.
    if (
        "Du har gjort för många anrop" in soup.text
        or response.url == "https://mrkoll.se/om/limit/"
    ):
        sleep(10)
        return None

    # Collect scraped fields into a flat dict.  Each section below is
    # best-effort: a scrape failure simply leaves that key absent.
    d = {}
    d["url_via_telefonnummer"] = response.url

    # "Lives with" household links.
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:  # was a bare except; Exception keeps Ctrl-C working
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}

    info = soup.find("div", {"class": "block_col1"})

    # Names (given name, other first names, surname).
    try:
        d["first_name"] = info.find(
            "span", {"title": "Detta är personens tilltalsnamn"}
        ).text
    except Exception:
        pass
    try:
        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
    except Exception:
        pass
    try:
        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
    except Exception:
        pass

    # Address (one or two lines).
    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except Exception:
        pass

    # Address history.
    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except Exception:
        pass

    # Personal number: birth-date part is shown on the page ...
    for i in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in i.text:
            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
                "-XXXX", ""
            )
    # ... and the four last digits come from the site's ajax endpoint.
    # The page embeds the two ajax parameters in a showPersnr('p','k')
    # JavaScript call, which is sliced out of the raw HTML here.
    try:
        start = "showPersnr"
        end = ">Jag godkänner</span>"
        t = str(soup)
        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        sleep(2)  # wait a bit before the extra request
        four_last = requests.get("http://mrkoll.se" + url_ajax).text
        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
    except Exception:
        pass

    # Neighbours: name -> profile link + years lived at the address.
    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            persons = div.find_all("a", href=True)
            for person in persons:
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUG FIX: the original used .group()[0], which kept
                    # only the first digit and truncated multi-digit year
                    # counts (e.g. "12" -> "1").  Also made the regex raw.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    # Previous names.
    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    # Court-record markers: criminal case, civil case, summary fine.
    # (The "resmark res_X" divs are only present when a record exists.)
    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except Exception:
        pass

    return d
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
|
||||
ip = 'scraperapi' |
||||
|
||||
if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12': |
||||
subprocess.call(['wg-quick', 'up', 'mullvad-se4']) |
||||
|
||||
exit() |
||||
# Info för arangodb |
||||
user_arango = "Phone" |
||||
db_arango = "facebook" |
||||
host_arango = "http://192.168.1.10:8529" |
||||
|
||||
# Starta koppling till arangodb |
||||
|
||||
db = ArangoClient(hosts=host_arango).db( |
||||
db_arango, username=user_arango, password=argv[1] |
||||
) |
||||
leak = db.collection("phoneleak") |
||||
|
||||
count = 0 |
||||
scraper_count = 0 |
||||
|
||||
global errors |
||||
errors = 0 |
||||
|
||||
while True: |
||||
count += 1 |
||||
|
||||
# Hämta en random person |
||||
doc = leak.random() |
||||
|
||||
# Gör sökningen på mrkoll.se |
||||
d = find_person(doc["phone"]) |
||||
|
||||
try: |
||||
name = d["first_name"] + ' ' |
||||
except: |
||||
name = ' ' |
||||
print(f'{count} - {errors} {name}', end="\r") |
||||
|
||||
if d == None: # Om ip-adressen är blockad eller något hänt |
||||
continue |
||||
|
||||
d["_key"] = doc["_key"] |
||||
d["_id"] = "phone/" + str(d["_key"]) |
||||
d["phone"] = doc["phone"] |
||||
d["checked_from_ip"] = f'{ip} - cache' |
||||
try: |
||||
db.collection("phone").insert(d) |
||||
leak.delete(doc["_key"]) |
||||
except: |
||||
pass |
||||
@ -0,0 +1,14 @@ |
||||
beautifulsoup4==4.9.3 |
||||
bs4==0.0.1 |
||||
certifi==2021.5.30 |
||||
charset-normalizer==2.0.4 |
||||
idna==3.2 |
||||
PyJWT==2.1.0 |
||||
python-arango==7.2.0 |
||||
requests==2.26.0 |
||||
requests-toolbelt==0.9.1 |
||||
setuptools-scm==6.0.1 |
||||
soupsieve==2.2.1 |
||||
toml==0.10.2 |
||||
urllib3==1.26.6 |
||||
requests_cache==0.7.4 |
||||
Loading…
Reference in new issue