You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

194 lines
5.2 KiB

import re
import subprocess
import requests
from sys import argv
from time import sleep
from bs4 import BeautifulSoup
from arango import ArangoClient
def find_person(number):
    """Look up personal details for a Swedish phone number on mrkoll.se.

    The search page is fetched through the ScraperAPI proxy, then parsed
    with BeautifulSoup. Every field is best-effort: missing page elements
    are simply skipped.

    Args:
        number: Phone number string to search for.

    Returns:
        None  if mrkoll.se reports a rate limit (caller should retry later),
        {}    if the search returned zero hits,
        dict  otherwise, with any of: first/middle/last name, address lines,
              address history, date of birth, full personal number,
              neighbours, name changes, court-record flags and the result URL.
    """
    sleep(2)  # be polite between searches
    url = f'https://mrkoll.se/resultat?n={number}'
    # NOTE(review): hard-coded third-party API key — move to env/config.
    api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
    payload = {
        'api_key': api_key,
        'url': url,
        'country_code': 'se',
        'device_type': 'desktop',
    }
    response = requests.get('http://api.scraperapi.com', params=payload)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Rate limited: back off and tell the caller to retry with another record.
    if (
        "Du har gjort för många anrop" in soup.text
        or response.url == "https://mrkoll.se/om/limit/"
    ):
        sleep(10)
        return None

    d = {}
    d["url_via_telefonnummer"] = response.url

    # Household links ("boende-med-…" page and "…-hushall" label).
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except (KeyError, AttributeError):
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}

    info = soup.find("div", {"class": "block_col1"})

    # Name parts — each span may be absent, so each lookup is independent.
    try:
        d["first_name"] = info.find(
            "span", {"title": "Detta är personens tilltalsnamn"}
        ).text
    except AttributeError:
        pass
    try:
        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
    except AttributeError:
        pass
    try:
        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
    except AttributeError:
        pass

    # Address (one or two lines).
    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except (AttributeError, IndexError):
        pass

    # Address history.
    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except AttributeError:
        pass

    # Personal number: date of birth first…
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            d["date_of_birth"] = block.find(
                "span", {"class": "f_line2"}
            ).text.replace("-XXXX", "")

    # …then the four last digits via the site's AJAX endpoint. The call
    # parameters are embedded in a "showPersnr('p','k')" JS snippet.
    try:
        start = "showPersnr"
        end = ">Jag godkänner</span>"
        t = str(soup)
        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        sleep(2)  # wait a bit before the second request
        four_last = requests.get("http://mrkoll.se" + url_ajax).text
        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
    except (KeyError, IndexError, requests.RequestException):
        pass

    # Neighbours: name, profile link and years lived at the address.
    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            for person in div.find_all("a", href=True):
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUG FIX: the original did .group()[0], which recorded
                    # only the first digit (e.g. "12" years became "1").
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except (AttributeError, KeyError):
        pass

    # Registered name changes.
    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except AttributeError:
        pass

    # Court-record marker flags (present/absent CSS classes on the page).
    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except AttributeError:
        pass

    return d
if __name__ == "__main__":
    ip = 'scraperapi'
    # Safety check: if we are still on the bare home IP, bring up the
    # Mullvad WireGuard tunnel and abort so the operator can restart.
    if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12':
        subprocess.call(['wg-quick', 'up', 'mullvad-se4'])
        exit()

    # ArangoDB connection settings (password is passed as argv[1]).
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.1.10:8529"
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=argv[1]
    )
    leak = db.collection("phoneleak")

    count = 0
    errors = 0
    while True:
        count += 1
        # Pick a random unchecked record and run the mrkoll.se lookup.
        doc = leak.random()
        d = find_person(doc["phone"])

        # Progress line; first name shown when the lookup found one.
        name = (d.get("first_name", "") + ' ') if isinstance(d, dict) else ' '
        print(f'{count} - {errors} {name}', end="\r")

        if d is None:  # IP blocked / rate limited — skip and try another
            continue

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = f'{ip} - cache'
        try:
            db.collection("phone").insert(d)
            leak.delete(doc["_key"])
        except Exception:
            # Insert can fail on duplicate keys; keep the leak record
            # and continue with the next one.
            pass