Added from RBP3

master
Lasse Server 4 years ago
parent b80f39bd90
commit dca839a743
  1. 13
      docker/mrkoll/Dockerfile
  2. 194
      docker/mrkoll/mrkoll_scraperapi.py
  3. 14
      docker/mrkoll/requirements.txt

@ -1,14 +1,15 @@
# syntax=docker/dockerfile:1
# NOTE(review): every instruction line below carries two instructions on one
# line. This looks like the old and the new column of the diff hunk
# (-1,14 +1,15) rendered side by side, not the literal file content --
# TODO confirm against the real docker/mrkoll/Dockerfile before building.
FROM python:3.8 FROM python:3.8-slim-buster
WORKDIR / COPY requirements.txt requirements.txt
COPY requirements.txt . RUN pip3 install -r requirements.txt
RUN pip install -r requirements.txt COPY . .
ADD . . ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ]
ENTRYPOINT [ "python", "facebook/mrkoll.py" ] CMD [""]
# docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push . # docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push .

@ -0,0 +1,194 @@
import re
import subprocess
import requests
from sys import argv
from time import sleep
from bs4 import BeautifulSoup
from arango import ArangoClient
# Seconds to wait for a ScraperAPI response before giving up.
_SCRAPERAPI_TIMEOUT = 70
# Seconds to wait for the direct request to mrkoll.se before giving up.
_MRKOLL_TIMEOUT = 30


def _first_text(parent, name, attrs):
    """Return the text of the first matching tag under *parent*, or None."""
    if parent is None:
        return None
    node = parent.find(name, attrs)
    return node.text if node is not None else None


def _last_digits_path(html):
    """Build the path of the "last four digits" AJAX call from the page HTML.

    The two query values are read from the argument list of the
    ``showPersnr(...)`` call that precedes the "Jag godkänner" span.
    Returns None when that call cannot be located or has fewer than two
    arguments.
    """
    start = html.find("showPersnr")
    end = html.find(">Jag godkänner</span>")
    if start == -1 or end == -1:
        return None
    # +11 skips "showPersnr(" and -2 drops the two characters in front of ">".
    args = html[start + 11 : end - 2].replace("'", "").split(",")
    if len(args) < 2:
        return None
    return "/ajax/lastDigits/?p=" + args[0] + "&k=" + args[1]


def _parse_neighbours(soup):
    """Return a mapping of neighbour name -> link and number of years."""
    neighbours = {}
    for container in soup.find_all("div", {"class": "peoplecont"}):
        for person in container.find_all("a", href=True):
            name = person.find("strong")
            moved = _first_text(person, "span", {"class": "flyttclass"})
            years = re.search(r"\d+", moved) if moved is not None else None
            if name is None or years is None:
                # Skip an incomplete entry instead of discarding every
                # neighbour that was parsed successfully.
                continue
            neighbours[name.text] = {
                "link": person["href"],
                # The whole number; indexing the match with [0] kept only
                # its first digit.
                "lived_years": years.group(),
            }
    return neighbours


def _parse_prosecuted(soup):
    """Return which of the three legal-case markers are present on the page."""
    markers = {
        "brottsmål": "resmark res_b",
        "tvistemål": "resmark res_t",
        "straffföreläggande": "resmark res_s",
    }
    return {
        label: soup.find("div", {"class": css_class}) is not None
        for label, css_class in markers.items()
    }


def find_person(number):
    """
    Look up personal details on mrkoll.se from a phone number.

    The result page is requested through ScraperAPI and parsed with
    BeautifulSoup. Fields that are missing from the page are simply left
    out of the result.

    Args:
        number: Phone number to search for; it is placed in the query
            string of the mrkoll.se result URL as given.

    Returns:
        None when the request fails or the page reports too many calls,
        an empty dict when the search gave zero hits, otherwise a dict
        that always holds "url_via_telefonnummer", "neighbours",
        "name_change" and "prosecuted" and, when present on the page,
        "lives_with_url", "lives_with", "first_name", "middle_name",
        "last_name", "adress_line1", "adress_line2", "history",
        "date_of_birth" and "personal_number".
    """
    sleep(2)  # Fixed pause before every lookup.

    url = f'https://mrkoll.se/resultat?n={number}'
    # NOTE(review): hard-coded credential; consider moving it out of the
    # source (environment variable or secret store).
    api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
    payload = {'api_key': api_key, 'url': url, 'country_code': 'se', 'device_type':'desktop'}

    # Fetch the page. A network failure is reported like a blocked request
    # so that the caller can move on to the next number.
    try:
        response = requests.get(
            'http://api.scraperapi.com', params=payload, timeout=_SCRAPERAPI_TIMEOUT
        )
    except requests.RequestException:
        sleep(10)
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    page_text = soup.text

    # NOTE(review): response.url is the URL of the request made above, i.e.
    # presumably the ScraperAPI URL including its query string (and thus the
    # API key), not a mrkoll.se address -- confirm whether the comparison
    # below can ever match and whether this value should be stored.
    if (
        "Du har gjort för många anrop" in page_text
        or response.url == "https://mrkoll.se/om/limit/"
    ):
        sleep(10)
        return None

    # Collect the data in a dictionary.
    d = {"url_via_telefonnummer": response.url}

    for link in soup.find_all("a", href=True):
        href = link["href"]
        if "boende-med-" in href:
            d["lives_with_url"] = href
        if "-hushall" in href:
            d["lives_with"] = link.text

    if "Sökningen gav 0 träffar..." in page_text:
        return {}

    info = soup.find("div", {"class": "block_col1"})

    # Name parts are identified by the title attribute of their span.
    name_titles = {
        "first_name": "Detta är personens tilltalsnamn",
        "middle_name": "Detta är ett förnamn",
        "last_name": "Detta är ett efternamn",
    }
    for key, title in name_titles.items():
        value = _first_text(info, "span", {"title": title})
        if value is not None:
            d[key] = value

    if info is not None:
        address = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        if address:
            d["adress_line1"] = address[0].text
        if len(address) > 1:
            d["adress_line2"] = address[1].text

    history = _first_text(info, "div", {"class": "history_container"})
    if history is not None:
        d["history"] = history

    # Personal identity number
    ## Date of birth
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            masked = _first_text(block, "span", {"class": "f_line2"})
            if masked is not None:
                d["date_of_birth"] = masked.replace("-XXXX", "")

    ## Last four digits
    # Only worth an extra request when there is a date of birth to attach
    # the digits to.
    if "date_of_birth" in d:
        ajax_path = _last_digits_path(str(soup))
        if ajax_path is not None:
            sleep(2)  # Wait a little
            try:
                four_last = requests.get(
                    "http://mrkoll.se" + ajax_path, timeout=_MRKOLL_TIMEOUT
                ).text
            except requests.RequestException:
                pass  # Best effort: keep the record without the last digits.
            else:
                d["personal_number"] = f'{d["date_of_birth"]}-{four_last}'

    d["neighbours"] = _parse_neighbours(soup)

    d["name_change"] = [
        div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
    ]

    d["prosecuted"] = _parse_prosecuted(soup)

    return d
if __name__ == "__main__":
    # Label written into every stored record (see "checked_from_ip" below).
    ip = 'scraperapi'

    # When the public address equals this fixed one, bring up the WireGuard
    # interface and stop without scraping.
    # NOTE(review): presumably this is the host's own un-tunnelled address --
    # confirm.
    if requests.get('https://icanhazip.com', timeout=30).text.strip() == '98.128.172.12':
        subprocess.call(['wg-quick', 'up', 'mullvad-se4'])
        raise SystemExit

    # The ArangoDB password is the first command-line argument.
    if len(argv) < 2:
        raise SystemExit("usage: mrkoll_scraperapi.py <arangodb-password>")

    # Connection details for ArangoDB
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.1.10:8529"

    # Open the connection to ArangoDB
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=argv[1]
    )
    leak = db.collection("phoneleak")

    count = 0   # Number of lookups attempted.
    errors = 0  # Number of failed lookups and failed database writes.

    while True:
        count += 1

        # Fetch a random person
        doc = leak.random()
        if doc is None:
            # Nothing left to look up.
            print('\nNo documents left in "phoneleak".')
            break

        # Run the search on mrkoll.se
        d = find_person(doc["phone"])
        if d is None:
            errors += 1

        first_name = d.get("first_name", "") if d else ""
        print(f'{count} - {errors} {first_name} ', end="\r")

        if d is None:  # The IP address is blocked or something went wrong
            continue

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = f'{ip} - cache'

        # Move the record: store the result, then remove it from the queue.
        # A failure is counted and reported, and the loop carries on.
        try:
            db.collection("phone").insert(d)
            leak.delete(doc["_key"])
        except Exception as err:
            errors += 1
            print(f'\nCould not store {doc["_key"]}: {err}')

@ -0,0 +1,14 @@
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2021.5.30
charset-normalizer==2.0.4
idna==3.2
PyJWT==2.1.0
python-arango==7.2.0
requests==2.26.0
requests-toolbelt==0.9.1
setuptools-scm==6.0.1
soupsieve==2.2.1
toml==0.10.2
urllib3==1.26.6
requests_cache==0.7.4
Loading…
Cancel
Save