diff --git a/docker/mrkoll/Dockerfile b/docker/mrkoll/Dockerfile
index d61b23a..27234c4 100644
--- a/docker/mrkoll/Dockerfile
+++ b/docker/mrkoll/Dockerfile
@@ -1,14 +1,15 @@
+# syntax=docker/dockerfile:1
-FROM python:3.8
+FROM python:3.8-slim-buster

-WORKDIR /
+COPY requirements.txt requirements.txt

-COPY requirements.txt .
+RUN pip3 install -r requirements.txt

-RUN pip install -r requirements.txt
+COPY . .

-ADD . .
+ENTRYPOINT [ "python3", "mrkoll_scraperapi.py" ]

-ENTRYPOINT [ "python", "facebook/mrkoll.py" ]
+CMD [""]

 # docker buildx build --file docker/mrkoll/Dockerfile --platform linux/arm -t l3224/fb-scraper:mrkoll --push .
\ No newline at end of file
diff --git a/docker/mrkoll/mrkoll_scraperapi.py b/docker/mrkoll/mrkoll_scraperapi.py
new file mode 100644
index 0000000..c73867a
--- /dev/null
+++ b/docker/mrkoll/mrkoll_scraperapi.py
@@ -0,0 +1,194 @@
+import re
+import subprocess
+import requests
+from sys import argv
+from time import sleep
+from bs4 import BeautifulSoup
+from arango import ArangoClient
+
+
+def find_person(number):
+    """
+    Look up personal data for a phone number on mrkoll.se.
+    """
+
+    sleep(2)
+
+    url = f'https://mrkoll.se/resultat?n={number}'
+
+    # Fetch the page through the ScraperAPI proxy
+    api_key = 'fcfe011cf66fddb61bb6425fcb5cb5e9'
+    payload = {'api_key': api_key, 'url': url, 'country_code': 'se', 'device_type': 'desktop'}
+
+    response = requests.get('http://api.scraperapi.com', params=payload)
+    r = response.text
+
+    soup = BeautifulSoup(r, 'html.parser')
+
+    # Rate limited or blocked: back off and let the caller retry
+    if (
+        "Du har gjort för många anrop" in soup.text
+        or response.url == "https://mrkoll.se/om/limit/"
+    ):
+        sleep(10)
+        return None
+
+    # Collect the scraped data in a dictionary
+    d = {}
+
+    d["url_via_telefonnummer"] = response.url
+    try:
+        for a in soup.find_all("a", href=True):
+            if "boende-med-" in a["href"]:
+                d["lives_with_url"] = a["href"]
+            if "-hushall" in a["href"]:
+                d["lives_with"] = a.text
+    except:
+        pass
+
+    if "Sökningen gav 0 träffar..." in soup.text:
+        return {}
+
+    info = soup.find("div", {"class": "block_col1"})
+
+    try:
+        d["first_name"] = info.find(
+            "span", {"title": "Detta är personens tilltalsnamn"}
+        ).text
+    except:
+        pass
+    try:
+        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
+    except:
+        pass
+    try:
+        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
+    except:
+        pass
+    try:
+        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
+        d["adress_line1"] = adress[0].text
+        if len(adress) > 1:
+            d["adress_line2"] = adress[1].text
+    except:
+        pass
+
+    try:
+        d["history"] = info.find("div", {"class": "history_container"}).text
+    except:
+        pass
+
+    # Personal identity number
+    ## Date of birth
+    for i in soup.find_all("div", {"class": "col_block1"}):
+        if "Personnummer" in i.text:
+            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
+                "-XXXX", ""
+            )
+    ## Last four digits, fetched through the site's AJAX endpoint
+    try:
+        start = "showPersnr"
+        end = ">Jag godkänner"
+        t = str(soup)
+        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
+        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
+        sleep(2)  # Wait a moment
+        four_last = requests.get("http://mrkoll.se" + url_ajax).text
+        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
+    except:
+        pass
+
+    try:
+        neighbours = {}
+        for div in soup.find_all("div", {"class": "peoplecont"}):
+            persons = div.find_all("a", href=True)
+            for person in persons:
+                neighbours[person.find("strong").text] = {
+                    "link": person["href"],
+                    "lived_years": re.search(
+                        r"\d+", person.find("span", {"class": "flyttclass"}).text
+                    ).group(),
+                }
+        d["neighbours"] = neighbours
+    except:
+        pass
+
+    try:
+        d["name_change"] = [
+            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
+        ]
+    except:
+        pass
+
+    try:
+        prosecuted = {}
+        prosecuted["brottsmål"] = soup.find("div", {"class": "resmark res_b"}) is not None
+        prosecuted["tvistemål"] = soup.find("div", {"class": "resmark res_t"}) is not None
+        prosecuted["straffföreläggande"] = soup.find("div", {"class": "resmark res_s"}) is not None
+        d["prosecuted"] = prosecuted
+    except:
+        pass
+
+    return d
+
+
+if __name__ == "__main__":
+
+    ip = 'scraperapi'
+
+    # If the container still reports this public IP, bring up the Mullvad WireGuard tunnel and exit
+    if requests.get('https://icanhazip.com').text.strip() == '98.128.172.12':
+        subprocess.call(['wg-quick', 'up', 'mullvad-se4'])
+        exit()
+
+    # ArangoDB connection info
+    user_arango = "Phone"
+    db_arango = "facebook"
+    host_arango = "http://192.168.1.10:8529"
+
+    # Connect to ArangoDB; the password is passed as the first CLI argument
+    db = ArangoClient(hosts=host_arango).db(
+        db_arango, username=user_arango, password=argv[1]
+    )
+    leak = db.collection("phoneleak")
+
+    count = 0
+    scraper_count = 0
+    errors = 0
+
+    while True:
+        count += 1
+
+        # Fetch a random person from the leak collection
+        doc = leak.random()
+
+        # Run the lookup on mrkoll.se
+        d = find_person(doc["phone"])
+
+        try:
+            name = d["first_name"] + ' '
+        except:
+            name = ' '
+        print(f'{count} - {errors} {name}', end="\r")
+
+        if d is None:  # The IP address was blocked or something else went wrong
+            continue
+
+        d["_key"] = doc["_key"]
+        d["_id"] = "phone/" + str(d["_key"])
+        d["phone"] = doc["phone"]
+        d["checked_from_ip"] = f'{ip} - cache'
+        try:
+            db.collection("phone").insert(d)
+            leak.delete(doc["_key"])
+        except:
+            pass
diff --git a/docker/mrkoll/requirements.txt b/docker/mrkoll/requirements.txt
new file mode 100644
index 0000000..6a7859b
--- /dev/null
+++ b/docker/mrkoll/requirements.txt
@@ -0,0 +1,14 @@
+beautifulsoup4==4.9.3
+bs4==0.0.1
+certifi==2021.5.30
+charset-normalizer==2.0.4
+idna==3.2
+PyJWT==2.1.0
+python-arango==7.2.0
+requests==2.26.0
+requests-toolbelt==0.9.1
+setuptools-scm==6.0.1
+soupsieve==2.2.1
+toml==0.10.2
+urllib3==1.26.6
+requests_cache==0.7.4