"""Look up phone numbers on mrkoll.se and store the results in ArangoDB.

Run as a script: documents are taken at random from the ``phoneleak``
collection, searched for on mrkoll.se and the parsed result is inserted into
the ``phone`` collection. When the site reports too many requests the 4G modem
is rebooted to obtain a new public IP address.
"""

import re
import socket
import subprocess
from datetime import datetime
from getpass import getpass
from random import randint
from time import sleep

import cloudscraper
import requests
import werkzeug
from arango import ArangoClient

socket.setdefaulttimeout(20)

# Must run before robobrowser is imported (presumably robobrowser expects the
# old ``werkzeug.cached_property`` location -- confirm against installed versions).
werkzeug.cached_property = werkzeug.utils.cached_property
from robobrowser import RoboBrowser  # noqa: E402

BASE_URL = "https://mrkoll.se/"
LIMIT_URL = "https://mrkoll.se/om/limit/"
IP_LOOKUP_URL = "https://api.ipify.org"
# Substring of the ``iwgetid`` output expected when connected through the modem.
MODEM_SSID = "4G-UFI-5671"
# An address that must NOT be accepted after a modem restart
# (presumably the non-modem connection -- TODO confirm).
REJECTED_IP = '98.128.174.224'
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"


class Scraper:
    """RoboBrowser session for mrkoll.se that restarts the modem when blocked.

    Attributes:
        browser: the current ``RoboBrowser`` instance.
        ip: public IP returned by the latest modem restart ('' until one
            happens or the caller assigns it).
    """

    def __init__(self):
        # Set ``ip`` first: start_browser() may trigger restart(), which
        # records the new IP, and that value must not be overwritten here.
        self.ip = ''
        self.start_browser()

    def start_browser(self):
        """Create a fresh browser session and open the start page."""
        session = requests.Session()
        self.browser = RoboBrowser(
            session=session, user_agent=USER_AGENT, history=True, parser="lxml"
        )
        sleep(2)
        self.open(BASE_URL)

    def open(self, url):
        """Open *url*, restarting the modem and retrying while the site blocks us."""
        while True:
            self.browser.open(url)
            blocked = (
                "Du har gjort för många anrop" in self.browser.parsed.text
                or self.browser.state.url == LIMIT_URL
            )
            if not blocked:
                break
            self.restart()

    def restart(self):
        """Log the blocked IP, restart the modem and start a new browser.

        Relies on the module-level ``db`` handle created in the script section.
        """
        # Insert a document with info about the blocked IP.
        db.collection("phone").insert(
            {
                '_key': datetime.now().strftime("%Y%m%d%H%M"),
                'blocked': requests.get("http://api.ipify.org/").text,
            }
        )
        # Restart the modem.
        self.ip = restart_modem()
        # start_browser() assigns self.browser itself and returns None, so its
        # result must not be assigned (and it takes no extra argument).
        self.start_browser()

    def viewing(self):
        """Return the parsed HTML of the page the browser is currently on."""
        return self.browser.parsed


def _countdown(start, stop=0):
    """Print a one-line countdown from *start* down to *stop* + 1, one second per step."""
    for remaining in range(start, stop, -1):
        print(remaining, ' ', end='\r')
        sleep(1)


def _wifi_info():
    """Return the decoded output of the ``iwgetid`` command."""
    process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
    return process.communicate()[0].decode()


def _reboot_modem_via_web_ui():
    """Log in to the modem's web interface with headless Chrome and click reboot.

    Raises whatever Selenium raises if a step fails; the driver is always shut
    down so failed attempts do not leave Chrome processes behind.
    """
    # Imported lazily so selenium is only needed when a restart is required.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By

    options = Options()
    options.add_argument("--headless")
    browser = webdriver.Chrome(options=options)
    try:
        # Log in to the modem.
        browser.get("http://192.168.100.1/cellweb/login.asp")
        sleep(3)
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration outside the source file.
        browser.find_element(By.ID, "user_name").send_keys("admin")
        browser.find_element(By.ID, "user_password").send_keys("1340asde")

        # Go to reboot and accept; wait for the page after each click.
        click_path = (
            "/html/body/section/form/button",  # Login
            "/html/body/section/div[2]/div[6]/a",  # More
            "/html/body/section[2]/div/div[2]/div/a",  # Reboot
            "/html/body/div[4]/div/div/div[2]/div[2]",  # Accept
        )
        for xpath in click_path:
            browser.find_element(By.XPATH, xpath).click()
            sleep(3)
        browser.switch_to.alert.accept()  # Accept again (alert)
    finally:
        browser.quit()


def restart_modem():
    """Reboot the modem, reconnect to its wifi and return the new public IP.

    Blocks until the reboot has been triggered successfully, the machine is
    connected to the modem's network again and the public IP differs from
    ``REJECTED_IP``.

    Returns:
        The new public IP address as reported by api.ipify.org.
    """
    print('\nStartar om modem...\n')
    while True:
        try:
            _reboot_modem_via_web_ui()
            break
        except Exception as e:
            # If restarting via the web UI fails, power-cycling is an option
            # when the modem is connected to USB on the Raspberry Pi:
            # subprocess.run(['sudo', 'uhubctl', '-l', '1-1', '-a', 'cycle', '-d', '5', '-p', '3'])
            print('Kunde inte starta om.')
            print(e)
            sleep(120)

    # Wait for the modem to restart.
    _countdown(180, 16)

    while True:
        # Make sure the Raspberry connects to the wifi again.
        subprocess.run(["sudo", "systemctl", "daemon-reload"])
        _countdown(15, 10)
        subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"])
        _countdown(10, 0)

        # Check the network name.
        if MODEM_SSID in _wifi_info():
            try:
                ip = requests.get(IP_LOOKUP_URL).text
            except requests.exceptions.RequestException as e:
                # The link may not be usable yet; try another round instead of
                # crashing the whole run.
                print(e)
                continue
            sleep(5)
            print("Ny ip:", ip)
            if ip != REJECTED_IP:
                return ip
        else:
            print('Testar snart igen...')


def _set_text(d, key, parent, tag, attrs):
    """Store the text of the first *tag* matching *attrs* under *key*, if found."""
    node = parent.find(tag, attrs) if parent is not None else None
    if node is not None:
        d[key] = node.text


def _last_digits_params(html):
    """Return the two arguments of the page's ``showPersnr`` call, or None.

    The arguments are cut out of the raw HTML between the ``showPersnr`` marker
    and the ``>Jag godkänner`` marker. If either marker is missing, or fewer
    than two comma-separated values are found, None is returned.
    """
    start = html.find("showPersnr")
    end = html.find(">Jag godkänner")
    if start == -1 or end == -1:
        return None
    # Offsets kept from the original slicing: +11 skips the marker plus one
    # character (presumably the opening parenthesis), -2 drops the two
    # characters before the end marker.
    args = html[start + 11 : end - 2].replace("'", "").split(",")
    if len(args) < 2:
        return None
    return args[0], args[1]


def _parse_neighbours(soup):
    """Return ``{name: {"link", "lived_years"}}`` for every well-formed neighbour link."""
    neighbours = {}
    for div in soup.find_all("div", {"class": "peoplecont"}):
        for person in div.find_all("a", href=True):
            try:
                name = person.find("strong").text
                years_text = person.find("span", {"class": "flyttclass"}).text
                # group() is the whole number; indexing it with [0] kept only
                # the first digit.
                lived_years = re.search(r"\d+", years_text).group()
            except AttributeError:
                # Missing tag or no digits: skip this entry, keep the rest.
                continue
            neighbours[name] = {"link": person["href"], "lived_years": lived_years}
    return neighbours


def find_person(number, scraper):
    """Search mrkoll.se for a phone number and parse the result page.

    Args:
        number: the phone number to enter in the search form.
        scraper: a ``Scraper`` whose browser is used for the search.

    Returns:
        A dict with whatever fields could be parsed (names, address, history,
        date of birth, personal number, neighbours, name changes, court
        markers), or an empty dict when the search gave no hits.
    """
    d = {}

    # Make sure we are on the start page, where the search form is.
    if scraper.browser.state.url != BASE_URL:
        scraper.browser.back()
    sleep(randint(2, 3))
    form = scraper.browser.get_form(action="requestSearch/")
    if form is None:
        # Going back did not lead to the start page (e.g. fresh history after
        # a restart); open it explicitly.
        scraper.open(BASE_URL)
        form = scraper.browser.get_form(action="requestSearch/")
    form["n"].value = number
    sleep(randint(2, 3))

    while True:
        try:
            scraper.browser.submit_form(form)
            break
        except Exception:
            # ``Exception`` rather than a bare except so Ctrl-C still stops
            # the script; remember the new IP for ``checked_from_ip``.
            scraper.ip = restart_modem()

    # NOTE(review): the result page is not checked for the "too many requests"
    # block here; only Scraper.open() does that.
    soup = scraper.viewing()
    if "Sökningen gav 0 träffar..." in soup.text:
        return {}

    d["url_via_telefonnummer"] = scraper.browser.state.url

    for a in soup.find_all("a", href=True):
        if "boende-med-" in a["href"]:
            d["lives_with_url"] = a["href"]
        if "-hushall" in a["href"]:
            d["lives_with"] = a.text

    info = soup.find("div", {"class": "block_col1"})
    _set_text(d, "first_name", info, "span", {"title": "Detta är personens tilltalsnamn"})
    _set_text(d, "middle_name", info, "span", {"title": "Detta är ett förnamn"})
    _set_text(d, "last_name", info, "span", {"title": "Detta är ett efternamn"})
    _set_text(d, "history", info, "div", {"class": "history_container"})

    if info is not None:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        if adress:
            d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text

    # Personal number
    ## Date of birth
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            dob = block.find("span", {"class": "f_line2"})
            if dob is not None:
                d["date_of_birth"] = dob.text.replace("-XXXX", "")

    ## Last four digits (best effort; only attempted when the page holds the
    ## required arguments and a date of birth was found)
    params = _last_digits_params(str(soup))
    if params is not None and "date_of_birth" in d:
        url_ajax = "/ajax/lastDigits/?p=" + params[0] + "&k=" + params[1]
        sleep(2)  # Wait a little
        try:
            four_last = requests.get("http://mrkoll.se" + url_ajax).text
        except requests.exceptions.RequestException:
            four_last = None
        if four_last is not None:
            d["personal_number"] = "{dob}-{fl}".format(
                dob=d["date_of_birth"], fl=four_last
            )

    d["neighbours"] = _parse_neighbours(soup)

    d["name_change"] = [
        div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
    ]

    d["prosecuted"] = {
        "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
        "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
        "straffföreläggande": soup.find("div", {"class": "resmark res_s"}) is not None,
    }

    return d


if __name__ == "__main__":
    # NOTE: a commented-out proxy setting with embedded credentials used to
    # live here; keep such secrets out of the source file.

    # ArangoDB connection info
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    # Connect to ArangoDB; the password is asked for interactively.
    # ``db`` must stay a module-level name: Scraper.restart() uses it.
    pwd = getpass('Arangolösenord för Phone:').strip()
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    # Wait until we are connected through the modem's network. The network
    # name is re-read on every round; otherwise a failed first check could
    # never succeed.
    while True:
        if MODEM_SSID in _wifi_info():
            ip = requests.get(IP_LOOKUP_URL).text
            print("IP:", ip)
            break
        print('Testar snart igen...')
        _countdown(30)

    scraper = Scraper()
    if not scraper.ip:
        # No restart happened while the browser started, so the IP measured
        # above is still the current one.
        scraper.ip = ip
    scraper.open(BASE_URL)

    count = 0
    while True:
        count += 1
        print(count, end="\r")
        doc = leak.random()
        leak.delete(doc["_key"])

        # Do the search on mrkoll.se
        d = find_person(doc["phone"], scraper)
        sleep(2)

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = scraper.ip
        db.collection("phone").insert(d)