# ---------------------------------------------------------------------------
# mrkoll/mrkoll.py — scrape person data from mrkoll.se by phone number,
# power-cycling the 4G modem to rotate IP whenever the site rate-limits us.
#
# NOTE(review): the same patch added mrkoll/mr_koll_cloudscraper.py, a
# three-line ScraperAPI probe with two defects worth recording: it passed
# urlencode(params) to requests' params= (requests encodes the dict itself,
# so pre-encoding double-encodes the query) and it disabled TLS verification.
# The correct call is simply:
#     params = {"api_key": API_KEY, "url": url, "country_code": "se"}
#     requests.get("http://api.scraperapi.com/", params=params)
# ---------------------------------------------------------------------------

import re
import socket
import subprocess
from datetime import datetime
from getpass import getpass
from random import randint
from time import sleep

import requests

socket.setdefaulttimeout(20)

from arango import ArangoClient
import werkzeug

# robobrowser predates werkzeug 1.0; restore the moved attribute before import.
werkzeug.cached_property = werkzeug.utils.cached_property
from robobrowser import RoboBrowser


class Scraper:
    """RoboBrowser wrapper for mrkoll.se.

    When the site blocks us ("Du har gjort för många anrop" or a redirect to
    /om/limit/) the modem is rebooted to obtain a fresh external IP.
    NOTE(review): ``restart`` relies on the module-global ``db`` (ArangoDB
    handle) created in ``__main__`` — confirm this script is only run as main.
    """

    def __init__(self):
        self.ip = ""
        self.start_browser()

    def start_browser(self):
        """(Re)create the browser session and open the start page."""
        session = requests.Session()
        user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/80.0.3987.132 Safari/537.36"
        )
        self.browser = RoboBrowser(
            session=session, user_agent=user_agent, history=True, parser="lxml"
        )
        sleep(2)
        self.open("https://mrkoll.se/")

    def open(self, url):
        """Open *url*; if mrkoll says we are rate-limited, rotate IP and retry."""
        while True:
            self.browser.open(url)
            if (
                "Du har gjort för många anrop" in self.browser.parsed.text
                or self.browser.state.url == "https://mrkoll.se/om/limit/"
            ):
                self.restart()
            else:
                break

    def restart(self):
        """Log the blocked IP, reboot the modem and restart the browser."""
        # Record which IP got blocked (key = timestamp at minute resolution).
        db.collection("phone").insert(
            {
                "_key": datetime.now().strftime("%Y%m%d%H%M"),
                "blocked": requests.get("http://api.ipify.org/").text,
            }
        )
        self.ip = restart_modem()
        # BUGFIX: the original did ``self.browser = self.start_browser(self)``,
        # which raises TypeError (extra positional arg) and would have
        # assigned None to self.browser; start_browser() sets it itself.
        self.start_browser()

    def viewing(self):
        """Return the parsed HTML of the current page."""
        return self.browser.parsed


def restart_modem():
    """Reboot the 4G modem via its web admin UI and wait for a new external IP.

    Blocks until the Raspberry Pi is back on the modem's wifi ("4G-UFI-5671")
    and the external IP differs from the known static fallback address.
    Returns the new IP as a string.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    print('\nStartar om modem...\n')

    while True:
        try:
            # Headless Chrome against the modem's local admin interface.
            options = Options()
            options.headless = True
            browser = webdriver.Chrome(options=options)

            # Log in.
            browser.get("http://192.168.100.1/cellweb/login.asp")
            sleep(3)
            browser.find_element_by_id("user_name").send_keys("admin")
            browser.find_element_by_id("user_password").send_keys("1340asde")

            # Navigate to the reboot action and confirm it twice.
            browser.find_element_by_xpath("/html/body/section/form/button").click()  # Login
            sleep(3)
            browser.find_element_by_xpath("/html/body/section/div[2]/div[6]/a").click()  # More
            sleep(3)
            browser.find_element_by_xpath(
                "/html/body/section[2]/div/div[2]/div/a"
            ).click()  # Reboot
            sleep(3)
            browser.find_element_by_xpath(
                "/html/body/div[4]/div/div/div[2]/div[2]"
            ).click()  # Accept
            sleep(3)
            browser.switch_to.alert.accept()  # Accept again (alert)
            browser.close()
            break
        except Exception as e:
            # Web-UI reboot failed; wait and retry.  (Power-cycling the USB
            # port with uhubctl is the alternative when the modem hangs off a
            # Raspberry Pi hub.)
            print('Kunde inte starta om.')
            print(e)
            sleep(120)

    # Give the modem time to come back up (visible countdown).
    for i in range(180, 16, -1):
        print(i, ' ', end='\r')
        sleep(1)

    while True:
        # Nudge the Raspberry Pi to reconnect to the modem's wifi.
        subprocess.run(["sudo", "systemctl", "daemon-reload"])
        for i in range(15, 10, -1):
            print(i, ' ', end='\r')
            sleep(1)
        subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"])
        for i in range(10, 0, -1):
            print(i, ' ', end='\r')
            sleep(1)

        # Only trust the new IP once we are on the modem's network.
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()
        if "4G-UFI-5671" in wlan:
            ip = requests.get("https://api.ipify.org").text
            sleep(5)
            print("Ny ip:", ip)
            if ip != '98.128.174.224':
                return ip
        else:
            print('Testar snart igen...')


def _parse_result(soup, d):
    """Fill *d* with the person fields parsed from a mrkoll.se hit page."""
    info = soup.find("div", {"class": "block_col1"})

    for key, title in (
        ("first_name", "Detta är personens tilltalsnamn"),
        ("middle_name", "Detta är ett förnamn"),
        ("last_name", "Detta är ett efternamn"),
    ):
        try:
            d[key] = info.find("span", {"title": title}).text
        except AttributeError:
            pass

    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except (AttributeError, IndexError):
        pass

    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except AttributeError:
        pass

    # Date of birth = personnummer with the masked last-four digits stripped.
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            d["date_of_birth"] = block.find(
                "span", {"class": "f_line2"}
            ).text.replace("-XXXX", "")

    # The last four digits come from a separate ajax endpoint whose arguments
    # are embedded in the page's showPersnr(...) javascript call.
    try:
        t = str(soup)
        v = (
            t[t.find("showPersnr") + 11 : t.find(">Jag godkänner") - 2]
            .replace("'", "")
            .split(",")
        )
        sleep(2)  # be gentle with the site
        four_last = requests.get(
            "http://mrkoll.se/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        ).text
        d["personal_number"] = "{dob}-{fl}".format(
            dob=d["date_of_birth"], fl=four_last
        )
    except Exception:
        pass

    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            for person in div.find_all("a", href=True):
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUGFIX: the original used .group()[0], which kept only
                    # the first digit of multi-digit year counts.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except Exception:
        pass

    return d


def find_person(number, scraper):
    """Search mrkoll.se for *number* and return a dict of scraped fields.

    Returns {} when the search yields zero hits.  On a failed form submit the
    modem is restarted and the submit retried.
    """
    d = {}

    # Make sure we are on the start page before grabbing the search form.
    if scraper.browser.state.url != "https://mrkoll.se/":
        scraper.browser.back()
        sleep(randint(2, 3))

    form = scraper.browser.get_form(action="requestSearch/")
    form["n"].value = number
    sleep(randint(2, 3))

    while True:
        try:
            scraper.browser.submit_form(form)
            break
        except Exception:
            # Most likely a dead connection — rotate IP and retry.
            restart_modem()

    soup = scraper.viewing()

    d["url_via_telefonnummer"] = scraper.browser.state.url
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}

    return _parse_result(soup, d)


if __name__ == "__main__":

    # ArangoDB connection info.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    pwd = getpass('Arangolösenord för Phone:').strip()

    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    # Wait until we are on the modem's wifi, then read the external IP.
    # BUGFIX: the original read iwgetid once *before* the loop (a wrong
    # network spun forever) and its countdown printed without sleeping.
    while True:
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()
        if "4G-UFI-5671" in wlan:
            ip = requests.get("https://api.ipify.org").text
            print("IP:", ip)
            break
        print('Testar snart igen...')
        for i in range(30, 0, -1):
            print(i, end='\r')
            sleep(1)

    scraper = Scraper()
    scraper.ip = ip
    scraper.open("https://mrkoll.se/")

    count = 0
    while True:
        count += 1
        print(count, end="\r")
        # NOTE(review): the number is removed from the queue *before* the
        # lookup, so a crash mid-lookup loses it — confirm this is intended.
        doc = leak.random()
        leak.delete(doc["_key"])

        # Do the mrkoll.se lookup.
        d = find_person(doc["phone"], scraper)
        sleep(2)

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = scraper.ip
        db.collection("phone").insert(d)
# ---------------------------------------------------------------------------
# mrkoll/mrkoll_mac.py — mrkoll.se phone lookup backed by a local MongoDB
# queue ("leak" collection).  No IP rotation on this machine: the script
# simply stops when the site blocks the current IP.
# ---------------------------------------------------------------------------

import re
from datetime import datetime
from random import randint
from time import sleep

import requests
from pymongo import MongoClient
import werkzeug

# robobrowser predates werkzeug 1.0; restore the moved attribute before import.
werkzeug.cached_property = werkzeug.utils.cached_property
from robobrowser import RoboBrowser


class Scraper:
    """Minimal RoboBrowser wrapper for mrkoll.se."""

    def __init__(self):
        session = requests.Session()
        user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/80.0.3987.132 Safari/537.36"
        )
        self.browser = RoboBrowser(
            session=session, user_agent=user_agent, history=True, parser="lxml"
        )
        sleep(2)
        self.browser.open("https://mrkoll.se/")

    def open(self, url):
        """Open *url* with no block handling."""
        self.browser.open(url)

    def viewing(self):
        """Return the parsed HTML of the current page."""
        return self.browser.parsed


def _parse_result(soup, d):
    """Fill *d* with the person fields parsed from a mrkoll.se hit page."""
    info = soup.find("div", {"class": "block_col1"})

    for key, title in (
        ("first_name", "Detta är personens tilltalsnamn"),
        ("middle_name", "Detta är ett förnamn"),
        ("last_name", "Detta är ett efternamn"),
    ):
        try:
            d[key] = info.find("span", {"title": title}).text
        except AttributeError:
            pass

    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except (AttributeError, IndexError):
        pass

    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except AttributeError:
        pass

    # Date of birth = personnummer with the masked last-four digits stripped.
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            d["date_of_birth"] = block.find(
                "span", {"class": "f_line2"}
            ).text.replace("-XXXX", "")

    # Last four digits via the ajax endpoint referenced by showPersnr(...).
    try:
        t = str(soup)
        v = (
            t[t.find("showPersnr") + 11 : t.find(">Jag godkänner") - 2]
            .replace("'", "")
            .split(",")
        )
        sleep(2)  # be gentle with the site
        four_last = requests.get(
            "http://mrkoll.se/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        ).text
        d["personal_number"] = "{dob}-{fl}".format(
            dob=d["date_of_birth"], fl=four_last
        )
    except Exception:
        pass

    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            for person in div.find_all("a", href=True):
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUGFIX: the original used .group()[0], which kept only
                    # the first digit of multi-digit year counts.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except Exception:
        pass

    return d


def find_person(number, scraper):
    """Search mrkoll.se for *number*.

    Returns {} on zero hits, the string "blocked" when the site rate-limits
    us, otherwise a dict of scraped fields.
    """
    d = {}
    if scraper.browser.state.url != "https://mrkoll.se/":
        scraper.browser.back()
        sleep(randint(2, 3))
    form = scraper.browser.get_form(action="requestSearch/")
    form["n"].value = number
    sleep(randint(2, 3))
    scraper.browser.submit_form(form)
    soup = scraper.viewing()

    d["url_via_telefonnummer"] = scraper.browser.state.url
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}
    # BUGFIX: the original tested ``... in soup`` (a bs4 Tag, which never
    # matches a substring); the block marker must be looked for in soup.text.
    if (
        "Du har gjort för många anrop" in soup.text
        or scraper.browser.state.url == "https://mrkoll.se/om/limit/"
    ):
        return "blocked"

    return _parse_result(soup, d)


if __name__ == '__main__':
    client = MongoClient('mongodb://localhost:27017')
    db_client = client['phone_db']
    db = db_client['phone']

    leak = db_client['leak']
    print('Nummer kvar att kolla:', leak.count_documents({}))

    scraper = Scraper()
    count = 0
    while True:
        count += 1
        print(count, end="\r")
        # NOTE(review): the number is removed from the queue before the
        # lookup succeeds — a crash mid-lookup loses it.
        doc = leak.find_one()
        leak.delete_one(doc)
        d = find_person(doc["phone"], scraper)

        # Pause the scrape during the 01:00 hour.
        if datetime.now().strftime("%H") == '01':
            sleep(18000)

        sleep(10)
        if d == "blocked":
            client.close()
            print(doc)
            print(count, 'blocked')
            exit()

        # BUGFIX: MongoDB documents carry "_id", not "_key"; the original
        # ``doc["_key"]`` raised KeyError on every document.
        d["_key"] = str(doc.get("_key", doc["_id"]))
        d["_id"] = 'phone/' + d["_key"]
        d["phone"] = doc["phone"]
        db.insert_one(d)
# ---------------------------------------------------------------------------
# mrkoll/mrkoll_oxylabs.py — mrkoll.se phone lookup through Oxylabs proxies,
# rotating over the servers listed in oxylabs_servers_trail.json, writing
# results to ArangoDB.
# ---------------------------------------------------------------------------

USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/80.0.3987.132 Safari/537.36"
)


def _parse_result(soup, d):
    """Fill *d* with the person fields parsed from a mrkoll.se hit page."""
    info = soup.find("div", {"class": "block_col1"})

    for key, title in (
        ("first_name", "Detta är personens tilltalsnamn"),
        ("middle_name", "Detta är ett förnamn"),
        ("last_name", "Detta är ett efternamn"),
    ):
        try:
            d[key] = info.find("span", {"title": title}).text
        except AttributeError:
            pass

    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except (AttributeError, IndexError):
        pass

    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except AttributeError:
        pass

    # Date of birth = personnummer with the masked last-four digits stripped.
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            d["date_of_birth"] = block.find(
                "span", {"class": "f_line2"}
            ).text.replace("-XXXX", "")

    # Last four digits via the ajax endpoint referenced by showPersnr(...).
    try:
        t = str(soup)
        v = (
            t[t.find("showPersnr") + 11 : t.find(">Jag godkänner") - 2]
            .replace("'", "")
            .split(",")
        )
        sleep(2)  # be gentle with the site
        four_last = requests.get(
            "http://mrkoll.se/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        ).text
        d["personal_number"] = "{dob}-{fl}".format(
            dob=d["date_of_birth"], fl=four_last
        )
    except Exception:
        pass

    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            for person in div.find_all("a", href=True):
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUGFIX: the original used .group()[0], which kept only
                    # the first digit of multi-digit year counts.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except Exception:
        pass

    return d


def find_person(number, errors, server):
    """Look up *number* on mrkoll.se through the Oxylabs proxy *server*.

    Returns (data, errors): data is None on a fetch error or when the site
    blocks the proxy IP (errors is then incremented), {} on zero hits,
    otherwise a dict of scraped fields.
    """
    # SECURITY(review): proxy credentials are hard-coded — move to config/env.
    password = 'T8ARbTg6qY'
    user = 'edfast'
    proxy = f"http://{user}:{password}@{server['ip']}:6000"

    url = f'https://mrkoll.se/resultat?n={number}'
    opener = request.build_opener(request.ProxyHandler({'http': proxy}))
    req = request.Request(url, headers={'User-agent': USER_AGENT})

    # (The original carried a dead retry counter left over from a removed
    # while-loop — ``n`` could never reach 3.  One attempt is made per call;
    # the caller counts errors and moves on to the next proxy.)
    try:
        sleep(2)
        response = opener.open(req)
        body = response.read().decode()
    except (urllib.error.HTTPError, socket.timeout) as e:
        print(e)
        sleep(2)
        return None, errors + 1

    soup = BeautifulSoup(body, 'html.parser')

    if (
        "Du har gjort för många anrop" in soup.text
        or response.geturl() == "https://mrkoll.se/om/limit/"
    ):
        return None, errors + 1

    d = {"url_via_telefonnummer": response.geturl()}
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}, errors

    return _parse_result(soup, d), errors


if __name__ == "__main__":

    # BUGFIX: the original leaked the file handle via json.load(open(...)).
    with open('oxylabs_servers_trail.json') as fh:
        servers_json = json.load(fh)

    # ArangoDB connection info.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.1.20:8529"

    pwd = getpass('Arangolösenord för Phone:').strip()

    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    count = 0
    errors = 0

    while True:
        for server in servers_json:
            count += 1

            # Pick a random queued number.
            doc = leak.random()

            # Do the mrkoll.se lookup through this proxy.
            d, errors = find_person(doc["phone"], errors, server)
            print(f'{count} - {errors}', end="\r")
            sleep(2)

            if d is None:  # IP blocked or fetch failed — try the next proxy.
                continue

            d["_key"] = doc["_key"]
            d["_id"] = "phone/" + str(d["_key"])
            d["phone"] = doc["phone"]
            d["checked_from_ip"] = 'oxylabs'
            try:
                db.collection("phone").insert(d)
                leak.delete(doc["_key"])
            except Exception:
                pass
# ---------------------------------------------------------------------------
# mrkoll/mrkoll_proxyland.py — mrkoll.se phone lookup through the Proxyland
# rotating proxy gateway, writing results to ArangoDB.
#
# NOTE(review): the original file was left mid-debug and could never run the
# pipeline: find_person() called requests.Session(proxies=...) (TypeError —
# Session() takes no arguments), overwrote the target URL with api.ipify.org,
# dumped the response to html_out.html and called exit(); __main__ likewise
# ended in a debug print + exit() before ever touching the database.  All of
# that scaffolding (and the large commented-out experiments) is removed here.
# ---------------------------------------------------------------------------

USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/80.0.3987.132 Safari/537.36"
)


def _parse_result(soup, d):
    """Fill *d* with the person fields parsed from a mrkoll.se hit page."""
    info = soup.find("div", {"class": "block_col1"})

    for key, title in (
        ("first_name", "Detta är personens tilltalsnamn"),
        ("middle_name", "Detta är ett förnamn"),
        ("last_name", "Detta är ett efternamn"),
    ):
        try:
            d[key] = info.find("span", {"title": title}).text
        except AttributeError:
            pass

    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except (AttributeError, IndexError):
        pass

    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except AttributeError:
        pass

    # Date of birth = personnummer with the masked last-four digits stripped.
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            d["date_of_birth"] = block.find(
                "span", {"class": "f_line2"}
            ).text.replace("-XXXX", "")

    # Last four digits via the ajax endpoint referenced by showPersnr(...).
    try:
        t = str(soup)
        v = (
            t[t.find("showPersnr") + 11 : t.find(">Jag godkänner") - 2]
            .replace("'", "")
            .split(",")
        )
        sleep(2)  # be gentle with the site
        four_last = requests.get(
            "http://mrkoll.se/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        ).text
        d["personal_number"] = "{dob}-{fl}".format(
            dob=d["date_of_birth"], fl=four_last
        )
    except Exception:
        pass

    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            for person in div.find_all("a", href=True):
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUGFIX: the original used .group()[0], which kept only
                    # the first digit of multi-digit year counts.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except Exception:
        pass

    return d


def find_person(number, errors):
    """Look up *number* on mrkoll.se through the Proxyland gateway.

    Returns (data, errors): data is None on a fetch error or when the site
    blocks the proxy IP (errors is then incremented), {} on zero hits,
    otherwise a dict of scraped fields.
    """
    # SECURITY(review): proxy credentials are hard-coded — move to config/env.
    server = 'server.proxyland.io:9090'
    password = 'znzqUZwnYucVbaMUIJhgJlNpX'
    user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
    proxy = f'http://{user}:{password}@{server}'

    url = f'https://mrkoll.se/resultat?n={number}'
    opener = request.build_opener(request.ProxyHandler({'http': proxy}))
    req = request.Request(url, headers={'User-agent': USER_AGENT})

    try:
        sleep(2)
        response = opener.open(req)
        body = response.read().decode()
    except (urllib.error.HTTPError, socket.timeout) as e:
        print(e)
        sleep(2)
        return None, errors + 1

    soup = BeautifulSoup(body, 'html.parser')

    if (
        "Du har gjort för många anrop" in soup.text
        or response.geturl() == "https://mrkoll.se/om/limit/"
    ):
        return None, errors + 1

    d = {"url_via_telefonnummer": response.geturl()}
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}, errors

    return _parse_result(soup, d), errors


if __name__ == "__main__":

    # ArangoDB connection info.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.1.20:8529"

    pwd = getpass('Arangolösenord för Phone:').strip()

    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    count = 0
    errors = 0

    while True:
        count += 1

        # Pick a random queued number.
        doc = leak.random()

        # Do the mrkoll.se lookup.
        d, errors = find_person(doc["phone"], errors)
        print(f'{count} - {errors}', end="\r")
        sleep(2)

        if d is None:  # IP blocked or fetch failed — retry with another number.
            continue

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = 'proxyland'
        try:
            db.collection("phone").insert(d)
            leak.delete(doc["_key"])
        except Exception:
            pass
# ---------------------------------------------------------------------------
# mrkoll/mrkoll_proxyland_requestS.py — mrkoll.se phone lookup through the
# Proxyland gateway with a bounded retry loop, writing results to ArangoDB.
# (This span of the patch also contained the head of mrkoll/proxyland.py, a
# scratch file of commented-out proxy experiments that was dead code behind
# an exit() call.)
# ---------------------------------------------------------------------------

USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/80.0.3987.132 Safari/537.36"
)


def _parse_result(soup, d):
    """Fill *d* with the person fields parsed from a mrkoll.se hit page."""
    info = soup.find("div", {"class": "block_col1"})

    for key, title in (
        ("first_name", "Detta är personens tilltalsnamn"),
        ("middle_name", "Detta är ett förnamn"),
        ("last_name", "Detta är ett efternamn"),
    ):
        try:
            d[key] = info.find("span", {"title": title}).text
        except AttributeError:
            pass

    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except (AttributeError, IndexError):
        pass

    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except AttributeError:
        pass

    # Date of birth = personnummer with the masked last-four digits stripped.
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            d["date_of_birth"] = block.find(
                "span", {"class": "f_line2"}
            ).text.replace("-XXXX", "")

    # Last four digits via the ajax endpoint referenced by showPersnr(...).
    try:
        t = str(soup)
        v = (
            t[t.find("showPersnr") + 11 : t.find(">Jag godkänner") - 2]
            .replace("'", "")
            .split(",")
        )
        sleep(2)  # be gentle with the site
        four_last = requests.get(
            "http://mrkoll.se/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        ).text
        d["personal_number"] = "{dob}-{fl}".format(
            dob=d["date_of_birth"], fl=four_last
        )
    except Exception:
        pass

    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            for person in div.find_all("a", href=True):
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUGFIX: the original used .group()[0], which kept only
                    # the first digit of multi-digit year counts.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    try:
        d["prosecuted"] = {
            "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
            "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
            "straffföreläggande": soup.find("div", {"class": "resmark res_s"})
            is not None,
        }
    except Exception:
        pass

    return d


def find_person(number, errors):
    """Look up *number* on mrkoll.se through the Proxyland gateway.

    Retries the fetch up to twice; returns (data, errors) where data is None
    after repeated failures or when the site blocks the proxy IP (errors is
    then incremented), {} on zero hits, otherwise a dict of scraped fields.
    """
    # SECURITY(review): proxy credentials are hard-coded — move to config/env.
    user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
    password = 'znzqUZwnYucVbaMUIJhgJlNpX'
    proxy = f'http://{user}:{password}@server.proxyland.io:9090'

    url = f'https://mrkoll.se/resultat?n={number}'
    opener = request.build_opener(request.ProxyHandler({'http': proxy}))
    req = request.Request(url, headers={'User-agent': USER_AGENT})

    attempts = 0
    while True:
        attempts += 1
        if attempts == 3:
            return None, errors
        try:
            sleep(2)
            response = opener.open(req)
            body = response.read().decode()
            break
        # BUGFIX: the fetch goes through urllib, which never raises
        # requests.exceptions.SSLError — that handler could not fire and any
        # network error escaped the loop.  Catch the urllib/socket errors.
        except (urllib.error.URLError, socket.timeout) as e:
            print(e)
            sleep(3)

    soup = BeautifulSoup(body, 'html.parser')

    if (
        "Du har gjort för många anrop" in soup.text
        or response.geturl() == "https://mrkoll.se/om/limit/"
    ):
        errors += 1
        return None, errors

    # Restored from the sibling scripts (the original left this commented out
    # after switching away from requests).
    d = {"url_via_telefonnummer": response.geturl()}
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}, errors

    return _parse_result(soup, d), errors


if __name__ == "__main__":

    # ArangoDB connection info.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    pwd = getpass('Arangolösenord för Phone:').strip()

    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    count = 0
    errors = 0

    while True:
        count += 1

        # Pick a random queued number.
        doc = leak.random()

        # Do the mrkoll.se lookup.
        d, errors = find_person(doc["phone"], errors)
        print(f'{count} - {errors}', end="\r")
        sleep(2)

        if d is None:  # IP blocked or fetch failed — try another number.
            continue

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = 'proxyland'
        db.collection("phone").insert(d)
        leak.delete(doc["_key"])
requests.get('https://server.proxyland.io') +print(proxy_resp.headers['Proxy-Authenticate']) + + +server = 'server.proxyland.io:9090' +pwd = 'znzqUZwnYucVbaMUIJhgJlNpX' +user = 'MQlbuTVPhwhOwYlyerwBLuzKI' +proxies = { + "https": f"http://{user}:{pwd}@{server}", + "http": f"https://{user}:{pwd}@{server}", + } diff --git a/mrkoll/proxytest.py b/mrkoll/proxytest.py new file mode 100644 index 0000000..872d0ca --- /dev/null +++ b/mrkoll/proxytest.py @@ -0,0 +1,60 @@ +from time import sleep +import urllib.request as request + +server = "server.proxyland.io:9090" +user = "MQlbuTVPhwhOwYlyerwBLuzKI" +password = "znzqUZwnYucVbaMUIJhgJlNpX" +proxy = f"http://{user}:{password}@{server}" + +query = request.build_opener(request.ProxyHandler({"https": proxy})) + +user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" +headers = {"User-agent": user_agent} +url = "https://api.ipify.org" + +count = 0 +success = 0 +fail = 0 + +while True: + count += 1 + if count == 40: + break + + try: + req = request.Request(url, headers=headers) + response = query.open(req) + r = response.read().decode() + + success += 1 + + except Exception as e: + r = str(e) + fail += 1 + + print(f"Success: {success} Fail {fail} ({r})") + sleep(3) + + + +exit() + +from time import sleep +import urllib.request as request + +server = "server.proxyland.io:9090" +user = "MQlbuTVPhwhOwYlyerwBLuzKI" +password = "znzqUZwnYucVbaMUIJhgJlNpX" +proxy = f"http://{user}:{password}@{server}" + +query = request.build_opener(request.ProxyHandler({"https": proxy})) + +user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" +headers = {"User-agent": user_agent} +url = "https://api.ipify.org" + +req = request.Request(url, headers=headers) +response = query.open(req) +r = response.read().decode() + + diff --git a/mrkoll/restart.py b/mrkoll/restart.py new file mode 100644 
index 0000000..78537a3 --- /dev/null +++ b/mrkoll/restart.py @@ -0,0 +1,76 @@
+def restart_modem():
+    """Reboot the 4G modem through its web admin UI, wait for the Raspberry
+    Pi to rejoin the modem's wifi, and return the new external IP.
+
+    Returns:
+        str: the new public IP address as reported by api.ipify.org.
+    """
+    import requests
+    import subprocess
+    from selenium import webdriver
+    from selenium.webdriver.chrome.options import Options
+    # from chromedriver_py import binary_path
+    from time import sleep
+
+    # # determine desired usb device
+    # # to disable
+    # subprocess.run(['echo', '0', '>' '/sys/bus/usb/devices/usbX/power/autosuspend_delay_ms'])
+    # subprocess.run(['echo', 'auto', '>' '/sys/bus/usb/devices/usbX/power/control'])
+    # # to enable
+    # #subprocess.run(['echo', 'on', '>' '/sys/bus/usb/devices/usbX/power/control'])
+
+    print("Nuvarande ip:", requests.get("https://api.ipify.org").text, '\nStartar om modem...')
+
+    # Set up a headless selenium browser.
+    options = Options()
+    options.headless = True
+    browser = webdriver.Chrome(executable_path='/home/pi/chromedriver.apk', options=options)
+
+    # Log in to the modem's admin page.
+    # SECURITY NOTE(review): admin credentials are hard-coded here (and this
+    # file is committed) — move them to a config file or environment variables.
+    browser.get("http://192.168.100.1/cellweb/login.asp")
+    sleep(3)
+    username = browser.find_element_by_id("user_name")
+    password = browser.find_element_by_id("user_password")
+    username.send_keys("admin")
+    password.send_keys("1340asde")
+
+    # Navigate to the reboot page and confirm.
+    browser.find_element_by_xpath("/html/body/section/form/button").click()  # Login
+    sleep(1)
+    browser.find_element_by_xpath("/html/body/section/div[2]/div[6]/a").click()  # More
+    sleep(1)
+    browser.find_element_by_xpath("/html/body/section[2]/div/div[2]/div/a").click()  # Reboot
+    sleep(1)
+    browser.find_element_by_xpath("/html/body/div[4]/div/div/div[2]/div[2]").click()  # Accept
+    sleep(1)
+    # BUGFIX: switch_to_alert() was removed in Selenium 4; use switch_to.alert
+    # (this also matches the usage already present in mrkoll.py).
+    browser.switch_to.alert.accept()  # Accept again (alert)
+    browser.close()
+
+    # Wait for the modem to restart.
+    # BUGFIX: the countdown printed 180..1 instantly because it never slept;
+    # sleep(1) per step makes it an actual ~180 s wait.
+    for i in range(180, 0, -1):
+        print(i, end='\r')
+        sleep(1)
+    while True:
+        # Make sure the Raspberry Pi reconnects to the wifi (Hallon).
+        subprocess.run(["sudo", "systemctl", "daemon-reload"])
+        sleep(5)
+        subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"])
+        sleep(10)
+
+        # Check the current wifi network name.
+        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
+        wlan = process.communicate()[0].decode()
+        if "4G-UFI-5671" in wlan:
+            ip = requests.get("https://api.ipify.org").text
+            print("Ny ip:", ip)
+            return ip
+        else:
+            print('Testar snart igen...')
+            # BUGFIX: same as above — actually wait ~30 s before retrying.
+            for i in range(30, 0, -1):
+                print(i, end='\r')
+                sleep(1)
+
+
+restart_modem()
\ No newline at end of file
diff --git a/mrkoll/test_pi.py b/mrkoll/test_pi.py new file mode 100644 index 0000000..4d74137 --- /dev/null +++ b/mrkoll/test_pi.py @@ -0,0 +1,9 @@
+# Manual test: power-cycle the USB-attached modem via uhubctl and print the
+# public IP before and after the cycle.
+import subprocess
+import requests
+from time import sleep
+
+print("Nuvarande ip:", requests.get("https://api.ipify.org").text, '\nStartar om modem...')
+sleep(5)
+subprocess.run(['sudo', 'uhubctl', '-l', '1-1', '-a', 'cycle', '-d', '5', '-p', '3'])
+sleep(400)
+print("Nuvarande ip:", requests.get("https://api.ipify.org").text, '\nStartar om modem...')
\ No newline at end of file
diff --git a/mrkoll/testmongo.py b/mrkoll/testmongo.py new file mode 100644 index 0000000..315b036 --- /dev/null +++ b/mrkoll/testmongo.py @@ -0,0 +1,12 @@
+# Scratch test: insert one document into a local MongoDB and read it back.
+import pymongo
+myclient = pymongo.MongoClient("192.168.0.10:27017")
+mydb = myclient["mydatabase"]
+mycol = mydb["customers"]
+mydict = { "name": "John", "address": "Highway 37" }
+mycol.insert_one(mydict)
+x = mycol.find({})
+for i in x:
+    print(i)
+
+
+# SECURITY NOTE(review): this looks like a committed ArangoDB root secret —
+# rotate it and remove it from version control.
+rootarango = '88ccbf65a4e8865ce7da86f72a5fc3cf21e805e0137213214c95a3eb2e8e9817'
\ No newline at end of file