You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
323 lines
9.5 KiB
323 lines
9.5 KiB
import re |
|
import requests |
|
from datetime import datetime |
|
from random import randint |
|
from time import sleep |
|
from getpass import getpass |
|
import socket |
|
import cloudscraper |
|
|
|
socket.setdefaulttimeout(20) |
|
|
|
from arango import ArangoClient |
|
import werkzeug |
|
|
|
import subprocess |
|
|
|
werkzeug.cached_property = werkzeug.utils.cached_property |
|
from robobrowser import RoboBrowser |
|
|
|
class Scraper:
    """Browser session for mrkoll.se that restarts the modem when blocked.

    Attributes:
        browser: The RoboBrowser instance used for all page loads.
        ip: Public IP the scraper is currently believed to use. Empty until
            set by the caller or by :meth:`restart`.
    """

    def __init__(self):
        # Initialise ``ip`` first: start_browser() opens the site and may
        # trigger restart(), which stores the new IP. Assigning '' afterwards
        # would wipe that value.
        self.ip = ''
        self.start_browser()

    def start_browser(self):
        """Create a fresh RoboBrowser session and open the mrkoll.se start page.

        The new browser is stored on ``self.browser``; nothing is returned.
        """
        session = requests.Session()
        user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
        self.browser = RoboBrowser(
            session=session, user_agent=user_agent, history=True, parser="lxml"
        )
        sleep(2)

        self.open("https://mrkoll.se/")

    def open(self, url):
        """Open ``url``, restarting the modem and retrying while the site blocks us.

        A block is detected either by the rate-limit message in the page text
        or by a redirect to the site's limit page.
        """
        while True:
            self.browser.open(url)

            blocked = (
                "Du har gjort för många anrop" in self.browser.parsed.text
                or self.browser.state.url == "https://mrkoll.se/om/limit/"
            )
            if not blocked:
                break
            self.restart()

    def restart(self):
        """Log the blocked IP, restart the modem and start a new browser session.

        Relies on the module-level ``db`` handle (created in the ``__main__``
        section) and on the module-level ``restart_modem`` function.
        """
        # Store a document recording which public IP got blocked. The key is
        # the current time with minute resolution.
        db.collection("phone").insert({'_key': datetime.now().strftime("%Y%m%d%H%M"), 'blocked': requests.get("http://api.ipify.org/").text})

        # Restart the modem and remember the new public IP.
        self.ip = restart_modem()

        # start_browser() assigns self.browser itself and returns None, so it
        # must be called without arguments and without using its result.
        self.start_browser()

    def viewing(self):
        """Return the parsed HTML of the page the browser is currently on."""
        return self.browser.parsed
|
|
|
|
|
def restart_modem():
    """Reboot the 4G modem through its web UI and return the new public IP.

    Steps:
      1. Drive the modem's admin pages with headless Chrome to trigger a
         reboot, retrying every two minutes until that succeeds.
      2. Wait for the modem to come back, then repeatedly reload systemd and
         restart ``dhcpcd`` until the machine is on the modem's Wi-Fi again.
      3. Ask api.ipify.org for the public IP and return it once it differs
         from the excluded address.

    Returns:
        str: The public IP address after the restart.
    """
    import requests
    import subprocess
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from time import sleep

    def countdown(start, stop):
        """Print a one-line countdown from ``start`` down to ``stop`` (exclusive)."""
        for remaining in range(start, stop, -1):
            print(remaining, ' ', end='\r')
            sleep(1)

    print('\nStartar om modem...\n')

    while True:
        browser = None
        rebooted = False
        try:
            # Set up a headless selenium browser. The command-line flag works
            # on both old and new Selenium releases.
            options = Options()
            options.add_argument("--headless")
            browser = webdriver.Chrome(options=options)

            # Log in to the modem.
            # NOTE(review): credentials are hard-coded here; consider moving
            # them to configuration or environment variables.
            browser.get("http://192.168.100.1/cellweb/login.asp")
            sleep(3)
            username = browser.find_element(By.ID, "user_name")
            password = browser.find_element(By.ID, "user_password")
            username.send_keys("admin")
            password.send_keys("1340asde")

            # Go to reboot and accept.
            browser.find_element(By.XPATH, "/html/body/section/form/button").click()  # Login
            sleep(3)
            browser.find_element(By.XPATH, "/html/body/section/div[2]/div[6]/a").click()  # More
            sleep(3)
            browser.find_element(
                By.XPATH, "/html/body/section[2]/div/div[2]/div/a"
            ).click()  # Reboot
            sleep(3)
            browser.find_element(
                By.XPATH, "/html/body/div[4]/div/div/div[2]/div[2]"
            ).click()  # Accept
            sleep(3)
            browser.switch_to.alert.accept()  # Accept again (alert)
            rebooted = True
        except Exception as e:
            # The reboot through the web UI failed; report and retry later.
            # Possible fallback when the modem is powered from the Pi's USB
            # hub: power-cycle the port with
            #   sudo uhubctl -l 1-1 -a cycle -d 5 -p 3
            print('Kunde inte starta om.')
            print(e)
        finally:
            # Always shut the driver down, otherwise every failed attempt
            # leaves a Chrome/chromedriver process behind.
            if browser is not None:
                try:
                    browser.quit()
                except Exception as e:
                    print(e)

        if rebooted:
            break
        sleep(120)

    # Wait for the modem to restart.
    countdown(180, 16)

    while True:
        # Make sure the Raspberry Pi reconnects to the modem's Wi-Fi.
        subprocess.run(["sudo", "systemctl", "daemon-reload"])
        countdown(15, 10)
        subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"])
        countdown(10, 0)

        # Check which wireless network we are connected to.
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()
        if "4G-UFI-5671" not in wlan:
            print('Testar snart igen...')
            continue

        try:
            ip = requests.get("https://api.ipify.org", timeout=20).text
        except requests.RequestException as e:
            # The connection is often flaky right after a reboot; try again
            # instead of crashing the whole scraper.
            print(e)
            print('Testar snart igen...')
            continue

        sleep(5)
        print("Ny ip:", ip)
        # NOTE(review): presumably this is an address that must not be used
        # for scraping (e.g. the fixed line) — confirm. Keep looping while we
        # still appear to be on it.
        if ip != '98.128.174.224':
            return ip
|
|
|
|
|
|
|
def find_person(number, scraper):
    """Search mrkoll.se for a phone number and return what the hit page shows.

    Args:
        number: The phone number to enter in the site's search form.
        scraper: A ``Scraper`` whose browser is used for the search.

    Returns:
        dict: Empty when the search gave no hits. Otherwise it always holds
        ``url_via_telefonnummer``, ``neighbours``, ``name_change`` and
        ``prosecuted``, plus whichever of these could be found on the page:
        ``lives_with_url``, ``lives_with``, ``first_name``, ``middle_name``,
        ``last_name``, ``adress_line1``, ``adress_line2``, ``history``,
        ``date_of_birth`` and ``personal_number``.
    """
    d = {}

    # Make sure we are on the start page, where the search form lives.
    if scraper.browser.state.url != "https://mrkoll.se/":
        scraper.browser.back()
        sleep(randint(2, 3))

    form = scraper.browser.get_form(action="requestSearch/")
    if form is None:
        # Presumably get_form() gives None when the form is missing (e.g.
        # back() landed on another page); reload the start page and retry.
        scraper.open("https://mrkoll.se/")
        form = scraper.browser.get_form(action="requestSearch/")
    form["n"].value = number
    sleep(randint(2, 3))

    while True:
        try:
            scraper.browser.submit_form(form)
            break
        except Exception:
            # Submitting failed; restart the modem and record the new IP on
            # the scraper so callers log the address actually used.
            scraper.ip = restart_modem()

    soup = scraper.viewing()

    d["url_via_telefonnummer"] = scraper.browser.state.url
    for a in soup.find_all("a", href=True):
        if "boende-med-" in a["href"]:
            d["lives_with_url"] = a["href"]
        if "-hushall" in a["href"]:
            d["lives_with"] = a.text

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}

    info = soup.find("div", {"class": "block_col1"})

    if info is not None:
        # The name parts are identified by the tooltip text on their spans.
        name_titles = (
            ("first_name", "Detta är personens tilltalsnamn"),
            ("middle_name", "Detta är ett förnamn"),
            ("last_name", "Detta är ett efternamn"),
        )
        for key, title in name_titles:
            span = info.find("span", {"title": title})
            if span is not None:
                d[key] = span.text

        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        if adress:
            d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text

        history = info.find("div", {"class": "history_container"})
        if history is not None:
            d["history"] = history.text

    # Personal identity number.
    ## Date of birth (the page masks the last four digits as "-XXXX").
    for block in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in block.text:
            span = block.find("span", {"class": "f_line2"})
            if span is not None:
                d["date_of_birth"] = span.text.replace("-XXXX", "")

    ## Last four digits: the two arguments of the page's showPersnr(...) call
    ## are passed on to the site's ajax endpoint.
    start = "showPersnr"
    end = ">Jag godkänner</span>"
    t = str(soup)
    start_at = t.find(start)
    end_at = t.find(end)
    # Only try when both markers exist; otherwise the slice would be garbage
    # and a meaningless request would be sent.
    if "date_of_birth" in d and start_at != -1 and end_at != -1:
        v = t[start_at + 11 : end_at - 2].replace("'", "").split(",")
        if len(v) > 1:
            url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
            sleep(2)  # Wait a little before the extra request.
            try:
                four_last = requests.get("http://mrkoll.se" + url_ajax).text
            except requests.RequestException:
                four_last = None
            if four_last is not None:
                d["personal_number"] = "{dob}-{fl}".format(
                    dob=d["date_of_birth"], fl=four_last
                )

    neighbours = {}
    for div in soup.find_all("div", {"class": "peoplecont"}):
        for person in div.find_all("a", href=True):
            name = person.find("strong")
            moved = person.find("span", {"class": "flyttclass"})
            years = re.search(r"\d+", moved.text) if moved is not None else None
            if name is None or years is None:
                # Skip entries that lack the expected markup instead of
                # discarding every neighbour.
                continue
            neighbours[name.text] = {
                "link": person["href"],
                # Keep the whole number, not just its first digit.
                "lived_years": years.group(),
            }
    d["neighbours"] = neighbours

    d["name_change"] = [
        div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
    ]

    # Court-record markers shown on the page, keyed by the site's own labels.
    d["prosecuted"] = {
        "brottsmål": soup.find("div", {"class": "resmark res_b"}) is not None,
        "tvistemål": soup.find("div", {"class": "resmark res_t"}) is not None,
        "straffföreläggande": soup.find("div", {"class": "resmark res_s"}) is not None,
    }

    return d
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: connect to ArangoDB, wait until the machine is on
    # the modem's Wi-Fi, then look up random leaked phone numbers forever.

    # Connection details for ArangoDB.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    # Connect to ArangoDB; the password is asked for interactively.
    pwd = getpass('Arangolösenord för Phone:').strip()

    # ``db`` must stay a module-level name: Scraper.restart() uses it.
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    while True:
        # Check the wireless network name on every pass, so that a connection
        # established while waiting is actually noticed.
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()

        if "4G-UFI-5671" in wlan:
            ip = requests.get("https://api.ipify.org").text
            print("IP:", ip)
            break

        print('Testar snart igen...')
        for i in range(30, 0, -1):
            print(i, end='\r')
            sleep(1)  # Without this the countdown is a busy loop.

    scraper = Scraper()
    scraper.ip = ip
    scraper.open("https://mrkoll.se/")

    count = 0

    while True:
        count += 1
        print(count, end="\r")
        doc = leak.random()
        if not doc:
            # NOTE(review): assumes random() yields nothing once the
            # collection is empty — confirm against python-arango.
            print('\nInga fler nummer i phoneleak.')
            break
        # The document is removed before the lookup, so a crash below loses
        # this number.
        leak.delete(doc["_key"])

        # Run the search on mrkoll.se.
        d = find_person(doc["phone"], scraper)

        sleep(2)

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = scraper.ip
        db.collection("phone").insert(d)
|
|
|