You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

323 lines
9.5 KiB

import re
import requests
from datetime import datetime
from random import randint
from time import sleep
from getpass import getpass
import socket
import cloudscraper
socket.setdefaulttimeout(20)
from arango import ArangoClient
import werkzeug
import subprocess
werkzeug.cached_property = werkzeug.utils.cached_property
from robobrowser import RoboBrowser
class Scraper:
    """Browser wrapper for scraping mrkoll.se.

    Automatically restarts the 4G modem (to obtain a new public IP)
    whenever the site rate-limits the current connection.

    NOTE(review): depends on the module-level global ``db`` (ArangoDB
    database handle, bound in ``__main__``) and the module-level
    ``restart_modem()`` function.
    """

    def __init__(self):
        self.start_browser()
        # Public IP of the current connection; set by the caller after
        # startup and refreshed by restart().
        self.ip = ''

    def start_browser(self):
        """Create a fresh RoboBrowser session and open the start page."""
        session = requests.Session()
        user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
        self.browser = RoboBrowser(
            session=session, user_agent=user_agent, history=True, parser="lxml"
        )
        sleep(2)
        self.open("https://mrkoll.se/")

    def open(self, url):
        """Open *url*, restarting the modem for as long as we are blocked."""
        while True:
            self.browser.open(url)
            # Blocked either by the Swedish "too many requests" banner or
            # by a redirect to the rate-limit info page.
            blocked = (
                "Du har gjort för många anrop" in self.browser.parsed.text
                or self.browser.state.url == "https://mrkoll.se/om/limit/"
            )
            if not blocked:
                break
            self.restart()

    def restart(self):
        """Restart the modem after being blocked and start a new browser."""
        # Record the blocked IP in the "phone" collection for bookkeeping.
        db.collection("phone").insert({
            '_key': datetime.now().strftime("%Y%m%d%H%M"),
            'blocked': requests.get("http://api.ipify.org/").text,
        })
        # Reboot the modem; returns the new public IP once it is up.
        self.ip = restart_modem()
        # BUG FIX: the original called ``self.start_browser(self)`` (passing
        # self twice -> TypeError) and assigned its None return value to
        # ``self.browser``.  start_browser() sets self.browser itself.
        self.start_browser()

    def viewing(self):
        """Return the currently loaded page as parsed HTML (BeautifulSoup)."""
        return self.browser.parsed
def restart_modem():
    """Reboot the 4G modem via its web UI and wait for a new public IP.

    Drives the modem's admin pages with headless Chrome/Selenium, waits for
    the modem to come back up, forces the Raspberry Pi to re-attach to the
    modem's wifi, and returns the new public IP as a string.  Retries
    forever on failure; blocks for several minutes per attempt.
    """
    import requests
    import subprocess
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from time import sleep
    print('\nStartar om modem...\n')
    # Keep trying until the reboot click sequence succeeds once.
    while True:
        try:
            # Set up selenium browser
            options = Options()
            options.headless = True
            browser = webdriver.Chrome(options=options)
            # Login to modem
            # SECURITY(review): admin credentials are hard-coded in plain
            # text; consider moving them to config/environment.
            browser.get("http://192.168.100.1/cellweb/login.asp")
            sleep(3)
            username = browser.find_element_by_id("user_name")
            password = browser.find_element_by_id("user_password")
            username.send_keys("admin")
            password.send_keys("1340asde")
            # Go to reboot and accept
            browser.find_element_by_xpath("/html/body/section/form/button").click()  # Login
            sleep(3)
            browser.find_element_by_xpath("/html/body/section/div[2]/div[6]/a").click()  # More
            sleep(3)
            browser.find_element_by_xpath(
                "/html/body/section[2]/div/div[2]/div/a"
            ).click()  # Reboot
            sleep(3)
            browser.find_element_by_xpath(
                "/html/body/div[4]/div/div/div[2]/div[2]"
            ).click()  # Accept
            sleep(3)
            browser.switch_to.alert.accept()  # Accept again (alert)
            browser.close()
            break
        except Exception as e:  # If restarting the modem via the UI fails, cut the power instead.
            # subprocess.run(['sudo', 'uhubctl', '-l', '1-1', '-a', 'cycle', '-d', '5', '-p', '3'])  # If the modem is powered over USB on the Raspberry Pi.
            # NOTE(review): ``browser`` may be left open here (leaked
            # ChromeDriver process) — confirm whether a close is wanted.
            print('Kunde inte starta om.')
            print(e)
            sleep(120)
    # Wait for the modem to reboot (visible countdown, ~164 s).
    for i in range(180, 16, -1):
        print(i, ' ', end='\r')
        sleep(1)
    while True:
        # Make sure the Raspberry Pi reconnects to the wifi (Hallon).
        process = subprocess.run(["sudo", "systemctl", "daemon-reload"])
        for i in range(15, 10, -1):
            print(i, ' ', end='\r')
            sleep(1)
        process = subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"])
        for i in range(10, 0, -1):
            print(i, ' ', end='\r')
            sleep(1)
        # Check that we are on the modem's network by its SSID.
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()
        if "4G-UFI-5671" in wlan:
            ip = requests.get("https://api.ipify.org").text
            sleep(5)
            print("Ny ip:", ip)
            # Only accept the reboot if the IP actually changed away from
            # this known/blocked address.
            if ip != '98.128.174.224':
                return ip
        else:
            print('Testar snart igen...')
def find_person(number, scraper):
    """Search mrkoll.se for a phone *number* and scrape the hit into a dict.

    Submits the number through the site's search form using *scraper*
    (a Scraper instance) and best-effort extracts name, address, birth
    date, personal number, neighbours, name changes and court-record
    flags.  Returns ``{}`` when the search yields no hits; otherwise a
    dict with whichever fields could be parsed.  Parsing failures are
    deliberately swallowed so a layout change only drops fields.
    """
    d = {}
    # Make sure we are on the search start page before grabbing the form.
    if scraper.browser.state.url != "https://mrkoll.se/":
        scraper.browser.back()
    sleep(randint(2, 3))
    form = scraper.browser.get_form(action="requestSearch/")
    form["n"].value = number
    sleep(randint(2, 3))
    # Submit; on any failure assume the connection is blocked and restart
    # the modem, then retry until the submit goes through.
    while True:
        try:
            scraper.browser.submit_form(form)
            break
        except:
            restart_modem()
    soup = scraper.viewing()
    d["url_via_telefonnummer"] = scraper.browser.state.url
    # Household links: who the person lives with.
    try:
        for a in scraper.viewing().find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except:
        pass
    # "Sökningen gav 0 träffar..." = the search returned zero hits.
    if "Sökningen gav 0 träffar..." in soup.text:
        return {}
    info = soup.find("div", {"class": "block_col1"})
    # Names are identified by the Swedish tooltip titles on the spans.
    try:
        d["first_name"] = info.find(
            "span", {"title": "Detta är personens tilltalsnamn"}
        ).text
    except:
        pass
    try:
        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
    except:
        pass
    try:
        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
    except:
        pass
    # Street address: one or two lines.
    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except:
        pass
    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except:
        pass
    # Personal number (Swedish "personnummer")
    ## Date of birth: shown as YYYYMMDD-XXXX with the last four masked.
    for i in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in i.text:
            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
                "-XXXX", ""
            )
    ## Last four digits: extracted from the inline showPersnr(p, k) call and
    ## fetched via the site's AJAX endpoint.
    try:
        start = "showPersnr"
        end = ">Jag godkänner</span>"
        t = str(soup)
        # +11 skips "showPersnr(" (10 chars + the opening parenthesis).
        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        sleep(2)  # wait a little between requests
        four_last = requests.get("http://mrkoll.se" + url_ajax).text
        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
    except:
        pass
    # Neighbours: name -> profile link + years lived at the address.
    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            persons = div.find_all("a", href=True)
            for person in persons:
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # NOTE(review): ``.group()[0]`` keeps only the FIRST
                    # digit of the match ("12" -> "1"); ``.group()`` (or
                    # ``.group(0)``) was probably intended.  Also the
                    # pattern should be a raw string r"\d+".  Confirm
                    # before changing — stored data would change shape.
                    "lived_years": re.search(
                        "\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group()[0],
                }
        d["neighbours"] = neighbours
    except:
        pass
    # Previous names, if any.
    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except:
        pass
    # Court-record flags: criminal case, civil case, summary fine.
    try:
        prosecuted = {}
        prosecuted["brottsmål"] = (
            True if soup.find("div", {"class": "resmark res_b"}) != None else False
        )
        prosecuted["tvistemål"] = (
            True if soup.find("div", {"class": "resmark res_t"}) != None else False
        )
        prosecuted["straffföreläggande"] = (
            True if soup.find("div", {"class": "resmark res_s"}) != None else False
        )
        d["prosecuted"] = prosecuted
    except:
        pass
    return d
if __name__ == "__main__":
    # proxies = { 'https': 'https://il061376:"typical-humidify-upheave-aback-rusty"@lexvpn.integrity.st:1723' }
    # ArangoDB connection info.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"
    # Connect to ArangoDB; the password is asked for interactively.
    pwd = getpass('Arangolösenord för Phone:').strip()
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")
    # Wait until the Raspberry Pi is connected to the modem's wifi.
    # BUG FIX: the original read the network name ONCE before the loop, so a
    # wrong network spun forever without ever re-checking; it also counted
    # down without sleeping.  Re-check each iteration and sleep 1 s per tick.
    while True:
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()
        if "4G-UFI-5671" in wlan:
            ip = requests.get("https://api.ipify.org").text
            print("IP:", ip)
            break
        print('Testar snart igen...')
        for i in range(30, 0, -1):
            print(i, end='\r')
            sleep(1)
    scraper = Scraper()
    scraper.ip = ip
    scraper.open("https://mrkoll.se/")
    count = 0
    scraper_count = 0
    # Main loop: pull a random leaked number, look it up, store the result.
    while True:
        count += 1
        print(count, end="\r")
        doc = leak.random()
        # NOTE(review): the source doc is deleted BEFORE the lookup runs; a
        # crash mid-scrape permanently loses that phone number — confirm
        # this is intended.
        leak.delete(doc["_key"])
        # Run the search on mrkoll.se.
        d = find_person(doc["phone"], scraper)
        sleep(2)
        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = scraper.ip
        db.collection("phone").insert(d)