parent
22fc2580a2
commit
39bc11b1a4
11 changed files with 1406 additions and 0 deletions
@ -0,0 +1,9 @@ |
|||||||
|
import requests |
||||||
|
from urllib.parse import urlencode |
||||||
|
|
||||||
|
list_of_urls = ['https://mrkoll.se/resultat?n=46730674398'] |
||||||
|
|
||||||
|
for url in list_of_urls: |
||||||
|
params = {'api_key': 'fcfe011cf66fddb61bb6425fcb5cb5e9', 'url': url, 'country_code': 'se'} |
||||||
|
response = requests.get('http://api.scraperapi.com/', params=urlencode(params), verify=False) |
||||||
|
print(response.text) |
||||||
@ -0,0 +1,323 @@ |
|||||||
|
import re |
||||||
|
import requests |
||||||
|
from datetime import datetime |
||||||
|
from random import randint |
||||||
|
from time import sleep |
||||||
|
from getpass import getpass |
||||||
|
import socket |
||||||
|
import cloudscraper |
||||||
|
|
||||||
|
socket.setdefaulttimeout(20) |
||||||
|
|
||||||
|
from arango import ArangoClient |
||||||
|
import werkzeug |
||||||
|
|
||||||
|
import subprocess |
||||||
|
|
||||||
|
werkzeug.cached_property = werkzeug.utils.cached_property |
||||||
|
from robobrowser import RoboBrowser |
||||||
|
|
||||||
|
class Scraper: |
||||||
|
def __init__(self): |
||||||
|
self.start_browser() |
||||||
|
self.ip = '' |
||||||
|
|
||||||
|
def start_browser(self): |
||||||
|
""" Start at browser. """ |
||||||
|
session = requests.Session() |
||||||
|
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" |
||||||
|
self.browser = RoboBrowser( |
||||||
|
session=session, user_agent=user_agent, history=True, parser="lxml" |
||||||
|
) |
||||||
|
sleep(2) |
||||||
|
|
||||||
|
self.open("https://mrkoll.se/") |
||||||
|
|
||||||
|
def open(self, url): |
||||||
|
""" Open a URL, restart modem if blocked. """ |
||||||
|
while True: |
||||||
|
self.browser.open(url) |
||||||
|
|
||||||
|
if ( |
||||||
|
"Du har gjort för många anrop" in self.browser.parsed.text |
||||||
|
or self.browser.state.url == "https://mrkoll.se/om/limit/" |
||||||
|
): |
||||||
|
self.restart() |
||||||
|
else: |
||||||
|
break |
||||||
|
|
||||||
|
def restart(self): |
||||||
|
""" Restart the modem. """ |
||||||
|
|
||||||
|
# Sätt in ett doc med info om blockerad IP. |
||||||
|
db.collection("phone").insert({'_key': datetime.now().strftime("%Y%m%d%H%M"), 'blocked': requests.get("http://api.ipify.org/").text}) |
||||||
|
|
||||||
|
# Starta om modemet. |
||||||
|
|
||||||
|
self.ip = restart_modem() |
||||||
|
|
||||||
|
self.browser = self.start_browser(self) |
||||||
|
|
||||||
|
def viewing(self): |
||||||
|
"""Returnerar browser i html-format""" |
||||||
|
return self.browser.parsed |
||||||
|
|
||||||
|
|
||||||
|
def restart_modem(): |
||||||
|
|
||||||
|
import requests |
||||||
|
import subprocess |
||||||
|
from selenium import webdriver |
||||||
|
from selenium.webdriver.chrome.options import Options |
||||||
|
from time import sleep |
||||||
|
|
||||||
|
print('\nStartar om modem...\n') |
||||||
|
|
||||||
|
while True: |
||||||
|
try: |
||||||
|
# Set up selenium browser |
||||||
|
options = Options() |
||||||
|
options.headless = True |
||||||
|
browser = webdriver.Chrome(options=options) |
||||||
|
|
||||||
|
# Login to modem |
||||||
|
browser.get("http://192.168.100.1/cellweb/login.asp") |
||||||
|
sleep(3) |
||||||
|
username = browser.find_element_by_id("user_name") |
||||||
|
password = browser.find_element_by_id("user_password") |
||||||
|
username.send_keys("admin") |
||||||
|
password.send_keys("1340asde") |
||||||
|
|
||||||
|
# Go to reboot and accept |
||||||
|
browser.find_element_by_xpath("/html/body/section/form/button").click() # Login |
||||||
|
sleep(3) |
||||||
|
browser.find_element_by_xpath("/html/body/section/div[2]/div[6]/a").click() # More |
||||||
|
sleep(3) |
||||||
|
browser.find_element_by_xpath( |
||||||
|
"/html/body/section[2]/div/div[2]/div/a" |
||||||
|
).click() # Reboot |
||||||
|
sleep(3) |
||||||
|
browser.find_element_by_xpath( |
||||||
|
"/html/body/div[4]/div/div/div[2]/div[2]" |
||||||
|
).click() # Accept |
||||||
|
sleep(3) |
||||||
|
browser.switch_to.alert.accept() # Accept again (alert) |
||||||
|
browser.close() |
||||||
|
break |
||||||
|
|
||||||
|
except Exception as e: # Om det inte funkar att restarta modemet så slå av strömen. |
||||||
|
#subprocess.run(['sudo', 'uhubctl', '-l', '1-1', '-a', 'cycle', '-d', '5', '-p', '3']) # Om modemet är kopplat till USB på RBP. |
||||||
|
print('Kunde inte starta om.') |
||||||
|
print(e) |
||||||
|
sleep(120) |
||||||
|
|
||||||
|
# Vänta på att modemet ska starta om |
||||||
|
for i in range(180, 16, -1): |
||||||
|
print(i, ' ', end='\r') |
||||||
|
sleep(1) |
||||||
|
while True: |
||||||
|
# Se till så att Raspberry ansluter till wifi (Hallon) |
||||||
|
process = subprocess.run(["sudo", "systemctl", "daemon-reload"]) |
||||||
|
for i in range(15, 10, -1): |
||||||
|
print(i, ' ', end='\r') |
||||||
|
sleep(1) |
||||||
|
process = subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"]) |
||||||
|
for i in range(10, 0, -1): |
||||||
|
print(i, ' ', end='\r') |
||||||
|
sleep(1) |
||||||
|
|
||||||
|
# Kontrollera nätverksanamn |
||||||
|
process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE) |
||||||
|
wlan = process.communicate()[0].decode() |
||||||
|
if "4G-UFI-5671" in wlan: |
||||||
|
ip = requests.get("https://api.ipify.org").text |
||||||
|
sleep(5) |
||||||
|
print("Ny ip:", ip) |
||||||
|
if ip != '98.128.174.224': |
||||||
|
return ip |
||||||
|
|
||||||
|
else: |
||||||
|
print('Testar snart igen...') |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def find_person(number, scraper): |
||||||
|
|
||||||
|
d = {} |
||||||
|
|
||||||
|
# Kolla så att vi är på rätt plats |
||||||
|
if scraper.browser.state.url != "https://mrkoll.se/": |
||||||
|
scraper.browser.back() |
||||||
|
sleep(randint(2, 3)) |
||||||
|
|
||||||
|
form = scraper.browser.get_form(action="requestSearch/") |
||||||
|
form["n"].value = number |
||||||
|
sleep(randint(2, 3)) |
||||||
|
|
||||||
|
while True: |
||||||
|
try: |
||||||
|
scraper.browser.submit_form(form) |
||||||
|
break |
||||||
|
except: |
||||||
|
restart_modem() |
||||||
|
|
||||||
|
soup = scraper.viewing() |
||||||
|
|
||||||
|
d["url_via_telefonnummer"] = scraper.browser.state.url |
||||||
|
try: |
||||||
|
for a in scraper.viewing().find_all("a", href=True): |
||||||
|
if "boende-med-" in a["href"]: |
||||||
|
d["lives_with_url"] = a["href"] |
||||||
|
if "-hushall" in a["href"]: |
||||||
|
d["lives_with"] = a.text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
if "Sökningen gav 0 träffar..." in soup.text: |
||||||
|
return {} |
||||||
|
|
||||||
|
info = soup.find("div", {"class": "block_col1"}) |
||||||
|
|
||||||
|
try: |
||||||
|
d["first_name"] = info.find( |
||||||
|
"span", {"title": "Detta är personens tilltalsnamn"} |
||||||
|
).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"}) |
||||||
|
d["adress_line1"] = adress[0].text |
||||||
|
if len(adress) > 1: |
||||||
|
d["adress_line2"] = adress[1].text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["history"] = info.find("div", {"class": "history_container"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
# Personnummer |
||||||
|
## Födelsedatum |
||||||
|
for i in soup.find_all("div", {"class": "col_block1"}): |
||||||
|
if "Personnummer" in i.text: |
||||||
|
d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace( |
||||||
|
"-XXXX", "" |
||||||
|
) |
||||||
|
## Fyra sista |
||||||
|
try: |
||||||
|
start = "showPersnr" |
||||||
|
end = ">Jag godkänner</span>" |
||||||
|
t = str(soup) |
||||||
|
v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",") |
||||||
|
url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1] |
||||||
|
sleep(2) # Vänta lite |
||||||
|
four_last = requests.get("http://mrkoll.se" + url_ajax).text |
||||||
|
d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last) |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
neighbours = {} |
||||||
|
for div in soup.find_all("div", {"class": "peoplecont"}): |
||||||
|
persons = div.find_all("a", href=True) |
||||||
|
for person in persons: |
||||||
|
neighbours[person.find("strong").text] = { |
||||||
|
"link": person["href"], |
||||||
|
"lived_years": re.search( |
||||||
|
"\d+", person.find("span", {"class": "flyttclass"}).text |
||||||
|
).group()[0], |
||||||
|
} |
||||||
|
d["neighbours"] = neighbours |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["name_change"] = [ |
||||||
|
div.text.strip() for div in soup.find_all("div", {"class": "name_change"}) |
||||||
|
] |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
prosecuted = {} |
||||||
|
prosecuted["brottsmål"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_b"}) != None else False |
||||||
|
) |
||||||
|
prosecuted["tvistemål"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_t"}) != None else False |
||||||
|
) |
||||||
|
prosecuted["straffföreläggande"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_s"}) != None else False |
||||||
|
) |
||||||
|
d["prosecuted"] = prosecuted |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
return d |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
|
||||||
|
# proxies = { 'https': 'https://il061376:"typical-humidify-upheave-aback-rusty"@lexvpn.integrity.st:1723' } |
||||||
|
|
||||||
|
# Info för arangodb |
||||||
|
user_arango = "Phone" |
||||||
|
db_arango = "facebook" |
||||||
|
host_arango = "http://192.168.0.4:8529" |
||||||
|
|
||||||
|
# Starta koppling till arangodb |
||||||
|
# Avkryptera lösen till arango |
||||||
|
pwd = getpass('Arangolösenord för Phone:').strip() |
||||||
|
|
||||||
|
db = ArangoClient(hosts=host_arango).db( |
||||||
|
db_arango, username=user_arango, password=pwd |
||||||
|
) |
||||||
|
leak = db.collection("phoneleak") |
||||||
|
|
||||||
|
# Kontrollera nätverksanamn |
||||||
|
process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE) |
||||||
|
wlan = process.communicate()[0].decode() |
||||||
|
|
||||||
|
while True: |
||||||
|
if "4G-UFI-5671" in wlan: |
||||||
|
ip = requests.get("https://api.ipify.org").text |
||||||
|
print("IP:", ip) |
||||||
|
break |
||||||
|
|
||||||
|
else: |
||||||
|
print('Testar snart igen...') |
||||||
|
for i in range(30, 0, -1): |
||||||
|
print(i, end='\r') |
||||||
|
|
||||||
|
scraper = Scraper() |
||||||
|
scraper.ip = ip |
||||||
|
scraper.open("https://mrkoll.se/") |
||||||
|
|
||||||
|
count = 0 |
||||||
|
scraper_count = 0 |
||||||
|
|
||||||
|
while True: |
||||||
|
count += 1 |
||||||
|
print(count, end="\r") |
||||||
|
doc = leak.random() |
||||||
|
leak.delete(doc["_key"]) |
||||||
|
|
||||||
|
# Gör sökningen på mrkoll.se |
||||||
|
d = find_person(doc["phone"], scraper) |
||||||
|
|
||||||
|
sleep(2) |
||||||
|
|
||||||
|
d["_key"] = doc["_key"] |
||||||
|
d["_id"] = "phone/" + str(d["_key"]) |
||||||
|
d["phone"] = doc["phone"] |
||||||
|
d["checked_from_ip"] = scraper.ip |
||||||
|
db.collection("phone").insert(d) |
||||||
@ -0,0 +1,185 @@ |
|||||||
|
import re |
||||||
|
import requests |
||||||
|
from datetime import datetime |
||||||
|
from random import randint |
||||||
|
from time import sleep |
||||||
|
|
||||||
|
from pymongo import MongoClient |
||||||
|
import werkzeug |
||||||
|
werkzeug.cached_property = werkzeug.utils.cached_property |
||||||
|
from robobrowser import RoboBrowser |
||||||
|
import json |
||||||
|
|
||||||
|
|
||||||
|
class Scraper: |
||||||
|
def __init__(self): |
||||||
|
session = requests.Session() |
||||||
|
|
||||||
|
# Starta browser |
||||||
|
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" |
||||||
|
self.browser = RoboBrowser( |
||||||
|
session=session, user_agent=user_agent, history=True, parser="lxml" |
||||||
|
) |
||||||
|
sleep(2) |
||||||
|
self.browser.open("https://mrkoll.se/") |
||||||
|
|
||||||
|
def open(self, url): |
||||||
|
self.browser.open(url) |
||||||
|
|
||||||
|
|
||||||
|
def viewing(self): |
||||||
|
""" Returnerar browser i html-format """ |
||||||
|
return self.browser.parsed |
||||||
|
|
||||||
|
|
||||||
|
def find_person(number, scraper): |
||||||
|
|
||||||
|
d = {} |
||||||
|
if scraper.browser.state.url != "https://mrkoll.se/": |
||||||
|
scraper.browser.back() |
||||||
|
sleep(randint(2, 3)) |
||||||
|
form = scraper.browser.get_form(action="requestSearch/") |
||||||
|
form["n"].value = number |
||||||
|
sleep(randint(2, 3)) |
||||||
|
scraper.browser.submit_form(form) |
||||||
|
soup = scraper.viewing() |
||||||
|
|
||||||
|
d["url_via_telefonnummer"] = scraper.browser.state.url |
||||||
|
try: |
||||||
|
for a in scraper.viewing().find_all("a", href=True): |
||||||
|
if "boende-med-" in a["href"]: |
||||||
|
d["lives_with_url"] = a["href"] |
||||||
|
if "-hushall" in a["href"]: |
||||||
|
d["lives_with"] = a.text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
if "Sökningen gav 0 träffar..." in soup.text: |
||||||
|
return {} |
||||||
|
elif "Du har gjort för många anrop" in soup or scraper.browser.state.url == "https://mrkoll.se/om/limit/": |
||||||
|
return "blocked" |
||||||
|
|
||||||
|
info = soup.find("div", {"class": "block_col1"}) |
||||||
|
|
||||||
|
try: |
||||||
|
d["first_name"] = info.find( |
||||||
|
"span", {"title": "Detta är personens tilltalsnamn"} |
||||||
|
).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"}) |
||||||
|
d["adress_line1"] = adress[0].text |
||||||
|
if len(adress) > 1: |
||||||
|
d["adress_line2"] = adress[1].text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["history"] = info.find("div", {"class": "history_container"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
# Personnummer |
||||||
|
## Födelsedatum |
||||||
|
for i in soup.find_all("div", {"class": "col_block1"}): |
||||||
|
if "Personnummer" in i.text: |
||||||
|
d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace( |
||||||
|
"-XXXX", "" |
||||||
|
) |
||||||
|
## Fyra sista |
||||||
|
try: |
||||||
|
start = "showPersnr" |
||||||
|
end = ">Jag godkänner</span>" |
||||||
|
t = str(soup) |
||||||
|
v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",") |
||||||
|
url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1] |
||||||
|
sleep(2) # Vänta lite |
||||||
|
four_last = requests.get("http://mrkoll.se" + url_ajax).text |
||||||
|
d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last) |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
neighbours = {} |
||||||
|
for div in soup.find_all("div", {"class": "peoplecont"}): |
||||||
|
persons = div.find_all("a", href=True) |
||||||
|
for person in persons: |
||||||
|
neighbours[person.find("strong").text] = { |
||||||
|
"link": person["href"], |
||||||
|
"lived_years": re.search( |
||||||
|
"\d+", person.find("span", {"class": "flyttclass"}).text |
||||||
|
).group()[0], |
||||||
|
} |
||||||
|
d['neighbours'] = neighbours |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d['name_change'] = [div.text.strip() for div in soup.find_all('div', {'class':"name_change"})] |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
prosecuted = {} |
||||||
|
prosecuted['brottsmål'] = True if soup.find('div', {'class': 'resmark res_b'}) != None else False |
||||||
|
prosecuted['tvistemål'] = True if soup.find('div', {'class': 'resmark res_t'}) != None else False |
||||||
|
prosecuted['straffföreläggande'] = True if soup.find('div', {'class': 'resmark res_s'}) != None else False |
||||||
|
d['prosecuted'] = prosecuted |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
return d |
||||||
|
|
||||||
|
if __name__ == '__main__': |
||||||
|
client = MongoClient('mongodb://localhost:27017') |
||||||
|
db_client = client['phone_db'] |
||||||
|
db = db_client['phone'] |
||||||
|
|
||||||
|
|
||||||
|
leak = db_client['leak'] |
||||||
|
print('Nummer kvar att kolla:', leak.count_documents({})) |
||||||
|
|
||||||
|
|
||||||
|
scraper = Scraper() |
||||||
|
count = 0 |
||||||
|
scraper_count = 0 |
||||||
|
while True: |
||||||
|
count += 1 |
||||||
|
print(count, end="\r") |
||||||
|
doc = leak.find_one() |
||||||
|
leak.delete_one(doc) |
||||||
|
d = find_person(doc["phone"], scraper) |
||||||
|
# cursor = leak.aggregate([{'$sample': {'size': leak.estimated_document_count()}}], allowDiskUse=True) |
||||||
|
# for doc in cursor: |
||||||
|
# print(doc['phone']) |
||||||
|
# # Kolla om numret är kollat |
||||||
|
# q = { "phone": doc['phone'] } |
||||||
|
# if len(list(db.find(q))) == 0: |
||||||
|
# d = find_person(doc["phone"], scraper) |
||||||
|
# continue |
||||||
|
|
||||||
|
if datetime.now().strftime("%H") == '01': |
||||||
|
sleep(18000) |
||||||
|
|
||||||
|
sleep(10) |
||||||
|
if d == "blocked": |
||||||
|
client.close() |
||||||
|
print(doc) |
||||||
|
print(count, 'blocked') |
||||||
|
exit() |
||||||
|
|
||||||
|
d["_key"] = doc["_key"] |
||||||
|
d["_id"] = 'phone/' + str(d["_key"]) |
||||||
|
d["phone"] = doc["phone"] |
||||||
|
db.insert_one(d) |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,215 @@ |
|||||||
|
import re |
||||||
|
import requests |
||||||
|
import json |
||||||
|
from requests.auth import HTTPProxyAuth |
||||||
|
from time import sleep |
||||||
|
from getpass import getpass |
||||||
|
import urllib3 |
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) |
||||||
|
import socket |
||||||
|
socket.setdefaulttimeout(20) |
||||||
|
import ssl |
||||||
|
ssl._create_default_https_context = ssl._create_unverified_context |
||||||
|
import urllib |
||||||
|
import urllib.request as request |
||||||
|
from bs4 import BeautifulSoup |
||||||
|
from arango import ArangoClient |
||||||
|
|
||||||
|
def find_person(number, errors, server): |
||||||
|
|
||||||
|
password = 'T8ARbTg6qY' |
||||||
|
user = 'edfast' |
||||||
|
ip = server['ip'] |
||||||
|
|
||||||
|
proxy = (f'http://{user}:{password}@{ip}:6000') |
||||||
|
|
||||||
|
url = f'https://mrkoll.se/resultat?n={number}' |
||||||
|
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" |
||||||
|
headers = {'User-agent': user_agent} |
||||||
|
query = request.build_opener(request.ProxyHandler({'http': proxy})) |
||||||
|
req = request.Request(url, headers=headers) |
||||||
|
|
||||||
|
n = 0 |
||||||
|
|
||||||
|
n += 1 |
||||||
|
if n == 3: |
||||||
|
return None, errors |
||||||
|
try: |
||||||
|
sleep(2) |
||||||
|
response = query.open(req) |
||||||
|
r = response.read().decode() |
||||||
|
|
||||||
|
except (urllib.error.HTTPError, socket.timeout) as e: |
||||||
|
print(e) |
||||||
|
sleep(2) |
||||||
|
errors += 1 |
||||||
|
return None, errors |
||||||
|
|
||||||
|
soup = BeautifulSoup(r, 'html.parser') |
||||||
|
|
||||||
|
if ( |
||||||
|
"Du har gjort för många anrop" in soup.text |
||||||
|
or response.geturl() == "https://mrkoll.se/om/limit/" # TODO Hur får man url från r med urllib3? |
||||||
|
): |
||||||
|
errors += 1 |
||||||
|
return None, errors |
||||||
|
|
||||||
|
# Lägg in data i dictionary |
||||||
|
d = {} |
||||||
|
|
||||||
|
d["url_via_telefonnummer"] = response.geturl() |
||||||
|
try: |
||||||
|
for a in soup.find_all("a", href=True): |
||||||
|
if "boende-med-" in a["href"]: |
||||||
|
d["lives_with_url"] = a["href"] |
||||||
|
if "-hushall" in a["href"]: |
||||||
|
d["lives_with"] = a.text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
if "Sökningen gav 0 träffar..." in soup.text: |
||||||
|
return {}, errors |
||||||
|
|
||||||
|
info = soup.find("div", {"class": "block_col1"}) |
||||||
|
|
||||||
|
try: |
||||||
|
d["first_name"] = info.find( |
||||||
|
"span", {"title": "Detta är personens tilltalsnamn"} |
||||||
|
).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"}) |
||||||
|
d["adress_line1"] = adress[0].text |
||||||
|
if len(adress) > 1: |
||||||
|
d["adress_line2"] = adress[1].text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["history"] = info.find("div", {"class": "history_container"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
# Personnummer |
||||||
|
## Födelsedatum |
||||||
|
for i in soup.find_all("div", {"class": "col_block1"}): |
||||||
|
if "Personnummer" in i.text: |
||||||
|
d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace( |
||||||
|
"-XXXX", "" |
||||||
|
) |
||||||
|
## Fyra sista |
||||||
|
try: |
||||||
|
start = "showPersnr" |
||||||
|
end = ">Jag godkänner</span>" |
||||||
|
t = str(soup) |
||||||
|
v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",") |
||||||
|
url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1] |
||||||
|
sleep(2) # Vänta lite |
||||||
|
four_last = requests.get("http://mrkoll.se" + url_ajax).text |
||||||
|
d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last) |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
neighbours = {} |
||||||
|
for div in soup.find_all("div", {"class": "peoplecont"}): |
||||||
|
persons = div.find_all("a", href=True) |
||||||
|
for person in persons: |
||||||
|
neighbours[person.find("strong").text] = { |
||||||
|
"link": person["href"], |
||||||
|
"lived_years": re.search( |
||||||
|
"\d+", person.find("span", {"class": "flyttclass"}).text |
||||||
|
).group()[0], |
||||||
|
} |
||||||
|
d["neighbours"] = neighbours |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["name_change"] = [ |
||||||
|
div.text.strip() for div in soup.find_all("div", {"class": "name_change"}) |
||||||
|
] |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
prosecuted = {} |
||||||
|
prosecuted["brottsmål"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_b"}) != None else False |
||||||
|
) |
||||||
|
prosecuted["tvistemål"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_t"}) != None else False |
||||||
|
) |
||||||
|
prosecuted["straffföreläggande"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_s"}) != None else False |
||||||
|
) |
||||||
|
d["prosecuted"] = prosecuted |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
return d, errors |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
|
||||||
|
# proxies = { 'https': 'https://il061376:"typical-humidify-upheave-aback-rusty"@lexvpn.integrity.st:1723' } |
||||||
|
|
||||||
|
servers_json = json.load(open('oxylabs_servers_trail.json')) |
||||||
|
|
||||||
|
|
||||||
|
# Info för arangodb |
||||||
|
user_arango = "Phone" |
||||||
|
db_arango = "facebook" |
||||||
|
host_arango = "http://192.168.1.20:8529" |
||||||
|
|
||||||
|
# Starta koppling till arangodb |
||||||
|
# Avkryptera lösen till arango |
||||||
|
pwd = getpass('Arangolösenord för Phone:').strip() |
||||||
|
|
||||||
|
db = ArangoClient(hosts=host_arango).db( |
||||||
|
db_arango, username=user_arango, password=pwd |
||||||
|
) |
||||||
|
leak = db.collection("phoneleak") |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
count = 0 |
||||||
|
scraper_count = 0 |
||||||
|
|
||||||
|
global errors |
||||||
|
errors = 0 |
||||||
|
|
||||||
|
while True: |
||||||
|
for server in servers_json: |
||||||
|
count += 1 |
||||||
|
|
||||||
|
# Hämta en random person |
||||||
|
doc = leak.random() |
||||||
|
|
||||||
|
# Gör sökningen på mrkoll.se |
||||||
|
d, errors = find_person(doc["phone"], errors, server) |
||||||
|
print(f'{count} - {errors}', end="\r") |
||||||
|
sleep(2) |
||||||
|
|
||||||
|
if d == None: # Om ip-adressen är blockad eller något hänt |
||||||
|
continue |
||||||
|
|
||||||
|
d["_key"] = doc["_key"] |
||||||
|
d["_id"] = "phone/" + str(d["_key"]) |
||||||
|
d["phone"] = doc["phone"] |
||||||
|
d["checked_from_ip"] = 'oxylabs' |
||||||
|
try: |
||||||
|
db.collection("phone").insert(d) |
||||||
|
leak.delete(doc["_key"]) |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
@ -0,0 +1,264 @@ |
|||||||
|
import re |
||||||
|
import requests |
||||||
|
from requests.auth import HTTPProxyAuth |
||||||
|
from time import sleep |
||||||
|
from getpass import getpass |
||||||
|
import urllib3 |
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) |
||||||
|
import socket |
||||||
|
socket.setdefaulttimeout(20) |
||||||
|
import ssl |
||||||
|
ssl._create_default_https_context = ssl._create_unverified_context |
||||||
|
import urllib |
||||||
|
import urllib.request as request |
||||||
|
from bs4 import BeautifulSoup |
||||||
|
from arango import ArangoClient |
||||||
|
|
||||||
|
def find_person(number, errors): |
||||||
|
print(number) |
||||||
|
# Sök nummer |
||||||
|
server = 'server.proxyland.io:9090' |
||||||
|
password = 'znzqUZwnYucVbaMUIJhgJlNpX' |
||||||
|
user = 'MQlbuTVPhwhOwYlyerwBLuzKI' |
||||||
|
proxies = { |
||||||
|
"https": f"http://{user}:{password}@{server}", |
||||||
|
"http": f"http://{user}:{password}@{server}", |
||||||
|
} |
||||||
|
session = requests.Session(proxies=proxies) |
||||||
|
url = 'https://api.ipify.org' |
||||||
|
print(session.get(url)) |
||||||
|
|
||||||
|
proxy = (f'http://{user}:{password}@{server}') |
||||||
|
|
||||||
|
url = f'https://mrkoll.se/resultat?n={number}' |
||||||
|
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" |
||||||
|
headers = {'User-agent': user_agent} |
||||||
|
query = request.build_opener(request.ProxyHandler({'http': proxy})) |
||||||
|
print(url) |
||||||
|
url = 'https://api.ipify.org' |
||||||
|
req = request.Request(url, headers=headers) |
||||||
|
|
||||||
|
n = 0 |
||||||
|
|
||||||
|
n += 1 |
||||||
|
if n == 3: |
||||||
|
return None, errors |
||||||
|
try: |
||||||
|
sleep(2) |
||||||
|
response = query.open(req) |
||||||
|
r = response.read().decode() |
||||||
|
|
||||||
|
except (urllib.error.HTTPError, socket.timeout) as e: |
||||||
|
print(e) |
||||||
|
sleep(2) |
||||||
|
errors += 1 |
||||||
|
return None, errors |
||||||
|
|
||||||
|
soup = BeautifulSoup(r, 'html.parser') |
||||||
|
print(r) |
||||||
|
with open('html_out.html', 'w') as html: |
||||||
|
html.write(str(soup)) |
||||||
|
exit() |
||||||
|
|
||||||
|
if ( |
||||||
|
"Du har gjort för många anrop" in soup.text |
||||||
|
or response.geturl() == "https://mrkoll.se/om/limit/" # TODO Hur får man url från r med urllib3? |
||||||
|
): |
||||||
|
errors += 1 |
||||||
|
return None, errors |
||||||
|
|
||||||
|
# Lägg in data i dictionary |
||||||
|
d = {} |
||||||
|
|
||||||
|
d["url_via_telefonnummer"] = response.geturl() |
||||||
|
try: |
||||||
|
for a in soup.find_all("a", href=True): |
||||||
|
if "boende-med-" in a["href"]: |
||||||
|
d["lives_with_url"] = a["href"] |
||||||
|
if "-hushall" in a["href"]: |
||||||
|
d["lives_with"] = a.text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
if "Sökningen gav 0 träffar..." in soup.text: |
||||||
|
return {}, errors |
||||||
|
|
||||||
|
info = soup.find("div", {"class": "block_col1"}) |
||||||
|
|
||||||
|
try: |
||||||
|
d["first_name"] = info.find( |
||||||
|
"span", {"title": "Detta är personens tilltalsnamn"} |
||||||
|
).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
try: |
||||||
|
adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"}) |
||||||
|
d["adress_line1"] = adress[0].text |
||||||
|
if len(adress) > 1: |
||||||
|
d["adress_line2"] = adress[1].text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["history"] = info.find("div", {"class": "history_container"}).text |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
# Personnummer |
||||||
|
## Födelsedatum |
||||||
|
for i in soup.find_all("div", {"class": "col_block1"}): |
||||||
|
if "Personnummer" in i.text: |
||||||
|
d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace( |
||||||
|
"-XXXX", "" |
||||||
|
) |
||||||
|
## Fyra sista |
||||||
|
try: |
||||||
|
start = "showPersnr" |
||||||
|
end = ">Jag godkänner</span>" |
||||||
|
t = str(soup) |
||||||
|
v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",") |
||||||
|
url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1] |
||||||
|
sleep(2) # Vänta lite |
||||||
|
four_last = requests.get("http://mrkoll.se" + url_ajax).text |
||||||
|
d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last) |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
neighbours = {} |
||||||
|
for div in soup.find_all("div", {"class": "peoplecont"}): |
||||||
|
persons = div.find_all("a", href=True) |
||||||
|
for person in persons: |
||||||
|
neighbours[person.find("strong").text] = { |
||||||
|
"link": person["href"], |
||||||
|
"lived_years": re.search( |
||||||
|
"\d+", person.find("span", {"class": "flyttclass"}).text |
||||||
|
).group()[0], |
||||||
|
} |
||||||
|
d["neighbours"] = neighbours |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
d["name_change"] = [ |
||||||
|
div.text.strip() for div in soup.find_all("div", {"class": "name_change"}) |
||||||
|
] |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
try: |
||||||
|
prosecuted = {} |
||||||
|
prosecuted["brottsmål"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_b"}) != None else False |
||||||
|
) |
||||||
|
prosecuted["tvistemål"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_t"}) != None else False |
||||||
|
) |
||||||
|
prosecuted["straffföreläggande"] = ( |
||||||
|
True if soup.find("div", {"class": "resmark res_s"}) != None else False |
||||||
|
) |
||||||
|
d["prosecuted"] = prosecuted |
||||||
|
except: |
||||||
|
pass |
||||||
|
|
||||||
|
return d, errors |
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": |
||||||
|
server = 'server.proxyland.io:9090' |
||||||
|
user = 'MQlbuTVPhwhOwYlyerwBLuzKI' |
||||||
|
password = 'znzqUZwnYucVbaMUIJhgJlNpX' |
||||||
|
proxy = f'http://{user}:{password}@{server}' |
||||||
|
|
||||||
|
query = request.build_opener(request.ProxyHandler({'https': proxy})) |
||||||
|
|
||||||
|
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" |
||||||
|
headers = {'User-agent': user_agent} |
||||||
|
url = 'https://api.ipify.org' |
||||||
|
url = "http://mrkoll.se/resultat?n=46730341876" |
||||||
|
|
||||||
|
req = request.Request(url, headers=headers) |
||||||
|
|
||||||
|
response = query.open(req) |
||||||
|
r = response.read().decode() |
||||||
|
print(r) |
||||||
|
exit() |
||||||
|
|
||||||
|
|
||||||
|
# from requests.auth import HTTPProxyAuth |
||||||
|
# import requests_toolbelt |
||||||
|
# from requests_toolbelt.auth.http_proxy_digest import HTTPProxyDigestAuth |
||||||
|
# server = 'server.proxyland.io:9090' |
||||||
|
# user = 'MQlbuTVPhwhOwYlyerwBLuzKI' |
||||||
|
# password = 'znzqUZwnYucVbaMUIJhgJlNpX' |
||||||
|
# proxies = { |
||||||
|
# "https": f"http://{user}:{password}@{server}", |
||||||
|
# "http": f"http://{user}:{password}@{server}", |
||||||
|
# } |
||||||
|
|
||||||
|
# proxies = {"http":'http://MQlbuTVPhwhOwYlyerwBLuzKI:znzqUZwnYucVbaMUIJhgJlNpX@server.proxyland.io:9090', |
||||||
|
# "https":'http://MQlbuTVPhwhOwYlyerwBLuzKI:znzqUZwnYucVbaMUIJhgJlNpX@server.proxyland.io:9090'} |
||||||
|
# #session.trust_env=False |
||||||
|
# #session.auth = HTTPProxyAuth(user,password) |
||||||
|
# url = 'https://api.ipify.org' |
||||||
|
# #url = "https://mrkoll.se/resultat?n=46730341876" |
||||||
|
# auth = HTTPProxyDigestAuth(user, password) |
||||||
|
# r = requests.get(url, proxies=proxies, auth=auth) |
||||||
|
# print(r) |
||||||
|
|
||||||
|
# exit() |
||||||
|
|
||||||
|
|
||||||
|
# proxies = { 'https': 'https://il061376:"typical-humidify-upheave-aback-rusty"@lexvpn.integrity.st:1723' } |
||||||
|
|
||||||
|
# Info för arangodb |
||||||
|
user_arango = "Phone" |
||||||
|
db_arango = "facebook" |
||||||
|
host_arango = "http://192.168.1.20:8529" |
||||||
|
|
||||||
|
# Starta koppling till arangodb |
||||||
|
# Avkryptera lösen till arango |
||||||
|
pwd = getpass('Arangolösenord för Phone:').strip() |
||||||
|
|
||||||
|
db = ArangoClient(hosts=host_arango).db( |
||||||
|
db_arango, username=user_arango, password=pwd |
||||||
|
) |
||||||
|
leak = db.collection("phoneleak") |
||||||
|
|
||||||
|
count = 0 |
||||||
|
scraper_count = 0 |
||||||
|
|
||||||
|
global errors |
||||||
|
errors = 0 |
||||||
|
|
||||||
|
while True: |
||||||
|
count += 1 |
||||||
|
|
||||||
|
# Hämta en random person |
||||||
|
doc = leak.random() |
||||||
|
|
||||||
|
# Gör sökningen på mrkoll.se |
||||||
|
d, errors = find_person(doc["phone"], errors) |
||||||
|
print(f'{count} - {errors}', end="\r") |
||||||
|
sleep(2) |
||||||
|
|
||||||
|
if d == None: # Om ip-adressen är blockad eller något hänt |
||||||
|
continue |
||||||
|
|
||||||
|
d["_key"] = doc["_key"] |
||||||
|
d["_id"] = "phone/" + str(d["_key"]) |
||||||
|
d["phone"] = doc["phone"] |
||||||
|
d["checked_from_ip"] = 'proxyland' |
||||||
|
try: |
||||||
|
db.collection("phone").insert(d) |
||||||
|
leak.delete(doc["_key"]) |
||||||
|
except: |
||||||
|
pass |
||||||
@ -0,0 +1,217 @@ |
|||||||
|
import re |
||||||
|
import requests |
||||||
|
from requests.auth import HTTPProxyAuth |
||||||
|
from time import sleep |
||||||
|
from getpass import getpass |
||||||
|
import urllib3 |
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) |
||||||
|
import socket |
||||||
|
socket.setdefaulttimeout(20) |
||||||
|
import ssl |
||||||
|
ssl._create_default_https_context = ssl._create_unverified_context |
||||||
|
from requests.sessions import Session |
||||||
|
import urllib.request as request |
||||||
|
from bs4 import BeautifulSoup |
||||||
|
from arango import ArangoClient |
||||||
|
|
||||||
|
def find_person(number, errors):
    """Look *number* up on mrkoll.se and scrape the result page.

    Parameters
    ----------
    number : str | int
        Swedish phone number in the format mrkoll expects (e.g. 46730...).
    errors : int
        Running counter of rate-limit hits; incremented when blocked.

    Returns
    -------
    (dict | None, int)
        The scraped person data ({} when the search had zero hits, None
        when the request failed or we were rate-limited) and the updated
        error counter.
    """
    # proxyland.io proxy credentials.
    server = 'server.proxyland.io:9090'
    password = 'znzqUZwnYucVbaMUIJhgJlNpX'
    user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
    proxy = f'http://{user}:{password}@{server}'

    url = f'https://mrkoll.se/resultat?n={number}'

    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    headers = {'User-agent': user_agent}

    # BUG FIX: the proxy handler was registered for the 'http' scheme only,
    # while the target URL is https -- requests went out unproxied.
    query = request.build_opener(
        request.ProxyHandler({'http': proxy, 'https': proxy})
    )
    req = request.Request(url, headers=headers)

    # Up to two attempts; give up (None) on the third.
    n = 0
    while True:
        n += 1
        if n == 3:
            return None, errors
        try:
            sleep(2)
            response = query.open(req)
            r = response.read().decode()
            break
        except OSError:
            # BUG FIX: the original caught requests.exceptions.SSLError,
            # which urllib never raises, so the retry never fired.
            # ssl.SSLError, socket.timeout and urllib.error.URLError are
            # all OSError subclasses.
            print('SSLError')
            sleep(3)

    soup = BeautifulSoup(r, 'html.parser')

    # Rate-limited by mrkoll?
    if (
        "Du har gjort för många anrop" in soup.text
        #or r.url == "https://mrkoll.se/om/limit/"
    ):
        errors += 1
        return None, errors

    # Collect whatever data the page offers; every section is optional, so
    # each extraction is individually guarded.
    d = {}

    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except Exception:
        pass

    if "Sökningen gav 0 träffar..." in soup.text:
        return {}, errors

    info = soup.find("div", {"class": "block_col1"})

    try:
        d["first_name"] = info.find(
            "span", {"title": "Detta är personens tilltalsnamn"}
        ).text
    except Exception:
        pass
    try:
        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
    except Exception:
        pass
    try:
        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
    except Exception:
        pass
    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except Exception:
        pass

    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except Exception:
        pass

    # Personal identity number: date of birth...
    for i in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in i.text:
            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
                "-XXXX", ""
            )
    # ...and the last four digits, fetched via the site's ajax endpoint
    # (the p/k arguments are scraped from the inline showPersnr() call).
    try:
        start = "showPersnr"
        end = ">Jag godkänner</span>"
        t = str(soup)
        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        sleep(2)  # be polite
        four_last = requests.get("http://mrkoll.se" + url_ajax).text
        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
    except Exception:
        pass

    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            persons = div.find_all("a", href=True)
            for person in persons:
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # BUG FIX: the original's .group()[0] kept only the
                    # first digit of the matched number; raw string added
                    # for the \d escape as well.
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except Exception:
        pass

    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except Exception:
        pass

    try:
        # Court-record markers shown on the result page.
        prosecuted = {}
        prosecuted["brottsmål"] = soup.find("div", {"class": "resmark res_b"}) is not None
        prosecuted["tvistemål"] = soup.find("div", {"class": "resmark res_t"}) is not None
        prosecuted["straffföreläggande"] = soup.find("div", {"class": "resmark res_s"}) is not None
        d["prosecuted"] = prosecuted
    except Exception:
        pass

    return d, errors
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # ArangoDB connection info.
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    # Prompt for the Arango password instead of hard-coding it.
    pwd = getpass('Arangolösenord för Phone:').strip()

    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")

    count = 0
    scraper_count = 0
    # NOTE: the original declared `global errors`, a no-op at module
    # level; dropped.
    errors = 0

    while True:
        count += 1

        # Pick a random not-yet-processed phone number.
        doc = leak.random()

        # Look it up on mrkoll.se.
        d, errors = find_person(doc["phone"], errors)
        print(f'{count} - {errors}', end="\r")
        sleep(2)

        if d is None:  # IP address blocked
            continue

        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = 'proxyland'
        try:
            # Guarded like the other runner script: a duplicate-key insert
            # must not kill the whole loop, and the leak entry is only
            # removed after a successful insert.
            db.collection("phone").insert(d)
            leak.delete(doc["_key"])
        except Exception:
            pass
||||||
@ -0,0 +1,36 @@ |
|||||||
|
# (Commented-out proxyland/ipify scratch code removed; it contained the
#  proxy credentials in plain text and was never executed.)

mode = 'few'
# `any` over two equality checks is just a membership test.
print(mode in ("few", "solo"))
exit()

# --- Unreachable: everything below the exit() call never runs. ---
proxy_resp = requests.get('https://server.proxyland.io')
print(proxy_resp.headers['Proxy-Authenticate'])

server = 'server.proxyland.io:9090'
pwd = 'znzqUZwnYucVbaMUIJhgJlNpX'
user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
proxies = {
    "https": f"http://{user}:{pwd}@{server}",
    # NOTE(review): scheme mismatch -- the 'http' key maps to an https://
    # proxy URL; dead code anyway, left as found.
    "http": f"https://{user}:{pwd}@{server}",
}
||||||
@ -0,0 +1,60 @@ |
|||||||
|
from time import sleep
import urllib.request as request

# proxyland.io smoke test: fire 39 requests at api.ipify.org through the
# proxy and count successes vs failures.
server = "server.proxyland.io:9090"
user = "MQlbuTVPhwhOwYlyerwBLuzKI"
password = "znzqUZwnYucVbaMUIJhgJlNpX"
proxy = f"http://{user}:{password}@{server}"

query = request.build_opener(request.ProxyHandler({"https": proxy}))

user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
headers = {"User-agent": user_agent}
url = "https://api.ipify.org"

success = 0
fail = 0

# The original while/count/break construction performed exactly 39
# iterations; `count` was used for nothing else.
for _ in range(39):
    try:
        req = request.Request(url, headers=headers)
        response = query.open(req)
        r = response.read().decode()

        success += 1

    except Exception as e:
        r = str(e)
        fail += 1

    print(f"Success: {success} Fail {fail} ({r})")
    sleep(3)


exit()

# --- Unreachable: single-shot variant kept below for reference. ---
from time import sleep
import urllib.request as request

server = "server.proxyland.io:9090"
user = "MQlbuTVPhwhOwYlyerwBLuzKI"
password = "znzqUZwnYucVbaMUIJhgJlNpX"
proxy = f"http://{user}:{password}@{server}"

query = request.build_opener(request.ProxyHandler({"https": proxy}))

user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
headers = {"User-agent": user_agent}
url = "https://api.ipify.org"

req = request.Request(url, headers=headers)
response = query.open(req)
r = response.read().decode()
||||||
@ -0,0 +1,76 @@ |
|||||||
|
def restart_modem():
    """Reboot the 4G modem via its web UI to obtain a fresh external IP.

    Logs into the modem admin page with Selenium, clicks through to the
    reboot action, waits for the modem to come back up, re-attaches the
    Raspberry Pi to the modem's wifi, and returns the new external IP
    address as a string.
    """
    import requests
    import subprocess
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from time import sleep

    # (Commented-out USB autosuspend scratch code removed.)

    print("Nuvarande ip:", requests.get("https://api.ipify.org").text, '\nStartar om modem...')

    # Set up a headless selenium browser.
    options = Options()
    options.headless = True
    # NOTE(review): the chromedriver binary path ends in '.apk' -- confirm
    # this is really the driver executable on the Pi.
    browser = webdriver.Chrome(executable_path='/home/pi/chromedriver.apk', options=options)

    # Log in to the modem.
    browser.get("http://192.168.100.1/cellweb/login.asp")
    sleep(3)
    username = browser.find_element_by_id("user_name")
    password = browser.find_element_by_id("user_password")
    username.send_keys("admin")
    password.send_keys("1340asde")

    # Navigate to reboot and confirm.
    browser.find_element_by_xpath("/html/body/section/form/button").click()  # Login
    sleep(1)
    browser.find_element_by_xpath("/html/body/section/div[2]/div[6]/a").click()  # More
    sleep(1)
    browser.find_element_by_xpath(
        "/html/body/section[2]/div/div[2]/div/a"
    ).click()  # Reboot
    sleep(1)
    browser.find_element_by_xpath(
        "/html/body/div[4]/div/div/div[2]/div[2]"
    ).click()  # Accept
    sleep(1)
    # switch_to_alert() is deprecated (removed in Selenium 4).
    browser.switch_to.alert.accept()  # Accept again (alert)
    browser.close()

    # Wait for the modem to reboot.
    for i in range(180, 0, -1):
        print(i, end='\r')
        sleep(1)  # BUG FIX: countdown had no sleep and finished instantly
    while True:
        # Make sure the Raspberry reconnects to the wifi (Hallon).
        process = subprocess.run(["sudo", "systemctl", "daemon-reload"])
        sleep(5)
        process = subprocess.run(["sudo", "systemctl", "restart", "dhcpcd"])
        sleep(10)

        # Check which network we are attached to.
        process = subprocess.Popen(["iwgetid"], stdout=subprocess.PIPE)
        wlan = process.communicate()[0].decode()
        if "4G-UFI-5671" in wlan:
            ip = requests.get("https://api.ipify.org").text
            print("Ny ip:", ip)
            return ip
        else:
            print('Testar snart igen...')
            for i in range(30, 0, -1):
                print(i, end='\r')
                sleep(1)  # BUG FIX: same missing sleep in the retry countdown


restart_modem()
||||||
@ -0,0 +1,9 @@ |
|||||||
|
import subprocess
import requests
from time import sleep

# Power-cycle the USB 4G modem with uhubctl to get a fresh external IP.
print("Nuvarande ip:", requests.get("https://api.ipify.org").text, '\nStartar om modem...')
sleep(5)
# Cycle power on hub 1-1, port 3, with a 5 second off-delay.
subprocess.run(['sudo', 'uhubctl', '-l', '1-1', '-a', 'cycle', '-d', '5', '-p', '3'])
sleep(400)  # give the modem time to boot and re-attach
# BUG FIX: the closing message falsely announced another restart; it only
# reports the (hopefully new) IP.
print("Ny ip:", requests.get("https://api.ipify.org").text)
||||||
@ -0,0 +1,12 @@ |
|||||||
|
import pymongo

# Quick MongoDB smoke test: insert one document and dump the collection.
myclient = pymongo.MongoClient("192.168.0.10:27017")
mydb = myclient["mydatabase"]
mycol = mydb["customers"]
mydict = { "name": "John", "address": "Highway 37" }
mycol.insert_one(mydict)
x = mycol.find({})
for i in x:
    print(i)


# SECURITY: plaintext ArangoDB root credential committed to the repo --
# rotate this password and load it from the environment or a secrets
# store instead of keeping it in source.
rootarango = '88ccbf65a4e8865ce7da86f72a5fc3cf21e805e0137213214c95a3eb2e8e9817'
||||||
Loading…
Reference in new issue