import re
import requests
from requests.auth import HTTPProxyAuth
from time import sleep
from getpass import getpass
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import socket
socket.setdefaulttimeout(20)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import urllib
import urllib.request as request
from bs4 import BeautifulSoup
from arango import ArangoClient
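# Look up the person behind a phone number on mrkoll.se (fetched through a
# Proxyland proxy) and store the result in ArangoDB: hits go into the "phone"
# collection and the number is removed from the "phoneleak" collection.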
def find_person(number, errors):
    print(number)
    # Search for the number
    server = 'server.proxyland.io:9090'
    password = 'znzqUZwnYucVbaMUIJhgJlNpX'
    user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
    proxies = {
        "https": f"http://{user}:{password}@{server}",
        "http": f"http://{user}:{password}@{server}",
    }
    # requests.Session() takes no proxies argument; set the attribute instead
    session = requests.Session()
    session.proxies = proxies
    url = 'https://api.ipify.org'
    print(session.get(url))
    proxy = f'http://{user}:{password}@{server}'
    url = f'https://mrkoll.se/resultat?n={number}'
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    headers = {'User-agent': user_agent}
    # Register the proxy for both schemes so the https mrkoll.se URL also goes through it
    query = request.build_opener(request.ProxyHandler({'http': proxy, 'https': proxy}))
    print(url)
    # url = 'https://api.ipify.org'  # debug override left in; would replace the mrkoll.se URL
    req = request.Request(url, headers=headers)
    # Leftover retry counter; as written it never reaches 3
    n = 0
    n += 1
    if n == 3:
        return None, errors
    try:
        sleep(2)
        response = query.open(req)
        r = response.read().decode()
    except (urllib.error.HTTPError, socket.timeout) as e:
        print(e)
        sleep(2)
        errors += 1
        return None, errors
    soup = BeautifulSoup(r, 'html.parser')
    print(r)
    # Debug dump of the fetched page; the exit() would stop the scraper here
    # with open('html_out.html', 'w') as html:
    #     html.write(str(soup))
    # exit()
    if (
        "Du har gjort för många anrop" in soup.text
        or response.geturl() == "https://mrkoll.se/om/limit/"  # TODO How do you get the final URL from r with urllib3?
    ):
        errors += 1
        return None, errors
    # Put the data into a dictionary
    d = {}
    d["url_via_telefonnummer"] = response.geturl()
    try:
        for a in soup.find_all("a", href=True):
            if "boende-med-" in a["href"]:
                d["lives_with_url"] = a["href"]
            if "-hushall" in a["href"]:
                d["lives_with"] = a.text
    except:
        pass
    if "Sökningen gav 0 träffar..." in soup.text:
        return {}, errors
    info = soup.find("div", {"class": "block_col1"})
    try:
        d["first_name"] = info.find(
            "span", {"title": "Detta är personens tilltalsnamn"}
        ).text
    except:
        pass
    try:
        d["middle_name"] = info.find("span", {"title": "Detta är ett förnamn"}).text
    except:
        pass
    try:
        d["last_name"] = info.find("span", {"title": "Detta är ett efternamn"}).text
    except:
        pass
    try:
        adress = info.find_all("span", {"class": "f_line2 pl65 pl65-border"})
        d["adress_line1"] = adress[0].text
        if len(adress) > 1:
            d["adress_line2"] = adress[1].text
    except:
        pass
    try:
        d["history"] = info.find("div", {"class": "history_container"}).text
    except:
        pass
    # Personal identity number (personnummer)
    ## Date of birth
    for i in soup.find_all("div", {"class": "col_block1"}):
        if "Personnummer" in i.text:
            d["date_of_birth"] = i.find("span", {"class": "f_line2"}).text.replace(
                "-XXXX", ""
            )
    ## Last four digits
    try:
        start = "showPersnr"
        end = ">Jag godkänner</span>"
        t = str(soup)
        v = t[t.find(start) + 11 : t.find(end) - 2].replace("'", "").split(",")
        url_ajax = "/ajax/lastDigits/?p=" + v[0] + "&k=" + v[1]
        sleep(2)  # Wait a bit
        four_last = requests.get("http://mrkoll.se" + url_ajax).text
        d["personal_number"] = "{dob}-{fl}".format(dob=d["date_of_birth"], fl=four_last)
    except:
        pass
    try:
        neighbours = {}
        for div in soup.find_all("div", {"class": "peoplecont"}):
            persons = div.find_all("a", href=True)
            for person in persons:
                neighbours[person.find("strong").text] = {
                    "link": person["href"],
                    # Raw string for the regex; .group() keeps the whole number of years
                    "lived_years": re.search(
                        r"\d+", person.find("span", {"class": "flyttclass"}).text
                    ).group(),
                }
        d["neighbours"] = neighbours
    except:
        pass
    try:
        d["name_change"] = [
            div.text.strip() for div in soup.find_all("div", {"class": "name_change"})
        ]
    except:
        pass
    try:
        prosecuted = {}
        prosecuted["brottsmål"] = soup.find("div", {"class": "resmark res_b"}) is not None
        prosecuted["tvistemål"] = soup.find("div", {"class": "resmark res_t"}) is not None
        prosecuted["straffföreläggande"] = (
            soup.find("div", {"class": "resmark res_s"}) is not None
        )
        d["prosecuted"] = prosecuted
    except:
        pass
    return d, errors
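# Driver: pulls random phone numbers from the "phoneleak" collection in ArangoDB,
# runs find_person() on each, and writes the results back to the "phone" collection.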
if __name__ == "__main__":
    # Quick connectivity test through the proxy (debug leftovers)
    server = 'server.proxyland.io:9090'
    user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
    password = 'znzqUZwnYucVbaMUIJhgJlNpX'
    proxy = f'http://{user}:{password}@{server}'
    query = request.build_opener(request.ProxyHandler({'https': proxy}))
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    headers = {'User-agent': user_agent}
    # url = 'https://api.ipify.org'  # superseded by the mrkoll.se test URL below
    url = "http://mrkoll.se/resultat?n=46730341876"
    req = request.Request(url, headers=headers)
    response = query.open(req)
    r = response.read().decode()
    print(r)
    # exit()  # left in for debugging; would stop before the ArangoDB loop below
    # from requests.auth import HTTPProxyAuth
    # import requests_toolbelt
    # from requests_toolbelt.auth.http_proxy_digest import HTTPProxyDigestAuth
    # server = 'server.proxyland.io:9090'
    # user = 'MQlbuTVPhwhOwYlyerwBLuzKI'
    # password = 'znzqUZwnYucVbaMUIJhgJlNpX'
    # proxies = {
    #     "https": f"http://{user}:{password}@{server}",
    #     "http": f"http://{user}:{password}@{server}",
    # }
    # proxies = {"http": 'http://MQlbuTVPhwhOwYlyerwBLuzKI:znzqUZwnYucVbaMUIJhgJlNpX@server.proxyland.io:9090',
    #            "https": 'http://MQlbuTVPhwhOwYlyerwBLuzKI:znzqUZwnYucVbaMUIJhgJlNpX@server.proxyland.io:9090'}
    # # session.trust_env = False
    # # session.auth = HTTPProxyAuth(user, password)
    # url = 'https://api.ipify.org'
    # # url = "https://mrkoll.se/resultat?n=46730341876"
    # auth = HTTPProxyDigestAuth(user, password)
    # r = requests.get(url, proxies=proxies, auth=auth)
    # print(r)
    # exit()
    # proxies = {'https': 'https://il061376:"typical-humidify-upheave-aback-rusty"@lexvpn.integrity.st:1723'}
    # ArangoDB connection info
    user_arango = "Phone"
    db_arango = "facebook"
    host_arango = "http://192.168.1.20:8529"
    # Start the connection to ArangoDB
    # Decrypt the password for Arango
    pwd = getpass('Arango password for Phone: ').strip()
    db = ArangoClient(hosts=host_arango).db(
        db_arango, username=user_arango, password=pwd
    )
    leak = db.collection("phoneleak")
    count = 0
    scraper_count = 0
    global errors
    errors = 0
    while True:
        count += 1
        # Fetch a random person
        doc = leak.random()
        # Run the lookup on mrkoll.se
        d, errors = find_person(doc["phone"], errors)
        print(f'{count} - {errors}', end="\r")
        sleep(2)
        if d is None:  # The IP address is blocked or something went wrong
            continue
        d["_key"] = doc["_key"]
        d["_id"] = "phone/" + str(d["_key"])
        d["phone"] = doc["phone"]
        d["checked_from_ip"] = 'proxyland'
        try:
            db.collection("phone").insert(d)
            leak.delete(doc["_key"])
        except:
            pass
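# A minimal standalone check of find_person (hypothetical usage, assuming the
# Proxyland proxy above is reachable; 46730341876 is the number already used in
# the debug block):
#     errors = 0
#     person, errors = find_person("46730341876", errors)
#     print(person, errors)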