Anpassat för swarm (en användare åt gången, solo)

pull/5/head
Lasse Edfast 5 years ago
parent 52353def15
commit 7699053193
  1. 207
      facebook/__main__.py
  2. 30
      facebook/arangodb.py
  3. 6
      facebook/config.py
  4. 59
      facebook/images_pi.py
  5. 2
      facebook/scrapers.py
  6. 74
      facebook/stats.py
  7. 14
      fb-webbapp/main.py
  8. 1
      requirements.txt
  9. 29
      workspace.code-workspace

@ -20,6 +20,7 @@ if __name__ == "__main__":
write = True write = True
mode = 'all' mode = 'all'
pwd = None pwd = None
proxieservers = 'mullvad'
argv = argv[1:] argv = argv[1:]
@ -60,6 +61,8 @@ if __name__ == "__main__":
mode_nr = 1.7 mode_nr = 1.7
elif mode == "few": elif mode == "few":
mode_nr = 1.4 mode_nr = 1.4
elif mode == "solo":
mode_nr = 1.4
elif mode == "force": elif mode == "force":
mode_nr = 1 mode_nr = 1
@ -120,117 +123,105 @@ if __name__ == "__main__":
# Gå igenom de användare som efterfrågats # Gå igenom de användare som efterfrågats
while True: if lookups == "leak_lookups":
id = user.username
if lookups == "leak_lookups": check_profile_status(profile, user)
id = user.username if profile.blocked:
check_profile_status(profile, user) profile = blocked_profile(profile, proxieservers=proxieservers)
if profile.blocked: profile.open(url_bas + "/" + user.username)
profile = blocked_profile(profile, proxieservers=proxieservers) url = profile.browser.state.url.strip("/").strip("?_rdr")
profile.open(url_bas + "/" + user.username) if "php?" not in url:
url = profile.browser.state.url.strip("/").strip("?_rdr") user = User(str(url[url.rfind("/") + 1 :]).strip(), mode)
if "php?" not in url: user.id = id
user = User(str(url[url.rfind("/") + 1 :]).strip(), mode) sleep_(4)
user.id = id container = str(user.username)
sleep_(4) profile.container = container
container = str(user.username)
profile.container = container if "container" not in globals():
container = str(user.username)
if "container" not in globals(): profile.container = container
container = str(user.username)
profile.container = container profile.users_checked += 1
profile.users_checked += 1 # Hämta reaktioner för den första användaren
if any([not check_for_user(user.username, mode=mode), mode == "force"]):
# Hämta reaktioner för den första användaren try:
if any([not check_for_user(user.username, mode=mode), mode == "force"]): while True:
try: # Uppdatera in_use
while True: profile.update_time()
# Uppdatera in_use profile = profile_picture_reactions(
profile.update_time() profile, user, first_user=True, mode=mode
profile = profile_picture_reactions(
profile, user, first_user=True, mode=mode
)
if profile.blocked:
profile = blocked_profile(profile, proxieservers=proxieservers)
else:
break
except:
_print(profile, user, traceback.format_exc())
friends = friends_of_user(user.username)
_print(profile, user, f"\nKlar med, {user.username}\n")
_print(profile, user, f"Vänner som reagerat: {len(friends)}")
_print(profile, user, "\nVänner att kolla:")
friends_unchecked = []
for friend in friends:
if not check_for_user(friend):
print(friend)
friends_unchecked.append(friend)
_print(profile, user, [friends_unchecked], silent=True)
_print(profile, user, f'Totalt: {len(friends_unchecked)}')
print()
# Hämta reaktioner för users vänner (som reagerat)
count_friends = 0
for friend in friends_unchecked:
if datetime.now().strftime("%H") == '03' and int(datetime.now().strftime("%M")) < 30: # Sov för att kunna säkerhetskopieraa
sleep(1800)
count_friends += 1
user = User(str(friend), mode, other_pictures=[])
sleep_(2)
# Uppdatera in_use
profile.update_time()
try:
if not check_for_user(user.username):
p = profile_picture_reactions(profile, user, mode=mode)
if isinstance(p, Profile):
profile = p
except Exception as e: # Fel4
write_error(
4,
profile,
e=e,
user=user,
traceback=traceback.format_exc(),
soup=profile.viewing(),
) )
_print(profile, user, f"\nFel: {str(user.username)}\n") if profile.blocked:
sleep_(15) profile = blocked_profile(profile, proxieservers=proxieservers)
else:
if not profile.blocked: break
_print(profile, user, f"Klar med {user.username} \n") except:
_print(profile, user, traceback.format_exc())
# Rotera fb-profiler
if count_friends > 2 * mode_nr: if mode == 'solo':
if random.randrange(0, 2, 1) == 1: exit()
profile = new_profile(container, proxieservers=proxieservers)
count_friends = 0 friends = friends_of_user(user.username)
_print(profile, user, f"Växlar till {profile.name}") _print(profile, user, f"\nKlar med, {user.username}\n")
elif count_friends > 4 * mode_nr: _print(profile, user, f"Vänner som reagerat: {len(friends)}")
_print(profile, user, "\nVänner att kolla:")
friends_unchecked = []
for friend in friends:
if not check_for_user(friend):
print(friend)
friends_unchecked.append(friend)
_print(profile, user, [friends_unchecked], silent=True)
_print(profile, user, f'Totalt: {len(friends_unchecked)}')
print()
# Hämta reaktioner för users vänner (som reagerat)
count_friends = 0
for friend in friends_unchecked:
if datetime.now().strftime("%H") == '03' and int(datetime.now().strftime("%M")) < 30: # Sov för att kunna säkerhetskopieraa
sleep(1800)
count_friends += 1
user = User(str(friend), mode, other_pictures=[])
sleep_(2)
# Uppdatera in_use
profile.update_time()
try:
if not check_for_user(user.username):
p = profile_picture_reactions(profile, user, mode=mode)
if isinstance(p, Profile):
profile = p
except Exception as e: # Fel4
write_error(
4,
profile,
e=e,
user=user,
traceback=traceback.format_exc(),
soup=profile.viewing(),
)
_print(profile, user, f"\nFel: {str(user.username)}\n")
sleep_(15)
if not profile.blocked:
_print(profile, user, f"Klar med {user.username} \n")
# Rotera fb-profiler
if count_friends > 2 * mode_nr:
if random.randrange(0, 2, 1) == 1:
profile = new_profile(container, proxieservers=proxieservers) profile = new_profile(container, proxieservers=proxieservers)
count_friends = 0 count_friends = 0
_print(profile, user, f"Växlar till {profile.name}") _print(profile, user, f"Växlar till {profile.name}")
elif count_friends > 4 * mode_nr:
profile = new_profile(container, proxieservers=proxieservers)
count_friends = 0
_print(profile, user, f"Växlar till {profile.name}")
elif profile.blocked: elif profile.blocked:
profile = blocked_profile(profile, proxieservers=proxieservers) profile = blocked_profile(profile, proxieservers=proxieservers)
_print(profile, None, f"Klar med alla vänner.")
sleep(3)
# Hämta ny användare från databasen när alla är genomgångna _print(profile, None, f"Klar med alla vänner.")
while True:
new_user = get_user(collection=lookups)
print(new_user)
_print(profile, None, f"Ny user hämtad")
if new_user == None:
sleep(300)
_print(profile, None, "Väntar på ny user.")
else:
user = User(str(new_user["_key"]), mode)
_print(profile, user, f"Förberett ny user: {user.username}")
break

@ -3,6 +3,7 @@ from random import randint
from time import sleep from time import sleep
import json import json
from datetime import datetime from datetime import datetime
from json2html import json2html
from arango import ArangoClient from arango import ArangoClient
@ -17,7 +18,7 @@ for i in range(0, 6, 1):
with open("../password_arango.txt") as f: with open("../password_arango.txt") as f:
pwd = f.readline() pwd = f.readline()
except FileNotFoundError: except FileNotFoundError:
if pwd == None: if 'pwd' not in globals():
pwd = getpass(f'Lösenord för {user_arango}: ') pwd = getpass(f'Lösenord för {user_arango}: ')
try: try:
@ -74,12 +75,10 @@ def report_blocked(profile):
_print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.') _print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.')
def get_profile(db=db, proxieservers='mullvad', collection='profiles'): def get_profile(db=db, collection='mullvad'):
""" Hämtar profil från profiles """ """ Hämtar profil från profiles """
if proxieservers != 'mullvad':
collection = f'profiles_{proxieservers}' #TODO Byt namn på profiles till profiles_mullvad i DB
while True: while True:
cursor = db.aql.execute( cursor = db.aql.execute(
""" """
@ -87,14 +86,12 @@ def get_profile(db=db, proxieservers='mullvad', collection='profiles'):
FILTER doc.in_use < @inuse FILTER doc.in_use < @inuse
RETURN doc RETURN doc
""", """,
bind_vars={"inuse": nowstamp() - 1200, '@col': collection} bind_vars={"inuse": nowstamp() - 1200, '@col': f'profiles_{collection}'}
) )
profiles = [profile for profile in cursor] profiles = [profile for profile in cursor]
if profiles == []: if profiles == []:
sleep(180) sleep(180)
if proxieservers=='test': # Om det är ett test
profile = profiles[0]
else: else:
profile = profiles[randint(0, len(profiles) - 1)] profile = profiles[randint(0, len(profiles) - 1)]
return profile return profile
@ -113,10 +110,11 @@ def friends_of_user(user):
return [doc[8:] for doc in cursor] return [doc[8:] for doc in cursor]
def remove_profile(profile): def remove_profile(profile, proxieservers='mullvad'):
""" Tar bort en blockerad profil från databasen. """ """ Tar bort en blockerad profil från databasen. """
_print(profile, None, f"Tar bort {profile.name}.") _print(profile, None, f"Tar bort {profile.name}.")
db.collection("profiles").delete(
db.collection(f'profiles_{proxieservers}').delete(
profile.doc["_key"], silent=True, ignore_missing=True profile.doc["_key"], silent=True, ignore_missing=True
) )
_print(profile, profile.container, f"{profile.name} blockerad och borttagen {now()}.") _print(profile, profile.container, f"{profile.name} blockerad och borttagen {now()}.")
@ -124,7 +122,6 @@ def remove_profile(profile):
# TODO #2 Bättre funktion för backup av databasen # TODO #2 Bättre funktion för backup av databasen
def arango_connect(pwd): def arango_connect(pwd):
return ArangoClient(hosts=host_arango).db( return ArangoClient(hosts=host_arango).db(
db_arango, username=user_arango, password=pwd db_arango, username=user_arango, password=pwd
@ -244,6 +241,15 @@ def write_stats(continuous=False):
d['_key'] = now()[:13] d['_key'] = now()[:13]
db.insert_document( "stats", d, overwrite=True) db.insert_document( "stats", d, overwrite=True)
# Skriv en html-fil
with open('webbapp/templates/stats.html', 'a+') as html:
html.truncate(0)
html.write('<!DOCTYPE html> <br>')
html.write(json2html.convert(json = d))
# Sov för att fortsätta senare
if continuous: if continuous:
sleep(86400) sleep(86400)
else: else:
@ -254,7 +260,7 @@ def blocked_profile(profile, proxieservers):
_print(profile, None, f'Rapporterar att {profile.name} blockats.') _print(profile, None, f'Rapporterar att {profile.name} blockats.')
report_blocked(profile) report_blocked(profile)
_print(profile, None, f'Tar bort {profile.name} från databasen.') _print(profile, None, f'Tar bort {profile.name} från databasen.')
remove_profile(profile) remove_profile(profile, proxieservers)
_print(profile, None, f'Hämtar en ny profil.') _print(profile, None, f'Hämtar en ny profil.')
profile = new_profile(profile.container, proxieservers) profile = new_profile(profile.container, proxieservers)
return profile return profile

@ -1,6 +1,8 @@
from getpass import getpass
def set_pwd(_pwd): def set_pwd(_pwd=None):
global pwd global pwd
if _pwd == None:
_pwd = getpass('Lösenord för Arango-användaren:')
pwd = _pwd pwd = _pwd
# Info för arangodb # Info för arangodb

@ -32,24 +32,24 @@ def download_image(url, user, id):
elif r.status_code == 403: elif r.status_code == 403:
exit() exit()
image_name = f"profile_pictures/{user}/{id}.jpg" image_name = f"/ssd/profile_pictures/{user}/{id}.jpg"
img_data = r.content img_data = r.content
with open(image_name, "wb") as handler: with open(image_name, "wb") as handler:
handler.write(img_data) handler.write(img_data)
nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg" #nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg"
headers = {"Content-type": "image/jpeg", "Slug": "heart"} # headers = {"Content-type": "image/jpeg", "Slug": "heart"}
while True: # while True:
try: # try:
r = requests.put( # r = requests.put(
nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False # nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False
) # )
break # break
except: # except:
print('Kunde inte ladda upp', nc_path) # print('Kunde inte ladda upp', nc_path)
sleep(5) # sleep(5)
print(f"{user}\t{id}\t{r.status_code}") print(f"{user}\t{id}\t{r.status_code}")
@ -66,22 +66,26 @@ def get_pictures(day):
bind_vars={"date": str(day)}, bind_vars={"date": str(day)},
) )
# Skapa en lista med bilder att gå igenom.
images = []
for doc in cursor: for doc in cursor:
images.append(doc)
for doc in images:
user = doc["member"] user = doc["member"]
# Skapa mapp för användarens bilder på NC... # # Skapa mapp för användarens bilder på NC...
nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}" # nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}"
while True: # while True:
try: # try:
requests.request("MKCOL", nc_path, verify=False, auth=auth) # requests.request("MKCOL", nc_path, verify=False, auth=auth)
break # break
except: # except:
print('Kunde inte skapa', nc_path) # print('Kunde inte skapa', nc_path)
sleep(5) # sleep(5)
# ...och på datorn (för backup) # ...och på datorn (för backup)
if not os.path.isdir(f"profile_pictures/{user}"): if not os.path.isdir(f"/ssd/profile_pictures/{user}"):
os.mkdir(f"profile_pictures/{user}") os.mkdir(f"/ssd/profile_pictures/{user}")
pictures = [] pictures = []
for picture in doc["pictures"]: for picture in doc["pictures"]:
@ -121,14 +125,14 @@ def get_pictures(day):
if __name__ == '__main__': if __name__ == '__main__':
# Info för arangodb # Info för arangodb
user_arango = "Pi" user_arango = "Lasse"
db_arango = "facebook" db_arango = "facebook"
host_arango = "http://192.168.0.3:8529" host_arango = "http://192.168.0.4:8529"
# Starta koppling till arangodb # Starta koppling till arangodb
# Avkryptera lösen till arango # Avkryptera lösen till arango
pwd = getpass("Arangolösenord för Pi: ") pwd = getpass(f"Arangolösenord för {user_arango}: ")
db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd) db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)
@ -156,9 +160,6 @@ if __name__ == '__main__':
"se27-wg.socks5.mullvad.net:1080", "se27-wg.socks5.mullvad.net:1080",
"se28-wg.socks5.mullvad.net:1080", "se28-wg.socks5.mullvad.net:1080",
] ]
if not os.path.isdir("profile_pictures"):
os.mkdir("profile_pictures")
while True: while True:
today = date.today().strftime('%Y%m%d') today = date.today().strftime('%Y%m%d')

@ -157,7 +157,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
if first_user == False: if first_user == False:
if mode == "single" and user.reactions > 30: if mode == "single" and user.reactions > 30:
break break
elif all([mode == "few", user.reactions > 80, pic != url_pics[-1]]): elif all([any[mode == "few", mode == "solo"], user.reactions > 80, pic != url_pics[-1]]):
# Kolla den sista bilder # Kolla den sista bilder
check_picture(url_bas + url_pics[-1], user, profile) check_picture(url_bas + url_pics[-1], user, profile)
user.checked_pictures.append(url_bas + pic) user.checked_pictures.append(url_bas + pic)

@ -0,0 +1,74 @@
from datetime import datetime
from getpass import getpass
from time import sleep
from arango import ArangoClient
from json2html import json2html
def now():
    """Return the current local date and time as ``YYYY-MM-DD_HH:MM:SS``."""
    return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
def write_stats(db, continuous=False):
    """Collect database statistics, store them and write an HTML report.

    Each pass records the document count of every non-system collection,
    the number of checked members and the number of profiles per vendor.
    The result is inserted into the ``stats`` collection (overwriting any
    document with the same key) and rendered to
    ``website/fb-webbapp/stats.html``.

    Args:
        db: Database handle. The code calls ``collections()``,
            ``collection(name).count()``, ``aql.execute()`` and
            ``insert_document()`` on it.
        continuous: If true, repeat every 24 hours forever; otherwise run
            a single pass and return.
    """
    while True:
        d = {}
        for col in db.collections():
            if not col['system']:
                d[col['name']] = db.collection(col['name']).count()
        # The stats collection itself is left out of the report. It may not
        # exist yet, so a missing key must not raise.
        d.pop('stats', None)

        cursor = db.aql.execute(
            """
            FOR doc IN members
            FILTER doc.checked == true
            COLLECT WITH COUNT INTO length
            RETURN length
            """
        )
        d['checked_members'] = cursor.next()

        # How many accounts remain per vendor
        cursor = db.aql.execute(
            '''
            for doc in profiles
            filter has(doc, "vendor")
            COLLECT vendor = doc.vendor WITH COUNT INTO length
            RETURN {
            "vendor" : vendor,
            "active" : length
            }
            ''')
        d['active_vendors'] = [doc for doc in cursor]

        # The first 13 characters of the timestamp are "YYYY-MM-DD_HH", so a
        # rerun within the same hour overwrites that hour's document.
        d['_key'] = now()[:13]
        db.insert_document("stats", d, overwrite=True)

        # Write an HTML file; mode 'w' creates the file or empties it first.
        with open('website/fb-webbapp/stats.html', 'w', encoding='utf-8') as html:
            html.write('<!DOCTYPE html> <br>')
            html.write(json2html.convert(json=d))

        # Sleep before the next pass, or stop after a single pass
        if continuous:
            sleep(86400)
        else:
            break
# Connection details for ArangoDB
user_arango, db_arango = "Lasse", "facebook"
host_arango = "http://192.168.0.4:8529"

# Prompt for the ArangoDB password, then open the database connection
pwd = getpass(f'Arangolösenord för {user_arango}:').strip()
_credentials = {"username": user_arango, "password": pwd}
db = ArangoClient(hosts=host_arango).db(db_arango, **_credentials)

# Collect statistics in an endless loop (one pass every 24 hours)
write_stats(db, continuous=True)

@ -0,0 +1,14 @@
from flask import Flask, render_template
import json
from json2html import json2html
app = Flask(__name__)


@app.route("/")
def stats():
    """Render and return the ``stats.html`` template for the root URL."""
    return render_template("stats.html")


if __name__ == "__main__":
    # NOTE(review): debug=True is Flask's development mode; confirm this
    # entry point is never used for a publicly reachable deployment.
    app.run(debug=True)

@ -18,3 +18,4 @@ soupsieve==2.2
toml==0.10.2 toml==0.10.2
urllib3==1.26.3 urllib3==1.26.3
Werkzeug==1.0.1 Werkzeug==1.0.1
json2html

@ -1,16 +1,17 @@
{ {
"folders": [ "folders": [
{ {
"path": "." "path": "."
}, },
{ {
"path": "facebook" "path": "../mrkoll"
}, },
{ {
"path": "../mrkoll" "path": "facebook"
} }
], ],
"settings": { "settings": {
"python.pythonPath": "/Users/Lasse/.pyenv/versions/3.9.5/bin/python" "python.pythonPath": "/Users/Lasse/Datorgemensamt/Programmeringsprojekt/Facebook/fb-scraper/.venv/bin/python"
} },
} }
Loading…
Cancel
Save