Added _print()

pull/5/head
Lasse Edfast 5 years ago
parent 2569dfca7f
commit 35639cfe41
  1. 104
      facebook/__main__.py
  2. 4
      facebook/arangodb.py
  3. 360
      facebook/scrapers.py

@ -3,21 +3,37 @@ import traceback
from getopt import GetoptError, getopt
from sys import argv, exit
from time import sleep
from subprocess import check_output
from re import split
from socket import gethostname
import arangodb
from arangodb import db, write_report, backup, report_blocked
from arangodb import db, write_report, backup, report_blocked, get_profile, remove_profile, checked_members, friends_of_user
from classes import Profile, User
from helpers import sleep_, write_error
from helpers import sleep_, write_error, _print
from scrapers import profile_picture_reactions
# import werkzeug
# werkzeug.cached_property = werkzeug.utils.cached_property
# from arango import ArangoClient
def finish():
    """Shut down: mark every profile as unused, write the report, and exit.

    Reads the module-level names ``profiles``, ``users``, ``all_pictures``
    and ``all_pictures_start``. The report receives the pictures present in
    ``all_pictures`` but not in ``all_pictures_start``.
    """
    # Release each profile that is currently held.
    for active_profile in profiles:
        active_profile.unused()

    # Pictures in the current set that were not in the starting snapshot.
    new_pictures = all_pictures.difference(all_pictures_start)
    write_report(users, list(new_pictures))
    exit()
if __name__ == "__main__":
print()
if gethostname() not in ['macbook.local']: # Lägg till för studiodatorn
# Hämta namn för containern där skriptet körs
try:
containers = check_output(['docker', 'container', 'ls']).decode()
container = split('\W\W+', containers.split('\n')[1])[-1]
except FileNotFoundError:
pass
else:
container_name = 'macbook'
# Argument och alternativ
argv = argv[1:]
try:
@ -30,16 +46,21 @@ if __name__ == "__main__":
mode_nr = 1.7
elif mode == 'few':
mode_nr = 1.4
elif mode == 'force':
mode_nr = 1
else:
mode = 'all'
mode_nr = 1
for o, a in opts:
if o in ["-u", "--user"]:
users = [
User(str(i).strip(), mode)
for i in [(str(i).strip()) for i in a.split(",")]
]
try:
users = [
User(str(i).strip(), mode)
for i in [(str(i).strip()) for i in a.split(",")]
]
except StopIteration:
raise Exception
if o in ["-o", "--other"]:
url_other_picture = a
if o in ['-b', '--backup']:
@ -72,14 +93,20 @@ if __name__ == "__main__":
print("-", user.username)
print()
if 'container' not in globals():
usernames = [user.username for user in users]
if len(usernames) == 1:
container = usernames[0]
else:
container = '-'.join(usernames)
# Skapa tre olika profiler att besöka Facebook med
profiles = []
for i in range(0, 3):
doc = arangodb.get_profile()
profile = Profile(doc)
doc = get_profile()
profile = Profile(doc, container)
profile.browser.open("https://api.ipify.org")
print(
f"Profil {profile.name} använder IP-adress {profile.viewing().text}."
print(f"Profil {profile.name} använder IP-adress {profile.viewing().text}."
)
if profile.logged_in == False:
profile.accept_cookies()
@ -92,7 +119,7 @@ if __name__ == "__main__":
profile_nr = 1
profile = profiles[profile_nr]
print("Börjar med profilen", profile.name)
_print(profile.container, user.username, f"Börjar med profilen {profile.name}")
# Gå igenom de användare som efterfrågats
try:
@ -101,26 +128,28 @@ if __name__ == "__main__":
# Set för kollade bilder och kollade medlemmar
all_pictures = set([doc["_key"] for doc in db.collection("pictures").all()])
all_pictures_start = all_pictures.copy()
members_checked = arangodb.checked_members()
members_checked = checked_members()
profile.container = user.username
if user.username not in members_checked:# Hämta reaktioner för den första användaren LÄGG TILL NOT IN MEMBERS_CHECKED
# Hämta reaktioner för den första användaren
if any([user.username not in members_checked, mode == 'force']):
try:
profile_picture_reactions(profile, user, all_pictures, first_user=True, mode=mode)
except:
print(traceback.format_exc())
_print(profile.container, user.username, traceback.format_exc())
if len(users) == 1:
for profile in profiles:
profile.unused()
friends = arangodb.friends_of_user(user.username)
friends = friends_of_user(user.username)
friends_unchecked = list(set(friends) - set(members_checked))
# Här följer cookien med så att vi fortfarande är inloggade
print("\nKlar med", user.username, "\n")
print("Vänner som reagerat:", len(friends))
print("Vänner att kolla:")
_print(profile.container, user.username, f"\nKlar med, {user.username}\n")
_print(profile.container, user.username, f"Vänner som reagerat: {len(friends)}")
_print(profile.container, user.username, "\nVänner att kolla:")
for friend in friends_unchecked:
print(friend)
_print(profile.container, user.username, ', '.join([friend for friend in friends_unchecked]), silent=True)
print()
# Hämta reaktioner för users vänner (som reagerat)
@ -139,25 +168,26 @@ if __name__ == "__main__":
4,
e=e,
user=user.username,
profile=profile.container,
traceback=traceback.format_exc(),
soup=profile.viewing(),
)
print("\nFel: ", str(user.username), "\n")
_print(profile.container, user.username, f"\nFel: {str(user.username)}\n")
sleep_(15)
if profile.blocked == False:
print("Klar med", user.username, "\n")
_print(profile.container, user.username, f"Klar med {user.username} \n")
# Rotera fb-profiler
if count_friends > 5 * mode_nr:
if random.randrange(0, 2, 1) == 1:
profile_nr += 1
count_friends = 0
print("Växlar till", profiles[profile_nr].name)
_print(profile.container, user.username, f"Växlar till {profiles[profile_nr].name}")
elif count_friends > 9 * mode_nr:
profile_nr += 1
count_friends = 0
print("Växlar till", profiles[profile_nr].name)
_print(profile.container, user.username, f"Växlar till {profiles[profile_nr].name}")
if profile_nr > len(profiles) - 1:
profile_nr = 0
@ -165,31 +195,27 @@ if __name__ == "__main__":
elif profile.blocked == True:
# Ta bort profilen ur databasen
report_blocked(profile, users)
arangodb.remove_profile(profile.doc)
remove_profile(profile.doc)
# Ta bort från listan på fb-profiler som används
profiles.remove(profile)
# Försök lägga till en ny fb-profil (om det finns en skapad och ledig i databasen)
try:
profiles[profile_nr] = Profile(new=True)
print("Laddat ny profil:", profiles[profile_nr].name)
doc = get_profile()
profiles[profile_nr] = Profile(doc, container)
_print(profile.container, user.username, f"Laddat ny profil: {profiles[profile_nr].name}")
sleep(3)
except e:
print("Det behövs nya profiler...")
_print(profile.container, user.username, "Det behövs nya profiler...")
if len(profiles) == 0:
break
for s in range(0, 1600 / len(profiles)):
print(f"Sover {600-s} sekunder till... ", end="\r")
print(user, f"Sover {600-s} sekunder till... ", end="\r")
profile_nr += 1
print(f"Försöker med {profiles[profile_nr].name}.")
_print(profile.container, user.username, f"Försöker med {profiles[profile_nr].name}.")
profile = profiles[profile_nr]
except:
pass
finish()
# Gör profilerna oanvända
for profile in profiles:
profile.unused()
write_report(users, list(all_pictures.difference(all_pictures_start)))
exit()

@ -32,7 +32,7 @@ except FileNotFoundError:
db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)
from helpers import now
from helpers import now, _print
def checked_members():
@ -106,7 +106,7 @@ def friends_of_user(user):
def remove_profile(profile):
    """Delete a profile document from the "profiles" collection and log it.

    Args:
        profile: Value identifying the profile. It is subscripted for
            ``'_key'`` and also read for ``.container`` and ``.name``.
            NOTE(review): the visible caller passes ``profile.doc`` --
            confirm that value supports both access styles.
    """
    # Delete by document key; a missing document is not treated as an error.
    db.collection("profiles").delete(profile['_key'], silent=True, ignore_missing=True)
    # A single, balanced logging call; the first argument is the container,
    # as in the other _print calls in this change set.
    _print(profile.container, f'{profile.name} blockerad och borttagen {now()}.')
# TODO #2 Bättre funktion för backup av databasen

@ -4,10 +4,12 @@ import traceback
from arangodb import db
from classes import Friend, Picture, Reaction
from config import *
from helpers import sleep_, update_cookie, write_error
from helpers import sleep_, update_cookie, write_error, _print
def profile_picture_reactions(profile, user, all_pictures, first_user=False, mode = 'all'):
def profile_picture_reactions(
profile, user, all_pictures, first_user=False, mode="all"
):
# Fixa url:er osv
if user.username.isnumeric():
@ -26,16 +28,16 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod
if (
"""You can't use Facebook because your account, or activity on it, doesn't follow our Community Standards."""
in profile.viewing().text
):
print("{} blocked\n".format(profile.name).upper())
):
_print(profile.container, user.username, f"{profile.name} blocked\n".upper())
profile.blocked = True
return None
return "blocked"
elif 'accept all' in profile.viewing().text.lower():
elif "accept all" in profile.viewing().text.lower():
profile.accept_cookies()
profile.browser.open(user.url_photos)
user.name = user.username # Om inte namnet hittas senare
user.name = user.username # Om inte namnet hittas senare
try:
for i in profile.viewing().find_all("strong"):
if "Notifications" in str(i):
@ -47,18 +49,15 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod
6,
e=e,
traceback=traceback.format_exc(),
profile=profile.container,
soup=profile.viewing(),
user=user.username,
user=user,
url=user.url_photos,
)
if first_user == True:
print(profile.viewing().prettify())
_print(profile.container, user.username, profile.viewing().prettify())
exit()
print(
"Hämtar reaktioner på profilbilder för {name} ({user})".format(
name=user.name, user=user.username
)
)
_print(profile.container, user.username, f"Hämtar reaktioner på profilbilder för {user.name} ({user.username})")
# Hitta länk till olika saker hos användaren, inkl facebook-id
@ -85,30 +84,37 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod
# Testa ta bort mellanrum och små bokstäver
if not hasattr(user, "url_album"):
for a in profile.viewing().find_all("a", href=True):
if "profilepictures" in a.text.lower().replace(' ', ''):
if "profilepictures" in a.text.lower().replace(" ", ""):
user.url_album = url_bas + a["href"]
user.add_to_db()
# Gå till profilbilden (den första som kommer upp när man går till profilen)
if not hasattr(user, "url_album"): # Om profilen inte har profilalbum
write_error(9, soup=profile.viewing(), user=user.username)
if user.url_other_picture != '':
if not hasattr(user, "url_album"): # Om profilen inte har profilalbum
write_error(9, soup=profile.viewing(), user=user, profile=profile.container)
if user.url_other_picture != "":
# Använd eventuell extrabild och ta bort den från användaren
url_pics = [user.url_other_picture]
user.url_other_picture = ''
user.url_other_picture = ""
else:
# Spara ner profilen till databasen och avsluta sökningen på användaren
user.url_album = False
if first_user == False:
user.checked()
user.add_to_db()
print('Hittar inget album för profilbilder.')
write_error(7, soup=profile.viewing(), user=user.username, url=user.url_album, url_name='user.url_album')
_print(profile.container, user.username, "Hittar inget album för profilbilder.")
write_error(#fel7
7,
soup=profile.viewing(),
profile=profile.container,
user=user,
url=user.url_album,
url_name="user.url_album",
)
return None
# ATT GÖRA Här kan andra bilder väljas istället
else: # Normalfallet där användaren har profilbildsalbum
else: # Normalfallet där användaren har profilbildsalbum
profile.browser.open(user.url_album)
# Samla alla profilbilder i en lista
@ -117,10 +123,10 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod
for i in pics.find_all("a"):
a = i["href"]
url_pics.append(a[: a.find("&id")])
if user.url_other_picture != '':
if user.url_other_picture != "":
# Lägg till eventuell extrabild och ta bort den från användaren
url_pics.append(user.url_other_picture)
user.url_other_picture = ''
user.url_other_picture = ""
try:
user.profile_pictures = len(url_pics)
except:
@ -132,156 +138,198 @@ def profile_picture_reactions(profile, user, all_pictures, first_user=False, mod
user.add_to_db()
# Välj vilka bilder som ska kollas.
if first_user== False:
if mode == 'single':
if first_user == False:
if mode == "single":
url_pics = url_pics[:1]
elif mode == 'few' and len(url_pics) > 1:
elif mode == "few" and len(url_pics) > 1:
url_pics = url_pics[:1] + url_pics[-1:]
# Gå igenom valda bilder.
for pic in url_pics:
# Skriv ut vilken bild som behandlas.
print(f"Bild {url_pics.index(pic) + 1} av {user.profile_pictures}", end="\r",)
_print(profile.container, user.username,
f"Bild {url_pics.index(pic) + 1} av {user.profile_pictures}",
end="\r",
)
check_picture(url_bas + pic, user, profile)
# Välj vilka bilder som ska kollas.
if first_user == False:
if mode == "single" and user.reactions > 30:
break
elif all([mode == "few", user.reactions > 50, pic != url_pics[-1]]):
# Kolla den sista bilden
check_picture(url_bas + url_pics[-1], user, profile)
break
user.checked()
picture = Picture(user.username)
picture.url = url_bas + pic
picture.id = str(picture.url[picture.url.find("fbid=") + 5 :])
try:
picture.id = str(re.search('\d+', picture.id).group())
except:
pass
# if picture.id in all_pictures:
# continue
sleep_(5)
try:
profile.browser.open(picture.url)
except Exception as e: # Fel3
write_error(
3,
e=e,
soup=profile.viewing(),
user=user.username,
url=picture.url,
url_name="url_pic",
traceback=traceback.format_exc(),
)
def check_picture(url_picture, user, profile):
""" Hämtar reaktioner för en bildprint """
update_cookie(profile.browser.session.cookies, profile.name)
picture = Picture(user.username)
picture.url = url_picture
picture.id = str(picture.url[picture.url.find("fbid=") + 5 :])
try:
picture.id = str(re.search("\d+", picture.id).group())
except:
pass
# if picture.id in all_pictures:
# continue
sleep_(5)
# Hitta info om bilden
try:
picture.date = profile.viewing().find("abbr").text
except Exception as e: # Fel8
write_error(8, e=e, soup=profile.viewing(), url=pic, url_name='picture url', user=user.name, traceback=traceback.format_exc())
# TODO #3 lägg till fler bilder som kan gås igenom om det är få profilbilder.
try:
profile.browser.open(picture.url)
except Exception as e: # Fel3
write_error(
3,
e=e,
profile=profile.container,
soup=profile.viewing(),
user=user,
url=picture.url,
url_name="url_pic",
traceback=traceback.format_exc(),
)
# Hämta länkar för bilden att anvrända sen
#print(profile.viewing().prettify())
for a in profile.viewing().find_all("a", href=True):
if all(
[
"reaction" in a["href"],
"reactions" not in a["href"],
"=R" not in a["href"],
]
):
url_reactions = url_bas + str(a["href"]) # Länk till reaktionerna för bilden
elif a.text == "View full size":
pic = url_bas + a["href"]
picture.url_full = pic[
: pic.find("&")
] # Den fullständiga adressen till bilden, används som _key i pictures
if 'url_reactions' not in locals():
for a in profile.viewing().find_all("a", href=True):
if '/likes/' in a["href"]:
url_reactions = url_bas + str(a["href"])
if 'url_reactions' not in locals():
for div in profile.viewing().find_all("div", href=True):
if 'like this' in div.text:
url_reactions = url_bas + str(div["href"])
# Hämta reaktioner för bilden
sleep_(3)
profile.browser.open(url_reactions)
update_cookie(profile.browser.session.cookies, profile.name)
update_cookie(profile.browser.session.cookies, profile.name)
# Hitta info om bilden
try:
picture.date = profile.viewing().find("abbr").text
except Exception as e: # Fel8
write_error(
8,
e=e,
soup=profile.viewing(),
profile=profile.container,
url=picture.url,
url_name="picture url",
user=user,
traceback=traceback.format_exc(),
)
# TODO #3 lägg till fler bilder som kan gås igenom om det är få profilbilder.
try:
for a in profile.viewing().find_all("a", {"class": "z ba"}, href=True):
url_limit = a["href"]
# Hämta länkar för bilden att använda sen
# _print(profile.container, user.username, profile.viewing().prettify())
for a in profile.viewing().find_all("a", href=True):
if all(
[
"reaction" in a["href"],
"reactions" not in a["href"],
"=R" not in a["href"],
]
):
url_reactions = url_bas + str(
a["href"]
) # Länk till reaktionerna för bilden
elif a.text == "View full size":
pic = url_bas + a["href"]
picture.url_full = pic[
: pic.find("&")
] # Den fullständiga adressen till bilden, används som _key i pictures
if "url_reactions" not in locals():
for a in profile.viewing().find_all("a", href=True):
if "/likes/" in a["href"]:
url_reactions = url_bas + str(a["href"])
if "url_reactions" not in locals():
for div in profile.viewing().find_all("div", href=True):
if "like this" in div.text:
url_reactions = url_bas + str(div["href"])
# Hämta reaktioner för bilden
sleep_(3)
picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1)
limit = re.search(r"limit=(\d+)", url_limit).group(1) # TODO Fortfarande problem med det här
except UnboundLocalError: #fel9
write_error(9, soup=profile.viewing(), traceback=traceback.format_exc(), url=url_reactions, url_name='url_reactions')
# Bilder med väldigt många likes går inte att visa så här?
continue
profile.browser.open(url_reactions)
# Addera bilden till arrango
picture.add_to_db()
update_cookie(profile.browser.session.cookies, profile.name)
url_limit = url_bas + url_limit.replace(
"limit=" + str(limit), "limit=" + str(picture.no_reactions)
try:
for a in profile.viewing().find_all("a", {"class": "z ba"}, href=True):
url_limit = a["href"]
picture.no_reactions = re.search(r"total_count=(\d+)", url_limit).group(1)
limit = re.search(r"limit=(\d+)", url_limit).group(
1
) # TODO Fortfarande problem med det här
except UnboundLocalError: # fel9
write_error(
9,
user=user,
profile=profile.container,
soup=profile.viewing(),
traceback=traceback.format_exc(),
url=url_reactions,
url_name="url_reactions",
)
# Bilder med väldigt många likes går inte att visa så här?
return None
try:
sleep_(4)
profile.browser.open(url_limit)
url_limit = ''
update_cookie(profile.browser.session.cookies, profile.name)
# Gå igenom alla som reagerat och för in i arango
for li in profile.viewing().find_all("li"):
friend = Friend(user.username, mode)
if "see more" in li.text.lower():
continue
try:
friend_html = li.find("h3").find("a")
friend.name = friend_html.text
friend.url = friend_html["href"]
if "profile.php" in friend.url:
friend.username = friend.url[friend.url.find("id=") + 3 :]
else:
friend.username = friend.url[friend.url.find("/") + 1 :]
reaction = Reaction(user.username, friend.username, picture.id)
for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
if type in str(li):
reaction.type = type
picture.reactions.append(reaction.get_dict())
# Lägg till vännens profil till arrango
friend.add_to_db()
except AttributeError as e: # Fel1
write_error(
1,
e=e,
soup=str(li),
user=user.username,
traceback=traceback.format_exc(),
)
pass
# Lägg till reaktion till databasen
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
db.collection("picture_reactions").insert_many(picture.reactions, silent=True, overwrite=True)
except Exception as e: # Fel2
write_error(
2,
e=e,
soup=profile.viewing(),
user=user.username,
url=url_limit,
url_name="url_limit",
traceback=traceback.format_exc(),
)
pass
# Addera bilden till arrango
picture.add_to_db()
user.checked()
url_limit = url_bas + url_limit.replace(
"limit=" + str(limit), "limit=" + str(picture.no_reactions)
)
try:
sleep_(4)
profile.browser.open(url_limit)
url_limit = ""
update_cookie(profile.browser.session.cookies, profile.name)
# Gå igenom alla som reagerat och för in i arango
for li in profile.viewing().find_all("li"):
friend = Friend(user.username)
if "see more" in li.text.lower():
continue
try:
friend_html = li.find("h3").find("a")
friend.name = friend_html.text
friend.url = friend_html["href"]
if "profile.php" in friend.url:
friend.username = friend.url[friend.url.find("id=") + 3 :]
else:
friend.username = friend.url[friend.url.find("/") + 1 :]
reaction = Reaction(user.username, friend.username, picture.id)
for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]:
if type in str(li):
reaction.type = type
picture.reactions.append(reaction.get_dict())
# Lägg till vännens profil till arrango
friend.add_to_db()
except AttributeError as e: # Fel1
write_error(
1,
e=e,
soup=str(li),
user=user,
profile=profile.container,
traceback=traceback.format_exc(),
)
pass
# Lägg till reaktioner till databasen
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
db.collection("picture_reactions").insert_many(
picture.reactions, silent=True, overwrite=True
)
# Uppdatera antalet reaktioner användaren fått
user.reactions += len(picture.reactions)
except Exception as e: # Fel2
write_error(
2,
e=e,
soup=profile.viewing(),
profile=profile.container,
user=user,
url=url_limit,
url_name="url_limit",
traceback=traceback.format_exc(),
)
pass
Loading…
Cancel
Save