You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
403 lines
14 KiB
403 lines
14 KiB
import re |
|
import traceback |
|
import requests |
|
|
|
from arangodb import db, check_for_picture |
|
from classes import Friend, Picture, Reaction |
|
from config import * |
|
from helpers import sleep_, update_cookie, write_error, _print, check_profile_status |
|
|
|
|
|
def profile_picture_reactions(profile, user, first_user=False, mode="all"):
    """Collect the reactions on a user's profile pictures.

    Opens the user's photo page, stores the section links found there on
    ``user``, lists the pictures in the profile-picture album and calls
    ``check_picture`` for every picture that ``check_for_picture`` does not
    flag (presumably pictures already stored -- confirm against
    ``arangodb.check_for_picture``).

    Args:
        profile: The active profile used for browsing.
        user: The user being scraped; its attributes are updated in place.
        first_user: When true the ``mode`` limits below are ignored, and a
            failure to read the user's name dumps the page and stops the
            program.
        mode: ``"single"`` stops once the user has more than 30 reactions.
            ``"few"`` and ``"solo"`` stop once the user has more than 80
            reactions, after also checking the last picture. Any other
            value checks every picture.

    Returns:
        The profile object (as returned by ``check_profile_status``).
    """
    # Build the URLs; an all-numeric username is used as the profile id.
    if user.username.isnumeric():
        user.url = url_bas + "/profile.php?id=" + str(user.username)
        user.url_photos = user.url + "&v=photos"
        user.id = user.username
    else:
        user.username = user.username.replace("/", "")
        user.url = url_bas + "/" + user.username
        user.url_photos = user.url + "/photos"

    # Go to the photos page.
    profile.open(user.url_photos)
    sleep_(4)

    profile = check_profile_status(profile, user)

    user.name = user.username  # Fallback when no name is found below.
    try:
        # The last <strong> that is not the notifications label wins.
        for strong in profile.viewing().find_all("strong"):
            if "Notifications" in str(strong):
                continue
            user.name = strong.text.strip()
    except Exception as e:
        write_error(
            6,
            profile,
            e=e,
            traceback=traceback.format_exc(),
            soup=profile.viewing(),
            user=user,
            url=user.url_photos,
        )
        if first_user:
            _print(profile, user, profile.viewing().prettify())
            # Same effect as exit(), without relying on the `site` builtin.
            raise SystemExit
    _print(
        profile,
        user,
        f"Hämtar reaktioner på profilbilder för {user.name} ({user.username})",
    )

    # Find the links to the user's sections, including the Facebook id.
    for a in profile.viewing().find_all("a", href=True):
        href = a["href"]
        if "Profile pictures" in a.text:
            user.url_album = url_bas + href  # Link to the profile-picture album.
        if "profile_id" in href:
            id_match = re.search(r"\d+", href[href.find("id=") + 3 :])
            user.id = id_match.group(0) if id_match else False

        if "Likes" in a.text:
            user.url_likes = url_bas + href
        if "About" in a.text:
            user.url_about = url_bas + href
            # The id is read from between the first and last '%' of the link.
            user.id = user.url_about[
                user.url_about.find("%") + 3 : user.url_about.rfind("%")
            ]
        if "Timeline" in a.text:
            user.url_timeline = url_bas + href
        if "Cover photos" in a.text:
            user.url_coverphotos = url_bas + href
        if a.text == "Friends":
            user.url_friends = url_bas + href

    # If no album link was found, retry ignoring spaces and letter case.
    if not hasattr(user, "url_album"):
        for a in profile.viewing().find_all("a", href=True):
            if "profilepictures" in a.text.lower().replace(" ", ""):
                user.url_album = url_bas + a["href"]

    user.add_to_db()

    if not hasattr(user, "url_album"):
        # The profile has no profile-picture album.
        write_error(9, profile, soup=profile.viewing(), user=user)
        if user.url_other_pictures:
            # Use any extra pictures and remove them from the user.
            url_pics = user.url_other_pictures
            user.url_other_pictures = []
        else:
            # Save the profile and stop working on this user.
            user.url_album = False
            if not first_user:
                user.checked()
            # NOTE(review): the source's indentation was lost; this call is
            # assumed to run for first_user as well -- confirm.
            user.add_to_db()
            _print(profile, user, "Hittar inget album för profilbilder.")
            write_error(  # Error 7
                7,
                profile,
                soup=profile.viewing(),
                user=user,
                url=user.url_album,
                url_name="user.url_album",
            )
            return profile
            # TODO: other pictures could be chosen here instead.
    else:
        # Normal case: the user has a profile-picture album.
        profile.open(user.url_album)

        # Collect all profile pictures in one list. This is the same list
        # object as user.url_other_pictures, so that list grows as well.
        url_pics = user.url_other_pictures
        thumbnails = profile.viewing().find("div", {"id": "thumbnail_area"})
        if thumbnails is None:
            # No thumbnail area on the page: nothing to check for this user.
            _print(profile, user, "Hittade inga profilbilder".upper())
            user.profile_pictures = 0
            user.pictures = url_pics
            user.checked()
            user.add_to_db()
            return profile
        for link in thumbnails.find_all("a", href=True):
            # Keep the part before "&id"; partition keeps the whole link when
            # the marker is missing instead of dropping its last character.
            url_pics.append(link["href"].partition("&id")[0])
        user.profile_pictures = len(url_pics)
        user.pictures = url_pics

    # Save the profile to the database.
    user.add_to_db()

    # Go through the selected pictures.
    total = len(url_pics)
    for number, pic in enumerate(url_pics, start=1):
        if check_for_picture(pic[pic.find("fbid=") + 5 :]):
            continue

        # Show which picture is being processed.
        _print(profile, user, f"Bild {number} av {total}", end="\r")
        check_picture(url_bas + pic, user, profile)
        user.checked_pictures.append(url_bas + pic)

        # Decide whether more pictures should be checked.
        if not first_user:
            if mode == "single" and user.reactions > 30:
                break
            if (
                mode in ("few", "solo")
                and user.reactions > 80
                and pic != url_pics[-1]
            ):
                # Check the last picture too, and record that picture
                # (not the current one) as checked.
                last_pic = url_pics[-1]
                check_picture(url_bas + last_pic, user, profile)
                user.checked_pictures.append(url_bas + last_pic)
                break
    user.checked()
    return profile
|
|
|
|
|
def check_picture(url_picture, user, profile):
    """Fetch the reactions for a single picture.

    Opens the picture page, stores the date, image source and full-size link
    on a new ``Picture``, then walks the pages of the reaction list and hands
    each page to ``get_reactions``. The collected reactions are inserted into
    the ``picture_reactions`` collection and their number is added to
    ``user.reactions``.

    Args:
        url_picture: URL of the picture page.
        user: The user who owns the picture.
        profile: The active profile used for browsing.

    Returns:
        ``profile`` when the links needed to list the reactions cannot be
        found (the picture is then skipped); otherwise ``None``.
    """
    picture = Picture(user.username)
    picture.url = url_picture
    # Everything after "fbid=", narrowed to its first run of digits if any.
    picture.id = str(picture.url[picture.url.find("fbid=") + 5 :])
    id_match = re.search(r"\d+", picture.id)
    if id_match:
        picture.id = id_match.group()

    sleep_(5)

    try:
        profile.open(picture.url)
    except Exception as e:  # Error 3
        write_error(
            3,
            profile,
            e=e,
            soup=profile.viewing(),
            user=user,
            url=picture.url,
            url_name="url_pic",
            traceback=traceback.format_exc(),
        )

    update_cookie(profile.browser.session.cookies, profile)

    # Find information about the picture.
    try:
        picture.date = profile.viewing().find("abbr").text
    except Exception as e:  # Error 8
        write_error(
            8,
            e=e,
            soup=profile.viewing(),
            profile=profile,
            url=picture.url,
            url_name="picture url",
            user=user,
            traceback=traceback.format_exc(),
        )

    # Only <img> tags that have a src, so one tag without it cannot abort
    # the search; the last matching image wins.
    for img in profile.viewing().find_all("img", src=True):
        if "https://scontent" in img["src"]:
            picture.src = img["src"]

    # Collect the links needed later.
    url_reactions = None
    for a in profile.viewing().find_all("a", href=True):
        href = a["href"]
        if "reaction" in href and "reactions" not in href and "=R" not in href:
            url_reactions = url_bas + str(href)  # Link to the picture's reactions.
        elif a.text == "View full size":
            # Full address of the picture, used as _key in pictures. partition
            # keeps the whole link when there is no "&".
            picture.url_full = (url_bas + href).partition("&")[0]
    if url_reactions is None:
        for a in profile.viewing().find_all("a", href=True):
            if "/likes/" in a["href"]:
                url_reactions = url_bas + str(a["href"])
    if url_reactions is None:
        for div in profile.viewing().find_all("div", href=True):
            if "like this" in div.text:
                url_reactions = url_bas + str(div["href"])
    if url_reactions is None:
        # No reactions link on the page: log it and skip the picture instead
        # of failing on an unbound variable.
        write_error(
            9,
            profile,
            user=user,
            soup=profile.viewing(),
            url=picture.url,
            url_name="url_pic",
        )
        return profile

    # Fetch the reactions for the picture.
    sleep_(3)
    profile.open(url_reactions)
    update_cookie(profile.browser.session.cookies, profile)

    url_limit = None
    for a in profile.viewing().find_all("a", {"class": "z ba"}, href=True):
        url_limit = a["href"]  # The last matching link wins.

    total_match = re.search(r"total_count=(\d+)", url_limit) if url_limit else None
    limit_match = re.search(r"limit=(\d+)", url_limit) if url_limit else None
    if total_match is None or limit_match is None:  # Error 9
        write_error(
            9,
            profile,
            user=user,
            soup=profile.viewing(),
            url=url_reactions,
            url_name="url_reactions",
        )
        # Pictures with very many likes cannot be shown this way?
        return profile

    picture.no_reactions = total_match.group(1)
    limit = limit_match.group(1)  # TODO: still problems with this.

    # Add the picture to the database.
    picture.add_to_db()

    # Cap the limit at 50, since Facebook gives no more for the first "click".
    # Below that, one less than the total is requested -- presumably so that a
    # "See more" link (which carries the ids) is still shown; TODO confirm.
    total = int(picture.no_reactions)
    no_reactions = 50 if total > 50 else total - 1

    url_limit = url_bas + url_limit.replace(
        "limit=" + limit, "limit=" + str(no_reactions)
    )

    list_ids = []
    while True:
        opened_url = url_limit
        try:
            sleep_(4)
            profile.open(url_limit)
            url_limit = ""
            update_cookie(profile.browser.session.cookies, profile)

            # Read the "See more" link to learn which ids are shown.
            url_see_more = None
            for li in profile.viewing().find_all("li"):
                if "seemore" in li.text.lower().replace(" ", "").replace("\n", ""):
                    # There are more reactions to fetch.
                    url_see_more = li.find("a")["href"]
                    ids_url = url_see_more[
                        url_see_more.find("ids=") + 4 : url_see_more.find("&total")
                    ]
                    list_ids_picture = ids_url.split("%2C")
                    # The profiles on this page.
                    list_ids_picture = list_ids_picture[len(list_ids) :]
                    list_ids.extend(list_ids_picture)  # All profiles so far.
                    # Link to the next batch of profiles.
                    url_limit = url_bas + url_see_more.replace(
                        "limit=10", "limit=50"
                    )

            # Go through everyone who reacted and store them.
            get_reactions(profile, user, picture, list_ids)

            if url_see_more is None:  # No more reactions to fetch.
                break

        except Exception as e:  # Error 2
            write_error(
                2,
                profile,
                e=e,
                soup=profile.viewing(),
                user=user,
                url=url_limit,
                url_name="url_limit",
                traceback=traceback.format_exc(),
            )
            # Continue only when a new next-page link was found before the
            # failure; retrying an empty or unchanged URL would loop forever.
            if not url_limit or url_limit == opened_url:
                break

    # Add the reactions to the database (once).
    db.collection("picture_reactions").insert_many(
        picture.reactions, silent=True, overwrite=True
    )

    # Update the number of reactions the user has received.
    user.reactions += len(picture.reactions)
|
|
|
def _friend_username(url):
    """Return the username (or numeric id) part of a reacting profile's link."""
    if "profile.php" in url:
        if "&paipv" in url:
            # Value of the first query parameter, up to the first "&".
            return url[url.find("=") + 1 : url.find("&")]
        return url[url.find("id=") + 3 :]
    if "?" in url:
        return url[url.find("/") + 1 : url.find("?")]
    return url[url.find("/") + 1 :]


def get_reactions(profile, user, picture, list_ids_picture):
    """Gather the reactions on the picture from the page currently shown.

    For every ``<li>`` that holds a profile link inside an ``<h3>``, a
    ``Friend`` and a ``Reaction`` are built; the reaction is appended to
    ``picture.reactions`` and the friend is added to the database.

    Args:
        profile (class): The active profile.
        user (class): The user being scraped.
        picture (class): The picture.
        list_ids_picture (list): List of ID:s fetched from "See more"-url.
            One id is removed from the front of this list for each friend
            found, so the caller's list is consumed.
    """
    reaction_types = ("Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha")

    # Go through everyone who reacted and store them.
    for li in profile.viewing().find_all("li"):
        friend = Friend(user.username)
        if "seemore" in li.text.lower().replace(" ", "").replace("\n", ""):
            continue
        try:
            # Items without an <h3><a> raise AttributeError here and are
            # logged below without consuming an id.
            friend_html = li.find("h3").find("a")
            friend.name = friend_html.text
            friend.url = friend_html["href"]
            # The id list can run out (e.g. on a page without a "See more"
            # link); keep the friend instead of failing with IndexError.
            if list_ids_picture:
                friend.id = list_ids_picture.pop(0)
            friend.username = _friend_username(friend.url)

            reaction = Reaction(user.username, friend.username, picture.id)

            # The last name in reaction_types found in the item's HTML wins.
            li_html = str(li)
            for reaction_type in reaction_types:
                if reaction_type in li_html:
                    reaction.type = reaction_type

            picture.reactions.append(reaction.get_dict())
            # Add the friend's profile to the database.
            try:
                friend.add_to_db()
            except Exception:
                _print(profile, user, f"Kunde inte lägga till vän {friend.url}")

        except AttributeError as e:  # Error 1
            write_error(
                1,
                profile,
                e=e,
                soup=str(li),
                user=user,
                traceback=traceback.format_exc(),
            )
|
|
|