From 3e5b1d13084f5bafb7eafbec68994580e10c0df6 Mon Sep 17 00:00:00 2001 From: Lasse Server Date: Thu, 16 Sep 2021 14:29:20 +0200 Subject: [PATCH] Solved the limit 50 issue --- facebook/scrapers.py | 197 ++++++++++++++++++++++++++----------------- 1 file changed, 119 insertions(+), 78 deletions(-) diff --git a/facebook/scrapers.py b/facebook/scrapers.py index 5260666..a588b5d 100644 --- a/facebook/scrapers.py +++ b/facebook/scrapers.py @@ -158,7 +158,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): if mode == "single" and user.reactions > 30: break elif all([any([mode == "few", mode == "solo"]), user.reactions > 80, pic != url_pics[-1]]): - # Kolla den sista bilder + # Kolla den sista bilden check_picture(url_bas + url_pics[-1], user, profile) user.checked_pictures.append(url_bas + pic) break @@ -171,7 +171,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"): def check_picture(url_picture, user, profile): - """ Hämtar reaktioner för en bildprint """ + """ Hämtar reaktioner för en bild. """ picture = Picture(user.username) picture.url = url_picture @@ -180,8 +180,7 @@ def check_picture(url_picture, user, profile): picture.id = str(re.search("\d+", picture.id).group()) except: pass - # if picture.id in all_pictures: - # continue + sleep_(5) try: @@ -249,8 +248,9 @@ def check_picture(url_picture, user, profile): for div in profile.viewing().find_all("div", href=True): if "like this" in div.text: url_reactions = url_bas + str(div["href"]) - + # Hämta reaktioner för bilden + sleep_(3) profile.open(url_reactions) @@ -281,82 +281,123 @@ def check_picture(url_picture, user, profile): # Addera bilden till arrango picture.add_to_db() + # Begränsa limit till 50 då Facebook inte ger fler (för första "klick"). + try: + if int(picture.no_reactions) > 50: + no_reactions = 50 + else: + no_reactions = int(picture.no_reactions) - 1 + except TypeError: + no_reactions = picture.no_reactions + url_limit = url_bas + url_limit.replace( - "limit=" + str(limit), "limit=" + str(picture.no_reactions) + "limit=" + str(limit), "limit=" + str(no_reactions) + ) + + list_ids = [] + while True: + try: + sleep_(4) + profile.open(url_limit) + url_limit = "" + update_cookie(profile.browser.session.cookies, profile) + + # Hämta länk för "See more" för att se vilka ID:s som visas + url_see_more = None + for li in profile.viewing().find_all("li"): + if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''): # Om det finns fler reaktioner att hämta + url_see_more = li.find('a')['href'] + ids_url = url_see_more[url_see_more.find('ids=')+4:url_see_more.find('&total')] + list_ids_picture = ids_url.split('%2C') + list_ids_picture = list_ids_picture[len(list_ids):] # Profilerna på den här sidan + list_ids.extend(list_ids_picture) # Alla profiler hittills + url_limit = url_bas + url_see_more.replace('limit=10', 'limit=50') # Länken till fler profiler + # Gå igenom alla som reagerat och för in i arango + + get_reactions(profile, user, picture, list_ids) + + if url_see_more == None: # När det inte finns fler reaktioner + break + + except Exception as e: # Fel2 + write_error( + 2, + profile, + e=e, + soup=profile.viewing(), + user=user, + url=url_limit, + url_name="url_limit", + traceback=traceback.format_exc(), + ) + pass + + # Lägg till reaktioner till databasen + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True + ) + db.collection("picture_reactions").insert_many( + picture.reactions, silent=True, overwrite=True ) - try: - sleep_(4) - profile.open(url_limit) - url_limit = "" - update_cookie(profile.browser.session.cookies, profile) - - # Gå igenom alla som reagerat och för in i arango - for li in profile.viewing().find_all("li"): - friend = Friend(user.username) - if "see more" in li.text.lower(): + # Uppdatera antalet reaktioner användaren fått + user.reactions += len(picture.reactions) + +def get_reactions(profile, user, picture, list_ids_picture): + """ Gather the reactions on the picture. + + Args: + profile (class): The active profile. + user (class): The user being scraped. + picture (class): The picture. + list_ids_picture (list): List of ID:s fetched from "See more"-url + """ + + # Gå igenom alla som reagerat och för in i arango + for li in profile.viewing().find_all("li"): + friend = Friend(user.username) + if "seemore" in li.text.lower().replace(' ', '').replace('\n', ''): continue - try: - friend_html = li.find("h3").find("a") - friend.name = friend_html.text - friend.url = friend_html["href"] - if "profile.php" in friend.url: - if "&paipv" in friend.url: - friend.username = friend.url[ - friend.url.find("=") + 1 : friend.url.find("&") - ] - else: - friend.username = friend.url[friend.url.find("id=") + 3 :] + try: + friend_html = li.find("h3").find("a") + friend.name = friend_html.text + friend.url = friend_html["href"] + friend.id = list_ids_picture.pop(0) + if "profile.php" in friend.url: + if "&paipv" in friend.url: + friend.username = friend.url[ + friend.url.find("=") + 1 : friend.url.find("&") + ] else: - if "?" in friend.url: - friend.username = friend.url[ - friend.url.find("/") + 1 : friend.url.find("?") - ] - else: - friend.username = friend.url[friend.url.find("/") + 1 :] - - reaction = Reaction(user.username, friend.username, picture.id) - for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: - if type in str(li): - reaction.type = type - picture.reactions.append(reaction.get_dict()) - # Lägg till vännens profil till arrango - try: - friend.add_to_db() - except: - _print(profile, user, f"Kunde inte lägga till vän {friend.url}") - - except AttributeError as e: # Fel1 - write_error( - 1, - profile, - e=e, - soup=str(li), - user=user, - traceback=traceback.format_exc(), - ) - pass - - # Lägg till reaktioner till databasen - db.collection("picture_reactions").insert_many( - picture.reactions, silent=True, overwrite=True - ) - db.collection("picture_reactions").insert_many( - picture.reactions, silent=True, overwrite=True - ) - - # Uppdatera antalet reaktioner användaren fått - user.reactions += len(picture.reactions) - except Exception as e: # Fel2 - write_error( - 2, - profile, - e=e, - soup=profile.viewing(), - user=user, - url=url_limit, - url_name="url_limit", - traceback=traceback.format_exc(), - ) - pass + friend.username = friend.url[friend.url.find("id=") + 3 :] + else: + if "?" in friend.url: + friend.username = friend.url[ + friend.url.find("/") + 1 : friend.url.find("?") + ] + else: + friend.username = friend.url[friend.url.find("/") + 1 :] + + reaction = Reaction(user.username, friend.username, picture.id) + + for type in ["Love", "Wow", "Like", "Care", "Sad", "Angry", "Haha"]: + if type in str(li): + reaction.type = type + + picture.reactions.append(reaction.get_dict()) + # Lägg till vännens profil till arrango + try: + friend.add_to_db() + except: + _print(profile, user, f"Kunde inte lägga till vän {friend.url}") + except AttributeError as e: # Fel1 + write_error( + 1, + profile, + e=e, + soup=str(li), + user=user, + traceback=traceback.format_exc(), + ) + pass