"""Download profile pictures referenced in ArangoDB via Mullvad SOCKS5 proxies.

When run as a script, the module connects to the ``facebook`` ArangoDB
database and then polls forever: every five minutes it looks up members
fetched today or yesterday whose checked pictures have not been downloaded,
stores the pictures under ``/ssd/profile_pictures/<user>/`` and marks the
member as done.

``download_image`` and ``get_pictures`` rely on the globals ``db`` and
``servers_mullvad`` that are created in the ``__main__`` section below.
"""
import os
import random
from datetime import date, datetime, timedelta
from getpass import getpass
from time import sleep

import requests
import urllib3
from arango import ArangoClient

urllib3.disable_warnings()

# (connect, read) timeouts in seconds, so a stalled proxy cannot hang the
# process forever.
REQUEST_TIMEOUT = (10, 60)

# Seconds to wait before retrying after a failed proxy connection.
RETRY_DELAY = 300


def download_image(url, user, id):
    """Download one picture through a random proxy and save it to disk.

    The request is retried (with a new randomly chosen proxy and a pause of
    ``RETRY_DELAY`` seconds) for as long as it fails with a connection error
    or a timeout.

    Args:
        url: Source URL of the picture.
        user: Key of the member the picture belongs to; used as folder name.
        id: Key of the picture; used as file name (``<id>.jpg``).

    Raises:
        SystemExit: If the link signature has expired or the server answers
            with HTTP 403. This stops the whole script.
    """
    # Download the image.
    while True:
        server = random.choice(servers_mullvad)
        # NOTE(review): the proxy account number is hard-coded (including the
        # literal quote characters); consider moving it to configuration.
        proxy_url = f"socks5://'8155249667566524'@{server}"
        proxies = {"https": proxy_url, "http": proxy_url}
        try:
            r = requests.get(url, proxies=proxies, timeout=REQUEST_TIMEOUT)
            break
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ):
            sleep(RETRY_DELAY)

    # Compare raw bytes: decoding binary image data via ``r.text`` would
    # trigger needless charset detection.
    if r.content == b"URL signature expired":
        print("För gammal länk.")
        raise SystemExit
    if r.status_code == 403:
        raise SystemExit

    # The folder is created here as well, because ``user`` comes from the
    # picture document and need not match the member folder the caller made.
    user_dir = f"/ssd/profile_pictures/{user}"
    os.makedirs(user_dir, exist_ok=True)
    image_name = f"{user_dir}/{id}.jpg"
    with open(image_name, "wb") as handler:
        handler.write(r.content)

    # Disabled: upload of the picture to Nextcloud.
    #nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg"
    # headers = {"Content-type": "image/jpeg", "Slug": "heart"}
    # while True:
    #     try:
    #         r = requests.put(
    #             nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False
    #         )
    #         break
    #     except:
    #         print('Kunde inte ladda upp', nc_path)
    #         sleep(5)

    print(f"{user}\t{id}\t{r.status_code}")


def get_pictures(day):
    """Download pending pictures for all members fetched on ``day``.

    Selects members whose ``fetched`` attribute equals ``str(day)``, that have
    ``checked_pictures`` but no ``pictures_downloaded`` attribute. For each
    member at most 10 matching picture documents are downloaded, after which
    the member is flagged with ``pictures_downloaded = True``.

    Args:
        day: The fetch date to match; the script passes a ``%Y%m%d`` string.
    """
    member_cursor = db.aql.execute(
        """
        for doc in members
            filter doc.fetched == @date
            filter has(doc, "checked_pictures")
            filter not has(doc, "pictures_downloaded")
            return {'member': doc._key, 'pictures':doc.checked_pictures}
        """,
        bind_vars={"date": str(day)},
    )

    # Materialize the members to go through before running further queries.
    members = list(member_cursor)

    for doc in members:
        user = doc["member"]

        # # Create a folder for the user's pictures on NC...
        # nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}"
        # while True:
        #     try:
        #         requests.request("MKCOL", nc_path, verify=False, auth=auth)
        #         break
        #     except:
        #         print('Kunde inte skapa', nc_path)
        #         sleep(5)

        # ...and on the local machine (for backup).
        os.makedirs(f"/ssd/profile_pictures/{user}", exist_ok=True)

        # The picture key is everything after "fbid=" in the stored link.
        # Links without that marker are skipped instead of being sliced at an
        # arbitrary offset.
        pictures = []
        for picture in doc["pictures"] or []:
            _, marker, key = picture.partition("fbid=")
            if marker:
                pictures.append(key)

        picture_cursor = db.aql.execute(
            """
            for doc in pictures
                filter doc._key in @list
                limit 10
                return {'_key': doc._key, 'user':doc.user, 'url': doc.src}
            """,
            bind_vars={"list": pictures},
        )
        for picture in picture_cursor:
            download_image(picture["url"], picture["user"], picture["_key"])
            sleep(1)  # Short pause between consecutive downloads.

        db.update_document(
            {"_id": "members/" + str(doc["member"]), "pictures_downloaded": True},
            silent=True,
            check_rev=False,
        )


# def old_pics():
#     if not os.path.isdir(f'profile_pictures'):
#         os.mkdir(f'profile_pictures')
#     start = date.today()
#     for i in range(1,60):
#         d = start - timedelta(days=i)
#         get_pictures(d.strftime('%Y%m%d'))


if __name__ == '__main__':
    # Connection info for ArangoDB.
    user_arango = "Lasse"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    # Open the connection to ArangoDB; the password is asked for interactively.
    pwd = getpass(f"Arangolösenord för {user_arango}: ")
    db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)

    # NOTE(review): hard-coded credentials, only referenced by the disabled
    # Nextcloud code above. They should be moved out of the source and rotated.
    auth = ("Lasse", "affix-sip-jejune-epigraph-ENTROPY-stupefy1")

    # Proxy endpoints; one is picked at random for every download attempt.
    servers_mullvad = [
        "se15-wg.socks5.mullvad.net:1080",
        "se17-wg.socks5.mullvad.net:1080",
        "se18-wg.socks5.mullvad.net:1080",
        "se19-wg.socks5.mullvad.net:1080",
        "se21-wg.socks5.mullvad.net:1080",
        "se22-wg.socks5.mullvad.net:1080",
        "se23-wg.socks5.mullvad.net:1080",
        "se3-wg.socks5.mullvad.net:1080",
        "se5-wg.socks5.mullvad.net:1080",
        "se9-wg.socks5.mullvad.net:1080",
        "se10-wg.socks5.mullvad.net:1080",
        "se2-wg.socks5.mullvad.net:1080",
        "se6-wg.socks5.mullvad.net:1080",
        "se7-wg.socks5.mullvad.net:1080",
        "se8-wg.socks5.mullvad.net:1080",
        "se13-wg.socks5.mullvad.net:1080",
        "se14-wg.socks5.mullvad.net:1080",
        "se26-wg.socks5.mullvad.net:1080",
        "se27-wg.socks5.mullvad.net:1080",
        "se28-wg.socks5.mullvad.net:1080",
    ]

    while True:
        # Read the date once per cycle so "today" and "yesterday" stay
        # consistent even when the cycle runs across midnight.
        today = date.today()
        get_pictures(today.strftime('%Y%m%d'))
        yesterday = today - timedelta(days=1)
        get_pictures(yesterday.strftime('%Y%m%d'))
        sleep(300)