You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

169 lines
5.0 KiB

import os
from datetime import date, datetime, timedelta
from getpass import getpass
from time import sleep
import random
import requests
import urllib3
urllib3.disable_warnings()
from arango import ArangoClient
def download_image(url, user, id, timeout=60):
    """Download one picture through a random Mullvad SOCKS5 proxy and save it.

    A successful response body is written to
    ``/ssd/profile_pictures/{user}/{id}.jpg``; the directory must already
    exist. One tab-separated status line (user, id, HTTP status) is printed
    for every response that does not stop the program with the expired-link
    message.

    Args:
        url: Address of the picture to fetch.
        user: Name of the sub-directory the picture is stored in.
        id: File name (without extension) of the stored picture. The name
            shadows the builtin but is kept because callers may pass it by
            keyword.
        timeout: Seconds to wait on the proxy/server before the attempt is
            abandoned and retried.

    Raises:
        SystemExit: If the server answers "URL signature expired" or HTTP 403.
    """
    # Download the image, retrying (with a new proxy) until a response arrives.
    while True:
        server = random.choice(servers_mullvad)
        # NOTE(review): the account number is hard-coded and wrapped in literal
        # single quotes, which are sent as part of the proxy user name --
        # confirm that this is intended.
        proxy_url = f"socks5://'8155249667566524'@{server}"
        proxies = {"https": proxy_url, "http": proxy_url}
        try:
            # Without a timeout a stalled proxy would block this call forever.
            r = requests.get(url, proxies=proxies, timeout=timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            sleep(300)
        else:
            break

    # Both conditions stop the whole program. SystemExit is raised directly
    # because the exit() helper is only installed by the site module for
    # interactive use. The exit status is unchanged.
    if r.text == "URL signature expired":
        print("För gammal länk.")
        raise SystemExit
    if r.status_code == 403:
        # Log the refusal so the reason for stopping is visible.
        print(f"{user}\t{id}\t{r.status_code}")
        raise SystemExit

    # Only store real picture data; an error page must not be saved as a .jpg.
    if r.ok:
        image_name = f"/ssd/profile_pictures/{user}/{id}.jpg"
        with open(image_name, "wb") as handler:
            handler.write(r.content)

    # Disabled: upload of the stored file to Nextcloud.
    #nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg"
    # headers = {"Content-type": "image/jpeg", "Slug": "heart"}
    # while True:
    #     try:
    #         r = requests.put(
    #             nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False
    #         )
    #         break
    #     except:
    #         print('Kunde inte ladda upp', nc_path)
    #         sleep(5)

    print(f"{user}\t{id}\t{r.status_code}")
def _picture_key(picture_url):
    """Return the text after the first "fbid=" in picture_url, or None if absent."""
    _, marker, key = picture_url.partition("fbid=")
    return key if marker and key else None


def get_pictures(day):
    """Download the checked pictures of every member fetched on ``day``.

    Selects the documents in ``members`` whose ``fetched`` equals
    ``str(day)``, that have ``checked_pictures`` and that lack
    ``pictures_downloaded``. For each of them the picture keys are taken from
    the part after "fbid=" of every entry in ``checked_pictures``, at most
    ten matching documents are read from ``pictures`` and handed to
    ``download_image``, and the member is finally flagged with
    ``pictures_downloaded: True``.

    Args:
        day: Value compared (as a string) with the ``fetched`` attribute.
    """
    member_cursor = db.aql.execute(
        """
        for doc in members
            filter doc.fetched == @date
            filter has(doc, "checked_pictures")
            filter not has(doc, "pictures_downloaded")
            return {'member': doc._key, 'pictures':doc.checked_pictures}
        """,
        bind_vars={"date": str(day)},
    )

    # Read the whole result into a list before starting to work on it.
    members = list(member_cursor)

    for doc in members:
        user = doc["member"]

        # Disabled: create a folder for the user's pictures on Nextcloud...
        # nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}"
        # while True:
        #     try:
        #         requests.request("MKCOL", nc_path, verify=False, auth=auth)
        #         break
        #     except:
        #         print('Kunde inte skapa', nc_path)
        #         sleep(5)

        # ...and on the local disk (for backup). makedirs with exist_ok avoids
        # the check-then-create race and also works when the parent is missing.
        os.makedirs(f"/ssd/profile_pictures/{user}", exist_ok=True)

        # Entries without "fbid=" are dropped: str.find() returned -1 for them,
        # which used to turn into a meaningless slice starting at index 4.
        # A null checked_pictures attribute is treated as "no pictures".
        picture_keys = [
            key
            for key in map(_picture_key, doc["pictures"] or [])
            if key is not None
        ]

        if picture_keys:
            picture_cursor = db.aql.execute(
                """
                for doc in pictures
                    filter doc._key in @list
                    limit 10
                    return {'_key': doc._key, 'user':doc.user, 'url': doc.src}
                """,
                bind_vars={"list": picture_keys},
            )

            for picture in picture_cursor:
                # NOTE(review): the file is stored under picture["user"], while
                # the directory created above is named after the member key --
                # presumably the two are equal; verify.
                download_image(picture["url"], picture["user"], picture["_key"])
                sleep(1)

        # Flag the member so it is not selected again.
        db.update_document(
            {"_id": "members/" + str(doc["member"]), "pictures_downloaded": True},
            silent=True,
            check_rev=False,
        )
# def old_pics():
# if not os.path.isdir(f'profile_pictures'):
# os.mkdir(f'profile_pictures')
# start = date.today()
# for i in range(1,60):
# d = start - timedelta(days=i)
# get_pictures(d.strftime('%Y%m%d'))
if __name__ == "__main__":
    # Connection settings for ArangoDB.
    user_arango = "Lasse"
    db_arango = "facebook"
    host_arango = "http://192.168.0.4:8529"

    # Ask for the password interactively, then open the database handle that
    # get_pictures() uses as the global ``db``.
    pwd = getpass(f"Arangolösenord för {user_arango}: ")
    client = ArangoClient(hosts=host_arango)
    db = client.db(db_arango, username=user_arango, password=pwd)

    # NOTE(review): plaintext credential committed to the source. In the
    # visible code it is only referenced by the commented-out Nextcloud
    # requests -- consider moving it out of the file.
    auth = ("Lasse", "affix-sip-jejune-epigraph-ENTROPY-stupefy1")

    # Mullvad SOCKS5 endpoints that download_image() chooses from; the numbers
    # are the "seNN" parts of the host names, in their original order.
    servers_mullvad = [
        f"se{number}-wg.socks5.mullvad.net:1080"
        for number in (
            15, 17, 18, 19, 21, 22, 23, 3, 5, 9,
            10, 2, 6, 7, 8, 13, 14, 26, 27, 28,
        )
    ]

    # Run forever: handle today first, then yesterday, then pause five minutes.
    # The current date is read again for each of the two calls.
    while True:
        for days_back in (0, 1):
            target_day = date.today() - timedelta(days=days_back)
            get_pictures(target_day.strftime("%Y%m%d"))
        sleep(300)