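# NOTE: the following lines are residue from the repository web viewer, not part of the script.
# You cannot select more than 25 topics. Topics must start with a letter or number,
# can include dashes ('-') and can be up to 35 characters long.
# 168 lines
# 4.9 KiB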

import os
from datetime import date, datetime, timedelta
from getpass import getpass
from time import sleep
import random
import requests
import urllib3
urllib3.disable_warnings()
from arango import ArangoClient
def download_image(url, user, id):
    """Download one profile picture through a proxy and mirror it to Nextcloud.

    The picture is fetched from ``url`` via a randomly chosen SOCKS5 server
    taken from the module-level ``servers_mullvad`` list, written to
    ``profile_pictures/{user}/{id}.jpg`` and then uploaded with an HTTP PUT
    to the corresponding WebDAV path, authenticating with the module-level
    ``auth`` tuple. The local per-user directory must already exist.

    Args:
        url: Source URL of the picture.
        user: Member key; names the per-user folder locally and remotely.
        id: Picture key; used as the file name (without extension).

    Raises:
        SystemExit: If the response body is "URL signature expired" or the
            response status is 403.
    """
    # Download the picture. On a connection error, wait five minutes and
    # retry through a freshly picked proxy server.
    while True:
        try:
            server = random.choice(servers_mullvad)
            proxy = "socks5://'8155249667566524'@{}".format(server)
            proxies = {"https": proxy, "http": proxy}
            r = requests.get(url, proxies=proxies)
            break
        except requests.exceptions.ConnectionError:
            sleep(300)

    # An expired link or a 403 stops the whole program, as before.
    # SystemExit is raised directly instead of calling the interactive
    # ``exit()`` helper; the exit status is unchanged.
    if r.text == "URL signature expired":
        print("För gammal länk.")
        raise SystemExit
    if r.status_code == 403:
        raise SystemExit

    # Keep a local copy (backup) of the picture.
    image_name = f"profile_pictures/{user}/{id}.jpg"
    img_data = r.content
    with open(image_name, "wb") as handler:
        handler.write(img_data)

    # Upload the local copy to Nextcloud, retrying every five seconds.
    nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg"
    headers = {"Content-type": "image/jpeg", "Slug": "heart"}
    while True:
        try:
            # The context manager closes the file after every attempt, so
            # repeated retries no longer leak file handles.
            with open(image_name, "rb") as image_file:
                r = requests.put(
                    nc_path, data=image_file, headers=headers, auth=auth, verify=False
                )
            break
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit can still interrupt the retry loop.
            print('Kunde inte ladda upp', nc_path)
            sleep(5)
    print(f"{user}\t{id}\t{r.status_code}")
def get_pictures(day):
    """Download pending profile pictures for members fetched on ``day``.

    Queries the ``members`` collection for documents whose ``fetched`` field
    equals ``str(day)``, that have ``checked_pictures`` and do not yet have
    ``pictures_downloaded``. For each such member it creates the remote
    (WebDAV ``MKCOL``) and local picture folders, looks up the referenced
    documents in the ``pictures`` collection (the query is limited to 10),
    downloads each via ``download_image`` and finally sets
    ``pictures_downloaded`` on the member document.

    Relies on the module-level ``db`` and ``auth`` objects.

    Args:
        day: Value compared (as ``str(day)``) against ``doc.fetched``.
    """
    members = db.aql.execute(
        """
        for doc in members
        filter doc.fetched == @date
        filter has(doc, "checked_pictures")
        filter not has(doc, "pictures_downloaded")
        return {'member': doc._key, 'pictures':doc.checked_pictures}
        """,
        bind_vars={"date": str(day)},
    )
    for doc in members:
        user = doc["member"]

        # Create the member's picture folder on Nextcloud...
        nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}"
        while True:
            try:
                requests.request("MKCOL", nc_path, verify=False, auth=auth)
                break
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that Ctrl-C
                # can still break out of the retry loop.
                print('Kunde inte skapa', nc_path)
                sleep(5)

        # ...and on the local machine (for backup).
        os.makedirs(f"profile_pictures/{user}", exist_ok=True)

        # The picture key is whatever follows the first "fbid=" in each
        # entry. Entries without that marker are skipped; the previous
        # ``find(...) + 5`` slicing turned them into a bogus key.
        keys = [
            key
            for key in (entry.partition("fbid=")[2] for entry in doc["pictures"])
            if key
        ]

        # Separate name so the member cursor being iterated is not rebound.
        picture_docs = db.aql.execute(
            """
            for doc in pictures
            filter doc._key in @list
            limit 10
            return {'_key': doc._key, 'user':doc.user, 'url': doc.src}
            """,
            bind_vars={"list": keys},
        )
        for picture in picture_docs:
            download_image(picture["url"], picture["user"], picture["_key"])
            sleep(1)  # short pause between downloads

        # Mark the member as processed so it is not selected again.
        db.update_document(
            {"_id": "members/" + str(doc["member"]), "pictures_downloaded": True},
            silent=True,
            check_rev=False,
        )
# def old_pics():
# if not os.path.isdir(f'profile_pictures'):
# os.mkdir(f'profile_pictures')
# start = date.today()
# for i in range(1,60):
# d = start - timedelta(days=i)
# get_pictures(d.strftime('%Y%m%d'))
if __name__ == '__main__':
    # Script entry point: connect to ArangoDB, set up the globals used by
    # download_image/get_pictures (db, auth, servers_mullvad) and then poll
    # for new pictures every five minutes, forever.

    # ArangoDB connection settings.
    user_arango = "Pi"
    db_arango = "facebook"
    host_arango = "http://192.168.0.3:8529"

    # Prompt for the ArangoDB password and open the database connection.
    pwd = getpass("Arangolösenord för Pi: ")
    db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)

    # NOTE(review): credentials are hard-coded in source here (and the proxy
    # account in download_image); consider prompting for them like the
    # ArangoDB password or reading them from the environment.
    auth = ("Lasse", "affix-sip-jejune-epigraph-ENTROPY-stupefy1")

    # SOCKS5 proxy endpoints; download_image picks one at random per request.
    servers_mullvad = [
        "se15-wg.socks5.mullvad.net:1080",
        "se17-wg.socks5.mullvad.net:1080",
        "se18-wg.socks5.mullvad.net:1080",
        "se19-wg.socks5.mullvad.net:1080",
        "se21-wg.socks5.mullvad.net:1080",
        "se22-wg.socks5.mullvad.net:1080",
        "se23-wg.socks5.mullvad.net:1080",
        "se3-wg.socks5.mullvad.net:1080",
        "se5-wg.socks5.mullvad.net:1080",
        "se9-wg.socks5.mullvad.net:1080",
        "se10-wg.socks5.mullvad.net:1080",
        "se2-wg.socks5.mullvad.net:1080",
        "se6-wg.socks5.mullvad.net:1080",
        "se7-wg.socks5.mullvad.net:1080",
        "se8-wg.socks5.mullvad.net:1080",
        "se13-wg.socks5.mullvad.net:1080",
        "se14-wg.socks5.mullvad.net:1080",
        "se26-wg.socks5.mullvad.net:1080",
        "se27-wg.socks5.mullvad.net:1080",
        "se28-wg.socks5.mullvad.net:1080",
    ]

    # Local backup root; exist_ok avoids the check-then-create race.
    os.makedirs("profile_pictures", exist_ok=True)

    while True:
        # Sample the date once per cycle so "yesterday" stays consistent
        # with "today" even if the first pass runs across midnight.
        current_date = date.today()
        today = current_date.strftime('%Y%m%d')
        get_pictures(today)
        yesterday = current_date - timedelta(days=1)
        get_pictures(yesterday.strftime('%Y%m%d'))
        sleep(300)