diff --git a/facebook/__main__.py b/facebook/__main__.py
index 68a173b..13ab25d 100644
--- a/facebook/__main__.py
+++ b/facebook/__main__.py
@@ -20,6 +20,7 @@ if __name__ == "__main__":
write = True
mode = 'all'
pwd = None
+ proxieservers = 'mullvad'
argv = argv[1:]
@@ -60,6 +61,8 @@ if __name__ == "__main__":
mode_nr = 1.7
elif mode == "few":
mode_nr = 1.4
+ elif mode == "solo":
+ mode_nr = 1.4
elif mode == "force":
mode_nr = 1
@@ -120,117 +123,105 @@ if __name__ == "__main__":
# Gå igenom de användare som efterfrågats
- while True:
-
- if lookups == "leak_lookups":
- id = user.username
- check_profile_status(profile, user)
- if profile.blocked:
- profile = blocked_profile(profile, proxieservers=proxieservers)
- profile.open(url_bas + "/" + user.username)
- url = profile.browser.state.url.strip("/").strip("?_rdr")
- if "php?" not in url:
- user = User(str(url[url.rfind("/") + 1 :]).strip(), mode)
- user.id = id
- sleep_(4)
- container = str(user.username)
- profile.container = container
-
- if "container" not in globals():
- container = str(user.username)
- profile.container = container
-
- profile.users_checked += 1
-
- # Hämta reaktioner för den första användaren
- if any([not check_for_user(user.username, mode=mode), mode == "force"]):
- try:
- while True:
- # Uppdatera in_use
- profile.update_time()
- profile = profile_picture_reactions(
- profile, user, first_user=True, mode=mode
- )
- if profile.blocked:
- profile = blocked_profile(profile, proxieservers=proxieservers)
- else:
- break
- except:
- _print(profile, user, traceback.format_exc())
-
- friends = friends_of_user(user.username)
- _print(profile, user, f"\nKlar med, {user.username}\n")
- _print(profile, user, f"Vänner som reagerat: {len(friends)}")
- _print(profile, user, "\nVänner att kolla:")
-
- friends_unchecked = []
- for friend in friends:
- if not check_for_user(friend):
- print(friend)
- friends_unchecked.append(friend)
-
- _print(profile, user, [friends_unchecked], silent=True)
- _print(profile, user, f'Totalt: {len(friends_unchecked)}')
- print()
-
- # Hämta reaktioner för users vänner (som reagerat)
- count_friends = 0
- for friend in friends_unchecked:
- if datetime.now().strftime("%H") == '03' and int(datetime.now().strftime("%M")) < 30: # Sov för att kunna säkerhetskopieraa
- sleep(1800)
- count_friends += 1
- user = User(str(friend), mode, other_pictures=[])
- sleep_(2)
-
- # Uppdatera in_use
- profile.update_time()
- try:
- if not check_for_user(user.username):
- p = profile_picture_reactions(profile, user, mode=mode)
- if isinstance(p, Profile):
- profile = p
-
- except Exception as e: # Fel4
- write_error(
- 4,
- profile,
- e=e,
- user=user,
- traceback=traceback.format_exc(),
- soup=profile.viewing(),
+ if lookups == "leak_lookups":
+ id = user.username
+ check_profile_status(profile, user)
+ if profile.blocked:
+ profile = blocked_profile(profile, proxieservers=proxieservers)
+ profile.open(url_bas + "/" + user.username)
+ url = profile.browser.state.url.strip("/").strip("?_rdr")
+ if "php?" not in url:
+ user = User(str(url[url.rfind("/") + 1 :]).strip(), mode)
+ user.id = id
+ sleep_(4)
+ container = str(user.username)
+ profile.container = container
+
+ if "container" not in globals():
+ container = str(user.username)
+ profile.container = container
+
+ profile.users_checked += 1
+
+ # Hämta reaktioner för den första användaren
+ if any([not check_for_user(user.username, mode=mode), mode == "force"]):
+ try:
+ while True:
+ # Uppdatera in_use
+ profile.update_time()
+ profile = profile_picture_reactions(
+ profile, user, first_user=True, mode=mode
)
- _print(profile, user, f"\nFel: {str(user.username)}\n")
- sleep_(15)
-
- if not profile.blocked:
- _print(profile, user, f"Klar med {user.username} \n")
-
- # Rotera fb-profiler
- if count_friends > 2 * mode_nr:
- if random.randrange(0, 2, 1) == 1:
- profile = new_profile(container, proxieservers=proxieservers)
- count_friends = 0
- _print(profile, user, f"Växlar till {profile.name}")
- elif count_friends > 4 * mode_nr:
+ if profile.blocked:
+ profile = blocked_profile(profile, proxieservers=proxieservers)
+ else:
+ break
+ except:
+ _print(profile, user, traceback.format_exc())
+
+ if mode == 'solo':
+ exit()
+
+ friends = friends_of_user(user.username)
+ _print(profile, user, f"\nKlar med, {user.username}\n")
+ _print(profile, user, f"Vänner som reagerat: {len(friends)}")
+ _print(profile, user, "\nVänner att kolla:")
+
+ friends_unchecked = []
+ for friend in friends:
+ if not check_for_user(friend):
+ print(friend)
+ friends_unchecked.append(friend)
+
+ _print(profile, user, [friends_unchecked], silent=True)
+ _print(profile, user, f'Totalt: {len(friends_unchecked)}')
+ print()
+
+ # Hämta reaktioner för users vänner (som reagerat)
+ count_friends = 0
+ for friend in friends_unchecked:
+ if datetime.now().strftime("%H") == '03' and int(datetime.now().strftime("%M")) < 30: # Sov för att kunna säkerhetskopieraa
+ sleep(1800)
+ count_friends += 1
+ user = User(str(friend), mode, other_pictures=[])
+ sleep_(2)
+
+ # Uppdatera in_use
+ profile.update_time()
+ try:
+ if not check_for_user(user.username):
+ p = profile_picture_reactions(profile, user, mode=mode)
+ if isinstance(p, Profile):
+ profile = p
+
+ except Exception as e: # Fel4
+ write_error(
+ 4,
+ profile,
+ e=e,
+ user=user,
+ traceback=traceback.format_exc(),
+ soup=profile.viewing(),
+ )
+ _print(profile, user, f"\nFel: {str(user.username)}\n")
+ sleep_(15)
+
+ if not profile.blocked:
+ _print(profile, user, f"Klar med {user.username} \n")
+
+ # Rotera fb-profiler
+ if count_friends > 2 * mode_nr:
+ if random.randrange(0, 2, 1) == 1:
profile = new_profile(container, proxieservers=proxieservers)
count_friends = 0
_print(profile, user, f"Växlar till {profile.name}")
+ elif count_friends > 4 * mode_nr:
+ profile = new_profile(container, proxieservers=proxieservers)
+ count_friends = 0
+ _print(profile, user, f"Växlar till {profile.name}")
- elif profile.blocked:
- profile = blocked_profile(profile, proxieservers=proxieservers)
-
- _print(profile, None, f"Klar med alla vänner.")
- sleep(3)
+ elif profile.blocked:
+ profile = blocked_profile(profile, proxieservers=proxieservers)
- # Hämta ny användare från databasen när alla är genomgångna
- while True:
- new_user = get_user(collection=lookups)
- print(new_user)
- _print(profile, None, f"Ny user hämtad")
- if new_user == None:
- sleep(300)
- _print(profile, None, "Väntar på ny user.")
- else:
- user = User(str(new_user["_key"]), mode)
- _print(profile, user, f"Förberett ny user: {user.username}")
- break
+ _print(profile, None, f"Klar med alla vänner.")
+
diff --git a/facebook/arangodb.py b/facebook/arangodb.py
index 9e119a5..e1fa92a 100644
--- a/facebook/arangodb.py
+++ b/facebook/arangodb.py
@@ -3,6 +3,7 @@ from random import randint
from time import sleep
import json
from datetime import datetime
+from json2html import json2html
from arango import ArangoClient
@@ -17,7 +18,7 @@ for i in range(0, 6, 1):
with open("../password_arango.txt") as f:
pwd = f.readline()
except FileNotFoundError:
- if pwd == None:
+ if 'pwd' not in globals():
pwd = getpass(f'Lösenord för {user_arango}: ')
try:
@@ -74,12 +75,10 @@ def report_blocked(profile):
_print(profile, profile.container, f'Kunde inte rapportera blockerad: {profile.name}.')
-def get_profile(db=db, proxieservers='mullvad', collection='profiles'):
+def get_profile(db=db, collection='mullvad'):
""" Hämtar profil från profiles """
- if proxieservers != 'mullvad':
- collection = f'profiles_{proxieservers}' #TODO Byt namn på profiles till profiles_mullvad i DB
-
+
while True:
cursor = db.aql.execute(
"""
@@ -87,14 +86,12 @@ def get_profile(db=db, proxieservers='mullvad', collection='profiles'):
FILTER doc.in_use < @inuse
RETURN doc
""",
- bind_vars={"inuse": nowstamp() - 1200, '@col': collection}
+ bind_vars={"inuse": nowstamp() - 1200, '@col': f'profiles_{collection}'}
)
profiles = [profile for profile in cursor]
if profiles == []:
sleep(180)
- if proxieservers=='test': # Om det är ett test
- profile = profiles[0]
else:
profile = profiles[randint(0, len(profiles) - 1)]
return profile
@@ -113,10 +110,11 @@ def friends_of_user(user):
return [doc[8:] for doc in cursor]
-def remove_profile(profile):
+def remove_profile(profile, proxieservers='mullvad'):
""" Tar bort en blockerad profil från databasen. """
_print(profile, None, f"Tar bort {profile.name}.")
- db.collection("profiles").delete(
+
+ db.collection(f'profiles_{proxieservers}').delete(
profile.doc["_key"], silent=True, ignore_missing=True
)
_print(profile, profile.container, f"{profile.name} blockerad och borttagen {now()}.")
@@ -124,7 +122,6 @@ def remove_profile(profile):
# TODO #2 Bättre funktion för backup av databasen
-
def arango_connect(pwd):
return ArangoClient(hosts=host_arango).db(
db_arango, username=user_arango, password=pwd
@@ -244,6 +241,15 @@ def write_stats(continuous=False):
d['_key'] = now()[:13]
db.insert_document( "stats", d, overwrite=True)
+
+ # Skriv en html-fil
+ with open('webbapp/templates/stats.html', 'a+') as html:
+ html.truncate(0)
+ html.write('\n')
+
+ html.write(json2html.convert(json = d))
+
+ # Sov för att fortsätta senare
if continuous:
sleep(86400)
else:
@@ -254,7 +260,7 @@ def blocked_profile(profile, proxieservers):
_print(profile, None, f'Rapporterar att {profile.name} blockats.')
report_blocked(profile)
_print(profile, None, f'Tar bort {profile.name} från databasen.')
- remove_profile(profile)
+ remove_profile(profile, proxieservers)
_print(profile, None, f'Hämtar en ny profil.')
profile = new_profile(profile.container, proxieservers)
return profile
diff --git a/facebook/config.py b/facebook/config.py
index 7e018b6..c84e19e 100644
--- a/facebook/config.py
+++ b/facebook/config.py
@@ -1,6 +1,8 @@
-
-def set_pwd(_pwd):
+from getpass import getpass
+def set_pwd(_pwd=None):
global pwd
+ if _pwd is None:
+ _pwd = getpass('Lösenord för Arango-användaren:')
pwd = _pwd
# Info för arangodb
diff --git a/facebook/images_pi.py b/facebook/images_pi.py
index e7bec63..660773d 100644
--- a/facebook/images_pi.py
+++ b/facebook/images_pi.py
@@ -32,24 +32,24 @@ def download_image(url, user, id):
elif r.status_code == 403:
exit()
- image_name = f"profile_pictures/{user}/{id}.jpg"
+ image_name = f"/ssd/profile_pictures/{user}/{id}.jpg"
img_data = r.content
with open(image_name, "wb") as handler:
handler.write(img_data)
- nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg"
+ #nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}/{id}.jpg"
- headers = {"Content-type": "image/jpeg", "Slug": "heart"}
- while True:
- try:
- r = requests.put(
- nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False
- )
- break
+ # headers = {"Content-type": "image/jpeg", "Slug": "heart"}
+ # while True:
+ # try:
+ # r = requests.put(
+ # nc_path, data=open(image_name, "rb"), headers=headers, auth=auth, verify=False
+ # )
+ # break
- except:
- print('Kunde inte ladda upp', nc_path)
- sleep(5)
+ # except:
+ # print('Kunde inte ladda upp', nc_path)
+ # sleep(5)
print(f"{user}\t{id}\t{r.status_code}")
@@ -66,22 +66,26 @@ def get_pictures(day):
bind_vars={"date": str(day)},
)
+ # Skapa en lista med bilder att gå igenom.
+ images = []
for doc in cursor:
+ images.append(doc)
+ for doc in images:
user = doc["member"]
- # Skapa mapp för användarens bilder på NC...
- nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}"
- while True:
- try:
- requests.request("MKCOL", nc_path, verify=False, auth=auth)
- break
- except:
- print('Kunde inte skapa', nc_path)
- sleep(5)
+ # # Skapa mapp för användarens bilder på NC...
+ # nc_path = f"https://nc.lasseedfast.se/remote.php/dav/files/Lasse/profile_pictures/{user}"
+ # while True:
+ # try:
+ # requests.request("MKCOL", nc_path, verify=False, auth=auth)
+ # break
+ # except:
+ # print('Kunde inte skapa', nc_path)
+ # sleep(5)
# ...och på datorn (för backup)
- if not os.path.isdir(f"profile_pictures/{user}"):
- os.mkdir(f"profile_pictures/{user}")
+ if not os.path.isdir(f"/ssd/profile_pictures/{user}"):
+ os.mkdir(f"/ssd/profile_pictures/{user}")
pictures = []
for picture in doc["pictures"]:
@@ -121,14 +125,14 @@ def get_pictures(day):
if __name__ == '__main__':
# Info för arangodb
- user_arango = "Pi"
+ user_arango = "Lasse"
db_arango = "facebook"
- host_arango = "http://192.168.0.3:8529"
+ host_arango = "http://192.168.0.4:8529"
# Starta koppling till arangodb
# Avkryptera lösen till arango
- pwd = getpass("Arangolösenord för Pi: ")
+ pwd = getpass(f"Arangolösenord för {user_arango}: ")
db = ArangoClient(hosts=host_arango).db(db_arango, username=user_arango, password=pwd)
@@ -156,9 +160,6 @@ if __name__ == '__main__':
"se27-wg.socks5.mullvad.net:1080",
"se28-wg.socks5.mullvad.net:1080",
]
-
- if not os.path.isdir("profile_pictures"):
- os.mkdir("profile_pictures")
while True:
today = date.today().strftime('%Y%m%d')
diff --git a/facebook/scrapers.py b/facebook/scrapers.py
index 3795536..0f46bfc 100644
--- a/facebook/scrapers.py
+++ b/facebook/scrapers.py
@@ -157,7 +157,7 @@ def profile_picture_reactions(profile, user, first_user=False, mode="all"):
if first_user == False:
if mode == "single" and user.reactions > 30:
break
- elif all([mode == "few", user.reactions > 80, pic != url_pics[-1]]):
+ elif all([mode in ("few", "solo"), user.reactions > 80, pic != url_pics[-1]]):  # "solo" follows the same rule as "few"
# Kolla den sista bilder
check_picture(url_bas + url_pics[-1], user, profile)
user.checked_pictures.append(url_bas + pic)
diff --git a/facebook/stats.py b/facebook/stats.py
new file mode 100644
index 0000000..bcf653f
--- /dev/null
+++ b/facebook/stats.py
@@ -0,0 +1,74 @@
+from datetime import datetime
+from getpass import getpass
+from time import sleep
+
+from arango import ArangoClient
+from json2html import json2html
+
+
+def now():
+ """ Returns current date and time as string"""
+ return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
+
+def write_stats(db, continuous=False):
+ while True:
+ d = {}
+ for col in db.collections():
+ if not col['system']:
+ d[col['name']] = db.collection(col['name']).count()
+ del d['stats']
+ #d['time'] = now()
+ cursor = db.aql.execute(
+ """
+ FOR doc IN members
+ FILTER doc.checked == true
+ COLLECT WITH COUNT INTO length
+ RETURN length
+ """
+ )
+ d['checked_members'] = cursor.next()
+
+
+ # Hur många konton per säljare som finns kvar
+ cursor = db.aql.execute(
+ '''
+ for doc in profiles
+ filter has(doc, "vendor")
+ COLLECT vendor = doc.vendor WITH COUNT INTO length
+ RETURN {
+ "vendor" : vendor,
+ "active" : length
+ }
+ ''')
+ d['active_vendors'] = [doc for doc in cursor]
+
+ d['_key'] = now()[:13]
+ db.insert_document( "stats", d, overwrite=True)
+
+ # Skriv en html-fil
+ with open('website/fb-webbapp/stats.html', 'a+') as html:
+ html.truncate(0)
+ html.write('\n')
+
+ html.write(json2html.convert(json = d))
+
+ # Sov för att fortsätta senare
+ if continuous:
+ sleep(86400)
+ else:
+ break
+
+# Info för arangodb
+user_arango = "Lasse"
+db_arango = "facebook"
+host_arango = "http://192.168.0.4:8529"
+
+# Starta koppling till arangodb
+# Avkryptera lösen till arango
+pwd = getpass(f'Arangolösenord för {user_arango}:').strip()
+
+db = ArangoClient(hosts=host_arango).db(
+ db_arango, username=user_arango, password=pwd
+)
+
+write_stats(db, continuous=True)
diff --git a/fb-webbapp/main.py b/fb-webbapp/main.py
new file mode 100644
index 0000000..8a562e5
--- /dev/null
+++ b/fb-webbapp/main.py
@@ -0,0 +1,14 @@
+from flask import Flask, render_template
+import json
+from json2html import json2html
+
+app = Flask(__name__)
+
+
+@app.route("/")
+def stats():
+ return render_template("stats.html")
+
+if __name__ == "__main__":
+ app.run(debug=True)
+
diff --git a/requirements.txt b/requirements.txt
index d58913b..47443c8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,4 @@ soupsieve==2.2
toml==0.10.2
urllib3==1.26.3
Werkzeug==1.0.1
+json2html
diff --git a/workspace.code-workspace b/workspace.code-workspace
index 0495218..189cad0 100644
--- a/workspace.code-workspace
+++ b/workspace.code-workspace
@@ -1,16 +1,17 @@
{
- "folders": [
- {
- "path": "."
- },
- {
- "path": "facebook"
- },
- {
- "path": "../mrkoll"
- }
- ],
- "settings": {
- "python.pythonPath": "/Users/Lasse/.pyenv/versions/3.9.5/bin/python"
- }
+ "folders": [
+ {
+ "path": "."
+ },
+ {
+ "path": "../mrkoll"
+ },
+ {
+ "path": "facebook"
+ }
+ ],
+ "settings": {
+ "python.pythonPath": "/Users/Lasse/Datorgemensamt/Programmeringsprojekt/Facebook/fb-scraper/.venv/bin/python"
+ }
+
}
\ No newline at end of file