You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

75 lines
2.5 KiB

import re
from arango import ArangoClient
from dotenv import load_dotenv
import os
# Environment bootstrap. When the "INFO" variable is absent from the process
# environment, import the project's env_manager module and call its set_env()
# (presumably this populates the required variables -- confirm in env_manager).
if os.environ.get("INFO") is None:
    import env_manager

    env_manager.set_env()

# Then load variables via python-dotenv
# (install with: pip install python-dotenv).
load_dotenv()
class ArangoDB:
    """Thin wrapper that opens a connection to a single ArangoDB database.

    Settings that are not passed explicitly are read from environment
    variables: ``ARANGO_HOST``, ``ARANGO_USER``, ``ARANGO_PASSWORD``,
    ``ARANGO_DB`` and the per-user ``ARANGO_PWD_<USER>``.

    Attributes:
        client: The ``ArangoClient`` created for the host in ``ARANGO_HOST``.
        db: The database handle returned by ``client.db(...)``.
    """

    # Matches every character that fix_key() replaces with an underscore.
    _INVALID_KEY_CHARS = re.compile(r"[^A-Za-z0-9_\-\.@()+=;$!*\'%:]")

    def __init__(self, user=None, password=None, db_name=None):
        """
        Initializes an instance of the ArangoDB class.

        Args:
            user (str): The username for authentication. Falls back to the
                ``ARANGO_USER`` environment variable.
            password (str): The password for authentication. Falls back to
                ``ARANGO_PASSWORD``. If ``ARANGO_PWD_<USER>`` is set for the
                resolved user, that value takes precedence.
            db_name (str): The name of the database. Falls back to ``user``
                when one was passed, otherwise to ``ARANGO_DB``.
        """
        host = os.getenv("ARANGO_HOST")
        user, password, db_name = self._resolve_credentials(
            user, password, db_name
        )
        self.client = ArangoClient(hosts=host)
        self.db = self.client.db(db_name, username=user, password=password)

    @staticmethod
    def _resolve_credentials(user, password, db_name):
        """Fill in missing connection settings from the environment.

        Generalizes the former hard-coded ``user == 'lasse'`` special case:
        for any user, a non-empty ``ARANGO_PWD_<USER>`` variable (user name
        upper-cased, non-alphanumeric characters turned into ``_``) replaces
        the password. When that variable is absent the previously resolved
        password is kept instead of being overwritten with ``None``.

        Returns:
            tuple: ``(user, password, db_name)`` after applying all fallbacks.
        """
        if not password:
            password = os.getenv("ARANGO_PASSWORD")
        if not db_name:
            # An explicitly passed user gets a database of the same name.
            db_name = user if user else os.getenv("ARANGO_DB")
        if not user:
            user = os.getenv("ARANGO_USER")
        if user:
            suffix = re.sub(r"[^A-Za-z0-9]", "_", user).upper()
            user_password = os.getenv(f"ARANGO_PWD_{suffix}")
            if user_password:
                # Per-user secret wins, matching the old behavior for 'lasse'.
                password = user_password
        return user, password, db_name

    def fix_key(self, _key):
        """
        Sanitize a given key by replacing all characters that are not
        alphanumeric, underscore, hyphen, dot, at symbol, parentheses, plus,
        equals, semicolon, dollar sign, exclamation mark, asterisk, single
        quote, percent, or colon with an underscore.

        NOTE(review): a comma is replaced as well; ArangoDB's documented key
        character set appears to allow it -- confirm before changing, since
        existing keys may depend on the current mapping.

        Args:
            _key (str): The key to be sanitized.

        Returns:
            str: The sanitized key with disallowed characters replaced by
            underscores.
        """
        return ArangoDB._INVALID_KEY_CHARS.sub("_", _key)
def strip_angle_bracket_tags(text):
    """Return *text* with every ``<...>`` span, brackets included, removed.

    Args:
        text (str): The string to clean.

    Returns:
        str: ``text`` without any ``<...>`` spans. A lone ``<`` or ``>``
        with no matching partner is left untouched.
    """
    return re.sub(r'<[^>]*>', '', text)


def main():
    """Strip ``<...>`` spans from the abstracts in the 'sci_articles' collection.

    Iterates over all documents of the collection in the 'base' database and
    rewrites ``metadata.abstract`` for those whose abstract is a string that
    actually contains such spans. Documents that would not change are not
    written and not reported.
    """
    arango = ArangoDB(db_name='base')
    # Fetch the collection handle once instead of once per document.
    collection = arango.db.collection('sci_articles')
    for article in collection.all():
        metadata = article.get('metadata')
        # Skip missing/empty metadata and anything that is not a mapping.
        if not isinstance(metadata, dict):
            continue
        abstract = metadata.get('abstract')
        if not isinstance(abstract, str):
            continue
        cleaned = strip_angle_bracket_tags(abstract)
        if cleaned == abstract:
            # Nothing was stripped: avoid a pointless write and log line.
            continue
        metadata['abstract'] = cleaned
        collection.update_match(
            filters={'_key': article['_key']},
            body={'metadata': metadata},
            merge=True
        )
        print(f"Updated abstract for {article['_key']}")


if __name__ == "__main__":
    main()