"""ArangoDB connection helper and a one-off script that strips markup from abstracts."""

import os
import re

from arango import ArangoClient
from dotenv import load_dotenv

if "INFO" not in os.environ:
    # Project-local bootstrap that populates the environment when "INFO" is absent.
    import env_manager

    env_manager.set_env()

load_dotenv()  # Install with pip install python-dotenv

# Matches anything enclosed in angle brackets, brackets included (e.g. "<jats:p>").
_TAG_PATTERN = re.compile(r'<[^>]*>')


class ArangoDB:
    """Holds a python-arango client and a database handle built from env settings."""

    def __init__(self, user=None, password=None, db_name=None):
        """
        Create the client and open a handle to the requested database.

        Every argument that is left empty falls back to the environment.

        Args:
            user (str): The username for authentication. Falls back to the
                ``ARANGO_USER`` environment variable.
            password (str): The password for authentication. Falls back to the
                ``ARANGO_PASSWORD`` environment variable.
            db_name (str): The name of the database. Falls back to ``user``
                when a user was passed explicitly, otherwise to the
                ``ARANGO_DB`` environment variable.

        The server address is always read from ``ARANGO_HOST``.
        """
        host = os.getenv("ARANGO_HOST")
        if not password:
            password = os.getenv("ARANGO_PASSWORD")
        if not db_name:
            # Resolved before `user` gets its env fallback on purpose: only an
            # explicitly passed user doubles as the database name.
            db_name = user if user else os.getenv("ARANGO_DB")
        if not user:
            user = os.getenv("ARANGO_USER")

        self.client = ArangoClient(hosts=host)
        if user == 'lasse':
            # TODO: hard-coded per-user override that needs to be generalized to
            # work with all users. Note that it replaces any password resolved
            # above, including one passed explicitly by the caller.
            password = os.getenv("ARANGO_PWD_LASSE")
        self.db = self.client.db(db_name, username=user, password=password)

    def fix_key(self, _key: str) -> str:
        """
        Sanitize a key by replacing every disallowed character with an underscore.

        The characters kept as-is are ASCII letters, digits, underscore,
        hyphen, dot, at symbol, parentheses, plus, equals, semicolon, dollar
        sign, exclamation mark, asterisk, single quote, percent and colon.

        Args:
            _key (str): The key to be sanitized.

        Returns:
            str: The sanitized key with disallowed characters replaced by underscores.
        """
        return re.sub(r"[^A-Za-z0-9_\-\.@()+=;$!*\'%:]", "_", _key)


def main():
    """Strip ``<...>`` markup from the abstract of every document in ``sci_articles``.

    Only documents whose abstract actually changes are written back, so the
    printed "Updated" lines correspond to real modifications.
    """
    arango = ArangoDB(db_name='base')
    collection = arango.db.collection('sci_articles')

    for article in collection.all():
        metadata = article.get('metadata')
        if not isinstance(metadata, dict):
            # Missing, empty or malformed metadata: nothing to clean.
            continue

        abstract = metadata.get('abstract')
        if not isinstance(abstract, str):
            continue

        # Remove text within <> brackets and the brackets themselves
        cleaned = _TAG_PATTERN.sub('', abstract)
        if cleaned == abstract:
            # No markup found; skip the pointless write.
            continue

        metadata['abstract'] = cleaned
        collection.update_match(
            filters={'_key': article['_key']},
            body={'metadata': metadata},
            merge=True
        )
        print(f"Updated abstract for {article['_key']}")


if __name__ == "__main__":
    main()