Refactor code structure and remove unused config.json file

main
lasseedfast 1 year ago
parent 4147b7185d
commit 6d1d97a849
  1. 27
      _llm.py
  2. 3
      config.json
  3. 139
      enrich_sci_articles.py
  4. 85
      make_summaries.py
  5. 828
      parl_meetings.py
  6. 10
      test.py

@ -1,26 +1,37 @@
from ollama import Client from ollama import Client
import os import os
import env_manager import env_manager
env_manager.set_env() env_manager.set_env()
class LLM:
    """Small conversational wrapper around an Ollama chat endpoint.

    The instance keeps the running message history in ``self.messages`` and
    sends it in full on every call to :meth:`generate`.

    Args:
        system_message: Optional system prompt placed first in the history.
        num_ctx: Context window size forwarded to Ollama in ``options``.
        temperature: Sampling temperature forwarded to Ollama in ``options``.
        chat: When true the history grows with every exchange; when false the
            history is reset after each answer.
        model: Optional model name; defaults to ``DEFAULT_MODEL``.
    """

    # Pinned model name. The LLM_MODEL environment lookup was commented out
    # in favour of this value, so it stays the default.
    DEFAULT_MODEL = "mistral-nemo:12b-instruct-2407-q5_K_M"

    def __init__(
        self,
        system_message=None,
        num_ctx=20000,
        temperature=0,
        chat=True,
        model=None,
    ) -> None:
        self.llm_model = model or self.DEFAULT_MODEL
        self.system_message = system_message
        self.options = {"temperature": temperature, "num_ctx": num_ctx}
        self.messages = self._initial_messages()
        self.chat = chat
        self.ollama = Client(
            host=f'{os.getenv("LLM_URL")}:{os.getenv("LLM_PORT")}',
        )

    def _initial_messages(self) -> list:
        """Return a fresh history: the system prompt if one was given, else empty."""
        if self.system_message is None:
            # Avoid sending a system turn whose content is None.
            return []
        return [{"role": "system", "content": self.system_message}]

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` together with the history and return the model's answer.

        Raises:
            Exception: whatever the Ollama client raises; the failed prompt is
                removed from the history before the error propagates.
        """
        self.messages.append({"role": "user", "content": prompt})
        try:
            result = self.ollama.chat(
                model=self.llm_model, messages=self.messages, options=self.options
            )
        except Exception:
            # Do not leave an unanswered user turn behind for the next call.
            self.messages.pop()
            raise
        answer = result["message"]["content"]
        if self.chat:
            self.messages.append({"role": "assistant", "content": answer})
        else:
            # One-shot mode: every call starts from the bare system prompt.
            self.messages = self._initial_messages()
        return answer
if __name__ == "__main__":
    # Manual smoke test: ask the default model one question and show the reply.
    assistant = LLM()
    reply = assistant.generate("Why is the sky red?")
    print(reply)

@ -1,3 +0,0 @@
{
"apikey": "eee5c352c30f3586f1cc42d0a07ce440"
}

@ -0,0 +1,139 @@
from _arango import ArangoDB
from _llm import LLM
import tiktoken
from colorprinter.print_color import *
def make_summaries():
    """Write an LLM summary onto every document of the ``sci_articles`` collection.

    Each article gets a ``summary`` field holding the generation settings
    (``meta``) and the summary text (``text_sum``). When anything goes wrong
    for an article, the error text is stored in ``summary_error`` and the loop
    moves on to the next article.

    Relies on a module-level ``arango`` connection being bound before the call.
    """
    # Initialize the tokenizer
    tokenizer = tiktoken.get_encoding("cl100k_base")

    def count_tokens(text):
        """Return the number of cl100k_base tokens in ``text``."""
        return len(tokenizer.encode(text))

    collection = arango.db.collection("sci_articles")
    articles = list(arango.db.aql.execute('''
for doc in sci_articles
return doc
'''))

    for article in articles:
        try:
            # Token counting and LLM construction live inside the try so that a
            # single malformed article (e.g. no "text") cannot abort the batch.
            num_tokens = count_tokens(article["text"])
            llm = LLM(
                system_message="You are summarising scientific articles. It is very important that you keep to what is written and do not add any of your own opinions or interpretations.",
                # Article length plus 3000 tokens of headroom, capped at 70000.
                num_ctx=min(num_tokens + 3000, 70000),
                temperature=0,
            )
            prompt = f'''
Make a summary of the following text:
"""
{article["text"]}
"""
Write a detailed summary. Make sure to include information from all sections: introduction, methods, results, and conclusion.
Everything about electric vehicles, and things related to electric cars, is very important.
Write the summary as if you are writing for someone who is not familiar with the topic.
Write it from the point of the view of the author of the text.
'''
            article["summary"] = {
                "meta": {
                    "model": llm.llm_model,
                    "system_message": llm.system_message,
                    'num_ctx': llm.options['num_ctx'],
                    'temperature': llm.options['temperature'],
                },
                "text_sum": llm.generate(prompt),
            }
            print(article["summary"])
            collection.update(article)
        except Exception as e:
            # Best effort: record the failure on the document and keep going.
            print(e)
            article['summary_error'] = str(e)
            collection.update(article)
def make_chunk_qa(num_qa=5):
    """Generate question/answer pairs for every chunk of every article.

    For each document in ``sci_articles`` two chat-mode LLMs are created: one
    that writes ``num_qa`` questions per chunk and one that answers them. The
    pairs are stored as ``chunk["qa"]`` (a list of ``{"question", "answer"}``
    dicts). Chunks that already carry a ``qa`` key are skipped. When an article
    fails, the error text is stored in ``qa_error`` and the loop continues.

    Args:
        num_qa: Number of questions requested per chunk.

    Relies on a module-level ``arango`` connection being bound before the call.
    """
    import re

    collection = arango.db.collection("sci_articles")
    articles = list(arango.db.aql.execute('''
for doc in sci_articles
return doc
'''))
    # Layout example shown to the model; for num_qa=5 this is exactly
    # "question1;question2;question3;question4;question5".
    answer_format = ";".join(f"question{i}" for i in range(1, num_qa + 1))

    for article in articles:
        try:
            if 'abstract' in article['metadata']:
                # Drop opening and closing <jats:p> tags (with or without
                # attributes) without leaving a stray ">" behind.
                abstract = re.sub(
                    r'</?jats:p(?:\s[^>]*)?>', '', article['metadata']['abstract']
                )
            else:
                abstract = article['summary']['text_sum']

            question_machine = LLM(
                system_message=f'''You are creating questions based on scientific articles. You will be given one text snippet from the article at a time and you should create {num_qa} questions based on that snippet.
To understand the article as a whole you can read this abstract:
"""
{abstract}
"""
The {num_qa} questions should be based on the text snippet and should be answerable by the text, but you can check the conversation history to make them more relevant for the context.
Don't write general questions like "what is the text about?", but rather questions that reflect the facts in the text.
The questions will be used in a CSV file so it's important that you answer on the format: "{answer_format}".
Always make {num_qa} questions to every text!
''',
                num_ctx=20000,
                temperature=0.2,
            )
            answer_machine = LLM(
                system_message='''You are answering questions about a text snippet from a scientific article. You will be given one question and one text snippet at a time and you should answer the questions based on that snippet.
The answers should be based on the text snippet, but you can check the conversation history to make them more relevant for the context.
Answer ONLY with the answer to the question, not a reasoning where you explain why you think that is the answer.
Make the answers long enough to be informative and contain relevant information, but not too long.
''',
                num_ctx=20000,
                temperature=0.2,
            )

            for chunk in article["chunks"]:
                if 'qa' in chunk:
                    continue
                prompt = f'''
"""
{chunk['text']}
"""
Remember:
- If there is something in the text about electric cars, please include that in the question.
- Don't write general questinos like "what is the text about?" or "what is the main point of the text?", but rather questions that can be answered by the text. The questions will be used to query a vector database.
- Answer on the format: "{answer_format}" as the questions will be used in a CSV file. Answer ONLY with the questions, not anything else!
'''
                raw_questions = question_machine.generate(prompt).split(';')
                # Ignore empty fragments such as the one after a trailing ";".
                questions = [q.strip() for q in raw_questions if q.strip()]

                # Collected locally and attached only once complete, so a chunk
                # that failed half-way is retried on the next run instead of
                # being skipped by the 'qa' check above.
                qa_pairs = []
                for position, question in enumerate(questions):
                    print_blue(question)
                    if position == 0:
                        # The first question of a chunk carries the chunk text;
                        # later ones rely on the answer machine's chat history.
                        prompt = f'''
Answer the following question based on the text snippet below: {question}
"""
{chunk['text']}
"""
Remember:
- The answer should be based on the text.
- If there is something in the text about electric cars, please include that in the answer.
- Answer ONLY with the answer, nothing else.
'''
                    else:
                        prompt = question
                    # Send the prepared prompt, not the bare question, so the
                    # chunk text actually reaches the answer machine.
                    answer = answer_machine.generate(prompt)
                    print_green(answer)
                    qa_pairs.append({"question": question, "answer": answer})

                chunk["qa"] = qa_pairs
                collection.update(article, check_rev=False)
        except Exception as e:
            # Best effort: record the failure on the document and keep going.
            print(e)
            article['qa_error'] = str(e)
            collection.update(article, check_rev=False)
# Script entry point: connect to the database and build Q&A pairs for all chunks.
if __name__ == "__main__":
# make_summaries() and make_chunk_qa() read this module-level name, so it must be bound before either is called.
arango = ArangoDB()
make_chunk_qa()

@ -0,0 +1,85 @@
from _arango import ArangoDB
from _llm import LLM
import tiktoken
def make_summaries():
    """Summarise each ``sci_articles`` document and store the result on it.

    A successful run sets ``article["summary"]`` to a dict with the generation
    settings under ``meta`` and the summary under ``text_sum``. A failure for
    one article is written to ``article["summary_error"]`` and does not stop
    the remaining articles from being processed.

    Relies on a module-level ``arango`` connection being bound before the call.
    """
    # Initialize the tokenizer
    tokenizer = tiktoken.get_encoding("cl100k_base")

    def count_tokens(text):
        """Return how many cl100k_base tokens ``text`` encodes to."""
        tokens = tokenizer.encode(text)
        return len(tokens)

    sci_articles = arango.db.collection("sci_articles")
    cursor = arango.db.aql.execute('''
for doc in sci_articles
return doc
''')

    for article in list(cursor):
        try:
            # Everything that touches the article is guarded, including the
            # "text" lookup, so one bad document cannot end the whole run.
            num_tokens = count_tokens(article["text"])
            # Context window: article length plus 3000 tokens, at most 70000.
            context_size = min(num_tokens + 3000, 70000)
            llm = LLM(
                system_message="You are summarising scientific articles. It is very important that you keep to what is written and do not add any of your own opinions or interpretations.",
                num_ctx=context_size,
                temperature=0,
            )
            prompt = f'''
Make a summary of the following text:
"""
{article["text"]}
"""
Write a detailed summary. Make sure to include information from all sections: introduction, methods, results, and conclusion.
Everything about electric vehicles, and things related to electric cars, is very important.
Write the summary as if you are writing for someone who is not familiar with the topic.
Write it from the point of the view of the author of the text.
'''
            meta = {
                "model": llm.llm_model,
                "system_message": llm.system_message,
                'num_ctx': llm.options['num_ctx'],
                'temperature': llm.options['temperature'],
            }
            article["summary"] = {"meta": meta, "text_sum": llm.generate(prompt)}
            print(article["summary"])
            sci_articles.update(article)
        except Exception as e:
            # Best effort: note the failure on the document and move on.
            print(e)
            article['summary_error'] = str(e)
            sci_articles.update(article)
def make_chunk_qa():
    """Draft of per-chunk question generation (unfinished).

    For every article a question LLM and an answer LLM are constructed, and
    for every chunk a question prompt is formatted. The prompt is never sent
    to either model and nothing is written back to the database.

    Relies on a module-level ``arango`` connection being bound before the call.
    """
    cursor = arango.db.aql.execute('''
for doc in sci_articles
return doc
''')
    for article in list(cursor):
        question_llm = LLM(
            system_message="You are creating questions based on scientific articles. The questions should be based on the text and should be answerable by the text, but you can check the conversation history to make them more relevant for the context.",
            num_ctx=20000,
            temperature=0.2,
        )
        answer_llm = LLM(
            system_message="You are answering questions based on scientific articles. The answers should be based on the text, but you can check the conversation history to make them more relevant for the context.",
            num_ctx=20000,
            temperature=0.2,
        )
        for chunk in article["chunks"]:
            # Built but not used yet; neither LLM is called.
            draft_prompt = f'''
Create a question based on the following text:
"""
{chunk['text']}
"""
Write a question that can be answered by the text. Make sure to include information from all sections: introduction, methods, results, and conclusion.
Everything about electric vehicles, and things related to electric cars, is very important.
Write the question as if you are writing for someone who is not familiar with the topic.
Write it from the point of the view of the author of the text.
'''
# Script entry point: only opens the database connection; no function is called here.
if __name__ == "__main__":
# make_summaries() and make_chunk_qa() read this module-level name.
arango = ArangoDB()

@ -0,0 +1,828 @@
from pprint import pprint
import asyncio
from pyppeteer import launch
from bs4 import BeautifulSoup
from _arango import ArangoDB
from time import sleep
from colorprinter.print_color import *
async def get_info(browser, id_number):
    """Fetch and parse one registrant's detail page from the transparency register.

    Args:
        browser: A launched pyppeteer browser used to open a new page.
        id_number: Registrant id inserted into the detail-page URL.

    Returns:
        A dict mapping each ``<h2>`` section title to a dict of row label ->
        ``{'list': [...]}``, ``{'text': ...}`` or ``{'text': ..., 'links': {...}}``,
        plus the raw section markup under ``'html'``. Returns ``None`` when
        fetching or parsing fails; the error is printed.
    """
    try:
        page = await browser.newPage()
        try:
            await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36')
            url = f'https://transparency-register.europa.eu/search-details_en?id={id_number}'
            await page.goto(url, {'waitUntil': 'networkidle2'})
            content = await page.content()
        finally:
            # Close the tab on failure too, so errors do not pile up open pages.
            await page.close()

        soup = BeautifulSoup(content, 'html.parser')
        info_html = soup.find_all('div', class_='ecl')[0]
        headers = info_html.find_all('h2')
        tables = info_html.find_all('table')

        data = {}
        # NOTE(review): headings and tables are paired purely by position; a
        # table without its own <h2> would shift every later pairing — confirm
        # against the live markup.
        for header, table in zip(headers, tables):
            header_text = header.text.strip()
            section = data[header_text] = {}
            for row in table.find_all('tr'):
                cells = row.find_all('td')
                if len(cells) != 2:
                    continue
                row_name = cells[0].get_text(strip=True)
                cell_content = cells[1]
                # Check if the cell contains a list
                ul = cell_content.find('ul')
                if ul:
                    list_items = [li.get_text(strip=True) for li in ul.find_all('li')]
                    section[row_name] = {'list': list_items}
                    continue
                cell_text = cell_content.get_text(strip=True)
                # Only anchors that carry an href, so a bare <a> cannot raise
                # KeyError and discard the whole record.
                cell_links = {
                    link.get_text(strip=True): link['href']
                    for link in cell_content.find_all('a', href=True)
                }
                if cell_links:
                    section[row_name] = {'text': cell_text, 'links': cell_links}
                else:
                    section[row_name] = {'text': cell_text}
        data['html'] = str(info_html)
        return data
    except Exception as e:
        print(f"Error fetching info for ID {id_number}: {e}")
        return None
def update_info_from_html():
    """Re-parse the stored HTML of every ``eu_lobbyists`` document and overwrite it.

    Each document's ``html`` field is run through ``extract_from_html`` again;
    the result keeps the original ``_key`` and ``html`` and replaces the stored
    document. A progress counter is printed on a single console line.
    """
    arango = ArangoDB()
    collection = arango.db.collection('eu_lobbyists')
    arango_docs = list(collection.all())
    total = len(arango_docs)

    new_docs = []
    for count, doc in enumerate(arango_docs, start=1):
        html = doc['html']
        # A fresh seed dict per document keeps results from leaking between documents.
        data = extract_from_html(html, {'html': html})
        data['_key'] = doc['_key']
        new_docs.append(data)
        print(f'{count}/{total}', end='\r')

    # Write the whole batch; passing the last single ``data`` dict here was a bug.
    if new_docs:
        collection.insert_many(new_docs, overwrite=True)
def extract_from_html(html, data=None):
    """Parse a registrant detail section into a nested dict.

    Args:
        html: Markup containing a ``<div class="ecl">`` with ``<h2>`` headings
            followed by ``ecl-table ecl-table--zebra`` tables.
        data: Optional dict to fill in; a new dict is created when omitted.

    Returns:
        ``data``, with one entry per heading. An entry is normally a dict of
        row label -> list of strings (for ``<ul>`` cells), ``{'text': ...}`` or
        ``{'text': ..., 'links': {...}}``. A table consisting solely of a
        single-cell list row is stored as a plain list of strings instead.

    Raises:
        IndexError: when ``html`` contains no ``<div class="ecl">``.
    """
    # A fresh dict per call; a shared mutable default would accumulate
    # sections across unrelated calls.
    if data is None:
        data = {}

    soup = BeautifulSoup(html, 'html.parser')
    info_html = soup.find_all('div', class_='ecl')[0]
    headers = info_html.find_all('h2')
    tables = info_html.find_all('table', {'class': 'ecl-table ecl-table--zebra'})

    for header, table in zip(headers, tables):
        header_text = header.text.strip()
        table_data = {}
        # Holds the items of a list row that is labelled by the heading itself.
        section_list = None

        for row in table.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) == 2:
                row_name = cells[0].get_text(strip=True)
                cell_content = cells[1]
            elif len(cells) == 1:
                # A lone cell has no label of its own; file it under the heading.
                row_name = header_text
                cell_content = cells[0]
            else:
                continue

            # Check if the cell contains a list
            ul = cell_content.find('ul')
            if ul:
                list_items = [li.get_text(strip=True) for li in ul.find_all('li')]
                if row_name == header_text:
                    # Kept apart from table_data: rebinding table_data to a
                    # list made the later .items() merge raise.
                    section_list = list_items
                else:
                    table_data[row_name] = list_items
                continue

            cell_text = cell_content.get_text(strip=True)
            # Anchors without an href are ignored instead of raising KeyError.
            cell_links = {
                link.get_text(strip=True): link['href']
                for link in cell_content.find_all('a', href=True)
            }
            if cell_links:
                table_data[row_name] = {'text': cell_text, 'links': cell_links}
            else:
                table_data[row_name] = {'text': cell_text}

        existing = data.get(header_text)
        has_existing_rows = isinstance(existing, dict) and bool(existing)
        if section_list is not None and not table_data and not has_existing_rows:
            # The section is nothing but a list: store the list directly.
            data[header_text] = section_list
            continue

        if not isinstance(existing, dict):
            existing = {}
            data[header_text] = existing
        if section_list is not None:
            existing[header_text] = section_list
        existing.update(table_data)

    return data
async def get_all_lobbyists(browser, page_number):
    """Return the registrant ids linked from one page of the register search.

    Args:
        browser: A launched pyppeteer browser used to open a new page.
        page_number: Value of the ``page`` query parameter of the search URL.

    Returns:
        The part after the last ``=`` of every ``ecl-link`` anchor whose href
        contains ``search-details``. Returns an empty list when fetching or
        parsing fails; the error is printed.
    """
    try:
        page = await browser.newPage()
        try:
            await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36')
            url = f'https://transparency-register.europa.eu/searchregister-or-update/search-register_en?searchType=REGISTRANTS&page={page_number}#list-of-all-lobbyists'
            await page.goto(url, {'waitUntil': 'networkidle2'})
            content = await page.content()
        finally:
            # Close the tab on failure too, so errors do not pile up open pages.
            await page.close()

        soup = BeautifulSoup(content, 'html.parser')
        # href=True skips anchors without an href; one such anchor used to
        # raise KeyError and empty the result for the whole page.
        links = soup.find_all('a', class_='ecl-link', href=True)
        return [
            link['href'].split('=')[-1]
            for link in links
            if 'search-details' in link['href']
        ]
    except Exception as e:
        print(f"Error fetching lobbyists for page {page_number}: {e}")
        return []
async def main(first_page=1, last_page=147):
    """Scrape the register search pages and store new registrants in ArangoDB.

    For every search page the registrant ids are collected, ids already present
    in the ``eu_lobbyists`` collection are skipped, and the detail page of each
    remaining id is fetched and inserted under ``_key = id``. Records without a
    ``'Profile of registrant'`` section are reported and not stored.

    Args:
        first_page: First search page to visit (inclusive).
        last_page: Last search page to visit (inclusive).
    """
    arango = ArangoDB()
    if not arango.db.has_collection('eu_lobbyists'):
        arango.db.create_collection('eu_lobbyists')
    #arango.db.collection('eu_lobbyists').truncate()
    collection = arango.db.collection('eu_lobbyists')

    browser = await launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
    try:
        for page_number in range(first_page, last_page + 1):
            await asyncio.sleep(1.3)
            ids = await get_all_lobbyists(browser, page_number)

            # Track exactly the ids being fetched. Pairing the results with the
            # full ``ids`` list stored records under the wrong key whenever an
            # id had been skipped.
            pending_ids = []
            tasks = []
            for id_number in dict.fromkeys(ids):  # drop repeated ids, keep order
                if collection.get(id_number):
                    continue
                pending_ids.append(id_number)
                # Schedule now and yield to the loop while pausing, so the
                # requests really start 1.6 s apart.
                tasks.append(asyncio.ensure_future(get_info(browser, id_number)))
                await asyncio.sleep(1.6)

            results = await asyncio.gather(*tasks)
            for id_number, data in zip(pending_ids, results):
                if not data:
                    continue
                data['_key'] = id_number
                if 'Profile of registrant' in data:
                    collection.insert(data, overwrite=True)
                    print(f'Inserted {id_number}')
                else:
                    print(f'"Profile of registrant" not in {id_number}')
    finally:
        await browser.close()
# Script entry point. The statements after the HTML literal decide what actually runs.
if __name__ == '__main__':
# Registrant detail markup kept inline; only the commented-out extract_from_html call after the literal refers to it.
html = '''
<div class="ecl">
<div>
<h2 id="profile-of-registrant">Profile of registrant</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Organisation name</strong>:
</td>
<td class="ecl-table__cell">
<strong>Associação Portuguesa para o Desenvolvimento Local</strong>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>REG Number</strong>:
</td>
<td class="ecl-table__cell">
<span>500479542151-73</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Status</strong>:
</td>
<td class="ecl-table__cell">Activated</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Registration date</strong>:
</td>
<td class="ecl-table__cell">07/04/2021 19:44:13</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>The registrant performed the last (partial or annual) update on</strong>:
</td>
<td class="ecl-table__cell">29/02/2024 13:18:08</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Next annual update due latest on</strong>:
</td>
<td class="ecl-table__cell">28/02/2025</td>
</tr>
</tbody>
</table>
<h2 id="applicantregistrant-organisation-or-self-employed-individuals"> Applicant/registrant: organisation or self-employed individuals</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Organisation name</strong>:
</td>
<td class="ecl-table__cell">
<span>Associação Portuguesa para o Desenvolvimento Local</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Acronym</strong>:
</td>
<td class="ecl-table__cell">
<span>ANIMAR</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Form of entity</strong>:
</td>
<td class="ecl-table__cell">
<span>Associação Sem Fim Lucrativo</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Website</strong>:
</td>
<td class="ecl-table__cell">
<a href="http://animar-dl.pt" target="_blank">http://animar-dl.pt</a>
</td>
</tr>
</tbody>
</table>
<h2 id="contact-details">Contact details</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td colspan="2" class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Contact details of your organisation's head office</strong>:
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Address</strong>:
</td>
<td class="ecl-table__cell">
<span>Av. Santos Dumont, 57 - 1º Esq. Avenidas Novas</span>
<span></span>
<span></span>
<span>1050-202</span>
<span>Lisboa</span>
<span>PORTUGAL</span>
<span></span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Telephone</strong>:
</td>
<td class="ecl-table__cell">
(+<span>351</span> )
<span>21 952 74 50</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Contact details of your organisation's office in charge of EU relations </strong>:
</td>
<td class="ecl-table__cell">
<span>Same as the head office</span>
</td>
</tr>
</tbody>
</table>
<h2 id="person-with-legal-responsibility">Person with legal responsibility</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Person with legal responsibility for the organisation</strong>:
</td>
<td class="ecl-table__cell">
<span>Mr</span>
<span>Marco</span>
<span>Domingues</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Position</strong>:
</td>
<td class="ecl-table__cell">
<span>President</span>
</td>
</tr>
</tbody>
</table>
<h2 id="person-in-charge-of-eu-relations">Person in charge of EU relations</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Person in charge of EU relations</strong>:
</td>
<td class="ecl-table__cell">
<span>Ms</span>
<span>Sara</span>
<span>Trindade</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Position</strong>:
</td>
<td class="ecl-table__cell">
<span>Direction Member</span>
</td>
</tr>
</tbody>
</table>
<h2 id="goalsremit">Goals/remit</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Goals/remits of your organisation</strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">MISSÃO
Valorizar, promover e reforçar o desenvolvimento local, a cidadania ativa, a igualdade e a coesão social na sociedade portuguesa, enquanto pilares de uma sociedade mais justa, equitativa, solidária e sustentável.
VISÃO
Ser reconhecida pela sociedade civil e pelo Estado, como a organização de referência promotora do desenvolvimento integrado, na diversidade de contextos, organizações e territórios.
CULTURA
Ser laica, apartidária, autónoma do Estado e promotora de interesses coletivos e representativos da sociedade civil; Ser uma organização de pontes para a convergência e concertação das organizações da sociedade civil, cidadãos e cidadãs, no reforço do interesse comum junto do Estado; Assumir a sua identidade na diversidade de organizações, indivíduos, territórios e contextos de atuação, e daí, destacar a multiplicidade de modelos de desenvolvimento local; Assumir a pluralidade de opiniões e modelos de atuação enquanto desafio inerente à promoção do desenvo (...)</pre>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Level of interest represented</strong>:
</td>
<td class="ecl-table__cell">
<div>
<ul>
<li>
<span>National</span>
</li>
</ul>
</div>
</td>
</tr>
</tbody>
</table>
<h2 id="interests-represented">Interests represented</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Applicant/registrant's representation</strong>:
</td>
<td class="ecl-table__cell">
<span>
<span>Promotes their own interests or the collective interests of their members</span>
</span>
</td>
</tr>
</tbody>
</table>
<h2 id="specific-activities-covered-by-the-register">Specific activities covered by the Register</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Main EU legislative proposals or policies targeted</strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">Desenvolvimento Local
Governança, Cidadania e Igualdade
Sustentabilidade, Coesão Social e Territorial
Inovação e Empreendedorismo
Empregabilidade
Economia Social</pre>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Communication activities (events, campaigns, publications, etc.) related to the EU policies above</strong>:
</td>
<td class="ecl-table__cell">
<span style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">https://www.animar-dl.pt/
https://www.animar-dl.pt/recursos/
https://www.animar-dl.pt/projetos/</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Intergroups and unofficial groupings (European Parliament)</strong>:
</td>
<td class="ecl-table__cell">
<div>
<ul>
<li>
<span>
<span>Unofficial groupings</span>:
<span>_Social Economy Europe (a partir da CASES - Cooperativa António Sérgio para a Economia Social)</span>
</span>
</li>
</ul>
</div>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Participation in other EU supported forums and platforms</strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">_Comissão de Acompanhamento do PDR2020
_ERASMUS+
_Confederação Portuguesa de Economia Social</pre>
</td>
</tr>
</tbody>
</table>
<table class="ecl-table ecl-table--zebra ecl-u-mt-2xl ecl-u-border-top">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell">
<strong>List of meetings with European Commission: </strong>
<div class="ecl-popover" data-ecl-auto-init="Popover" data-ecl-auto-initialized="true">
<a href="javascript:void(0)" class="ecl-link ecl-link--standalone ecl-link--icon ecl-link--icon-before ecl-popover__toggle" aria-controls="popover-meetings" data-ecl-popover-toggle="" aria-expanded="false">
<svg class="ecl-icon ecl-icon--fluid ecl-link__icon" focusable="false" aria-hidden="true"><use xlink:href="/themes/contrib/oe_theme/dist/eu/images/icons/sprites/icons.svg#information"></use></svg>
</a>
<div id="popover-meetings" class="ecl-popover__container" hidden="" style="width: 25em">
<div class="ecl-popover__content">This field displays the list of any meetings the registrant has held with Commissioners, Members of their Cabinet or Director-Generals since 01/12/2014 under its current identification number in the Register.</div>
</div>
</div>
</td>
<td>
<span>N/A</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Open Public Consultations">
<strong>List of contributions to public consultations</strong>:
<div class="ecl-popover" data-ecl-auto-init="Popover" data-ecl-auto-initialized="true">
<a href="javascript:void(0)" class="ecl-link ecl-link--standalone ecl-link--icon ecl-link--icon-before ecl-popover__toggle" aria-controls="popover-publicConsultation" data-ecl-popover-toggle="" aria-expanded="false">
<svg class="ecl-icon ecl-icon--fluid ecl-link__icon" focusable="false" aria-hidden="true"><use xlink:href="/themes/contrib/oe_theme/dist/eu/images/icons/sprites/icons.svg#information"></use></svg>
</a>
<div id="popover-publicConsultation" class="ecl-popover__container" hidden="" style="width: 25em">
<div class="ecl-popover__content">This field displays list of public consultations to which the entity contributed since 24/07/2018 under its current identification number in the Register (provided that the entity indicated the TR ID/REG number in its contribution).</div>
</div>
</div>
</td>
<td class="ecl-table__cell">
<ul>
<li>
<a href="https://ec.europa.eu/info/law/better-regulation/have-your-say/initiatives/12722-demographic-change-in-europe---green-paper-on-ageing/public-consultation" target="_blank" class="ecl-link ecl-link--standalone ecl-link--icon ecl-link--icon-after">
<span>Demographic change in Europe - green paper on ageing</span></a>
</li>
</ul>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Roadmaps">
<strong>List of contributions to roadmaps</strong>:
<div class="ecl-popover" data-ecl-auto-init="Popover" data-ecl-auto-initialized="true">
<a href="javascript:void(0)" class="ecl-link ecl-link--standalone ecl-link--icon ecl-link--icon-before ecl-popover__toggle" aria-controls="popover-roadmaps" data-ecl-popover-toggle="" aria-expanded="false">
<svg class="ecl-icon ecl-icon--fluid ecl-link__icon" focusable="false" aria-hidden="true"><use xlink:href="/themes/contrib/oe_theme/dist/eu/images/icons/sprites/icons.svg#information"></use></svg>
</a>
<div id="popover-roadmaps" class="ecl-popover__container" hidden="" style="width: 25em">
<div class="ecl-popover__content">This field displays list of roadmaps to which the entity contributed since 24/07/2018 under its current identification number in the Register (provided that the entity indicated the TR ID number in its contribution)</div>
</div>
</div>
</td>
<td class="ecl-table__cell">
<span>N/A</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Commission expert groups and other similar entities (European commission)</strong>:
<div class="ecl-popover" data-ecl-auto-init="Popover" data-ecl-auto-initialized="true">
<a href="javascript:void(0)" class="ecl-link ecl-link--standalone ecl-link--icon ecl-link--icon-before ecl-popover__toggle" aria-controls="popover-experts" data-ecl-popover-toggle="" aria-expanded="false">
<svg class="ecl-icon ecl-icon--fluid ecl-link__icon" focusable="false" aria-hidden="true"><use xlink:href="/themes/contrib/oe_theme/dist/eu/images/icons/sprites/icons.svg#information"></use></svg>
</a>
<div id="popover-experts" class="ecl-popover__container" hidden="" style="width: 25em">
<div class="ecl-popover__content">This field displays membership of any active Expert groups and is limited to Type C (Organisation) and Type B (Individual expert appointed as a representative of a common interest) members.</div>
</div>
</div>
</td>
<td class="ecl-table__cell">
<div>
<span>N/A</span>
</div>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell">
<strong>List of meetings with the European Parliament</strong>:
</td>
<td><a target="_blank" href="https://www.europarl.europa.eu/meps/en/search-meetings?transparencyRegisterIds=500479542151-73"><span>Meeting declarations</span></a></td>
</tr>
</tbody>
</table>
<h2 id="number-of-persons-involved-in-the-activities">Number of persons involved in the activities</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Number of persons involved from your organisation expressed in % of working time</strong>:
</td>
<td class="ecl-table__cell">
<b>100%</b>:
<span>0</span>,
<b>75%</b>:
<span>0</span>,
<b>50%</b>:
<span>0</span>,
<b>25%</b>:
<span>0</span>,
<b>10%</b>:
<span>2</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Number of persons involved (total)</strong>:
</td>
<td class="ecl-table__cell">
<span>2</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Full time equivalent (FTE)</strong>:
</td>
<td class="ecl-table__cell">
<span>0.2</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Complementary information</strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">N/A</pre>
</td>
</tr>
</tbody>
</table>
<h2 id="persons-accredited-for-access-to-european-parliament-premises">Persons accredited for access to European Parliament premises</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<span class="ecl-col-m-12 ecl-u-mt-m ecl-u-type-align-center">No accredited persons</span>
</td>
</tr>
</tbody>
</table>
<h2 id="fields-of-interest">Fields of interest</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Fields of interest</strong>:
</td>
<td class="ecl-table__cell">
<ul>
<li>
<span>Agriculture and rural development</span>
</li>
<li>
<span>Climate action</span>
</li>
<li>
<span>Culture</span>
</li>
<li>
<span>Culture and media</span>
</li>
<li>
<span>Digital economy and society</span>
</li>
<li>
<span>Education and training</span>
</li>
<li>
<span>Employment and social affairs</span>
</li>
<li>
<span>Environment</span>
</li>
<li>
<span>International co-operation and development</span>
</li>
<li>
<span>Migration and asylum</span>
</li>
<li>
<span>Regional policy</span>
</li>
<li>
<span>Research and innovation</span>
</li>
<li>
<span>Youth</span>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<h2 id="membership-and-affiliation">Membership and affiliation</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>List of membership in associations, (con)federations, networks and other bodies </strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">https://www.animar-dl.pt/quem-somos/filiacoes-e-parcerias/</pre>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>List of members and affiliate/partner organisations </strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">https://www.animar-dl.pt/entidades/</pre>
</td>
</tr>
</tbody>
</table>
<h2 id="category-of-registration">Category of registration</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Category of registration</strong>:
</td>
<td class="ecl-table__cell">
<span>Non-governmental organisations, platforms and networks and similar</span>
</td>
</tr>
</tbody>
</table>
<h2 id="financial-data">Financial data</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Closed financial year</strong>:
</td>
<td class="ecl-table__cell">
<span>01/2022 - 12/2022</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>EU grants for the most recent closed financial year</strong>:
</td>
<td class="ecl-table__cell">
<span>N/A</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>EU grants for the current financial year</strong>:
</td>
<td class="ecl-table__cell">
<span>N/A</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Intermediaries in the most recent closed financial year</strong>:
</td>
<td class="ecl-table__cell">
<span>N/A</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell">
<strong>Intermediaries in the current financial year</strong>:
</td>
<td class="ecl-table__cell">
<span>N/A</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell">
<strong>Estimate of annual costs related to activities covered by the register</strong>:
</td>
<td class="ecl-table__cell">
<!-- <10000 -->
<!-- >=1000000 -->
<!-- - -->
<span>
<span>10,000</span>
-
<span>24,999</span>
</span>
</td>
</tr>
<tr>
<td class="ecl-table__cell">
<strong>Complementary information</strong>:
</td>
<td class="ecl-table__cell">
<pre style="word-wrap: break-word; white-space: pre-line; font-family: var(--eui-base-font-family);">N/A</pre>
</td>
</tr>
</tbody>
</table>
<h2 id="code-of-conduct">Code of conduct</h2>
<table class="ecl-table ecl-table--zebra">
<tbody class="ecl-table__body">
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>Code of conduct</strong>:
</td>
<td class="ecl-table__cell">
<span>By its registration the organisation has signed the Transparency Register Code of Conduct</span>
</td>
</tr>
<tr class="ecl-table__row">
<td class="ecl-table__cell" data-ecl-table-header="Profile of registrant">
<strong>If the applicant/registrant is also bound by another(professional) code of conduct it can be indicated in this space</strong>:
</td>
<td class="ecl-table__cell">
<span>N/A</span>
</td>
</tr>
</tbody>
</table>
</div>
</div>
'''
# Disabled manual check: parse the inline markup above and inspect one section.
#data = extract_from_html(html)
#pprint(data['Fields of interest'])
# Active action: re-parse the HTML stored in the eu_lobbyists collection.
update_info_from_html()
# Disabled: run the scraper coroutine main().
#asyncio.get_event_loop().run_until_complete(main())

@ -0,0 +1,10 @@
from _arango import ArangoDB
# Ad-hoc check: list the headlines in eu_initiatives and count the distinct ones.
arango = ArangoDB()
# Load every document of the collection into memory.
initiatives = [i for i in arango.db.collection('eu_initiatives').all()]
# Sorted by headline so equal headlines are printed next to each other.
ordered_by_headline = sorted(initiatives, key=lambda x: x['headline'])
# Distinct headlines seen so far.
s = set()
for i in ordered_by_headline:
print(i['headline'])
s.add(i['headline'])
# NOTE(review): indentation was lost in this view, so it is unclear whether the count is printed once after the loop or on every iteration — confirm.
print(len(s))
Loading…
Cancel
Save