You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
369 lines
10 KiB
369 lines
10 KiB
import requests |
|
import json |
|
import argparse |
|
from typing import Optional, List, Literal, Union |
|
from colorprinter.print_color import * |
|
|
|
|
|
def search_semantic_scholar(
    query: str,
    limit: int = 10,
    fields: Optional[List[str]] = None,
    publication_types: Optional[
        List[
            Literal[
                "Review",
                "JournalArticle",
                "CaseReport",
                "ClinicalTrial",
                "Conference",
                "Dataset",
                "Editorial",
                "LettersAndComments",
                "MetaAnalysis",
                "News",
                "Study",
                "Book",
                "BookSection",
            ]
        ]
    ] = ["JournalArticle"],  # never mutated below; kept as-is for interface stability
    open_access: bool = False,
    min_citation_count: Optional[int] = None,
    date_range: Optional[str] = None,
    year_range: Optional[str] = None,
    fields_of_study: Optional[
        List[
            Literal[
                "Computer Science",
                "Medicine",
                "Chemistry",
                "Biology",
                "Materials Science",
                "Physics",
                "Geology",
                "Psychology",
                "Art",
                "History",
                "Geography",
                "Sociology",
                "Business",
                "Political Science",
                "Economics",
                "Philosophy",
                "Mathematics",
                "Engineering",
                "Environmental Science",
                "Agricultural and Food Sciences",
                "Education",
                "Law",
                "Linguistics",
            ]
        ]
    ] = None,
    timeout: float = 30.0,
):
    """
    Search for papers on Semantic Scholar with various filters.

    Parameters:
    -----------
    query : str
        The search query term
    limit : int
        Number of results to return (max 100)
    fields : List[str], optional
        List of fields to include in the response. When omitted, a default
        set (title, url, abstract, year, publicationDate, authors.name,
        citationCount, openAccessPdf, tldr) is requested.
    publication_types : List[str], optional
        Filter by publication types. Defaults to ["JournalArticle"]; pass
        None or an empty list to disable the filter.
    open_access : bool
        Only include papers with open access PDFs
    min_citation_count : int, optional
        Minimum number of citations
    date_range : str, optional
        Date range in format "YYYY-MM-DD:YYYY-MM-DD"
    year_range : str, optional
        Year range in format "YYYY-YYYY" or "YYYY-" or "-YYYY"
    fields_of_study : List[str], optional
        List of fields of study to filter by
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        `requests` can block indefinitely on a stalled connection.

    Returns:
    --------
    list or None
        The value stored under "data" in the JSON response (an empty list
        when that key is absent), or None if the request failed.
    """
    # Define the API endpoint URL
    url = "https://api.semanticscholar.org/graph/v1/paper/search"

    # Set up default fields if not provided
    if fields is None:
        fields = [
            "title",
            "url",
            "abstract",
            "year",
            "publicationDate",
            "authors.name",
            "citationCount",
            "openAccessPdf",
            "tldr",
        ]

    # Build query parameters
    params = {"query": query, "limit": limit, "fields": ",".join(fields)}

    # Add optional filters if provided
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)

    if open_access:
        # The filter is a bare flag: the key is sent with an empty value.
        params["openAccessPdf"] = ""

    if min_citation_count is not None:
        params["minCitationCount"] = str(min_citation_count)

    if date_range:
        params["publicationDateOrYear"] = date_range

    if year_range:
        params["year"] = year_range

    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    # Send the API request
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json().get("data", [])
    except requests.exceptions.HTTPError as e:
        print(f"HTTP Error: {e}")
        # Read the body from the exception's own response object, the same
        # way the other request helpers in this file do.
        print(f"Response text: {e.response.text}")
        return None
    except requests.exceptions.RequestException as e:
        # Covers connection failures and timeouts.
        print(f"Error: {e}")
        return None
|
|
|
|
|
def main(
    query: Optional[str] = None,
    limit: int = 10,
    fields: Optional[List[str]] = None,
    publication_types: Optional[
        List[
            Literal[
                "Review",
                "JournalArticle",
                "CaseReport",
                "ClinicalTrial",
                "Conference",
                "Dataset",
                "Editorial",
                "LettersAndComments",
                "MetaAnalysis",
                "News",
                "Study",
                "Book",
                "BookSection",
            ]
        ]
    ] = None,
    open_access: bool = False,
    min_citation_count: Optional[int] = None,
    date_range: Optional[str] = None,
    year_range: Optional[str] = None,
    fields_of_study: Optional[
        List[
            Literal[
                "Computer Science",
                "Medicine",
                "Chemistry",
                "Biology",
                "Materials Science",
                "Physics",
                "Geology",
                "Psychology",
                "Art",
                "History",
                "Geography",
                "Sociology",
                "Business",
                "Political Science",
                "Economics",
                "Philosophy",
                "Mathematics",
                "Engineering",
                "Environmental Science",
                "Agricultural and Food Sciences",
                "Education",
                "Law",
                "Linguistics",
            ]
        ]
    ] = None,
) -> None:
    """
    Run a Semantic Scholar search and print every returned paper.

    All arguments are forwarded unchanged to `search_semantic_scholar`.
    Note that `publication_types` defaults to None here, so no
    publication-type filter is sent unless the caller supplies one.

    Returns:
    --------
    None
        Results are printed. The function returns normally on both the
        "no results" path and the success path; it no longer terminates
        the interpreter after printing.
    """
    # Search for papers
    papers = search_semantic_scholar(
        query=query,
        limit=limit,
        fields=fields,
        publication_types=publication_types,
        open_access=open_access,
        min_citation_count=min_citation_count,
        date_range=date_range,
        year_range=year_range,
        fields_of_study=fields_of_study,
    )

    # Both an empty result list and a failed request (None) land here.
    if not papers:
        print("No results found or an error occurred.")
        return

    # Print results
    # print_green is presumably supplied by the colorprinter star import.
    print_green(f"\nFound {len(papers)} papers matching your query: '{query}'")

    # The previous call to the interactive-shell helper exit() was dropped:
    # it killed the whole process for any importer and only exists when the
    # `site` module is loaded.
    for paper in papers:
        print(paper)
|
|
|
|
|
def search_paper_by_title(
    title: str,
    fields: Optional[List[str]] = None,
    timeout: float = 30.0,
):
    """
    Search for a single paper that best matches the given title.

    Parameters:
    -----------
    title : str
        The title to search for
    fields : List[str], optional
        List of fields to include in the response. When omitted, a default
        set (title, abstract, year, authors.name, externalIds, url,
        publicationDate, journal, citationCount, openAccessPdf) is requested.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        `requests` can block indefinitely on a stalled connection.

    Returns:
    --------
    dict or None
        The decoded JSON body of the response, returned untouched, or None
        if no match was found (HTTP 404) or the request failed.
        NOTE(review): the match endpoint appears to wrap the paper in a
        "data" list - confirm against the API reference before indexing.
    """
    # Define the API endpoint URL
    url = "https://api.semanticscholar.org/graph/v1/paper/search/match"

    # Set up default fields if not provided
    if fields is None:
        fields = [
            "title",
            "abstract",
            "year",
            "authors.name",
            "externalIds",
            "url",
            "publicationDate",
            "journal",
            "citationCount",
            "openAccessPdf",
        ]

    # Build query parameters
    params = {"query": title, "fields": ",".join(fields)}

    # Send the API request
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except requests.exceptions.HTTPError as e:
        # A 404 is reported as "no match" rather than as a generic HTTP error.
        if e.response.status_code == 404:
            print(f"No paper found matching title: {title}")
            return None
        print(f"HTTP Error: {e}")
        print(f"Response text: {e.response.text}")
        return None
    except requests.exceptions.RequestException as e:
        # Covers connection failures and timeouts.
        print(f"Error: {e}")
        return None
|
|
|
def get_paper_details(
    paper_id: str,
    fields: Optional[List[str]] = None,
    timeout: float = 30.0,
):
    """
    Get detailed information about a paper by its identifier.

    Parameters:
    -----------
    paper_id : str
        The paper identifier. Can be:
        - Semantic Scholar ID (e.g., 649def34f8be52c8b66281af98ae884c09aef38b)
        - DOI (e.g., DOI:10.18653/v1/N18-3011); a bare DOI starting with
          "10." is accepted and gets the "DOI:" prefix added automatically
        - arXiv ID (e.g., ARXIV:2106.15928)
        - etc.
    fields : List[str], optional
        List of fields to include in the response. When omitted, a default
        set (title, abstract, year, authors.name, externalIds, url,
        publicationDate, journal, citationCount, openAccessPdf) is requested.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        `requests` can block indefinitely on a stalled connection.

    Returns:
    --------
    dict or None
        JSON data for the paper, or None if not found or error
    """
    # Add DOI: prefix if it's a DOI without the prefix.
    # This must happen BEFORE the URL is built; previously the URL was
    # formatted first, so the prefixed identifier never reached the request.
    if paper_id.startswith("10.") and "DOI:" not in paper_id:
        paper_id = f"DOI:{paper_id}"

    # Define the API endpoint URL (uses the normalised identifier)
    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}"

    # Set up default fields if not provided
    if fields is None:
        fields = [
            "title",
            "abstract",
            "year",
            "authors.name",
            "externalIds",
            "url",
            "publicationDate",
            "journal",
            "citationCount",
            "openAccessPdf",
        ]

    # Build query parameters
    params = {"fields": ",".join(fields)}

    # Send the API request
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except requests.exceptions.HTTPError as e:
        # A 404 is reported as "not found" rather than as a generic HTTP error.
        if e.response.status_code == 404:
            print(f"No paper found with ID: {paper_id}")
            return None
        print(f"HTTP Error: {e}")
        print(f"Response text: {e.response.text}")
        return None
    except requests.exceptions.RequestException as e:
        # Covers connection failures and timeouts.
        print(f"Error: {e}")
        return None
|
|
|
if __name__ == "__main__":
    # Demo run when executed as a script: fetch a single open-access paper
    # for a fixed query, requesting an explicit set of response fields.
    demo_fields = [
        "title",
        "url",
        "abstract",
        "tldr",
        "externalIds",
        "year",
        "influentialCitationCount",
        "fieldsOfStudy",
        "publicationDate",
        "journal",
    ]
    main(query="machine learning", limit=1, fields=demo_fields, open_access=True)
|
|
|