import argparse
import json
from typing import List, Literal, Optional, Union

import requests
from colorprinter.print_color import *

# Base URL shared by every Semantic Scholar Graph API endpoint used below.
_API_BASE_URL = "https://api.semanticscholar.org/graph/v1/paper"

# Upper bound (seconds) on any single HTTP request. Without a timeout,
# requests waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30

# Publication types accepted by the ``publication_types`` filter.
PublicationType = Literal[
    "Review",
    "JournalArticle",
    "CaseReport",
    "ClinicalTrial",
    "Conference",
    "Dataset",
    "Editorial",
    "LettersAndComments",
    "MetaAnalysis",
    "News",
    "Study",
    "Book",
    "BookSection",
]

# Fields of study accepted by the ``fields_of_study`` filter.
FieldOfStudy = Literal[
    "Computer Science",
    "Medicine",
    "Chemistry",
    "Biology",
    "Materials Science",
    "Physics",
    "Geology",
    "Psychology",
    "Art",
    "History",
    "Geography",
    "Sociology",
    "Business",
    "Political Science",
    "Economics",
    "Philosophy",
    "Mathematics",
    "Engineering",
    "Environmental Science",
    "Agricultural and Food Sciences",
    "Education",
    "Law",
    "Linguistics",
]

# Fields requested by search_semantic_scholar() when the caller gives none.
_DEFAULT_SEARCH_FIELDS = (
    "title",
    "url",
    "abstract",
    "year",
    "publicationDate",
    "authors.name",
    "citationCount",
    "openAccessPdf",
    "tldr",
)

# Fields requested by the single-paper lookups when the caller gives none.
_DEFAULT_PAPER_FIELDS = (
    "title",
    "abstract",
    "year",
    "authors.name",
    "externalIds",
    "url",
    "publicationDate",
    "journal",
    "citationCount",
    "openAccessPdf",
)


def _get_json(url: str, params: dict, not_found_message: Optional[str] = None):
    """
    Send a GET request and return the decoded JSON body.

    Parameters:
    -----------
    url : str
        The endpoint to request
    params : dict
        Query-string parameters for the request
    not_found_message : str, optional
        If given, printed instead of the generic HTTP error output when the
        server answers 404

    Returns:
    --------
    object or None
        The decoded JSON body, or None if the request failed. Failures are
        reported on stdout rather than raised.
    """
    try:
        response = requests.get(
            url, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except requests.exceptions.HTTPError as e:
        if not_found_message is not None and e.response.status_code == 404:
            print(not_found_message)
        else:
            print(f"HTTP Error: {e}")
            print(f"Response text: {e.response.text}")
        return None
    except requests.exceptions.RequestException as e:
        # Covers connection failures, timeouts and similar transport errors.
        print(f"Error: {e}")
        return None


def search_semantic_scholar(
    query: str,
    limit: int = 10,
    fields: Optional[List[str]] = None,
    # NOTE: this list default is shared between calls; it is only read here,
    # never mutated, so sharing it is safe.
    publication_types: Optional[List[PublicationType]] = ["JournalArticle"],
    open_access: bool = False,
    min_citation_count: Optional[int] = None,
    date_range: Optional[str] = None,
    year_range: Optional[str] = None,
    fields_of_study: Optional[List[FieldOfStudy]] = None,
):
    """
    Search for papers on Semantic Scholar with various filters.

    Parameters:
    -----------
    query : str
        The search query term
    limit : int
        Number of results to return (max 100)
    fields : List[str], optional
        List of fields to include in the response
    publication_types : List[str], optional
        Filter by publication types. Defaults to journal articles only;
        pass None or an empty list to disable the filter.
    open_access : bool
        Only include papers with open access PDFs
    min_citation_count : int, optional
        Minimum number of citations. None or 0 applies no filter.
    date_range : str, optional
        Date range in format "YYYY-MM-DD:YYYY-MM-DD"
    year_range : str, optional
        Year range in format "YYYY-YYYY" or "YYYY-" or "-YYYY"
    fields_of_study : List[str], optional
        List of fields of study to filter by

    Returns:
    --------
    list or None
        The value of the "data" key of the JSON response (an empty list if
        the key is absent), or None if the request failed
    """
    if fields is None:
        fields = _DEFAULT_SEARCH_FIELDS

    params = {"query": query, "limit": limit, "fields": ",".join(fields)}

    # Only add a filter to the query string when the caller supplied it.
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if open_access:
        # The parameter is sent with an empty value; it acts as a flag.
        params["openAccessPdf"] = ""
    if min_citation_count:
        params["minCitationCount"] = str(min_citation_count)
    if date_range:
        params["publicationDateOrYear"] = date_range
    if year_range:
        params["year"] = year_range
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    payload = _get_json(f"{_API_BASE_URL}/search", params)
    if payload is None:
        return None
    return payload.get("data", [])


def main(
    query: Optional[str] = None,
    limit: int = 10,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[PublicationType]] = None,
    open_access: bool = False,
    min_citation_count: Optional[int] = None,
    date_range: Optional[str] = None,
    year_range: Optional[str] = None,
    fields_of_study: Optional[List[FieldOfStudy]] = None,
):
    """
    Run a paper search and print the results.

    All parameters are forwarded unchanged to search_semantic_scholar();
    see that function for their meaning. Note that publication_types
    defaults to None here, i.e. no publication-type filter.

    Returns:
    --------
    None
    """
    papers = search_semantic_scholar(
        query=query,
        limit=limit,
        fields=fields,
        publication_types=publication_types,
        open_access=open_access,
        min_citation_count=min_citation_count,
        date_range=date_range,
        year_range=year_range,
        fields_of_study=fields_of_study,
    )

    # Both a failed request (None) and an empty result list end up here.
    if not papers:
        print("No results found or an error occurred.")
        return

    print_green(f"\nFound {len(papers)} papers matching your query: '{query}'")
    for paper in papers:
        print(paper)
    # Return normally so that code importing main() keeps control of the
    # process instead of having the interpreter terminated underneath it.


def search_paper_by_title(
    title: str,
    fields: Optional[List[str]] = None
):
    """
    Search for a single paper that best matches the given title.

    Parameters:
    -----------
    title : str
        The title to search for
    fields : List[str], optional
        List of fields to include in the response

    Returns:
    --------
    dict or None
        The decoded JSON body of the match endpoint's response, returned
        as-is, or None if no match was found or an error occurred
    """
    if fields is None:
        fields = _DEFAULT_PAPER_FIELDS

    params = {"query": title, "fields": ",".join(fields)}

    return _get_json(
        f"{_API_BASE_URL}/search/match",
        params,
        not_found_message=f"No paper found matching title: {title}",
    )


def get_paper_details(
    paper_id: str,
    fields: Optional[List[str]] = None
):
    """
    Get detailed information about a paper by its identifier.

    Parameters:
    -----------
    paper_id : str
        The paper identifier. Can be:
        - Semantic Scholar ID (e.g., 649def34f8be52c8b66281af98ae884c09aef38b)
        - DOI (e.g., DOI:10.18653/v1/N18-3011)
        - arXiv ID (e.g., ARXIV:2106.15928)
        - etc.
        A bare DOI (starting with "10.") gets the "DOI:" prefix added.
    fields : List[str], optional
        List of fields to include in the response

    Returns:
    --------
    dict or None
        JSON data for the paper, or None if not found or error
    """
    if fields is None:
        fields = _DEFAULT_PAPER_FIELDS

    # Normalise a bare DOI *before* building the URL, so the prefixed
    # identifier is what actually gets requested.
    if paper_id.startswith("10.") and "DOI:" not in paper_id:
        paper_id = f"DOI:{paper_id}"

    params = {"fields": ",".join(fields)}

    return _get_json(
        f"{_API_BASE_URL}/{paper_id}",
        params,
        not_found_message=f"No paper found with ID: {paper_id}",
    )


if __name__ == "__main__":
    main(
        query="machine learning",
        limit=1,
        fields=[
            "title",
            "url",
            "abstract",
            "tldr",
            "externalIds",
            "year",
            "influentialCitationCount",
            "fieldsOfStudy",
            "publicationDate",
            "journal",
        ],
        open_access=True,
    )