# Source code for paper_firehose.core.apis.semantic_scholar_client
"""
Semantic Scholar API client for fetching paper abstracts.
Semantic Scholar provides free access to academic paper metadata including
abstracts without requiring an API key.
"""
from __future__ import annotations
import json
from urllib.parse import quote
from typing import Optional
import requests
from ..http_client import RetryableHTTPClient
from ..text_utils import strip_jats
# [docs]
def get_semantic_scholar_abstract(
    doi: str,
    *,
    session: Optional[requests.Session] = None
) -> Optional[str]:
    """Fetch an abstract from the Semantic Scholar Graph API by DOI.

    This is a best-effort lookup: every failure mode handled here (empty
    DOI, 404, HTTP/transport error, malformed or unexpected JSON, missing
    abstract) yields ``None`` instead of raising.

    Args:
        doi: Digital Object Identifier to look up. Surrounding whitespace
            is ignored; an empty or whitespace-only value returns ``None``
            without making a request.
        session: Optional ``requests.Session`` kept for backward
            compatibility. When given, a single GET with a 15 second
            timeout is issued through it; otherwise a
            ``RetryableHTTPClient`` is used.

    Returns:
        The abstract after passing through ``strip_jats``, or ``None`` if
        it is not available.
    """
    doi = doi.strip() if doi else ""
    if not doi:
        return None

    url = f"https://api.semanticscholar.org/graph/v1/paper/DOI:{quote(doi)}?fields=abstract"

    try:
        if session is not None:
            # Legacy path: the caller controls the session, no retries.
            response = session.get(url, timeout=15)
            if response.status_code == 404:
                return None
            response.raise_for_status()
        else:
            client = RetryableHTTPClient(rps=1.0, max_retries=3)
            response = client.get_with_retry(url)
            if response is None:  # 404 case
                return None
    except (requests.RequestException, KeyError):
        # KeyError is retained from the original handler so callers never
        # see an exception type that used to be swallowed here.
        return None

    return _abstract_from_response(response)


def _abstract_from_response(response) -> Optional[str]:
    """Return the cleaned abstract from a response object, or ``None``."""
    try:
        payload = response.json()
    except ValueError:
        # Every JSON decode error variant (json, simplejson,
        # requests.exceptions.JSONDecodeError) derives from ValueError.
        return None

    # Guard against a body that is valid JSON but not an object
    # (e.g. a list or null), where ``.get`` would raise AttributeError.
    if not isinstance(payload, dict):
        return None

    abstract = payload.get('abstract')
    if not abstract or not isinstance(abstract, str):
        return None
    return strip_jats(abstract)