Skip to content

Commit 35e4f3a

Browse files
Added APIs: Unique authors, Unique citations
1 parent: 6d57fbd · commit: 35e4f3a

File tree

5 files changed: 58 additions (+58) and 19 deletions (-19)

5 files changed: 58 additions (+58) and 19 deletions (-19)

src/main.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -76,10 +76,8 @@ def main() -> FastAPI:
7676

7777
db_conn = setup.DatabaseConnection(dsn)
7878
db_conn.connect()
79-
8079
session = SessionLocal()
81-
82-
logger.info("Database connected via SQLAlchemy ORM")
80+
logger.info("Database connected via SQLAlchemy ORM")
8381

8482
# Fields setup
8583
record_fields = set(Record.model_fields.keys())

src/models/citation.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,4 +31,8 @@ class Reference(BaseModel):
3131
cited_order: int
3232
match: bool
3333

34-
model_config = {"from_attributes": True}
34+
model_config = {"from_attributes": True}
35+
36+
class CitationList(BaseModel):
    """Response envelope that carries a list of citations and a citation count."""

    # Citation records included in this response.
    citations: list[Citation]
    # Count reported alongside the list; the model itself does not derive
    # or validate it against ``citations``.
    citation_count: int

src/repositories/epmc.py

Lines changed: 36 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -288,10 +288,8 @@ def get_all_pmc_affiliations(self, limit: int = 100, skip: int = 0) -> list[PMCA
288288
return self.db.query(PMCAffiliation).offset(skip).limit(limit).all()
289289

290290
def get_all_articles_ids(self) -> list[tuple[str | None, int]]:
291-
return (
292-
self.db.query(PMCArticle.pm_id, PMCArticle.record_id)
293-
.all()
294-
)
291+
return self.db.query(PMCArticle.pm_id, PMCArticle.record_id).all()
292+
295293

296294
def _get_latest_version_subquery(self, entity_class: Type[Any]):
297295
"""
@@ -409,13 +407,21 @@ def _get_latest_entities_by_column(self, entity_class: Type[Any], group_column,
409407
)
410408

411409
# Query entities where row number is 1 (latest version per group)
412-
return (
413-
self.db.query(entity_class)
414-
.join(version_subq, and_(entity_class.id == version_subq.c.id, version_subq.c.rn == 1))
415-
.offset(skip)
416-
.limit(limit)
417-
.all()
418-
)
410+
if entity_class == Citation:
411+
return (
412+
self.db.query(entity_class)
413+
.join(version_subq, and_(entity_class.id == version_subq.c.id, version_subq.c.rn == 1))
414+
.offset(skip)
415+
.all()
416+
)
417+
else:
418+
return (
419+
self.db.query(entity_class)
420+
.join(version_subq, and_(entity_class.id == version_subq.c.id, version_subq.c.rn == 1))
421+
.offset(skip)
422+
.limit(limit)
423+
.all()
424+
)
419425

420426

421427
def _get_entities_by_column_value(self, entity_class: Type[Any], column_name: str, value: Any, limit: int = 100, skip: int = 0) -> list[Any]:
@@ -920,3 +926,22 @@ def get_unique_citations(self, limit: int = 100, skip: int = 0) -> list[Citation
920926
List of Citation entities, one per unique citation_id with highest version
921927
"""
922928
return self._get_latest_entities_by_column(Citation, Citation.citation_id, limit=limit, skip=skip)
929+
930+
def count_unique_authors(self) -> int:
    """Count distinct authors by normalised (firstname, lastname, initials).

    Each name part is trimmed and lower-cased before comparison, and rows
    where any of the three parts is NULL are excluded.

    The parts are compared as separate columns rather than as a single
    space-joined string. Joining lets different name triples collapse into
    one key whenever a part itself contains a space (for example
    "mary ann" + "lee" versus "mary" + "ann lee"), and it ties the query
    to backends that provide a ``concat`` SQL function.

    Returns:
        Number of distinct normalised name triples.
    """
    name_parts = (PMCAuthor.firstname, PMCAuthor.lastname, PMCAuthor.initials)
    # Explicit labels give the DISTINCT subquery stable, unique column names.
    normalised = [
        func.lower(func.trim(part)).label(f"norm_{position}")
        for position, part in enumerate(name_parts)
    ]
    return (
        self.db.query(*normalised)
        .filter(*(part.isnot(None) for part in name_parts))
        .distinct()
        # Query.count() wraps the DISTINCT select in a subquery and counts its rows.
        .count()
    )

src/routers/epmc.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from src.models.pmc_article import PMCArticle, PMCArticleFull
55
from src.models.pmc_author import PMCAuthor
6-
from src.models.citation import Citation as CitationModel
6+
from src.models.citation import Citation as CitationModel, CitationList
77
from src.services.epmc import EPMCService as EPMCService
88
from src.repositories.epmc import EPMCRepo as EPMCRepo
99
from src.services.grant import GrantService as Grant
@@ -138,11 +138,12 @@ async def get_affiliation_countries_count():
138138
repo = EPMCRepo(self.db)
139139
return repo.get_affiliation_countries_count()
140140

141-
@self.router.get("/epmc/unique-citations", response_model=list[CitationModel])
141+
@self.router.get("/epmc/unique-citations", response_model=CitationList)
142142
async def get_unique_citations(limit: int = 1000, skip: int = 0):
143143
repo = EPMCRepo(self.db)
144144
citations = repo.get_unique_citations(limit=limit, skip=skip)
145-
return [CitationModel.model_validate(c) for c in citations]
145+
return CitationList(citations=[CitationModel.model_validate(c) for c in citations], citation_count=len(citations))
146+
146147

147148
@self.router.get("/epmc/top-authors")
148149
async def get_top_authors(count: int = 15):
@@ -151,4 +152,12 @@ async def get_top_authors(count: int = 15):
151152
try:
152153
return repo.get_top_authors(count=count)
153154
except Exception as e:
154-
raise HTTPException(status_code=500, detail=f"Failed to fetch top authors: {e}")
155+
raise HTTPException(status_code=500, detail=f"Failed to fetch top authors: {e}")
156+
157+
@self.router.get("/epmc/unique-authors-count")
async def unique_authors_count():
    """Return the unique-author count as ``{"unique_authors": <count>}``."""
    # Build the service on a repository bound to this router's DB handle.
    author_service = EPMCService(EPMCRepo(self.db))
    return {"unique_authors": author_service.get_unique_authors_count()}

src/services/epmc.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,3 +171,6 @@ def insert_references(self, created_by: str):
171171
counts["references"] += 1
172172

173173
return counts
174+
175+
def get_unique_authors_count(self) -> int:
    """Return the unique-author count reported by the EPMC repository."""
    unique_total = self.epmc_repo.count_unique_authors()
    return unique_total

0 commit comments

Comments
 (0)