Skip to content

Commit 3034f72

Browse files
committed
cleaned up main.py
1 parent 2a62f73 commit 3034f72

File tree

2 files changed

+52
-63
lines changed

2 files changed

+52
-63
lines changed

src/main.py

Lines changed: 31 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@
77
from fastapi import FastAPI
88

99
from sqlalchemy import create_engine
10+
from sqlalchemy.engine import make_url
11+
from urllib.parse import quote
1012
from sqlalchemy.orm import sessionmaker, Session
1113

1214
# config
1315
from .config.constants import GH_BASE_URL
16+
from src.config.session import get_session, SessionLocal
17+
from src.config.engine import engine
1418

1519
# clients / repos / services / routers
1620
from .clients.github import GithubRepoClient
@@ -58,14 +62,25 @@ def main() -> FastAPI:
5862
app = FastAPI()
5963

6064
# DB setup
61-
db_conn = setup.DatabaseConnection(config.database_url)
65+
try:
66+
sa_url = make_url(config.database_url)
67+
user = quote(sa_url.username or "", safe="")
68+
pwd = quote(sa_url.password or "", safe="")
69+
host = sa_url.host or ""
70+
port = sa_url.port or ""
71+
dbname = sa_url.database or ""
72+
dsn = f"postgresql://{user}:{pwd}@{host}:{port}/{dbname}"
73+
except Exception:
74+
# Fallback: pass the original URL through (may work if already libpq-compatible)
75+
dsn = config.database_url
76+
77+
db_conn = setup.DatabaseConnection(dsn)
6278
db_conn.connect()
63-
engine = create_engine(config.database_url, pool_pre_ping=True, future=True)
64-
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
65-
session: Session = SessionLocal()
66-
logger.info("Database connected via SQLAlchemy ORM")
6779

68-
'''
80+
session = SessionLocal()
81+
logger.info("Database connected via SQLAlchemy ORM")
82+
83+
# Fields setup
6984
record_fields = set(Record.model_fields.keys())
7085
record_sql_builder = sqlbuilder.SQLBuilder("records").allow_fields(record_fields - {"id"})
7186
record_repo = RecordRepo(db_conn, record_sql_builder)
@@ -78,72 +93,39 @@ def main() -> FastAPI:
7893
author_sql_builder = sqlbuilder.SQLBuilder("authors").allow_fields(author_fields - {"id"})
7994
author_repo = AuthorRepo(db_conn, author_sql_builder)
8095

81-
# Client setup
96+
# PubMed setup
8297
pubmed_client = pubmed.Pubmed(constants.PUBMED_BASE_URL, config.pubmed_api_key)
83-
84-
# Service setup
8598
pubmed_service = PubmedService(author_repo, record_repo, article_repo, pubmed_client)
86-
87-
# Router setup
8899
pubmed_router = PubmedRouter(pubmed_service)
89100

90-
# --- GitHub client + service setup
101+
# GitHub setup
91102
gh_api_key = os.getenv("GITHUB_API_KEY", "")
92103
gh_org = os.getenv("GITHUB_ORG", "ga4gh") # change via env if needed
93-
94104
gh_client = GithubRepoClient(GH_BASE_URL, gh_api_key)
95-
96105
gh_repo_fields = set(GithubRepo.model_fields.keys())
97106
gh_repo_sql_builder = sqlbuilder.SQLBuilder("github_repos").allow_fields(gh_repo_fields - {"id"})
98107
gh_repo = GithubRepoRepository(db_conn, gh_repo_sql_builder)
99108
gh_service = GithubReposService(gh_repo, gh_client, record_repo)
100109
gh_router = GithubRepoRouter(gh_service)
101-
102-
# Optionally sync repos once at startup
103-
# The service.sync_repos expects a `user` string (created_by/updated_by).
104-
sync_user = os.getenv("GITHUB_SYNC_USER", "system")
105-
106-
try:
107-
logger.info("Starting GitHub repos sync...")
108-
synced = gh_service.sync_repos(sync_user)
109-
logger.info("GitHub sync completed. %d repos synced.", len(synced))
110-
except Exception as e:
111-
logger.exception("GitHub sync failed: %s", e)
112-
113-
# --- FastAPI app + router
114-
app.include_router(gh_router.router)
115-
app.include_router(pubmed_router.router)
116-
117110

111+
# PyPI setup
118112
pypi_fields = set(PypiModel.model_fields.keys())
119113
pypi_sql_builder = sqlbuilder.SQLBuilder("pypi").allow_fields(pypi_fields)
120114
pypi_repo = PypiRepo(db_conn, pypi_sql_builder)
121115
pypi_service = PypiService(pypi_repo)
122116
pypi_router = PypiRouter(pypi_service)
123117

124-
app.include_router(pypi_router.router)
125-
126-
app.include_router(health_router)
127-
'''
128-
118+
# EPMC setup
129119
epmc_repo = EPMCRepo(session)
130120
epmc_service = EPMCService(epmc_repo)
131121
epmc_router = EPMCRouter(epmc_service, session)
122+
123+
# --- FastAPI app + router
124+
app.include_router(gh_router.router)
125+
app.include_router(pypi_router.router)
126+
app.include_router(pubmed_router.router)
132127
app.include_router(epmc_router.router)
133-
# move after ingestion success
134-
grant_service = Grant(epmc_repo)
135-
#print(epmc_service.highest_versions_by_source_id)
136-
#grant_service.create_grants("ga4gh")
137-
start = time.perf_counter()
138-
#result = epmc_service.insert_articles_by_keyword("rews", created_by="system", epmc_db=session)
139-
#citations_result = epmc_service.insert_citations(created_by="system")
140-
#references_result = epmc_service.insert_references(created_by="system")
141-
#grant_result = grant_service.create_grants("ga4gh")
142-
elapsed = time.perf_counter() - start
143-
#print(result)
144-
#print(citations_result)
145-
#print(references_result)
146-
print(f"done data ingestion (elapsed={elapsed:.3f}s)")
128+
147129
return app
148130

149131

src/routers/epmc.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from src.models.citation import Citation as CitationModel
77
from src.services.epmc import EPMCService as EPMCService
88
from src.repositories.epmc import EPMCRepo as EPMCRepo
9+
from src.services.grant import GrantService as Grant
10+
from src.config.session import get_session
911

1012

1113
router = APIRouter(prefix="/epmc", tags=["Articles"])
@@ -19,7 +21,7 @@ def __init__(self, epmc_service: EPMCService, db: Session):
1921

2022
def _setup_routes(self):
2123
@self.router.get("/epmc/all-articles", response_model=list[PMCArticleFull])
22-
async def get_all_articles(limit: int = 100, skip: int = 0): # to be fixed
24+
async def get_all_articles(limit: int = 1000, skip: int = 0): # to be fixed
2325
try:
2426
repo = EPMCRepo(self.db)
2527
articles = repo.get_all_articles(limit=limit, skip=skip)
@@ -44,61 +46,61 @@ async def get_all_articles(limit: int = 100, skip: int = 0): # to be fixed
4446
raise HTTPException(status_code=500, detail=f"Failed to fetch articles: {str(e)}")
4547

4648
@self.router.get("/epmc/all-grants")
47-
async def get_all_grants(limit: int = 100, skip: int = 0):
49+
async def get_all_grants(limit: int = 1000, skip: int = 0):
4850
repo = EPMCRepo(self.db)
4951
service = EPMCService(repo)
5052
return repo.get_all_grants(limit=limit, skip=skip)
5153

5254
@self.router.get("/epmc/all-pmc-authors")
53-
async def get_all_pmc_authors(limit: int = 100, skip: int = 0):
55+
async def get_all_pmc_authors(limit: int = 1000, skip: int = 0):
5456
repo = EPMCRepo(self.db)
5557
service = EPMCService(repo)
5658
return repo.get_all_pmc_authors(limit=limit, skip=skip)
5759

5860
@self.router.get("/epmc/get-authors-by-article-id/{article_id}", response_model=list[PMCAuthor]) # to be fixed
59-
async def get_authors_by_article_id(article_id: int, limit: int = 100, skip: int = 0):
61+
async def get_authors_by_article_id(article_id: int, limit: int = 1000, skip: int = 0):
6062
repo = EPMCRepo(self.db)
6163
service = EPMCService(repo)
6264
return repo.get_authors_by_article_id(article_id, limit=limit, skip=skip)
6365

6466
@self.router.get("/epmc/get-articles-by-author-id/{author_id}", response_model=list[PMCArticle]) # to be fixed
65-
async def get_articles_by_author_id(author_id: int, limit: int = 100, skip: int = 0):
67+
async def get_articles_by_author_id(author_id: int, limit: int = 1000, skip: int = 0):
6668
repo = EPMCRepo(self.db)
6769
service = EPMCService(repo)
6870
return repo.get_articles_by_author_id(author_id, limit=limit, skip=skip)
6971

7072
@self.router.get("/epmc/get-articles-by-keyword/{keyword}", response_model=list[PMCArticle]) # to be fixed
71-
async def get_articles_by_keyword(keyword: str, limit: int = 100, skip: int = 0):
73+
async def get_articles_by_keyword(keyword: str, limit: int = 1000, skip: int = 0):
7274
repo = EPMCRepo(self.db)
7375
service = EPMCService(repo)
7476
return repo.get_articles_by_keyword(keyword, limit=limit, skip=skip)
7577

7678
@self.router.get("/epmc/all-article-authors")
77-
async def get_all_article_authors(limit: int = 100, skip: int = 0):
79+
async def get_all_article_authors(limit: int = 1000, skip: int = 0):
7880
repo = EPMCRepo(self.db)
7981
service = EPMCService(repo)
8082
return repo.get_all_article_authors(limit=limit, skip=skip)
8183

8284
@self.router.get("/epmc/all-pmc-references")
83-
async def get_all_pmc_references(limit: int = 100, skip: int = 0):
85+
async def get_all_pmc_references(limit: int = 1000, skip: int = 0):
8486
repo = EPMCRepo(self.db)
8587
service = EPMCService(repo)
8688
return repo.get_all_pmc_references(limit=limit, skip=skip)
8789

8890
@self.router.get("/epmc/all-citations")
89-
async def get_all_citations(limit: int = 100, skip: int = 0):
91+
async def get_all_citations(limit: int = 1000, skip: int = 0):
9092
repo = EPMCRepo(self.db)
9193
service = EPMCService(repo)
9294
return repo.get_all_citations(limit=limit, skip=skip)
9395

9496
@self.router.get("/epmc/all-fulltexts")
95-
async def get_all_fulltexts(limit: int = 100, skip: int = 0):
97+
async def get_all_fulltexts(limit: int = 1000, skip: int = 0):
9698
repo = EPMCRepo(self.db)
9799
service = EPMCService(repo)
98100
return repo.get_all_fulltexts(limit=limit, skip=skip)
99101

100102
@self.router.get("/epmc/all-pmc-affiliations")
101-
async def get_all_pmc_affiliations(limit: int = 100, skip: int = 0):
103+
async def get_all_pmc_affiliations(limit: int = 1000, skip: int = 0):
102104
repo = EPMCRepo(self.db)
103105
service = EPMCService(repo)
104106
return repo.get_all_pmc_affiliations(limit=limit, skip=skip)
@@ -109,6 +111,11 @@ async def ingest_pmc_data(
109111
):
110112
repo = EPMCRepo(self.db)
111113
service = EPMCService(repo)
114+
grant_service = Grant(repo)
115+
result = service.insert_articles_by_keyword("rews", created_by="system", epmc_db=self.db)
116+
citations_result = service.insert_citations(created_by="system")
117+
references_result = service.insert_references(created_by="system")
118+
grant_result = grant_service.create_grants("ga4gh")
112119

113120
try:
114121
service.insert_articles_by_keyword(keyword, created_by="system", epmc_db=self.db)
@@ -119,12 +126,12 @@ async def ingest_pmc_data(
119126
return [PMCArticleFull.model_validate(article) for article in articles]
120127

121128
@self.router.get("/epmc/all-latest-entries")
122-
async def get_all_latest_entries(limit: int = 100, skip: int = 0):
129+
async def get_all_latest_entries(limit: int = 1000, skip: int = 0):
123130
repo = EPMCRepo(self.db)
124131
return repo.get_all_latest_entries(limit=limit, skip=skip)
125132

126133
@self.router.get("/epmc/article/{pm_id}/latest-entries")
127-
async def get_article_latest_entries(pm_id: str, limit: int = 100, skip: int = 0):
134+
async def get_article_latest_entries(pm_id: str, limit: int = 1000, skip: int = 0):
128135
repo = EPMCRepo(self.db)
129136
return repo.get_all_latest_entries(pm_id=pm_id, limit=limit, skip=skip)
130137

@@ -134,7 +141,7 @@ async def get_affiliation_countries_count():
134141
return repo.get_affiliation_countries_count()
135142

136143
@self.router.get("/epmc/unique-citations", response_model=list[CitationModel])
137-
async def get_unique_citations(limit: int = 100, skip: int = 0):
144+
async def get_unique_citations(limit: int = 1000, skip: int = 0):
138145
repo = EPMCRepo(self.db)
139146
citations = repo.get_unique_citations(limit=limit, skip=skip)
140147
return [CitationModel.model_validate(c) for c in citations]

0 commit comments

Comments
 (0)