Skip to content

Commit e6b3749

Browse files
committed
Bugfix for oa_state prioritisation
1 parent b79a7a0 commit e6b3749

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

server/workers/orcid/src/orcid_service.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -459,9 +459,12 @@ def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFra
459459
base_metadata = self._match_dois_by_version(base_metadata, dois)
460460

461461
base_metadata = base_metadata[base_metadata['doi'].isin(dois)]
462-
# Sort ascending so oa_state=1 (open access) rows come before oa_state=2,
463-
# ensuring the most open record is kept when deduplicating by DOI.
464-
base_metadata = base_metadata.sort_values(by='oa_state', ascending=True).drop_duplicates(subset='doi', keep='first')
462+
# Sort by oa_state priority (1=open > 0=restricted > 2=unknown) so the
463+
# most open record is kept when deduplicating by DOI.
464+
oa_state_order = {1: 0, 0: 1, 2: 2}
465+
base_metadata = base_metadata.assign(
466+
_oa_sort=base_metadata['oa_state'].map(oa_state_order)
467+
).sort_values(by='_oa_sort').drop_duplicates(subset='doi', keep='first').drop(columns='_oa_sort')
465468
if self.logger.isEnabledFor(logging.DEBUG):
466469
self._log_dataframe(base_metadata, params, 'base_metadata')
467470

0 commit comments

Comments
 (0)