Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions udata/commands/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"spatial",
"quality",
"permissions",
"private", # Computed from published_at, not a real field
],
"resource": ["latest", "preview_url", "last_modified"],
"organization": ["class", "page", "uri", "logo_thumbnail"],
Expand Down
10 changes: 6 additions & 4 deletions udata/core/dataset/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import os
from datetime import datetime

import mongoengine
from bson.objectid import ObjectId
from feedgenerator.django.utils.feedgenerator import Atom1Feed
from flask import abort, current_app, make_response, redirect, request, url_for
Expand Down Expand Up @@ -254,7 +253,10 @@ def parse_filters(datasets, args):
if args.get("private") is not None:
if current_user.is_anonymous:
abort(401)
datasets = datasets.filter(private=args["private"])
if args["private"]:
datasets = datasets.filter(published_at=None)
else:
datasets = datasets.filter(published_at__ne=None)
return datasets


Expand Down Expand Up @@ -307,7 +309,7 @@ def get(self):
"""List or search all datasets"""
args = dataset_parser.parse()
datasets = Dataset.objects.visible_by_user(
current_user, mongoengine.Q(private__ne=True, archived=None, deleted=None)
current_user, Q(published_at__ne=None, archived=None, deleted=None)
)
datasets = dataset_parser.parse_filters(datasets, args)
sort = args["sort"] or ("$text_score" if args["q"] else None) or DEFAULT_SORTING
Expand Down Expand Up @@ -829,7 +831,7 @@ class DatasetSuggestAPI(API):
def get(self):
"""Datasets suggest endpoint using mongoDB contains"""
args = suggest_parser.parse_args()
datasets_query = Dataset.objects(archived=None, deleted=None, private=False)
datasets_query = Dataset.objects.visible()
datasets = datasets_query.filter(
Q(title__icontains=args["q"]) | Q(acronym__icontains=args["q"])
)
Expand Down
6 changes: 5 additions & 1 deletion udata/core/dataset/api_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@
"last_modified",
"deleted",
"private",
"published_at",
"tags",
"badges",
"resources",
Expand Down Expand Up @@ -349,7 +350,10 @@
"archived": fields.ISODateTime(description="The archival date if archived"),
"featured": fields.Boolean(description="Is the dataset featured"),
"private": fields.Boolean(
description="Is the dataset private to the owner or the organization"
description="Is the dataset private (DEPRECATED: use published_at instead)"
),
"published_at": fields.ISODateTime(
description="Last publication date, null if unpublished/private. Updated each time the dataset is republished."
),
"tags": fields.List(fields.String),
"badges": fields.List(
Expand Down
8 changes: 6 additions & 2 deletions udata/core/dataset/apiv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"last_modified",
"deleted",
"private",
"published_at",
"tags",
"badges",
"resources",
Expand Down Expand Up @@ -125,7 +126,10 @@
"archived": fields.ISODateTime(description="The archival date if archived"),
"featured": fields.Boolean(description="Is the dataset featured"),
"private": fields.Boolean(
description="Is the dataset private to the owner or the organization"
description="Is the dataset private (DEPRECATED: use published_at instead)"
),
"published_at": fields.ISODateTime(
description="Last publication date, null if unpublished/private. Updated each time the dataset is republished."
),
"tags": fields.List(fields.String),
"badges": fields.List(
Expand Down Expand Up @@ -318,7 +322,7 @@ def get(self):
"""List or search all datasets"""
args = dataset_parser.parse()
datasets = Dataset.objects.exclude("resources").visible_by_user(
current_user, mongoengine.Q(private__ne=True, archived=None, deleted=None)
current_user, mongoengine.Q(published_at__ne=None, archived=None, deleted=None)
)
datasets = dataset_parser.parse_filters(datasets, args)
sort = args["sort"] or ("$text_score" if args["q"] else None) or DEFAULT_SORTING
Expand Down
2 changes: 2 additions & 0 deletions udata/core/dataset/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class DatasetCsvAdapter(csv.Adapter):
"last_modified",
("tags", lambda o: ",".join(o.tags)),
("archived", lambda o: o.archived or False),
"published_at",
("resources_count", lambda o: len(o.resources)),
("main_resources_count", lambda o: len([r for r in o.resources if r.type == "main"])),
("resources_formats", lambda o: ",".join(set(r.format for r in o.resources if r.format))),
Expand Down Expand Up @@ -71,6 +72,7 @@ class ResourcesCsvAdapter(csv.NestedAdapter):
),
dataset_field("license"),
dataset_field("private"),
dataset_field("published_at"),
dataset_field("archived", lambda r: r.archived or False),
)
nested_fields = (
Expand Down
4 changes: 3 additions & 1 deletion udata/core/dataset/factories.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from datetime import datetime
from os.path import join

import factory
Expand All @@ -19,6 +20,7 @@ class Meta:
title = factory.Faker("sentence")
description = factory.Faker("text")
frequency = UpdateFrequency.UNKNOWN
published_at = factory.LazyFunction(datetime.utcnow)
resources = factory.LazyAttribute(lambda o: ResourceFactory.build_batch(o.nb_resources))

class Params:
Expand All @@ -31,7 +33,7 @@ class Params:


class HiddenDatasetFactory(DatasetFactory):
private = True
published_at = None


class ChecksumFactory(ModelFactory):
Expand Down
29 changes: 25 additions & 4 deletions udata/core/dataset/forms.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

from udata.core.access_type.constants import (
AccessAudienceCondition,
AccessAudienceType,
Expand Down Expand Up @@ -209,17 +211,36 @@ class DatasetForm(ModelForm):
_("Spatial coverage"), description=_("The geographical area covered by the data.")
)
tags = fields.TagField(_("Tags"), description=_("Some taxonomy keywords"))
private = fields.BooleanField(
_("Private"),
description=_("Restrict the dataset visibility to you or your organization only."),
)
published_at = fields.DateTimeField(_("Publication date"))

owner = fields.CurrentUserField()
organization = fields.PublishAsField(_("Publish as"))
extras = fields.ExtrasField()
resources = fields.NestedModelList(ResourceForm)
contact_points = fields.ContactPointListField(validators=[validate_contact_point])

@classmethod
def from_json(cls, formdata=None, *args, **kwargs):
    """
    Convert the deprecated `private` field to `published_at` for backward compatibility.

    The `private` field no longer exists on the Dataset model. We intercept the JSON
    here to convert it before WTForms processes it:
    - a truthy `private` unpublishes the dataset (`published_at = None`);
    - a falsy `private` publishes it now (`published_at = utcnow`).
    An explicit `published_at` in the payload always takes precedence over `private`.

    Note: We cannot add a `private` field to this form because `populate_obj()` iterates
    over all form fields (not just those in the JSON) and would try to set `obj.private`,
    which doesn't exist on the model.
    """
    if formdata and "private" in formdata:
        # Shallow copy is enough: we only remove a top-level key.
        formdata = formdata.copy()
        private = formdata.pop("private")
        if "published_at" not in formdata:
            # Use truthiness rather than `is True` so lenient clients sending
            # e.g. 1 instead of true still get a private dataset instead of
            # being silently published.
            if private:
                formdata["published_at"] = None
            else:
                formdata["published_at"] = datetime.utcnow().isoformat()
    return super().from_json(formdata, *args, **kwargs)


class ResourcesListForm(ModelForm):
model_class = Dataset
Expand Down
18 changes: 14 additions & 4 deletions udata/core/dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,10 @@ def default(cls):

class DatasetQuerySet(OwnedQuerySet):
def visible(self):
return self(private__ne=True, deleted=None, archived=None)
return self(published_at__ne=None, deleted=None, archived=None)

def hidden(self):
return self(db.Q(private=True) | db.Q(deleted__ne=None) | db.Q(archived__ne=None))
return self(db.Q(published_at=None) | db.Q(deleted__ne=None) | db.Q(archived__ne=None))

def with_badge(self, kind):
return self(badges__kind=kind)
Expand Down Expand Up @@ -553,7 +553,7 @@ class Dataset(
tags = field(db.TagListField())
resources = field(db.ListField(db.EmbeddedDocumentField(Resource)), auditable=False)

private = field(db.BooleanField(default=False))
published_at = field(db.DateTimeField())

frequency = field(db.EnumField(UpdateFrequency))
frequency_date = field(db.DateTimeField(verbose_name=_("Future date of update")))
Expand Down Expand Up @@ -600,6 +600,11 @@ class Dataset(
def __str__(self):
return self.title or ""

def to_dict(self, exclude=None):
    """
    Serialize the dataset, exposing the deprecated computed `private` field.

    `private` is no longer a stored field (it is derived from `published_at`),
    so the base serialization would drop it; re-add it here for backward
    compatibility — unless the caller explicitly excluded it.
    """
    data = super().to_dict(exclude=exclude)
    # Respect an explicit exclusion of the deprecated field (`exclude` is
    # presumably a collection of field names — same contract as super()).
    if not exclude or "private" not in exclude:
        data["private"] = self.private
    return data

__metrics_keys__ = [
"discussions",
"discussions_open",
Expand Down Expand Up @@ -743,7 +748,12 @@ def is_visible(self):

@property
def is_hidden(self):
return self.private or self.deleted or self.archived
return self.published_at is None or self.deleted or self.archived

@property
def private(self) -> bool:
    """Deprecated computed flag kept for backward compatibility.

    A dataset with no publication date is considered private; the source
    of truth is `published_at`.
    """
    publication_date = self.published_at
    return publication_date is None

@property
def full_title(self):
Expand Down
11 changes: 8 additions & 3 deletions udata/core/dataset/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,15 @@ def get_queryset(model_cls):
if model_cls.__name__ == "Resource":
model_cls = getattr(udata_models, "Dataset")
params = {}
attrs = ("private", "deleted", "deleted_at")
for attr in attrs:
# Dataset uses published_at instead of private; other models still use private
if model_cls.__name__ == "Dataset":
params["published_at__ne"] = None
elif getattr(model_cls, "private", None):
params["private"] = False
# Filter out deleted/soft-deleted items
for attr in ("deleted", "deleted_at"):
if getattr(model_cls, attr, None):
params[attr] = False
params[attr] = None
# no_cache to avoid eating up too much RAM
return model_cls.objects.filter(**params).no_cache()

Expand Down
2 changes: 1 addition & 1 deletion udata/core/organization/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def get(self, org):
args = dataset_parser.parse()
qs = Dataset.objects.owned_by(org)
if not OrganizationPrivatePermission(org).can():
qs = qs(private__ne=True)
qs = qs(published_at__ne=None)
return qs.order_by(args["sort"]).paginate(args["page"], args["page_size"])


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,16 @@

import logging

from udata.models import Dataset, Reuse

log = logging.getLogger(__name__)


def migrate(db):
log.info("Processing Reuse…")
result = db.reuse.update_many({"private": None}, {"$set": {"private": False}})
log.info(f"Fixed {result.modified_count} Reuse objects from private None to private False.")

count = Reuse.objects(private=None).update(private=False)
log.info(f"Fixed {count} Reuse objects from private None to private False.")

log.info("Processing Datasets…")
count = Dataset.objects(private=None).update(private=False)
log.info(f"Fixed {count} Dataset objects from private None to private False.")
log.info("Processing Dataset…")
result = db.dataset.update_many({"private": None}, {"$set": {"private": False}})
log.info(f"Fixed {result.modified_count} Dataset objects from private None to private False.")

log.info("Done")
53 changes: 53 additions & 0 deletions udata/migrations/2025-12-10-add-published_at-field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Migration: Add published_at field and populate from private field history.

Phase 1 (fast): Bulk update all public datasets with created_at_internal
Phase 2 (slow): Refine dates using activity history for more accuracy
"""

import logging

import click

from udata.core.dataset.activities import UserUpdatedDataset
from udata.models import Dataset

log = logging.getLogger(__name__)


def migrate(db):
    """Populate `published_at` from the legacy `private` field, then drop `private`.

    Three phases:
    1. Fast bulk update: every public dataset gets `published_at = created_at_internal`.
    2. Slow refinement: replace that default with the date of the latest
       `private`-change activity, when one exists.
    3. Cleanup: unset the now-deprecated `private` field on all datasets.
    """
    log.info("Phase 1: Bulk update with created_at_internal...")

    # Avoid downtime: set a default value immediately so the system stays functional
    # Aggregation-pipeline update: "$created_at_internal" references each
    # document's own field. Datasets with private=True are deliberately left
    # without published_at (missing field → None → still private).
    result = db.dataset.update_many(
        {"private": False, "published_at": {"$exists": False}},
        [{"$set": {"published_at": "$created_at_internal"}}],
    )
    log.info(f"Phase 1 done: {result.modified_count} datasets updated")

    log.info("Phase 2: Refining dates from activity history...")

    # Only published datasets (those given a default in phase 1) are refined.
    datasets = Dataset.objects(published_at__ne=None).only("id", "created_at_internal")
    count = datasets.count()
    updated = 0

    # no_cache()/timeout(False): iterate the full collection without eating RAM
    # or hitting the server-side cursor timeout on large deployments.
    with click.progressbar(
        datasets.no_cache().timeout(False), length=count, label="Refining dates"
    ) as progress:
        for dataset in progress:
            # Most recent activity where the `private` flag changed; for a
            # currently-public dataset this is presumably its last publication.
            activity = (
                UserUpdatedDataset.objects(related_to=dataset.id, changes="private")
                .order_by("-created_at")
                .only("created_at")
                .first()
            )

            # Skip the (unlikely) case where the activity date matches the
            # phase-1 default exactly — nothing would change.
            if activity and activity.created_at != dataset.created_at_internal:
                Dataset.objects(id=dataset.id).update_one(set__published_at=activity.created_at)
                updated += 1

    log.info(f"Phase 2 done: {updated} datasets refined with activity date")

    log.info("Phase 3: Remove deprecated private field...")
    result = db.dataset.update_many({"private": {"$exists": True}}, {"$unset": {"private": ""}})
    log.info(f"Phase 3 done: {result.modified_count} datasets cleaned")
2 changes: 1 addition & 1 deletion udata/tests/api/test_activities_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_activity_api_list_with_private(self) -> None:
"""It should fetch an activity list from the API"""
activities: list[Activity] = [
FakeDatasetActivity.objects.create(
actor=UserFactory(), related_to=DatasetFactory(private=True)
actor=UserFactory(), related_to=DatasetFactory(published_at=None)
),
FakeReuseActivity.objects.create(
actor=UserFactory(), related_to=ReuseFactory(private=True)
Expand Down
Loading