Skip to content

Commit 1460d1b

Browse files
committed
return one guid per resource if the resource has no indexed guid
1 parent 35c67dd commit 1460d1b

2 files changed

Lines changed: 23 additions & 9 deletions

File tree

admin/share_reindex/views.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from osf.models import Guid
66
from django.db.models import F
77
from urllib.parse import urlencode
8-
from api.share.utils import get_not_indexed_guids_for_resource, task__reindex_resource_into_share
8+
from api.share.utils import get_not_indexed_guids_for_resource_with_no_indexed_guid, task__reindex_resource_into_share
99

1010
class FailedShareIndexedGuidList(PermissionRequiredMixin, ListView):
1111
paginate_by = 25
@@ -17,7 +17,7 @@ class FailedShareIndexedGuidList(PermissionRequiredMixin, ListView):
1717
def get_queryset(self):
1818
resource_type = self.request.GET.get('type', 'projects')
1919
# use custom_id because _id fails to render in django template
20-
return get_not_indexed_guids_for_resource(resource_type).annotate(custom_id=F('_id'))
20+
return get_not_indexed_guids_for_resource_with_no_indexed_guid(resource_type).annotate(custom_id=F('_id'))
2121

2222
def get_context_data(self, **kwargs):
2323
query_set = kwargs.pop('object_list', self.object_list)

api/share/utils.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import logging
77

88
from django.apps import apps
9-
from django.db.models import Q
9+
from django.db.models import Q, Exists, OuterRef
1010
from django.contrib.contenttypes.models import ContentType
1111
from website.settings import CeleryConfig
1212
from celery.utils.time import get_exponential_backoff_interval
@@ -133,14 +133,14 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
133133

134134
@celery_app.task
135135
def task__reindex_resource_into_share(resource_type: str, limit: int):
136-
guids = get_not_indexed_guids_for_resource(resource_type).values_list('_id', flat=True)[:limit].iterator()
136+
guids = get_not_indexed_guids_for_resource_with_no_indexed_guid(resource_type).values_list('_id', flat=True)[:limit].iterator()
137137
for guid in guids:
138138
task__update_share.apply_async(
139139
kwargs={'guid': guid, 'is_backfill': True},
140140
queue=CeleryConfig.task_low_queue,
141141
)
142142

143-
def get_not_indexed_guids_for_resource(resource_type: str):
143+
def get_not_indexed_guids_for_resource_with_no_indexed_guid(resource_type: str):
144144
from osf.models import Guid, Registration, Preprint, Node, OSFUser
145145
resource_mapper = {
146146
'projects': (Node, Q(is_public=True) & Q(deleted__isnull=True)),
@@ -150,11 +150,25 @@ def get_not_indexed_guids_for_resource(resource_type: str):
150150
}
151151
resource_model, query = resource_mapper.get(resource_type, 'projects')
152152
node_type = ContentType.objects.get_for_model(resource_model)
153-
public_node_ids = resource_model.objects.filter(query).values_list('id', flat=True)
154-
return Guid.objects.filter(
155-
Q(has_been_indexed=False) | Q(has_been_indexed__isnull=True),
153+
# Check whether the guid belongs to a public resource
154+
is_public_resource = resource_model.objects.filter(
155+
query,
156+
id=OuterRef('object_id'),
157+
)
158+
# Check if specific resource has any indexed guids
159+
has_indexed_guid = Guid.objects.filter(
156160
content_type=node_type,
157-
object_id__in=public_node_ids,
161+
object_id=OuterRef('object_id'),
162+
has_been_indexed=True,
163+
)
164+
return (
165+
Guid.objects
166+
.exclude(Exists(has_indexed_guid)) # exclude guid if its resource has any indexed guid
167+
.exclude(has_been_indexed=True) # exclude any other already-indexed guids, i.e. ones that belong to another resource_type
168+
.filter(content_type=node_type)
169+
.filter(Exists(is_public_resource)) # keep guid if the resource is public for specific content_type
170+
.order_by('object_id', 'id')
171+
.distinct('object_id') # keep one guid per resource: the one with the lowest id (i.e. the oldest created)
158172
)
159173

160174
def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition):

0 commit comments

Comments
 (0)