Skip to content

Commit 7486140

Browse files
committed
add task__reindex_resource_into_share to reindex into share guids with has_been_indexed None or False and public referents ...
1 parent cb3e548 commit 7486140

2 files changed

Lines changed: 35 additions & 29 deletions

File tree

admin/share_reindex/views.py

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@
33
from django.shortcuts import redirect
44
from django.views.generic import ListView, View
55
from osf.models import Guid
6-
from django.db.models import F, Q
7-
from django.contrib.contenttypes.models import ContentType
8-
from osf.models import Registration, Preprint, Node, OSFUser
6+
from django.db.models import F
97
from urllib.parse import urlencode
10-
8+
from api.share.utils import get_not_indexed_guids_for_resource, task__reindex_resource_into_share
119

1210
class FailedShareIndexedGuidList(PermissionRequiredMixin, ListView):
1311
paginate_by = 25
@@ -18,28 +16,10 @@ class FailedShareIndexedGuidList(PermissionRequiredMixin, ListView):
1816

1917
def get_queryset(self):
2018
resource_type = self.request.GET.get('type', 'projects')
21-
resource_mapper = {
22-
'projects': (Node, Q(is_public=True)),
23-
'preprints': (Preprint, Q(is_public=True)),
24-
'registries': (Registration, Q(is_public=True)),
25-
'users': (OSFUser, Q(is_active=True))
26-
}
27-
28-
resource_model, query = resource_mapper.get(resource_type)
29-
30-
node_type = ContentType.objects.get_for_model(resource_model)
31-
public_node_ids = resource_model.objects.filter(query).values_list('id', flat=True)
32-
# import pydevd_pycharm
33-
# pydevd_pycharm.settrace('host.docker.internal', port=1234, stdout_to_server=True, stderr_to_server=True)
34-
return Guid.objects.filter(
35-
Q(has_been_indexed=False) | Q(has_been_indexed=None),
36-
content_type=node_type,
37-
object_id__in=public_node_ids
38-
).annotate(custom_id=F('_id'))
19+
# use custom_id because _id fails to render in django template
20+
return get_not_indexed_guids_for_resource(resource_type).annotate(custom_id=F('_id'))
3921

4022
def get_context_data(self, **kwargs):
41-
# import pydevd_pycharm
42-
# pydevd_pycharm.settrace('host.docker.internal', port=1234, stdout_to_server=True, stderr_to_server=True)
4323
query_set = kwargs.pop('object_list', self.object_list)
4424
page_size = self.get_paginate_by(query_set)
4525
paginator, page, query_set, is_paginated = self.paginate_queryset(query_set, page_size)
@@ -58,8 +38,6 @@ def get_context_data(self, **kwargs):
5838
kwargs.setdefault('resource_guid_reindex', resource_type_guid_reindex.get(resource_type))
5939
status_msg = f'Reindex of {resource_type} started, please check in several minutes.' if self.request.GET.get('status') == 'indexing' else ''
6040
kwargs.setdefault('share_reindex_message', status_msg)
61-
# import pydevd_pycharm
62-
# pydevd_pycharm.settrace('host.docker.internal', port=1234, stdout_to_server=True, stderr_to_server=True)
6341
return super().get_context_data(**kwargs)
6442

6543

@@ -68,10 +46,9 @@ class FailedShareIndexedGuidReindex(PermissionRequiredMixin, View):
6846
raise_exception = True
6947

7048
def post(self, request, *args, **kwargs):
71-
# import pydevd_pycharm
72-
# pydevd_pycharm.settrace('host.docker.internal', port=1234, stdout_to_server=True, stderr_to_server=True)
73-
# 1. Get the guid from the URL string
7449
resource_type = self.kwargs.get('resource_type')
50+
# queue a background task that reindexes up to 100_000 not-yet-indexed guids of the given resource_type
51+
task__reindex_resource_into_share.delay(resource_type, 100_000)
7552
base_url = reverse('share_reindex:list')
7653
query_string = urlencode({'type': resource_type, 'status': 'indexing'})
7754
return redirect(f"{base_url}?{query_string}")

api/share/utils.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
import logging
77

88
from django.apps import apps
9+
from django.db.models import Q
10+
from django.contrib.contenttypes.models import ContentType
11+
from website.settings import CeleryConfig
912
from celery.utils.time import get_exponential_backoff_interval
1013
import requests
1114

@@ -128,6 +131,32 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
128131
)
129132

130133

134+
@celery_app.task
def task__reindex_resource_into_share(resource_type: str, limit: int):
    """Queue a share update for not-yet-indexed guids of one resource type.

    Looks up the guids returned by ``get_not_indexed_guids_for_resource`` for
    ``resource_type``, takes at most ``limit`` of them, and schedules one
    ``task__update_share`` call per guid on ``CeleryConfig.task_low_queue``
    with ``is_backfill=True``.

    :param resource_type: resource type key passed through to
        ``get_not_indexed_guids_for_resource``.
    :param limit: maximum number of guids to schedule in this run.
    """
    pending_guids = get_not_indexed_guids_for_resource(resource_type)
    # Only the guid string (`_id`) is needed to schedule the per-guid task.
    guid_ids = pending_guids.values_list('_id', flat=True)[:limit]
    # .iterator() streams rows instead of caching the whole result set.
    for guid_id in guid_ids.iterator():
        task__update_share.apply_async(
            kwargs={'guid': guid_id, 'is_backfill': True},
            queue=CeleryConfig.task_low_queue,
        )
142+
143+
def get_not_indexed_guids_for_resource(resource_type: str):
    """Return the ``Guid`` queryset of not-yet-indexed guids for a resource type.

    The result holds guids whose ``has_been_indexed`` is ``False`` or ``None``,
    whose content type is the model mapped to ``resource_type``, and whose
    ``object_id`` belongs to a row of that model passing the mapped filter.

    :param resource_type: one of ``'projects'``, ``'preprints'``,
        ``'registries'`` or ``'users'``; any other value falls back to
        ``'projects'``.
    :return: an unevaluated ``Guid`` queryset.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # with osf.models -- confirm before hoisting to module level.
    from osf.models import Guid, Registration, Preprint, Node, OSFUser

    # resource type -> (model, filter selecting the rows eligible for indexing)
    resource_mapper = {
        'projects': (Node, Q(is_public=True) & Q(deleted__isnull=True)),
        'preprints': (Preprint, Q(is_public=True) & Q(is_published=True) & Q(deleted__isnull=True)),
        'registries': (Registration, Q(is_public=True) & Q(deleted__isnull=True)),
        'users': (OSFUser, Q(is_active=True) & Q(deleted__isnull=True)),
    }
    # Fall back to the 'projects' *entry*. Using the bare string 'projects' as
    # the .get() default cannot be unpacked into (model, query) and raised
    # ValueError for every unknown resource_type.
    resource_model, query = resource_mapper.get(resource_type, resource_mapper['projects'])

    node_type = ContentType.objects.get_for_model(resource_model)
    # Kept as a queryset so it is used as a subquery in the filter below.
    public_node_ids = resource_model.objects.filter(query).values_list('id', flat=True)
    return Guid.objects.filter(
        Q(has_been_indexed=False) | Q(has_been_indexed=None),
        content_type=node_type,
        object_id__in=public_node_ids,
    )
159+
131160
def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition):
132161
try:
133162
_iri = osf_item.get_semantic_iri()

0 commit comments

Comments
 (0)