-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathapi.py
More file actions
659 lines (567 loc) · 24.6 KB
/
api.py
File metadata and controls
659 lines (567 loc) · 24.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
"""
Components API (warning: UNSTABLE, in progress API)
These functions are often going to be simple-looking write operations, but there
is bookkeeping logic needed across multiple models to keep state consistent. You
can read from the models directly for various queries if necessary–we do this in
the Django Admin for instance. But you should NEVER mutate this app's models
directly, since there might be other related models that you may not know about.
Please look at the models.py file for more information about the kinds of data
that are stored in this app.
"""
from __future__ import annotations
import mimetypes
from datetime import datetime
from enum import StrEnum, auto
from logging import getLogger
from pathlib import Path
from uuid import UUID
from django.db.models import Q, QuerySet
from django.db.transaction import atomic
from django.http.response import HttpResponse, HttpResponseNotFound
from ..media import api as media_api
from ..publishing import api as publishing_api
from .models import Component, ComponentType, ComponentVersion, ComponentVersionMedia
# The public API that will be re-exported by openedx_content.api
# is listed in the __all__ entries below. Internal helper functions that are
# private to this module should start with an underscore. If a function does not
# start with an underscore AND it is not in __all__, that function is considered
# to be callable only by other apps in the authoring package.
# Names exported as this module's public API.
__all__ = [
"get_or_create_component_type",
"get_or_create_component_type_by_entity_key",
"create_component",
"create_component_version",
"create_next_component_version",
"create_component_and_version",
"get_component",
"get_component_by_key",
"get_component_by_uuid",
"get_component_version_by_uuid",
"component_exists_by_key",
"get_collection_components",
"get_components",
"create_component_version_media",
"look_up_component_version_media",
"AssetError",
"get_redirect_response_for_component_asset",
]
# NOTE(review): ``getLogger()`` called without a name returns the root logger,
# so records emitted here are not attributed to this module. The usual
# convention is ``getLogger(__name__)`` -- confirm whether this is intentional.
logger = getLogger()
def get_or_create_component_type(namespace: str, name: str) -> ComponentType:
    """
    Return the ComponentType for ``(namespace, name)``, creating it if missing.

    Caching Warning: Be careful about wrapping this function in a caching
    decorator (e.g. ``lru_cache``). Incorrect cache values could leak out in
    the event of a rollback -- e.g. new types are introduced in a large import
    transaction which later fails. If you want caching, keep the results that
    come back from this function in a local dict owned by your import process
    instead.
    """
    component_type, _was_created = ComponentType.objects.get_or_create(namespace=namespace, name=name)
    return component_type
def get_or_create_component_type_by_entity_key(entity_key: str) -> tuple[ComponentType, str]:
    """
    Get or create the ComponentType named by a full entity key string.

    ``entity_key`` must look like ``"{namespace}:{type_name}:{local_key}"``.
    It is split on the first two colons only, so any further colons stay part
    of ``local_key``. The ``namespace`` and ``type_name`` pieces are handed to
    ``get_or_create_component_type``.

    Returns a ``(component_type, local_key)`` tuple.

    Raises ValueError if ``entity_key`` has fewer than three colon-separated
    parts.
    """
    try:
        namespace, type_name, local_key = entity_key.split(':', 2)
    except ValueError as exc:
        # The second literal is deliberately NOT an f-string: the braces are
        # shown verbatim as a template of the expected shape.
        message = (
            f"Invalid entity_key format: {entity_key!r}. "
            "Expected format: '{namespace}:{type_name}:{local_key}'"
        )
        raise ValueError(message) from exc

    component_type = get_or_create_component_type(namespace, type_name)
    return component_type, local_key
def create_component(
    learning_package_id: int,
    /,
    component_type: ComponentType,
    local_key: str,
    created: datetime,
    created_by: int | None,
    *,
    can_stand_alone: bool = True,
) -> Component:
    """
    Create a new Component (an entity like a Problem or Video).

    A PublishableEntity is created first, keyed as
    ``"{namespace}:{type_name}:{local_key}"``, followed by the Component row
    that points at it. Both writes share a single ``atomic()`` block.
    """
    entity_key = f"{component_type.namespace}:{component_type.name}:{local_key}"

    with atomic():
        entity = publishing_api.create_publishable_entity(
            learning_package_id,
            entity_key,
            created,
            created_by,
            can_stand_alone=can_stand_alone,
        )
        return Component.objects.create(
            publishable_entity=entity,
            learning_package_id=learning_package_id,
            component_type=component_type,
            local_key=local_key,
        )
def create_component_version(
    component_pk: int,
    /,
    version_num: int,
    title: str,
    created: datetime,
    created_by: int | None,
) -> ComponentVersion:
    """
    Create a new ComponentVersion for the Component with pk ``component_pk``.

    The PublishableEntityVersion is created through the publishing API and the
    ComponentVersion row is then attached to it, all inside one ``atomic()``
    block. The caller supplies ``version_num`` explicitly.
    """
    with atomic():
        entity_version = publishing_api.create_publishable_entity_version(
            component_pk,
            version_num=version_num,
            title=title,
            created=created,
            created_by=created_by,
        )
        return ComponentVersion.objects.create(
            publishable_entity_version=entity_version,
            component_id=component_pk,
        )
def create_next_component_version(
    component_pk: int,
    /,
    media_to_replace: dict[str, int | None | bytes],
    created: datetime,
    title: str | None = None,
    created_by: int | None = None,
    *,
    force_version_num: int | None = None,
    ignore_previous_media: bool = False,
) -> ComponentVersion:
    """
    Create a new ComponentVersion based on the most recent version.

    A very common pattern for making a new ComponentVersion is going to be
    "make it just like the last version, except changing these one or two
    things". That is what this function is for.

    Args:
        component_pk (int): The primary key of the Component to version.
        media_to_replace (dict): Mapping of file keys (local paths) to a
            ``Media.id``, to ``bytes`` (stored as new file media), or to
            ``None`` (drop that key from the next version).
        created (datetime): The creation timestamp for the new version.
        title (str, optional): Title for the new version. If None, the
            previous version's title is used; with no previous version the
            title falls back to an empty string.
        created_by (int, optional): User ID of the creator.
        force_version_num (int, optional): If provided, overrides the
            automatic version number increment and sets this version's number
            explicitly.
        ignore_previous_media (bool): If True, do not copy over media from the
            previous version.

    Returns:
        ComponentVersion: The newly created ComponentVersion instance.

    Before calling this, you should create any new media via the media API or
    send the media bytes as part of the ``media_to_replace`` values.

    Make sure to wrap the call to this function in an atomic block:
    ``with transaction.atomic():``

    It is okay to mark entries for deletion that don't exist. For instance, if
    a version has ``a.txt`` and ``b.txt``, sending a ``media_to_replace`` value
    of ``{"a.txt": None, "c.txt": None}`` will remove ``a.txt`` from the next
    version, leave ``b.txt`` alone, and will not error -- even though there is
    no ``c.txt`` in the previous version. This makes it a little more
    convenient to remove paths (e.g. due to deprecation) without having to
    always check for their existence first.

    Why use force_version_num?
        Normally, the version number is incremented automatically from the
        latest version. If you need to set a specific version number (for
        example, when restoring from backup, importing legacy data, or
        synchronizing with another system), use force_version_num to override
        the default behavior.

    Why not use create_component_version?
        The main reason is that we want to reuse the logic to create a static
        file component from a dictionary.

    TODO: Have to add learning_downloadable info to this when it comes time to
          support static asset download.
    """
    component = Component.objects.get(pk=component_pk)

    # We need the highest version_num for this Publishable Entity, which is
    # not necessarily the Draft. If the entity was soft-deleted, the draft is
    # None but numbering should continue from the last edited version; and a
    # Draft that was reverted to an earlier version must still be followed by
    # a number above the latest one.
    previous_version = component.versioning.latest
    if previous_version is None:
        version_num = 1
        title = title or ""
    else:
        version_num = previous_version.version_num + 1
        if title is None:
            title = previous_version.title

    if force_version_num is not None:
        version_num = force_version_num

    with atomic():
        entity_version = publishing_api.create_publishable_entity_version(
            component_pk,
            version_num=version_num,
            title=title,
            created=created,
            created_by=created_by,
        )
        component_version = ComponentVersion.objects.create(
            publishable_entity_version=entity_version,
            component_id=component_pk,
        )

        # Step 1: attach everything the caller explicitly asked for.
        for key, replacement in media_to_replace.items():
            # None marks the key for removal, so there is nothing to attach.
            if replacement is None:
                continue

            if isinstance(replacement, bytes):
                # Raw bytes: store them as file media, guessing the media type
                # from the key, which doubles as the file path.
                guessed_type, _encoding = mimetypes.guess_type(key)
                # We use "application/octet-stream" as a generic fallback media type, per
                # RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046
                media_type = media_api.get_or_create_media_type(
                    guessed_type or "application/octet-stream"
                )
                media = media_api.get_or_create_file_media(
                    component.learning_package.id,
                    media_type.id,
                    data=replacement,
                    created=created,
                )
                media_pk = media.pk
            else:
                # Otherwise the value is already a Media primary key.
                media_pk = replacement

            ComponentVersionMedia.objects.create(
                media_id=media_pk,
                component_version=component_version,
                key=key,
            )

        if ignore_previous_media:
            return component_version

        # Step 2: carry over the previous version's associations, except for
        # keys the caller replaced or marked for deletion.
        previous_links = ComponentVersionMedia.objects.filter(component_version=previous_version)
        for previous_link in previous_links:
            if previous_link.key in media_to_replace:
                continue
            ComponentVersionMedia.objects.create(
                media_id=previous_link.media_id,
                component_version=component_version,
                key=previous_link.key,
            )

    return component_version
def create_component_and_version(  # pylint: disable=too-many-positional-arguments
    learning_package_id: int,
    /,
    component_type: ComponentType,
    local_key: str,
    title: str,
    created: datetime,
    created_by: int | None = None,
    *,
    can_stand_alone: bool = True,
) -> tuple[Component, ComponentVersion]:
    """
    Create a Component and its first ComponentVersion atomically.

    The version is always created with ``version_num=1``. Returns a
    ``(component, component_version)`` tuple.
    """
    with atomic():
        new_component = create_component(
            learning_package_id,
            component_type=component_type,
            local_key=local_key,
            created=created,
            created_by=created_by,
            can_stand_alone=can_stand_alone,
        )
        first_version = create_component_version(
            new_component.pk,
            version_num=1,
            title=title,
            created=created,
            created_by=created_by,
        )
        return new_component, first_version
def get_component(component_pk: int, /) -> Component:
    """
    Get Component by its primary key.

    This is the same as the PublishableEntity's ID primary key. The lookup
    goes through the ``with_publishing_relations`` manager.
    """
    manager = Component.with_publishing_relations
    return manager.get(pk=component_pk)
def get_component_by_key(
    learning_package_id: int,
    /,
    namespace: str,
    type_name: str,
    local_key: str,
) -> Component:
    """
    Get a Component by its unique (namespace, type, local_key) tuple.

    The lookup is scoped to the given learning package and goes through the
    ``with_publishing_relations`` manager.
    """
    manager = Component.with_publishing_relations
    return manager.get(
        learning_package_id=learning_package_id,
        component_type__namespace=namespace,
        component_type__name=type_name,
        local_key=local_key,
    )
def get_component_by_uuid(uuid: UUID) -> Component:
    """
    Get a Component by the UUID of its PublishableEntity.
    """
    manager = Component.with_publishing_relations
    return manager.get(publishable_entity__uuid=uuid)
def get_component_version_by_uuid(uuid: UUID) -> ComponentVersion:
    """
    Get a ComponentVersion by the UUID of its PublishableEntityVersion.

    The related component and that component's learning package are loaded
    in the same query via ``select_related``.
    """
    versions = ComponentVersion.objects.select_related(
        "component",
        "component__learning_package",
    )
    return versions.get(publishable_entity_version__uuid=uuid)
def component_exists_by_key(
    learning_package_id: int,
    /,
    namespace: str,
    type_name: str,
    local_key: str
) -> bool:
    """
    Return True/False for whether a Component exists.

    Note that a Component still exists even if it's been soft-deleted (there's
    no current Draft version for it), or if it's been unpublished.

    Args:
        learning_package_id: Primary key of the learning package to search.
        namespace: ComponentType namespace to match.
        type_name: ComponentType name to match.
        local_key: The Component's local key.
    """
    # ``exists()`` asks the database only whether a matching row is present,
    # rather than fetching and instantiating a Component we would discard.
    return Component.objects.filter(
        learning_package_id=learning_package_id,
        component_type__namespace=namespace,
        component_type__name=type_name,
        local_key=local_key,
    ).exists()
def get_components(  # pylint: disable=too-many-positional-arguments
    learning_package_id: int,
    /,
    draft: bool | None = None,
    published: bool | None = None,
    namespace: str | None = None,
    type_names: list[str] | None = None,
    draft_title: str | None = None,
    published_title: str | None = None,
) -> QuerySet[Component]:
    """
    Fetch a QuerySet of Components for a LearningPackage using various filters.

    This method will pre-load all the relations that we need in order to get
    info from the Component's draft and published versions, since we'll be
    referencing these a lot.

    Every filter is optional; ``None`` means "don't filter on this":

    * ``draft`` / ``published``: True keeps only Components that have a
      draft/published version, False keeps only those that don't.
    * ``namespace``: exact match on the ComponentType namespace.
    * ``type_names``: ComponentType name must be one of these.
    * ``draft_title`` / ``published_title``: case-insensitive substring match
      against the draft/published version title OR the Component's local key.

    Results are ordered by primary key.
    """
    components = (
        Component.with_publishing_relations
        .filter(learning_package_id=learning_package_id)
        .order_by('pk')
    )

    if draft is not None:
        lacks_draft = not draft
        components = components.filter(publishable_entity__draft__version__isnull=lacks_draft)

    if published is not None:
        lacks_published = not published
        components = components.filter(publishable_entity__published__version__isnull=lacks_published)

    if namespace is not None:
        components = components.filter(component_type__namespace=namespace)

    if type_names is not None:
        components = components.filter(component_type__name__in=type_names)

    if draft_title is not None:
        in_draft_title = Q(publishable_entity__draft__version__title__icontains=draft_title)
        in_local_key = Q(local_key__icontains=draft_title)
        components = components.filter(in_draft_title | in_local_key)

    if published_title is not None:
        in_published_title = Q(publishable_entity__published__version__title__icontains=published_title)
        in_local_key = Q(local_key__icontains=published_title)
        components = components.filter(in_published_title | in_local_key)

    return components
def get_collection_components(
    learning_package_id: int,
    collection_code: str,
) -> QuerySet[Component]:
    """
    Returns a QuerySet of Components relating to the PublishableEntities in a Collection.

    Components have a one-to-one relationship with PublishableEntity, but the
    reverse may not always be true. Results are ordered by primary key.
    """
    in_collection = Component.objects.filter(
        learning_package_id=learning_package_id,
        publishable_entity__collections__collection_code=collection_code,
    )
    return in_collection.order_by('pk')
def look_up_component_version_media(
    learning_package_key: str,
    component_key: str,
    version_num: int,
    key: Path,
) -> ComponentVersionMedia:
    """
    Look up ComponentVersionMedia by human readable keys.

    Can raise a django.core.exceptions.ObjectDoesNotExist error if there is no
    matching ComponentVersionMedia.

    This API call was only used in our proof-of-concept assets media server,
    and I don't know if we want to make it a part of the public interface.
    """
    # One condition per human-readable identifier; all four must match.
    in_learning_package = Q(component_version__component__learning_package__key=learning_package_key)
    for_component = Q(component_version__component__publishable_entity__key=component_key)
    at_version_num = Q(component_version__publishable_entity_version__version_num=version_num)
    at_media_key = Q(key=key)

    # Pre-load the media and the component/learning-package chain.
    candidates = ComponentVersionMedia.objects.select_related(
        "media",
        "media__media_type",
        "component_version",
        "component_version__component",
        "component_version__component__learning_package",
    )
    return candidates.get(in_learning_package & for_component & at_version_num & at_media_key)
def create_component_version_media(
    component_version_id: int,
    media_id: int,
    /,
    key: str,
) -> ComponentVersionMedia:
    """
    Add a Media to the given ComponentVersion.

    We don't allow keys that would be absolute paths, e.g. ones that start
    with '/'. Storing these causes headaches with building relative paths and
    because of mismatches with things that expect a leading slash and those
    that don't. So for safety and consistency, we strip off leading slashes
    and emit a warning when we do.

    Uses ``get_or_create``, so an existing row for the same
    (component version, media, key) is returned rather than duplicated.
    """
    relative_key = key.lstrip('/')
    if relative_key != key:
        # Something was stripped, i.e. the caller passed an absolute path.
        logger.warning(
            "Absolute paths are not supported: "
            f"removed leading '/' from ComponentVersion {component_version_id} "
            f"media key: {repr(key)} (media_id: {media_id})"
        )

    version_media, _was_created = ComponentVersionMedia.objects.get_or_create(
        component_version_id=component_version_id,
        media_id=media_id,
        key=relative_key,
    )
    return version_media
class AssetError(StrEnum):
"""Error codes related to fetching ComponentVersion assets."""
ASSET_PATH_NOT_FOUND_FOR_COMPONENT_VERSION = auto()
ASSET_HAS_NO_DOWNLOAD_FILE = auto()
def _get_component_version_info_headers(component_version: ComponentVersion) -> dict[str, str]:
    """
    These are the headers we can derive based on a valid ComponentVersion.

    These headers are intended to ease development and debugging, by showing
    where this static asset is coming from. These headers will work even if
    the asset path does not exist for this particular ComponentVersion.

    Args:
        component_version: The ComponentVersion to describe. Its ``component``
            and that component's ``learning_package`` are read.

    Returns:
        A mapping of header name to header value. Every value is converted
        with ``str()`` so the result matches the ``dict[str, str]`` annotation
        even when the ``uuid`` attributes are not plain strings.
    """
    component = component_version.component
    learning_package = component.learning_package
    return {
        # Component
        "X-Open-edX-Component-Key": component.publishable_entity.key,
        "X-Open-edX-Component-Uuid": str(component.uuid),

        # Component Version
        "X-Open-edX-Component-Version-Uuid": str(component_version.uuid),
        "X-Open-edX-Component-Version-Num": str(component_version.version_num),

        # Learning Package
        "X-Open-edX-Learning-Package-Key": learning_package.key,
        "X-Open-edX-Learning-Package-Uuid": str(learning_package.uuid),
    }
def get_redirect_response_for_component_asset(
    component_version_uuid: UUID,
    asset_path: Path,
    public: bool = False,
) -> HttpResponse:
    """
    ``HttpResponse`` for a reverse-proxy to serve a ``ComponentVersion`` asset.

    :param component_version_uuid: ``UUID`` of the ``ComponentVersion`` that the
        asset is part of.
    :param asset_path: Path to the asset being requested.
    :param public: Is this asset going to be made available without auth checks?
        If ``True``, this will return an ``HttpResponse`` that can be cached in
        a CDN and shared across many clients.

    **Response Codes**

    If the asset exists for this ``ComponentVersion``, this function will return
    an ``HttpResponse`` with a status code of ``200``.

    If the specified asset does not exist for this ``ComponentVersion``, or if
    the ``ComponentVersion`` itself does not exist, the response code will be
    ``404``. The same is true when the asset exists but has no downloadable
    file.

    This function does not do auth checking of any sort. It will never return
    a ``401`` or ``403`` response code. That is by design. Figuring out who is
    making the request and whether they have permission to do so is the
    responsibility of whatever is calling this function.

    **Metadata Headers**

    The ``HttpResponse`` returned by this function will have headers describing
    the asset and the ``ComponentVersion`` it belongs to (if it exists):

    * ``Content-Type``
    * ``Etag`` (this will be the asset's hash digest)
    * ``X-Open-edX-Component-Key``
    * ``X-Open-edX-Component-Uuid``
    * ``X-Open-edX-Component-Version-Uuid``
    * ``X-Open-edX-Component-Version-Num``
    * ``X-Open-edX-Learning-Package-Key``
    * ``X-Open-edX-Learning-Package-Uuid``

    A ``404`` for an existing ``ComponentVersion`` additionally carries an
    ``X-Open-edX-Error`` header holding an ``AssetError`` code.

    **Asset Redirection**

    For performance reasons, the ``HttpResponse`` object returned by this
    function does not contain the actual media data of the asset. It requires
    an appropriately configured reverse proxy server that handles the
    ``X-Accel-Redirect`` header (both Caddy and Nginx support this).

    .. warning::
        If you add any headers here, you may need to add them in the "media"
        service container's reverse proxy configuration. In Tutor, this is a
        Caddyfile. All non-standard HTTP headers should be prefixed with
        ``X-Open-edX-``.
    """
    # Step 1: the ComponentVersion itself must exist.
    versions = ComponentVersion.objects.select_related("component", "component__learning_package")
    try:
        component_version = versions.get(publishable_entity_version__uuid=component_version_uuid)
    except ComponentVersion.DoesNotExist:
        # Nothing was found, so there is nothing to describe in headers.
        logger.error(f"Asset Not Found: No ComponentVersion with UUID {component_version_uuid}")
        return HttpResponseNotFound()

    # From here on the ComponentVersion is known, so every response (success
    # or failure) carries its descriptive headers.
    headers = _get_component_version_info_headers(component_version)

    # Step 2: the ComponentVersion must have media stored under this path.
    try:
        version_media = component_version.componentversionmedia_set.get(key=asset_path)
    except ComponentVersionMedia.DoesNotExist:
        logger.error(f"ComponentVersion {component_version_uuid} has no asset {asset_path}")
        headers["X-Open-edX-Error"] = str(AssetError.ASSET_PATH_NOT_FOUND_FOR_COMPONENT_VERSION)
        return HttpResponseNotFound(headers=headers)

    # Step 3: the Media must have a downloadable file rather than just inline
    # text. We could grab the text and stream it to the user anyway, but we
    # explicitly don't: streaming large text fields from the database is less
    # scalable, and we don't want to encourage that usage pattern.
    media = version_media.media
    if not media.has_file:
        logger.error(
            f"ComponentVersion {component_version_uuid} has asset {asset_path}, "
            "but it is not downloadable (has_file=False)."
        )
        headers["X-Open-edX-Error"] = str(AssetError.ASSET_HAS_NO_DOWNLOAD_FILE)
        return HttpResponseNotFound(headers=headers)

    # The Media is valid: add Media-level headers such as the hash/etag and
    # content type.
    headers.update(media_api.get_media_info_headers(media))

    # Recompute redirect headers (reminder: this should never be cached).
    redirect_headers = media_api.get_redirect_headers(media.path, public)
    logger.info(
        "Asset redirect (uncached metadata): "
        f"{component_version_uuid}/{asset_path} -> {redirect_headers}"
    )
    return HttpResponse(headers={**headers, **redirect_headers})