Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions core/sources/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,13 +680,10 @@ def last_mapping_update(self):

def get_mapped_sources(self, exclude_self=True):
"""Returns only direct mapped sources"""
source_ids = self.__get_mapped_source_ids()
queryset = Source.objects.filter(mappings_to__sources=self).distinct()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@paynejd this is way more expensive.
I ran the query plan for this query against CIEL latest:

postgres=# explain analyze SELECT DISTINCT "sources"."checksums", "sources"."id", "sources"."public_access", "sources"."created_at", "sources"."updated_at", "sources"."created_by_id", "sources"."updated_by_id", "sources"."is_active", "sources"."extras", "sources"."uri", "sources"."logo_path", "sources"."mnemonic", "sources"."version", "sources"."released", "sources"."retired", "sources"."is_latest_version", "sources"."name", "sources"."full_name", "sources"."default_locale", "sources"."supported_locales", "sources"."website", "sources"."description", "sources"."external_id", "sources"."organization_id", "sources"."user_id", "sources"."_background_process_ids", "sources"."canonical_url", "sources"."identifier", "sources"."contact", "sources"."jurisdiction", "sources"."publisher", "sources"."purpose", "sources"."copyright", "sources"."revision_date", "sources"."text", "sources"."snapshot", "sources"."experimental", "sources"."meta", "sources"."active_concepts", "sources"."active_mappings", "sources"."custom_validation_schema", "sources"."source_type", "sources"."content_type", "sources"."collection_reference", "sources"."hierarchy_meaning", "sources"."case_sensitive", "sources"."compositional", "sources"."version_needed", "sources"."hierarchy_root_id", "sources"."autoid_concept_mnemonic", "sources"."autoid_concept_external_id", "sources"."autoid_mapping_mnemonic", "sources"."autoid_mapping_external_id", "sources"."autoid_concept_mnemonic_start_from", "sources"."autoid_concept_external_id_start_from", "sources"."autoid_mapping_mnemonic_start_from", "sources"."autoid_mapping_external_id_start_from", "sources"."autoid_concept_name_external_id", "sources"."autoid_concept_description_external_id", "sources"."properties", "sources"."filters", "sources"."match_algorithms" FROM "sources" INNER JOIN "mappings" ON ("sources"."id" = "mappings"."to_source_id") INNER JOIN "mappings_sources" ON ("mappings"."id" = "mappings_sources"."mapping_id") WHERE 
("mappings_sources"."source_id" = 22012 AND NOT ("sources"."id" = 22012));
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Unique  (cost=1295046.16..1387640.66 rows=341090 width=2188) (actual time=2600.595..5558.423 rows=49 loops=1)
   ->  Gather Merge  (cost=1295046.16..1334771.71 rows=341090 width=2188) (actual time=2600.594..5142.825 rows=324717 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         ->  Sort  (cost=1294046.13..1294401.44 rows=142121 width=2188) (actual time=2534.285..3317.281 rows=108239 loops=3)
               Sort Key: sources.checksums, sources.id, sources.public_access, sources.created_at, sources.updated_at, sources.created_by_id, sources.updated_by_id, sources.is_active, sources.extras, sources.uri, sources.logo_path, sources.mnemonic, sources.version, sources.released, sources.retired, sources.is_latest_version, sources.name, sources.full_name, sources.default_locale, sources.supported_locales, sources.website, sources.description, sources.external_id, sources.organization_id, sources.user_id, sources._background_process_ids, sources.canonical_url, sources.identifier, sources.contact, sources.jurisdiction, sources.publisher, sources.purpose, sources.copyright, sources.revision_date, sources.text, sources.snapshot, sources.experimental, sources.meta, sources.active_concepts, sources.active_mappings, sources.custom_validation_schema, sources.source_type, sources.content_type, sources.collection_reference, sources.hierarchy_meaning, sources.case_sensitive, sources.compositional, sources.version_needed, sources.hierarchy_root_id, sources.autoid_concept_mnemonic, sources.autoid_concept_external_id, sources.autoid_mapping_mnemonic, sources.autoid_mapping_external_id, sources.autoid_concept_mnemonic_start_from, sources.autoid_concept_external_id_start_from, sources.autoid_mapping_mnemonic_start_from, sources.autoid_mapping_external_id_start_from, sources.autoid_concept_name_external_id, sources.autoid_concept_description_external_id, sources.properties, sources.filters, sources.match_algorithms
               Sort Method: external merge  Disk: 65120kB
               Worker 0:  Sort Method: external merge  Disk: 61184kB
               Worker 1:  Sort Method: external merge  Disk: 66696kB
               ->  Nested Loop  (cost=3800.96..1012767.88 rows=142121 width=2188) (actual time=562.536..933.251 rows=108239 loops=3)
                     ->  Nested Loop  (cost=3800.66..1008870.55 rows=142139 width=8) (actual time=562.384..898.308 rows=108244 loops=3)
                           ->  Parallel Bitmap Heap Scan on mappings_sources  (cost=3800.23..183584.87 rows=142139 width=8) (actual time=2.239..20.165 rows=108244 loops=3)
                                 Recheck Cond: (source_id = 22012)
                                 Heap Blocks: exact=1108
                                 ->  Bitmap Index Scan on mappings_sources_source_id_288454a2  (cost=0.00..3714.94 rows=341134 width=0) (actual time=6.333..6.334 rows=324731 loops=1)
                                       Index Cond: (source_id = 22012)
                           ->  Index Scan using mappings_pkey on mappings  (cost=0.44..5.81 rows=1 width=16) (actual time=0.003..0.003 rows=1 loops=324731)
                                 Index Cond: (id = mappings_sources.mapping_id)
                     ->  Memoize  (cost=0.29..3.79 rows=1 width=2188) (actual time=0.000..0.000 rows=1 loops=324731)
                           Cache Key: mappings.to_source_id
                           Cache Mode: logical
                           Hits: 109702  Misses: 45  Evictions: 0  Overflows: 0  Memory Usage: 32kB
                           Worker 0:  Hits: 102953  Misses: 44  Evictions: 0  Overflows: 0  Memory Usage: 31kB
                           Worker 1:  Hits: 111942  Misses: 45  Evictions: 0  Overflows: 0  Memory Usage: 32kB
                           ->  Index Scan using sources_pkey on sources  (cost=0.28..3.78 rows=1 width=2188) (actual time=0.051..0.051 rows=1 loops=134)
                                 Index Cond: (id = mappings.to_source_id)
                                 Filter: (id <> 22012)
 Planning Time: 49.649 ms
 JIT:
   Functions: 58
   Options: Inlining true, Optimization true, Expressions true, Deforming true
   Timing: Generation 45.586 ms, Inlining 559.338 ms, Optimization 515.667 ms, Emission 605.110 ms, Total 1725.701 ms
 Execution Time: 5941.415 ms
(33 rows)

By comparison, the existing implementation with the subquery runs in two steps:

  1. Get source_ids
explain analyze SELECT "mappings"."to_source_id" FROM "mappings" INNER JOIN "mappings_sources" ON ("mappings"."id" = "mappings_sources"."mapping_id") WHERE "mappings_sources"."source_id" = 22012;
                                                                             QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Gather  (cost=4800.66..1043588.95 rows=341134 width=8) (actual time=70.935..520.629 rows=324731 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   ->  Nested Loop  (cost=3800.66..1008475.55 rows=142139 width=8) (actual time=71.048..470.223 rows=108244 loops=3)
         ->  Parallel Bitmap Heap Scan on mappings_sources  (cost=3800.23..183584.87 rows=142139 width=8) (actual time=2.560..25.744 rows=108244 loops=3)
               Recheck Cond: (source_id = 22012)
               Heap Blocks: exact=1456
               ->  Bitmap Index Scan on mappings_sources_source_id_288454a2  (cost=0.00..3714.94 rows=341134 width=0) (actual time=7.184..7.185 rows=324731 loops=1)
                     Index Cond: (source_id = 22012)
         ->  Index Scan using mappings_pkey on mappings  (cost=0.44..5.80 rows=1 width=16) (actual time=0.003..0.003 rows=1 loops=324731)
               Index Cond: (id = mappings_sources.mapping_id)
 Planning Time: 2.020 ms
 JIT:
   Functions: 21
   Options: Inlining true, Optimization true, Expressions true, Deforming true
   Timing: Generation 3.155 ms, Inlining 53.329 ms, Optimization 87.317 ms, Emission 64.267 ms, Total 208.068 ms
 Execution Time: 529.079 ms
(17 rows)
  2. Get Sources:
explain analyze SELECT "sources"."checksums", "sources"."id", "sources"."public_access", "sources"."created_at", "sources"."updated_at", "sources"."created_by_id", "sources"."updated_by_id", "sources"."is_active", "sources"."extras", "sources"."uri", "sources"."logo_path", "sources"."mnemonic", "sources"."version", "sources"."released", "sources"."retired", "sources"."is_latest_version", "sources"."name", "sources"."full_name", "sources"."default_locale", "sources"."supported_locales", "sources"."website", "sources"."description", "sources"."external_id", "sources"."organization_id", "sources"."user_id", "sources"."_background_process_ids", "sources"."canonical_url", "sources"."identifier", "sources"."contact", "sources"."jurisdiction", "sources"."publisher", "sources"."purpose", "sources"."copyright", "sources"."revision_date", "sources"."text", "sources"."snapshot", "sources"."experimental", "sources"."meta", "sources"."active_concepts", "sources"."active_mappings", "sources"."custom_validation_schema", "sources"."source_type", "sources"."content_type", "sources"."collection_reference", "sources"."hierarchy_meaning", "sources"."case_sensitive", "sources"."compositional", "sources"."version_needed", "sources"."hierarchy_root_id", "sources"."autoid_concept_mnemonic", "sources"."autoid_concept_external_id", "sources"."autoid_mapping_mnemonic", "sources"."autoid_mapping_external_id", "sources"."autoid_concept_mnemonic_start_from", "sources"."autoid_concept_external_id_start_from", "sources"."autoid_mapping_mnemonic_start_from", "sources"."autoid_mapping_external_id_start_from", "sources"."autoid_concept_name_external_id", "sources"."autoid_concept_description_external_id", "sources"."properties", "sources"."filters", "sources"."match_algorithms" FROM "sources" WHERE "sources"."id" IN (1412, 13, 14, 15, 1810, 19, 1811, 20, 18, 23, 22, 25, 1434, 26, 27, 1820, 28, 31, 32, 33, 34, 30, 29, 16933, 37, 39, 40, 41, 8873, 43, 44, 45, 42, 47, 1338, 8892, 1819, 712, 713, 21968, 
337, 338, 339, 4064, 8551, 21, 21996, 22000, 1401);

Also, on my local machine, the API /orgs/CIEL/sources/CIEL/latest/mapped-sources/?brief=true&limit=25 takes:

  1. current implementation - 10.40 seconds
  2. this change - did not finish (DNF) after 10 minutes (tried multiple times)

Copy link
Copy Markdown
Contributor

@snyaggarwal snyaggarwal Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A better version is:

        mapped_source_ids = (
            self.get_mappings_queryset()
            .values_list('to_source_id', flat=True)
            .distinct()
        )
        queryset = Source.objects.filter(id__in=mapped_source_ids)
        if exclude_self:
            queryset = queryset.exclude(id=self.id)
        return queryset

Query plan

postgres=# explain analyze SELECT "sources"."checksums", "sources"."id", "sources"."public_access", "sources"."created_at", "sources"."updated_at", "sources"."created_by_id", "sources"."updated_by_id", "sources"."is_active", "sources"."extras", "sources"."uri", "sources"."logo_path", "sources"."mnemonic", "sources"."version", "sources"."released", "sources"."retired", "sources"."is_latest_version", "sources"."name", "sources"."full_name", "sources"."default_locale", "sources"."supported_locales", "sources"."website", "sources"."description", "sources"."external_id", "sources"."organization_id", "sources"."user_id", "sources"."_background_process_ids", "sources"."canonical_url", "sources"."identifier", "sources"."contact", "sources"."jurisdiction", "sources"."publisher", "sources"."purpose", "sources"."copyright", "sources"."revision_date", "sources"."text", "sources"."snapshot", "sources"."experimental", "sources"."meta", "sources"."active_concepts", "sources"."active_mappings", "sources"."custom_validation_schema", "sources"."source_type", "sources"."content_type", "sources"."collection_reference", "sources"."hierarchy_meaning", "sources"."case_sensitive", "sources"."compositional", "sources"."version_needed", "sources"."hierarchy_root_id", "sources"."autoid_concept_mnemonic", "sources"."autoid_concept_external_id", "sources"."autoid_mapping_mnemonic", "sources"."autoid_mapping_external_id", "sources"."autoid_concept_mnemonic_start_from", "sources"."autoid_concept_external_id_start_from", "sources"."autoid_mapping_mnemonic_start_from", "sources"."autoid_mapping_external_id_start_from", "sources"."autoid_concept_name_external_id", "sources"."autoid_concept_description_external_id", "sources"."properties", "sources"."filters", "sources"."match_algorithms" FROM "sources" WHERE ("sources"."id" IN (SELECT DISTINCT V0."to_source_id" FROM "mappings" V0 WHERE V0."id" IN (SELECT U0."mapping_id" FROM "mappings_sources" U0 WHERE U0."source_id" = 22012)) AND NOT 
("sources"."id" = 22012));
                                                                                   QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=1044442.07..1044918.81 rows=90 width=2188) (actual time=421.628..424.490 rows=49 loops=1)
   ->  HashAggregate  (cost=1044441.79..1044442.69 rows=90 width=8) (actual time=421.585..424.263 rows=50 loops=1)
         Group Key: v0.to_source_id
         Batches: 1  Memory Usage: 32kB
         ->  Gather  (cost=4800.66..1043588.95 rows=341134 width=8) (actual time=188.452..406.243 rows=324731 loops=1)
               Workers Planned: 2
               Workers Launched: 2
               ->  Nested Loop  (cost=3800.66..1008475.55 rows=142139 width=8) (actual time=110.774..322.360 rows=108244 loops=3)
                     ->  Parallel Bitmap Heap Scan on mappings_sources u0  (cost=3800.23..183584.87 rows=142139 width=8) (actual time=1.832..10.844 rows=108244 loops=3)
                           Recheck Cond: (source_id = 22012)
                           Heap Blocks: exact=1095
                           ->  Bitmap Index Scan on mappings_sources_source_id_288454a2  (cost=0.00..3714.94 rows=341134 width=0) (actual time=5.166..5.166 rows=324731 loops=1)
                                 Index Cond: (source_id = 22012)
                     ->  Index Scan using mappings_pkey on mappings v0  (cost=0.44..5.80 rows=1 width=16) (actual time=0.002..0.002 rows=1 loops=324731)
                           Index Cond: (id = u0.mapping_id)
   ->  Index Scan using sources_pkey on sources  (cost=0.28..5.28 rows=1 width=2188) (actual time=0.004..0.004 rows=1 loops=50)
         Index Cond: (id = v0.to_source_id)
         Filter: (id <> 22012)
 Planning Time: 0.741 ms
 JIT:
   Functions: 33
   Options: Inlining true, Optimization true, Expressions true, Deforming true
   Timing: Generation 2.490 ms, Inlining 62.419 ms, Optimization 142.723 ms, Emission 121.482 ms, Total 329.114 ms
 Execution Time: 425.860 ms
(24 rows)

For CIEL latest, this runs in ~1.5 seconds

if exclude_self:
source_ids = set(source_ids) - {self.id}
return Source.objects.filter(id__in=source_ids)

def __get_mapped_source_ids(self):
return self.mappings.values_list('to_source_id', flat=True)
queryset = queryset.exclude(id=self.id)
return queryset

def clone_resources(self, user, concepts, mappings, **kwargs):
from core.mappings.models import Mapping
Expand Down
Loading