@@ -656,24 +656,30 @@ class DuplicatesView(TemplateView):
init_qs = Recipe.objects.filter(layerbranch__branch__name=self.kwargs['branch'])
if layer_ids:
init_qs = init_qs.filter(layerbranch__layer__in=layer_ids)
- dupes = init_qs.values('pn').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1)
- qs = init_qs.all().filter(pn__in=[item['pn'] for item in dupes]).order_by('pn', 'layerbranch__layer', '-pv')
+ # Use a subquery instead of a Python list comprehension so that Django
+ # emits a single SQL subquery rather than loading all duplicate pn values
+ # into memory and building a potentially huge IN (...) clause.
+ # See: https://bugzilla.yoctoproject.org/show_bug.cgi?id=16175
+ dupes = init_qs.values('pn').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1).values('pn')
+ qs = init_qs.filter(pn__in=dupes).select_related('layerbranch__layer').order_by('pn', 'layerbranch__layer', '-pv')
return recipes_preferred_count(qs)
def get_classes(self, layer_ids):
init_qs = BBClass.objects.filter(layerbranch__branch__name=self.kwargs['branch'])
if layer_ids:
init_qs = init_qs.filter(layerbranch__layer__in=layer_ids)
- dupes = init_qs.values('name').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1)
- qs = init_qs.all().filter(name__in=[item['name'] for item in dupes]).order_by('name', 'layerbranch__layer')
+ # Use a subquery instead of a Python list comprehension (see bug #16175)
+ dupes = init_qs.values('name').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1).values('name')
+ qs = init_qs.filter(name__in=dupes).select_related('layerbranch__layer').order_by('name', 'layerbranch__layer')
return qs
def get_incfiles(self, layer_ids):
init_qs = IncFile.objects.filter(layerbranch__branch__name=self.kwargs['branch'])
if layer_ids:
init_qs = init_qs.filter(layerbranch__layer__in=layer_ids)
- dupes = init_qs.values('path').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1)
- qs = init_qs.all().filter(path__in=[item['path'] for item in dupes]).order_by('path', 'layerbranch__layer')
+ # Use a subquery instead of a Python list comprehension (see bug #16175)
+ dupes = init_qs.values('path').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1).values('path')
+ qs = init_qs.filter(path__in=dupes).select_related('layerbranch__layer').order_by('path', 'layerbranch__layer')
return qs
def get_context_data(self, **kwargs):
Two performance problems caused the duplicates page to take over 115 seconds to respond at production data volumes, eventually causing clients to disconnect (SIGPIPE / broken pipe): 1. Python list comprehension IN clause: each get_recipes/get_classes/ get_incfiles method evaluated its 'dupes' queryset in Python with a list comprehension to build a filter: filter(pn__in=[item['pn'] for item in dupes]) This loaded all matching values into memory and emitted a potentially huge SQL IN (...) literal. Replace with a Django ORM subquery by passing the queryset directly: filter(pn__in=dupes) # dupes is a .values('pn') queryset Django translates this into a SQL subquery (WHERE pn IN (SELECT pn FROM ... GROUP BY pn HAVING COUNT(...) > 1)), which the database can optimise without a round-trip through Python. 2. N+1 queries: the template renders recipe.layerbranch.layer.name, class.layerbranch.layer.name and incfile.layerbranch.layer.name for every row. Without select_related these trigger one extra query per row. Add select_related('layerbranch__layer') to each queryset so the JOIN is done once up front. [YOCTO #16175] AI-Generated: Claude Cowork Sonnet 4.6 Signed-off-by: Tim Orling <tim.orling@konsulko.com> --- layerindex/views.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-)