diff mbox series

[layerindex-web,3/5] layerindex/views.py: fix DuplicatesView timeout

Message ID 20260510004706.81282-3-tim.orling@konsulko.com
State New
Headers show
Series [layerindex-web,1/5] layerindex/views.py: fix StatsView statistics page timeout | expand

Commit Message

Tim Orling May 10, 2026, 12:47 a.m. UTC
Two performance problems caused the duplicates page to take over 115
seconds to respond at production data volumes; clients eventually gave up
and disconnected, leaving the server writing to a closed connection
(SIGPIPE / broken pipe):

1. IN clause built via a Python list comprehension: each of the
   get_recipes/get_classes/get_incfiles methods evaluated its 'dupes'
   queryset in Python, using a list comprehension to build a filter:

     filter(pn__in=[item['pn'] for item in dupes])

   This loaded all matching values into memory and emitted a potentially
   huge SQL IN (...) literal. Replace with a Django ORM subquery by
   passing the queryset directly:

     filter(pn__in=dupes)   # dupes is a .values('pn') queryset

   Django translates this into a SQL subquery (WHERE pn IN (SELECT pn
   FROM ... GROUP BY pn HAVING COUNT(...) > 1)), which the database can
   optimise without a round-trip through Python.

2. N+1 queries: the template renders recipe.layerbranch.layer.name,
   class.layerbranch.layer.name and incfile.layerbranch.layer.name for
   every row. Without select_related these trigger one extra query per
   row. Add select_related('layerbranch__layer') to each queryset so the
   JOIN is done once up front.

[YOCTO #16175]

AI-Generated: Claude Cowork Sonnet 4.6
Signed-off-by: Tim Orling <tim.orling@konsulko.com>
---
 layerindex/views.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/layerindex/views.py b/layerindex/views.py
index 23bafd0..742e5d5 100644
--- a/layerindex/views.py
+++ b/layerindex/views.py
@@ -656,24 +656,30 @@  class DuplicatesView(TemplateView):
         init_qs = Recipe.objects.filter(layerbranch__branch__name=self.kwargs['branch'])
         if layer_ids:
             init_qs = init_qs.filter(layerbranch__layer__in=layer_ids)
-        dupes = init_qs.values('pn').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1)
-        qs = init_qs.all().filter(pn__in=[item['pn'] for item in dupes]).order_by('pn', 'layerbranch__layer', '-pv')
+        # Use a subquery instead of a Python list comprehension so that Django
+        # emits a single SQL subquery rather than loading all duplicate pn values
+        # into memory and building a potentially huge IN (...) clause.
+        # See: https://bugzilla.yoctoproject.org/show_bug.cgi?id=16175
+        dupes = init_qs.values('pn').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1).values('pn')
+        qs = init_qs.filter(pn__in=dupes).select_related('layerbranch__layer').order_by('pn', 'layerbranch__layer', '-pv')
         return recipes_preferred_count(qs)
 
     def get_classes(self, layer_ids):
         init_qs = BBClass.objects.filter(layerbranch__branch__name=self.kwargs['branch'])
         if layer_ids:
             init_qs = init_qs.filter(layerbranch__layer__in=layer_ids)
-        dupes = init_qs.values('name').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1)
-        qs = init_qs.all().filter(name__in=[item['name'] for item in dupes]).order_by('name', 'layerbranch__layer')
+        # Use a subquery instead of a Python list comprehension (see bug #16175)
+        dupes = init_qs.values('name').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1).values('name')
+        qs = init_qs.filter(name__in=dupes).select_related('layerbranch__layer').order_by('name', 'layerbranch__layer')
         return qs
 
     def get_incfiles(self, layer_ids):
         init_qs = IncFile.objects.filter(layerbranch__branch__name=self.kwargs['branch'])
         if layer_ids:
             init_qs = init_qs.filter(layerbranch__layer__in=layer_ids)
-        dupes = init_qs.values('path').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1)
-        qs = init_qs.all().filter(path__in=[item['path'] for item in dupes]).order_by('path', 'layerbranch__layer')
+        # Use a subquery instead of a Python list comprehension (see bug #16175)
+        dupes = init_qs.values('path').annotate(Count('layerbranch', distinct=True)).filter(layerbranch__count__gt=1).values('path')
+        qs = init_qs.filter(path__in=dupes).select_related('layerbranch__layer').order_by('path', 'layerbranch__layer')
         return qs
 
     def get_context_data(self, **kwargs):