Friday, 24 December 2021

Annotation inside an annotation in Django Subquery?

I've got a few models and am trying to speed up the page where I list out users.

The issue is that I was leveraging model methods to display some of the data - but when I listed the Users out it was hitting the DB multiple times per User which ended up with hundreds of extra queries (thousands when there were thousands of User objects in the list) so it was a serious performance hit.

I've since began using annotate and prefetch_related which has cut the queries down significantly. I've just got one bit I can't figure out how to annotate.

I have a model method (on Summation model) I use to get a summary of Evaluation data for a user like this:

    def evaluations_summary(self):
        evaluations_summary = (
            self.evaluation_set.all()
            .values("evaluation_type__name")
            .annotate(Count("evaluation_type"))
        )
        return evaluations_summary

I'm trying to figure out how to annotate that particular query on a User object.

So the relationship looks like this User has multiple Summations, but only one is ever 'active', which is the one we display in the User list. Each Summation has multiple Evaluations - the summary of which we're trying to show as well.

Here is a summary of the relevant parts of code (including the Summation model method which gives an example of what is currently 'working' to display the data as needed) - I have also made a pastebin example for easier viewing.

# MODELS 
 
class User(AbstractUser):
    employee_no = models.IntegerField(default=1)
    ...all the other usual attributes...
 
class Summation(CreateUpdateMixin, CreateUpdateUserMixin):
    # CreateUpdateMixin adds 'created_at' & 'updated_at
    # CreateUpdateUserMixin adds 'created_by' & 'updated_by'
    employee = models.ForeignKey(
        User, on_delete=models.PROTECT, related_name="%(class)s_employee"
    )
    report_url = models.CharField(max_length=350, blank=True)
    ...other unimportant attributes...
 
    def evaluations_summary(self):
        evaluations_summary = (
            self.evaluation_set.all()
            .values("evaluation_type__name")
            .annotate(Count("evaluation_type"))
        )
        return evaluations_summary
 
 
class Evaluation(CreateUpdateMixin, CreateUpdateUserMixin):
    summation = models.ForeignKey(Summation, on_delete=models.PROTECT)
    evaluation_type = models.ForeignKey(
        EvaluationType, on_delete=models.PROTECT
    )
    evaluation_level = models.ForeignKey(
        EvaluationLevel, on_delete=models.PROTECT
    )
    evaluation_date = models.DateField(
        auto_now=False, auto_now_add=False, null=True, blank=True
    )
    published = models.BooleanField(default=False)
 
class EvaluationLevel(CreateUpdateMixin):
    name = models.CharField(max_length=50)
    description = models.CharField(max_length=50)    
 
class EvaluationType(CreateUpdateMixin):
    name = models.CharField(max_length=50)
    description = models.CharField(max_length=50)
    evaluation_levels = models.ManyToManyField(EvaluationLevel)   
 
 
# SERIALIZERS
 
class UserSerializer(serializers.HyperlinkedModelSerializer):
    multiple_locations = serializers.BooleanField()
    multiple_jobs = serializers.BooleanField()
    summation_status_due_date = serializers.DateField()
    summation_employee = SummationSerializer(many=True, read_only=True)
    evaluations_summary = serializers.SerializerMethodField()
 
    class Meta:
        model = User
        fields = [
            "url",
            "id",
            "username",
            "first_name",
            "last_name",
            "full_name",
            "email",
            "is_staff",
            "multiple_locations",
            "multiple_jobs",
            "summation_status_due_date",
            "summation_employee",
            "evaluations_summary",
        ]

     def get_evaluations_summary(self, obj):
        return (
            obj.summation_employee__evaluation_set.all()
            .values("evaluation_type__name")
            .annotate(Count("evaluation_type"))
        )
 
 
# CURRENT ANNOTATIONS
 
    # Subquerries for evaluation_summary
    active_summations = (
        Summation.objects.filter(employee=OuterRef("pk"), locked=False)
    )
    evaluations_set = (
        Evaluation.objects.filter(summation__in=active_summations)
        .order_by()
        .values("evaluation_type__name")
    )
    summary_set = evaluations_set.annotate(Count("evaluation_type"))
 
    # the 'summation_employee__evaluation_set' prefetch does not seem 
    # to make an impact on queries needed
    user_list = (
        User.objects.prefetch_related("summation_employee")
        .prefetch_related("summation_employee__evaluation_set")
        .filter(id__in=all_user_ids)
        # Get the total locations and if > 1, set multiple_locations to True
        .annotate(total_locations=Subquery(total_locations))
        .annotate(
            multiple_locations=Case(
                When(total_locations__gt=1, then=Value(True)),
                default=Value(False),
                output_field=BooleanField(),
            )
        )
        # Get the total jobs and if > 1 set mutiple_jobs to True
        .annotate(total_jobs=Subquery(total_jobs))
        .annotate(
            multiple_jobs=Case(
                When(total_jobs__gt=1, then=Value(True)),
                default=Value(False),
                output_field=BooleanField(),
            )
        )
        # Get the due_date of the summation from the SummationStatus object
        .annotate(
            summation_status_due_date=Subquery(
                summation_status.values("summation_due")
            )
        )
        # I need to add the annotation here for the 'evaluations_summary' to avoid
        # having the database hit for every user (which could possibly range into the
        # thousands in certain cases)
        # I have tried a number of ways to obtain what I'm looking for
        .annotate(
            evaluations_summary=Subquery(
                evaluations_set.order_by()
                .values("evaluation_type__name")
                .annotate(Count("evaluation_type"))
            )
        )
        # this annotation gives the error: Only one expression can be specified in the 
        # select list when the subquery is not introduced with EXISTS.
 

Is it even possible to transition that model method annotation?? Am I close?



from Annotation inside an annotation in Django Subquery?

No comments:

Post a Comment