Friday, 28 April 2023

Django using prefetch_related to reduce queries

I am trying to understand how I can improve the following query:

class PDFUploadRequestViewSet(viewsets.ModelViewSet):
    """API endpoints for PDFUploadRequest objects.

    The queryset can be narrowed to one project through the ``HTTP_PROJECT_ID``
    entry of ``request.META``, and the ``list`` action uses a dedicated
    serializer class.
    """

    def get_queryset(self):
        """Return the upload requests visible to this request.

        The related ``pdf_page_images`` are prefetched so that serializing
        them costs one additional query for the whole result set instead of
        one query per upload request.
        """
        # prefetch_related() returns a *new* queryset, so the result has to be
        # the object that is eventually returned from this method.
        queryset = PDFUploadRequest.objects.prefetch_related('pdf_page_images')

        project_id = self.request.META.get('HTTP_PROJECT_ID')
        if project_id:
            queryset = queryset.filter(project_id=project_id)
        return queryset

    def get_serializer_class(self):
        """Return the list serializer for ``list``; otherwise ``serializer_class``."""
        if self.action == 'list':
            return PDFUploadRequestListSerializer
        return self.serializer_class

The issue is that the more PDFPageImage objects there are in the DB, the slower the request becomes, because a separate query is created for each of them. If there is only one PDFPageImage related to a given PDFUploadRequest then it's pretty fast, but each additional one produces an extra query. After doing some research I found out that prefetch_related might help with this, but I have not been able to figure out how to use it with my models.

This is what the model for PDFUploadRequest looks like:

class PDFUploadRequest(models.Model, BaseStatusClass):
    """An uploaded PDF file together with its processing status and timestamps."""

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
    file = models.FileField(upload_to='uploaded_pdf')
    file_name = models.CharField(max_length=255)
    status = models.CharField(
        max_length=50,
        choices=BaseStatusClass.PDF_STATUS_CHOICES,
        default=BaseStatusClass.UPLOADED,
    )
    completed = models.DateTimeField(null=True)
    processing_started = models.DateTimeField(null=True)
    text = models.TextField(default=None, null=True, blank=True)
    owner = models.ForeignKey(
        User,
        related_name='pdf_requests',
        on_delete=models.PROTECT,
        null=True,
        default=None,
    )
    project = models.ForeignKey(
        Project,
        related_name='pdf_requests',
        on_delete=models.PROTECT,
        null=True,
        default=None,
    )

    class Meta:
        # NOTE(review): no `created` field is declared in this class as shown;
        # presumably it is defined elsewhere - confirm.
        ordering = ['-created']

    def no_of_pages(self):
        """Return the number of related page images (via ``pdf_page_images``)."""
        page_images = self.pdf_page_images
        return page_images.count()

    def time_taken(self):
        """Return ``completed - processing_started``, or None if either is unset."""
        if not (self.completed and self.processing_started):
            return None
        return self.completed - self.processing_started

And this is the related model that I think is causing issues:

class PDFPageImage(models.Model, BaseStatusClass):
    """One page image belonging to a PDFUploadRequest.

    Reachable from the parent request through the ``pdf_page_images``
    reverse relation.
    """

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
    pdf_request = models.ForeignKey(
        PDFUploadRequest,
        related_name='pdf_page_images',
        on_delete=models.CASCADE,
    )
    image = models.ImageField()
    status = models.CharField(
        max_length=50,
        choices=BaseStatusClass.PDF_STATUS_CHOICES,
        default=BaseStatusClass.UPLOADED,
    )
    page_number = models.IntegerField(
        null=True,
        blank=True,
        default=None,
    )

    class Meta:
        ordering = ['page_number']
        # A page number may appear only once per request among rows matching
        # the condition below.
        # NOTE(review): no `deleted` field is declared in this class as shown;
        # presumably it is defined elsewhere - confirm.
        constraints = [
            models.UniqueConstraint(
                fields=['pdf_request', 'page_number'],
                condition=models.Q(deleted=False),
                name='pdf_request_and_page_number_unique',
            ),
        ]

Here is the serializer:

class PDFUploadRequestSerializer(serializers.ModelSerializer):
    """Serialize a PDFUploadRequest together with its nested page images."""

    # Nested, read-only representation of the `pdf_page_images` reverse
    # relation. Rendering it touches the related rows of every serialized
    # request, so the queryset feeding this serializer should use
    # prefetch_related('pdf_page_images') to avoid one query per request.
    pdf_page_images = PDFPageImageSerializer(many=True, read_only=True)

    class Meta:
        model = PDFUploadRequest
        # The stray leading comma before 'owner' was a SyntaxError; removed.
        fields = ('id', 'file', 'file_name', 'status', 'pdf_page_images',
                  'owner', 'project')
        # NOTE(review): 'text' is listed as read-only but is not part of
        # `fields` - confirm whether it was meant to be exposed.
        read_only_fields = ('file_name', 'pdf_page_images', 'text',
                            'owner', 'project')

I have tried using prefetch_related for the pdf_page_images relation on the PDFUploadRequest queryset:

# Each chained call produces a new queryset; the expression only has an effect
# if its result is returned (e.g. from get_queryset) or assigned.
(
    PDFUploadRequest.objects
    .filter(project_id=project_id)
    .prefetch_related("pdf_page_images")
)

But I don't think it is doing anything. Any idea what I can do to reduce the query times here?



from Django using prefetch_related to reduce queries

No comments:

Post a Comment