I am trying to understand how I can improve the following query:
class PDFUploadRequestViewSet(viewsets.ModelViewSet):
    """Model viewset exposing ``PDFUploadRequest`` objects.

    The queryset is narrowed to a single project when the request carries a
    non-empty ``HTTP_PROJECT_ID`` entry in ``request.META``.
    """

    def get_queryset(self):
        """Return the upload requests with their page images prefetched.

        ``prefetch_related("pdf_page_images")`` fetches the page images of
        every returned upload request in one additional query, instead of
        one query per upload request when the nested ``pdf_page_images``
        serializer field walks the reverse relation.
        """
        queryset = PDFUploadRequest.objects.prefetch_related("pdf_page_images")
        project_id = self.request.META.get('HTTP_PROJECT_ID')
        if project_id:
            return queryset.filter(project_id=project_id)
        # No project header: fall back to every upload request.
        return queryset

    def get_serializer_class(self):
        """Use the list serializer for ``list``; the default one otherwise."""
        if self.action == 'list':
            return PDFUploadRequestListSerializer
        return self.serializer_class
The issue is that the more PDFPageImage
objects there are in the DB, the more queries are issued: a separate query is created for each of them, which slows down the request. If there is only one PDFPageImage
related to a given PDFUploadRequest,
then it's pretty fast, but each additional one produces an extra query. After doing some research I found out that prefetch_related
might help with this, but I have not been able to figure out how to use it with my models.
This is what the model for PDFUploadRequest looks like:
class PDFUploadRequest(models.Model, BaseStatusClass):
    """An uploaded PDF file together with its processing status and timestamps."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    file = models.FileField(upload_to='uploaded_pdf')
    file_name = models.CharField(max_length=255)
    status = models.CharField(
        max_length=50,
        choices=BaseStatusClass.PDF_STATUS_CHOICES,
        default=BaseStatusClass.UPLOADED,
    )
    completed = models.DateTimeField(null=True)
    processing_started = models.DateTimeField(null=True)
    text = models.TextField(default=None, null=True, blank=True)
    owner = models.ForeignKey(
        User,
        related_name='pdf_requests',
        on_delete=models.PROTECT,
        null=True,
        default=None,
    )
    project = models.ForeignKey(
        Project,
        related_name='pdf_requests',
        on_delete=models.PROTECT,
        null=True,
        default=None,
    )

    class Meta:
        # NOTE(review): `created` is not declared in this class; presumably
        # it is provided elsewhere (e.g. a base class) - confirm.
        ordering = ['-created']

    def no_of_pages(self):
        """Return how many page images are related to this upload request."""
        page_images = self.pdf_page_images
        return page_images.count()

    def time_taken(self):
        """Return ``completed - processing_started``.

        Returns ``None`` when either of the two timestamps is not set.
        """
        if not (self.completed and self.processing_started):
            return None
        return self.completed - self.processing_started
And this is the related model that I think is causing issues:
class PDFPageImage(models.Model, BaseStatusClass):
    """Image of a single page belonging to a ``PDFUploadRequest``."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Reverse accessor on PDFUploadRequest is `pdf_page_images`.
    pdf_request = models.ForeignKey(
        PDFUploadRequest,
        related_name="pdf_page_images",
        on_delete=models.CASCADE,
    )
    image = models.ImageField()
    status = models.CharField(
        max_length=50,
        choices=BaseStatusClass.PDF_STATUS_CHOICES,
        default=BaseStatusClass.UPLOADED,
    )
    page_number = models.IntegerField(null=True, blank=True, default=None)

    class Meta:
        ordering = ['page_number']
        # A page number may appear only once per upload request among rows
        # matching deleted=False.
        # NOTE(review): `deleted` is not declared in this class; presumably
        # it is provided elsewhere - confirm.
        constraints = [
            models.UniqueConstraint(
                fields=['pdf_request', 'page_number'],
                condition=models.Q(deleted=False),
                name='pdf_request_and_page_number_unique',
            ),
        ]
Here is the serializer:
class PDFUploadRequestSerializer(serializers.ModelSerializer):
    """Serialize a ``PDFUploadRequest`` with its page images nested inline."""

    # Nested, read-only representation of the related page images; this
    # walks the `pdf_page_images` reverse relation of every serialized request.
    pdf_page_images = PDFPageImageSerializer(many=True, read_only=True)

    class Meta:
        model = PDFUploadRequest
        fields = (
            'id',
            'file',
            'file_name',
            'status',
            'pdf_page_images',
            'owner',
            'project',
        )
        # NOTE(review): 'text' is marked read-only but is not listed in
        # `fields`; confirm whether it should also be exposed there.
        read_only_fields = (
            'file_name',
            'pdf_page_images',
            'text',
            'owner',
            'project',
        )
I have tried using prefetch_related
on the pdf_page_images relation (the PDFPageImage model):
# Upload requests of one project, with the related page images prefetched.
(
    PDFUploadRequest.objects
    .filter(project_id=project_id)
    .prefetch_related("pdf_page_images")
)
But I don't think it is doing anything. Any idea what I can do to reduce the query times here?
from Django using prefetch_related to reduce queries
No comments:
Post a Comment