Thursday 5 September 2019

Node JS/Gzip: Image file download ends prematurely with no error

I am creating a application in Node.js to download image files. However, I have been having an issue where if my download speed is slow or I lose connection the image I am downloading will be truncated. This would not be so bad if my program threw an error warning me that the image did not finish downloading, however, it does not and resolves as if the image downloaded successfully. I do not receive an error when I check the response code or probe the image after downloading.

This is the current code I am using to download images.

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

var filename = 'C:/Users/User/Desktop/myimage.jpg';
var req = request(createRequestHeaders('www.linktomyimage.com/image.jpg'));
downloadImage(req, filename)

function createRequestHeaders(url) {
    var cookie = `userid=${userid}; phash=${phash};`;
    return {
        'url': url,
        'method': 'GET',
        'gzip': true,
        'headers': {
            'Referer': `https://${website}/`,
            'Cookie': cookie
        }
    };
}

function downloadImage(req, filename) {
    return new Promise((resolve, reject) => {
        var response = null;
        var bytes;
        var dirname = path.dirname(filename);
        if (!fs.existsSync(dirname)) fs.ensureDirSync(dirname);

        progress(req, { delay: 0 }).on('progress', function (state) {
            updateDownloadSpeed(state.speed);
        }).on('end', function () {
            if (response.statusCode == 200) {
                var input = require('fs').createReadStream(filename);
                probe(input).then(result => {
                    input.destroy();
                    if (result != null) {
                        resolve({bytes: bytes, width: result.width,
                                height: result.height,});
                    } else {
                        // The image size probe does not detect if the download was truncated
                        reject({ 'name': 'ImageMissingOrCorrupt');
                    }
                }).catch((error) => {
                    reject(error);
                });
            } else {
                // This is never triggered when the download stops and the image is truncated
                reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
            }
        }).on('response', function (resp) {
            response = resp;
            bytes = response.headers['content-length'];
        }).on('error', function (error) {
            // This does not detect when an image is truncated either
            reject(error);
        }).pipe(fs.createWriteStream(filename));
    });
}

I asked an earlier version of this question here but after implementing suggestions from the answer my downloader still has the same issues. The code for my downloader has also changed since then.

How can I detect when an image has been truncated so I can instruct the downloader to reattempt the download it?

Edit 1

After reading this I think my issue might have something to do with the website I'm downloading from requiring me to use gzip and the client not being able to tell if the content has actually finished downloading when the server stops the response. However, I am not sure how to test for this happening.

Edit 2

This is what the response header looks like when my downloadedr successfully connects to an image.

cache-control:"public, max-age=31536000"
connection:"close"
content-disposition:"inline; filename=129.jpg"
content-length:"185756"
content-transfer-encoding:"binary"
content-type:"image/jpeg"
date:"Thu, 05 Sep 2019 00:15:11 GMT"
expires:"Fri, 04 Sep 2020 00:15:11 GMT"
server:"Apache"



from Node JS/Gzip: Image file download ends prematurely with no error

No comments:

Post a Comment