Friday, 4 January 2019

Get all the URLs using multi curl

I'm working on an app that gets all the URLs from an array of sites and displays it in array form or JSON.

I can do it using for loop, the problem is the execution time when I tried 10 URLs it gives me an error saying exceeds maximum execution time.

Upon searching I found this multi curl

I also found this Fast PHP CURL Multiple Requests: Retrieve the content of multiple URLs using CURL. I tried to add my code but didn't work because I don't how to use the function.

Hope you help me.

Thanks.

This is my sample code.

<?php

$urls=array(
'http://site1.com/',
'http://site2.com/',
'http://site3.com/');


$mh = curl_multi_init();
foreach ($urls as $i => $url) {

        $urlContent = file_get_contents($url);

        $dom = new DOMDocument();
        @$dom->loadHTML($urlContent);
        $xpath = new DOMXPath($dom);
        $hrefs = $xpath->evaluate("/html/body//a");

        for($i = 0; $i < $hrefs->length; $i++){
            $href = $hrefs->item($i);
            $url = $href->getAttribute('href');
            $url = filter_var($url, FILTER_SANITIZE_URL);
            // validate url
            if(!filter_var($url, FILTER_VALIDATE_URL) === false){
                echo '<a href="'.$url.'">'.$url.'</a><br />';
            }
        }

        $conn[$i]=curl_init($url);
        $fp[$i]=fopen ($g, "w");
        curl_setopt ($conn[$i], CURLOPT_FILE, $fp[$i]);
        curl_setopt ($conn[$i], CURLOPT_HEADER ,0);
        curl_setopt($conn[$i],CURLOPT_CONNECTTIMEOUT,60);
        curl_multi_add_handle ($mh,$conn[$i]);
}
do {
    $n=curl_multi_exec($mh,$active);
}
while ($active);
foreach ($urls as $i => $url) {
    curl_multi_remove_handle($mh,$conn[$i]);
    curl_close($conn[$i]);
    fclose ($fp[$i]);
}
curl_multi_close($mh);
?>



from Get all the URLs using multi curl

No comments:

Post a Comment