From f4e4b97e4e51c5bf381811e73e1a70d021e776c1 Mon Sep 17 00:00:00 2001 From: Slava Date: Sat, 15 Dec 2012 23:31:07 +0400 Subject: [PATCH 1/6] Better docs --- README => README.markdown | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) rename README => README.markdown (77%) diff --git a/README b/README.markdown similarity index 77% rename from README rename to README.markdown index e2a0a06..7ddbca3 100644 --- a/README +++ b/README.markdown @@ -1,5 +1,38 @@ -ParallelCurl -~~~~~~~~~~~~~~~ +# ParallelCurl + +## Usage + + require 'parallelcurl.php'; + + function on_request_done($content, $url, $ch, $param) + { + $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + if ($httpcode !== 200) { + print "Fetch error $httpcode for '$url'\n"; + return; + } + print $content; + } + + $curl_options = array( + CURLOPT_SSL_VERIFYPEER => FALSE, + CURLOPT_SSL_VERIFYHOST => FALSE, + CURLOPT_USERAGENT => 'Parallel Curl test script', + ); + + $max_parallel_requests = 10; + $parallel_curl = new ParallelCurl($max_parallel_requests, $curl_options); + + $urls = array("http://github.com/", "http://news.ycombinator.com/"); + + foreach ($urls as $url) { + $param = 'Arbitrary parameter'; + $parallel_curl->startRequest($url, 'on_request_done', $param); + } + + $parallel_curl->finishAllRequests(); + +# In detail This module provides an easy-to-use interface to allow you to run multiple CURL url fetches in parallel in PHP. 
From 8301c23d91aef82ecde29febae8684adb72e34a4 Mon Sep 17 00:00:00 2001 From: Slava Date: Sat, 15 Dec 2012 23:34:08 +0400 Subject: [PATCH 2/6] Better docs --- README.markdown | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/README.markdown b/README.markdown index 7ddbca3..1422a10 100644 --- a/README.markdown +++ b/README.markdown @@ -1,6 +1,22 @@ # ParallelCurl -## Usage +## Simple usage + + require 'parallelcurl.php'; + + $parallel_curl = new ParallelCurl($max_parallel_requests, $curl_options); + + $urls = array("http://github.com/", "http://news.ycombinator.com/"); + foreach ($urls as $url) { + $parallel_curl->startRequest($url, function($content, $url, $ch, $param) { + print $content; + }); + } + + $parallel_curl->finishAllRequests(); + + +## Advanced usage require 'parallelcurl.php'; @@ -38,14 +54,14 @@ This module provides an easy-to-use interface to allow you to run multiple CURL To test it, go to the command line, cd to this folder and run -./test.php + ./test.php This should run 100 searches through Google's API, printing the results. To see what sort of performance difference running parallel requests gets you, try altering the default of 10 requests running in parallel using the optional script argument, and timing how long each takes: -time ./test.php 1 -time ./test.php 20 + time ./test.php 1 + time ./test.php 20 The first only allows one request to run at once, serializing the calls. I see this taking around 100 seconds. The second run has 20 in flight at a time, and takes 11 seconds! Be warned though, @@ -57,9 +73,9 @@ waiting for each one to finish before starting the next. Under the hood it uses but since I find that interface painfully confusing, I wanted one that corresponded to the tasks that I wanted to run. 
-To use it, first copy parallelcurl.php and include it, then create the ParallelCurl object: +To use it, first copy `parallelcurl.php` and include it, then create the `ParallelCurl` object: -$parallelcurl = new ParallelCurl(10); + $parallelcurl = new ParallelCurl(10); The first argument to the constructor is the maximum number of outstanding fetches to allow before blocking to wait for one to finish. You can change this later using setMaxRequests() @@ -67,7 +83,7 @@ The second optional argument is an array of curl options in the format used by c Next, start a URL fetch: -$parallelcurl->startRequest('http://example.com', 'on_request_done', array('something')); + $parallelcurl->startRequest('http://example.com', 'on_request_done', array('something')); The first argument is the address that should be fetched The second is the callback function that will be run once the request is done @@ -89,7 +105,7 @@ contents of the POST parameters. Since you may have requests outstanding at the end of your script, you *MUST* call -$parallelcurl->finishAllRequests(); + $parallelcurl->finishAllRequests(); before you exit. If you don't, the final requests may be left unprocessed! 
From d921dec8a47075c82a4ecfe93638d3138a3df588 Mon Sep 17 00:00:00 2001 From: Slava Date: Sat, 15 Dec 2012 23:34:39 +0400 Subject: [PATCH 3/6] Typo --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 1422a10..e03ff6a 100644 --- a/README.markdown +++ b/README.markdown @@ -8,7 +8,7 @@ $urls = array("http://github.com/", "http://news.ycombinator.com/"); foreach ($urls as $url) { - $parallel_curl->startRequest($url, function($content, $url, $ch, $param) { + $parallel_curl->startRequest($url, function($content, $url, $ch) { print $content; }); } From 3ba457037be3d4b178f31b191871a2a771d7b52d Mon Sep 17 00:00:00 2001 From: Slava Date: Sat, 15 Dec 2012 23:36:44 +0400 Subject: [PATCH 4/6] Typo --- README.markdown | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.markdown b/README.markdown index e03ff6a..1d6baef 100644 --- a/README.markdown +++ b/README.markdown @@ -42,7 +42,7 @@ $urls = array("http://github.com/", "http://news.ycombinator.com/"); foreach ($urls as $url) { - $param = 'Arbitrary parameter'; + $param = 'Arbitrary parameter to callback: ' . $url; $parallel_curl->startRequest($url, 'on_request_done', $param); } @@ -78,8 +78,8 @@ To use it, first copy `parallelcurl.php` and include it, then create the `Parall $parallelcurl = new ParallelCurl(10); The first argument to the constructor is the maximum number of outstanding fetches to allow -before blocking to wait for one to finish. You can change this later using setMaxRequests() -The second optional argument is an array of curl options in the format used by curl_setopt_array() +before blocking to wait for one to finish. 
You can change this later using `setMaxRequests()`. +The second optional argument is an array of curl options in the format used by `curl_setopt_array()`. Next, start a URL fetch: @@ -89,10 +89,10 @@ The first argument is the address that should be fetched The second is the callback function that will be run once the request is done The third is a 'cookie', that can contain arbitrary data to be passed to the callback -This startRequest call will return immediately, as long as less than the maximum number of +This `startRequest` call will return immediately, as long as fewer than the maximum number of requests are outstanding. Once the request is done, the callback function will be called, eg: -on_request_done($content, 'http://example.com', $ch, array('something)); + on_request_done($content, 'http://example.com', $ch, array('something')); The callback should take four arguments. The first is a string containing the content found at the URL. The second is the original URL requested, the third is the curl handle of the request that @@ -109,4 +109,4 @@ Since you may have requests outstanding at the end of your script, you *MUST* ca before you exit. If you don't, the final requests may be left unprocessed! -By Pete Warden , freely reusable, see http://petewarden.typepad.com for more +By Pete Warden , freely reusable, see [http://petewarden.typepad.com](http://petewarden.typepad.com) for more From f79e27e60c3265d60f6b80d0ab856773a7b4898f Mon Sep 17 00:00:00 2001 From: Slava Date: Sat, 15 Dec 2012 23:37:09 +0400 Subject: [PATCH 5/6] # --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 1d6baef..f47067a 100644 --- a/README.markdown +++ b/README.markdown @@ -48,7 +48,7 @@ $parallel_curl->finishAllRequests(); -# In detail +## In detail This module provides an easy-to-use interface to allow you to run multiple CURL url fetches in parallel in PHP. 
From c2ca0d8400b4bfd5f98f2e13100837325d1019a4 Mon Sep 17 00:00:00 2001 From: Slava Date: Sat, 15 Dec 2012 23:37:48 +0400 Subject: [PATCH 6/6] More fixes to docs --- README.markdown | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.markdown b/README.markdown index f47067a..60de57b 100644 --- a/README.markdown +++ b/README.markdown @@ -99,9 +99,9 @@ the URL. The second is the original URL requested, the third is the curl handle can be queried to get the results, and the fourth is the arbitrary 'cookie' value that you associated with this object. This cookie contains user-defined data. -There's an optional fourth parameter to startRequest. If you pass in an array at that position in -the arguments, the POST method will be used instead, with the contents of the array controlling the -contents of the POST parameters. +There's an optional fourth parameter to `startRequest`. If you pass in an array at that position in +the arguments, the `POST` method will be used instead, with the contents of the array controlling the +contents of the `POST` parameters. Since you may have requests outstanding at the end of your script, you *MUST* call