From e1cfa057713f6a02fa5806f9b73de76fe59dd6e1 Mon Sep 17 00:00:00 2001 From: "M. C. Skinner" Date: Tue, 8 Feb 2011 20:10:24 -0500 Subject: [PATCH 01/16] Many changes: objectified, replace links in content to point to the proxy. --- ba-simple-proxy.php | 298 ++++++++++++++++++++++++++++++-------------- 1 file changed, 202 insertions(+), 96 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..ecdf059 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -136,119 +136,225 @@ // ############################################################################ // Change these configuration options if needed, see above descriptions for info. -$enable_jsonp = false; -$enable_native = false; -$valid_url_regex = '/.*/'; // ############################################################################ -$url = $_GET['url']; +main(); -if ( !$url ) { +function main(){ + $proxy = new SimpleProxy(); + $url = $proxy->getURL(); + $headersContent = $proxy->fetchURL($url); + if( sizeof($headersContent) !=2){ + throw new Exception("Error trying to get content: "+$url); + } + // print_r( $headersContent[0]); - // Passed url not specified. - $contents = 'ERROR: url not specified'; - $status = array( 'http_code' => 'ERROR' ); - -} else if ( !preg_match( $valid_url_regex, $url ) ) { - - // Passed url doesn't match $valid_url_regex. 
- $contents = 'ERROR: invalid url'; - $status = array( 'http_code' => 'ERROR' ); - -} else { - $ch = curl_init( $url ); - - if ( strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { - curl_setopt( $ch, CURLOPT_POST, true ); - curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); + //print_r($headersContent); + $proxy->outputHeaders($headersContent[0]); + + $content = $proxy->filterContent($headersContent[1], $url); + + $proxy->outputContent($content); + // print_r($headersContent[0]); + //print $headersContent[1]; + +} + +class SimpleProxy { + protected $enable_jsonp = false; + protected $enable_native = false; + protected $valid_url_regex = '/.*/'; + + function getURL(){ + global $valid_url_regex; + $url = $_GET['url']; + $needle = "url="; + $pos = strpos($_SERVER['REQUEST_URI'], $needle); + $url = substr($_SERVER['REQUEST_URI'], $pos+strlen($needle)); + if ( !$url ) { + // Passed url not specified. + $contents = 'ERROR: url not specified'; + $status = array( 'http_code' => 'ERROR' ); + throw new Exception("URL not specified."); + } else if ( !preg_match( $this->valid_url_regex, $url ) ) { + // Passed url doesn't match $valid_url_regex. + $contents = 'ERROR: invalid url'; + $status = array( 'http_code' => 'ERROR' ); + throw new Exception("Invalid url,"); + } + return $url; } - - if ( $_GET['send_cookies'] ) { - $cookie = array(); - foreach ( $_COOKIE as $key => $value ) { - $cookie[] = $key . '=' . $value; + + function fetchURL($url){ + + $ch = curl_init( $url ); + + if ( strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { + curl_setopt( $ch, CURLOPT_POST, true ); + curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); } - if ( $_GET['send_session'] ) { - $cookie[] = SID; + + if ( $_GET['send_cookies'] ) { + $cookie = array(); + foreach ( $_COOKIE as $key => $value ) { + $cookie[] = $key . '=' . 
$value; + } + if ( $_GET['send_session'] ) { + $cookie[] = SID; + } + + $cookie = implode( '; ', $cookie ); + + curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); } - $cookie = implode( '; ', $cookie ); + + curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); + curl_setopt( $ch, CURLOPT_HEADER, true ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + + curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); + + $response = curl_exec( $ch ); + + curl_close( $ch ); + + do{ + $headerStartPosition = strpos($response, "HTTP/"); + $headerEndPosition = strpos($response, "\n\r", $headerStartPosition); + $header = substr($response, $headerStartPosition, $headerEndPosition-$headerStartPosition); + }while(strpos($response, "HTTP/", $headerEndPosition)!==false); + // print ".".$header."."; + + $headerLines = explode("\n", $header); + $content = substr($response, $headerEndPosition+3); + + + - curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); - } - - curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); - curl_setopt( $ch, CURLOPT_HEADER, true ); - curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - - curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); - - list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); - - $status = curl_getinfo( $ch ); - curl_close( $ch ); -} + + return array($headerLines, $content); + + + } -// Split header text into an array. -$header_text = preg_split( '/[\r\n]+/', $header ); + function outputHeaders(array $headers){ -if ( $_GET['mode'] == 'native' ) { - if ( !$enable_native ) { - $contents = 'ERROR: invalid mode'; - $status = array( 'http_code' => 'ERROR' ); - } - - // Propagate headers to response. - foreach ( $header_text as $header ) { - if ( preg_match( '/^(?:Content-Type|Content-Language|Set-Cookie):/i', $header ) ) { - header( $header ); + //filterHeaders($headers); + + //print "

"; + foreach ( $headers as $h ) { + //if ( preg_match( '/^(Content-Type|Content-Language|Set-Cookie):/i', $h ) ) { + header( $h ); + //print $h; + //print "
"; + //} } - } - - print $contents; - -} else { - - // $data will be serialized into JSON data. - $data = array(); - - // Propagate all HTTP headers into the JSON data object. - if ( $_GET['full_headers'] ) { - $data['headers'] = array(); - foreach ( $header_text as $header ) { - preg_match( '/^(.+?):\s+(.*)$/', $header, $matches ); - if ( $matches ) { - $data['headers'][ $matches[1] ] = $matches[2]; + } + + + + function filterHeaders($headers){ + $matches = array(); + foreach($headers as $h){ + // grab http code + if(preg_match("/^HTTP\/[0-9]+[\.][0-9][\s]*([0-9]+)[\s]*(.*)$/i", $h, $matches)){ + switch($matches[1]){ + case 301: // Moved Permanently + + } } } } - - // Propagate all cURL request / response info to the JSON data object. - if ( $_GET['full_status'] ) { - $data['status'] = $status; - } else { - $data['status'] = array(); - $data['status']['http_code'] = $status['http_code']; + + function filterContent($content, $url){ + $parts = parse_url($url); + $baseURL = $parts['scheme'].'://'.$parts['host']; + // change image links + $content = preg_replace('/([< ]+src[\s]*=[\s]*"?)([http:\/\/])?([^ ">]+)/', "$1http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$2$3$4", $content); + + // change background links + $content = preg_replace('/(background[\s]*=[\s]*"?)([http:\/\/])?([^ ]+)("?)/', "$1http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$2$3$4", $content); + + + // change anchor links + $content = preg_replace('/(href[\s]*="?)([^ >"]+)/i', "$1http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$2$3", $content); + // 'ERROR' ); + } + +// Propagate headers to response. + + +print $contents; + +} else { + +// $data will be serialized into JSON data. +$data = array(); + +// Propagate all HTTP headers into the JSON data object. 
+if ( $_GET['full_headers'] ) { +$data['headers'] = array(); + +foreach ( $header_text as $header ) { +preg_match( '/^(.+?):\s+(.*)$/', $header, $matches ); +if ( $matches ) { +$data['headers'][ $matches[1] ] = $matches[2]; +} +} +} + +// Propagate all cURL request / response info to the JSON data object. +if ( $_GET['full_status'] ) { +$data['status'] = $status; +} else { +$data['status'] = array(); +$data['status']['http_code'] = $status['http_code']; } -?> +// Set the JSON data object contents, decoding it from JSON if possible. +$decoded_json = json_decode( $contents ); +$data['contents'] = $decoded_json ? $decoded_json : $contents; + +// Generate appropriate content-type header. +$is_xhr = strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest'; +header( 'Content-type: application/' . ( $is_xhr ? 'json' : 'x-javascript' ) ); + +// Get JSONP callback. +$jsonp_callback = $enable_jsonp && isset($_GET['callback']) ? $_GET['callback'] : null; + +// Generate JSON/JSONP string +$json = json_encode( $data ); + +print $jsonp_callback ? "$jsonp_callback($json)" : $json; + +}*/ + From a5cfcaf3f56300c2fc12f5a4a8ea1526e36422fa Mon Sep 17 00:00:00 2001 From: "M. C. Skinner" Date: Mon, 14 Feb 2011 20:21:18 -0500 Subject: [PATCH 02/16] Redirection via http headers is done via CURL, but the headers returned by CURL are all the headers. Now only the last header is examined. Html links and image references are parsed hopefully correctly such that any html page loaded causes all links to point back to the proxy script. Mimetype is also extracted from the curl fetch and stored in an instance variable. This mime-type is examined before processing the content of a CURL to ensure we are not examining non-textual data for content changes to point back to the proxy script. 
--- ba-simple-proxy.php | 66 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index ecdf059..ac402fc 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -165,6 +165,9 @@ class SimpleProxy { protected $enable_jsonp = false; protected $enable_native = false; protected $valid_url_regex = '/.*/'; + + protected $url; + protected $contentType; function getURL(){ global $valid_url_regex; @@ -172,6 +175,7 @@ function getURL(){ $needle = "url="; $pos = strpos($_SERVER['REQUEST_URI'], $needle); $url = substr($_SERVER['REQUEST_URI'], $pos+strlen($needle)); + if ( !$url ) { // Passed url not specified. $contents = 'ERROR: url not specified'; @@ -183,6 +187,7 @@ function getURL(){ $status = array( 'http_code' => 'ERROR' ); throw new Exception("Invalid url,"); } + $this->url = $url; return $url; } @@ -216,20 +221,37 @@ function fetchURL($url){ curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); $response = curl_exec( $ch ); + $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + $_GET['url']=$this->url; + $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + + $semiColon = strpos($contentType, ";"); + if($semiColon === false){ + $this->mimeType = $contentType; + } else { + $this->mimeType = substr($contentType, 0, $semiColon); + } + curl_close( $ch ); - + //print $response; + //print "

"; + $headerStartPosition = 0; + $headerEndPosition = 0; do{ - $headerStartPosition = strpos($response, "HTTP/"); + $headerStartPosition = $headerEndPosition; + $headerStartPosition = strpos($response, "HTTP/", $headerStartPosition); $headerEndPosition = strpos($response, "\n\r", $headerStartPosition); + $header = substr($response, $headerStartPosition, $headerEndPosition-$headerStartPosition); + // print $header; }while(strpos($response, "HTTP/", $headerEndPosition)!==false); // print ".".$header."."; $headerLines = explode("\n", $header); $content = substr($response, $headerEndPosition+3); - + @@ -270,23 +292,55 @@ function filterHeaders($headers){ } function filterContent($content, $url){ + if($this->contentType == "text/html") { + return; + } + + $linkMap = create_function ('$match',' + $link = $match[2]; + $parts = parse_url($_GET[\'url\']); + $baseURL = $parts[\'scheme\'].\'://\'.$parts[\'host\'].$parts[\'path\']; + if(substr($link, 0, 4)==="http"){ + return "href=\"http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$link\""; + } + return "href=\"http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$link\""; + + '); + + $imageMap = create_function ('$match',' + $link = $match[2]; + $parts = parse_url($_GET[\'url\']); + $baseURL = $parts[\'scheme\'].\'://\'.$parts[\'host\'].$parts[\'path\']; + if(substr($link, 0, 4)==="http"){ + return "href=\"http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$link\""; + } if($link[0]=="/"){ + return $match[1]."http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=".$baseURL."/".$match[2].$match[3].$match[4]; + } else { + $parts = explode("/", $baseURL); + array_pop($parts); + $baseURL = implode($parts, "/"); + return $match[1]."http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=".$baseURL."/".$match[2].$match[3].$match[4]; + + } + + '); $parts = parse_url($url); $baseURL = $parts['scheme'].'://'.$parts['host']; // change image links - $content = 
preg_replace('/([< ]+src[\s]*=[\s]*"?)([http:\/\/])?([^ ">]+)/', "$1http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$2$3$4", $content); - + $content = preg_replace_callback('/([< ]+src[\s]*=[\s]*"?)([http:\/\/])?([^ ">]+)/', $imageMap, $content); // change background links $content = preg_replace('/(background[\s]*=[\s]*"?)([http:\/\/])?([^ ]+)("?)/', "$1http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$2$3$4", $content); // change anchor links - $content = preg_replace('/(href[\s]*="?)([^ >"]+)/i', "$1http://localhost:8080/gewthen/tools/proxy/ba-simple-proxy.php?url=$baseURL/$2$3", $content); + $content = preg_replace_callback('/(href[\s]*="?)([^ >"]+)/i', $linkMap , $content); // Date: Mon, 7 Mar 2011 10:43:17 -0800 Subject: [PATCH 03/16] Proxy copies value of inbound 'authorization' query parameter to outbound 'Authorization:' HTTP header. --- ba-simple-proxy.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..c0e25bd 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -176,6 +176,11 @@ curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); } + + if ( isset($_GET['authorization']) ) { + // Set the Authorization header + curl_setopt( $ch, CURLOPT_HTTPHEADER, array("Authorization: ".$_GET['authorization'] )); + } curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); curl_setopt( $ch, CURLOPT_HEADER, true ); From 70c57b1efaa51bb588c1e720ec8b78fede9bb764 Mon Sep 17 00:00:00 2001 From: metadaddy Date: Tue, 8 Mar 2011 11:31:05 -0800 Subject: [PATCH 04/16] Added support for POSTing non-form data and arbitrary request methods --- ba-simple-proxy.php | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index c0e25bd..0aa1029 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -158,12 +158,15 @@ } else { $ch = curl_init( $url ); - - if ( 
strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { - curl_setopt( $ch, CURLOPT_POST, true ); - curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); - } - + + // Pass on request method, regardless of what it is + curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $_SERVER['REQUEST_METHOD'] ); + + // Pass on content, regardless of request method + if ( isset($_SERVER['CONTENT_LENGTH'] ) && $_SERVER['CONTENT_LENGTH'] > 0 ) { + curl_setopt( $ch, CURLOPT_POSTFIELDS, file_get_contents("php://input") ); + } + if ( $_GET['send_cookies'] ) { $cookie = array(); foreach ( $_COOKIE as $key => $value ) { @@ -177,9 +180,17 @@ curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); } + $headers = array(); if ( isset($_GET['authorization']) ) { // Set the Authorization header - curl_setopt( $ch, CURLOPT_HTTPHEADER, array("Authorization: ".$_GET['authorization'] )); + array_push($headers, "Authorization: ".$_GET['authorization'] ); + } + if ( isset($_SERVER['CONTENT_TYPE']) ) { + // Pass through the Content-Type header + array_push($headers, "Content-Type: ".$_SERVER['CONTENT_TYPE'] ); + } + if ( count($headers) > 0 ) { + curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers ); } curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); @@ -253,7 +264,7 @@ $json = json_encode( $data ); print $jsonp_callback ? "$jsonp_callback($json)" : $json; - + } ?> From c1c5c8b8e053202603d58a76760c955cf8f791c7 Mon Sep 17 00:00:00 2001 From: metadaddy Date: Tue, 8 Mar 2011 13:23:19 -0800 Subject: [PATCH 05/16] Switched from using a query parameter for authorization to a header, so the token doesn't show up in server logs. 
--- ba-simple-proxy.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index 0aa1029..ac4fc08 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -165,7 +165,7 @@ // Pass on content, regardless of request method if ( isset($_SERVER['CONTENT_LENGTH'] ) && $_SERVER['CONTENT_LENGTH'] > 0 ) { curl_setopt( $ch, CURLOPT_POSTFIELDS, file_get_contents("php://input") ); - } + } if ( $_GET['send_cookies'] ) { $cookie = array(); @@ -181,9 +181,9 @@ } $headers = array(); - if ( isset($_GET['authorization']) ) { + if ( isset($_SERVER['HTTP_X_AUTHORIZATION']) ) { // Set the Authorization header - array_push($headers, "Authorization: ".$_GET['authorization'] ); + array_push($headers, "Authorization: ".$_SERVER['HTTP_X_AUTHORIZATION'] ); } if ( isset($_SERVER['CONTENT_TYPE']) ) { // Pass through the Content-Type header From 3fbca90bcd6df9e50d5f23ef226498e420134168 Mon Sep 17 00:00:00 2001 From: metadaddy Date: Fri, 11 Mar 2011 17:01:44 -0800 Subject: [PATCH 06/16] Added CORS support (see http://www.w3.org/TR/cors/) and parameterized incoming authz header --- ba-simple-proxy.php | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index ac4fc08..0ab6450 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -132,6 +132,19 @@ // ensure that it is valid. This setting only needs to be used if either // $enable_jsonp or $enable_native are enabled. Defaults to '/.*/' which // validates all URLs. +// $authz_header - an index into the $_SERVER array locating authorization +// data which is to be proxied in the HTTP Authorization header. This is +// necessary since, in a default deployment, Apache will not pass an +// incoming Authorization header to a script. 
As a convention, we pass +// authorization data to the proxy in the X-Authorization header, so the +// default value is 'HTTP_X_AUTHORIZATION' +// $cors_allow_origin - a space-separated list of origins, each of the form +// https://example.com:8443, from which scripts will be allowed to access +// the proxy. See http://www.w3.org/TR/cors/ for details. +// $cors_allow_methods - HTTP methods allowed from the origins specified in +// $cors_allow_origin. Defaults to 'GET, POST, PUT, PATCH, DELETE, HEAD' +// $cors_allow_headers - HTTP headers allowed from the origins specified in +// $cors_allow_origin. Defaults to 'X-Authorization, Content-Type' // // ############################################################################ @@ -140,6 +153,12 @@ $enable_native = false; $valid_url_regex = '/.*/'; +$authz_header = 'HTTP_X_AUTHORIZATION'; + +$cors_allow_origin = null; +$cors_allow_methods = 'GET, POST, PUT, PATCH, DELETE, HEAD'; +$cors_allow_headers = 'X-Authorization, Content-Type'; + // ############################################################################ $url = $_GET['url']; @@ -157,6 +176,21 @@ $status = array( 'http_code' => 'ERROR' ); } else { + + if ( isset( $cors_allow_origin ) ) { + header( 'Access-Control-Allow-Origin: '.$cors_allow_origin ); + if ( isset( $cors_allow_methods ) ) { + header( 'Access-Control-Allow-Methods: '.$cors_allow_methods ); + } + if ( isset( $cors_allow_headers ) ) { + header( 'Access-Control-Allow-Headers: '.strtolower($cors_allow_headers) ); + } + if ( $_SERVER['REQUEST_METHOD'] == 'OPTIONS' ) { + // We're done - don't proxy CORS OPTIONS request + exit(); + } + } + $ch = curl_init( $url ); // Pass on request method, regardless of what it is @@ -181,9 +215,9 @@ } $headers = array(); - if ( isset($_SERVER['HTTP_X_AUTHORIZATION']) ) { + if ( isset($authz_header) && isset($_SERVER[$authz_header]) ) { // Set the Authorization header - array_push($headers, "Authorization: ".$_SERVER['HTTP_X_AUTHORIZATION'] ); + array_push($headers, 
"Authorization: ".$_SERVER[$authz_header] ); } if ( isset($_SERVER['CONTENT_TYPE']) ) { // Pass through the Content-Type header @@ -252,7 +286,7 @@ // Set the JSON data object contents, decoding it from JSON if possible. $decoded_json = json_decode( $contents ); $data['contents'] = $decoded_json ? $decoded_json : $contents; - + // Generate appropriate content-type header. $is_xhr = strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest'; header( 'Content-type: application/' . ( $is_xhr ? 'json' : 'x-javascript' ) ); From ab7e68c4469e26439839df3ba07139c5488712cf Mon Sep 17 00:00:00 2001 From: chenwei Date: Fri, 25 Nov 2011 20:32:59 +0800 Subject: [PATCH 07/16] fix POST method params not right problem --- ba-simple-proxy.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..1fb97be 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -161,7 +161,7 @@ if ( strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { curl_setopt( $ch, CURLOPT_POST, true ); - curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); + curl_setopt( $ch, CURLOPT_POSTFIELDS, file_get_contents("php://input") ); } if ( $_GET['send_cookies'] ) { From 6613055165e6753d7ea8e2475900d61b6f33a5b0 Mon Sep 17 00:00:00 2001 From: Stefan Hoth Date: Fri, 3 Feb 2012 11:35:39 +0100 Subject: [PATCH 08/16] Add simpler way of whitelisting domains (hate regex) --- ba-simple-proxy.php | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..528dc7a 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -23,6 +23,7 @@ // // About: Release History // +// 1.7 - (2/03/2012) Add optional whitelist-check by Stefan Hoth // 1.6 - (1/24/2009) Now defaults to JSON mode, which can now be changed to // native mode by specifying ?mode=native. 
Native and JSONP modes are // disabled by default because of possible XSS vulnerability issues, but @@ -139,6 +140,10 @@ $enable_jsonp = false; $enable_native = false; $valid_url_regex = '/.*/'; +/** + * only domains listed in this array will be allowed to be proxied + */ +$WHITELIST_DOMAINS = array('google.com','google.de'); // ############################################################################ @@ -156,6 +161,12 @@ $contents = 'ERROR: invalid url'; $status = array( 'http_code' => 'ERROR' ); +}elseif ( is_array($WHITELIST_DOMAINS) && ! empty($WHITELIST_DOMAINS) && + ! in_array( parse_url($url,PHP_URL_HOST), $WHITELIST_DOMAINS) ) { + + $contents = 'ERROR: invalid url (not in whitelist)'; + $status = array( 'http_code' => 'ERROR' ); + } else { $ch = curl_init( $url ); From f80e0be00546fd4fd737184c998c1f9ac2434521 Mon Sep 17 00:00:00 2001 From: Stefan Hoth Date: Fri, 3 Feb 2012 17:51:29 +0100 Subject: [PATCH 09/16] Add optional caching based on URL and TTL --- ba-simple-proxy.php | 138 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 27 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index 528dc7a..5126a1a 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -23,6 +23,7 @@ // // About: Release History // +// 1.8 - (2/03/2012) Add optional caching of proxied results by Stefan Hoth // 1.7 - (2/03/2012) Add optional whitelist-check by Stefan Hoth // 1.6 - (1/24/2009) Now defaults to JSON mode, which can now be changed to // native mode by specifying ?mode=native. 
Native and JSONP modes are @@ -145,8 +146,76 @@ */ $WHITELIST_DOMAINS = array('google.com','google.de'); +/** + * CACHING + */ +$enable_caching = false; +//how long after a cache will be renewed +define(CACHE_TTL,600);//10 mins +define(CACHE_DIR,'.cache'); + +// ############################################################################ +// FUNCTIONS // ############################################################################ +/** + * checks or creates the cache dir + */ +function prepare_cache(){ + return is_writable(CACHE_DIR) || mkdir(CACHE_DIR,0777,true); +} + +/** + * generates a cachefile name for a given url + */ +function get_cachefile_name($url){ + return CACHE_DIR.'/'.sha1($url); +} + +/** + * checks if a cache file exists and is not expired for a given url + */ +function cachefile_exits($url){ + + if(! prepare_cache()){ + return false; + } + + return is_readable( get_cachefile_name($url) ) && ! cachefile_is_too_old($url); +} + +/** + * returns if the modification time is older than the cache-time + */ +function cachefile_is_too_old($url){ + return ( time() - filemtime( get_cachefile_name($url) )) >= CACHE_TTL; +} + +/** + * checks if a cache file exists for a given url + */ +function cachefile_read($url){ + + if(! prepare_cache()){ + return false; + } + + return file_get_contents( get_cachefile_name($url) ); +} + +function cachefile_write($url, $content){ + + if(! 
prepare_cache()){ + return false; + } + + return file_put_contents( get_cachefile_name($url), $content); +} + + +// ############################################################################ + + $url = $_GET['url']; if ( !$url ) { @@ -168,37 +237,52 @@ $status = array( 'http_code' => 'ERROR' ); } else { - $ch = curl_init( $url ); - - if ( strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { - curl_setopt( $ch, CURLOPT_POST, true ); - curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); - } - - if ( $_GET['send_cookies'] ) { - $cookie = array(); - foreach ( $_COOKIE as $key => $value ) { - $cookie[] = $key . '=' . $value; + + if($enable_caching && cachefile_exits($url)){ + + $header = ''; + $contents = cachefile_read($url); + + }else{ + + $ch = curl_init( $url ); + + if ( strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { + curl_setopt( $ch, CURLOPT_POST, true ); + curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); } - if ( $_GET['send_session'] ) { - $cookie[] = SID; + + if ( $_GET['send_cookies'] ) { + $cookie = array(); + foreach ( $_COOKIE as $key => $value ) { + $cookie[] = $key . '=' . $value; + } + if ( $_GET['send_session'] ) { + $cookie[] = SID; + } + $cookie = implode( '; ', $cookie ); + + curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); + } + + curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); + curl_setopt( $ch, CURLOPT_HEADER, true ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + + curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? 
$_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); + + list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); + + $status = curl_getinfo( $ch ); + + curl_close( $ch ); + + if($enable_caching){ + cachefile_write($url,$contents); } - $cookie = implode( '; ', $cookie ); - curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); } - - curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); - curl_setopt( $ch, CURLOPT_HEADER, true ); - curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - - curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); - - list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); - - $status = curl_getinfo( $ch ); - - curl_close( $ch ); + } // Split header text into an array. From 83c9b14cc22855f5a87001bb082581e9de8a4ba1 Mon Sep 17 00:00:00 2001 From: Ido Green Date: Thu, 10 May 2012 13:28:55 +0300 Subject: [PATCH 10/16] 1. Report errors in cUrl. 2. Add a comment with an option (I've used many times) to Not verify SSL certificates. It's good option for internal usage. --- ba-simple-proxy.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..ca97917 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -181,9 +181,16 @@ curl_setopt( $ch, CURLOPT_HEADER, true ); curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); + // in case you wish Not to confirm the CA for your server (e.g. it's inside your org) + // curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER , false); - list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); + curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? 
$_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); + $res = curl_exec( $ch ); + if ($res === FALSE) { + // in case we have errors - let's report them! + die(curl_error($ch)); + } + list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', $res, 2 ); $status = curl_getinfo( $ch ); From f56a5a09fa859863957f807d4a8a2d1f22ebba8d Mon Sep 17 00:00:00 2001 From: Robert Reinhard Date: Thu, 7 Jun 2012 16:47:22 -0700 Subject: [PATCH 11/16] Fixing notices because of missing key values --- ba-simple-proxy.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..0b70503 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -164,7 +164,7 @@ curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); } - if ( $_GET['send_cookies'] ) { + if (isset($_GET['send_cookies']) && $_GET['send_cookies'] ) { $cookie = array(); foreach ( $_COOKIE as $key => $value ) { $cookie[] = $key . '=' . $value; @@ -181,7 +181,7 @@ curl_setopt( $ch, CURLOPT_HEADER, true ); curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); + curl_setopt( $ch, CURLOPT_USERAGENT, isset($_GET['user_agent']) && $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); @@ -193,7 +193,7 @@ // Split header text into an array. $header_text = preg_split( '/[\r\n]+/', $header ); -if ( $_GET['mode'] == 'native' ) { +if (isset($_GET['mode']) && $_GET['mode'] == 'native' ) { if ( !$enable_native ) { $contents = 'ERROR: invalid mode'; $status = array( 'http_code' => 'ERROR' ); @@ -214,7 +214,7 @@ $data = array(); // Propagate all HTTP headers into the JSON data object. 
- if ( $_GET['full_headers'] ) { + if (isset($_GET['full_headers']) && $_GET['full_headers'] ) { $data['headers'] = array(); foreach ( $header_text as $header ) { @@ -226,7 +226,7 @@ } // Propagate all cURL request / response info to the JSON data object. - if ( $_GET['full_status'] ) { + if (isset($_GET['full_status']) && $_GET['full_status'] ) { $data['status'] = $status; } else { $data['status'] = array(); @@ -251,4 +251,4 @@ } -?> +?> \ No newline at end of file From 4fbbeba97a19c786cc177b74caca1f2e45d4400f Mon Sep 17 00:00:00 2001 From: nicholasjarnold Date: Tue, 24 Jul 2012 11:06:52 -0500 Subject: [PATCH 12/16] removed non-standard x-javascript header --- ba-simple-proxy.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..752a738 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -239,7 +239,7 @@ // Generate appropriate content-type header. $is_xhr = strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest'; - header( 'Content-type: application/' . ( $is_xhr ? 'json' : 'x-javascript' ) ); + header( 'Content-type: application/' . ( $is_xhr ? 'json' : 'javascript' ) ); // Get JSONP callback. $jsonp_callback = $enable_jsonp && isset($_GET['callback']) ? 
$_GET['callback'] : null; From b6ce03bf9c5a2528f978a088e6f43858acfe7900 Mon Sep 17 00:00:00 2001 From: Nathan Reed Date: Thu, 9 Aug 2012 09:28:00 +0200 Subject: [PATCH 13/16] bugfix: do not fail on HTTP Response '100 Continue' --- ba-simple-proxy.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..0389e6c 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -185,6 +185,10 @@ list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); + if($header == 'HTTP/1.1 100 Continue') { + list($header, $contents) = preg_split( '/([\r\n][\r\n])\\1/', $contents, 2 ); + } + $status = curl_getinfo( $ch ); curl_close( $ch ); From ac16a69e7a46113df83cd85cabd025a989eeeb41 Mon Sep 17 00:00:00 2001 From: Jason Wu Date: Sat, 15 Dec 2012 15:17:08 -0500 Subject: [PATCH 14/16] $enable_native = true --- ba-simple-proxy.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..dd6eb6b 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -137,7 +137,7 @@ // Change these configuration options if needed, see above descriptions for info. $enable_jsonp = false; -$enable_native = false; +$enable_native = true; $valid_url_regex = '/.*/'; // ############################################################################ From 954350a398f7044c844e0ab0fd58d5f44814dd33 Mon Sep 17 00:00:00 2001 From: PiBa-NL Date: Sun, 27 Jan 2013 17:12:09 +0100 Subject: [PATCH 15/16] remark added (possible issue with enctype="multipart/form-data" ??) 
--- ba-simple-proxy.php | 1 + 1 file changed, 1 insertion(+) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index 395dec6..15bbca7 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -284,6 +284,7 @@ function cachefile_write($url, $content){ // Pass on content, regardless of request method if ( isset($_SERVER['CONTENT_LENGTH'] ) && $_SERVER['CONTENT_LENGTH'] > 0 ) { + // PiBa-NL (possibly an issue with enctype="multipart/form-data" ??) curl_setopt( $ch, CURLOPT_POSTFIELDS, file_get_contents("php://input") ); } From 7bbba705fcf6cac837d632101ec0e73ef47320ce Mon Sep 17 00:00:00 2001 From: PiBa-NL Date: Sun, 27 Jan 2013 23:13:08 +0100 Subject: [PATCH 16/16] 1.9 - (1/27/2013) Combined improvements/functionality of several github forks into a single branch --- ba-simple-proxy.php | 1 + 1 file changed, 1 insertion(+) diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index e1bb865..d8b930f 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -23,6 +23,7 @@ // // About: Release History // +// 1.9 - (1/27/2013) Combined improvements/functionality of several github forks into a single branch // 1.8 - (2/03/2012) Add optional caching of proxied results by Stefan Hoth // 1.7 - (2/03/2012) Add optional whitelist-check by Stefan Hoth // 1.6 - (1/24/2009) Now defaults to JSON mode, which can now be changed to