diff --git a/ba-simple-proxy.php b/ba-simple-proxy.php index b1fa6e8..d8b930f 100644 --- a/ba-simple-proxy.php +++ b/ba-simple-proxy.php @@ -23,6 +23,9 @@ // // About: Release History // +// 1.9 - (1/27/2013) Combined improvements/functionality of several github forks into a single brance +// 1.8 - (2/03/2012) Add optional caching of proxied results by Stefan Hoth +// 1.7 - (2/03/2012) Add optional whitelist-check by Stefan Hoth // 1.6 - (1/24/2009) Now defaults to JSON mode, which can now be changed to // native mode by specifying ?mode=native. Native and JSONP modes are // disabled by default because of possible XSS vulnerability issues, but @@ -132,123 +135,397 @@ // ensure that it is valid. This setting only needs to be used if either // $enable_jsonp or $enable_native are enabled. Defaults to '/.*/' which // validates all URLs. +// $authz_header - an index into the $_SERVER array locating authorization +// data which is to be proxied in the HTTP Authorization header. This is +// necessary since, in a default deployment, Apache will not pass an +// incoming Authorization header to a script. As a convention, we pass +// authorization data to the proxy in the X-Authorization header, so the +// default value is 'HTTP_X_AUTHORIZATION' +// $cors_allow_origin - a space-separated list of origins, each of the form +// https://example.com:8443, from which scripts will be allowed to access +// the proxy. See http://www.w3.org/TR/cors/ for details. +// $cors_allow_methods - HTTP methods allowed from the origins specified in +// $cors_allow_origin. Defaults to 'GET, POST, PUT, PATCH, DELETE, HEAD' +// $cors_allow_headers - HTTP headers allowed from the origins specified in +// $cors_allow_origin. Defaults to 'X-Authorization, Content-Type' // // ############################################################################ -// Change these configuration options if needed, see above descriptions for info. -$enable_jsonp = false; -$enable_native = false; -$valid_url_regex = '/.*/'; +main(); +function main(){ + // Change these configuration options if needed, see above descriptions for info. + $enable_jsonp = false; + $enable_native = true; + +try { + $proxy = new SimpleProxy(); + $url = $proxy->getURL(); + $headersContent = $proxy->fetchURL($url); + if( sizeof($headersContent) !=2){ + throw new Exception("Error trying to get content: "+$url); + } + + if (isset($_GET['mode']) && $_GET['mode'] == 'json' ) { + if ( !$enable_jsonp ) { + $contents = 'ERROR: invalid mode (enable_jsonp)'; + $status = array( 'http_code' => 'ERROR' ); + echo $contents; + } else { + $jsoncontent = $proxy->outputContentJson($headersContent[0], $headersContent[1]); + echo ($jsoncontent); + } + } else { + if ( !$enable_native ) { + $contents = 'ERROR: invalid mode (enable_native)'; + $status = array( 'http_code' => 'ERROR' ); + echo $contents; + } else { + $proxy->outputHeaders($headersContent[0]); + $content = $proxy->filterContent($headersContent[1], $url); + $proxy->outputContent($content); + } + } + +} catch (Exception $e) { + header('HTTP/1.1 403 Forbidden'); + echo 'Caught exception: ', $e->getMessage(), "\n"; +} +} + +class proxyLocalCache { + //how long after a cache will be renewed + static protected $CACHE_TTL = 600;//10 mins + static protected $CACHE_DIR = '/tmp/.cache'; + +// ############################################################################ +// FUNCTIONS // ############################################################################ -$url = $_GET['url']; +/** + * checks or creates the cache dir + */ +static function prepare_cache(){ + return is_writable(self::$CACHE_DIR) || mkdir(self::$CACHE_DIR,0777,true); +} -if ( !$url ) { - - // Passed url not specified. - $contents = 'ERROR: url not specified'; - $status = array( 'http_code' => 'ERROR' ); - -} else if ( !preg_match( $valid_url_regex, $url ) ) { - - // Passed url doesn't match $valid_url_regex. - $contents = 'ERROR: invalid url'; - $status = array( 'http_code' => 'ERROR' ); - -} else { - $ch = curl_init( $url ); - - if ( strtolower($_SERVER['REQUEST_METHOD']) == 'post' ) { - curl_setopt( $ch, CURLOPT_POST, true ); - curl_setopt( $ch, CURLOPT_POSTFIELDS, $_POST ); - } - - if ( $_GET['send_cookies'] ) { - $cookie = array(); - foreach ( $_COOKIE as $key => $value ) { - $cookie[] = $key . '=' . $value; - } - if ( $_GET['send_session'] ) { - $cookie[] = SID; - } - $cookie = implode( '; ', $cookie ); - - curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); +/** + * generates a cachefile name for a given url + */ +static function get_cachefile_name($url){ + return self::$CACHE_DIR.'/'.sha1($url); +} + +/** + * checks if a cache file exists and is not expired for a given url + */ +static function cachefile_exits($url){ + + if(! self::prepare_cache()){ + return false; } - - curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); - curl_setopt( $ch, CURLOPT_HEADER, true ); - curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - - curl_setopt( $ch, CURLOPT_USERAGENT, $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); - - list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', curl_exec( $ch ), 2 ); - - $status = curl_getinfo( $ch ); - - curl_close( $ch ); + + return is_readable( self::get_cachefile_name($url) ) && ! self::cachefile_is_too_old($url); +} + +/** + * returns if the modification time is older than the cache-time + */ +static function cachefile_is_too_old($url){ + return ( time() - filemtime( self::get_cachefile_name($url) )) >= self::$CACHE_TTL; } -// Split header text into an array. -$header_text = preg_split( '/[\r\n]+/', $header ); +/** + * checks if a cache file exists for a given url + */ +static function cachefile_read($url){ -if ( $_GET['mode'] == 'native' ) { - if ( !$enable_native ) { - $contents = 'ERROR: invalid mode'; - $status = array( 'http_code' => 'ERROR' ); + if(! self::prepare_cache()){ + return false; } - - // Propagate headers to response. - foreach ( $header_text as $header ) { - if ( preg_match( '/^(?:Content-Type|Content-Language|Set-Cookie):/i', $header ) ) { - header( $header ); - } + + return file_get_contents( self::get_cachefile_name($url) ); +} + +static function cachefile_write($url, $content){ + + if(! self::prepare_cache()){ + return false; } - - print $contents; - -} else { - - // $data will be serialized into JSON data. - $data = array(); - - // Propagate all HTTP headers into the JSON data object. - if ( $_GET['full_headers'] ) { - $data['headers'] = array(); + + return file_put_contents( self::get_cachefile_name($url), $content); +} +} +// ############################################################################ + +class SimpleProxy { + protected $valid_url_regex = '/.*/'; + + // CACHING + protected $enable_caching = false; + + // only domains listed in this array will be allowed to be proxied + protected $WHITELIST_DOMAINS = array('google.com','google.de', 'nu.nl', 'www.nu.nl'); + + protected $authz_header = 'HTTP_X_AUTHORIZATION'; + protected $cors_allow_origin = null; + protected $cors_allow_methods = 'GET, POST, PUT, PATCH, DELETE, HEAD'; + protected $cors_allow_headers = 'X-Authorization, Content-Type'; + + protected $proxy_url = "http://lol2/ba-simple-proxy.php"; + + function getURL(){ + global $valid_url_regex; + $url = $_GET['url']; + $needle = "url="; + $pos = strpos($_SERVER['REQUEST_URI'], $needle); + $url = substr($_SERVER['REQUEST_URI'], $pos+strlen($needle)); - foreach ( $header_text as $header ) { - preg_match( '/^(.+?):\s+(.*)$/', $header, $matches ); - if ( $matches ) { - $data['headers'][ $matches[1] ] = $matches[2]; - } + if ( !$url ) { + // Passed url not specified. + $contents = 'ERROR: url not specified'; + $status = array( 'http_code' => 'ERROR' ); + throw new Exception("URL not specified."); + } else if ( !preg_match( $this->valid_url_regex, $url ) ) { + // Passed url doesn't match $valid_url_regex. + $contents = 'ERROR: invalid url'; + $status = array( 'http_code' => 'ERROR' ); + throw new Exception("Invalid url."); + } elseif ( is_array($this->WHITELIST_DOMAINS) && ! empty($this->WHITELIST_DOMAINS) && + ! in_array( parse_url($url,PHP_URL_HOST), $this->WHITELIST_DOMAINS) ) { + $contents = 'ERROR: invalid url (not in whitelist)'; + $status = array( 'http_code' => 'ERROR' ); + throw new Exception("ERROR: invalid url (not in whitelist)."); } + $this->url = $url; + return $url; } - // Propagate all cURL request / response info to the JSON data object. - if ( $_GET['full_status'] ) { - $data['status'] = $status; - } else { - $data['status'] = array(); - $data['status']['http_code'] = $status['http_code']; + function fetchURL($url){ + if($this->enable_caching && proxyLocalCache::cachefile_exits($url)){ + $headerLines = array(); + $content = proxyLocalCache::cachefile_read($url); + return array($headerLines, $content); + }else{ + if ( isset( $cors_allow_origin ) ) { + header( 'Access-Control-Allow-Origin: '.$cors_allow_origin ); + if ( isset( $cors_allow_methods ) ) { + header( 'Access-Control-Allow-Methods: '.$cors_allow_methods ); + } + if ( isset( $cors_allow_headers ) ) { + header( 'Access-Control-Allow-Headers: '.strtolower($cors_allow_headers) ); + } + if ( $_SERVER['REQUEST_METHOD'] == 'OPTIONS' ) { + // We're done - don't proxy CORS OPTIONS request + exit(); + } + } + + $ch = curl_init( $url ); + + // Pass on request method, regardless of what it is + curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $_SERVER['REQUEST_METHOD'] ); + + // Pass on content, regardless of request method + if ( isset($_SERVER['CONTENT_LENGTH'] ) && $_SERVER['CONTENT_LENGTH'] > 0 ) { + // PiBa-NL (possibly an issue with enctype="multipart/form-data" ??) + die("CONTENT_LENGTH"); + curl_setopt( $ch, CURLOPT_POSTFIELDS, file_get_contents("php://input") ); + } + + if (isset($_GET['send_cookies']) && $_GET['send_cookies'] ) { + $cookie = array(); + foreach ( $_COOKIE as $key => $value ) { + $cookie[] = $key . '=' . $value; + } + if ( $_GET['send_session'] ) { + $cookie[] = SID; + } + $cookie = implode( '; ', $cookie ); + + curl_setopt( $ch, CURLOPT_COOKIE, $cookie ); + } + + $headers = array(); + if ( isset($authz_header) && isset($_SERVER[$authz_header]) ) { + // Set the Authorization header + array_push($headers, "Authorization: ".$_SERVER[$authz_header] ); + } + if ( isset($_SERVER['CONTENT_TYPE']) ) { + // Pass through the Content-Type header + array_push($headers, "Content-Type: ".$_SERVER['CONTENT_TYPE'] ); + } + if ( count($headers) > 0 ) { + curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers ); + } + + curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); + curl_setopt( $ch, CURLOPT_HEADER, true ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + + curl_setopt( $ch, CURLOPT_USERAGENT, isset($_GET['user_agent']) && $_GET['user_agent'] ? $_GET['user_agent'] : $_SERVER['HTTP_USER_AGENT'] ); + // in case you wish Not to confirm the CA for your server (e.g. it's inside your org) + // curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER , false); + + $response = curl_exec( $ch ); + if ($response === FALSE) { + die(curl_error($ch)); // in case we have errors - let's report them! + } + $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + $this->status = curl_getinfo( $ch ); + curl_close( $ch ); + + /*list( $header, $contents ) = preg_split( '/([\r\n][\r\n])\\1/', $response, 2 ); + if($header == 'HTTP/1.1 100 Continue') { + list($header, $contents) = preg_split( '/([\r\n][\r\n])\\1/', $contents, 2 ); + }*/ + + $_GET['url']=$this->url; + $semiColon = strpos($contentType, ";"); + + if($semiColon === false){ + $this->mimeType = $contentType; + } else { + $this->mimeType = substr($contentType, 0, $semiColon); + } + $headerStartPosition = 0; + $headerEndPosition = 0; + do{ + $headerStartPosition = $headerEndPosition; + $headerStartPosition = strpos($response, "HTTP/", $headerStartPosition); + $headerEndPosition = strpos($response, "\n\r", $headerStartPosition); + + $header = substr($response, $headerStartPosition, $headerEndPosition-$headerStartPosition); + // print $header; + }while(strpos($response, "HTTP/", $headerEndPosition)!==false); + // print ".".$header."."; + // Split header text into an array. + $headerLines = preg_split( '/[\r\n]+/', $header ); + $content = substr($response, $headerEndPosition+3); + + //die("hoi
$header


\r\n\r\n\r\n$content"); + + if($this->enable_caching){ + proxyLocalCache::cachefile_write($url, $content); + } + return array($headerLines, $content); + } + } + + function outputHeaders(array $headers){ + //filterHeaders($headers); + //print "

"; + foreach ( $headers as $h ) { + if ( preg_match( '/^(Content-Type|Content-Language|Set-Cookie):/i', $h ) ) { + header( $h ); + //print $h; + //print "
"; + } + } + } + function filterHeaders($headers){ + $matches = array(); + foreach($headers as $h){ + // grab http code + if(preg_match("/^HTTP\/[0-9]+[\.][0-9][\s]*([0-9]+)[\s]*(.*)$/i", $h, $matches)){ + switch($matches[1]){ + case 301: // Moved Permanently + } + } + } + } + + function filterContent($content, $url){ + if($this->contentType == "text/html") { + return; + } + + $linkMap = create_function ('$match',' + $link = $match[2]; + $parts = parse_url($_GET[\'url\']); + $baseURL = $parts[\'scheme\'].\'://\'.$parts[\'host\'].$parts[\'path\']; + if(substr($link, 0, 4)==="http"){ + return "href=\"$proxy_url?url=$link\""; + } + return "href=\"$proxy_url?url=$baseURL/$link\""; + + '); + + $imageMap = create_function ('$match',' + $link = $match[2]; + $parts = parse_url($_GET[\'url\']); + $baseURL = $parts[\'scheme\'].\'://\'.$parts[\'host\'].$parts[\'path\']; + if(substr($link, 0, 4)==="http"){ + return "href=\"$proxy_url?url=$link\""; + } if($link[0]=="/"){ + return $match[1]."$proxy_url?url=".$baseURL."/".$match[2].$match[3].$match[4]; + } else { + $parts = explode("/", $baseURL); + array_pop($parts); + $baseURL = implode($parts, "/"); + return $match[1]."$proxy_url?url=".$baseURL."/".$match[2].$match[3].$match[4]; + + } + + '); + $parts = parse_url($url); + $baseURL = $parts['scheme'].'://'.$parts['host']; + // change image links + $content = preg_replace_callback('/([< ]+src[\s]*=[\s]*"?)([http:\/\/])?([^ ">]+)/', $imageMap, $content); + // change background links + $content = preg_replace('/(background[\s]*=[\s]*"?)([http:\/\/])?([^ ]+)("?)/', "$1$proxy_url?url=$baseURL/$2$3$4", $content); + + + // change anchor links + $content = preg_replace_callback('/(href[\s]*="?)([^ >"]+)/i', $linkMap , $content); + //status; + } else { + $data['status'] = array(); + $data['status']['http_code'] = $this->status['http_code']; + } + + // Set the JSON data object contents, decoding it from JSON if possible. + $decoded_json = json_decode( $contents ); + $data['contents'] = $decoded_json ? $decoded_json : $contents; + + // Generate appropriate content-type header. + $is_xhr = strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest'; + header( 'Content-type: application/' . ( $is_xhr ? 'json' : 'javascript' ) ); + + // Get JSONP callback. + $jsonp_callback = $enable_jsonp && isset($_GET['callback']) ? $_GET['callback'] : null; + + // Generate JSON/JSONP string + $json = json_encode( $data ); + + return $jsonp_callback ? "$jsonp_callback($json)" : $json; + } +}//SimpleProxy ?>