From ae8ae0289b468a08ff9df7bf8689ec63b79c737b Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 18:00:17 +0200 Subject: [PATCH 01/16] added config file. --- .gitignore | 2 ++ config.php | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 config.php diff --git a/.gitignore b/.gitignore index 485dee6..1f746d4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .idea +nextcheck.dat +config.php diff --git a/config.php b/config.php new file mode 100644 index 0000000..16ce7b1 --- /dev/null +++ b/config.php @@ -0,0 +1,8 @@ + 'localhost', + 'user' => '', + 'pass' => '', + 'db' => '', + 'api_key' => '' +); From 71e268bc4b24f83e3c2f2f66100623b0b49071e3 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 18:13:41 +0200 Subject: [PATCH 02/16] rewritten version of phpgsb. --- phpgsb.class.php | 2876 ++++++++++++++++++++++++---------------------- 1 file changed, 1472 insertions(+), 1404 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 347026c..75c587b 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -1,1404 +1,1472 @@ -silent(); - $this->outputmsg("phpGSB Loaded"); - if($database&&$username) - $this->dbConnect($database,$username,$password,$host); - } - function close() - { - mysql_close(); - $this->outputmsg("Closing phpGSB. 
(Peak Memory: ".(round(memory_get_peak_usage()/1048576,3))."MB)"); - } - function silent() - { - $this->verbose = false; - } - function trans_disable() - { - $this->transenabled = false; - } - function trans_enable() - { - $this->transenabled = true; - } - function trans_begin() - { - if($this->transenabled) - { - $this->transtarted = true; - $this->outputmsg("Begin MySQL Transaction"); - mysql_query("BEGIN"); - } - } - function trans_commit() - { - if($this->transtarted&&mysql_ping()&&$this->transenabled) - { - $this->transtarted = false; - $this->outputmsg("Comitting Transaction"); - mysql_query("COMMIT"); - } - } - function trans_rollback() - { - if($this->transtarted&&mysql_ping()&&$this->transenabled) - { - $this->transtarted = false; - $this->outputmsg("Rolling Back Transaction"); - mysql_query("ROLLBACK"); - } - } - /*Function to output messages, used instead of echo, - will make it easier to have a verbose switch in later - releases*/ - function outputmsg($msg) - { - if($this->verbose) - { - echo $msg.'...
'; - $this->ob .= ob_get_contents(); - ob_flush(); - } - } - /*Function to output errors, used instead of echo, - will make it easier to have a verbose switch in later - releases*/ - function fatalerror($msg) - { - if($this->verbose) - { - print_r($msg); - echo '...
'; - $this->ob .= ob_get_contents(); - ob_end_flush(); - } - $this->trans_rollback(); - die(); - } - /*Wrapper to connect to database. Simples.*/ - function dbConnect($database,$username,$password,$host="localhost") - { - $link = mysql_connect($host, $username, $password); - if (!$link) { - $this->fatalerror('Could not connect: ' . mysql_error()); - } - $this->outputmsg('Connected successfully to database server'); - $db_selected = mysql_select_db($database, $link); - if (!$db_selected) { - $this->fatalerror('Can\'t use $database : ' . mysql_error()); - } - $this->outputmsg('Connected to database successfully'); - } - /*Simple logic function to calculate timeout - based on the number of previous errors*/ - function calc($errors) - { - //According to Developer Guide Formula - if($errors==1) - { - //According to Developer Guide (1st error, wait a minute) - return 60; - } - elseif($errors>5) - { - //According to Developer Guide (Above 5 errors check every 4 hours) - return 28800; - } - else - { - //According to Developer Guide we simply double up our timeout each time and use formula: - //(Adapted to be relative to errors) ( ((2^$errors) * 7.5) * (decimalrand(0,1) + 1)) to produce - // a result between: 120min-240min for example - return floor((pow(2,$errors) * 7.5) * ((rand(0,1000)/1000) + 1)); - } - } - /*Writes backoff timeouts, uses calc() to - calculate timeouts and then writes to file - for next check*/ - function Backoff($errdata=false,$type) - { - if($type=="data") - $file = 'nextcheck.dat'; - else - $file = 'nextcheckl.dat'; - $curstatus = explode('||',file_get_contents($this->pingfilepath.$file)); - $curstatus[1] = $curstatus[1] + 1; - $seconds = $this->calc($curstatus[1]); - $until = time()+$seconds.'||'.$curstatus[1]; - file_put_contents($this->pingfilepath.$file,$until); - $this->fatalerror(array("Invalid Response... 
Backing Off",$errdata)); - } - /*Writes timeout from valid requests to nextcheck file*/ - function setTimeout($seconds) - { - if (file_exists($this->pingfilepath.'nextcheck.dat')) { - $curstatus = explode('||',@file_get_contents($this->pingfilepath.'nextcheck.dat')); - $until = time()+$seconds.'||'.$curstatus[1]; - } else { - $until = time()+$seconds.'||'; - } - file_put_contents($this->pingfilepath.'nextcheck.dat',$until); - } - /*Checks timeout in timeout files (usually performed at the - start of script)*/ - function checkTimeout($type) - { - if($type=="data") - $file = 'nextcheck.dat'; - else - $file = 'nextcheckl.dat'; - $curstatus = explode('||',file_get_contents($this->pingfilepath.$file)); - if(time()<$curstatus[0]) - { - $this->fatalerror("Must wait another ".($curstatus[0]-time()). " seconds before another request"); - } - else - $this->outputmsg("Allowed to request"); - } - /*Function downloads from URL's, POST data can be - passed via $options. $followbackoff indicates - whether to follow backoff procedures or not*/ - function googleDownloader($url,$options,$followbackoff=false) - { - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 0); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - - if(is_array($options)) - curl_setopt_array($ch, $options); - - $data = curl_exec($ch); - $info = curl_getinfo($ch); - curl_close($ch); - if($followbackoff&&$info['http_code']>299) - { - $this->Backoff($info,$followbackoff); - } - return array($info,$data); - } - //UPDATER FUNCTIONS - /*Resets lists database, only called if GSB issues r:resetdatabase*/ - function resetDatabase() - { - //Lord knows why they would EVER issue this request! 
- if(!empty($this->adminemail)) - mail($this->adminemail,'Reset Database Request Issued','For some crazy unknown reason GSB requested a database reset at '.time()); - foreach($this->usinglists as $value) - { - mysql_query("TRUNCATE TABLE `$value-s-index`"); - mysql_query("TRUNCATE TABLE `$value-s-hosts`"); - mysql_query("TRUNCATE TABLE `$value-s-prefixes`"); - mysql_query("TRUNCATE TABLE `$value-a-index`"); - mysql_query("TRUNCATE TABLE `$value-a-hosts`"); - mysql_query("TRUNCATE TABLE `$value-a-prefixes`"); - } - } - /*Processes data recieved from a GSB data request into a managable array*/ - function processChunks($fulldata,$listname) - { - $subarray = array(); - $addarray = array(); - $loaddata = trim($fulldata); - $clonedata = $loaddata; - while(strlen($clonedata)>0) - { - $splithead = explode("\n",$clonedata,2); - $chunkinfo = explode(':',$splithead[0]); - $type = $chunkinfo[0]; - $chunknum = $chunkinfo[1]; - $hashlen = $chunkinfo[2]; - $chunklen = $chunkinfo[3]; - if($chunklen>0) - { - $tmparray = array(); - //Convert to hex for easy processing - //First get chunkdata according to length - $chunkdata = bin2hex(substr($splithead[1],0,$chunklen)); - if($type=='a') - { - $maini = 0; - while(strlen($chunkdata)>0) - { - $tmparray[$maini]['HOSTKEY'] = substr($chunkdata, 0, 8); - $tmparray[$maini]['COUNT'] = substr($chunkdata, 8, 2); - $chunkdata = substr($chunkdata,10); - $realcount = hexdec($tmparray[$maini]['COUNT']); - if($realcount>0) - { - for ($i = 0; $i < $realcount; $i++) { - $tmparray[$maini]['PAIRS'][$i]['PREFIX'] = substr($chunkdata, 0, ($hashlen*2)); - $chunkdata = substr($chunkdata,(($hashlen*2))); - } - } - elseif($realcount<0) - { - $this->fatalerror(array("Decoding Error, Somethings gone wrong!",$tmparray[$maini])); - } - $maini++; - } - $addarray['CHUNKNUM'] = $chunknum; - $addarray['HASHLEN'] = $hashlen; - $addarray['CHUNKLEN'] = $chunklen; - $addarray['REAL'] = $tmparray; - $this->saveChunkPart($addarray,"ADD",$listname); - unset($addarray); - } 
- elseif($type=='s') - { - $maini = 0; - while(strlen($chunkdata)>0) - { - $tmparray[$maini]['HOSTKEY'] = substr($chunkdata, 0, 8); - $tmparray[$maini]['COUNT'] = substr($chunkdata, 8, 2); - $chunkdata = substr($chunkdata,10); - $realcount = hexdec($tmparray[$maini]['COUNT']); - if($realcount>0) - { - for ($i = 0; $i < $realcount; $i++) { - $tmparray[$maini]['PAIRS'][$i]['ADDCHUNKNUM'] = substr($chunkdata, 0, 8); - $tmparray[$maini]['PAIRS'][$i]['PREFIX'] = substr($chunkdata, 8, ($hashlen*2)); - $chunkdata = substr($chunkdata,(($hashlen*2)+8)); - } - } - elseif($realcount==0) - { - $tmparray[$maini]['PAIRS'][0]['ADDCHUNKNUM'] = substr($chunkdata, 0, 8); - $chunkdata = substr($chunkdata, 8); - } - else - { - $this->fatalerror(array("Decoding Error, Somethings gone wrong!",$tmparray[$maini])); - } - $maini++; - } - $subarray['CHUNKNUM'] = $chunknum; - $subarray['HASHLEN'] = $hashlen; - $subarray['CHUNKLEN'] = $chunklen; - $subarray['REAL'] = $tmparray; - $this->saveChunkPart($subarray,"SUB",$listname); - unset($subarray); - } - else - { - $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Had no valid label)"); - } - } - else - { - //No ChunkData, Still Insert - if($type=='a') - { - $addarray['CHUNKNUM'] = $chunknum; - $addarray['HASHLEN'] = $hashlen; - $addarray['CHUNKLEN'] = $chunklen; - $this->saveChunkPart($addarray,"ADD",$listname); - unset($addarray); - } - elseif($type=='s') - { - $subarray['CHUNKNUM'] = $chunknum; - $subarray['HASHLEN'] = $hashlen; - $subarray['CHUNKLEN'] = $chunklen; - $this->saveChunkPart($subarray,"SUB",$listname); - unset($subarray); - } - else - { - $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Empty)"); - } - } - $clonedata = substr($splithead[1],$chunklen); - } - return true; - } - /*Saves processed data to the MySQL database*/ - function saveChunkPart($data,$type,$listname) - { - $listname = trim($listname); - //Check what type of data it is... 
- $buildindex = array(); - $buildhost = array(); - $buildpairs = array(); - if($type=="SUB") - { - $value = $data; - if(!isset($this->mainlist['s'][$listname][$value['CHUNKNUM']])) - { - $this->mainlist['s'][$listname][$value['CHUNKNUM']] = true; - $buildindex[] = "('{$value['CHUNKNUM']}','{$value['CHUNKLEN']}')"; - if($value['CHUNKLEN']>0) - { - foreach($value['REAL'] as $newkey=>$newvalue) - { - $buildhost[] = "('{$newvalue['HOSTKEY']}','{$value['CHUNKNUM']}','{$newvalue['COUNT']}','')"; - if(isset($newvalue['PAIRS'])&&count($newvalue['PAIRS'])>0) - { - foreach($newvalue['PAIRS'] as $innerkey=>$innervalue) - { - if( isset($innervalue['PREFIX']) ) { - $buildpairs[] = "('{$newvalue['HOSTKEY']}','{$innervalue['ADDCHUNKNUM']}','{$innervalue['PREFIX']}','')"; - } else { - $buildpairs[] = "('{$newvalue['HOSTKEY']}','{$innervalue['ADDCHUNKNUM']}','','')"; - } - } - } - } - } - } - } - else if($type=="ADD") - { - //Then lets insert add data - $value = $data; - if(!isset($this->mainlist['a'][$listname][$value['CHUNKNUM']])) - { - $this->mainlist['a'][$listname][$value['CHUNKNUM']] = true; - $buildindex[] = "('{$value['CHUNKNUM']}','{$value['CHUNKLEN']}')"; - if($value['CHUNKLEN']>0) - { - foreach($value['REAL'] as $newkey=>$newvalue) - { - $buildhost[] = "('{$newvalue['HOSTKEY']}','{$value['CHUNKNUM']}','{$newvalue['COUNT']}','')"; - if(isset($newvalue['PAIRS'])&&count($newvalue['PAIRS'])>0) - { - foreach($newvalue['PAIRS'] as $innerkey=>$innervalue) - { - if( isset($innervalue['PREFIX']) ) { - $buildpairs[] = "('{$newvalue['HOSTKEY']}','{$innervalue['PREFIX']}','')"; - } else { - $buildpairs[] = "('{$newvalue['HOSTKEY']}','','')"; - } - } - } - } - } - } - } - if(count($buildindex)>0) - { - if($type=="ADD") - $listtype = 'a'; - elseif($type=="SUB") - $listtype = 's'; - //Insert index value - $indexinsert = implode(', ',$buildindex); - $indexins = mysql_query("INSERT INTO `$listname-$listtype-index` (`ChunkNum`,`Chunklen`) VALUES $indexinsert;"); - $error = mysql_error(); 
- if($indexins) - { - if(count($buildhost)>0) - { - //Insert hostkeys index - $hostinsert = implode(', ',$buildhost); - mysql_query("INSERT INTO `$listname-$listtype-hosts` (`Hostkey`,`Chunknum`,`Count`,`FullHash`) VALUES $hostinsert;"); - $error = mysql_error(); - if(!empty($error)) - $this->outputmsg("INSERTED $listname $type HOST KEYS ".mysql_error()); - } - if(count($buildpairs)>0) - { - //Insert prefixes - $pairinsert = implode(', ',$buildpairs); - if($type=="ADD") - mysql_query("INSERT INTO `$listname-$listtype-prefixes` (`Hostkey`,`Prefix`,`FullHash`) VALUES $pairinsert;"); - elseif($type=="SUB") - mysql_query("INSERT INTO `$listname-$listtype-prefixes` (`Hostkey`,`AddChunkNum`,`Prefix`,`FullHash`) VALUES $pairinsert;"); - $error = mysql_error(); - if(!empty($error)) - $this->outputmsg("INSERTED $listname $type PREFIX HOST KEYS ".mysql_error()); - } - } - elseif(!empty($error)) - $this->outputmsg("COULD NOT SAVE $listname $type INDEXS ".mysql_error()); - } - } - /*Get ranges of existing chunks from a requested list - and type (add [a] or sub [s] return them and set - mainlist to recieved for that chunk (prevent dupes)*/ - function getRanges($listname,$mode) - { - $checktable = $listname.'-'.$mode.'-index'; - $results = mysql_query("SELECT ChunkNum FROM `$checktable` ORDER BY `ChunkNum` ASC"); - $ranges = array(); - $i = 0; - $start = 0; - while ($row = mysql_fetch_array($results, MYSQL_BOTH)) - { - $this->mainlist[$mode][$listname][$row['ChunkNum']] = true; - if($i==0) - { - $start = $row['ChunkNum']; - $previous = $row['ChunkNum']; - } - else - { - $expected = $previous + 1; - if($row['ChunkNum']!=$expected) - { - if($start==$previous) - $ranges[] = $start; - else - $ranges[] = $start.'-'.$previous; - $start = $row['ChunkNum']; - } - $previous = $row['ChunkNum']; - } - $i++; - } - if($start>0&&$previous>0) - { - if($start==$previous) - $ranges[] = $start; - else - $ranges[] = $start.'-'.$previous; - } - return $ranges; - } - /*Get both add and sub ranges 
for a requested list*/ - function getFullRanges($listname) - { - $subranges = $this->getRanges($listname,'s'); - $addranges = $this->getRanges($listname,'a'); - return array("Subranges"=>$subranges,"Addranges"=>$addranges); - } - /*Format a full request body for a desired list including - name and full ranges for add and sub*/ - function formattedRequest($listname) - { - $fullranges = $this->getFullRanges($listname); - $buildpart = ''; - if(count($fullranges['Subranges'])>0) - $buildpart .= 's:'.implode(',',$fullranges['Subranges']); - if(count($fullranges['Subranges'])>0&&count($fullranges['Addranges'])>0) - $buildpart .= ':'; - if(count($fullranges['Addranges'])>0) - $buildpart .= 'a:'.implode(',',$fullranges['Addranges']); - return $listname.';'.$buildpart."\n"; - } - /*Called when GSB returns a SUB-DEL or ADD-DEL response*/ - function deleteRange($range,$mode,$listname) - { - $buildtrunk = $listname.'-'.$mode; - if(substr_count($range,'-')>0) - { - $deleterange = explode('-',trim($range)); - $clause = "`ChunkNum` >= '{$deleterange[0]}' AND `ChunkNum` <= '{$deleterange[1]}'"; - } - else - $clause = "`ChunkNum` = '$range'"; - //Delete from index - mysql_query("DELETE FROM `$buildtrunk-index` WHERE $clause"); - - //Select all host keys that match chunks (we'll delete them after but we need the hostkeys list!) - $result = mysql_query("SELECT `Hostkey` FROM `$buildtrunk-hosts` WHERE $clause"); - $buildprefixdel = array(); - if($result&&mysql_num_rows($result)>0) - { - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - if(!empty($row['Hostkey'])) - $buildprefixdel[] = $row['Hostkey']; - } - if (count($buildprefixdel)) { - //Delete all matching hostkey prefixes - mysql_query( - "DELETE FROM `$buildtrunk-prefixes` WHERE `Hostkey` in ('" . implode('\',\'', $buildprefixdel) . 
"')" - ); - } - - //Delete all matching hostkeys - mysql_query("DELETE FROM `$buildtrunk-hosts` WHERE $clause"); - } - } - /*Main part of updater function, will call all other functions, merely requires - the request body, it will then process and save all data as well as checking - for ADD-DEL and SUB-DEL, runs silently so won't return anything on success*/ - function getData($body) - { - if(empty($body)) - $this->fatalerror("Missing a body for data request"); - $this->trans_begin(); - $buildopts = array(CURLOPT_POST=>true,CURLOPT_POSTFIELDS=>$body."\n"); - $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=".$this->apikey."&appver=".$this->version."&pver=".$this->apiversion,$buildopts,"data"); - preg_match('/^n:(.*)$/m', $result[1], $match); - $timeout = $match[1]; - $this->setTimeout($timeout); - if(substr_count($result[1],'r:pleasereset')>0) - $this->resetDatabase(); - else - { - $formattedlist = array(); - if(substr_count($result[1],'i:')>0) - { - $splitlists = explode('i:',$result[1]); - unset($splitlists[0]); - foreach($splitlists as $key=>$value) - { - $listdata = explode("\n",trim($value)); - $listname = $listdata[0]; - unset($listdata[0]); - $formattedlist[$listname] = $listdata; - } - foreach($formattedlist as $key=>$value) - { - $listname = $key; - foreach($value as $keyinner=>$valueinner) - { - if(substr_count($valueinner,"u:")>0) - { - $chunkdata = $this->googleDownloader('http://'.trim(str_replace('u:','',$valueinner)),false,"data"); - $processed = $this->processChunks($chunkdata[1],$listname); - $this->outputmsg("Saved a chunk file"); - } - elseif(substr_count($valueinner,"ad:")>0) - { - if(substr_count($valueinner,',')>0) - { - $valueinner = explode(',',trim(str_replace("ad:","",$valueinner))); - foreach($valueinner as $keyadd=>$valueadd) - { - $this->deleteRange($valueadd,'a',$listname); - } - } - else - $this->deleteRange(trim(str_replace("ad:","",$valueinner)),'a',$listname); - } - 
elseif(substr_count($valueinner,"sd:")>0) - { - if(substr_count($valueinner,',')>0) - { - $valueinner = explode(',',trim(str_replace("sd:","",$valueinner))); - foreach($valueinner as $keyadd=>$valueadd) - { - $this->deleteRange($valueadd,'s',$listname); - } - } - else - $this->deleteRange(trim(str_replace("sd:","",$valueinner)),'s',$listname); - } - } - - } - } - else - { - $this->outputmsg('No data available in list'); - } - } - $this->trans_commit(); - return true; - } - /*Shortcut to run updater*/ - function runUpdate() - { - $this->checkTimeout('data'); - $require = ""; - foreach($this->usinglists as $value) - $require .= $this->formattedRequest($value); - $this->outputmsg("Using $require"); - $this->getData($require); - } - //LOOKUP FUNCTIONS - /*Used to check the canonicalize function*/ - function validateMethod() - { - //Input => Expected - $cases = array( - "http://host/%25%32%35" => "http://host/%25", - "http://host/%25%32%35%25%32%35" => "http://host/%25%25", - "http://host/%2525252525252525" => "http://host/%25", - "http://host/asdf%25%32%35asd" => "http://host/asdf%25asd", - "http://host/%%%25%32%35asd%%" => "http://host/%25%25%25asd%25%25", - "http://www.google.com/" => "http://www.google.com/", - "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/" => "http://168.188.99.26/.secure/www.ebay.com/", - "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/" => "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/", - "http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B" => 'http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+', - "http://3279880203/blah" => "http://195.127.0.11/blah", - "http://www.google.com/blah/.." 
=> "http://www.google.com/", - "www.google.com/" => "http://www.google.com/", - "www.google.com" => "http://www.google.com/", - "http://www.evil.com/blah#frag" => "http://www.evil.com/blah", - "http://www.GOOgle.com/" => "http://www.google.com/", - "http://www.google.com.../" => "http://www.google.com/", - "http://www.google.com/foo\tbar\rbaz\n2" => "http://www.google.com/foobarbaz2", - "http://www.google.com/q?" => "http://www.google.com/q?", - "http://www.google.com/q?r?" => "http://www.google.com/q?r?", - "http://www.google.com/q?r?s" => "http://www.google.com/q?r?s", - "http://evil.com/foo#bar#baz" => "http://evil.com/foo", - "http://evil.com/foo;" => "http://evil.com/foo;", - "http://evil.com/foo?bar;" => "http://evil.com/foo?bar;", - "http://\x01\x80.com/" => "http://%01%80.com/", - "http://notrailingslash.com" => "http://notrailingslash.com/", - "http://www.gotaport.com:1234/" => "http://www.gotaport.com:1234/", - " http://www.google.com/ " => "http://www.google.com/", - "http:// leadingspace.com/" => "http://%20leadingspace.com/", - "http://%20leadingspace.com/" => "http://%20leadingspace.com/", - "%20leadingspace.com/" => "http://%20leadingspace.com/", - "https://www.securesite.com/" => "https://www.securesite.com/", - "http://host.com/ab%23cd" => "http://host.com/ab%23cd", - "http://host.com//twoslashes?more//slashes" => "http://host.com/twoslashes?more//slashes" - ); - foreach($cases as $key=>$value) - { - $canit = $this->Canonicalize($key); - $canit = $canit['GSBURL']; - if($canit==$value) - outputmsg("PASSED: $key"); - else - outputmsg("INVALID:
ORIGINAL: $key
EXPECTED: $value
RECIEVED: $canit
"); - - } - } - /*Special thanks Steven Levithan (stevenlevithan.com) for the ridiculously complicated regex - required to parse urls. This is used over parse_url as it robustly provides access to - port, userinfo etc and handles mangled urls very well. - Expertly integrated into phpGSB by Sam Cleaver ;) - Thanks to mikegillis677 for finding the seg. fault issue in the old function. - Passed validateMethod() check on 17/01/12*/ - function j_parseUrl($url) - { - $strict = '/^(?:([^:\/?#]+):)?(?:\/\/\/?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?(((?:\/(\w:))?((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/'; - $loose = '/^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((?:\/(\w:))?(\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/'; - preg_match($loose, $url, $match); - if(empty($match)) - { - //As odd as its sounds, we'll fall back to strict (as technically its more correct and so may salvage completely mangled urls) - unset($match); - preg_match($strict, $url, $match); - } - $parts = array("source"=>'',"scheme"=>'',"authority"=>'',"userinfo"=>'',"user"=>'',"password"=>'',"host"=>'',"port"=>'',"relative"=>'',"path"=>'',"drive"=>'',"directory"=>'',"file"=>'',"query"=>'',"fragment"=>''); - switch (count ($match)) { - case 15: $parts['fragment'] = $match[14]; - case 14: $parts['query'] = $match[13]; - case 13: $parts['file'] = $match[12]; - case 12: $parts['directory'] = $match[11]; - case 11: $parts['drive'] = $match[10]; - case 10: $parts['path'] = $match[9]; - case 9: $parts['relative'] = $match[8]; - case 8: $parts['port'] = $match[7]; - case 7: $parts['host'] = $match[6]; - case 6: $parts['password'] = $match[5]; - case 5: $parts['user'] = $match[4]; - case 4: $parts['userinfo'] = $match[3]; - case 3: $parts['authority'] = $match[2]; - case 2: $parts['scheme'] = $match[1]; - case 1: $parts['source'] = $match[0]; - } - return $parts; - } - /*Regex to check if 
its a numerical IP address*/ - function is_ip($ip) - { - return preg_match("/^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" . - "(\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/", $ip); - } - /*Checks if input is in hex format*/ - function is_hex($x) - { - //Relys on the fact that hex often includes letters meaning PHP will disregard the string - if(($x+3) == 3) - return dechex(hexdec($x)) == $x; - return false; - } - /*Checks if input is in octal format*/ - function is_octal($x) - { - //Relys on the fact that in IP addressing octals must begin with a 0 to denote octal - return substr($x,0,1) == 0; - } - /*Converts hex or octal input into decimal */ - function hexoct2dec($value) - { - //As this deals with parts in IP's we can be more exclusive - if(substr_count(substr($value,0,2),'0x')>0&&$this->is_hex($value)) - { - return hexdec($value); - } - elseif($this->is_octal($value)) - { - return octdec($value); - } - else - return false; - } - /*Converts IP address part in HEX to decimal*/ - function iphexdec($hex) - { - //Removes any leading 0x (used to denote hex) and then and leading 0's) - $temp = str_replace('0x','',$hex); - $temp = ltrim($temp,"0"); - return hexdec($temp); - } - /*Converts full IP address in HEX to decimal*/ - function hexIPtoIP($hex) - { - //Remove hex identifier and leading 0's (not significant) - $tempip = str_replace('0x','',$hex); - $tempip = ltrim($tempip,"0"); - //It might be hex - if($this->is_hex($tempip)) - { - //There may be a load of junk before the part we need - if(strlen($tempip)>8) - { - $tempip = substr($tempip,-8); - } - $hexplode = preg_split('//', $tempip, -1, PREG_SPLIT_NO_EMPTY); - while(count($hexplode)<8) - array_unshift($hexplode,0); - //Normalise - $newip = hexdec($hexplode[0].$hexplode[1]).'.'.hexdec($hexplode[2].$hexplode[3]).'.'.hexdec($hexplode[4].$hexplode[5]).'.'.hexdec($hexplode[6].$hexplode[7]); - //Now check if its an IP - if($this->is_ip($newip)) - return $newip; - else - return false; - } - else 
- return false; - } - /*Checks if an IP provided in either hex, octal or decimal is in fact - an IP address. Normalises to a four part IP address.*/ - function isValid_IP($ip) - { - //First do a simple check, if it passes this no more needs to be done - if($this->is_ip($ip)) - return $ip; - - //Its a toughy... eerm perhaps its all in hex? - $checkhex = $this->hexIPtoIP($ip); - if($checkhex) - return $checkhex; - - //If we're still here it wasn't hex... maybe a DWORD format? - $checkdword = $this->hexIPtoIP(dechex($ip)); - if($checkdword) - return $checkdword; - - //Nope... maybe in octal or a combination of standard, octal and hex?! - $ipcomponents = explode('.',$ip); - $ipcomponents[0] = $this->hexoct2dec($ipcomponents[0]); - if(count($ipcomponents)==2) - { - //The writers of the RFC docs certainly didn't think about the clients! This could be a DWORD mixed with an IP part - if($ipcomponents[0]<=255&&is_int($ipcomponents[0])&&is_int($ipcomponents[1])) - { - $threeparts = dechex($ipcomponents[1]); - $hexplode = preg_split('//', $threeparts, -1, PREG_SPLIT_NO_EMPTY); - if(count($hexplode)>4) - { - $newip = $ipcomponents[0].'.'.$this->iphexdec($hexplode[0].$hexplode[1]).'.'.$this->iphexdec($hexplode[2].$hexplode[3]).'.'.$this->iphexdec($hexplode[4].$hexplode[5]); - //Now check if its valid - if($this->is_ip($newip)) - return $newip; - } - } - } - $ipcomponents[1] = $this->hexoct2dec($ipcomponents[1]); - if(count($ipcomponents)==3) - { - //Guess what... it could also be a DWORD mixed with two IP parts! 
- if(($ipcomponents[0]<=255&&is_int($ipcomponents[0]))&&($ipcomponents[1]<=255&&is_int($ipcomponents[1]))&&is_int($ipcomponents[2])) - { - $twoparts = dechex($ipcomponents[2]); - $hexplode = preg_split('//', $twoparts, -1, PREG_SPLIT_NO_EMPTY); - if(count($hexplode)>3) - { - $newip = $ipcomponents[0].'.'.$ipcomponents[1].'.'.$this->iphexdec($hexplode[0].$hexplode[1]).'.'.$this->iphexdec($hexplode[2].$hexplode[3]); - //Now check if its valid - if($this->is_ip($newip)) - return $newip; - } - } - } - //If not it may be a combination of hex and octal - if(count($ipcomponents)>=4) - { - $tmpcomponents = array($ipcomponents[2],$ipcomponents[3]); - foreach($tmpcomponents as $key=>$value) - { - if(!$tmpcomponents[$key] = $this->hexoct2dec($value)) - return false; - } - - array_unshift($tmpcomponents,$ipcomponents[0],$ipcomponents[1]); - //Convert back to IP form - $newip = implode('.',$tmpcomponents); - - //Now check if its valid - if($this->is_ip($newip)) - return $newip; - } - - //Well its not an IP that we can recognise... theres only so much we can do! - return false; - } - /*Had to write another layer as built in PHP urlencode() escapes all non - alpha-numeric Google states to only urlencode if its below 32 or above - or equal to 127 (some of those are non alpha-numeric and so urlencode - on its own won't work).*/ - function flexURLEncode($url,$ignorehash=false) - { - //Had to write another layer as built in PHP urlencode() escapes all non alpha-numeric - //google states to only urlencode if its below 32 or above or equal to 127 (some of those - //are non alpha-numeric and so urlencode on its own won't work). 
- $urlchars = preg_split('//', $url, -1, PREG_SPLIT_NO_EMPTY); - if(count($urlchars)>0) - { - foreach($urlchars as $key=>$value) - { - - $ascii = ord($value); - if($ascii<=32||$ascii>=127||($value=='#'&&!$ignorehash)||$value=='%') - $urlchars[$key] = rawurlencode($value); - } - return implode('',$urlchars); - } - else - return $url; - } - /*Canonicalize a full URL according to Google's definition.*/ - function Canonicalize($url) - { - //Remove line feeds, return carriages, tabs, vertical tabs - $finalurl = trim(str_replace(array("\x09","\x0A","\x0D","\x0B"),'',$url)); - //URL Encode for easy extraction - $finalurl = $this->flexURLEncode($finalurl,true); - //Now extract hostname & path - $parts = $this->j_parseUrl($finalurl); - $hostname = $parts['host']; - $path = $parts['path']; - $query = $parts['query']; - $lasthost = ""; - $lastpath = ""; - $lastquery = ""; - //Remove all hex coding (loops max of 50 times to stop craziness but should never - //reach that) - for ($i = 0; $i < 50; $i++) { - $hostname = rawurldecode($hostname); - $path = rawurldecode($path); - $query = rawurldecode($query); - if($hostname==$lasthost&&$path==$lastpath&&$query==$lastquery) - break; - $lasthost = $hostname; - $lastpath = $path; - $lastquery = $query; - } - //Deal with hostname first - //Replace all leading and trailing dots - $hostname = trim($hostname,'.'); - //Replace all consecutive dots with one dot - $hostname = preg_replace("/\.{2,}/",".",$hostname); - //Make it lowercase - $hostname = strtolower($hostname); - //See if its a valid IP - $hostnameip = $this->isValid_IP($hostname); - if($hostnameip) - { - $usingip = true; - $usehost = $hostnameip; - } - else - { - $usingip = false; - $usehost = $hostname; - } - //The developer guide has lowercasing and validating IP other way round but its more efficient to - //have it this way - //Now we move onto canonicalizing the path - $pathparts = explode('/',$path); - foreach($pathparts as $key=>$value) - { - if($value=="..") - { - 
if($key!=0) - { - unset($pathparts[$key-1]); - unset($pathparts[$key]); - } - else - unset($pathparts[$key]); - } - elseif($value=="."||empty($value)) - unset($pathparts[$key]); - } - if(substr($path,-1,1)=="/") - $append = "/"; - else - $append = false; - $path = "/".implode("/",$pathparts); - if($append&&substr($path,-1,1)!="/") - $path .= $append; - $usehost = $this->flexURLEncode($usehost); - $path = $this->flexURLEncode($path); - $query = $this->flexURLEncode($query); - if(empty($parts['scheme'])) - $parts['scheme'] = 'http'; - $canurl = $parts['scheme'].'://'; - $realurl = $canurl; - if(!empty($parts['userinfo'])) - $realurl .= $parts['userinfo'].'@'; - $canurl .= $usehost; - $realurl .= $usehost; - if(!empty($parts['port'])) - { - $canurl .= ':'.$parts['port']; - $realurl .= ':'.$parts['port']; - } - $canurl .= $path; - $realurl .= $path; - if(substr_count($finalurl,"?")>0) - { - $canurl .= '?'.$parts['query']; - $realurl .= '?'.$parts['query']; - } - if(!empty($parts['fragment'])) - $realurl .= '#'.$parts['fragment']; - return array("GSBURL"=>$canurl,"CleanURL"=>$realurl,"Parts"=>array("Host"=>$usehost,"Path"=>$path,"Query"=>$query,"IP"=>$usingip)); - } - /*SHA-256 input (short method).*/ - function sha256($data) - { - return hash('sha256',$data); - } - /*Make Hostkeys for use in a lookup*/ - function makeHostKey($host,$usingip) - - { - if($usingip) - $hosts = array($host."/"); - - else - { - $hostparts = explode(".",$host); - if(count($hostparts)>2) - { - $backhostparts = array_reverse($hostparts); - $threeparts = array_slice($backhostparts,0,3); - $twoparts = array_slice($threeparts,0,2); - $hosts = array(implode('.',array_reverse($threeparts))."/",implode('.',array_reverse($twoparts))."/"); - } - else - $hosts = array($host."/"); - } - //Now make key & key prefix - $returnhosts = array(); - foreach($hosts as $value) - { - $fullhash = $this->sha256($value); - $returnhosts[$fullhash] = 
array("Host"=>$value,"Prefix"=>substr($fullhash,0,8),"Hash"=>$fullhash); - } - return $returnhosts; - } - /*Hash up a list of values from makePrefixes() (will possibly be - combined into that function at a later date*/ - function makeHashes($prefixarray) - { - if(count($prefixarray)>0) - { - $returnprefixes = array(); - foreach($prefixarray as $value) - { - $fullhash = $this->sha256($value); - $returnprefixes[$fullhash] = array("Original"=>$value,"Prefix"=>substr($fullhash,0,8),"Hash"=>$fullhash); - } - return $returnprefixes; - } - else - return false; - } - /*Make URL prefixes for use after a hostkey check*/ - function makePrefixes($host,$path,$query,$usingip) - { - $prefixes = array(); - //Exact hostname in the url - $hostcombos = array(); - $hostcombos[] = $host; - if(!$usingip) - { - $hostparts = explode('.',$host); - $backhostparts = array_reverse($hostparts); - if(count($backhostparts)>5) - $maxslice = 5; - else - $maxslice = count($backhostparts); - $topslice = array_slice($backhostparts,0,$maxslice); - while($maxslice>1) - { - $hostcombos[] = implode('.',array_reverse($topslice)); - $maxslice--; - $topslice = array_slice($backhostparts,0,$maxslice); - } - } - else - $hostcombos[] = $host; - $hostcombos = array_unique($hostcombos); - $variations = array(); - if(!empty($path)) - { - $pathparts = explode("/",$path); - if(count($pathparts)>4) - $upperlimit = 4; - else - $upperlimit = count($pathparts); - } - foreach($hostcombos as $key=>$value) - { - if(!empty($query)) - $variations[] = $value.$path.'?'.$query; - $variations[] = $value.$path; - if(!empty($path)) - { - $i = 0; - $pathiparts = ""; - while($i<$upperlimit) - { - if($i!=count($pathparts)-1) - $pathiparts = $pathiparts.$pathparts[$i]."/"; - else - $pathiparts = $pathiparts.$pathparts[$i]; - $variations[] = $value.$pathiparts; - $i++; - } - } - } - $variations = array_unique($variations); - return $this->makeHashes($variations); - } - /*Process data provided from the response of a full-hash GSB - 
request*/ - function processFullLookup($data) - { - $clonedata = $data; - $extracthash = array(); - while(strlen($clonedata)>0) - { - $splithead = explode("\n",$clonedata,2); - $chunkinfo = explode(':',$splithead[0]); - $listname = $chunkinfo[0]; - $addchunk = $chunkinfo[1]; - $chunklen = $chunkinfo[2]; - $chunkdata = bin2hex(substr($splithead[1],0,$chunklen)); - while(strlen($chunkdata)>0) - { - $extracthash[$listname][$addchunk] = substr($chunkdata,0,64); - $chunkdata = substr($chunkdata,64); - } - $clonedata = substr($splithead[1],$chunklen); - } - return $extracthash; - } - /*Add a full-hash key to a prefix or hostkey (the variable is $prefix but it could - be either).*/ - function addFullHash($prefix,$chunknum,$fullhash,$listname) - { - $buildtrunk = $listname."-a"; - //First check hosts - $result = mysql_query("SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '$prefix' AND `Chunknum` = '$chunknum'"); - if($result&&mysql_num_rows($result)>0) - { - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - if(empty($row['FullHash'])) - { - //We've got a live one! Insert the full hash for it - $addresult = mysql_query("UPDATE `$buildtrunk-hosts` SET `FullHash` = '$fullhash' WHERE `ID` = '{$row['ID']}';"); - if(!$addresult) - $this->fatalerror("Could not cache full-hash key. 
$prefix, $chunknum, $fullhash, $listname"); - } - } - } - else - { - //If there are no rows it must be a prefix - $result = mysql_query("SELECT * FROM `$buildtrunk-prefixes` WHERE `Prefix` = '$prefix'"); - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - if(empty($row['FullHash'])) - { - $resulttwo = mysql_query("SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$row['Hostkey']}' AND `Chunknum` = '$chunknum'"); - while ($rowtwo = mysql_fetch_array($resulttwo, MYSQL_ASSOC)) - { - if(hexdec($rowtwo['Count'])>0) - { - $addresult = mysql_query("UPDATE `$buildtrunk-prefixes` SET `FullHash` = '$fullhash' WHERE `ID` = '{$row['ID']}';"); - if(!$addresult) - $this->fatalerror("Could not cache full-hash key. $prefix, $chunknum, $fullhash, $listname"); - } - } - } - } - } - - } - /*Check database for any cached full-length hashes for a given prefix.*/ - function cacheCheck($prefix) - { - foreach($this->usinglists as $value) - { - $buildtrunk = $value."-a"; - $result = mysql_query("SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '$prefix' AND `FullHash` != ''"); - if($result&&mysql_num_rows($result)>0) - { - while($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - return array($row['FullHash'],$row['Chunknum']); - } - } - else - { - $result = mysql_query("SELECT * FROM `$buildtrunk-prefixes` WHERE `Prefix` = '$prefix' AND `FullHash` != ''"); - if($result&&mysql_num_rows($result)>0) - { - while($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - $resulttwo = mysql_query("SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$row['Hostkey']}'"); - while ($rowtwo = mysql_fetch_array($resulttwo, MYSQL_ASSOC)) - { - if(hexdec($rowtwo['Count'])>0) - { - return array($row['FullHash'],$rowtwo['Chunknum']); - } - - } - } - } - } - } - return false; - } - /*Do a full-hash lookup based on prefixes provided, returns (bool) true - on a match and (bool) false on no match.*/ - function doFullLookup($prefixes,$originals) - { - //Store copy of original prefixes - 
$cloneprefixes = $prefixes; - //They should really all have the same prefix size.. we'll just check one - $prefixsize = strlen($prefixes[0][0])/2; - $length = count($prefixes)*$prefixsize; - foreach($prefixes as $key=>$value) - { - //Check cache on each iteration (we can return true earlier if we get a match!) - $cachechk = $this->cacheCheck($value[0]); - if($cachechk) - { - if(isset($originals[$cachechk[0]])) - { - //Check from same chunk - foreach($cloneprefixes as $nnewvalue) - { - if($nnewvalue[1]==$cachechk[1]&&$value[0]==$originals[$cachechk[0]]['Prefix']) - { - //From same chunks - return true; - } - - } - } - } - $prefixes[$key] = pack("H*",$value[0]); - } - //No cache matches so we continue with request - $body = "$prefixsize:$length\n".implode("",$prefixes); - - $buildopts = array(CURLOPT_POST=>true,CURLOPT_POSTFIELDS=>$body); - $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=".$this->apikey."&appver=".$this->version."&pver=".$this->apiversion,$buildopts,"lookup"); - - if($result[0]['http_code']==200&&!empty($result[1])) - { - //Extract hashes from response - $extractedhashes = $this->processFullLookup($result[1]); - //Loop over each list - foreach($extractedhashes as $key=>$value) - { - //Loop over each value in each list - foreach($value as $newkey=>$newvalue) - { - if(isset($originals[$newvalue])) - { - //Okay it matches a full-hash we have, now to check they're from the same chunks - foreach($cloneprefixes as $nnewvalue) - { - if($nnewvalue[1]==$newkey&&$nnewvalue[0]==$originals[$newvalue]['Prefix']) - { - //From same chunks - //Add full hash to database (cache) - $this->addFullHash($nnewvalue[0],$nnewvalue[1],$newvalue,$key); - return true; - } - - } - } - } - } - return false; - } - elseif($result[0]['http_code']==204&&strlen($result[1])==0) - { - //204 Means no match - return false; - } - else - { - //"No No No! This just doesn't add up at all!" 
- $this->fatalerror("ERROR: Invalid response returned from GSB ({$result[0]['http_code']})"); - } - } - /*Checks to see if a match for a prefix is found in the sub table, if it is then we won't do - a full-hash lookup. Return true on match in sub list, return false on negative.*/ - function subCheck($listname,$prefixlist,$mode) - { - $buildtrunk = $listname.'-s'; - if($mode=="prefix") - { - //Mode is prefix so the add part was a prefix, not a hostkey so we just check prefixes (saves a lookup) - foreach($prefixlist as $value) - { - $result = mysql_query("SELECT * FROM `$buildtrunk-prefixes` WHERE `Prefix` = '{$value[0]}'"); - if($result&&mysql_num_rows($result)>0) - { - //As interpreted from Developer Guide if theres a match in sub list it cancels out the add listing - //we'll double check its from the same chunk just to be pedantic - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - if(hexdec($row['AddChunkNum'])==$value[1]) - return true; - } - } - - } - return false; - } - elseif($mode=="hostkey") - { - //Mode is hostkey - foreach($prefixlist as $value) - { - $result = mysql_query("SELECT * FROM `$buildtrunk-prefixes` WHERE `Hostkey` = '{$value[0]}'"); - if($result&&mysql_num_rows($result)>0) - { - //As interpreted from Developer Guide if theres a match in sub list it cancels out the add listing - //we'll double check its from the same chunk just to be pedantic - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - if(hexdec($row['AddChunkNum'])==$value[1]&&empty($row['Prefix'])) - return true; - } - } - - } - return false; - } - $this->fatalerror("Invalid SubCheck Mode $mode"); - } - /*Does a full URL lookup on given lists, will check if its in database, if slight match there then - will do a full-hash lookup on GSB, returns (bool) true on match and (bool) false on negative.*/ - function doLookup($url) - { - $lists = $this->usinglists; - //First canonicalize the URL - $canurl = $this->Canonicalize($url); - //Make hostkeys - $hostkeys = 
$this->makeHostKey($canurl['Parts']['Host'],$canurl['Parts']['IP']); - $matches = array(); - foreach($lists as $key=>$value) - { - $buildtrunk = $value.'-a'; - //Loop over each list - foreach($hostkeys as $keyinner=>$valueinner) - { - //Within each list loop over each hostkey - $result = mysql_query("SELECT * FROM `$buildtrunk-hosts` WHERE `Hostkey` = '{$valueinner['Prefix']}'"); - if($result&&mysql_num_rows($result)>0) - { - //For each hostkey match - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - { - $nicecount = hexdec($row['Count']); - if($nicecount>0) - { - //There was a match and the count is more than one so there are prefixes! - //Hash up a load of prefixes and create the build query if we haven't done so already - if(!isset($prefixes)) - { - $prefixes = $this->makePrefixes($canurl['Parts']['Host'],$canurl['Parts']['Path'],$canurl['Parts']['Query'],$canurl['Parts']['IP']); - $buildprequery = array(); - foreach($prefixes as $prefix) - { - $buildprequery[] = " `Prefix` = '{$prefix['Prefix']}' "; - } - $buildprequery = implode("OR",$buildprequery); - } - //Check if there are any matching prefixes - $resulttwo = mysql_query("SELECT * FROM `$buildtrunk-prefixes` WHERE ($buildprequery) AND `Hostkey` = '{$row['Hostkey']}'"); - if($resulttwo&&mysql_num_rows($resulttwo)>0) - { - //We found prefix matches - $prematches = array(); - $prelookup = array(); - while ($rowtwo = mysql_fetch_array($resulttwo, MYSQL_ASSOC)) - { - $prematches[] = array($rowtwo['Prefix'],$row['Chunknum']); - } - //Before we send off any requests first check whether its in sub table - $subchk = $this->subCheck($value,$prematches,"prefix"); - if(!$subchk) - { - //Send off any matching prefixes to do some full-hash key checks - $flookup = $this->doFullLookup($prematches,$prefixes); - if($flookup) - return true; - } - } - //If we didn't find matches then do nothing (keep looping till end and it'll return negative) - } - else - { - $subchk = 
$this->subCheck($value,array(array($row['Hostkey'],$row['Chunknum'])),"hostkey"); - if(!$subchk) - { - //There was a match but the count was 0 that entire domain could be a match, Send off to check - $flookup = $this->doFullLookup(array(array($row['Hostkey'],$row['Chunknum'])),$hostkeys); - if($flookup) - return true; - } - } - } - } - } - } - return false; - - } - } -?> \ No newline at end of file +dbConnect($database, $username, $password, $host); + } + } + + public function __destruct() { + $this->close(); + } + + private function close() { + $this->outputmsg("Closing phpGSB. (Peak Memory: " . (round(memory_get_peak_usage() / 1048576, 3)) . "MB)"); + } + + public function silent() { + $this->verbose = false; + } + + public function trans_disable() { + $this->transenabled = false; + } + + public function trans_enable() { + $this->transenabled = true; + } + + private function trans_begin() { + if ($this->transenabled) { + $this->transtarted = true; + $this->outputmsg("Begin MySQL Transaction"); + $this->db->query('START TRANSACTION;'); + } + } + + private function trans_commit() { + if ($this->transtarted && $this->transenabled) { + $this->transtarted = false; + $this->outputmsg("Comitting Transaction"); + $this->db->query('COMMIT;'); + } + } + + private function trans_rollback() { + if ($this->transtarted && $this->transenabled) { + $this->transtarted = false; + $this->outputmsg("Rolling Back Transaction"); + $this->db->query('ROLLBACK;'); + } + } + + /*Function to output messages, used instead of echo, + will make it easier to have a verbose switch in later + releases*/ + private function outputmsg($msg) { + if ($this->verbose) { + echo $msg . "\n"; + } + } + + /*Function to output errors, used instead of echo, + will make it easier to have a verbose switch in later + releases*/ + private function fatalerror($msg) { + if ($this->verbose) { + print_r($msg); + echo "\n"; + } + $this->trans_rollback(); + die(); + } + + /*Wrapper to connect to database. 
Simples.*/ + private function dbConnect($database, $username, $password, $host = "localhost") { + $this->db = new PDO('mysql:host=' . $host . ';dbname=' . $database, + $username, + $password + ); + + $this->db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); + } + + /** + * Simple logic function to calculate timeout based on the number of previous errors + */ + private function calc($errors) { + //According to Developer Guide Formula + if ($errors == 1) { + //According to Developer Guide (1st error, wait a minute) + return 60; + } elseif ($errors > 5) { + //According to Developer Guide (Above 5 errors check every 4 hours) + return 28800; + } else { + //According to Developer Guide we simply double up our timeout each + // time and use formula: + //(Adapted to be relative to errors) ( ((2^$errors) * 7.5) * + // (decimalrand(0,1) + 1)) to produce + // a result between: 120min-240min for example + return floor((pow(2, $errors) * 7.5) * ((rand(0, 1000) / 1000) + 1)); + } + } + + /** + * Writes backoff timeouts, uses calc() to calculate timeouts and then writes to file + * for next check + */ + private function Backoff($errdata = false, $type) { + if ($type == "data") + $file = 'nextcheck.dat'; + else + $file = 'nextcheckl.dat'; + $curstatus = explode('||', file_get_contents($this->pingfilepath . $file)); + $curstatus[1] = $curstatus[1] + 1; + $seconds = $this->calc($curstatus[1]); + $until = time() + $seconds . '||' . $curstatus[1]; + file_put_contents($this->pingfilepath . $file, $until); + $this->fatalerror(array( + "Invalid Response... Backing Off", + $errdata + )); + } + + /** + * Writes timeout from valid requests to nextcheck file + */ + private function setTimeout($seconds) { + if (file_exists($this->pingfilepath . 'nextcheck.dat')) { + $curstatus = explode('||', @file_get_contents($this->pingfilepath . 'nextcheck.dat')); + $until = time() + $seconds . '||' . $curstatus[1]; + } else { + $until = time() + $seconds . 
'||'; + } + file_put_contents($this->pingfilepath . 'nextcheck.dat', $until); + } + + /** + * Checks timeout in timeout files (usually performed at the + * start of script) + */ + private function checkTimeout($type) { + if ($type == "data") + $file = 'nextcheck.dat'; + else + $file = 'nextcheckl.dat'; + $curstatus = explode('||', file_get_contents($this->pingfilepath . $file)); + if (time() < $curstatus[0]) { + $this->fatalerror("Must wait another " . ($curstatus[0] - time()) . " seconds before another request"); + } else + $this->outputmsg("Allowed to request"); + } + + /** + * Function downloads from URL's, POST data can be + * passed via $options. $followbackoff indicates + * whether to follow backoff procedures or not + */ + private function googleDownloader($url, $options, $followbackoff = false) { + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + + if (is_array($options)) + curl_setopt_array($ch, $options); + + $data = curl_exec($ch); + $info = curl_getinfo($ch); + curl_close($ch); + + if ($followbackoff && $info['http_code'] > 299) { + $this->Backoff($info, $followbackoff); + } + return array( + $info, + $data + ); + } + + //UPDATER FUNCTIONS + + /** + * Resets lists database, only called if GSB issues r:resetdatabase + */ + private function resetDatabase() { + //Lord knows why they would EVER issue this request! + if (!empty($this->adminemail)) + mail($this->adminemail, 'Reset Database Request Issued', 'For some crazy unknown reason GSB requested a database reset at ' . 
time()); + foreach ($this->usinglists as $value) { + $this->query("TRUNCATE TABLE `$value-s-index`"); + $this->query("TRUNCATE TABLE `$value-s-hosts`"); + $this->query("TRUNCATE TABLE `$value-s-prefixes`"); + $this->query("TRUNCATE TABLE `$value-a-index`"); + $this->query("TRUNCATE TABLE `$value-a-hosts`"); + $this->query("TRUNCATE TABLE `$value-a-prefixes`"); + } + } + + /** + * Processes data recieved from a GSB data request into a managable array + */ + private function processChunks($data, $listname) { + $len = strlen($data); + var_dump($len); + $offset = $z = 0; + while ($offset < $len) { + $x = strpos($data, ':', $offset); + $type = substr($data, $offset, $x-$offset); + + $offset = $x+1; + $x = strpos($data, ':', $offset); + $chunknum = substr($data, $offset, $x-$offset); + $offset = $x+1; + if (!is_numeric($chunknum)) { + $this->fatalerror(array( + "Decoding Error, chunknum is not numeric!", + $chunknum + )); + } + + $x = strpos($data, ':', $offset); + $hashlen = substr($data, $offset, $x-$offset); + $offset = $x+1; + if (!is_numeric($hashlen)) { + $this->fatalerror(array( + "Decoding Error, hashlen is not numeric!", + $hashlen + )); + } + $x = strpos($data, "\n", $offset); + $chunklen = substr($data, $offset, $x-$offset); + $offset = $x+1; + $chunkdata = NULL; + if (!is_numeric($chunklen)) { + $this->fatalerror(array( + "Decoding Error, chunklen is not numeric!", + $chunklen + )); + } + if ($chunklen > 0) { + $chunkdata = bin2hex(substr($data, $offset, $chunklen)); + $offset += $chunklen; + } + + if ($type != 'a' && $type != 's') { + $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Had no valid label)"); + continue; + } + + $dataArr = array( + 'chunknum' => $chunknum, + 'hashlen' => $hashlen, + 'chunklen' => $chunklen, + 'real' => array() + ); + + $chunkOffset = 0; + while ($chunkOffset < $chunklen) { + $row = array( + 'hostkey' => substr($chunkdata, $chunkOffset, 8), + 'count' => hexdec(substr($chunkdata, $chunkOffset+8, 2)), + 'pairs' => array() + ); + + 
$chunkOffset += 10; + if ($row['count'] > 0) { + for ($i = 0; $i < $row['count']; $i++) { + $pair = array(); + if ($type == 's') { + $pair['addchunknum'] = substr($chunkdata, $chunkOffset, 8); + $chunkOffset += 8; + } + $pair['prefix'] = substr($chunkdata, $chunkOffset, ($hashlen * 2)); + $chunkOffset += ($hashlen * 2); + $row['pairs'][] = $pair; + } + } elseif ($row['count'] == 0 && $type == 's') { + $row['pairs'][] = array( + 'addchunknum' => substr($chunkdata, $chunkOffset, 8) + ); + $chunkOffset += 8; + } elseif ($row['count'] < 0) { + $this->fatalerror(array( + "Decoding Error, Somethings gone wrong!", + array($row, $type) + )); + } + $dataArr['real'][] = $row; + } + $this->saveChunkPart($dataArr, ($type == 's' ? 'SUB' : "ADD"), $listname); + unset($dataArr); + $z++; + } + return true; + } + + /** + * Saves processed data to the MySQL database + */ + private function saveChunkPart($data, $type, $listname) { + $buildindex = array(); + $buildindexValues = array(); + $buildhost = array(); + $buildhostValues = array(); + $buildpairs = array(); + $buildpairsValues = array(); + + //Check what type of data it is... + if ($type == "SUB") { + $listtype = 's'; + } elseif ($type == "ADD") { + $listtype = 'a'; + } else { + $this->fatalerror(array( + "Invalid type given!", + $type + )); + } + + if (!isset($this->mainlist[$listtype][$listname][$data['chunknum']])) { + $this->mainlist[$listtype][$listname][$data['chunknum']] = true; + $buildindex[] = "(?, ?)"; + $buildindexValues[] = $data['chunknum']; + $buildindexValues[] = $data['chunklen']; + + foreach ($data['real'] as $newkey => $newvalue) { + $buildhost[] = "(?, ?, ?, '')"; + $buildhostValues[] = $newvalue['hostkey']; + $buildhostValues[] = $data['chunknum']; + $buildhostValues[] = $newvalue['count']; + foreach ($newvalue['pairs'] as $innerkey => $innervalue) { + $buildpairs[] = "(?, " . ($type == 'SUB' ? '?, ' : '') . 
"?, '')"; + $buildpairsValues[] = $newvalue['hostkey']; + if ($type == 'SUB') { + $buildpairsValues[] = $innervalue['addchunknum']; + } + + $buildpairsValues[] = (isset($innervalue['prefix']) ? $innervalue['prefix'] : ''); + } + } + } + + + if (!empty($buildindex)) { + //Insert index value + $this->query('INSERT IGNORE INTO `' . $listname . '-' . $listtype. '-index` (`chunk_num`,`chunk_len`) VALUES ' . implode(',', $buildindex), $buildindexValues); + } + + if (!empty($buildhost)) { + //Insert index value + $this->query('INSERT IGNORE INTO `' . $listname . '-' . $listtype. '-hosts` (`hostkey`,`chunk_num`,`count`,`fullhash`) VALUES ' . implode(',', $buildhost), $buildhostValues); + } + + if (!empty($buildpairs)) { + //Insert index value + $this->query('INSERT IGNORE INTO `' . $listname . '-' . $listtype. '-prefixes` (`hostkey`, ' . + ($type == 'SUB' ? '`add_chunk_num`, ' : '') . '`prefix`,`fullhash`) VALUES ' . + implode(',', $buildpairs), $buildpairsValues); + } + } + + /** + * Get ranges of existing chunks from a requested list + * and type (add [a] or sub [s] return them and set + * mainlist to recieved for that chunk (prevent dupes) + */ + private function getRanges($listname, $mode) { + $checktable = $listname . '-' . $mode . '-index'; + + $ranges = array(); + $i = 0; + $start = 0; + $stm = $this->query('SELECT chunk_num FROM `' . $checktable . '` ORDER BY `chunk_num` ASC'); + while ($row = $stm->fetch(\PDO::FETCH_ASSOC)) { + $this->mainlist[$mode][$listname][$row['chunk_num']] = true; + if ($i == 0) { + $start = $row['chunk_num']; + $previous = $row['chunk_num']; + } else { + $expected = $previous + 1; + if ($row['chunk_num'] != $expected) { + if ($start == $previous) { + $ranges[] = $start; + } else { + $ranges[] = $start . '-' . $previous; + } + $start = $row['chunk_num']; + } + $previous = $row['chunk_num']; + } + $i++; + } + + if ($start > 0 && $previous > 0) { + if ($start == $previous) { + $ranges[] = $start; + } else { + $ranges[] = $start . '-' . 
$previous; + } + } + return $ranges; + } + + /** + * Get both add and sub ranges for a requested list + */ + private function getFullRanges($listname) { + $subranges = $this->getRanges($listname, 's'); + $addranges = $this->getRanges($listname, 'a'); + return array( + "Subranges" => $subranges, + "Addranges" => $addranges + ); + } + + /** + * Format a full request body for a desired list including + * name and full ranges for add and sub + */ + private function formattedRequest($listname) { + $fullranges = $this->getFullRanges($listname); + $buildpart = ''; + + if (count($fullranges['Subranges']) > 0) { + $buildpart .= 's:' . implode(',', $fullranges['Subranges']); + } + + if (count($fullranges['Subranges']) > 0 && count($fullranges['Addranges']) > 0) { + $buildpart .= ':'; + } + + if (count($fullranges['Addranges']) > 0) { + $buildpart .= 'a:' . implode(',', $fullranges['Addranges']); + } + + return $listname . ';' . $buildpart . "\n"; + } + + /** + * Called when GSB returns a SUB-DEL or ADD-DEL response + */ + private function deleteRange($range, $mode, $listname) { + $params = array(); + $buildtrunk = $listname . '-' . $mode; + if (strpos($range, '-') !== false) { + $params = explode('-', trim($range), 1); + $clause = "`chunk_num` >= ? AND `chunk_num` <= ?"; + } else { + $params[] = $range; + $clause = "`chunk_num` = ?"; + } + + // Delete from index + $this->query('DELETE FROM `' . $buildtrunk . '-index` WHERE ' . $clause, $params); + + // Select all host keys that match chunks (we'll delete them after but we + // need the hostkeys list!) + $stm = $this->query('SELECT `hostkey` FROM `' . $buildtrunk . '-hosts` WHERE ' . $clause . " AND hostkey != ''", $params); + $buildprefixdel = array(); + while ($row = $stm->fetch(\PDO::FETCH_ASSOC)) { + $buildprefixdel[] = $row['hostkey']; + } + + if (!empty($buildprefixdel)) { + $this->query('DELETE FROM `' . $buildtrunk . '-hosts` WHERE hostkey IN (' . substr(str_repeat('?, ', count($buildprefixdel)), 0, -2) . 
')', $buildprefixdel); + + //Delete all matching hostkeys + $this->query('DELETE FROM `' . $buildtrunk . '-hosts` WHERE ' . $clause, $params); + } + } + + /** + * Main part of updater function, will call all other functions, merely + * requires the request body, it will then process and save all data as well as checking + * for ADD-DEL and SUB-DEL, runs silently so won't return anything on success + */ + private function getData($body) { + if (empty($body)) { + return $this->fatalerror("Missing a body for data request"); + } + + $this->trans_begin(); + $buildopts = array( + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => $body . "\n" + ); + + $result = $this->googleDownloader( + "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=" . $this->apikey . "&appver=" . $this->version . "&pver=" . $this->apiversion, + $buildopts, "data"); + + if (preg_match('/n:(\d+)/', $result[1], $match)) { + $this->setTimeout($match[1]); + } else { + return $this->fatalerror("Missing timeout"); + } + + if (strpos($result[1], 'r:pleasereset') !== false) { + $this->resetDatabase(); + return true; + } + + if (!preg_match_all('/i:(.+?)\n(.+?)(?=i:|$)/s', $result[1], $blocks, PREG_PATTERN_ORDER)) { + $this->outputmsg('No data available in list'); + return true; + } + + foreach ($blocks[1] as $id => $listname) { + if (!preg_match_all('/\s*([^:]+):(.+)/', $blocks[2][$id], $elements, PREG_PATTERN_ORDER)) { + return $this->fatalerror('could not parse response'); + } + + foreach ($elements[1] as $id => $type) { + $value = trim($elements[2][$id]); + switch($type) { + case 'u': + $chunkdata = $this->googleDownloader('http://' . $value, false, "data"); + $processed = $this->processChunks($chunkdata[1], $listname); + $this->outputmsg("Saved a chunk file: " . 
$value); + break; + case 'sd': + case 'ad': + $delType = substr($type, 0, 1); + foreach (explode(',', $value) as $keyadd => $valueadd) { + $this->deleteRange($valueadd, $delType, $listname); + } + break; + } + } + } + + $this->trans_commit(); + return true; + } + + /** + * Shortcut to run updater + */ + public function runUpdate() { + $this->checkTimeout('data'); + $require = ""; + foreach ($this->usinglists as $value) { + $require .= $this->formattedRequest($value); + } + + $this->outputmsg("Using $require"); + $this->getData($require); + } + + //LOOKUP FUNCTIONS + /** + * Used to check the canonicalize function + */ + public function validateMethod() { + //Input => Expected + $cases = array( + "http://host/%25%32%35" => "http://host/%25", + "http://host/%25%32%35%25%32%35" => "http://host/%25%25", + "http://host/%2525252525252525" => "http://host/%25", + "http://host/asdf%25%32%35asd" => "http://host/asdf%25asd", + "http://host/%%%25%32%35asd%%" => "http://host/%25%25%25asd%25%25", + "http://www.google.com/" => "http://www.google.com/", + "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/" => "http://168.188.99.26/.secure/www.ebay.com/", + "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/" => "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/", + "http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B" => 'http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+', + "http://3279880203/blah" => "http://195.127.0.11/blah", + "http://www.google.com/blah/.." 
=> "http://www.google.com/", + "www.google.com/" => "http://www.google.com/", + "www.google.com" => "http://www.google.com/", + "http://www.evil.com/blah#frag" => "http://www.evil.com/blah", + "http://www.GOOgle.com/" => "http://www.google.com/", + "http://www.google.com.../" => "http://www.google.com/", + "http://www.google.com/foo\tbar\rbaz\n2" => "http://www.google.com/foobarbaz2", + "http://www.google.com/q?" => "http://www.google.com/q?", + "http://www.google.com/q?r?" => "http://www.google.com/q?r?", + "http://www.google.com/q?r?s" => "http://www.google.com/q?r?s", + "http://evil.com/foo#bar#baz" => "http://evil.com/foo", + "http://evil.com/foo;" => "http://evil.com/foo;", + "http://evil.com/foo?bar;" => "http://evil.com/foo?bar;", + "http://\x01\x80.com/" => "http://%01%80.com/", + "http://notrailingslash.com" => "http://notrailingslash.com/", + "http://www.gotaport.com:1234/" => "http://www.gotaport.com:1234/", + " http://www.google.com/ " => "http://www.google.com/", + "http:// leadingspace.com/" => "http://%20leadingspace.com/", + "http://%20leadingspace.com/" => "http://%20leadingspace.com/", + "%20leadingspace.com/" => "http://%20leadingspace.com/", + "https://www.securesite.com/" => "https://www.securesite.com/", + "http://host.com/ab%23cd" => "http://host.com/ab%23cd", + "http://host.com//twoslashes?more//slashes" => "http://host.com/twoslashes?more//slashes" + ); + + foreach ($cases as $key => $value) { + $canit = self::canonicalizeURL($key); + $canit = $canit['GSBURL']; + if ($canit == $value) { + outputmsg("PASSED: $key"); + } else { + outputmsg("INVALid:
ORIGINAL: $key
EXPECTED: $value
RECIEVED: $canit
"); + } + } + } + + /** + * Special thanks Steven Levithan (stevenlevithan.com) for the ridiculously complicated regex + * required to parse urls. This is used over parse_url as it robustly provides access to + * port, userinfo etc and handles mangled urls very well. + * + * Expertly integrated into phpGSB by Sam Cleaver ;) + * Thanks to mikegillis677 for finding the seg. fault issue in the old function. + * Passed validateMethod() check on 17/01/12 + */ + static function j_parseUrl($url) { + $strict = '/^(?:([^:\/?#]+):)?(?:\/\/\/?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?(((?:\/(\w:))?((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/'; + $loose = '/^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((?:\/(\w:))?(\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/'; + preg_match($loose, $url, $match); + if (empty($match)) { + //As odd as its sounds, we'll fall back to strict (as technically its + // more correct and so may salvage completely mangled urls) + unset($match); + preg_match($strict, $url, $match); + } + $parts = array( + "source" => '', + "scheme" => '', + "authority" => '', + "userinfo" => '', + "user" => '', + "password" => '', + "host" => '', + "port" => '', + "relative" => '', + "path" => '', + "drive" => '', + "directory" => '', + "file" => '', + "query" => '', + "fragment" => '' + ); + switch (count ($match)) { + case 15 : + $parts['fragment'] = $match[14]; + case 14 : + $parts['query'] = $match[13]; + case 13 : + $parts['file'] = $match[12]; + case 12 : + $parts['directory'] = $match[11]; + case 11 : + $parts['drive'] = $match[10]; + case 10 : + $parts['path'] = $match[9]; + case 9 : + $parts['relative'] = $match[8]; + case 8 : + $parts['port'] = $match[7]; + case 7 : + $parts['host'] = $match[6]; + case 6 : + $parts['password'] = $match[5]; + case 5 : + $parts['user'] = $match[4]; + case 4 : + $parts['userinfo'] = $match[3]; + case 3 : + 
$parts['authority'] = $match[2]; + case 2 : + $parts['scheme'] = $match[1]; + case 1 : + $parts['source'] = $match[0]; + } + return $parts; + } + + /** + * Regex to check if its a numerical IP address + */ + static function is_ip($ip) { + return preg_match("/^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" . "(\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/", $ip); + } + + /** + * Checks if input is in hex format + */ + static function is_hex($x) { + // Relys on the fact that hex often includes letters meaning PHP will + // disregard the string + if (($x + 3) == 3) { + return dechex(hexdec($x)) == $x; + } + + return false; + } + + /** + * Checks if input is in octal format + */ + static function is_octal($x) { + //Relys on the fact that in IP addressing octals must begin with a 0 to + // denote octal + return substr($x, 0, 1) == 0; + } + + /** + * Converts hex or octal input into decimal + */ + static function hexoct2dec($value) { + //As this deals with parts in IP's we can be more exclusive + if (substr_count(substr($value, 0, 2), '0x') > 0 && self::is_hex($value)) { + return hexdec($value); + } elseif (self::is_octal($value)) { + return octdec($value); + } + + return false; + } + + /** + * Converts IP address part in HEX to decimal + */ + static function iphexdec($hex) { + //Removes any leading 0x (used to denote hex) and then and leading 0's) + $temp = str_replace('0x', '', $hex); + $temp = ltrim($temp, "0"); + return hexdec($temp); + } + + /** + * Converts full IP address in HEX to decimal + */ + static function hexIPtoIP($hex) { + // Remove hex identifier and leading 0's (not significant) + $tempip = str_replace('0x', '', $hex); + $tempip = ltrim($tempip, "0"); + + // It might be hex + if (self::is_hex($tempip)) { + // There may be a load of junk before the part we need + if (strlen($tempip) > 8) { + $tempip = substr($tempip, -8); + } + $hexplode = preg_split('//', $tempip, -1, PREG_SPLIT_NO_EMPTY); + while (count($hexplode) < 8) { + 
array_unshift($hexplode, 0); + } + + // Normalise + $newip = hexdec($hexplode[0] . $hexplode[1]) . '.' . + hexdec($hexplode[2] . $hexplode[3]) . '.' . + hexdec($hexplode[4] . $hexplode[5]) . '.' . + hexdec($hexplode[6] . $hexplode[7]); + //Now check if its an IP + if (self::is_ip($newip)) { + return $newip; + } + } + return false; + } + + /** + * Checks if an IP provided in either hex, octal or decimal is in fact + * an IP address. Normalises to a four part IP address. + */ + static function isValid_IP($ip) { + // First do a simple check, if it passes this no more needs to be done + if (self::is_ip($ip)) { + return $ip; + } + + // Its a toughy... eerm perhaps its all in hex? + $checkhex = self::hexIPtoIP($ip); + if ($checkhex) { + return $checkhex; + } + + // If we're still here it wasn't hex... maybe a DWORD format? + $checkdword = self::hexIPtoIP(dechex($ip)); + if ($checkdword) { + return $checkdword; + } + + //Nope... maybe in octal or a combination of standard, octal and hex?! + $ipcomponents = explode('.', $ip); + $ipcomponents[0] = self::hexoct2dec($ipcomponents[0]); + if (count($ipcomponents) == 2) { + //The writers of the RFC docs certainly didn't think about the + // clients! This could be a DWORD mixed with an IP part + if ($ipcomponents[0] <= 255 && is_int($ipcomponents[0]) && is_int($ipcomponents[1])) { + $threeparts = dechex($ipcomponents[1]); + $hexplode = preg_split('//', $threeparts, -1, PREG_SPLIT_NO_EMPTY); + if (count($hexplode) > 4) { + $newip = $ipcomponents[0] . '.' . + self::iphexdec($hexplode[0] . $hexplode[1]) . '.' . + self::iphexdec($hexplode[2] . $hexplode[3]) . '.' . + self::iphexdec($hexplode[4] . $hexplode[5]); + //Now check if its valid + if (self::is_ip($newip)) { + return $newip; + } + } + } + } + + $ipcomponents[1] = self::hexoct2dec($ipcomponents[1]); + if (count($ipcomponents) == 3) { + //Guess what... it could also be a DWORD mixed with two IP parts! 
+ if (($ipcomponents[0] <= 255 && is_int($ipcomponents[0])) && ($ipcomponents[1] <= 255 && is_int($ipcomponents[1])) && is_int($ipcomponents[2])) { + $twoparts = dechex($ipcomponents[2]); + $hexplode = preg_split('//', $twoparts, -1, PREG_SPLIT_NO_EMPTY); + if (count($hexplode) > 3) { + $newip = $ipcomponents[0] . '.' . + $ipcomponents[1] . '.' . + self::iphexdec($hexplode[0] . $hexplode[1]) . '.' . + self::iphexdec($hexplode[2] . $hexplode[3]); + //Now check if its valid + if ($this->is_ip($newip)) + return $newip; + } + } + } + //If not it may be a combination of hex and octal + if (count($ipcomponents) >= 4) { + $tmpcomponents = array( + $ipcomponents[2], + $ipcomponents[3] + ); + + foreach ($tmpcomponents as $key => $value) { + if (!$tmpcomponents[$key] = self::hexoct2dec($value)) { + return false; + } + } + + array_unshift($tmpcomponents, $ipcomponents[0], $ipcomponents[1]); + //Convert back to IP form + $newip = implode('.', $tmpcomponents); + + //Now check if its valid + if (self::is_ip($newip)) { + return $newip; + } + } + + // Well its not an IP that we can recognise... theres only so much we can + // do! + return false; + } + + /** + * Had to write another layer as built in PHP urlencode() escapes all non + * alpha-numeric Google states to only urlencode if its below 32 or above + * or equal to 127 (some of those are non alpha-numeric and so urlencode + * on its own won't work). + */ + static function flexURLEncode($url, $ignorehash = false) { + // Had to write another layer as built in PHP urlencode() escapes all non + // alpha-numeric + // google states to only urlencode if its below 32 or above or equal to + // 127 (some of those + // are non alpha-numeric and so urlencode on its own won't work). 
+ $urlchars = preg_split('//', $url, -1, PREG_SPLIT_NO_EMPTY); + if (count($urlchars) > 0) { + foreach ($urlchars as $key => $value) { + $ascii = ord($value); + if ($ascii <= 32 || $ascii >= 127 || ($value == '#' && !$ignorehash) || $value == '%') { + $urlchars[$key] = rawurlencode($value); + } + } + + return implode('', $urlchars); + } + return $url; + } + + /** + * Canonicalize a full URL according to Google's definition. + */ + static function canonicalizeURL($url) { + // Remove line feeds, return carriages, tabs, vertical tabs + $finalurl = trim(str_replace(array( + "\x09", + "\x0A", + "\x0D", + "\x0B" + ), '', $url)); + + // URL Encode for easy extraction + $finalurl = self::flexURLEncode($finalurl, true); + + // Now extract hostname & path + $parts = self::j_parseUrl($finalurl); + $hostname = $parts['host']; + $path = $parts['path']; + $query = $parts['query']; + $lasthost = ""; + $lastpath = ""; + $lastquery = ""; + + // Remove all hex coding (loops max of 50 times to stop craziness but + // should never reach that) + for ($i = 0; $i < 50; $i++) { + $hostname = rawurldecode($hostname); + $path = rawurldecode($path); + $query = rawurldecode($query); + if ($hostname == $lasthost && $path == $lastpath && $query == $lastquery) + break; + $lasthost = $hostname; + $lastpath = $path; + $lastquery = $query; + } + + // Deal with hostname first + // Replace all leading and trailing dots + $hostname = trim($hostname, '.'); + + // Replace all consecutive dots with one dot + $hostname = preg_replace("/\.{2,}/", ".", $hostname); + + // Make it lowercase + $hostname = strtolower($hostname); + + // See if its a valid IP + $hostnameip = self::isValid_IP($hostname); + if ($hostnameip) { + $usingip = true; + $usehost = $hostnameip; + } else { + $usingip = false; + $usehost = $hostname; + } + //The developer guide has lowercasing and validating IP other way round + // but its more efficient to + //have it this way + //Now we move onto canonicalizing the path + $pathparts = 
explode('/', $path); + foreach ($pathparts as $key => $value) { + if ($value == "..") { + if ($key != 0) { + unset($pathparts[$key - 1]); + unset($pathparts[$key]); + } else { + unset($pathparts[$key]); + } + } elseif ($value == "." || empty($value)) { + unset($pathparts[$key]); + } + } + + if (substr($path, -1, 1) == "/") { + $append = "/"; + } else { + $append = false; + } + + $path = "/" . implode("/", $pathparts); + + if ($append && substr($path, -1, 1) != "/") { + $path .= $append; + } + + $usehost = self::flexURLEncode($usehost); + $path = self::flexURLEncode($path); + $query = self::flexURLEncode($query); + + if (empty($parts['scheme'])) { + $parts['scheme'] = 'http'; + } + + $canurl = $parts['scheme'] . '://'; + $realurl = $canurl; + + if (!empty($parts['userinfo'])) { + $realurl .= $parts['userinfo'] . '@'; + } + + $canurl .= $usehost; + $realurl .= $usehost; + + if (!empty($parts['port'])) { + $canurl .= ':' . $parts['port']; + $realurl .= ':' . $parts['port']; + } + + $canurl .= $path; + $realurl .= $path; + if (substr_count($finalurl, "?") > 0) { + $canurl .= '?' . $parts['query']; + $realurl .= '?' . $parts['query']; + } + + if (!empty($parts['fragment'])) { + $realurl .= '#' . $parts['fragment']; + } + + return array( + "GSBURL" => $canurl, + "CleanURL" => $realurl, + "Parts" => array( + "Host" => $usehost, + "Path" => $path, + "Query" => $query, + "IP" => $usingip + ) + ); + } + + /** + * SHA-256 input (short method). + */ + static function sha256($data) { + return hash('sha256', $data); + } + + /** + * Make hostkeys for use in a lookup + */ + static function makeHostKey($host, $usingip) { + if ($usingip) { + $hosts = array($host . "/"); + } else { + $hostparts = explode(".", $host); + if (count($hostparts) > 2) { + $backhostparts = array_reverse($hostparts); + $threeparts = array_slice($backhostparts, 0, 3); + $twoparts = array_slice($threeparts, 0, 2); + $hosts = array( + implode('.', array_reverse($threeparts)) . 
"/", + implode('.', array_reverse($twoparts)) . "/" + ); + } else + $hosts = array($host . "/"); + } + + //Now make key & key prefix + $returnhosts = array(); + foreach ($hosts as $value) { + $fullhash = self::sha256($value); + $returnhosts[$fullhash] = array( + "Host" => $value, + "prefix" => substr($fullhash, 0, 8), + "Hash" => $fullhash + ); + } + + return $returnhosts; + } + + /** + * Hash up a list of values from makeprefixes() (will possibly be combined into that function at a later date + */ + static function makeHashes($prefixarray) { + if (count($prefixarray) > 0) { + $returnprefixes = array(); + foreach ($prefixarray as $value) { + $fullhash = self::sha256($value); + $returnprefixes[$fullhash] = array( + "Original" => $value, + "prefix" => substr($fullhash, 0, 8), + "Hash" => $fullhash + ); + } + return $returnprefixes; + } else + return false; + } + + /** + * Make URL prefixes for use after a hostkey check + */ + static function makeprefixes($host, $path, $query, $usingip) { + $prefixes = array(); + + // Exact hostname in the url + $hostcombos = array(); + $hostcombos[] = $host; + if (!$usingip) { + $hostparts = explode('.', $host); + $backhostparts = array_reverse($hostparts); + if (count($backhostparts) > 5) + $maxslice = 5; + else + $maxslice = count($backhostparts); + $topslice = array_slice($backhostparts, 0, $maxslice); + while ($maxslice > 1) { + $hostcombos[] = implode('.', array_reverse($topslice)); + $maxslice--; + $topslice = array_slice($backhostparts, 0, $maxslice); + } + } else + $hostcombos[] = $host; + $hostcombos = array_unique($hostcombos); + $variations = array(); + if (!empty($path)) { + $pathparts = explode("/", $path); + if (count($pathparts) > 4) + $upperlimit = 4; + else + $upperlimit = count($pathparts); + } + foreach ($hostcombos as $key => $value) { + if (!empty($query)) + $variations[] = $value . $path . '?' . $query; + $variations[] = $value . 
$path; + if (!empty($path)) { + $i = 0; + $pathiparts = ""; + while ($i < $upperlimit) { + if ($i != count($pathparts) - 1) + $pathiparts = $pathiparts . $pathparts[$i] . "/"; + else + $pathiparts = $pathiparts . $pathparts[$i]; + $variations[] = $value . $pathiparts; + $i++; + } + } + } + $variations = array_unique($variations); + return self::makeHashes($variations); + } + + /** + * Process data provided from the response of a full-hash GSB + * request + */ + private function processFullLookup($data) { + $clonedata = $data; + $extracthash = array(); + while (strlen($clonedata) > 0) { + $splithead = explode("\n", $clonedata, 2); + $chunkinfo = explode(':', $splithead[0]); + $listname = $chunkinfo[0]; + $addchunk = $chunkinfo[1]; + $chunklen = $chunkinfo[2]; + $chunkdata = bin2hex(substr($splithead[1], 0, $chunklen)); + while (strlen($chunkdata) > 0) { + $extracthash[$listname][$addchunk] = substr($chunkdata, 0, 64); + $chunkdata = substr($chunkdata, 64); + } + $clonedata = substr($splithead[1], $chunklen); + } + return $extracthash; + } + + /** + * Add a full-hash key to a prefix or hostkey (the variable is $prefix + * but it could be either). + */ + private function addfullhash($prefix, $chunknum, $fullhash, $listname) { + $buildtrunk = $listname . "-a"; + + //First check hosts + $stm = $this->query("SELECT * FROM `" . $buildtrunk ."-hosts` WHERE `hostkey` = ? AND `chunk_num` = ? AND fulllhash = '' LIMIT 1", array($prefix, $chunknum)); + if ($stm->rowCount() > 0) { + $row = $stm->fetch(\PDO::FETCH_ASSOC); + //We've got a live one! Insert the full hash for it + $this->query("UPDATE `" . $buildtrunk . "-hosts` SET `fullhash` = ? WHERE `id` = ?", array($fullhash, $row['id'])); + } else { + $this->query(" + UPDATE + `" . $buildtrunk ."-prefixes` p + JOIN `" . $buildtrunk . "-hosts` h ON (h.hostkey = p.hostkey) + SET + fullhash = ? + WHERE + p.`prefix` = ? AND + p.fullhash = '' AND + h.chunk_num = ? 
AND + h.count > 0 + ", array($fullhash, $prefix, $chunknum)); + } + } + + /** + * Check database for any cached full-length hashes for a given prefix. + */ + private function cacheCheck($prefix) { + foreach ($this->usinglists as $value) { + $buildtrunk = $value . "-a"; + $stm = $this->query("SELECT * FROM `" . $buildtrunk . "-hosts` WHERE `hostkey` = ? AND `fullhash` != ''", array($prefix)); + if ($stm->rowCount() > 0) { + $row = $stm->fetch(\PDO::FETCH_ASSOC); + return array( + $row['fullhash'], + $row['chunk_num'] + ); + } + + $stm = $this->query("SELECT p.fullhash, h.chunk_num + FROM + `" . $buildtrunk . "-prefixes` p + JOIN `" . $buildtrunk . "-hosts` h ON (p.hostkey = h.hostkey) + WHERE p.`prefix` = ? AND p.`fullhash` != '' AND h.count > 0", array($prefix)); + if ($stm->rowCount() > 0) { + $row = $stm->fetch(\PDO::FETCH_ASSOC); + return array( + $row['fullhash'], + $row['chunk_num'] + ); + } + } + + return false; + } + + /** + * Do a full-hash lookup based on prefixes provided, returns (bool) true on a match and (bool) false on no match. + */ + private function doFullLookup($prefixes, $originals) { + //Store copy of original prefixes + $cloneprefixes = $prefixes; + //They should really all have the same prefix size.. we'll just check one + $prefixsize = strlen($prefixes[0][0]) / 2; + $length = count($prefixes) * $prefixsize; + foreach ($prefixes as $key => $value) { + //Check cache on each iteration (we can return true earlier if we get + // a match!) + $cachechk = $this->cacheCheck($value[0]); + if ($cachechk) { + if (isset($originals[$cachechk[0]])) { + //Check from same chunk + foreach ($cloneprefixes as $nnewvalue) { + if ($nnewvalue[1] == $cachechk[1] && $value[0] == $originals[$cachechk[0]]['prefix']) { + //From same chunks + return true; + } + + } + } + } + $prefixes[$key] = pack("H*", $value[0]); + } + //No cache matches so we continue with request + $body = "$prefixsize:$length\n" . 
implode("", $prefixes); + + $buildopts = array( + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => $body + ); + $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=" . $this->apikey . "&appver=" . $this->version . "&pver=" . $this->apiversion, $buildopts, "lookup"); + + if ($result[0]['http_code'] == 200 && !empty($result[1])) { + //Extract hashes from response + $extractedhashes = $this->processFullLookup($result[1]); + //Loop over each list + foreach ($extractedhashes as $key => $value) { + //Loop over each value in each list + foreach ($value as $newkey => $newvalue) { + if (isset($originals[$newvalue])) { + //Okay it matches a full-hash we have, now to check + // they're from the same chunks + foreach ($cloneprefixes as $nnewvalue) { + if ($nnewvalue[1] == $newkey && $nnewvalue[0] == $originals[$newvalue]['prefix']) { + //From same chunks + //Add full hash to database (cache) + $this->addfullhash($nnewvalue[0], $nnewvalue[1], $newvalue, $key); + return true; + } + + } + } + } + } + return false; + } elseif ($result[0]['http_code'] == 204 && strlen($result[1]) == 0) { + //204 Means no match + return false; + } else { + //"No No No! This just doesn't add up at all!" + $this->fatalerror("ERROR: Invalid response returned from GSB ({$result[0]['http_code']})"); + } + } + + /** + * Checks to see if a match for a prefix is found in the sub table, if it is + * then we won't do a full-hash lookup. + * Return true on match in sub list, return false on negative. + */ + private function subCheck($listname, $prefixlist, $mode) { + $buildtrunk = $listname . '-s'; + foreach ($prefixlist as $value) { + $stm = $this->query("SELECT * FROM `". $buildtrunk . "-prefixes` WHERE " . ($mode == 'prefix' ? '`prefix`' : 'hostkey') . 
' = ?', array($value[0])); + //As interpreted from Developer Guide if theres a match in + // sub list it cancels out the add listing + //we'll double check its from the same chunk just to be + // pedantic + while ($row = $stm->fetch(\PDO::FETCH_ASSOC)) { + if (hexdec($row['add_chunk_num']) == $value[1]) { + return true; + } + } + } + return false; + } + + /** + * query wrapper + */ + private function query($sql, $data = array()) { + $stm = $this->db->prepare($sql); + $stm->execute($data); + return $stm; + } + + /** + * create tables + */ + public function install() { + foreach ($this->usinglists as $listname) { + $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-a-hosts` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `hostkey` varchar(8) NOT NULL, + `chunk_num` int(11) unsigned NOT NULL, + `count` varchar(2) NOT NULL DEFAULT '0', + `fullhash` char(64) NOT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY `hostkey_2` (`hostkey`,`chunk_num`,`count`,`fullhash`), + KEY `hostkey` (`hostkey`) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); + + $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-a-index` ( + `chunk_num` int(11) unsigned NOT NULL AUTO_INCREMENT, + `chunk_len` int(11) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`chunk_num`) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); + + $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-a-prefixes` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `hostkey` varchar(8) NOT NULL, + `prefix` varchar(8) NOT NULL, + `fullhash` char(64) NOT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY `hostkey_2` (`hostkey`,`prefix`), + KEY `hostkey` (`hostkey`) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); + + $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . 
"-s-hosts` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `hostkey` varchar(8) NOT NULL, + `chunk_num` int(11) unsigned NOT NULL, + `count` varchar(2) NOT NULL DEFAULT '0', + `fullhash` char(64) NOT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY `hostkey_2` (`hostkey`,`chunk_num`,`count`,`fullhash`), + KEY `hostkey` (`hostkey`) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); + + $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-s-index` ( + `chunk_num` int(11) unsigned NOT NULL AUTO_INCREMENT, + `chunk_len` int(11) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`chunk_num`) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); + + $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-s-prefixes` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `hostkey` varchar(8) NOT NULL, + `add_chunk_num` varchar(8) NOT NULL, + `prefix` varchar(8) NOT NULL, + `fullhash` char(64) NOT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY `hostkey_2` (`hostkey`,`add_chunk_num`,`prefix`), + KEY `hostkey` (`hostkey`) + ) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); + } + } + + /** + * Does a full URL lookup on given lists, will check if its in database, if + * slight match there then will do a full-hash lookup on GSB, + * returns (bool) true on match and (bool) false on negative. + */ + public function doLookup($url) { + $lists = $this->usinglists; + //First canonicalize the URL + $canurl = self::canonicalizeURL($url); + + //Make hostkeys + $hostkeys = self::makeHostKey($canurl['Parts']['Host'], $canurl['Parts']['IP']); + + $prefixes = self::makeprefixes($canurl['Parts']['Host'], $canurl['Parts']['Path'], $canurl['Parts']['Query'], $canurl['Parts']['IP']); + + $prefixParams = array(); + $buildprequery = array(); + foreach ($prefixes as $prefix) { + $buildprequery[] = " `prefix` = ?"; + $prefixParams[] = $prefix['prefix']; + } + $buildprequery = implode("OR", $buildprequery); + + $matches = array(); + foreach ($lists as $key => $value) { + $buildtrunk = $value . 
'-a'; + $hostsStm = $this->db->prepare('SELECT * FROM `' . $buildtrunk . '-hosts` WHERE hostkey = ?'); + + //Loop over each list + foreach ($hostkeys as $keyinner => $valueinner) { + + // Within each list loop over each hostkey + $hostsStm->execute(array($valueinner['prefix'])); + + // For each hostkey match + while ($row = $hostsStm->fetch(\PDO::FETCH_ASSOC)) { + if ($row['count'] > 0) { + + // There was a match and the count is more than one so + // there are prefixes! + // Hash up a load of prefixes and create the build + // query if we haven't done so already + $params = $prefixParams; + $params[] = $row['hostkey']; + + // Check if there are any matching prefixes + $stm = $this->query("SELECT * FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = ?", $params); + if ($stm->rowCount() > 0) { + // We found prefix matches + $prematches = array(); + $prelookup = array(); + while ($rowPrefix = $hostsStm->fetch(\PDO::FETCH_ASSOC)) { + $prematches[] = array( + $rowPrefix['prefix'], + $row['chunk_num'] + ); + } + + // Before we send off any requests first check + // whether its in sub table + if (!$this->subCheck($value, $prematches, "prefix") && + $this->doFullLookup($prematches, $prefixes)) { + return true; + } + } + + // If we didn't find matches then do nothing (keep + // looping till end and it'll return negative) + } elseif (!$this->subCheck($value, array(array($row['hostkey'], $row['chunk_num'])), "hostkey") && + $this->doFullLookup(array(array($row['hostkey'], $row['chunk_num'])), $hostkeys)) { + return true; + } + } + } + } + return false; + } +} From 05098d9b6a807962867614ab84c1d6f00c138a6d Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 18:13:56 +0200 Subject: [PATCH 03/16] updated to use config and replaced comments. 
--- install.php | 93 +++++++++++++------------------------------------ listupdater.php | 35 ++++++++++--------- lookup.php | 41 +++++++++++----------- 3 files changed, 63 insertions(+), 106 deletions(-) diff --git a/install.php b/install.php index c7156c2..9be532a 100644 --- a/install.php +++ b/install.php @@ -1,69 +1,24 @@ -usinglists = array('googpub-phish-shavar','goog-malware-shavar'); -//Install MySQL tables -foreach($phpgsb->usinglists as $value) - { - //Create ADD tables - mysql_query("CREATE TABLE IF NOT EXISTS `$value-a-hosts` ( - `ID` int(255) NOT NULL auto_increment, - `Hostkey` varchar(8) NOT NULL, - `Chunknum` int(255) NOT NULL, - `Count` varchar(2) NOT NULL default '0', - `FullHash` varchar(70) NOT NULL, - PRIMARY KEY (`ID`), - KEY `Hostkey` (`Hostkey`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); - mysql_query("CREATE TABLE IF NOT EXISTS `$value-a-index` ( - `ChunkNum` int(255) NOT NULL auto_increment, - `Chunklen` int(255) NOT NULL default '0', - PRIMARY KEY (`ChunkNum`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); - mysql_query("CREATE TABLE IF NOT EXISTS `$value-a-prefixes` ( - `ID` int(255) NOT NULL auto_increment, - `Hostkey` varchar(8) NOT NULL, - `Prefix` varchar(255) NOT NULL, - `FullHash` varchar(70) NOT NULL, - PRIMARY KEY (`ID`), - KEY `Hostkey` (`Hostkey`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); - //Create SUB tables - mysql_query("CREATE TABLE IF NOT EXISTS `$value-s-hosts` ( - `ID` int(255) NOT NULL auto_increment, - `Hostkey` varchar(8) NOT NULL, - `Chunknum` int(255) NOT NULL, - `Count` varchar(2) NOT NULL default '0', - `FullHash` varchar(70) NOT NULL, - PRIMARY KEY (`ID`), - KEY `Hostkey` (`Hostkey`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); - mysql_query("CREATE TABLE IF NOT EXISTS `$value-s-index` ( - `ChunkNum` int(255) NOT NULL auto_increment, - `Chunklen` int(255) NOT NULL default '0', - PRIMARY KEY (`ChunkNum`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); - mysql_query("CREATE TABLE IF NOT EXISTS `$value-s-prefixes` ( 
- `ID` int(255) NOT NULL auto_increment, - `Hostkey` varchar(8) NOT NULL, - `AddChunkNum` varchar(8) NOT NULL, - `Prefix` varchar(255) NOT NULL, - `FullHash` varchar(70) NOT NULL, - PRIMARY KEY (`ID`), - KEY `Hostkey` (`Hostkey`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;"); - } -//Check timeout files writable -if(file_put_contents("testfile.dat","TEST PRE-USE PHPGSB ".time())) - unlink("testfile.dat"); -else - echo "ERROR: THIS DIRECTORY IS NOT WRITABLE, CHMOD to 775 or 777"; -?> \ No newline at end of file +usinglists = array( + 'googpub-phish-shavar', + 'goog-malware-shavar' +); + +$phpgsb->install(); + +//Check timeout files writable +if (file_put_contents("testfile.dat", "TEST PRE-USE PHPGSB " . time())) { + unlink("testfile.dat"); +} else { + echo "DIRECTORY IS NOT WRITABLE, CHMOD to 775 or 777"; +} diff --git a/listupdater.php b/listupdater.php index a6d9425..69d5016 100644 --- a/listupdater.php +++ b/listupdater.php @@ -1,17 +1,18 @@ -apikey = "API_KEY_HERE"; -$phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar'); -$phpgsb->runUpdate(); -$phpgsb->close(); -?> \ No newline at end of file +apikey = $config['api_key']; +$phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar'); +$phpgsb->runUpdate(); +$phpgsb->close(); diff --git a/lookup.php b/lookup.php index 8313127..805d815 100644 --- a/lookup.php +++ b/lookup.php @@ -1,20 +1,21 @@ -apikey = "API_KEY_HERE"; -$phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar'); -//Should return false (not phishing or malware) -var_dump($phpgsb->doLookup('http://www.google.com')); -//Should return true, malicious URL -var_dump($phpgsb->doLookup('http://www.gumblar.cn')); -$phpgsb->close(); -?> \ No newline at end of file +apikey = $config['api_key']; + +$phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar'); +// Should return false (not phishing or malware) +var_dump($phpgsb->doLookup('http://www.google.com')); +// Should return true, malicious URL 
+var_dump($phpgsb->doLookup('http://www.gumblar.cn')); From 0a6f2ff7381c6eb8e606b51ea87673b959e2348c Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 18:23:20 +0200 Subject: [PATCH 04/16] added verbose + fixed some comments. --- phpgsb.class.php | 92 ++++++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 75c587b..1f74933 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -29,10 +29,11 @@ class phpGSB { private $db; - public function __construct($database = false, $username = false, $password = false, $host = "localhost") { + public function __construct($database = false, $username = false, $password = false, $host = "localhost", $verbose = false) { if ($database && $username) { $this->dbConnect($database, $username, $password, $host); } + $this->verbose = $verbose; } public function __destruct() { @@ -79,28 +80,33 @@ private function trans_rollback() { } } - /*Function to output messages, used instead of echo, - will make it easier to have a verbose switch in later - releases*/ + /** + * Function to output messages, used instead of echo, + * will make it easier to have a verbose switch in later releases + */ private function outputmsg($msg) { if ($this->verbose) { echo $msg . "\n"; } } - /*Function to output errors, used instead of echo, - will make it easier to have a verbose switch in later - releases*/ + /** + * Function to output errors, used instead of echo, + * will make it easier to have a verbose switch in later releases + */ private function fatalerror($msg) { if ($this->verbose) { print_r($msg); echo "\n"; } + $this->trans_rollback(); - die(); + throw Exception($msg); } - /*Wrapper to connect to database. Simples.*/ + /** + * Wrapper to connect to database. + */ private function dbConnect($database, $username, $password, $host = "localhost") { $this->db = new PDO('mysql:host=' . $host . ';dbname=' . 
$database, $username, @@ -808,11 +814,11 @@ static function isValid_IP($ip) { return $checkdword; } - //Nope... maybe in octal or a combination of standard, octal and hex?! + // Nope... maybe in octal or a combination of standard, octal and hex?! $ipcomponents = explode('.', $ip); $ipcomponents[0] = self::hexoct2dec($ipcomponents[0]); if (count($ipcomponents) == 2) { - //The writers of the RFC docs certainly didn't think about the + // The writers of the RFC docs certainly didn't think about the // clients! This could be a DWORD mixed with an IP part if ($ipcomponents[0] <= 255 && is_int($ipcomponents[0]) && is_int($ipcomponents[1])) { $threeparts = dechex($ipcomponents[1]); @@ -822,7 +828,7 @@ static function isValid_IP($ip) { self::iphexdec($hexplode[0] . $hexplode[1]) . '.' . self::iphexdec($hexplode[2] . $hexplode[3]) . '.' . self::iphexdec($hexplode[4] . $hexplode[5]); - //Now check if its valid + // Now check if its valid if (self::is_ip($newip)) { return $newip; } @@ -841,13 +847,13 @@ static function isValid_IP($ip) { $ipcomponents[1] . '.' . self::iphexdec($hexplode[0] . $hexplode[1]) . '.' . self::iphexdec($hexplode[2] . 
$hexplode[3]); - //Now check if its valid + // Now check if its valid if ($this->is_ip($newip)) return $newip; } } } - //If not it may be a combination of hex and octal + // If not it may be a combination of hex and octal if (count($ipcomponents) >= 4) { $tmpcomponents = array( $ipcomponents[2], @@ -861,10 +867,10 @@ static function isValid_IP($ip) { } array_unshift($tmpcomponents, $ipcomponents[0], $ipcomponents[1]); - //Convert back to IP form + // Convert back to IP form $newip = implode('.', $tmpcomponents); - //Now check if its valid + // Now check if its valid if (self::is_ip($newip)) { return $newip; } @@ -957,10 +963,10 @@ static function canonicalizeURL($url) { $usingip = false; $usehost = $hostname; } - //The developer guide has lowercasing and validating IP other way round + // The developer guide has lowercasing and validating IP other way round // but its more efficient to - //have it this way - //Now we move onto canonicalizing the path + // have it this way + // Now we move onto canonicalizing the path $pathparts = explode('/', $path); foreach ($pathparts as $key => $value) { if ($value == "..") { @@ -1060,7 +1066,7 @@ static function makeHostKey($host, $usingip) { $hosts = array($host . 
"/"); } - //Now make key & key prefix + // Now make key & key prefix $returnhosts = array(); foreach ($hosts as $value) { $fullhash = self::sha256($value); @@ -1105,44 +1111,54 @@ static function makeprefixes($host, $path, $query, $usingip) { if (!$usingip) { $hostparts = explode('.', $host); $backhostparts = array_reverse($hostparts); - if (count($backhostparts) > 5) + if (count($backhostparts) > 5) { $maxslice = 5; - else + } else { $maxslice = count($backhostparts); + } + $topslice = array_slice($backhostparts, 0, $maxslice); while ($maxslice > 1) { $hostcombos[] = implode('.', array_reverse($topslice)); $maxslice--; $topslice = array_slice($backhostparts, 0, $maxslice); } - } else + } else { $hostcombos[] = $host; + } + $hostcombos = array_unique($hostcombos); $variations = array(); if (!empty($path)) { $pathparts = explode("/", $path); - if (count($pathparts) > 4) + if (count($pathparts) > 4) { $upperlimit = 4; - else + } else { $upperlimit = count($pathparts); + } } + foreach ($hostcombos as $key => $value) { - if (!empty($query)) + if (!empty($query)) { $variations[] = $value . $path . '?' . $query; + } + $variations[] = $value . $path; if (!empty($path)) { $i = 0; $pathiparts = ""; while ($i < $upperlimit) { - if ($i != count($pathparts) - 1) + if ($i != count($pathparts) - 1) { $pathiparts = $pathiparts . $pathparts[$i] . "/"; - else + } else { $pathiparts = $pathiparts . $pathparts[$i]; + } $variations[] = $value . $pathiparts; $i++; } } } + $variations = array_unique($variations); return self::makeHashes($variations); } @@ -1177,11 +1193,11 @@ private function processFullLookup($data) { private function addfullhash($prefix, $chunknum, $fullhash, $listname) { $buildtrunk = $listname . "-a"; - //First check hosts + // First check hosts $stm = $this->query("SELECT * FROM `" . $buildtrunk ."-hosts` WHERE `hostkey` = ? AND `chunk_num` = ? 
AND fulllhash = '' LIMIT 1", array($prefix, $chunknum)); if ($stm->rowCount() > 0) { $row = $stm->fetch(\PDO::FETCH_ASSOC); - //We've got a live one! Insert the full hash for it + // We've got a live one! Insert the full hash for it $this->query("UPDATE `" . $buildtrunk . "-hosts` SET `fullhash` = ? WHERE `id` = ?", array($fullhash, $row['id'])); } else { $this->query(" @@ -1235,13 +1251,13 @@ private function cacheCheck($prefix) { * Do a full-hash lookup based on prefixes provided, returns (bool) true on a match and (bool) false on no match. */ private function doFullLookup($prefixes, $originals) { - //Store copy of original prefixes + // Store copy of original prefixes $cloneprefixes = $prefixes; - //They should really all have the same prefix size.. we'll just check one + // They should really all have the same prefix size.. we'll just check one $prefixsize = strlen($prefixes[0][0]) / 2; $length = count($prefixes) * $prefixsize; foreach ($prefixes as $key => $value) { - //Check cache on each iteration (we can return true earlier if we get + // Check cache on each iteration (we can return true earlier if we get // a match!) $cachechk = $this->cacheCheck($value[0]); if ($cachechk) { @@ -1258,7 +1274,7 @@ private function doFullLookup($prefixes, $originals) { } $prefixes[$key] = pack("H*", $value[0]); } - //No cache matches so we continue with request + // No cache matches so we continue with request $body = "$prefixsize:$length\n" . implode("", $prefixes); $buildopts = array( @@ -1291,10 +1307,10 @@ private function doFullLookup($prefixes, $originals) { } return false; } elseif ($result[0]['http_code'] == 204 && strlen($result[1]) == 0) { - //204 Means no match + // 204 Means no match return false; } else { - //"No No No! This just doesn't add up at all!" + // "No No No! This just doesn't add up at all!" 
$this->fatalerror("ERROR: Invalid response returned from GSB ({$result[0]['http_code']})"); } } @@ -1308,9 +1324,9 @@ private function subCheck($listname, $prefixlist, $mode) { $buildtrunk = $listname . '-s'; foreach ($prefixlist as $value) { $stm = $this->query("SELECT * FROM `". $buildtrunk . "-prefixes` WHERE " . ($mode == 'prefix' ? '`prefix`' : 'hostkey') . ' = ?', array($value[0])); - //As interpreted from Developer Guide if theres a match in + // As interpreted from Developer Guide if theres a match in // sub list it cancels out the add listing - //we'll double check its from the same chunk just to be + // we'll double check its from the same chunk just to be // pedantic while ($row = $stm->fetch(\PDO::FETCH_ASSOC)) { if (hexdec($row['add_chunk_num']) == $value[1]) { From 02d0e18bd1f55c536ed5ea16d2d257dcc4838c98 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 18:26:27 +0200 Subject: [PATCH 05/16] added some comment + removed debug output. --- phpgsb.class.php | 52 +++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 1f74933..588113a 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -1,11 +1,12 @@ 5) { - //According to Developer Guide (Above 5 errors check every 4 hours) + // According to Developer Guide (Above 5 errors check every 4 hours) return 28800; } else { - //According to Developer Guide we simply double up our timeout each + // According to Developer Guide we simply double up our timeout each // time and use formula: - //(Adapted to be relative to errors) ( ((2^$errors) * 7.5) * + // (Adapted to be relative to errors) ( ((2^$errors) * 7.5) * // (decimalrand(0,1) + 1)) to produce // a result between: 120min-240min for example return floor((pow(2, $errors) * 7.5) * ((rand(0, 1000) / 1000) + 1)); @@ -142,10 +143,8 @@ private function calc($errors) { * for next check */ private function Backoff($errdata = false, $type) { - if 
($type == "data") - $file = 'nextcheck.dat'; - else - $file = 'nextcheckl.dat'; + $file = ($type == 'data' ? 'nextcheck.dat' : 'nextcheckl.dat'); + $curstatus = explode('||', file_get_contents($this->pingfilepath . $file)); $curstatus[1] = $curstatus[1] + 1; $seconds = $this->calc($curstatus[1]); @@ -167,6 +166,7 @@ private function setTimeout($seconds) { } else { $until = time() + $seconds . '||'; } + file_put_contents($this->pingfilepath . 'nextcheck.dat', $until); } @@ -175,15 +175,14 @@ private function setTimeout($seconds) { * start of script) */ private function checkTimeout($type) { - if ($type == "data") - $file = 'nextcheck.dat'; - else - $file = 'nextcheckl.dat'; + $file = ($type == 'data' ? 'nextcheck.dat' : 'nextcheckl.dat'); + $curstatus = explode('||', file_get_contents($this->pingfilepath . $file)); if (time() < $curstatus[0]) { $this->fatalerror("Must wait another " . ($curstatus[0] - time()) . " seconds before another request"); - } else - $this->outputmsg("Allowed to request"); + } + + $this->outputmsg("Allowed to request"); } /** @@ -197,8 +196,9 @@ private function googleDownloader($url, $options, $followbackoff = false) { curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - if (is_array($options)) + if (is_array($options)) { curl_setopt_array($ch, $options); + } $data = curl_exec($ch); $info = curl_getinfo($ch); @@ -207,6 +207,7 @@ private function googleDownloader($url, $options, $followbackoff = false) { if ($followbackoff && $info['http_code'] > 299) { $this->Backoff($info, $followbackoff); } + return array( $info, $data @@ -219,9 +220,11 @@ private function googleDownloader($url, $options, $followbackoff = false) { * Resets lists database, only called if GSB issues r:resetdatabase */ private function resetDatabase() { - //Lord knows why they would EVER issue this request! - if (!empty($this->adminemail)) + // Lord knows why they would EVER issue this request! 
+ if (!empty($this->adminemail)) { mail($this->adminemail, 'Reset Database Request Issued', 'For some crazy unknown reason GSB requested a database reset at ' . time()); + } + foreach ($this->usinglists as $value) { $this->query("TRUNCATE TABLE `$value-s-index`"); $this->query("TRUNCATE TABLE `$value-s-hosts`"); @@ -237,7 +240,6 @@ private function resetDatabase() { */ private function processChunks($data, $listname) { $len = strlen($data); - var_dump($len); $offset = $z = 0; while ($offset < $len) { $x = strpos($data, ':', $offset); From cebef1250bd94740e19f6d6e53fa346bcc129c15 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 23:20:12 +0200 Subject: [PATCH 06/16] added missing config in installer. --- install.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/install.php b/install.php index 9be532a..7060872 100644 --- a/install.php +++ b/install.php @@ -7,8 +7,9 @@ * INITIAL INSTALLER - RUN ONCE (or more than once if you're adding a new list!) */ -require ("phpgsb.class.php"); -$phpgsb = new phpGSB($config['db'], $config['user'], $config['pass'], $config['host']); +include('config.php'); +require("phpgsb.class.php"); +$phpgsb = new phpGSB($config['db'], $config['user'], $config['pass'], $config['host'], true); $phpgsb->usinglists = array( 'googpub-phish-shavar', 'goog-malware-shavar' From 02b69b651c952d4672167402da03d3208a5e80db Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 23:20:30 +0200 Subject: [PATCH 07/16] removed close + added verbose. 
--- listupdater.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/listupdater.php b/listupdater.php index 69d5016..41c216c 100644 --- a/listupdater.php +++ b/listupdater.php @@ -9,10 +9,9 @@ */ include("config.php"); require("phpgsb.class.php"); -$phpgsb = new phpGSB($config['db'], $config['user'], $config['pass'], $config['host']); +$phpgsb = new phpGSB($config['db'], $config['user'], $config['pass'], $config['host'], true); // Obtain an API key from: http://code.google.com/apis/safebrowsing/key_signup.html $phpgsb->apikey = $config['api_key']; $phpgsb->usinglists = array('googpub-phish-shavar','goog-malware-shavar'); $phpgsb->runUpdate(); -$phpgsb->close(); From 8198b77af6e6f49e9100563af17abcfacec47b75 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 23:21:14 +0200 Subject: [PATCH 08/16] added debugging. fixed some method declaration. optimized some queries. --- phpgsb.class.php | 184 +++++++++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 79 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 588113a..1b14e70 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -27,6 +27,9 @@ class phpGSB { private $transtarted = false; private $transenabled = true; private $pingfilepath = ""; + + private $debug = false; + public $debugLog = array(); private $db; @@ -42,13 +45,25 @@ public function __destruct() { } private function close() { - $this->outputmsg("Closing phpGSB. (Peak Memory: " . (round(memory_get_peak_usage() / 1048576, 3)) . "MB)"); + $this->log("Closing phpGSB. (Peak Memory: " . (round(memory_get_peak_usage() / 1048576, 3)) . 
"MB)"); } public function silent() { $this->verbose = false; } + public function enableDebug() { + $this->debug = true; + } + + public function resetDebugLog() { + $this->debugLog = array(); + } + + public function setApiKey($apikey) { + $this->apikey = $apikey; + } + public function trans_disable() { $this->transenabled = false; } @@ -60,7 +75,7 @@ public function trans_enable() { private function trans_begin() { if ($this->transenabled) { $this->transtarted = true; - $this->outputmsg("Begin MySQL Transaction"); + $this->log("Begin MySQL Transaction"); $this->db->query('START TRANSACTION;'); } } @@ -68,7 +83,7 @@ private function trans_begin() { private function trans_commit() { if ($this->transtarted && $this->transenabled) { $this->transtarted = false; - $this->outputmsg("Comitting Transaction"); + $this->log("Comitting Transaction"); $this->db->query('COMMIT;'); } } @@ -76,7 +91,7 @@ private function trans_commit() { private function trans_rollback() { if ($this->transtarted && $this->transenabled) { $this->transtarted = false; - $this->outputmsg("Rolling Back Transaction"); + $this->log("Rolling Back Transaction"); $this->db->query('ROLLBACK;'); } } @@ -85,7 +100,7 @@ private function trans_rollback() { * Function to output messages, used instead of echo, * will make it easier to have a verbose switch in later releases */ - private function outputmsg($msg) { + private function log($msg) { if ($this->verbose) { echo $msg . "\n"; } @@ -102,7 +117,7 @@ private function fatalerror($msg) { } $this->trans_rollback(); - throw Exception($msg); + throw new Exception($msg); } /** @@ -177,12 +192,12 @@ private function setTimeout($seconds) { private function checkTimeout($type) { $file = ($type == 'data' ? 'nextcheck.dat' : 'nextcheckl.dat'); - $curstatus = explode('||', file_get_contents($this->pingfilepath . $file)); + $curstatus = explode('||', @file_get_contents($this->pingfilepath . $file)); if (time() < $curstatus[0]) { $this->fatalerror("Must wait another " . 
($curstatus[0] - time()) . " seconds before another request"); } - $this->outputmsg("Allowed to request"); + $this->log("Allowed to request"); } /** @@ -240,7 +255,7 @@ private function resetDatabase() { */ private function processChunks($data, $listname) { $len = strlen($data); - $offset = $z = 0; + $offset = 0; while ($offset < $len) { $x = strpos($data, ':', $offset); $type = substr($data, $offset, $x-$offset); @@ -281,7 +296,7 @@ private function processChunks($data, $listname) { } if ($type != 'a' && $type != 's') { - $this->outputmsg("DISCARDED CHUNKNUM: $chunknum (Had no valid label)"); + $this->log("DISCARDED CHUNKNUM: $chunknum (Had no valid label)"); continue; } @@ -305,7 +320,7 @@ private function processChunks($data, $listname) { for ($i = 0; $i < $row['count']; $i++) { $pair = array(); if ($type == 's') { - $pair['addchunknum'] = substr($chunkdata, $chunkOffset, 8); + $pair['addchunknum'] = hexdec(substr($chunkdata, $chunkOffset, 8)); $chunkOffset += 8; } $pair['prefix'] = substr($chunkdata, $chunkOffset, ($hashlen * 2)); @@ -314,7 +329,7 @@ private function processChunks($data, $listname) { } } elseif ($row['count'] == 0 && $type == 's') { $row['pairs'][] = array( - 'addchunknum' => substr($chunkdata, $chunkOffset, 8) + 'addchunknum' => hexdec(substr($chunkdata, $chunkOffset, 8)) ); $chunkOffset += 8; } elseif ($row['count'] < 0) { @@ -327,7 +342,6 @@ private function processChunks($data, $listname) { } $this->saveChunkPart($dataArr, ($type == 's' ? 'SUB' : "ADD"), $listname); unset($dataArr); - $z++; } return true; } @@ -481,7 +495,7 @@ private function deleteRange($range, $mode, $listname) { $params = array(); $buildtrunk = $listname . '-' . $mode; if (strpos($range, '-') !== false) { - $params = explode('-', trim($range), 1); + $params = explode('-', trim($range), 2); $clause = "`chunk_num` >= ? 
AND `chunk_num` <= ?"; } else { $params[] = $range; @@ -539,7 +553,7 @@ private function getData($body) { } if (!preg_match_all('/i:(.+?)\n(.+?)(?=i:|$)/s', $result[1], $blocks, PREG_PATTERN_ORDER)) { - $this->outputmsg('No data available in list'); + $this->log('No data available in list'); return true; } @@ -554,7 +568,7 @@ private function getData($body) { case 'u': $chunkdata = $this->googleDownloader('http://' . $value, false, "data"); $processed = $this->processChunks($chunkdata[1], $listname); - $this->outputmsg("Saved a chunk file: " . $value); + $this->log("Saved a chunk file: " . $value); break; case 'sd': case 'ad': @@ -581,7 +595,7 @@ public function runUpdate() { $require .= $this->formattedRequest($value); } - $this->outputmsg("Using $require"); + $this->log("Using $require"); $this->getData($require); } @@ -631,9 +645,9 @@ public function validateMethod() { $canit = self::canonicalizeURL($key); $canit = $canit['GSBURL']; if ($canit == $value) { - outputmsg("PASSED: $key"); + $this->log("PASSED: $key"); } else { - outputmsg("INVALid:
ORIGINAL: $key
EXPECTED: $value
RECIEVED: $canit
"); + $this->log("INVALid:
ORIGINAL: $key
EXPECTED: $value
RECIEVED: $canit
"); } } } @@ -647,7 +661,7 @@ public function validateMethod() { * Thanks to mikegillis677 for finding the seg. fault issue in the old function. * Passed validateMethod() check on 17/01/12 */ - static function j_parseUrl($url) { + private static function j_parseUrl($url) { $strict = '/^(?:([^:\/?#]+):)?(?:\/\/\/?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?(((?:\/(\w:))?((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/'; $loose = '/^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((?:\/(\w:))?(\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/'; preg_match($loose, $url, $match); @@ -712,14 +726,14 @@ static function j_parseUrl($url) { /** * Regex to check if its a numerical IP address */ - static function is_ip($ip) { + private static function is_ip($ip) { return preg_match("/^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" . "(\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/", $ip); } /** * Checks if input is in hex format */ - static function is_hex($x) { + private static function is_hex($x) { // Relys on the fact that hex often includes letters meaning PHP will // disregard the string if (($x + 3) == 3) { @@ -732,7 +746,7 @@ static function is_hex($x) { /** * Checks if input is in octal format */ - static function is_octal($x) { + private static function is_octal($x) { //Relys on the fact that in IP addressing octals must begin with a 0 to // denote octal return substr($x, 0, 1) == 0; @@ -741,7 +755,7 @@ static function is_octal($x) { /** * Converts hex or octal input into decimal */ - static function hexoct2dec($value) { + private static function hexoct2dec($value) { //As this deals with parts in IP's we can be more exclusive if (substr_count(substr($value, 0, 2), '0x') > 0 && self::is_hex($value)) { return hexdec($value); @@ -755,7 +769,7 @@ static function hexoct2dec($value) { /** * Converts IP address part in HEX to decimal */ - static function 
iphexdec($hex) { + private static function iphexdec($hex) { //Removes any leading 0x (used to denote hex) and then and leading 0's) $temp = str_replace('0x', '', $hex); $temp = ltrim($temp, "0"); @@ -765,7 +779,7 @@ static function iphexdec($hex) { /** * Converts full IP address in HEX to decimal */ - static function hexIPtoIP($hex) { + private static function hexIPtoIP($hex) { // Remove hex identifier and leading 0's (not significant) $tempip = str_replace('0x', '', $hex); $tempip = ltrim($tempip, "0"); @@ -798,7 +812,7 @@ static function hexIPtoIP($hex) { * Checks if an IP provided in either hex, octal or decimal is in fact * an IP address. Normalises to a four part IP address. */ - static function isValid_IP($ip) { + private static function isValid_IP($ip) { // First do a simple check, if it passes this no more needs to be done if (self::is_ip($ip)) { return $ip; @@ -889,7 +903,7 @@ static function isValid_IP($ip) { * or equal to 127 (some of those are non alpha-numeric and so urlencode * on its own won't work). */ - static function flexURLEncode($url, $ignorehash = false) { + private static function flexURLEncode($url, $ignorehash = false) { // Had to write another layer as built in PHP urlencode() escapes all non // alpha-numeric // google states to only urlencode if its below 32 or above or equal to @@ -912,7 +926,7 @@ static function flexURLEncode($url, $ignorehash = false) { /** * Canonicalize a full URL according to Google's definition. */ - static function canonicalizeURL($url) { + private static function canonicalizeURL($url) { // Remove line feeds, return carriages, tabs, vertical tabs $finalurl = trim(str_replace(array( "\x09", @@ -1044,14 +1058,14 @@ static function canonicalizeURL($url) { /** * SHA-256 input (short method). 
*/ - static function sha256($data) { + private static function sha256($data) { return hash('sha256', $data); } /** * Make hostkeys for use in a lookup */ - static function makeHostKey($host, $usingip) { + private static function makeHostKey($host, $usingip) { if ($usingip) { $hosts = array($host . "/"); } else { @@ -1085,7 +1099,7 @@ static function makeHostKey($host, $usingip) { /** * Hash up a list of values from makeprefixes() (will possibly be combined into that function at a later date */ - static function makeHashes($prefixarray) { + private static function makeHashes($prefixarray) { if (count($prefixarray) > 0) { $returnprefixes = array(); foreach ($prefixarray as $value) { @@ -1104,7 +1118,7 @@ static function makeHashes($prefixarray) { /** * Make URL prefixes for use after a hostkey check */ - static function makeprefixes($host, $path, $query, $usingip) { + private static function makeprefixes($host, $path, $query, $usingip) { $prefixes = array(); // Exact hostname in the url @@ -1170,21 +1184,22 @@ static function makeprefixes($host, $path, $query, $usingip) { * request */ private function processFullLookup($data) { - $clonedata = $data; $extracthash = array(); - while (strlen($clonedata) > 0) { - $splithead = explode("\n", $clonedata, 2); - $chunkinfo = explode(':', $splithead[0]); - $listname = $chunkinfo[0]; - $addchunk = $chunkinfo[1]; - $chunklen = $chunkinfo[2]; - $chunkdata = bin2hex(substr($splithead[1], 0, $chunklen)); - while (strlen($chunkdata) > 0) { - $extracthash[$listname][$addchunk] = substr($chunkdata, 0, 64); - $chunkdata = substr($chunkdata, 64); + + $len = strlen($data); + $offset = 0; + while ($offset < $len) { + $x = strpos($data, "\n", $offset); + $head = substr($data, $offset, $x-$offset); + $offset = $x+1; + list($listname, $addchunk, $chunklen) = explode(':', $head, 3); + + if ($chunklen > 0) { + $extracthash[$listname][$addchunk] = bin2hex(substr($data, $offset, $chunklen)); + $offset += $chunklen; } - $clonedata = 
substr($splithead[1], $chunklen); } + return $extracthash; } @@ -1196,7 +1211,7 @@ private function addfullhash($prefix, $chunknum, $fullhash, $listname) { $buildtrunk = $listname . "-a"; // First check hosts - $stm = $this->query("SELECT * FROM `" . $buildtrunk ."-hosts` WHERE `hostkey` = ? AND `chunk_num` = ? AND fulllhash = '' LIMIT 1", array($prefix, $chunknum)); + $stm = $this->query("SELECT * FROM `" . $buildtrunk ."-hosts` WHERE `hostkey` = ? AND `chunk_num` = ? AND fullhash = '' LIMIT 1", array($prefix, $chunknum)); if ($stm->rowCount() > 0) { $row = $stm->fetch(\PDO::FETCH_ASSOC); // We've got a live one! Insert the full hash for it @@ -1232,11 +1247,10 @@ private function cacheCheck($prefix) { ); } - $stm = $this->query("SELECT p.fullhash, h.chunk_num - FROM - `" . $buildtrunk . "-prefixes` p - JOIN `" . $buildtrunk . "-hosts` h ON (p.hostkey = h.hostkey) - WHERE p.`prefix` = ? AND p.`fullhash` != '' AND h.count > 0", array($prefix)); + $stm = $this->query("SELECT p.fullhash, h.chunk_num FROM + `" . $buildtrunk . "-prefixes` p + JOIN `" . $buildtrunk . "-hosts` h ON (p.hostkey = h.hostkey) + WHERE p.`prefix` = ? AND p.`fullhash` != '' AND h.count > 0", array($prefix)); if ($stm->rowCount() > 0) { $row = $stm->fetch(\PDO::FETCH_ASSOC); return array( @@ -1250,11 +1264,13 @@ private function cacheCheck($prefix) { } /** - * Do a full-hash lookup based on prefixes provided, returns (bool) true on a match and (bool) false on no match. + * Do a full-hash lookup based on prefixes provided, + * returns (bool) true on a match and (bool) false on no match. */ private function doFullLookup($prefixes, $originals) { // Store copy of original prefixes $cloneprefixes = $prefixes; + // They should really all have the same prefix size.. 
we'll just check one $prefixsize = strlen($prefixes[0][0]) / 2; $length = count($prefixes) * $prefixsize; @@ -1277,29 +1293,30 @@ private function doFullLookup($prefixes, $originals) { $prefixes[$key] = pack("H*", $value[0]); } // No cache matches so we continue with request - $body = "$prefixsize:$length\n" . implode("", $prefixes); + $body = $prefixsize . ":" . $length . "\n" . implode("", $prefixes); $buildopts = array( CURLOPT_POST => true, CURLOPT_POSTFIELDS => $body ); - $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=" . $this->apikey . "&appver=" . $this->version . "&pver=" . $this->apiversion, $buildopts, "lookup"); - + + $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=" . + $this->apikey . "&appver=" . $this->version . "&pver=" . $this->apiversion, $buildopts, "lookup"); + if ($result[0]['http_code'] == 200 && !empty($result[1])) { - //Extract hashes from response - $extractedhashes = $this->processFullLookup($result[1]); - //Loop over each list - foreach ($extractedhashes as $key => $value) { - //Loop over each value in each list - foreach ($value as $newkey => $newvalue) { - if (isset($originals[$newvalue])) { - //Okay it matches a full-hash we have, now to check + // Extract hashes from response + // Loop over each list + foreach ($this->processFullLookup($result[1]) as $listname => $chunks) { + // Loop over each value in each list + foreach ($chunks as $newkey => $fullhash) { + if (isset($originals[$fullhash])) { + // Okay it matches a full-hash we have, now to check // they're from the same chunks foreach ($cloneprefixes as $nnewvalue) { - if ($nnewvalue[1] == $newkey && $nnewvalue[0] == $originals[$newvalue]['prefix']) { - //From same chunks - //Add full hash to database (cache) - $this->addfullhash($nnewvalue[0], $nnewvalue[1], $newvalue, $key); + if ($nnewvalue[1] == $newkey && $nnewvalue[0] == 
$originals[$fullhash]['prefix']) { + // From same chunks + // Add full hash to database (cache) + $this->addfullhash($nnewvalue[0], $nnewvalue[1], $fullhash, $listname); return true; } @@ -1325,15 +1342,13 @@ private function doFullLookup($prefixes, $originals) { private function subCheck($listname, $prefixlist, $mode) { $buildtrunk = $listname . '-s'; foreach ($prefixlist as $value) { - $stm = $this->query("SELECT * FROM `". $buildtrunk . "-prefixes` WHERE " . ($mode == 'prefix' ? '`prefix`' : 'hostkey') . ' = ?', array($value[0])); + $stm = $this->query("SELECT id FROM `". $buildtrunk . "-prefixes` WHERE " . + ($mode == 'prefix' ? '`prefix`' : 'hostkey') . ' = ? AND add_chunk_num = ? LIMIT 1', array($value[0], $value[1])); // As interpreted from Developer Guide if theres a match in // sub list it cancels out the add listing - // we'll double check its from the same chunk just to be - // pedantic - while ($row = $stm->fetch(\PDO::FETCH_ASSOC)) { - if (hexdec($row['add_chunk_num']) == $value[1]) { - return true; - } + // we'll double check its from the same chunk just to be pedantic + if ($stm->rowCount() > 0) { + return true; } } return false; @@ -1345,6 +1360,10 @@ private function subCheck($listname, $prefixlist, $mode) { private function query($sql, $data = array()) { $stm = $this->db->prepare($sql); $stm->execute($data); + if ($this->debug) { + $this->debugLog[] = array($sql, $data, $stm->rowCount()); + + } return $stm; } @@ -1357,7 +1376,7 @@ public function install() { `id` int(11) unsigned NOT NULL AUTO_INCREMENT, `hostkey` varchar(8) NOT NULL, `chunk_num` int(11) unsigned NOT NULL, - `count` varchar(2) NOT NULL DEFAULT '0', + `count` int(11) unsigned NOT NULL DEFAULT '0', `fullhash` char(64) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `hostkey_2` (`hostkey`,`chunk_num`,`count`,`fullhash`), @@ -1384,7 +1403,7 @@ public function install() { `id` int(11) unsigned NOT NULL AUTO_INCREMENT, `hostkey` varchar(8) NOT NULL, `chunk_num` int(11) unsigned NOT NULL, - 
`count` varchar(2) NOT NULL DEFAULT '0', + `count` int(11) unsigned NOT NULL DEFAULT '0', `fullhash` char(64) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `hostkey_2` (`hostkey`,`chunk_num`,`count`,`fullhash`), @@ -1400,7 +1419,7 @@ public function install() { $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-s-prefixes` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, `hostkey` varchar(8) NOT NULL, - `add_chunk_num` varchar(8) NOT NULL, + `add_chunk_num` int(11) unsigned NOT NULL, `prefix` varchar(8) NOT NULL, `fullhash` char(64) NOT NULL, PRIMARY KEY (`id`), @@ -1436,11 +1455,14 @@ public function doLookup($url) { $matches = array(); foreach ($lists as $key => $value) { $buildtrunk = $value . '-a'; - $hostsStm = $this->db->prepare('SELECT * FROM `' . $buildtrunk . '-hosts` WHERE hostkey = ?'); + $hostsStm = $this->db->prepare('SELECT count, hostkey, chunk_num FROM `' . $buildtrunk . '-hosts` WHERE hostkey = ?'); //Loop over each list foreach ($hostkeys as $keyinner => $valueinner) { + if ($this->debug) { + $this->debugLog[] = array('SELECT count, hostkey, chunk_num FROM `' . $buildtrunk . '-hosts` WHERE hostkey = ?', array($valueinner['prefix']), $hostsStm->rowCount()); + } // Within each list loop over each hostkey $hostsStm->execute(array($valueinner['prefix'])); @@ -1455,8 +1477,12 @@ public function doLookup($url) { $params = $prefixParams; $params[] = $row['hostkey']; + if ($this->debug) { + $this->debugLog[] = array("SELECT FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = ?", $param); + } + // Check if there are any matching prefixes - $stm = $this->query("SELECT * FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = ?", $params); + $stm = $this->query("SELECT prefix FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . 
" `hostkey` = ?", $params); if ($stm->rowCount() > 0) { // We found prefix matches $prematches = array(); From 654ad5bdbd4d5e5c328b2e68b9208896a963d658 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Sun, 13 Jul 2014 23:56:52 +0200 Subject: [PATCH 09/16] result of doLookup is now name of list url found on. --- phpgsb.class.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 1b14e70..fd47815 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -1432,7 +1432,7 @@ public function install() { /** * Does a full URL lookup on given lists, will check if its in database, if * slight match there then will do a full-hash lookup on GSB, - * returns (bool) true on match and (bool) false on negative. + * listname on match and (bool) false on negative. */ public function doLookup($url) { $lists = $this->usinglists; @@ -1453,8 +1453,8 @@ public function doLookup($url) { $buildprequery = implode("OR", $buildprequery); $matches = array(); - foreach ($lists as $key => $value) { - $buildtrunk = $value . '-a'; + foreach ($lists as $key => $listname) { + $buildtrunk = $listname . '-a'; $hostsStm = $this->db->prepare('SELECT count, hostkey, chunk_num FROM `' . $buildtrunk . 
'-hosts` WHERE hostkey = ?'); //Loop over each list @@ -1496,17 +1496,17 @@ public function doLookup($url) { // Before we send off any requests first check // whether its in sub table - if (!$this->subCheck($value, $prematches, "prefix") && + if (!$this->subCheck($listname, $prematches, "prefix") && $this->doFullLookup($prematches, $prefixes)) { - return true; + return $listname; } } // If we didn't find matches then do nothing (keep // looping till end and it'll return negative) - } elseif (!$this->subCheck($value, array(array($row['hostkey'], $row['chunk_num'])), "hostkey") && + } elseif (!$this->subCheck($listname, array(array($row['hostkey'], $row['chunk_num'])), "hostkey") && $this->doFullLookup(array(array($row['hostkey'], $row['chunk_num'])), $hostkeys)) { - return true; + return $listname; } } } From 2ce4f50eaabb9ff7e6cea7ed0b2b62941f2c531e Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Mon, 14 Jul 2014 17:03:30 +0200 Subject: [PATCH 10/16] added dynamic support for yandex sb service. --- phpgsb.class.php | 86 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 28 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index fd47815..9c74da2 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -22,12 +22,16 @@ class phpGSB { 'goog-malware-shaprivate' ); + public $serviceScheme = 'https'; + public $serviceResourcePrefix = 'safebrowsing/'; + public $serviceDomain = 'safebrowsing.clients.google.com'; + private $mainlist = array(); private $verbose = true; private $transtarted = false; private $transenabled = true; private $pingfilepath = ""; - + private $debug = false; public $debugLog = array(); @@ -40,6 +44,23 @@ public function __construct($database = false, $username = false, $password = fa $this->verbose = $verbose; } + /** + * Get url to service resource with parameters + * + * @param string $resource + * @return string + */ + public function getServiceUrl($resource = '') { + return $this->serviceScheme . 
'://' . $this->serviceDomain . '/' . $this->serviceResourcePrefix . + $resource . '?client=api&apikey=' . $this->apikey . '&appver=' . $this->version . '&pver=' . $this->apiversion; + } + + public function setService($domain, $resource_prefix = '', $scheme = 'https') { + $this->serviceDomain = $domain; + $this->serviceScheme = $scheme; + $this->serviceResourcePrefix = $resource_prefix; + } + public function __destruct() { $this->close(); } @@ -107,7 +128,7 @@ private function log($msg) { } /** - * Function to output errors, used instead of echo, + * Function to output errors, used instead of echo, * will make it easier to have a verbose switch in later releases */ private function fatalerror($msg) { @@ -115,7 +136,7 @@ private function fatalerror($msg) { print_r($msg); echo "\n"; } - + $this->trans_rollback(); throw new Exception($msg); } @@ -181,7 +202,7 @@ private function setTimeout($seconds) { } else { $until = time() + $seconds . '||'; } - + file_put_contents($this->pingfilepath . 'nextcheck.dat', $until); } @@ -196,7 +217,7 @@ private function checkTimeout($type) { if (time() < $curstatus[0]) { $this->fatalerror("Must wait another " . ($curstatus[0] - time()) . " seconds before another request"); } - + $this->log("Allowed to request"); } @@ -205,11 +226,12 @@ private function checkTimeout($type) { * passed via $options. 
$followbackoff indicates * whether to follow backoff procedures or not */ - private function googleDownloader($url, $options, $followbackoff = false) { + private function download($url, $options = NULL, $followbackoff = false) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); if (is_array($options)) { curl_setopt_array($ch, $options); @@ -222,7 +244,7 @@ private function googleDownloader($url, $options, $followbackoff = false) { if ($followbackoff && $info['http_code'] > 299) { $this->Backoff($info, $followbackoff); } - + return array( $info, $data @@ -239,7 +261,7 @@ private function resetDatabase() { if (!empty($this->adminemail)) { mail($this->adminemail, 'Reset Database Request Issued', 'For some crazy unknown reason GSB requested a database reset at ' . time()); } - + foreach ($this->usinglists as $value) { $this->query("TRUNCATE TABLE `$value-s-index`"); $this->query("TRUNCATE TABLE `$value-s-hosts`"); @@ -521,6 +543,12 @@ private function deleteRange($range, $mode, $listname) { } } + public function getList() { + $url = $this->getServiceUrl('list'); + $result = $this->download($url); + return explode("\n", trim($result[1])); + } + /** * Main part of updater function, will call all other functions, merely * requires the request body, it will then process and save all data as well as checking @@ -537,9 +565,8 @@ private function getData($body) { CURLOPT_POSTFIELDS => $body . "\n" ); - $result = $this->googleDownloader( - "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=" . $this->apikey . "&appver=" . $this->version . "&pver=" . 
$this->apiversion, - $buildopts, "data"); + $url = $this->getServiceUrl('downloads'); + $result = $this->download($url, $buildopts, "data"); if (preg_match('/n:(\d+)/', $result[1], $match)) { $this->setTimeout($match[1]); @@ -566,7 +593,7 @@ private function getData($body) { $value = trim($elements[2][$id]); switch($type) { case 'u': - $chunkdata = $this->googleDownloader('http://' . $value, false, "data"); + $chunkdata = $this->download('http://' . $value, false, "data"); $processed = $this->processChunks($chunkdata[1], $listname); $this->log("Saved a chunk file: " . $value); break; @@ -926,7 +953,7 @@ private static function flexURLEncode($url, $ignorehash = false) { /** * Canonicalize a full URL according to Google's definition. */ - private static function canonicalizeURL($url) { + public static function canonicalizeURL($url) { // Remove line feeds, return carriages, tabs, vertical tabs $finalurl = trim(str_replace(array( "\x09", @@ -1118,7 +1145,7 @@ private static function makeHashes($prefixarray) { /** * Make URL prefixes for use after a hostkey check */ - private static function makeprefixes($host, $path, $query, $usingip) { + public static function makeprefixes($host, $path, $query, $usingip) { $prefixes = array(); // Exact hostname in the url @@ -1132,7 +1159,7 @@ private static function makeprefixes($host, $path, $query, $usingip) { } else { $maxslice = count($backhostparts); } - + $topslice = array_slice($backhostparts, 0, $maxslice); while ($maxslice > 1) { $hostcombos[] = implode('.', array_reverse($topslice)); @@ -1142,7 +1169,7 @@ private static function makeprefixes($host, $path, $query, $usingip) { } else { $hostcombos[] = $host; } - + $hostcombos = array_unique($hostcombos); $variations = array(); if (!empty($path)) { @@ -1153,12 +1180,12 @@ private static function makeprefixes($host, $path, $query, $usingip) { $upperlimit = count($pathparts); } } - + foreach ($hostcombos as $key => $value) { if (!empty($query)) { $variations[] = $value . $path . 
'?' . $query; } - + $variations[] = $value . $path; if (!empty($path)) { $i = 0; @@ -1174,7 +1201,7 @@ private static function makeprefixes($host, $path, $query, $usingip) { } } } - + $variations = array_unique($variations); return self::makeHashes($variations); } @@ -1193,7 +1220,7 @@ private function processFullLookup($data) { $head = substr($data, $offset, $x-$offset); $offset = $x+1; list($listname, $addchunk, $chunklen) = explode(':', $head, 3); - + if ($chunklen > 0) { $extracthash[$listname][$addchunk] = bin2hex(substr($data, $offset, $chunklen)); $offset += $chunklen; @@ -1264,13 +1291,13 @@ private function cacheCheck($prefix) { } /** - * Do a full-hash lookup based on prefixes provided, + * Do a full-hash lookup based on prefixes provided, * returns (bool) true on a match and (bool) false on no match. */ private function doFullLookup($prefixes, $originals) { // Store copy of original prefixes $cloneprefixes = $prefixes; - + // They should really all have the same prefix size.. we'll just check one $prefixsize = strlen($prefixes[0][0]) / 2; $length = count($prefixes) * $prefixsize; @@ -1299,10 +1326,10 @@ private function doFullLookup($prefixes, $originals) { CURLOPT_POST => true, CURLOPT_POSTFIELDS => $body ); - - $result = $this->googleDownloader("http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=" . - $this->apikey . "&appver=" . $this->version . "&pver=" . $this->apiversion, $buildopts, "lookup"); - + + $url = $this->getServiceUrl('gethash'); + + $result = $this->download($url, $buildopts, "lookup"); if ($result[0]['http_code'] == 200 && !empty($result[1])) { // Extract hashes from response // Loop over each list @@ -1342,7 +1369,7 @@ private function doFullLookup($prefixes, $originals) { private function subCheck($listname, $prefixlist, $mode) { $buildtrunk = $listname . '-s'; foreach ($prefixlist as $value) { - $stm = $this->query("SELECT id FROM `". $buildtrunk . "-prefixes` WHERE " . 
+ $stm = $this->query("SELECT id FROM `". $buildtrunk . "-prefixes` WHERE " . ($mode == 'prefix' ? '`prefix`' : 'hostkey') . ' = ? AND add_chunk_num = ? LIMIT 1', array($value[0], $value[1])); // As interpreted from Developer Guide if theres a match in // sub list it cancels out the add listing @@ -1362,7 +1389,7 @@ private function query($sql, $data = array()) { $stm->execute($data); if ($this->debug) { $this->debugLog[] = array($sql, $data, $stm->rowCount()); - + } return $stm; } @@ -1451,6 +1478,9 @@ public function doLookup($url) { $prefixParams[] = $prefix['prefix']; } $buildprequery = implode("OR", $buildprequery); + if (!empty($buildprequery)) { + $buildprequery .= ' AND'; + } $matches = array(); foreach ($lists as $key => $listname) { From b3e8ed4bc94e795ce26ea663a46951342b3c3397 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Mon, 21 Jul 2014 23:48:40 +0200 Subject: [PATCH 11/16] added skip timeout parameter. --- phpgsb.class.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 9c74da2..92428c5 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -554,7 +554,7 @@ public function getList() { * requires the request body, it will then process and save all data as well as checking * for ADD-DEL and SUB-DEL, runs silently so won't return anything on success */ - private function getData($body) { + private function getData($body, $skipSetTimeout = false) { if (empty($body)) { return $this->fatalerror("Missing a body for data request"); } @@ -569,7 +569,9 @@ private function getData($body) { $result = $this->download($url, $buildopts, "data"); if (preg_match('/n:(\d+)/', $result[1], $match)) { - $this->setTimeout($match[1]); + if (!$skipSetTimeout) { + $this->setTimeout($match[1]); + } } else { return $this->fatalerror("Missing timeout"); } @@ -615,8 +617,10 @@ private function getData($body) { /** * Shortcut to run updater */ - public function runUpdate() { - 
$this->checkTimeout('data'); + public function runUpdate($skipCheckTimeout = false) { + if (!$skipCheckTimeout) { + $this->checkTimeout('data'); + } $require = ""; foreach ($this->usinglists as $value) { $require .= $this->formattedRequest($value); From b11836f65f61b582bf5ce2d31c20a6b14927b240 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Mon, 21 Jul 2014 23:49:37 +0200 Subject: [PATCH 12/16] added missing parameter to getData call. --- phpgsb.class.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 92428c5..9366e5f 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -627,7 +627,7 @@ public function runUpdate($skipCheckTimeout = false) { } $this->log("Using $require"); - $this->getData($require); + $this->getData($require, $skipCheckTimeout); } //LOOKUP FUNCTIONS From 96c555abe45c062a92bf5630da2b9420a68b621c Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Mon, 21 Jul 2014 23:54:10 +0200 Subject: [PATCH 13/16] added skipSetTimeout parameter. --- phpgsb.class.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 9366e5f..6cea1ed 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -617,7 +617,7 @@ private function getData($body, $skipSetTimeout = false) { /** * Shortcut to run updater */ - public function runUpdate($skipCheckTimeout = false) { + public function runUpdate($skipCheckTimeout = false, $skipSetTimeout = false) { if (!$skipCheckTimeout) { $this->checkTimeout('data'); } @@ -627,7 +627,7 @@ public function runUpdate($skipCheckTimeout = false) { } $this->log("Using $require"); - $this->getData($require, $skipCheckTimeout); + $this->getData($require, $skipSetTimeout); } //LOOKUP FUNCTIONS From c849cb04c355040ddc55d97b369bc6a0e567c709 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Tue, 22 Jul 2014 00:13:48 +0200 Subject: [PATCH 14/16] fixed invalid while. 
--- phpgsb.class.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index 6cea1ed..d50bc5a 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -1521,7 +1521,7 @@ public function doLookup($url) { // We found prefix matches $prematches = array(); $prelookup = array(); - while ($rowPrefix = $hostsStm->fetch(\PDO::FETCH_ASSOC)) { + while ($rowPrefix = $stm->fetch(\PDO::FETCH_ASSOC)) { $prematches[] = array( $rowPrefix['prefix'], $row['chunk_num'] From 43a7ee07de31e4a9386f5e69cb24bc117246d7c7 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Thu, 14 Aug 2014 09:18:44 +0200 Subject: [PATCH 15/16] fixed query in addfullhash. --- phpgsb.class.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index d50bc5a..e86feb8 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -1253,13 +1253,14 @@ private function addfullhash($prefix, $chunknum, $fullhash, $listname) { `" . $buildtrunk ."-prefixes` p JOIN `" . $buildtrunk . "-hosts` h ON (h.hostkey = p.hostkey) SET - fullhash = ? + p.fullhash = ?, + h.fullhash = ? WHERE p.`prefix` = ? AND p.fullhash = '' AND h.chunk_num = ? AND h.count > 0 - ", array($fullhash, $prefix, $chunknum)); + ", array($fullhash, $fullhash, $prefix, $chunknum)); } } From a0f5d1ecd41a12d737b1e9e4b7e808817deee4f7 Mon Sep 17 00:00:00 2001 From: Stefan Meinecke Date: Thu, 15 Sep 2016 09:10:39 +0000 Subject: [PATCH 16/16] updated hash fields to binary. 
--- phpgsb.class.php | 56 ++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/phpgsb.class.php b/phpgsb.class.php index e86feb8..2f121a7 100644 --- a/phpgsb.class.php +++ b/phpgsb.class.php @@ -398,12 +398,12 @@ private function saveChunkPart($data, $type, $listname) { $buildindexValues[] = $data['chunklen']; foreach ($data['real'] as $newkey => $newvalue) { - $buildhost[] = "(?, ?, ?, '')"; + $buildhost[] = "(x?, ?, ?, '')"; $buildhostValues[] = $newvalue['hostkey']; $buildhostValues[] = $data['chunknum']; $buildhostValues[] = $newvalue['count']; foreach ($newvalue['pairs'] as $innerkey => $innervalue) { - $buildpairs[] = "(?, " . ($type == 'SUB' ? '?, ' : '') . "?, '')"; + $buildpairs[] = "(x?, " . ($type == 'SUB' ? '?, ' : '') . "x?, '')"; $buildpairsValues[] = $newvalue['hostkey']; if ($type == 'SUB') { $buildpairsValues[] = $innervalue['addchunknum']; @@ -529,14 +529,14 @@ private function deleteRange($range, $mode, $listname) { // Select all host keys that match chunks (we'll delete them after but we // need the hostkeys list!) - $stm = $this->query('SELECT `hostkey` FROM `' . $buildtrunk . '-hosts` WHERE ' . $clause . " AND hostkey != ''", $params); + $stm = $this->query('SELECT HEX(`hostkey`) hostkey FROM `' . $buildtrunk . '-hosts` WHERE ' . $clause . " AND hostkey != ''", $params); $buildprefixdel = array(); while ($row = $stm->fetch(\PDO::FETCH_ASSOC)) { $buildprefixdel[] = $row['hostkey']; } if (!empty($buildprefixdel)) { - $this->query('DELETE FROM `' . $buildtrunk . '-hosts` WHERE hostkey IN (' . substr(str_repeat('?, ', count($buildprefixdel)), 0, -2) . ')', $buildprefixdel); + $this->query('DELETE FROM `' . $buildtrunk . '-hosts` WHERE hostkey IN (' . substr(str_repeat('x?, ', count($buildprefixdel)), 0, -2) . ')', $buildprefixdel); //Delete all matching hostkeys $this->query('DELETE FROM `' . $buildtrunk . '-hosts` WHERE ' . 
$clause, $params); @@ -1242,21 +1242,21 @@ private function addfullhash($prefix, $chunknum, $fullhash, $listname) { $buildtrunk = $listname . "-a"; // First check hosts - $stm = $this->query("SELECT * FROM `" . $buildtrunk ."-hosts` WHERE `hostkey` = ? AND `chunk_num` = ? AND fullhash = '' LIMIT 1", array($prefix, $chunknum)); + $stm = $this->query("SELECT id, HEX(hostkey) hostkey, chunk_num, count, HEX(fullhash) fullhash FROM `" . $buildtrunk ."-hosts` WHERE `hostkey` = x? AND `chunk_num` = ? AND fullhash = '' LIMIT 1", array($prefix, $chunknum)); if ($stm->rowCount() > 0) { $row = $stm->fetch(\PDO::FETCH_ASSOC); // We've got a live one! Insert the full hash for it - $this->query("UPDATE `" . $buildtrunk . "-hosts` SET `fullhash` = ? WHERE `id` = ?", array($fullhash, $row['id'])); + $this->query("UPDATE `" . $buildtrunk . "-hosts` SET `fullhash` = x? WHERE `id` = ?", array($fullhash, $row['id'])); } else { $this->query(" UPDATE `" . $buildtrunk ."-prefixes` p JOIN `" . $buildtrunk . "-hosts` h ON (h.hostkey = p.hostkey) SET - p.fullhash = ?, - h.fullhash = ? + p.fullhash = x?, + h.fullhash = x? WHERE - p.`prefix` = ? AND + p.`prefix` = x? AND p.fullhash = '' AND h.chunk_num = ? AND h.count > 0 @@ -1270,7 +1270,7 @@ private function addfullhash($prefix, $chunknum, $fullhash, $listname) { private function cacheCheck($prefix) { foreach ($this->usinglists as $value) { $buildtrunk = $value . "-a"; - $stm = $this->query("SELECT * FROM `" . $buildtrunk . "-hosts` WHERE `hostkey` = ? AND `fullhash` != ''", array($prefix)); + $stm = $this->query("SELECT id, HEX(hostkey) hostkey, chunk_num, count, HEX(fullhash) fullhash FROM `" . $buildtrunk . "-hosts` WHERE `hostkey` = x? 
AND `fullhash` != ''", array($prefix)); if ($stm->rowCount() > 0) { $row = $stm->fetch(\PDO::FETCH_ASSOC); return array( @@ -1279,10 +1279,10 @@ private function cacheCheck($prefix) { ); } - $stm = $this->query("SELECT p.fullhash, h.chunk_num FROM + $stm = $this->query("SELECT HEX(p.fullhash) fullhash, h.chunk_num FROM `" . $buildtrunk . "-prefixes` p JOIN `" . $buildtrunk . "-hosts` h ON (p.hostkey = h.hostkey) - WHERE p.`prefix` = ? AND p.`fullhash` != '' AND h.count > 0", array($prefix)); + WHERE p.`prefix` = x? AND p.`fullhash` != '' AND h.count > 0", array($prefix)); if ($stm->rowCount() > 0) { $row = $stm->fetch(\PDO::FETCH_ASSOC); return array( @@ -1375,7 +1375,7 @@ private function subCheck($listname, $prefixlist, $mode) { $buildtrunk = $listname . '-s'; foreach ($prefixlist as $value) { $stm = $this->query("SELECT id FROM `". $buildtrunk . "-prefixes` WHERE " . - ($mode == 'prefix' ? '`prefix`' : 'hostkey') . ' = ? AND add_chunk_num = ? LIMIT 1', array($value[0], $value[1])); + ($mode == 'prefix' ? '`prefix`' : 'hostkey') . ' = x? AND add_chunk_num = ? LIMIT 1', array($value[0], $value[1])); // As interpreted from Developer Guide if theres a match in // sub list it cancels out the add listing // we'll double check its from the same chunk just to be pedantic @@ -1406,10 +1406,10 @@ public function install() { foreach ($this->usinglists as $listname) { $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-a-hosts` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `hostkey` varchar(8) NOT NULL, + `hostkey` BINARY(4) NOT NULL, `chunk_num` int(11) unsigned NOT NULL, `count` int(11) unsigned NOT NULL DEFAULT '0', - `fullhash` char(64) NOT NULL, + `fullhash` BINARY(32) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `hostkey_2` (`hostkey`,`chunk_num`,`count`,`fullhash`), KEY `hostkey` (`hostkey`) @@ -1423,9 +1423,9 @@ public function install() { $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . 
"-a-prefixes` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `hostkey` varchar(8) NOT NULL, - `prefix` varchar(8) NOT NULL, - `fullhash` char(64) NOT NULL, + `hostkey` BINARY(4) NOT NULL, + `prefix` BINARY(4) NOT NULL, + `fullhash` BINARY(32) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `hostkey_2` (`hostkey`,`prefix`), KEY `hostkey` (`hostkey`) @@ -1433,10 +1433,10 @@ public function install() { $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-s-hosts` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `hostkey` varchar(8) NOT NULL, + `hostkey` BINARY(4) NOT NULL, `chunk_num` int(11) unsigned NOT NULL, `count` int(11) unsigned NOT NULL DEFAULT '0', - `fullhash` char(64) NOT NULL, + `fullhash` BINARY(32) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `hostkey_2` (`hostkey`,`chunk_num`,`count`,`fullhash`), KEY `hostkey` (`hostkey`) @@ -1450,10 +1450,10 @@ public function install() { $this->query("CREATE TABLE IF NOT EXISTS `" . $listname . "-s-prefixes` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `hostkey` varchar(8) NOT NULL, + `hostkey` BINARY(4) NOT NULL, `add_chunk_num` int(11) unsigned NOT NULL, - `prefix` varchar(8) NOT NULL, - `fullhash` char(64) NOT NULL, + `prefix` BINARY(4) NOT NULL, + `fullhash` BINARY(32) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `hostkey_2` (`hostkey`,`add_chunk_num`,`prefix`), KEY `hostkey` (`hostkey`) @@ -1479,7 +1479,7 @@ public function doLookup($url) { $prefixParams = array(); $buildprequery = array(); foreach ($prefixes as $prefix) { - $buildprequery[] = " `prefix` = ?"; + $buildprequery[] = " `prefix` = x?"; $prefixParams[] = $prefix['prefix']; } $buildprequery = implode("OR", $buildprequery); @@ -1490,13 +1490,13 @@ public function doLookup($url) { $matches = array(); foreach ($lists as $key => $listname) { $buildtrunk = $listname . '-a'; - $hostsStm = $this->db->prepare('SELECT count, hostkey, chunk_num FROM `' . $buildtrunk . 
'-hosts` WHERE hostkey = ?'); + $hostsStm = $this->db->prepare('SELECT count, HEX(hostkey) hostkey, chunk_num FROM `' . $buildtrunk . '-hosts` WHERE hostkey = x?'); //Loop over each list foreach ($hostkeys as $keyinner => $valueinner) { if ($this->debug) { - $this->debugLog[] = array('SELECT count, hostkey, chunk_num FROM `' . $buildtrunk . '-hosts` WHERE hostkey = ?', array($valueinner['prefix']), $hostsStm->rowCount()); + $this->debugLog[] = array('SELECT count, HEX(hostkey) hostkey, chunk_num FROM `' . $buildtrunk . '-hosts` WHERE hostkey = x?', array($valueinner['prefix']), $hostsStm->rowCount()); } // Within each list loop over each hostkey $hostsStm->execute(array($valueinner['prefix'])); @@ -1513,11 +1513,11 @@ public function doLookup($url) { $params[] = $row['hostkey']; if ($this->debug) { - $this->debugLog[] = array("SELECT FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = ?", $param); + $this->debugLog[] = array("SELECT FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = x?", $param); } // Check if there are any matching prefixes - $stm = $this->query("SELECT prefix FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = ?", $params); + $stm = $this->query("SELECT HEX(prefix) prefix FROM `" . $buildtrunk . "-prefixes` WHERE " . $buildprequery . " `hostkey` = x?", $params); if ($stm->rowCount() > 0) { // We found prefix matches $prematches = array();