In order for this to be successful, you need to already have all of the links for the pages. You can create a script to scrape through the list, but since they didn't bother updating their "alpha" directory webpage, it's still all in tables, and a pain in the ass to sort through. I will be uploading a full list of the games onto our server for download after I post this (in both .txt and .sql format).
The first function is the actual scraping code. It grabs the raw HTML and parses it locally. It returns an array (named $thisGame inside the function) that your calling script can use.
The second function finds the matching numeral for the month.
- Code: (login to view link)
<?php
/**
 * Scrape the details of one PC game from its MetaCritic page.
 *
 * Downloads the page, pulls the entries out of the "productinfo" divider,
 * and grabs the summary text and the page heading.
 *
 * @param string $findGame Path fragment appended to the MetaCritic PC platform URL.
 * @return array Associative array with the keys name, publisher, developer,
 *               esrb, genre, description and date. A value that could not be
 *               found (failed download, changed markup) is an empty string.
 */
function getGame($findGame) {
    // Create URL based on afore mentioned data
    $curled = 'http://www.metacritic.com/games/platforms/pc/'.$findGame;

    // Fetch the page. CURLOPT_RETURNTRANSFER makes curl_exec() hand the HTML
    // back as a string, so no output buffering is needed to capture it.
    $str = '';
    $ch = curl_init($curled);
    if ($ch !== false) {
        // Spoof the User Agent, because scraping is frowned upon :(
        curl_setopt($ch, CURLOPT_USERAGENT, "Firefox");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $response = curl_exec($ch);
        curl_close($ch);
        // curl_exec() returns false on failure; treat that as an empty page
        if (is_string($response)) {
            $str = $response;
        }
    }

    // We already know the name of the dividers we're looking for,
    // so grab the info divider first...
    $games = array();
    if (preg_match('/<div id="productinfo">(.*?)<\/div>/is', $str, $rawgame)) {
        // ...and then every "</strong> value</p>" entry inside it
        preg_match_all('/<\/strong> (.*?)<\/p>/is', $rawgame[0], $newGame);
        foreach ($newGame[1] as $strlData) {
            // Sterilize the strings - just in case: drop tags, decode entities
            $strlData = preg_replace('/<.*?>/', '', $strlData);
            $games[] = html_entity_decode($strlData);
        }
    }

    // The description and the game name each occur once, so no loop is needed
    preg_match('/<p class="summarytext">(.*?)<\/p>/is', $str, $gameDesc);
    preg_match('/<h1>(.*?)<\/h1>/is', $str, $gameName);

    // Build the array returned to the calling page.
    // NOTE(review): entry 3 of the info divider is deliberately not used,
    // matching the original mapping - confirm against the live markup.
    $thisGame = array();
    $thisGame['name']        = isset($gameName[1]) ? $gameName[1] : '';
    $thisGame['publisher']   = isset($games[0]) ? $games[0] : '';
    $thisGame['developer']   = isset($games[1]) ? $games[1] : '';
    $thisGame['esrb']        = isset($games[2]) ? $games[2] : '';
    $thisGame['genre']       = isset($games[4]) ? $games[4] : '';
    $thisGame['description'] = isset($gameDesc[1]) ? $gameDesc[1] : '';
    $thisGame['date']        = isset($games[5]) ? $games[5] : '';
    return $thisGame;
}
// Change a month's name to its equivalent number
/**
 * Find the two-digit month number for a month name.
 *
 * The match is a case-insensitive substring search: the first month (in
 * calendar order) whose full English name occurs anywhere in $getMonth wins.
 *
 * @param string $getMonth Text containing a full English month name.
 * @return string|null Month number as a string ('01' .. '12'), or null when
 *                     no month name is found.
 */
function chkMonths($getMonth) {
    $arrMonths = array(
        '01' => 'January',
        '02' => 'February',
        '03' => 'March',
        '04' => 'April',
        '05' => 'May',
        '06' => 'June',
        '07' => 'July',
        '08' => 'August',
        '09' => 'September',
        '10' => 'October',
        '11' => 'November',
        '12' => 'December',
    );
    $haystack = (string) $getMonth;
    // Check month against the array
    foreach ($arrMonths as $arrMonth => $monthName) {
        // stripos() replaces the removed eregi(): same case-insensitive
        // "name occurs somewhere in the input" test, without regex parsing
        if (stripos($haystack, $monthName) !== false) {
            // PHP turns the keys '10'..'12' into integers, so cast back to
            // a string to return the same type for every month
            return (string) $arrMonth;
        }
    }
    // No month name found
    return null;
}
?>
The next code block contains our actual viewable page source.
- Code: (login to view link)
<?php
// Re-scrape every PC game listed in the games table and store the scraped
// description, publisher, developer, ESRB rating and release date.
// NOTE(review): assumes a MySQL connection is already open (e.g. opened by
// the included file) - it is not established anywhere in this listing.
include('meta_critic.php');
echo '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8" /></head><body>';
$allGames = mysql_query("SELECT * FROM games WHERE game_platform='pc' ORDER BY game_id");
// mysql_query() returns false on failure; skip the loop instead of fetching from it
while ($allGames && ($selectGame = mysql_fetch_array($allGames))) {
    $setGame_ID = $selectGame['game_id'];
    $setGame_Meta = $selectGame['metauri'];
    $thisGame = getGame($setGame_Meta);

    // Turn a date such as "November 16, 2004" into "11-16-2004"
    $rawDate = isset($thisGame['date']) ? trim($thisGame['date']) : '';
    $thisDate = explode('-', str_replace(' ', '-', str_replace(',', '', $rawDate)));
    $thisMonth = isset($thisDate[0]) ? chkMonths($thisDate[0]) : null;
    if ($thisMonth !== null && isset($thisDate[1]) && isset($thisDate[2])) {
        // Zero-pad single-digit days
        if (strlen($thisDate[1]) == 1) { $thisDate[1] = '0'.$thisDate[1]; }
        $finalDate = $thisMonth.'-'.$thisDate[1].'-'.$thisDate[2];
    } else {
        // Date could not be parsed: keep whatever text was scraped
        $finalDate = $rawDate;
    }

    // Scraped text routinely contains apostrophes, so every value must be
    // escaped before it is placed inside the quoted SQL literals.
    // NOTE(review): game_release now receives the normalized $finalDate, which
    // was previously computed and then discarded - confirm the column accepts
    // the MM-DD-YYYY form.
    $changeGame = mysql_query(
        "UPDATE games SET game_description='".mysql_real_escape_string($thisGame['description'])
        ."', game_publisher='".mysql_real_escape_string($thisGame['publisher'])
        ."', game_developer='".mysql_real_escape_string($thisGame['developer'])
        ."', game_esrb='".mysql_real_escape_string($thisGame['esrb'])
        ."', game_release='".mysql_real_escape_string($finalDate)
        ."' WHERE game_id='".mysql_real_escape_string($setGame_ID)."'"
    );
}
echo '</body></html>';
?>
Enjoy =)

