Original Post
I wanted to build a link parser like the one on Facebook, but couldn’t find one that suited me, so I built my own. My code is based on the code found here, but I rewrote much of it to be cleaner and to return JSON rather than HTML.
Code Change – Feb 2nd, 2011
Added some refinements to the cleaning mechanism and greatly sped up the image parser.
HTML
<!--
  Markup and styles for the link-preview widget.
  The element ids below (#url, #attach, #cur_image, #atc_loading, #attach_content,
  #atc_images, #atc_title, #atc_url, #atc_desc, #prev, #next, #cur_image_num,
  #atc_total_images) are looked up by the accompanying script, so keep them intact.
-->
<!-- Loaded over HTTPS so the widget also works on pages served over HTTPS
     (browsers block plain-http scripts there as mixed content). -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js"></script>
<style>
    #atc_bar {width:500px;}
    #attach_content {border:1px solid #ccc;padding:10px;margin-top:10px;}
    #atc_images {width:100px;height:120px;overflow:hidden;float:left;}
    #atc_info {width:350px;float:left;height:100px;text-align:left; padding:10px;}
    #atc_title {font-size:14px;display:block;}
    #atc_url {font-size:10px;display:block;}
    #atc_desc {font-size:12px;}
    #atc_total_image_nav {float:left;padding-left:20px}
    #atc_total_images_info {float:left;padding:4px 10px;font-size:12px;}
</style>
<br /><br /><br /><br />
<div align="center">
    <h1>Parse a Link Like Facebook with PHP and Jquery</h1>
    <div id="atc_bar" align="center">
        <!-- Input row: the URL to parse and the button that triggers the request. -->
        <label for="url">Paste Link Here:</label>
        <input type="text" name="url" size="40" id="url" value="" />
        <input type="button" name="attach" value="Parse" id="attach" />
        <!-- Holds the 1-based index of the thumbnail currently shown. -->
        <input type="hidden" name="cur_image" id="cur_image" />
        <div id="loader">
            <!-- Spinner shown while the request is in flight. -->
            <div align="center" id="atc_loading" style="display:none"><img src="load.gif" alt="Loading" /></div>
            <!-- Preview panel, revealed once a response has been rendered. -->
            <div id="attach_content" style="display:none">
                <div id="atc_images"></div>
                <div id="atc_info">
                    <label id="atc_title"></label>
                    <label id="atc_url"></label>
                    <br clear="all" />
                    <label id="atc_desc"></label>
                    <br clear="all" />
                </div>
                <!-- The two anchors are kept adjacent (no whitespace) so no gap appears between the arrows. -->
                <div id="atc_total_image_nav">
                    <a href="#" id="prev"><img src="prev.png" alt="Prev" border="0" /></a><a href="#" id="next"><img src="next.png" alt="Next" border="0" /></a>
                </div>
                <div id="atc_total_images_info">
                    Showing <span id="cur_image_num">1</span> of <span id="atc_total_images">1</span> images
                </div>
                <br clear="all" />
            </div>
        </div>
        <br clear="all" />
    </div>
</div>
JavaScript
<script> $(document).ready(function(){ // delete event $('#attach').bind("click", parse_link); function parse_link () { if(!isValidURL($('#url').val())) { alert('Please enter a valid url.'); return false; } else { $('#atc_loading').show(); $('#atc_url').html($('#url').val()); $.post("fetch.php?url="+escape($('#url').val()), {}, function(response){ //Set Content $('#atc_title').html(response.title); $('#atc_desc').html(response.description); $('#atc_price').html(response.price); $('#atc_total_images').html(response.total_images); $('#atc_images').html(' '); $.each(response.images, function (a, b) { $('#atc_images').append('<img src="'+b.img+'" width="100" id="'+(a+1)+'">'); }); $('#atc_images img').hide(); //Flip Viewable Content $('#attach_content').fadeIn('slow'); $('#atc_loading').hide(); //Show first image $('img#1').fadeIn(); $('#cur_image').val(1); $('#cur_image_num').html(1); // next image $('#next').unbind('click'); $('#next').bind("click", function(){ var total_images = parseInt($('#atc_total_images').html()); if (total_images > 0) { var index = $('#cur_image').val(); $('img#'+index).hide(); if(index < total_images) { new_index = parseInt(index)+parseInt(1); } else { new_index = 1; } $('#cur_image').val(new_index); $('#cur_image_num').html(new_index); $('img#'+new_index).show(); } }); // prev image $('#prev').unbind('click'); $('#prev').bind("click", function(){ var total_images = parseInt($('#atc_total_images').html()); if (total_images > 0) { var index = $('#cur_image').val(); $('img#'+index).hide(); if(index > 1) { new_index = parseInt(index)-parseInt(1);; } else { new_index = total_images; } $('#cur_image').val(new_index); $('#cur_image_num').html(new_index); $('img#'+new_index).show(); } }); }); } }; }); function isValidURL(url) { var RegExp = /(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?/; if(RegExp.test(url)){ return true; }else{ return false; } } </script>
PHP
// Link-preview endpoint.
//
// Reads the `url` request parameter, downloads that page and replies with JSON:
//   {
//     "title":        string,
//     "description":  string,
//     "images":       [ { "img", "width", "height", "area", "offset" }, ... ],
//     "total_images": int
//   }
// The same (empty) structure is returned when the URL is missing or unusable, so
// clients can always rely on every key being present.
//
// Depends on checkValues(), the cURL class and extract_tags(), none of which are
// defined in this snippet. As used here, extract_tags() yields a list of arrays
// with 'contents', 'attributes' and 'offset' entries.

// PHP has already URL-decoded $_REQUEST; decoding a second time would corrupt
// URLs that legitimately contain "%xx" sequences or "+".
$url = (isset($_REQUEST['url']) && is_string($_REQUEST['url']))
    ? checkValues($_REQUEST['url'])
    : '';

$return_array = array(
    'title'       => '',
    'description' => '',
    'images'      => array(),
);

$url_parts = parse_url($url);
$scheme    = (is_array($url_parts) && isset($url_parts['scheme'])) ? strtolower($url_parts['scheme']) : '';

// Only plain web URLs are fetched; this keeps schemes such as file:// away from
// cURL and getimagesize().
// NOTE(review): the host is still chosen by the caller, so this endpoint can be
// pointed at internal addresses (SSRF). Consider an allow/deny list if it is
// reachable from untrusted networks.
$is_fetchable = ($scheme === 'http' || $scheme === 'https') && isset($url_parts['host']);

if ($is_fetchable) {
    // "scheme://host[:port]" - prefix for root-relative image paths.
    $base_url = $scheme . '://' . $url_parts['host']
        . (isset($url_parts['port']) ? ':' . $url_parts['port'] : '');

    // Directory of the page (with trailing slash) - prefix for document-relative
    // image paths. Derived from the path component only, so slashes inside the
    // query string cannot confuse it.
    $page_path    = isset($url_parts['path']) ? $url_parts['path'] : '/';
    $relative_url = $base_url . substr($page_path, 0, strrpos($page_path, '/') + 1);

    // Get Data
    $cc     = new cURL();
    $string = (string) $cc->get($url);

    // Flatten the markup: drop line breaks/tabs and all <div>/<span> wrappers.
    $string   = str_replace(array("\n", "\r", "\t", '</span>', '</div>'), '', $string);
    $stripped = preg_replace('/(<(div|span)\s[^>]+\s?>)/', '', $string);
    if ($stripped !== null) { // null signals a PCRE failure; keep the unstripped text then
        $string = $stripped;
    }

    // json_encode() needs valid UTF-8; anything else is treated as ISO-8859-1.
    if (!mb_check_encoding($string, 'UTF-8')) {
        $string = mb_convert_encoding($string, 'UTF-8', 'ISO-8859-1');
    }

    // Parse Title
    $nodes = extract_tags($string, 'title');
    if (isset($nodes[0]['contents'])) {
        $return_array['title'] = trim($nodes[0]['contents']);
    }

    // Parse Base: a <base href="..."> replaces the prefix used for relative images.
    $base_override = false;
    if (preg_match('/<base[^>]*href=["\'](.*)["\']/Ui', $string, $base_match) && strlen($base_match[1]) > 0) {
        $base_url      = $base_match[1];
        $base_override = true;
    }

    // Parse Description (the last usable <meta name="description"> wins).
    $nodes = extract_tags($string, 'meta');
    foreach ($nodes as $node) {
        if (isset($node['attributes']['name'], $node['attributes']['content'])
            && strtolower($node['attributes']['name']) === 'description'
        ) {
            $return_array['description'] = trim($node['attributes']['content']);
        }
    }

    // Parse Images
    $images_array = extract_tags($string, 'img');
    $images       = array();

    // foreach visits exactly the elements present (the previous index loop ran
    // one step past the end of the array).
    foreach ($images_array as $image) {
        $attributes = (isset($image['attributes']) && is_array($image['attributes']))
            ? $image['attributes']
            : array();

        $img = isset($attributes['src']) ? trim($attributes['src']) : '';
        if ($img === '' || strncasecmp($img, 'data:', 5) === 0) {
            continue; // nothing to link to / inline data cannot be resolved to a URL
        }

        // GIFs are skipped. The extension is taken from the path part only and
        // compared case-insensitively ("x.GIF", "x.gif?v=2").
        $img_path = parse_url($img, PHP_URL_PATH);
        $ext      = strtolower(trim(pathinfo(is_string($img_path) ? $img_path : $img, PATHINFO_EXTENSION)));
        if ($ext === 'gif') {
            continue;
        }

        // Make the image URL absolute.
        if (!preg_match('#^https?://#i', $img)) {
            if (substr($img, 0, 2) === '//') {
                $img = $scheme . ':' . $img;      // protocol-relative
            } elseif (substr($img, 0, 1) === '/' || $base_override) {
                $img = $base_url . $img;          // root-relative, or <base> in effect
            } else {
                $img = $relative_url . $img;      // relative to the page's directory
            }
        }

        // Prefer the dimensions declared in the markup; download the image to
        // measure it only when neither is given.
        $width  = isset($attributes['width']) ? preg_replace('/[^0-9.]/', '', $attributes['width']) : '';
        $height = isset($attributes['height']) ? preg_replace('/[^0-9.]/', '', $attributes['height']) : '';
        if ($width === '' && $height === '') {
            // Best effort: an unreachable or non-image URL simply leaves both at 0.
            $details = @getimagesize($img);
            if (is_array($details)) {
                $width  = $details[0];
                $height = $details[1];
            }
        }
        $width  = intval($width);
        $height = intval($height);

        // Keep only reasonably large images...
        if ($width <= 199 && $height <= 199) {
            continue;
        }

        // ...with a sane aspect ratio (or, when only one side is known, a side
        // under 700px), and whose URL does not look like a logo.
        $ratio_ok       = $width > 0 && $height > 0 && ($width / $height) < 3 && ($width / $height) > .2;
        $width_only_ok  = $width > 0 && $height == 0 && $width < 700;
        $height_only_ok = $width == 0 && $height > 0 && $height < 700;

        if (($ratio_ok || $width_only_ok || $height_only_ok) && strpos($img, 'logo') === false) {
            $images[] = array(
                'img'    => $img,
                'width'  => $width,
                'height' => $height,
                'area'   => ($width * $height),
                'offset' => isset($image['offset']) ? $image['offset'] : null,
            );
        }
    }

    $return_array['images'] = $images;
}

$return_array['total_images'] = count($return_array['images']);

// The Expires date lies in the past so intermediaries never cache the reply.
header('Cache-Control: no-cache, must-revalidate');
header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
header('Content-type: application/json');
echo json_encode($return_array);
exit;