Tag Archives: url

Original Post

So I wanted to build a link parser like the one on Facebook, but didn’t find one that suited me. So I built one. My code is based on the code found here, but I rewrote much of it to be cleaner and to return JSON rather than HTML.

Code Change – Feb 2nd, 2011
Added some refinements to the cleaning mechanism and greatly sped up the image parser.

HTML

<br />
&lt;!-- jQuery is loaded over https so the page also works when served from an https origin. --&gt;<br />
&lt;script src=&quot;https://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js&quot;&gt;&lt;/script&gt;<br />
&lt;style&gt;<br />
	/* Centering is done in CSS instead of the deprecated align attribute. */<br />
	.atc_page{text-align:center;}<br />
	#atc_bar{width:500px;margin:0 auto;text-align:center;}<br />
	#atc_loading{text-align:center;}<br />
	#attach_content{border:1px solid #ccc;padding:10px;margin-top:10px;}<br />
	#atc_images {width:100px;height:120px;overflow:hidden;float:left;}<br />
	#atc_info {width:350px;float:left;height:100px;text-align:left; padding:10px;}<br />
	#atc_title {font-size:14px;display:block;}<br />
	#atc_url {font-size:10px;display:block;}<br />
	#atc_desc {font-size:12px;}<br />
	#atc_total_image_nav{float:left;padding-left:20px}<br />
	/* Remove the native button chrome so prev/next show only their arrow images. */<br />
	#atc_total_image_nav button{border:0;padding:0;background:none;cursor:pointer;}<br />
	#atc_total_images_info{float:left;padding:4px 10px;font-size:12px;}<br />
&lt;/style&gt;<br />
&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;</p>
<p>&lt;div class=&quot;atc_page&quot;&gt;<br />
	&lt;h1&gt;Parse a Link Like Facebook with PHP and Jquery&lt;/h1&gt;<br />
	&lt;div id=&quot;atc_bar&quot;&gt;<br />
		&lt;label for=&quot;url&quot;&gt;Paste Link Here:&lt;/label&gt; &lt;input type=&quot;text&quot; name=&quot;url&quot; size=&quot;40&quot; id=&quot;url&quot; value=&quot;&quot; /&gt;<br />
		&lt;input type=&quot;button&quot; name=&quot;attach&quot; value=&quot;Parse&quot; id=&quot;attach&quot; /&gt;<br />
		&lt;!-- Holds the 1-based number of the preview image currently shown. --&gt;<br />
		&lt;input type=&quot;hidden&quot; name=&quot;cur_image&quot; id=&quot;cur_image&quot; /&gt;<br />
		&lt;div id=&quot;loader&quot;&gt;</p>
<p>			&lt;div id=&quot;atc_loading&quot; style=&quot;display:none&quot;&gt;&lt;img src=&quot;load.gif&quot; alt=&quot;Loading&quot; /&gt;&lt;/div&gt;<br />
			&lt;div id=&quot;attach_content&quot; style=&quot;display:none&quot;&gt;<br />
				&lt;div id=&quot;atc_images&quot;&gt;&lt;/div&gt;<br />
				&lt;div id=&quot;atc_info&quot;&gt;</p>
<p>					&lt;!-- Display-only fields filled in by the script; spans, because a label must belong to a form control. --&gt;<br />
					&lt;span id=&quot;atc_title&quot;&gt;&lt;/span&gt;<br />
					&lt;span id=&quot;atc_url&quot;&gt;&lt;/span&gt;<br />
					&lt;br clear=&quot;all&quot; /&gt;<br />
					&lt;span id=&quot;atc_desc&quot;&gt;&lt;/span&gt;<br />
					&lt;br clear=&quot;all&quot; /&gt;<br />
				&lt;/div&gt;<br />
				&lt;div id=&quot;atc_total_image_nav&quot; &gt;<br />
					&lt;!-- Real buttons: these trigger an in-page action and must not navigate to &quot;#&quot;. --&gt;<br />
					&lt;button type=&quot;button&quot; id=&quot;prev&quot;&gt;&lt;img src=&quot;prev.png&quot; alt=&quot;Previous image&quot; /&gt;&lt;/button&gt;&lt;button type=&quot;button&quot; id=&quot;next&quot;&gt;&lt;img src=&quot;next.png&quot; alt=&quot;Next image&quot; /&gt;&lt;/button&gt;<br />
				&lt;/div&gt;</p>
<p>				&lt;div id=&quot;atc_total_images_info&quot; &gt;<br />
					Showing &lt;span id=&quot;cur_image_num&quot;&gt;1&lt;/span&gt; of &lt;span id=&quot;atc_total_images&quot;&gt;1&lt;/span&gt; images<br />
				&lt;/div&gt;<br />
				&lt;br clear=&quot;all&quot; /&gt;<br />
			&lt;/div&gt;<br />
		&lt;/div&gt;<br />
		&lt;br clear=&quot;all&quot; /&gt;<br />
	&lt;/div&gt;<br />
&lt;/div&gt;<br />

JavaScript

</p>
<p>&lt;script&gt;</p>
<p>	$(document).ready(function(){</p>
<p>		// Parse the pasted link when the Parse button is clicked.<br />
		$('#attach').bind('click', parse_link);</p>
<p>		// Prev/next read their state from the DOM, so they are bound once here<br />
		// rather than being unbound and re-bound after every response.<br />
		$('#next').bind('click', function(){ return step_image(1); });<br />
		$('#prev').bind('click', function(){ return step_image(-1); });</p>
<p>		/**<br />
		 * Show the next (direction 1) or previous (direction -1) preview image,<br />
		 * wrapping around at either end.<br />
		 * Always returns false so the click does not trigger the default action<br />
		 * of the element it was bound to.<br />
		 */<br />
		function step_image (direction)<br />
		{<br />
			var total_images = parseInt($('#atc_total_images').text(), 10);<br />
			var index = parseInt($('#cur_image').val(), 10);</p>
<p>			// Nothing to step through until a response with images has been rendered.<br />
			if (total_images &gt; 0 &amp;&amp; !isNaN(index))<br />
			{<br />
				var new_index = index + direction;<br />
				if (new_index &gt; total_images)<br />
				{<br />
					new_index = 1;<br />
				}<br />
				else if (new_index &lt; 1)<br />
				{<br />
					new_index = total_images;<br />
				}</p>
<p>				// Images were appended in order, so image number n sits at position n-1.<br />
				var previews = $('#atc_images img');<br />
				previews.hide();<br />
				previews.eq(new_index - 1).show();<br />
				$('#cur_image').val(new_index);<br />
				$('#cur_image_num').text(new_index);<br />
			}<br />
			return false;<br />
		}</p>
<p>		/**<br />
		 * Validate the pasted URL, request its metadata from fetch.php and render<br />
		 * the title, description and candidate images.<br />
		 * Returns false when the URL is rejected by isValidURL.<br />
		 */<br />
		function parse_link ()<br />
		{<br />
			var url = $.trim($('#url').val());<br />
			if(!isValidURL(url))<br />
			{<br />
				alert('Please enter a valid url.');<br />
				return false;<br />
			}</p>
<p>			$('#atc_loading').show();<br />
			// The URL and every field of the response originate outside this page,<br />
			// so they are inserted as text or attributes, never as markup.<br />
			$('#atc_url').text(url);</p>
<p>			$.ajax({<br />
				type: 'POST',<br />
				// encodeURIComponent replaces the deprecated escape(), which leaves<br />
				// '+' unencoded and emits %uXXXX for non-Latin-1 characters.<br />
				url: 'fetch.php?url='+encodeURIComponent(url),<br />
				dataType: 'json',<br />
				success: function(response){<br />
					var images = response.images || [];</p>
<p>					//Set Content<br />
					$('#atc_title').text(response.title);<br />
					$('#atc_desc').text(response.description);<br />
					$('#atc_total_images').text(images.length);</p>
<p>					var holder = $('#atc_images').empty();<br />
					$.each(images, function (a, b)<br />
					{<br />
						// id keeps the original 1-based numbering of the previews.<br />
						$('&lt;img&gt;').attr({src: b.img, width: 100, id: a+1}).hide().appendTo(holder);<br />
					});</p>
<p>					//Flip Viewable Content<br />
					$('#attach_content').fadeIn('slow');<br />
					$('#atc_loading').hide();</p>
<p>					//Show first image<br />
					holder.find('img').eq(0).fadeIn();<br />
					$('#cur_image').val(1);<br />
					$('#cur_image_num').text(images.length &gt; 0 ? 1 : 0);<br />
				},<br />
				error: function(){<br />
					// Without this the loading indicator would stay visible forever.<br />
					$('#atc_loading').hide();<br />
					alert('The link could not be parsed. Please try again.');<br />
				}<br />
			});<br />
		}<br />
	});</p>
<p>	/**<br />
	 * Check whether url is an absolute ftp, http or https URL.<br />
	 * Surrounding whitespace is ignored; whitespace inside the value or any<br />
	 * text before the scheme makes the check fail.<br />
	 * Returns true or false.<br />
	 */<br />
	function isValidURL(url)<br />
	{<br />
		// Named url_pattern so the built-in RegExp constructor is not shadowed.<br />
		// Anchored at both ends: the whole value must be a URL, not merely contain one.<br />
		var url_pattern = /^(?:ftp|https?):\/\/(?:\w+:?\w*@)?\S+$/;<br />
		var candidate = String(url).replace(/^\s+|\s+$/g, '');<br />
		return url_pattern.test(candidate);<br />
	}<br />
&lt;/script&gt;<br />

PHP

<br />
// Fetch the page named by the url request parameter and answer with JSON:<br />
// title, description, images (list of candidate preview images), total_images.<br />
//<br />
// $_REQUEST values are already URL-decoded by PHP; decoding a second time<br />
// would corrupt literal '+' and '%xx' sequences in the target URL.<br />
$url = isset($_REQUEST['url']) ? $_REQUEST['url'] : '';<br />
$url = checkValues($url);<br />
$return_array = array('title' =&gt; '', 'description' =&gt; '', 'images' =&gt; array(), 'total_images' =&gt; 0);</p>
<p>// Only fetch schemes the client-side validator accepts, so file://, gopher://<br />
// and similar URLs are never handed to cURL.<br />
// NOTE(review): internal hosts are still reachable through this endpoint;<br />
// add a host allow/deny list if it is exposed publicly.<br />
$parts = parse_url($url);<br />
$scheme = (is_array($parts) &amp;&amp; isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';<br />
$fetchable = in_array($scheme, array('http', 'https', 'ftp')) &amp;&amp; !empty($parts['host']);</p>
<p>if ($fetchable)<br />
{<br />
	// scheme://[user[:pass]@]host[:port] - prefix for root-relative image paths.<br />
	$auth = '';<br />
	if (isset($parts['user']))<br />
		$auth = $parts['user'] . (isset($parts['pass']) ? ':' . $parts['pass'] : '') . '@';<br />
	$base_url = $parts['scheme'] . '://' . $auth . $parts['host'] . (isset($parts['port']) ? ':' . $parts['port'] : '');</p>
<p>	// Directory of the requested page - prefix for document-relative image paths.<br />
	// A URL without a path resolves against the site root.<br />
	$path = isset($parts['path']) ? $parts['path'] : '/';<br />
	$relative_url = $base_url . substr($path, 0, strrpos($path, '/') + 1);</p>
<p>	// Get Data<br />
	$cc = new cURL();<br />
	$string = $cc-&gt;get($url);<br />
	$string = str_replace(array(&quot;\n&quot;,&quot;\r&quot;,&quot;\t&quot;,'&lt;/span&gt;','&lt;/div&gt;'), '', $string);<br />
	$string = preg_replace('/(&lt;(div|span)\s[^&gt;]+\s?&gt;)/',  '', $string);</p>
<p>	// json_encode needs valid UTF-8; anything else is treated as ISO-8859-1,<br />
	// which is what the deprecated utf8_encode() assumed.<br />
	if (!mb_check_encoding($string, 'UTF-8'))<br />
		$string = mb_convert_encoding($string, 'UTF-8', 'ISO-8859-1');</p>
<p>	// Parse Title<br />
	$nodes = extract_tags( $string, 'title' );<br />
	if (isset($nodes[0]['contents']))<br />
		$return_array['title'] = trim($nodes[0]['contents']);</p>
<p>	// Parse Base: a base href in the page replaces the computed base URL.<br />
	$base_override = false;<br />
	$base_regex = '/&lt;base[^&gt;]*href=[&quot;\'](.*)[&quot;\']/Ui';<br />
	if (preg_match($base_regex, $string, $base_match) &amp;&amp; strlen($base_match[1]) &gt; 0)<br />
	{<br />
		$base_url = $base_match[1];<br />
		$base_override = true;<br />
	}</p>
<p>	// Parse Description (the last meta description tag wins, as before)<br />
	foreach (extract_tags( $string, 'meta' ) as $node)<br />
	{<br />
		if (isset($node['attributes']['name'], $node['attributes']['content'])<br />
			&amp;&amp; strtolower($node['attributes']['name']) == 'description')<br />
			$return_array['description'] = trim($node['attributes']['content']);<br />
	}</p>
<p>	// Parse Images<br />
	$images = array();<br />
	foreach (extract_tags( $string, 'img' ) as $image)<br />
	{<br />
		$attrs = isset($image['attributes']) ? $image['attributes'] : array();<br />
		$img = isset($attrs['src']) ? trim($attrs['src']) : '';<br />
		$ext = strtolower(trim(pathinfo($img, PATHINFO_EXTENSION)));</p>
<p>		// Skip images without a source, gifs and inline data: URIs.<br />
		if (!$img || $ext == 'gif' || strncasecmp($img, 'data:', 5) == 0)<br />
			continue;</p>
<p>		$width = isset($attrs['width']) ? preg_replace(&quot;/[^0-9.]/&quot;, '', $attrs['width']) : '';<br />
		$height = isset($attrs['height']) ? preg_replace(&quot;/[^0-9.]/&quot;, '', $attrs['height']) : '';</p>
<p>		// Make the image URL absolute.<br />
		if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $img))<br />
			; // already absolute (http, https, ...)<br />
		else if (substr($img,0,2) == '//')<br />
			$img = $parts['scheme'] . ':' . $img; // protocol-relative<br />
		else if (substr($img,0,1) == '/' || $base_override)<br />
			$img = $base_url . $img;<br />
		else<br />
			$img = $relative_url . $img;</p>
<p>		// No size in the markup: ask the image itself (remote request, best effort).<br />
		if ($width == '' &amp;&amp; $height == '')<br />
		{<br />
			$details = @getimagesize($img);<br />
			if (is_array($details))<br />
				list($width, $height) = $details;<br />
		}<br />
		$width = intval($width);<br />
		$height = intval($height);</p>
<p>		// Keep images with a side of at least 200px, a plausible aspect ratio<br />
		// (or one known side under 700px) and no 'logo' in the URL.<br />
		if (($width &gt; 199 || $height &gt; 199)<br />
			&amp;&amp; (($width &gt; 0 &amp;&amp; $height &gt; 0 &amp;&amp; (($width / $height) &lt; 3) &amp;&amp; (($width / $height) &gt; .2))<br />
				|| ($width &gt; 0 &amp;&amp; $height == 0 &amp;&amp; $width &lt; 700)<br />
				|| ($width == 0 &amp;&amp; $height &gt; 0 &amp;&amp; $height &lt; 700)<br />
			)<br />
			&amp;&amp; strpos($img, 'logo') === false )<br />
		{<br />
			$images[] = array(&quot;img&quot; =&gt; $img, &quot;width&quot; =&gt; $width, &quot;height&quot; =&gt; $height, 'area' =&gt;  ($width * $height),'offset' =&gt; (isset($image['offset']) ? $image['offset'] : null));<br />
		}<br />
	}<br />
	$return_array['images'] = $images;<br />
	$return_array['total_images'] = count($images);<br />
}</p>
<p>header('Cache-Control: no-cache, must-revalidate');<br />
header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');<br />
header('Content-type: application/json');</p>
<p>echo json_encode($return_array);<br />
exit;<br />