I am trying to check for URL Canonicalization. It is working fine for some page while it is not working on some pages.
My code:
function getRedirect($webpage)
{
$curlInit = curl_init($webpage);
curl_setopt($curlInit, CURLOPT_CONNECTTIMEOUT, 20);
curl_setopt($curlInit, CURLOPT_HEADER, true);
curl_setopt($curlInit, CURLOPT_NOBODY, true);
curl_setopt($curlInit, CURLOPT_RETURNTRANSFER, true);
$response = curl_exec($curlInit);
$response_time = curl_getinfo($curlInit);
curl_close($curlInit);
return $response_time['redirect_url'];
}
function get_root_domain($url)
{
$pattern = '/\w+\..{2,3}(?:\..{2,3})?(?:$|(?=\/))/i';
if (preg_match($pattern, $url, $matches) === 1) {
return $matches[0];
}
}
function getDomain($url)
{
$url = Trim($url);
$url = preg_replace("/^(http:\/\/)*/is", "", $url);
$url = preg_replace("/^(https:\/\/)*/is", "", $url);
$url = preg_replace("/\/.*$/is", "", $url);
return $url;
}
Main function:
$parsedUrl = get_root_domain($url);
$url_WWW = 'www.' . $parsedUrl;
$getRedirect = getRedirect($parsedUrl);
if ($getRedirect == null)
{
// echo with or without www resolve to the same url
}
else
{
$parsedRedirect = getDomain($getRedirect);
if ($parsedRedirect == $parsedUrl)
{
// echo with or without www resolve to the same url
}
elseif ($parsedRedirect == $url_WWW)
{
// echo with or without www resolve to the same url
}
else
{
// echo with or without www DOES NOT resolve to the same url.
}
}
I have tried modifying the first if statement to this:
if ($get_redirect == null)
{
$get_redirect_url = get_redirect($url_WWW);
$get_redirect_url = getDomain($get_redirect_url);
if ($get_redirect_url == $url_WWW)
{
// echo with or without www DOES NOT resolve to the same url.
}
else
{
// echo with or without www resolve to the same url
}
}
but I get the same inconsistent result.
Aucun commentaire:
Enregistrer un commentaire