Format du document : text/plain
Prévisualisation
// Fetch the backlink anchors of a domain through the Ahrefs API (an API token is
// required), then print 'spam' or 'clean' depending on what check_anchor() finds.

// The target domain comes from the query string: it is untrusted input, so it is
// validated here and URL-encoded before being placed in the API request.
$domain = (isset($_GET['domain']) && is_string($_GET['domain'])) ? trim($_GET['domain']) : '';
if ($domain === '') {
    exit('error: missing domain parameter');
}

$endpoints = array(
    // Request on the main domain.
    'http://apiv2.ahrefs.com/?from=anchors&mode=domain&output=json&order_by=anchor&token=xxxxxxxxxx&target=' . rawurlencode($domain),
    // Request on the "www." sub-domain.
    'http://apiv2.ahrefs.com/?from=anchors&output=json&order_by=anchor&token=xxxxxxxxxx&target=' . rawurlencode('www.' . $domain),
);

// Anchors of both requests, merged into a single list.
$array3 = array();
foreach ($endpoints as $url) {
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    $result = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on a transport failure and json_decode() returns
    // null on invalid JSON; in both cases the request contributes no anchors
    // instead of feeding a non-array to array_merge().
    $decoded = is_string($result) ? json_decode($result, true) : null;
    if (is_array($decoded) && isset($decoded['anchors']) && is_array($decoded['anchors'])) {
        $array3 = array_merge($array3, $decoded['anchors']);
    }
}

// One blacklist entry per line. Entries are trimmed (drops the "\r" of CRLF files)
// and blank lines are skipped so that an empty entry can never match anything.
$blacklist = array();
$blacklistContents = file_get_contents('blacklist.txt');
if ($blacklistContents !== false) {
    foreach (explode("\n", $blacklistContents) as $line) {
        $line = trim($line);
        if ($line !== '') {
            $blacklist[] = $line;
        }
    }
}

$spam = check_anchor($blacklist, $array3);
if ($spam == 1) {
    echo 'spam';
} else {
    echo 'clean';
}
/**
 * Check whether spam is present in a list of anchors.
 *
 * An anchor is reported as spam when string_control() rejects it, or when one of
 * its space-separated words is exactly equal (case-sensitive) to a blacklist entry.
 * Rows whose 'anchor' value is empty or not a scalar are ignored.
 *
 * @param array $blacklist List of blacklisted strings.
 * @param array $array3    Anchor rows; only the 'anchor' key of each row is read.
 *
 * @return int 1 as soon as one spam anchor is found, 0 otherwise.
 */
function check_anchor($blacklist,$array3){
    if (!is_array($array3)) {
        // Nothing to inspect (e.g. the API call failed upstream).
        return 0;
    }
    if (!is_array($blacklist)) {
        $blacklist = array();
    }

    foreach ($array3 as $data) {
        if (empty($data['anchor']) || !is_scalar($data['anchor'])) {
            continue;
        }
        $anchor = (string) $data['anchor'];

        if (!string_control($anchor)) {
            return 1;
        }

        // explode() returns the whole anchor when it contains no space, so one loop
        // covers both single-word and multi-word anchors.
        foreach (explode(' ', $anchor) as $nGram) {
            // Leading, trailing or doubled spaces produce empty words; they must not
            // match an empty blacklist line.
            if ($nGram === '') {
                continue;
            }
            // Strict comparison: a loose one treats numeric-looking strings such as
            // "1e3" and "1000" as equal.
            if (in_array($nGram, $blacklist, true)) {
                return 1;
            }
        }
    }

    return 0;
}
/**
 * Tell whether an anchor contains at least one character of the accepted set
 * (digits, basic and accented Latin letters, space, dot, apostrophes, hyphen).
 * An anchor without any such character is considered to use an exotic alphabet.
 *
 * The string is expected to be UTF-8 and is matched as UTF-8 (pattern modifier
 * "u"), which requires this source file itself to be saved as UTF-8. A subject
 * that is not valid UTF-8 makes preg_match() fail and is reported as no match.
 *
 * A diagnostic line is echoed for every call.
 *
 * @param string $string Anchor text; HTML tags are stripped before the check.
 *
 * @return bool true when an accepted character is found, false otherwise.
 */
function string_control ($string) {
    $string = strip_tags($string);
    // If the anchor contains one of the characters below, it is not an exotic alphabet.
    $pattern = '#[0-9a-zÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿŠŒŽšœž .\'’\-]#iu';
    if (preg_match($pattern, $string) === 1) {
        echo 'ok preg match => '.$string.'
';
        return true;
    }

    echo 'no preg match => '.$string.'
';
    return false;
}
?>