File: /www/wwwroot/biographybirthday.com/wp-scrap/api.php
<?php
require 'vendor/autoload.php';
use Goutte\Client;
//$url = "https://www.famousbirthdays.com/people/selena-gomez.html";
//if(isset($_GET['url'])){
// $url = $_GET['url'];
//}
// Scraper configuration: the page to fetch and the prefix placed in front of
// every key written to $meta.
$url = 'https://www.famousbirthdays.com/random';
$prefix = 'bio_';

// Result containers. $mapping is initialised here but is not read by any code
// visible in this file.
$meta = array();
$mapping = array();

// Fetch the page and keep the crawler for all later lookups.
// NOTE(review): the page is requested with POST rather than GET - confirm this
// is intentional.
$client = new Client();
$crawler = $client->request('POST', $url);

// The person's name comes from the twitter:title meta tag; when the lookup
// throws (for example because the tag is missing) the name stays ''.
$name = '';
try {
    $name = $crawler->filterXpath('//meta[@name="twitter:title"]')->attr('content');
} catch (Exception $e) {
    $name = '';
}
// Drop the "Learn about " lead-in so only the name itself remains.
$name = str_replace('Learn about ', '', $name);

// META: first name is always stored, last name only when there is one.
$names = splitName($name);
$meta["{$prefix}first_name"] = $names['firstname'];
if ('' != $names['lastname']) {
    $meta["{$prefix}last_name"] = $names['lastname'];
}
// Birth facts scraped from the page. Each value stays '' when its element is
// not found.
$count = 0;
$dob = '';
$birthplace = '';
$city = '';

// City: taken from the rank button whose link contains "/birthplace".
$crawler->filter('a.btn-rank')->each(function ($node) use (&$city) {
    // attr() yields null when the attribute is absent; cast so strpos() always
    // receives a string.
    $href = (string) $node->attr('href');
    if (strpos($href, '/birthplace') !== false) {
        // The extracted texts are stored in a variable before being read:
        // array_shift() takes its argument by reference, so feeding it a call
        // result raises "Only variables should be passed by reference".
        // extract() is given an array because newer DomCrawler releases no
        // longer accept a bare string.
        $titles = $node->filter('.title')->extract(array('_text'));
        $city = isset($titles[0]) ? $titles[0] : '';
        $city = trim(str_replace('Born in ', '', $city));
    }
});

// The first matching stats cell is read as the birthday and the second as the
// birthplace; any further cells are ignored.
$elements = $crawler->filter('.stats .main-stats .is-flex .col-md-12')->each(function ($node) use (&$count, &$dob, &$birthplace) {
    if ($count == 0) {
        $dob = $node->text();
        $dob = str_replace('Birthday', '', $dob);
    } elseif ($count == 1) {
        $birthplace = $node->text();
        $birthplace = trim(str_replace('Birthplace', '', $birthplace));
        // Collapse runs of whitespace left over from the markup.
        $birthplace = trim(preg_replace('/\s\s+/', ' ', $birthplace));
    } else {
        return;
    }
    $count++;
});

// Normalise the birthday to Y-m-d and store the facts under prefixed keys.
$dob = cleanDate($dob);
$meta[$prefix . 'dob'] = $dob;
$meta[$prefix . 'fact_birth-place'] = $birthplace;
if ($city != '') {
    $meta[$prefix . 'city'] = $city;
}
//$name = array_shift($crawler->filter('.main-info .col-md-6 h1')->extract('_text'));
// Category: text of the .person-title element inside the main heading, or ''
// when it is missing. The extracted texts are stored in a variable first
// because array_shift() takes its argument by reference and must not be given
// a call result directly; extract() receives an array because newer
// DomCrawler releases no longer accept a bare string.
$categoryTexts = $crawler->filter('.main-info .col-md-6 h1 .person-title')->extract(array('_text'));
$category = isset($categoryTexts[0]) ? $categoryTexts[0] : '';
$fields['category'] = trim($category);
$fields['name'] = $name;

// Default image: the og:image meta tag. Open Graph tags are conventionally
// written with property="og:image", so both attribute spellings are accepted.
try {
    $image = $crawler->filterXpath('//meta[@property="og:image" or @name="og:image"]')->attr('content');
} catch (Exception $e) {
    $image = '';
}

// Prefer a slider image when there is one: the loop overwrites $image with the
// first and then the second slider image, so the second wins when it exists.
$count = 0;
$elements = $crawler->filter('.famous-slider img')->each(function ($node) use (&$count, &$image) {
    if ($count <= 1) {
        $image = $node->attr('src');
    } else {
        return;
    }
    $count++;
});
$fields['image'] = $image;
// Biography markup. html() throws when the selector matches nothing, so fall
// back to '' the same way the name and image lookups do instead of aborting
// the whole response with an uncaught exception.
try {
    $html = $crawler->filter('div.container div.bio')->html();
} catch (Exception $e) {
    $html = '';
}

// Empty the contents of <script>/<style> elements (the tags themselves are
// stripped further down).
$html = preg_replace('/(<(script|style)\b[^>]*>).*?(<\/\2>)/is', "$1$3", $html);
$strip_tags = "center|style|span|ins|script|a|img|input|button|figure|noscript";
// remove link (the whole element, including its text)
$html = preg_replace('#<a.*?>.*?</a>#i', '', $html);
// Strip the listed tags but keep whatever they wrapped.
$html = preg_replace("#<\s*\/?(".$strip_tags.")\s*[^>]*?>#im", '', $html);
$strip_tags = "figcaption";
// Empty, then remove, <figcaption> elements.
$html = preg_replace('/(<(figcaption)\b[^>]*>).*?(<\/\2>)/is', "$1$3", $html);
$html = preg_replace('/<figcaption[^>]*>([\s\S]*?)<\/figcaption[^>]*>/', '', $html);
// Remove <div>...</div> spans (non-greedy, so nested divs can leave a stray
// closing tag behind; those are removed just below).
$html = preg_replace('/<div[^>]*>([\s\S]*?)<\/div[^>]*>/', '', $html);

$fields['meta'] = $meta;
$textToSpin = str_replace('</div>', '', $html);

// Send the cleaned text to the spinner and separate its body from the headers.
$strResponseHeaders = '';
$strResponseBody = '';
if ($textToSpin !== '') {
    $response = spinbot($textToSpin);
    if (is_string($response)) {
        // array_pad() keeps the destructuring safe when the separator is missing.
        list($strResponseHeaders, $strResponseBody) = array_pad(explode("\r\n\r\n", $response, 2), 2, null);
        // The raw response can start with extra header blocks (for example an
        // interim "HTTP/1.1 100 Continue"); keep splitting until the remainder
        // no longer begins with a status line.
        while (is_string($strResponseBody) && preg_match('#^HTTP/\d#', $strResponseBody)) {
            list($strResponseHeaders, $strResponseBody) = array_pad(explode("\r\n\r\n", $strResponseBody, 2), 2, null);
        }
    } else {
        // The request failed: the body is reported as null.
        $strResponseBody = null;
    }
}
$fields['body'] = $strResponseBody;

header('Content-Type: application/json');
echo json_encode($fields);
/**
 * Print a value wrapped in <pre> tags for inspection.
 *
 * @param mixed $arr  Value to dump with print_r().
 * @param bool  $exit When true (the default) the script stops after printing.
 */
function debug($arr, $exit = true){
    // Render the dump to a string first so the whole block is emitted at once.
    echo '<pre>' . print_r($arr, true) . '</pre>';
    if (!$exit) {
        return;
    }
    exit;
}
/**
 * Send text to the Spinbot API and return the raw HTTP response.
 *
 * The returned string contains the response headers followed by the body,
 * because CURLOPT_HEADER is enabled; the caller is responsible for splitting
 * them apart.
 *
 * The API key can be supplied through the SPINBOT_API_KEY environment
 * variable; when that is unset or empty the key embedded below is used.
 *
 * @param string $textToSpin Text posted as the request body.
 * @return string|false Raw response (headers + body), or false when the
 *                      request could not be performed.
 */
function spinbot($textToSpin){
    $url = 'https://api.spinbot.com';

    // Required header: the API key. Prefer the environment so the secret does
    // not have to live in the source; fall back to the built-in key.
    $spinbotApiKey = getenv('SPINBOT_API_KEY');
    if ($spinbotApiKey === false || $spinbotApiKey === '') {
        $spinbotApiKey = '87b0d1bb9b3c420381ee141837cbc7f5';
    }

    $header = array(
        "x-auth-key:$spinbotApiKey",
        // optional header values
        'x-spin-cap-words:true',
        'x-words-to-skip:rewrit,nonExistentWordPart',
        'x-min-percent-change-per-sentence:any',
    );

    // Execute cURL request, get response
    $ch = curl_init();
    if ($ch === false) {
        error_log('spinbot: unable to initialise cURL');
        return false;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $textToSpin);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Include the response headers in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // NOTE(review): verbose mode writes the transfer details to STDERR, which
    // presumably includes the request headers (and so the API key) - confirm
    // this is wanted outside of debugging.
    curl_setopt($ch, CURLOPT_VERBOSE, 1);

    $response = curl_exec($ch);
    if ($response === false) {
        // Record why the request failed instead of discarding the error.
        error_log('spinbot: request failed: ' . curl_error($ch));
    }
    curl_close($ch);

    return $response;
}
/**
 * Normalise a scraped birthday string to the Y-m-d format.
 *
 * Full English month names are stripped before parsing, as long as some month
 * token is still left afterwards (presumably the scraped text carries both the
 * full and the abbreviated month, e.g. "JulyJul 22, 1992" - confirm against
 * the page markup). When stripping would remove the month altogether - "May"
 * is its own abbreviation, or the text only had the full name - the original
 * text is kept and only a directly repeated month name is collapsed.
 *
 * @param string $date Raw date text.
 * @return string Date as Y-m-d; '' when the input is empty or, should debug()
 *                ever return, when the text cannot be parsed.
 */
function cleanDate($date){
    // Collapse whitespace so line breaks from the markup cannot upset parsing.
    $date = trim(preg_replace('/\s+/', ' ', (string) $date));
    if ($date === '') {
        // new DateTime('') means "now": an absent birthday must not silently
        // turn into today's date.
        return '';
    }

    $months = array(
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    );

    $cleaned = str_replace($months, '', $date);
    $monthLeft = preg_match('/Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/i', $cleaned);
    if ($cleaned !== $date && !$monthLeft) {
        // Stripping removed every month token; keep the month and only fold
        // "MayMay" / "May May" style repetitions into a single name.
        $cleaned = preg_replace('/\b(' . implode('|', $months) . ')(?:\s*\1)+/', '$1', $date);
    }

    try {
        $dt = new DateTime(trim($cleaned));
    } catch (Exception $e) {
        // debug() prints the exception and, with its default arguments, exits.
        debug($e);
        // Only reached if debug() returns; avoids using an undefined $dt.
        return '';
    }
    return $dt->format('Y-m-d');
}
/**
 * Split a full name into first, last and middle parts.
 *
 * The name is split on runs of whitespace, so leading, trailing or repeated
 * spaces never produce empty parts. The first word is the first name, the last
 * remaining word is the last name, and everything in between is joined with
 * single spaces as the middle name. Missing parts are returned as '' (a
 * single-word name has an empty last name).
 *
 * @param string $name Full name.
 * @return array Array with the keys 'firstname', 'lastname' and 'middlename'.
 */
function splitName($name) {
    $parts = preg_split('/\s+/', trim((string) $name), -1, PREG_SPLIT_NO_EMPTY);
    if ($parts === false) {
        $parts = array();
    }

    // array_shift()/array_pop() return null on an empty array; map that to ''
    // so every key always holds a string.
    $firstname = array_shift($parts);
    $lastname = array_pop($parts);

    return array(
        'firstname' => $firstname === null ? '' : $firstname,
        'lastname' => $lastname === null ? '' : $lastname,
        'middlename' => join(' ', $parts),
    );
}