<?php
// File: /www/wwwroot/biographybirthday.com/wp-scrap/spinbot_get_allstarbio.php
require 'vendor/autoload.php';
use Goutte\Client;
/**
 * Removes every element named in $tags, together with its content, from $html.
 *
 * @param string $html Markup to clean.
 * @param string $tags A tag name, or several names separated by "|"
 *                     (e.g. "script|style"). The value is embedded in a
 *                     regular expression as-is, so it must not contain "/".
 * @return string The markup without the listed elements. $html is returned
 *                unchanged when $tags is empty or the expression fails.
 */
function strip_tags_blacklist($html, $tags) {
    if ($tags === null || $tags === '') {
        return $html;
    }

    // The alternation has to be grouped: without the parentheses "a|b" would
    // turn the whole pattern into three unrelated alternatives. The closing
    // tag uses a back-reference so an element is only closed by its own name.
    $pattern = '/<(' . $tags . ')\b[^>]*>.*?<\/\1\s*>/is';
    $cleaned = preg_replace($pattern, '', $html);

    // preg_replace() yields null on failure (e.g. backtrack limit reached).
    return $cleaned === null ? $html : $cleaned;
}
// ---------------------------------------------------------------------------
// Request handler: fetches a biography page, extracts name / image / quick
// facts, strips the article markup down to plain paragraphs, sends that text
// to the Spinbot API and prints the combined result as JSON.
//
// Input : optional query parameter "url" (absolute http/https URL).
// Output: JSON object with the keys name, image, meta, body, available-spins;
//         on failure a JSON object {"error": "..."} with a 4xx/5xx status.
// ---------------------------------------------------------------------------
$prefix = 'bio_';
$meta = array();

// Prints a JSON error document with the given HTTP status and stops the script.
$fail = function ($status, $message) {
    if (!headers_sent()) {
        http_response_code($status);
        header('Content-Type: application/json');
    }
    echo json_encode(array('error' => $message));
    exit;
};

$url = 'https://allstarbio.com/mariah-mallad-bio-relationship-net-worth/';
if (isset($_GET['url'])) {
    $url = $_GET['url'];
}

// The URL comes straight from the query string, so reject anything that is
// not a plain absolute http(s) URL (arrays, file://, relative paths, ...).
// NOTE(review): internal hosts are still reachable; consider restricting the
// host to the site(s) this scraper is meant for.
$scheme = is_string($url) ? strtolower((string) parse_url($url, PHP_URL_SCHEME)) : '';
$host = is_string($url) ? (string) parse_url($url, PHP_URL_HOST) : '';
if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
    $fail(400, 'The "url" parameter must be an absolute http(s) URL.');
}

try {
    $client = new Client();
    $crawler = $client->request('GET', $url);
} catch (Exception $e) {
    $fail(502, 'Could not fetch the requested page: ' . $e->getMessage());
}

// array_shift() takes its argument by reference, so the extracted list must
// live in a variable first. extract() is given an array because newer
// DomCrawler releases no longer accept a bare string.
$titles = $crawler->filter('h1.post-title-alt')->extract(array('_text'));
$name = (string) array_shift($titles);

try {
    $image = $crawler->filterXpath('//meta[@property="og:image"]')->attr('content');
} catch (Exception $e) {
    // attr() throws when the page has no og:image meta tag.
    $image = '';
}

// FACTS
// The first quick-facts span is the full name. Every following span is tried
// as the date of birth until one of them parses; the remaining spans are
// ignored.
$stage = 'name';
$crawler->filter('div.quickfacts ul li span')->each(function ($node) use (&$stage, &$meta, &$name, $prefix) {
    if ($stage === 'name') {
        $fullname = $node->html();
        $names = splitName($fullname);
        $name = $fullname; // the quick-facts name replaces the <h1> title
        $meta[$prefix . 'first_name'] = $names['firstname'];
        if ($names['lastname'] !== null && $names['lastname'] !== '') {
            $meta[$prefix . 'last_name'] = $names['lastname'];
        }
        $stage = 'dob';
        return;
    }

    if ($stage === 'dob') {
        $dob = cleanDate($node->html());
        if ($dob === false) {
            return; // not a date: keep looking in the next span
        }
        $meta[$prefix . 'dob'] = $dob;
        $stage = 'done';
    }
});

try {
    $html = $crawler->filter('.post-content')->html();
} catch (Exception $e) {
    // html() throws when the selector matches nothing.
    $fail(422, 'No ".post-content" element was found on the requested page.');
}

// Markup clean-up, applied in order. Each entry is array(pattern, replacement).
$strip_tags = 'center|style|span|ins|script|a|img|input|button|figure|noscript';
$cleanupRules = array(
    // Empty <script>/<style> elements (the tags themselves go two rules below).
    array('/(<(script|style)\b[^>]*>).*?(<\/\2>)/is', '$1$3'),
    // Remove links including their text. "\b" keeps <abbr>, <article>, ...
    // from being mistaken for the start of a link.
    array('#<a\b[^>]*>.*?</a>#i', ''),
    // Drop the listed tags but keep whatever they wrap. "\b" makes sure only
    // whole tag names match (e.g. "a" must not strip <aside>).
    array('#<\s*\/?(' . $strip_tags . ')\b\s*[^>]*?>#im', ''),
    // Remove captions including their text.
    array('/<figcaption\b[^>]*>.*?<\/figcaption[^>]*>/is', ''),
    // Remove <div> blocks including their content (innermost closing tag wins).
    array('/<div[^>]*>([\s\S]*?)<\/div[^>]*>/', ''),
);
foreach ($cleanupRules as $rule) {
    $html = preg_replace($rule[0], $rule[1], $html);
    if ($html === null) {
        // null signals a PCRE failure such as an exhausted backtrack limit.
        $fail(500, 'Failed to clean the article markup.');
    }
}
// Closing tags left behind by nested <div> elements.
$textToSpin = str_replace('</div>', '', $html);

$spinbotUrl = 'https://api.spinbot.com';

// NOTE(review): the API key is committed in source. An environment variable
// takes precedence so the literal can be rotated out without a code change.
$envKey = getenv('SPINBOT_API_KEY');
$spinbotApiKey = ($envKey !== false && $envKey !== '') ? $envKey : '87b0d1bb9b3c420381ee141837cbc7f5';

$header = array(
    // Required header:
    "x-auth-key:$spinbotApiKey",
    // optional header values
    'x-spin-cap-words:true',
    'x-words-to-skip:rewrit,nonExistentWordPart',
    'x-min-percent-change-per-sentence:any',
);

// Execute cURL request, get response. CURLOPT_VERBOSE is intentionally not
// set: it dumps the request, including the auth header, to stderr.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $spinbotUrl);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $textToSpin);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
$response = curl_exec($ch);
$curlError = curl_error($ch);
$headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
curl_close($ch);

if ($response === false) {
    $fail(502, 'Spinbot request failed: ' . $curlError);
}

// Split headers from body by the header size cURL reports. It covers every
// header block received (interim responses included), so the body is intact
// no matter how many blocks precede it. Only the last block belongs to the
// final response.
$strResponseBody = (string) substr($response, $headerSize);
$headerBlocks = explode("\r\n\r\n", trim((string) substr($response, 0, $headerSize)));
$aHeaders = putHeadersTextIntoArray(end($headerBlocks));

// Header names are case-insensitive; look the value up in lower case.
$lowerHeaders = array_change_key_case($aHeaders, CASE_LOWER);

$fields = array(
    'name' => trim($name),
    'image' => $image,
    'meta' => $meta,
    'body' => $strResponseBody,
    'available-spins' => isset($lowerHeaders['available-spins']) ? $lowerHeaders['available-spins'] : null,
);

$json = json_encode($fields);
if ($json === false) {
    $fail(500, 'Could not encode the result as JSON: ' . json_last_error_msg());
}
header('Content-Type: application/json');
echo $json;
/**
 * Parses one raw HTTP header block into an associative array.
 *
 * The first line (the status line) is stored under the key "http_code";
 * every following "Name: value" line becomes $headers[Name] = value, with
 * the name kept exactly as received. Lines without a colon are skipped.
 *
 * @param string $header_text Header block, lines separated by CRLF (bare LF
 *                            or CR is accepted as well).
 * @return array Map of header name to value, plus "http_code".
 */
function putHeadersTextIntoArray($header_text) {
    $headers = array();
    $lines = preg_split('/\r\n|\n|\r/', trim((string) $header_text));

    foreach ($lines as $i => $line) {
        if ($i === 0) {
            $headers['http_code'] = $line;
            continue;
        }
        if (strpos($line, ':') === false) {
            continue; // blank or malformed line
        }

        // Split on the first colon only: the value may contain colons itself,
        // and the space after the colon is optional.
        list($key, $value) = explode(':', $line, 2);
        $key = trim($key);
        if ($key === '') {
            continue;
        }
        $headers[$key] = trim($value);
    }

    return $headers;
}
/**
 * Development helper: prints a human-readable dump of $arr wrapped in a
 * <pre> element and, unless told otherwise, ends the script afterwards.
 *
 * @param mixed $arr  Value to dump.
 * @param bool  $exit When true (the default) the script stops after the dump.
 */
function debug($arr, $exit = true)
{
    $dump = print_r($arr, true);
    echo '<pre>' . $dump . '</pre>';

    if (!$exit) {
        return;
    }
    exit;
}
/**
 * Splits a full name into first, middle and last name.
 *
 * The name is split on runs of whitespace after trimming, so leading,
 * trailing or repeated whitespace never produces empty name parts.
 *
 * @param string $name Full name, e.g. "John Ronald Reuel Tolkien".
 * @return array Keys: "firstname" (string, '' for an empty name),
 *               "lastname" (string, or null when the name has fewer than
 *               two words) and "middlename" (remaining words joined by a
 *               single space, '' when there are none).
 */
function splitName($name) {
    $parts = preg_split('/\s+/', trim((string) $name), -1, PREG_SPLIT_NO_EMPTY);
    if ($parts === false) {
        $parts = array();
    }

    // Take the ends first; whatever is left in between is the middle name.
    $first = array_shift($parts);
    $last = array_pop($parts);

    return array(
        'firstname' => $first === null ? '' : $first,
        'lastname' => $last,
        'middlename' => join(' ', $parts),
    );
}
/**
 * Normalises a scraped, human-written date to the "Y-m-d" format.
 *
 * Markup is stripped, entities and non-breaking spaces become plain spaces,
 * ordinal suffixes are removed only where they follow a digit (so month
 * names such as "August" stay intact), and commas / abbreviation dots are
 * turned into spaces before the text is handed to DateTime.
 *
 * @param string $date Raw date text or HTML fragment, e.g. "August 21st, 1990"
 *                     or "5<sup>th</sup> March 1990".
 * @return string|false The date as "Y-m-d", or false when the input is empty
 *                      or cannot be parsed.
 */
function cleanDate($date){
    $text = strip_tags((string) $date);
    $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
    $text = str_replace("\xC2\xA0", ' ', $text); // UTF-8 non-breaking space

    // "21st" -> "21", "2 nd" -> "2"; the suffix must directly follow a digit.
    $text = preg_replace('/(\d)\s*(?:st|nd|rd|th)\b/i', '$1', $text);
    $text = str_replace(',', ' ', $text);
    // Drop abbreviation and trailing dots ("Jan. 5", "1990.") but keep dots
    // that separate numbers ("05.03.1990").
    $text = preg_replace('/\.(?!\d)/', ' ', $text);
    $text = trim((string) preg_replace('/\s+/', ' ', $text));

    // DateTime treats an empty string as "now", which would invent a date.
    if ($text === '') {
        return false;
    }

    try{
        $dt = new DateTime($text);
    }catch (Exception $e){
        return false;
    }
    return $dt->format('Y-m-d');
}
?>