File: /www/wwwroot/biographybirthday.com/wp-content/plugins/scraper.php
<?php
/*
Plugin Name: Scraper Plugin
Plugin URI: http://ashokbasnet.com.np
description: Manages Scraping of data
a plugin to create awesomeness and spread joy
Version: 1.0
Author: Mr. Ashok Basnet
Author URI: http://ashokbasnet.com.np
License: GPL2
*/
if (!function_exists('strip_tags_blacklist')) {
    /**
     * Remove every occurrence of an element, including its content, from an HTML string.
     *
     * Declared at file level rather than inside scraper_page(): a named function
     * declared inside another function is re-declared on every call of the outer
     * function, which is a fatal error the second time.
     *
     * @param string $html HTML to clean.
     * @param string $tags Tag name to strip. It is placed into the regular expression
     *                     unescaped, so it must be a trusted value.
     * @return string|null Whatever preg_replace() returns for the pattern.
     */
    function strip_tags_blacklist($html, $tags) {
        return preg_replace('/<'. $tags .'\b[^>]*>(.*?)<\/'. $tags .'>/is', "", $html);
    }
}

/**
 * Render the "Spinbot" admin page and handle its form submission.
 *
 * The page shows two URL inputs. On submit, the first non-empty URL is forwarded to
 * the matching script under /wp-scrap/ on this site, the JSON answer is imported as
 * a post through postToWP(), and a preview of the imported data is printed.
 *
 * All request values and all scraped values are escaped before being printed, the
 * form is protected by a nonce, and the success notice is only shown once a post
 * has actually been created.
 *
 * @return void
 */
function scraper_page(){
    // WordPress adds slashes to request data; strip them before using the values.
    $allstar_url = (isset($_POST['allstar']) && is_string($_POST['allstar']))
        ? trim(wp_unslash($_POST['allstar']))
        : '';
    $realitystarfacts_url = (isset($_POST['realitystarfacts']) && is_string($_POST['realitystarfacts']))
        ? trim(wp_unslash($_POST['realitystarfacts']))
        : '';
    ?>
<h1>Spinbot URL</h1><hr>
<div class="wrap">
<form method="POST">
<?php wp_nonce_field('scraper_submit_url', 'scraper_nonce'); ?>
<label for="scraper-allstar">Enter All Star Bio URL [e.g. https://allstarbio.com/mariah-mallad-bio-relationship-net-worth/]:</label>
<input type="text" id="scraper-allstar" name="allstar" class="large-text" value="<?php echo esc_attr($allstar_url); ?>" placeholder="Enter URL"/><br /><br />
<label for="scraper-realitystarfacts">Enter Reality Star Facts URL [e.g. https://realitystarfacts.com/keshia-knight-pulliam/]:</label>
<input type="text" id="scraper-realitystarfacts" name="realitystarfacts" class="large-text" value="<?php echo esc_attr($realitystarfacts_url); ?>" placeholder="Enter URL"/><br /><br />
<input type="submit" name="submit_scraper_url" value="Submit" class="button button-primary">
</form>
</div>
<hr>
    <?php
    if (!array_key_exists('submit_scraper_url', $_POST)) {
        return;
    }

    // Reject forged submissions (dies with the standard WordPress message on failure).
    check_admin_referer('scraper_submit_url', 'scraper_nonce');

    // The All Star Bio URL wins when both fields are filled in.
    if ($allstar_url !== '') {
        $endpoint   = 'spinbot_get_allstarbio.php';
        $source_url = $allstar_url;
    } elseif ($realitystarfacts_url !== '') {
        $endpoint   = 'spinbot_get_realitystarfacts.php';
        $source_url = $realitystarfacts_url;
    } else {
        echo '<div class="card" style="max-width: 100%;">No URL selected</div>';
        return;
    }

    // Encode the source URL so that "&", "#" or spaces inside it cannot break the query string.
    $urltoscrap = get_site_url().'/wp-scrap/'.$endpoint.'?url='.rawurlencode($source_url);

    $raw  = file_get_contents($urltoscrap);
    $data = ($raw === false) ? null : json_decode($raw, true);
    if (!is_array($data) || empty($data['name'])) {
        echo '<div class="notice notice-error"><p><strong>Could not retrieve data for the given URL.</strong></p></div>';
        return;
    }

    $postId = postToWP($data);
    if (!$postId || is_wp_error($postId)) {
        echo '<div class="notice notice-error"><p><strong>The post could not be imported.</strong></p></div>';
        return;
    }
    $postId = absint($postId);

    $category = isset($data['category']) ? $data['category'] : '';
    $body     = isset($data['body']) ? $data['body'] : '';
    $spins    = isset($data['available-spins']) ? $data['available-spins'] : '';
    ?>
<div id="setting-error-settings_updated" class="notice notice-success is-dismissible">
<p><strong>Successfully Spinbot completed and post imported successfully.</strong></p>
</div>
    <?php
    echo '<div class="card" style="max-width: 100%;">';
    // admin_url() keeps the link valid when WordPress lives in a sub-directory.
    echo '<a class="button button-primary" href="'.esc_url(admin_url('post.php?post='.$postId.'&action=edit')).'" target="_blank">OPEN POST</a>';
    echo '<h2>'.esc_html($data['name']).'</h2>';
    echo get_the_post_thumbnail($postId).'<br>';
    echo '<h3>'.esc_html($category).'</h3>';
    // The body is scraped HTML: keep the markup allowed in post content, drop the rest.
    echo wp_kses_post($body);
    echo '<hr><h3 style="color:red;">Available SPINS Left: '.esc_html($spins).'</h3>';
    echo '</div>';
}
// Add to admin menu
/**
 * Register the "Spinbot" page as a sub-menu of Settings (options-general.php).
 *
 * The page is rendered by scraper_page() and requires the 'publish_posts' capability.
 * add_submenu_page() has no icon parameter (unlike add_menu_page()), so the menu
 * position is passed directly as the seventh argument.
 *
 * @return void
 */
function scraper_add__menu(){
    add_submenu_page('options-general.php','Spinbot','Spinbot','publish_posts','spinbot','scraper_page',111);
}
add_action('admin_menu','scraper_add__menu');
// Register the plugin's custom query variables ("scraper" and "random") through the
// 'query_vars' filter; the template_redirect handlers in this file read them from
// $wp_query->query.
add_filter( 'query_vars', 'se67095_add_query_vars');
/**
 * Append this plugin's query variable names to the list of known query variables.
 *
 * @param array $vars Query variable names registered so far.
 * @return array The same list with "scraper" and "random" appended, in that order.
 */
function se67095_add_query_vars($vars){
    foreach (array("scraper", "random") as $plugin_var) {
        $vars[] = $plugin_var;
    }

    return $vars;
}
add_action('template_redirect', 'se67905_random_template');
/**
 * Redirect the visitor to one randomly chosen published post when the "random"
 * query variable is present on the request.
 *
 * The redirect target is the post's permalink (the guid column is an identifier and
 * is not guaranteed to be a working URL), and execution stops right after the
 * redirect header is sent so the originally requested page is not rendered as well.
 *
 * @param mixed $template Value handed over by the hook; returned unchanged.
 * @return mixed The unchanged $template when no redirect takes place.
 */
function se67905_random_template($template) {
    global $wp_query;

    if (!isset($wp_query->query['random'])) {
        return $template;
    }

    $posts = get_posts(array(
        'numberposts' => 1,
        'order'       => 'ASC',
        'post_status' => 'publish',
        'post_type'   => 'post',
        'orderby'     => 'rand'
    ));

    if (isset($posts[0])) {
        $target = get_permalink($posts[0]);
        if ($target) {
            wp_redirect($target);
            exit;
        }
    }

    return $template;
}
/**
 * Import one scraped record into WordPress as a draft post.
 *
 * Reads the keys 'name' (title, required), 'body' (content), 'category' (category
 * name, created when it does not exist yet), 'meta' (array of post meta) and
 * 'image' (remote URL used as the featured image) from $data.
 *
 * When a post with the same title already exists the function prints
 * "already exists" and terminates the request.
 *
 * @param array $data Decoded scraper response.
 * @return int ID of the new post, or 0 when $data is unusable or the insert failed.
 */
function postToWP($data){
    // A failed download / JSON decode arrives here as null; there is nothing to import.
    if (!is_array($data) || empty($data['name'])) {
        return 0;
    }

    // IMPORT TO WORDPRESS
    $existing = get_page_by_title($data['name'], OBJECT, 'post');
    if ($existing) {
        echo 'already exists';
        exit;
    }

    $post_fields = array();

    if (isset($data['category'])) {
        $cat = get_term_by('name', $data['category'], 'category');
        if ($cat) {
            $cat_id = $cat->term_id;
        } else {
            $created = wp_insert_term($data['category'], 'category');
            if (is_wp_error($created)) {
                // For a 'term_exists' error the error data carries the ID of the existing term.
                $existing_term = $created->get_error_data('term_exists');
                $cat_id = $existing_term ? (int) $existing_term : 0;
            } else {
                $cat_id = $created['term_id'];
            }
        }
        if ($cat_id) {
            $post_fields['post_category'] = array($cat_id);
        }
    }

    $post_fields['post_author']    = 1; // Steven
    $post_fields['post_title']     = $data['name'];
    $post_fields['post_status']    = 'draft';
    $post_fields['post_content']   = isset($data['body']) ? $data['body'] : '';
    $post_fields['post_name']      = strtolower(trim(preg_replace('/[^A-Za-z0-9-]+/', '-', $data['name'])));
    $post_fields['comment_status'] = 'closed';
    $post_fields['ping_status']    = 'closed';
    if (isset($data['meta']) && is_array($data['meta'])) {
        $post_fields['meta_input'] = $data['meta'];
    }

    $postId = wp_insert_post($post_fields);
    if (!$postId || is_wp_error($postId)) {
        // Do not download and attach an image to a post that was never created.
        return 0;
    }

    if (!empty($data['image'])) {
        $thumbnail_id = uploadRemoteImageAndAttach($data['image'], $data['name'], $postId);
        if ($thumbnail_id) {
            set_post_thumbnail($postId, $thumbnail_id);
        }
    }

    return $postId;
}
add_action('template_redirect', 'se67905_my_template');
/**
 * Front-end endpoint driven by the "scraper" query variable.
 *
 * Supported values:
 *  - "scrap":            fetch JSON for the URL given in $_GET['url'] from
 *                        biographymask.com and import it as a draft via postToWP().
 *  - "publish":          publish one randomly selected draft post, stamped with the
 *                        current time.
 *  - "horoscope_update": for up to 10 published posts that have a 'bio_dob' meta
 *                        value but no 'bio_horoscope', store zodiac(bio_dob).
 *
 * Each handled value prints a short plain-text status and ends the request.
 *
 * NOTE(review): nothing in this function checks who is making the request, so these
 * actions look reachable by any visitor. Confirm whether that is intended (e.g. for
 * cron calls) and consider requiring a secret or a capability check.
 *
 * @param mixed $template Value handed over by the hook; returned unchanged.
 * @return mixed The unchanged $template when the query variable is absent or unknown.
 */
function se67905_my_template($template) {
    global $wp_query;

    $action = isset($wp_query->query['scraper']) ? $wp_query->query['scraper'] : null;

    if ($action === 'scrap') {
        // WordPress adds slashes to request data; strip them before using the value.
        $source_url = (isset($_GET['url']) && is_string($_GET['url']))
            ? trim(wp_unslash($_GET['url']))
            : '';
        if ($source_url === '') {
            die('Data not available');
        }

        // Encode the URL so characters such as "&" or "#" inside it stay part of the parameter.
        $raw  = file_get_contents("https://biographymask.com/?scraperd=pull&url=".rawurlencode($source_url));
        $data = ($raw === false) ? null : json_decode($raw, true);
        if (!is_array($data) || !isset($data['meta'])) {
            die('Data not available');
        }

        // postToWP() performs the duplicate check (printing "already exists" and
        // exiting), the category lookup/creation, the insert and the featured image.
        $postId = postToWP($data);

        echo ($postId && !is_wp_error($postId)) ? 'Post Inserted Successfully' : 'Post could not be inserted';
        exit;
    }

    if ($action === 'publish') {
        // http://netwp.test/scraper?scraper=publish
        $posts = get_posts(array(
            'numberposts' => 1,
            'order'       => 'ASC',
            'post_status' => 'draft',
            'post_type'   => 'post',
            'orderby'     => 'rand'
        ));

        // The local and the GMT columns need different values unless the site runs on UTC.
        $now_local = current_time('mysql');
        $now_gmt   = current_time('mysql', 1);

        foreach ($posts as $post) {
            // Update the post into the database
            wp_update_post(array(
                'ID'                => $post->ID,
                'post_status'       => 'publish',
                'post_date'         => $now_local,
                'post_date_gmt'     => $now_gmt,
                'post_modified'     => $now_local,
                'post_modified_gmt' => $now_gmt
            ));
        }

        echo 'published';
        exit;
    }

    if ($action === 'horoscope_update') {
        $meta_query = array(
            array(
                'key'     => 'bio_horoscope',
                'compare' => 'NOT EXISTS',
            ),
            array(
                'key'     => 'bio_dob',
                'compare' => 'EXISTS',
            )
        );
        $posts = get_posts(array(
            'numberposts' => 10,
            'order'       => 'ASC',
            'post_status' => 'publish',
            'post_type'   => 'post',
            'meta_query'  => $meta_query
        ));

        foreach ($posts as $post) {
            $dob = get_post_meta($post->ID, 'bio_dob', true);
            // NOTE(review): zodiac() is not defined in this file; presumably provided elsewhere - confirm.
            update_post_meta($post->ID, 'bio_horoscope', zodiac($dob));
        }

        echo 'updated '.count($posts).' posts horoscope';
        exit;
    }

    return $template;
}
/**
 * Print a human-readable dump of a value wrapped in <pre> tags.
 *
 * @param mixed $arr Value to dump.
 * @return void
 */
function debug($arr){
    $dump = print_r($arr, true);
    echo '<pre>' . $dump . '</pre>';
}
/**
 * Download a remote image into the uploads directory and register it as an
 * attachment of the given post.
 *
 * @param string $image_url URL of the image to download.
 * @param string $post_name Used as the attachment title and, sanitized, as the file name.
 * @param int    $parent_id ID of the post the attachment belongs to.
 * @return int|false Attachment ID, or false when the download, the upload or the
 *                   attachment insert failed.
 */
function uploadRemoteImageAndAttach($image_url, $post_name, $parent_id){
    if (!is_string($image_url) || $image_url === '') {
        return false;
    }

    $response = wp_remote_get($image_url);
    // Only a successful response carries the image; an error page must not be stored as one.
    if (is_wp_error($response) || (int) wp_remote_retrieve_response_code($response) !== 200) {
        return false;
    }

    $type = wp_remote_retrieve_header($response, 'content-type');
    if (is_array($type)) {
        $type = reset($type);
    }
    if (!$type) {
        return false;
    }
    // Keep only the MIME type itself, e.g. "image/jpeg" out of "image/jpeg; charset=binary".
    $type_parts = explode(';', $type);
    $type = trim($type_parts[0]);

    // Take the extension from the URL path only, so a query string does not end up in the file name.
    $url_path   = parse_url($image_url, PHP_URL_PATH);
    $file_ext   = pathinfo((string) $url_path, PATHINFO_EXTENSION);
    $image_name = sanitize_file_name($post_name).'.'.$file_ext;

    $mirror = wp_upload_bits($image_name, '', wp_remote_retrieve_body($response));
    if (!empty($mirror['error']) || empty($mirror['file'])) {
        return false;
    }

    $attachment = array(
        'post_title'     => $post_name,
        'post_mime_type' => $type
    );
    $attach_id = wp_insert_attachment($attachment, $mirror['file'], $parent_id);
    if (!$attach_id || is_wp_error($attach_id)) {
        return false;
    }

    // wp_generate_attachment_metadata() lives in an admin include that is not loaded on the front end.
    require_once(ABSPATH . 'wp-admin/includes/image.php');
    $attach_data = wp_generate_attachment_metadata($attach_id, $mirror['file']);
    wp_update_attachment_metadata($attach_id, $attach_data);

    return $attach_id;
}
// https://stackoverflow.com/questions/41524931/how-to-set-featured-image-programmatically-from-url
/**
 * Download an image, store it in the uploads directory and set it as the featured
 * image of a post.
 *
 * Nothing is written or attached when the download or the file write fails.
 *
 * @param int    $post_id   ID of the post that receives the featured image.
 * @param string $image_url Location of the image to download.
 * @param string $post_name Base name (without extension) for the stored file.
 * @return void
 */
function featured_image($post_id,$image_url,$post_name){
    // Get image data; stop here instead of storing an empty file when the download fails.
    $image_data = file_get_contents($image_url);
    if ($image_data === false || $image_data === '') {
        return;
    }

    // Take the extension from the URL path only, so a query string does not end up in the file name.
    $url_path   = parse_url($image_url, PHP_URL_PATH);
    $file_ext   = pathinfo((string) $url_path, PATHINFO_EXTENSION);
    $image_name = $post_name.'.'.$file_ext;

    $upload_dir       = wp_upload_dir(); // Set upload folder
    $unique_file_name = wp_unique_filename($upload_dir['path'], $image_name); // Generate unique name
    $filename         = basename($unique_file_name); // Create image file name

    // Check folder permission and define file location
    if (wp_mkdir_p($upload_dir['path'])) {
        $file = $upload_dir['path'] . '/' . $filename;
    } else {
        $file = $upload_dir['basedir'] . '/' . $filename;
    }

    // Create the image file on the server
    if (file_put_contents($file, $image_data) === false) {
        return;
    }

    // Check image file type
    $wp_filetype = wp_check_filetype($filename, null);

    // Set attachment data
    $attachment = array(
        'post_mime_type' => $wp_filetype['type'],
        'post_title'     => sanitize_file_name($filename),
        'post_content'   => '',
        'post_status'    => 'inherit'
    );

    // Create the attachment
    $attach_id = wp_insert_attachment($attachment, $file, $post_id);
    if (!$attach_id || is_wp_error($attach_id)) {
        return;
    }

    // wp_generate_attachment_metadata() lives in an admin include that is not loaded on the front end.
    require_once(ABSPATH . 'wp-admin/includes/image.php');

    // Define attachment metadata and assign it to the attachment
    $attach_data = wp_generate_attachment_metadata($attach_id, $file);
    wp_update_attachment_metadata($attach_id, $attach_data);

    // And finally assign featured image to post
    set_post_thumbnail($post_id, $attach_id);
}