Web scraping is a technique for extracting large amounts of data from websites and saving it to a local file on your computer or to a database, or exposing it through an API. The data displayed by most websites can only be viewed in a web browser; the sites do not offer a way to save a copy of that data for your own use. The only manual option is to copy and paste the data you need, which in practice is a very tedious job that may take hours to complete. Web scraping automates this process: instead of manual work, web scraping software performs the same task within seconds.
all-websites.php
<?php
/**
 * Admin list of the websites registered for scraping.
 *
 * Renders one table row per record returned from the `wp_websites` table,
 * with Edit, Delete (confirmed through a per-row modal dialog) and Scrap
 * actions. The Scrap button posts the row's URL and CSS selectors to the
 * `scrap-website` admin page.
 *
 * Every database value is escaped for the context it is printed in, so a
 * stored URL or selector containing quotes or markup cannot break the page.
 */
global $wpdb;

// NOTE(review): the table name hard-codes the default "wp_" prefix; confirm
// against the code that creates the table before switching to $wpdb->prefix.
$websites = $wpdb->get_results("SELECT * FROM wp_websites");
if (!is_array($websites)) {
    // Presumably null on a failed query (see wpdb docs); render an empty table instead of warning.
    $websites = array();
}
?>
<div class="table-responsive container-fluid p-3" style="margin-top:30px;">
<table class="table table-stripped p-3" style="background:#f7f7f7; padding:30px" id="datatablespk">
<thead>
<tr>
<th style="text-align:center;">S.No</th>
<th>Website url</th>
<th>Heading Selector</th>
<th>Thumbnail Selector</th>
<th>Details Page url selector</th>
<th>Action</th>
</tr>
</thead>
<tbody>
<!-- loop start -->
<?php foreach ($websites as $index => $website) { ?>
<tr>
<td width='5%'><?php echo $index + 1; ?></td>
<td width='25%'><?php echo esc_html($website->website_url); ?></td>
<td width='15%'><?php echo esc_html($website->heading_selector); ?></td>
<td width='15%'><?php echo esc_html($website->thumb_selector); ?></td>
<td width='15%'><?php echo esc_html($website->details_page_url); ?></td>
<td width='20%'>
<a href="<?php echo esc_url('admin.php?page=edit-website&id=' . rawurlencode((string) $website->id)); ?>">
<button class='btn btn-success btn-sm' type='button'>Edit</button></a>
<button class='btn btn-danger btn-sm' type='button' data-toggle="modal" data-target="#myModal<?php echo esc_attr($website->id); ?>">DELETE</button>
<button class='btn btn-primary btn-sm scrap-website-btn' type='button' id="scrap<?php echo esc_attr($website->id); ?>"
website_url="<?php echo esc_attr($website->website_url); ?>" heading_selector="<?php echo esc_attr($website->heading_selector); ?>" thumb_selector="<?php echo esc_attr($website->thumb_selector); ?>" details_page_url="<?php echo esc_attr($website->details_page_url); ?>">
<span class="scrap-website-label" id="scrapbtn<?php echo esc_attr($website->id); ?>">Scrap</span>
<span class="scrap-website-loading" id="loading<?php echo esc_attr($website->id); ?>" style="display:none">Loading..</span></button>
</td>
</tr>
<?php } ?>
<!-- loop end -->
</tbody>
</table>
<!-- Delete confirmation modals: kept outside the table, where <div> is valid markup -->
<?php foreach ($websites as $website) { ?>
<!-- The Modal -->
<div class="modal" id="myModal<?php echo esc_attr($website->id); ?>">
<div class="modal-dialog">
<div class="modal-content">
<!-- Modal Header -->
<div class="modal-header">
<h4 class="modal-title">Data Delete</h4>
<button type="button" class="close" data-dismiss="modal">×</button>
</div>
<!-- Modal body -->
<div class="modal-body">
Are You sure ? to want delete this <?php echo esc_html($website->website_url); ?> item.
</div>
<!-- Modal footer -->
<div class="modal-footer">
<?php // NOTE(review): deletion is triggered by a plain GET link without a nonce; confirm the delete handler protects against CSRF. ?>
<a href="<?php echo esc_url('admin.php?page=delete-website&del=' . rawurlencode((string) $website->id)); ?>">
<button type="button" class="btn btn-danger">Yes</button>
</a>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-dark" data-dismiss="modal">No</button>
</div>
</div>
</div>
</div>
<?php } ?>
<!-- scrap website -->
<script>
jQuery(document).ready(function($) {
    let ajax_url = "admin.php?page=scrap-website";

    // One delegated handler serves every row, including rows the table
    // plugin re-attaches when the visible page changes.
    $('#datatablespk').on('click', '.scrap-website-btn', function() {
        let $button = $(this);
        let $label = $button.find('.scrap-website-label');
        let $loading = $button.find('.scrap-website-loading');

        $label.hide();
        $loading.show();

        $.ajax({
            type: "post",
            url: ajax_url,
            data: {
                website_url: $button.attr('website_url'),
                heading_selector: $button.attr('heading_selector'),
                thumb_selector: $button.attr('thumb_selector'),
                details_page_url: $button.attr('details_page_url')
            },
            success: function() {
                $loading.hide();
                // Show the label again, otherwise "Done !" is never visible.
                $label.text('Done !').show();
                $button.css('background', 'green');
            },
            error: function() {
                // Restore the button so the user can retry after a failed request.
                $loading.hide();
                $label.show();
            }
        });
    });
});
</script>
</div>
<script>
$(document).ready(function() {
$('#datatablespk').dataTable({
"scrollX": false,
"pagingType": "numbers"
} );
} );
</script>
<style>
/* Left-align body and header cells. A descendant selector is used for the
   header because browsers always place <th> inside a <tr>, so the child
   selector "thead>th" can never match. */
td, thead th {
    text-align: left;
}
</style>
menu.php
// Register the "scrap-website" admin page. The parent slug and both titles
// are empty strings, so the page is reachable at admin.php?page=scrap-website
// without a visible menu label.
add_submenu_page(
    '',                             // parent slug
    __('', 'scrap-website'),        // page title
    __('', 'scrap-website'),        // menu title
    'manage_options',               // capability required to open the page
    'scrap-website',                // menu slug used in the ?page= query argument
    'scrap_websites_page_function'  // callback that renders the page
);
/**
 * Page callback for the "scrap-website" admin page.
 *
 * Loads the scraping script that lives next to this file; require_once
 * guarantees the script is executed at most once per request.
 */
function scrap_websites_page_function()
{
    require_once(__DIR__ . "/scrapping-form/scrap-website.php");
}
scrap-website.php
<?php
/**
 * Scrapes up to six items from a posted website and stores them in `wp_scrap`.
 *
 * Expected POST fields:
 *   - website_url       page to fetch (must be an http/https URL)
 *   - heading_selector  CSS selector of the item titles
 *   - thumb_selector    CSS selector of the item thumbnails (src is stored)
 *   - details_page_url  CSS selector of the item links (href is stored)
 *
 * Prints 200 when the page was processed, 400 when the input is invalid and
 * 500 when the page could not be fetched or parsed.
 *
 * NOTE(review): the request carries no nonce; access control relies on the
 * capability configured where this page is registered — confirm.
 */
global $wpdb;

// Reads one POST field as a trimmed string. WordPress adds slashes to
// superglobals, so the value is unslashed to recover selectors with quotes.
$read_field = function ($name) {
    if (!isset($_POST[$name]) || !is_string($_POST[$name])) {
        return '';
    }
    return trim(wp_unslash($_POST[$name]));
};

$website_url      = $read_field('website_url');
$heading_selector = $read_field('heading_selector');
$thumb_selector   = $read_field('thumb_selector');
$details_page_url = $read_field('details_page_url');

// Only remote http(s) pages may be fetched; this also keeps local file
// paths away from the HTML loader.
$scheme = strtolower((string) parse_url($website_url, PHP_URL_SCHEME));
if ($heading_selector === '' || !in_array($scheme, array('http', 'https'), true)) {
    echo 400;
    return;
}

$html = file_get_html($website_url);
if (!$html) {
    // The loader yields a falsy value when the page cannot be fetched or parsed.
    echo 500;
    return;
}

// Returns the $index-th element matching $selector, or null when the
// selector is empty or has no such match.
$pick = function ($selector, $index) use ($html) {
    if ($selector === '') {
        return null;
    }
    return $html->find($selector, $index);
};

// NOTE(review): hard-coded "wp_" prefix; confirm against the code that creates the table.
$table_name = 'wp_scrap';

// Indices 0..5: the first six matches of each selector, as in the original loop.
for ($i = 0; $i <= 5; $i++) {
    $heading = $pick($heading_selector, $i);
    $link    = $pick($details_page_url, $i);
    $image   = $pick($thumb_selector, $i);

    if ($heading === null && $link === null) {
        // Nothing usable at this position; do not store an empty row.
        continue;
    }

    $title = $heading !== null ? (string) $heading->plaintext : '';
    $url   = $link !== null ? (string) $link->href : '';
    $thumb = $image !== null ? (string) $image->src : '';

    // Insert each item once per website. The previous check looped over the
    // existing rows, which stored nothing while the table was empty and
    // stored one duplicate per unrelated row otherwise.
    $already_stored = (int) $wpdb->get_var(
        $wpdb->prepare(
            "SELECT COUNT(*) FROM wp_scrap WHERE website_url = %s AND url = %s",
            $website_url,
            $url
        )
    );
    if ($already_stored > 0) {
        continue;
    }

    $wpdb->insert(
        $table_name,
        array(
            'website_url' => $website_url,
            'title'       => $title,
            'url'         => $url,
            'thumb'       => $thumb,
        )
    );
}

echo 200;
?>