forked from hedii/php-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.php
204 lines (173 loc) · 6.9 KB
/
index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
<?php require 'Crawler.php'; ?>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>php-crawler.dev | index.php</title>
<meta name="description" content="">
<meta name="keywords" content="">
<meta name="author" content="">
<!-- css -->
<link href="css/bootstrap.min.css" rel="stylesheet">
<link href="css/style.css" rel="stylesheet">
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<!-- js -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<script src="js/bootstrap.min.js"></script>
<script src="js/script.js"></script>
</head>
<body>
<div class="counter">
<h4>Statistics</h4>
<div id="total_email">total email found:</div>
<div id="total_url">total url stored:</div>
<div id="visited_url">visited url:</div>
<div id="not_visited_url">not visited url:</div>
<div id="crawled_for_email_url">url crawled for emails:</div>
<div id="not_crawled_for_email_url">url not crawled for emails yet:</div>
<div id="system_load">system load:</div>
<h4>Total urls</h4>
<div class="progress">
<div id="progress_visited_url" class="progress-bar progress-bar-success" style="width: 0%">
<span id="visited_url_percent">visited</span>
</div>
<div id="progress_non_visited_url" class="progress-bar progress-bar-danger" style="width: 0%">
<span id="not_visited_url_percent">non-visited</span>
</div>
</div>
<h4>Urls crawled for emails</h4>
<div class="progress">
<div id="progress_crawled_for_email_url" class="progress-bar progress-bar-success" style="width: 0%">
<span id="crawled_for_email_url_percent">crawled for email</span>
</div>
<div id="progress_non_crawled_for_email_url" class="progress-bar progress-bar-danger" style="width: 0%">
<span id="not_crawled_for_email_url_percent">non-crawled for email</span>
</div>
</div>
</div>
<script>
/*
 * Statistics poller.
 *
 * Every 2 seconds this scrolls the window to the bottom of the page and
 * re-requests the counters served under /datas/, writing the results into
 * the "Statistics" panel and its two progress bars.
 *
 * Wrapped in an immediately-invoked function so the helpers below do not
 * become globals.
 */
(function () {
    var REFRESH_INTERVAL_MS = 2000;

    /* Issue a GET request and hand the response body to onSuccess. */
    function fetchStat(url, onSuccess) {
        $.ajax({
            url: url,
            type: 'GET',
            success: onSuccess
        });
    }

    /*
     * Convert a response body into a non-negative finite number.
     * Returns null when the body is empty or not numeric, so callers can
     * skip the arithmetic instead of rendering NaN.
     */
    function toCount(raw) {
        var text = $.trim(String(raw));
        var value = text === '' ? NaN : Number(text);
        return (isFinite(value) && value >= 0) ? value : null;
    }

    /*
     * Share of `part` in `total` as a percentage rounded to two decimals
     * and clamped to [0, 100]. A total of 0 yields 0 rather than NaN, which
     * would otherwise end up as an invalid CSS width ("NaN%").
     */
    function toPercent(part, total) {
        if (total <= 0) {
            return 0;
        }
        var percent = Math.round(part / total * 100 * 100) / 100;
        return Math.min(100, Math.max(0, percent));
    }

    /*
     * Fill in the "remaining" counter and both halves of one progress bar.
     *
     * total - overall number of stored urls
     * done  - number of urls already processed
     * view  - selectors and label texts of the elements to update
     */
    function renderBreakdown(total, done, view) {
        // total and done come from separate requests, so done may briefly
        // exceed total; never show a negative remainder.
        var remaining = Math.max(0, total - done);
        var donePercent = toPercent(done, total);
        var remainingPercent = toPercent(remaining, total);

        $(view.remainingCount).html(view.remainingCountLabel + remaining);
        $(view.doneBar).css('width', donePercent + '%');
        $(view.remainingBar).css('width', remainingPercent + '%');
        $(view.donePercent).html(donePercent + view.donePercentLabel);
        $(view.remainingPercent).html(remainingPercent + view.remainingPercentLabel);
    }

    /* One polling round: scroll down, then refresh every counter. */
    function refreshStatistics() {
        // focus on the bottom of the page
        window.scrollTo(0, document.body.scrollHeight);

        fetchStat('/datas/total_email.php', function (data) {
            $('#total_email').html('total email found: ' + data);
        });

        fetchStat('/datas/total_url.php', function (data) {
            var totalUrl = toCount(data);
            $('#total_url').html('total url stored: ' + data);

            // The two breakdowns need the total, so they are requested only
            // once it has arrived.
            fetchStat('/datas/visited_url.php', function (data) {
                var visitedUrl = toCount(data);
                $('#visited_url').html('visited url: ' + data);
                if (totalUrl === null || visitedUrl === null) {
                    // Non-numeric response: keep the previous bar state.
                    return;
                }
                /* Progress bar for urls */
                renderBreakdown(totalUrl, visitedUrl, {
                    remainingCount: '#not_visited_url',
                    remainingCountLabel: 'not visited url: ',
                    doneBar: '#progress_visited_url',
                    remainingBar: '#progress_non_visited_url',
                    donePercent: '#visited_url_percent',
                    donePercentLabel: '% visited',
                    remainingPercent: '#not_visited_url_percent',
                    remainingPercentLabel: '% non-visited'
                });
            });

            fetchStat('/datas/crawled_for_email_url.php', function (data) {
                var crawledUrl = toCount(data);
                $('#crawled_for_email_url').html('url crawled for emails: ' + data);
                if (totalUrl === null || crawledUrl === null) {
                    // Non-numeric response: keep the previous bar state.
                    return;
                }
                /* Progress bar for crawled for email url */
                renderBreakdown(totalUrl, crawledUrl, {
                    remainingCount: '#not_crawled_for_email_url',
                    remainingCountLabel: 'url not crawled for emails: ',
                    doneBar: '#progress_crawled_for_email_url',
                    remainingBar: '#progress_non_crawled_for_email_url',
                    donePercent: '#crawled_for_email_url_percent',
                    donePercentLabel: '% crawled for email',
                    remainingPercent: '#not_crawled_for_email_url_percent',
                    remainingPercentLabel: '% not crawled for email'
                });
            });
        });

        fetchStat('/datas/system_load.php', function (data) {
            $('#system_load').html('system load: ' + data);
        });
    }

    setInterval(refreshStatistics, REFRESH_INTERVAL_MS);
})();
</script>
<header id="site-header">
<nav class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container-fluid">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="index.php">php-crawler</a>
</div>
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-right">
<li><a href="index.php">Home</a></li>
<li><a href="emails.php">Emails</a></li>
</ul>
</div><!-- .navbar-collapse -->
</div><!-- .container-fluid -->
</nav><!-- .navbar -->
</header><!-- #site-header -->
<main id="site-content" role="main">
<div class="container">
<div class="row">
<div class="col-md-12">
<div class="page-header">
<h1>php crawler</h1>
</div>
<form method="post" action="">
<div class="form-group">
<label for="url">Please enter a URL</label>
<input type="url" class="form-control" id="url" name="url" placeholder="http://example.com">
</div>
<input type="submit" class="btn btn-default">
</form>
</div>
</div><!-- .row -->
<div class="row">
<div class="col-md-12">
<?php
/*
 * Start a crawl when the URL form on this page has been submitted.
 *
 * $_POST['url'] is untrusted input, so it is checked before being handed
 * to the crawler: it must be a string, and it must parse as an absolute
 * http:// or https:// URL with a host. Anything else (an empty submission,
 * an array sent as url[]=..., or another scheme such as file://) is
 * rejected with a message instead of being crawled.
 */
if (isset($_POST['url'])) {
    $url = is_string($_POST['url']) ? trim($_POST['url']) : '';

    // parse_url() returns false on seriously malformed input.
    $parts = parse_url($url);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $hasHost = is_array($parts) && isset($parts['host']) && $parts['host'] !== '';

    if ($hasHost && ($scheme === 'http' || $scheme === 'https')) {
        $crawler = new Crawler();
        $crawler->crawl_urls($url);
    } else {
        // Static text only; the rejected input is deliberately not echoed.
        echo '<div class="alert alert-danger" role="alert">'
            . 'Please enter a valid http:// or https:// URL.'
            . '</div>';
    }
}
?>
</div>
</div><!-- .row -->
</div><!-- .container -->
</main><!-- #site-content -->
<footer id="site-footer" role="contentinfo">
</footer><!-- #site-footer -->
</body>
</html>