-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParser.php
executable file
·207 lines (174 loc) · 5.24 KB
/
Parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
<?php
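/* Parser
* ------
* > find - $Tag, $Clean [protected] (Returns the first source line containing the tag.)
* > findRange - $Tag, $Range, $Tag2, $Clean [protected] (Returns the lines from the first tag match up to a line count or an end tag.)
* > findAll - $Tag, $Range, $Tag2, $Clean [protected] (Like findRange, but collects every matching group.)
* > segment - $Tag [protected] (Drops the source lines that precede the first line containing the tag.)
* > clean - $Line [private] (Decodes entities, strips tags and removes leftover comment terminators.)
* > printSourceArray [public] (Dumps the stored source lines.)
* > getSource - $URL [protected] (Fetches the source code of the specified url.)
* > curl - $URL [private] (Core curl function with additional options.)
*/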
/**
 * Line-oriented page scraper helper.
 *
 * A page is downloaded with cURL, entity-encoded with htmlentities() and split
 * on "\n" into $SourceCodeArray. The find*() helpers then run case-insensitive
 * substring searches over those lines and, unless told otherwise, return the
 * matches with entities decoded and tags stripped.
 *
 * An empty or NULL tag never matches in any helper. (On PHP 8 stripos() reports
 * an empty needle as "found at offset 0", which previously made the optional
 * end tag of findRange() match every line.)
 */
class Loadstone_Parser
{
    // Lines of the most recently fetched page (entity-encoded, see curl()).
    // Starts as an empty array so the helpers are safe to call before getSource().
    protected $SourceCodeArray = array();

    /**
     * Returns the first source line that contains $Tag (case-insensitive).
     *
     * @param string $Tag   Substring to look for; empty/NULL never matches.
     * @param bool   $Clean When true the line is entity-decoded, tag-stripped and cleaned.
     * @return string|bool  The (trimmed, optionally cleaned) line; true when the line matched
     *                      but is empty() after processing; false when nothing matched.
     *                      Note that empty() also treats the string "0" as empty.
     */
    protected function find($Tag, $Clean = TRUE)
    {
        foreach ($this->SourceCodeArray as $Line)
        {
            $Line = trim($Line);
            if (!$this->lineContains($Line, $Tag)) { continue; }

            if ($Clean) { $Line = $this->clean(strip_tags(html_entity_decode($Line))); }

            // A match whose visible text is empty is reported as a bare "found".
            return empty($Line) ? true : $Line;
        }

        // No find
        return false;
    }

    /**
     * Collects lines starting at the first line containing $Tag.
     *
     * Collection stops after $Range lines, or after the line on which the end
     * marker has been seen, whichever comes first. Both the start line and the
     * stopping line are part of the collected range.
     *
     * NOTE(review): the end marker is tracked from the top of the source, not
     * from the start tag. If $Tag2 already occurred before $Tag, only the start
     * line is collected. Kept as-is because callers may depend on it.
     *
     * @param string      $Tag   Start marker; empty/NULL never matches.
     * @param int         $Range Maximum number of lines; a falsy value means 9999.
     * @param string|null $Tag2  Optional end marker; NULL/empty disables it.
     * @param bool        $Clean When true each line is entity-decoded, tag-stripped and cleaned.
     * @return array|bool Re-indexed list of the non-empty collected lines, or false when
     *                    nothing remains. array_filter() drops every falsy entry, so a
     *                    line that is exactly "0" is removed as well.
     */
    protected function findRange($Tag, $Range, $Tag2 = NULL, $Clean = TRUE)
    {
        $Found = false;
        $Found2 = false;
        $Iterations = 0;
        $Array = array();

        // If range null
        if (!$Range) { $Range = 9999; }

        foreach ($this->SourceCodeArray as $Line)
        {
            $Line = trim($Line);

            // Search line, mark found
            if ($this->lineContains($Line, $Tag)) { $Found = true; }
            if ($this->lineContains($Line, $Tag2)) { $Found2 = true; }
            if (!$Found) { continue; }

            $Array[] = $Clean ? $this->clean(strip_tags(html_entity_decode($Line))) : $Line;
            $Iterations++;

            // Stop at the line limit or once the end marker has been seen.
            if ($Iterations == $Range || $Found2) { break; }
        }

        // Remove empty values
        $Array = array_values(array_filter($Array));

        return $Array ? $Array : false;
    }

    /**
     * Like findRange(), but keeps scanning and returns every group it completes.
     *
     * A group starts at a line containing $Tag and is closed after $Range lines
     * or once the end marker has been seen. After a group is closed the search
     * state is reset and the scan continues with the next line. A group that is
     * still open when the source ends is not returned.
     *
     * NOTE(review): as in findRange(), an end marker seen before a group starts
     * closes that group after its first line.
     *
     * @param string      $Tag   Start marker; empty/NULL never matches.
     * @param int         $Range Maximum lines per group; a falsy value means 9999.
     * @param string|null $Tag2  Optional end marker; NULL/empty disables it.
     * @param bool        $Clean When true each line is entity-decoded, tag-stripped and cleaned.
     * @return array|bool List of groups (each a re-indexed list of non-falsy lines, possibly
     *                    empty), or false when no group was completed.
     */
    protected function findAll($Tag, $Range, $Tag2 = NULL, $Clean = TRUE)
    {
        $Found = false;
        $Found2 = false;
        $Iterations = 0;
        $Group = array();
        $Groups = array();

        // If range null
        if (!$Range) { $Range = 9999; }

        foreach ($this->SourceCodeArray as $Line)
        {
            $Line = trim($Line);

            // Search line, mark found
            if ($this->lineContains($Line, $Tag)) { $Found = true; }
            if ($this->lineContains($Line, $Tag2)) { $Found2 = true; }
            if (!$Found) { continue; }

            $Group[] = $Clean ? $this->clean(strip_tags(html_entity_decode($Line))) : $Line;
            $Iterations++;

            if ($Iterations == $Range || $Found2)
            {
                // Close the group: drop empty values, store it, reset the search state.
                $Groups[] = array_values(array_filter($Group));
                $Group = array();
                $Found = false;
                $Found2 = false;
                $Iterations = 0;
            }
        }

        return $Groups ? $Groups : false;
    }

    /**
     * Removes the source lines that precede the first line containing $Tag.
     *
     * The matching line itself is kept. If no line contains $Tag, every line is
     * removed. An empty/NULL tag leaves the source untouched.
     *
     * @param string $Tag Marker to cut up to.
     * @return void
     */
    protected function segment($Tag)
    {
        if ($Tag === null || (string) $Tag === '') { return; }

        // Count the lines in front of the first match.
        $Offset = 0;
        foreach ($this->SourceCodeArray as $Line)
        {
            if ($this->lineContains($Line, $Tag)) { break; }
            $Offset++;
        }

        // Splice array
        array_splice($this->SourceCodeArray, 0, $Offset);
    }

    /**
     * Case-insensitive "does this line contain that tag" test.
     *
     * @param string $Line Line to search in.
     * @param mixed  $Tag  Needle; NULL or an empty string never matches.
     * @return bool
     */
    private function lineContains($Line, $Tag)
    {
        if ($Tag === null) { return false; }

        $Needle = (string) $Tag;
        if ($Needle === '') { return false; }

        return stripos($Line, $Needle) !== false;
    }

    /**
     * Cleans a found line: decodes entities, strips tags and removes "-->".
     *
     * The find*() helpers already decode and strip before calling this, so the
     * line is decoded a second time here. That is kept to preserve the existing
     * output.
     *
     * @param string $Line
     * @return string
     */
    private function clean($Line)
    {
        // Strip tags
        $Line = strip_tags(html_entity_decode($Line));

        // Random removals (empty-string replacement; NULL is deprecated on PHP 8.1+)
        $Remove = array("-->");
        $Line = str_ireplace($Remove, '', $Line);

        return $Line;
    }

    /**
     * Prints the source array through the Show() helper, which is not defined in this class.
     */
    public function printSourceArray()
    {
        Show($this->SourceCodeArray);
    }

    /**
     * Downloads $URL and stores its entity-encoded source as an array of lines.
     *
     * @param string $URL
     * @return bool Always true. A failed download is not reported; the stored array then
     *              holds a single empty string.
     */
    protected function getSource($URL)
    {
        $Source = $this->curl($URL);
        $this->SourceCodeArray = explode("\n", $Source);
        return true;
    }

    /**
     * Fetches a page with cURL and returns its body passed through htmlentities().
     *
     * @param string $URL
     * @return string Entity-encoded body, or an empty string when the handle could not be
     *                created or the transfer failed.
     */
    private function curl($URL)
    {
        $Options = array(
            CURLOPT_RETURNTRANSFER => true,   // return the page instead of printing it
            CURLOPT_HEADER         => false,  // do not include response headers in the result
            CURLOPT_FOLLOWLOCATION => false,  // redirects are NOT followed
            CURLOPT_ENCODING       => "",     // advertise every encoding cURL supports
            CURLOPT_AUTOREFERER    => true,   // set referer on redirect (only matters if redirects are followed)
            CURLOPT_CONNECTTIMEOUT => 15,     // timeout on connect, in seconds
            CURLOPT_TIMEOUT        => 15,     // timeout on the whole transfer, in seconds
            CURLOPT_MAXREDIRS      => 5,      // redirect limit (only matters if redirects are followed)
            CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
            CURLOPT_HTTPHEADER     => array('Content-type: text/html; charset=utf-8', 'Accept-Language: en'),
        );

        $Handle = curl_init($URL);
        if ($Handle === false) { return ''; }

        curl_setopt_array($Handle, $Options);
        $Source = curl_exec($Handle);
        curl_close($Handle);

        // curl_exec() yields false on failure; report that as an empty page.
        if (!is_string($Source)) { return ''; }

        return htmlentities($Source);
    }
}