forked from pgodschalk/swiperproxy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is, for all intents and purposes, the initial commit. In reality,
there were several commits preceding this one in a now-deleted branch that was used for configuring and testing against CI.
- Loading branch information
Patrick Godschalk
committed
Dec 27, 2014
0 parents
commit 4846de0
Showing
117 changed files
with
85,239 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
language: python
python:
  - "2.7"

install:
  # Install build dependencies. Refresh the package index first and pass
  # -y so apt-get never stops to ask for confirmation on the
  # non-interactive CI worker.
  - sudo which python
  - sudo apt-get update -qq
  - sudo apt-get install -y cython python-ipy

  # Build and install streamhtmlparser and its Python module.
  - cd include/streamhtmlparser
  - ./configure
  - sudo make
  - sudo make install
  - cd src/py-streamhtmlparser
  - sudo make
  - sudo make install
  # Five levels up from include/streamhtmlparser/src/py-streamhtmlparser
  # (four levels deep) ends in the parent of the starting directory; the
  # swiperproxy/ paths below are relative to that parent.
  # NOTE(review): presumably the checkout directory is named
  # "swiperproxy" - confirm.
  - cd ../../../../../

  # Put streamhtmlparser in the proper environment.
  - sudo cp -R /usr/local/lib/python2.7/dist-packages/* /usr/lib/python2.7/dist-packages
  - sudo ldconfig -v

  # Build the runtime environment. -p keeps mkdir from failing when the
  # log directory already exists.
  - sudo mkdir -p /var/log/swiperproxy
  - sudo addgroup --system swiperproxy
  - sudo adduser --system swiperproxy --ingroup=swiperproxy --no-create-home
  - sudo chown -R swiperproxy:swiperproxy swiperproxy
  - sudo chown swiperproxy:swiperproxy /var/log/swiperproxy

script:
  # Start the proxy as a background daemon running as the swiperproxy user.
  - sudo start-stop-daemon --start --background --pidfile /var/run/swiperproxy.pid --make-pidfile --user swiperproxy --chuid swiperproxy --startas swiperproxy/Proxy.py -- -c swiperproxy/proxy.conf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# Copyright (c) 2014 SwiperProxy Team | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining | ||
# a copy of this software and associated documentation files (the | ||
# "Software"), to deal in the Software without restriction, including | ||
# without limitation the rights to use, copy, modify, merge, publish | ||
# distribute, sublicense and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so, subject | ||
# to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be | ||
# included in all copies or substantial portions of the Software. | ||
|
||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
# SOFTWARE. | ||
|
||
try: | ||
from cStringIO import StringIO | ||
except ImportError: | ||
from StringIO import StringIO | ||
|
||
class Buffer(object):
    """
    FIFO byte/character buffer built from a list of StringIO chunks.

    Data is appended to the last chunk; once a chunk has reached
    max_size a fresh chunk is started. Data is consumed from the first
    chunk, which is dropped as soon as a read runs past its end.
    """
    MAX_BUFFER = 1024 * 32

    def __init__(self, max_size=MAX_BUFFER):
        """
        Create an empty buffer.

        max_size is the chunk size threshold: after a write leaves the
        current chunk at or above this size, the next write goes to a
        new chunk. A single write is never split, so a chunk can exceed
        max_size.
        """
        self.buffers = []
        self.max_size = max_size
        self.closing = False
        self.eof = False
        # Offset of the next unread character in self.buffers[0].
        self.read_pos = 0
        # Offset at which the next write lands in self.buffers[-1].
        self.write_pos = 0

    def write(self, data):
        """
        Append data to the buffer.
        """
        if not self.buffers:
            self.buffers.append(StringIO())
            self.write_pos = 0
        chunk = self.buffers[-1]
        # The chunk's file position is shared with read() and __len__(),
        # so always seek to the remembered write offset first.
        chunk.seek(self.write_pos)
        chunk.write(data)
        if chunk.tell() >= self.max_size:
            # Current chunk is full; later writes go to a new one.
            chunk = StringIO()
            self.buffers.append(chunk)
        self.write_pos = chunk.tell()

    def read(self, length=-1):
        """
        Remove and return up to length characters from the buffer.

        A length of None or any negative value drains the whole buffer.
        Fewer characters than requested are returned when the buffer
        holds less; an empty string is returned when it is empty.
        """
        read_all = length is None or length < 0
        remaining = -1 if read_all else length
        pieces = []
        while self.buffers:
            chunk = self.buffers[0]
            chunk.seek(self.read_pos)
            data = chunk.read(remaining)
            pieces.append(data)
            self.read_pos = chunk.tell()
            if not read_all:
                remaining -= len(data)
                if remaining <= 0:
                    # Request satisfied; the chunk may still hold data
                    # (or be exactly exhausted, in which case the next
                    # read drops it).
                    break
            # The chunk was exhausted before the request was satisfied:
            # drop it and continue with the next one.
            del self.buffers[0]
            self.read_pos = 0
        return "".join(pieces)

    def flush(self):
        """
        No-op; present for file-like compatibility.
        """
        pass

    def __len__(self):
        """
        Return the number of unread characters held in the buffer.
        """
        total = 0
        for chunk in self.buffers:
            # Seek to the end to learn the chunk size. read() and
            # write() re-seek before use, so moving the position here
            # is harmless.
            chunk.seek(0, 2)
            total += chunk.tell()
        if self.buffers:
            # Part of the first chunk may already have been consumed.
            total -= self.read_pos
        return total

    def close(self):
        """
        Mark the buffer as having reached end of file.
        """
        self.eof = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Copyright (c) 2014 SwiperProxy Team | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining | ||
# a copy of this software and associated documentation files (the | ||
# "Software"), to deal in the Software without restriction, including | ||
# without limitation the rights to use, copy, modify, merge, publish | ||
# distribute, sublicense and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so, subject | ||
# to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be | ||
# included in all copies or substantial portions of the Software. | ||
|
||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
# SOFTWARE. | ||
|
||
import re | ||
import Util | ||
|
||
class CSSPage:
    """
    Used for a CSS stylesheet. Uses the reader function to read a
    block, rewrites the URLs in that block and writes it to the client
    using the writer function.
    """
    BLKSIZE = 65536

    # Number of trailing input characters kept unprocessed between reads
    # so that a URL divided over two blocks can still be matched whole.
    _HOLDBACK = 1024

    def __init__(self, config, ssl, reader, writer, remote_host):
        """
        Store the collaborators and start with empty buffers.

        reader is called with a block size and returns the next piece
        of input (something falsy at end of input); writer is called
        with each piece of output. config, ssl and remote_host are
        passed through to Util.rewrite_URL.
        """
        self.config = config
        self.ssl = ssl
        self.reader = reader
        self.writer = writer
        self.input_buffer = ''
        self.output_buffer = ''
        self.remote_host = remote_host

    def rewrite_re(self, m):
        """
        Return the replacement text for one match of the URL pattern
        used in rewrite().

        Group 1 is everything before the URL, group 6 the optional
        scheme, group 7 the URL after the "//" and group 9 the closing
        quote/parenthesis characters.
        """
        part1 = m.group(1) or ''
        scheme = m.group(6) or ''
        url = m.group(7) or ''
        closer = m.group(9) or ''

        return part1 + Util.rewrite_URL(scheme + "//" + url, self.config,
                                        self.ssl, self.remote_host) + closer

    def rewrite(self):
        """
        Read the whole stylesheet from the reader, rewrite the URLs in
        background, background-image and @import rules, and write the
        result to the writer.
        """
        pattern = r"(((background(-image)?\s*:)|@import)\s*(url)?\s*[('\"]+\s*)(https?:)?//([^\"')]+)(:\d+)?([)'\"]+)"
        # Compile with the flags explicitly: the fourth positional
        # argument of re.sub() is a substitution count, not the flags.
        url_re = re.compile(pattern, re.I | re.M | re.S)

        while True:
            s = self.reader(self.BLKSIZE)
            if not s:
                # End of file: whatever is left in the input buffer has
                # not been rewritten yet.
                self.output_buffer += url_re.sub(self.rewrite_re,
                                                 self.input_buffer)
                self.input_buffer = ''
                self.write_output(True)
                break

            self.input_buffer += s
            data = self.input_buffer

            # A URL may be divided over two blocks, so only the part of
            # the input before the last _HOLDBACK characters is
            # processed now. The tail stays in the input buffer
            # unmodified, which also guarantees that no text is run
            # through the rewriter twice.
            cut = len(data) - self._HOLDBACK
            if cut <= 0:
                continue

            pieces = []
            pos = 0
            for m in url_re.finditer(data):
                if m.start() >= cut:
                    break
                if m.end() > cut:
                    # The match straddles the cut; hold it back whole.
                    cut = m.start()
                    break
                pieces.append(data[pos:m.start()])
                pieces.append(self.rewrite_re(m))
                pos = m.end()
            pieces.append(data[pos:cut])

            self.output_buffer += "".join(pieces)
            self.input_buffer = data[cut:]
            self.write_output(False)

    def write_output(self, final):
        """
        Write the output buffer to the writer in BLKSIZE pieces.

        When final is false only complete blocks are written and the
        incomplete remainder stays in the output buffer; when final is
        true everything is written.
        """
        length = len(self.output_buffer)
        for beg in range(0, length, self.BLKSIZE):
            end = beg + self.BLKSIZE
            if end > length:
                if not final:
                    # Keep the partial block for a later call.
                    self.output_buffer = self.output_buffer[beg:]
                    return
                end = length
            self.writer(self.output_buffer[beg:end])

        self.output_buffer = ''
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# Copyright (c) 2014 SwiperProxy Team | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining | ||
# a copy of this software and associated documentation files (the | ||
# "Software"), to deal in the Software without restriction, including | ||
# without limitation the rights to use, copy, modify, merge, publish | ||
# distribute, sublicense and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so, subject | ||
# to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be | ||
# included in all copies or substantial portions of the Software. | ||
|
||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
# SOFTWARE. | ||
|
||
import re | ||
|
||
# Size of the blocks read from the input and written to the output.
BLKSIZE = 65536

# A string without unescaped quote characters, followed by a quote.
# DOTALL lets an escape sequence consist of a backslash followed by a
# newline (a line continuation inside a JavaScript string); without it
# such a sequence makes the scan fail at that point.
re_scan = re.compile(r"(([^\\\"']|\\.)*)['\"]", re.S)

# A URL or hostname to rewrite: an optional http(s) scheme whose slashes
# may be backslash-escaped (group 1), a hostname ending in a known
# top-level domain (group 2, TLD in group 3) and an optional port
# (group 4).
re_url = re.compile(r"(https?:\\?/\\?/)?([a-zA-Z0-9\-\.]+\.(AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CW|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SX|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|XN--11B5BS3A9AJ6G|XN--3E0B707E|XN--45BRJ9C|XN--80AKHBYKNJ4F|XN--80AO21A|XN--90A3AC|XN--9T4B11YI5A|XN--CLCHC0EA0B2G2A9GCD|XN--DEBA0AD|XN--FIQS8S|XN--FIQZ9S|XN--FPCRJ9C3D|XN--FZC2C9E2C|XN--G6W251D|XN--GECRJ9C|XN--H2BRJ9C|XN--HGBK6AJ7F53BBA|XN--HLCJ6AYA9ESC7A|XN--J6W193G|XN--JXALPDLP|XN--KGBECHTV|XN--KPRW13D|XN--KPRY57D|XN--LGBBAT1AD8J|XN--MGBAAM7A8H|XN--MGBAYH7GPA|XN--MGBBH1A71E|XN--MGBC0A9AZCG|XN--MGBERP4A5D4AR|XN--O3CW4H|XN--OGBPF8FL|XN--P1AI|XN--PGBS0DH|XN--S9BRJ9C|XN--WGBH1C|XN--WGBL6A|XN--XKC2AL3HYE2A|XN--XKC2DL3A5EE0H|XN--YFRO4I67O|XN--YGBI2AMMX|XN--ZCKZAH|XXX|YE|YT|ZA|ZM|ZW))(?![a-zA-Z0-9\-\.])(:\d+)?", re.I)
|
||
class JSPage(object):
    """
    Streaming rewriter for JavaScript.

    Scans the input for quote characters and, for every piece of text
    that ends in a quote, rewrites a URL or hostname found at the start
    of that piece so that it points at the proxy. Input is pulled from
    the reader function and output is pushed to the writer function.
    """

    def __init__(self, config, ssl, reader, writer, remote_host):
        """
        Store the collaborators and start with empty buffers.

        reader is called with a block size and returns the next piece
        of input (something falsy at end of input); writer is called
        with each piece of output.
        """
        self.config = config
        self.ssl = ssl
        self.reader = reader
        self.writer = writer
        # Pending output pieces and their combined length.
        self.output_buffer = []
        self.output_size = 0
        # Unprocessed input starts at input_buffer[input_pos].
        self.input_buffer = ""
        self.input_pos = 0
        self.eof = False
        self.remote_host = remote_host

    def read_some(self):
        """
        Read a block of data into the input buffer. Discard any data in
        the input buffer that has already been processed. Set the EOF
        marker if there is no more input.
        """
        if self.eof:
            return
        s = self.reader(BLKSIZE)
        if not s:
            self.eof = True
            return
        self.input_buffer = self.input_buffer[self.input_pos:] + s
        self.input_pos = 0

    def output(self, s):
        """
        Put a string into the output buffer. If the total length of the
        output buffer is at least BLKSIZE, write it to the output
        stream.
        """
        self.output_buffer.append(s)
        self.output_size += len(s)
        if self.output_size >= BLKSIZE:
            self.flush()

    def flush(self):
        """
        Write the buffered output to the output stream and empty the
        buffer, so that flushing again does not emit the same data
        twice. The writer is called even when nothing is buffered.
        """
        self.writer("".join(self.output_buffer))
        self.output_buffer = []
        self.output_size = 0

    def rewrite_part(self, s):
        """
        Rewrite a URL or hostname at the start of s and return the
        result; s is returned unchanged when it does not start with one
        or when the hostname already ends with the proxy hostname.

        With a scheme, the proxy hostname is appended to the original
        hostname and any original port is replaced by the proxy port.
        Without a scheme, the proxy hostname is appended and the
        original port, if any, is kept.
        """
        m = re_url.match(s)
        if not m:
            return s

        hostname = m.group(2)
        if hostname.lower().endswith(self.config.hostname):
            return s
        scheme = m.group(1) or ''

        if self.ssl:
            port = self.config.https_port
        else:
            port = self.config.http_port

        # Not necessary to use standard port numbers. Assume proxy is
        # not doing HTTP on 443 or HTTPS on 80. Compared as strings so
        # that a port stored as text is recognised as well.
        # NOTE(review): the type of the configured ports is not visible
        # here - confirm.
        if str(port) in ('80', '443'):
            portstr = ''
        else:
            portstr = ':' + str(port)

        rest = s[m.end():]
        if scheme:
            return "".join((scheme, hostname, ".", self.config.hostname,
                            portstr, rest))
        return "".join((hostname, ".", self.config.hostname,
                        m.group(4) or '', rest))

    def rewrite(self):
        """
        Read the whole script from the reader, rewrite URLs and
        hostnames that directly follow a quote or the start of the
        input, and write the result to the writer.
        """
        max_page_size = self.config.max_page_size

        # Read the first block to make sure there is some data to work
        # with.
        self.read_some()

        while True:
            # Too much data without a quoted string match: stop and
            # flush the remainder.
            if len(self.input_buffer) >= max_page_size:
                break

            # Find the next unescaped quote character.
            m = re_scan.match(self.input_buffer, self.input_pos)
            if not m:
                # None found. If there is more input, read another
                # chunk of data and try again.
                if self.eof:
                    break
                self.read_some()
                continue

            # Rewrite a possible URL or hostname in the part, and
            # advance to the next position. For efficiency, the quote
            # is included in the string passed to rewrite_part, but
            # this is harmless because it will always be copied to the
            # output anyway.
            self.output(self.rewrite_part(m.group()))
            self.input_pos = m.end()

        # Write whatever is left in the input buffer, and flush the
        # output stream.
        self.output(self.input_buffer[self.input_pos:])
        self.flush()
Oops, something went wrong.