Skip to content

Commit

Permalink
This is for all intents and purposes, the initial commit. In reality,
Browse files Browse the repository at this point in the history
there were several commits preceding this one in a now deleted branch
for the purposes of configuring and testing against CI.
  • Loading branch information
Patrick Godschalk committed Dec 27, 2014
0 parents commit 4846de0
Show file tree
Hide file tree
Showing 117 changed files with 85,239 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.pyc
32 changes: 32 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Travis CI configuration: installs build dependencies, builds the bundled
# streamhtmlparser library and its Python module, prepares a runtime
# environment and then launches the proxy daemon.
language: python
python:
- "2.7"

install:
# install dependencies
- sudo which python
- sudo apt-get install cython python-ipy

# build streamhtmlparser and python module
- cd include/streamhtmlparser
- ./configure
- sudo make
- sudo make install
- cd src/py-streamhtmlparser
- sudo make
- sudo make install
# Four directory levels were entered above; going up five ends in the parent
# of the starting directory. The later steps address the checkout by the
# relative path "swiperproxy" -- presumably the checkout directory carries
# that name (TODO confirm).
- cd ../../../../../

# put streamhtmlparser in the proper environment
- sudo cp -R /usr/local/lib/python2.7/dist-packages/* /usr/lib/python2.7/dist-packages
- sudo ldconfig -v

# build runtime environment
# (log directory plus a dedicated system user/group that owns the checkout
# and the log directory)
- sudo mkdir /var/log/swiperproxy
- sudo addgroup --system swiperproxy
- sudo adduser --system swiperproxy --ingroup=swiperproxy --no-create-home
- sudo chown -R swiperproxy:swiperproxy swiperproxy
- sudo chown swiperproxy:swiperproxy /var/log/swiperproxy

script:
# NOTE(review): the daemon is started with --background, so this step
# presumably only verifies that the process could be launched, not that it
# keeps running or serves requests -- confirm.
- sudo start-stop-daemon --start --background --pidfile /var/run/swiperproxy.pid --make-pidfile --user swiperproxy --chuid swiperproxy --startas swiperproxy/Proxy.py -- -c swiperproxy/proxy.conf
105 changes: 105 additions & 0 deletions Buffer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) 2014 SwiperProxy Team
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO

class Buffer(object):
    """
    FIFO buffer that stores written data in a list of StringIO chunks.

    Data is appended to the newest chunk and consumed from the oldest
    one. Once a write leaves the newest chunk at ``max_size`` or more,
    a fresh chunk is started for subsequent writes, so a single chunk
    may exceed ``max_size`` by the size of the write that filled it.
    """
    MAX_BUFFER = 1024 * 32

    def __init__(self, max_size=MAX_BUFFER):
        """
        Create an empty buffer.

        max_size -- size at which the current chunk is considered full
                    and a new chunk is started.
        """
        # Chunks in FIFO order: reads consume buffers[0], writes go to
        # buffers[-1].
        self.buffers = []
        self.max_size = max_size
        # Initialised here but not used anywhere within this class.
        self.closing = False
        # Set by close().
        self.eof = False
        # Offset of the next unread character inside buffers[0].
        self.read_pos = 0
        # Offset at which the next write lands inside buffers[-1].
        self.write_pos = 0

    def write(self, data):
        """
        Append data to the newest chunk, starting a new chunk first if
        there is none and afterwards if the chunk has become full.
        """
        if not self.buffers:
            self.buffers.append(StringIO())
            self.write_pos = 0

        chunk = self.buffers[-1]
        # The chunk may have been repositioned by read() or __len__(),
        # so always seek to the remembered write offset.
        chunk.seek(self.write_pos)
        chunk.write(data)

        if chunk.tell() >= self.max_size:
            chunk = StringIO()
            self.buffers.append(chunk)
        self.write_pos = chunk.tell()

    def read(self, length=-1):
        """
        Remove and return up to length characters from the front of the
        buffer. With the default of -1 everything buffered is returned.
        Fewer characters are returned when less data is available.
        """
        pieces = []
        collected = 0
        remaining = length

        while self.buffers:
            chunk = self.buffers[0]
            chunk.seek(self.read_pos)
            piece = chunk.read(remaining)
            pieces.append(piece)
            collected += len(piece)
            self.read_pos = chunk.tell()

            if length != -1:
                remaining = length - collected
                if remaining <= 0:
                    # The request is satisfied. The chunk is kept even
                    # if it happens to be exhausted; the next read will
                    # discard it.
                    break

            # Unlimited read, or the chunk ran out before the request
            # was satisfied: drop it and continue with the next one.
            del self.buffers[0]
            self.read_pos = 0

        return "".join(pieces)

    def flush(self):
        """
        Do nothing; present so the object offers a flush() method.
        """
        pass

    def __len__(self):
        """
        Return the number of unread characters held in all chunks.
        """
        total = 0
        for index, chunk in enumerate(self.buffers):
            # Seek to the end to learn the chunk size; read() and
            # write() re-seek before use, so this is harmless.
            chunk.seek(0, 2)
            total += chunk.tell()
            if index == 0:
                # Only the oldest chunk can be partially consumed.
                total -= self.read_pos
        return total

    def close(self):
        """
        Mark the buffer as having reached end-of-file.
        """
        self.eof = True
93 changes: 93 additions & 0 deletions CSSPage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright (c) 2014 SwiperProxy Team
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
import Util

class CSSPage:
    """
    Used for a CSS stylesheet. Uses the reader function to read a
    block, rewrites that block and writes it to the client using the
    writer function.
    """
    BLKSIZE = 65536

    # Matches "background:", "background-image:" or "@import", an
    # optional "url", the opening quote/parenthesis, an absolute or
    # scheme-relative URL and the closing quote/parenthesis. Compiled
    # once, with the flags passed as real flags.
    URL_PATTERN = re.compile(
        r"(((background(-image)?\s*:)|@import)\s*(url)?\s*[('\"]+\s*)(https?:)?//([^\"')]+)(:\d+)?([)'\"]+)",
        re.I | re.M | re.S)

    def __init__(self, config, ssl, reader, writer, remote_host):
        """
        config      -- proxy configuration, passed on to Util.rewrite_URL
        ssl         -- passed on to Util.rewrite_URL
        reader      -- callable taking a size and returning the next
                       block of input; an empty result means end of
                       input
        writer      -- callable receiving each block of output
        remote_host -- passed on to Util.rewrite_URL
        """
        self.config = config
        self.ssl = ssl
        self.reader = reader
        self.writer = writer
        self.input_buffer = ''
        self.output_buffer = ''
        self.remote_host = remote_host

    def rewrite_re(self, m):
        """
        Substitution callback for URL_PATTERN: return the replacement
        text for one matched URL reference.
        """
        part1 = m.group(1) or ''    # everything up to the URL itself
        scheme = m.group(6) or ''   # "http:" / "https:" or empty
        url = m.group(7) or ''      # text after "//" up to the closer
        closer = m.group(9) or ''   # closing quote(s)/parenthesis

        rewritten = Util.rewrite_URL(scheme + "//" + url, self.config,
                                     self.ssl, self.remote_host)
        return part1 + rewritten + closer

    def rewrite(self):
        """
        Read the whole input block by block, rewrite the URL references
        in it and pass the result to the writer.
        """
        while True:
            block = self.reader(self.BLKSIZE)
            if not block:
                # End of file, there may be a left-over in the input
                # buffer.
                self.output_buffer += self.input_buffer
                self.write_output(True)
                break

            self.input_buffer += block

            # The flags must not be passed positionally to re.sub():
            # its fourth positional argument is the substitution count,
            # which silently limited the number of replacements and
            # left matching case-sensitive.
            news = self.URL_PATTERN.sub(self.rewrite_re, self.input_buffer)

            # It may be the case that the background image string is
            # divided over two blocks. Keep the last 1024 bytes in the
            # input buffer and write everything up to that point to the
            # output buffer.
            # NOTE(review): the tail that is kept has already been
            # through the substitution and is scanned again together
            # with the next block; presumably Util.rewrite_URL leaves
            # already rewritten URLs alone -- confirm.
            if len(news) > 1024:
                self.output_buffer += news[:-1024]
                self.input_buffer = news[-1024:]
            else:
                self.output_buffer += news
                self.input_buffer = ''
            self.write_output(False)

    def write_output(self, final):
        """
        Pass the output buffer to the writer in pieces of at most
        BLKSIZE. Unless final is true, a trailing piece shorter than
        BLKSIZE is kept in the output buffer for a later call.
        """
        pending = self.output_buffer
        length = len(pending)
        for beg in range(0, length, self.BLKSIZE):
            end = beg + self.BLKSIZE
            if end > length:
                if not final:
                    # Incomplete last piece: hold it back.
                    self.output_buffer = pending[beg:]
                    return
                end = length
            self.writer(pending[beg:end])

        self.output_buffer = ''
149 changes: 149 additions & 0 deletions JSPage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Copyright (c) 2014 SwiperProxy Team
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

# Size of the blocks requested from the reader, and the amount of
# buffered output at which JSPage.output() writes to the writer.
BLKSIZE = 65536

# A string without unescaped quote characters, followed by a quote.
re_scan = re.compile(r"(([^\\\"']|\\.)*)['\"]")

# A URL or hostname to rewrite.
re_url = re.compile(r"(https?:\\?/\\?/)?([a-zA-Z0-9\-\.]+\.(AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CW|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SX|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|XN--11B5BS3A9AJ6G|XN--3E0B707E|XN--45BRJ9C|XN--80AKHBYKNJ4F|XN--80AO21A|XN--90A3AC|XN--9T4B11YI5A|XN--CLCHC0EA0B2G2A9GCD|XN--DEBA0AD|XN--FIQS8S|XN--FIQZ9S|XN--FPCRJ9C3D|XN--FZC2C9E2C|XN--G6W251D|XN--GECRJ9C|XN--H2BRJ9C|XN--HGBK6AJ7F53BBA|XN--HLCJ6AYA9ESC7A|XN--J6W193G|XN--JXALPDLP|XN--KGBECHTV|XN--KPRW13D|XN--KPRY57D|XN--LGBBAT1AD8J|XN--MGBAAM7A8H|XN--MGBAYH7GPA|XN--MGBBH1A71E|XN--MGBC0A9AZCG|XN--MGBERP4A5D4AR|XN--O3CW4H|XN--OGBPF8FL|XN--P1AI|XN--PGBS0DH|XN--S9BRJ9C|XN--WGBH1C|XN--WGBL6A|XN--XKC2AL3HYE2A|XN--XKC2DL3A5EE0H|XN--YFRO4I67O|XN--YGBI2AMMX|XN--ZCKZAH|XXX|YE|YT|ZA|ZM|ZW))(?![a-zA-Z0-9\-\.])(:\d+)?", re.I)

class JSPage(object):
    """
    Used for a JavaScript resource. Scans the input for quote
    characters and rewrites a URL or hostname found at the start of
    the text following a quote so that it gets the proxy hostname
    appended. Input comes from the reader function, output goes to the
    writer function.
    """

    def __init__(self, config, ssl, reader, writer, remote_host):
        """
        config      -- object providing hostname, http_port, https_port
                       and max_page_size
        ssl         -- true to use config.https_port for rewritten
                       URLs, false to use config.http_port
        reader      -- callable taking a size and returning the next
                       block of input; an empty result means end of
                       input
        writer      -- callable receiving each block of output
        remote_host -- stored on the instance; not used in this class
        """
        self.config = config
        self.ssl = ssl
        self.reader = reader
        self.writer = writer
        # Pieces of pending output and their combined length.
        self.output_buffer = []
        self.output_size = 0
        # Unprocessed input and the offset of the next character to
        # scan within it.
        self.input_buffer = ""
        self.input_pos = 0
        self.eof = False
        self.remote_host = remote_host

    def read_some(self):
        """
        Read a block of data into the input buffer. Discard any data in
        the input buffer that has already been processed. Set the EOF
        marker if there is no more input.
        """
        if self.eof:
            return
        block = self.reader(BLKSIZE)
        if not block:
            self.eof = True
            return
        self.input_buffer = self.input_buffer[self.input_pos:] + block
        self.input_pos = 0

    def output(self, s):
        """
        Put a string into the output buffer. If the total length of the
        output buffer is at least BLKSIZE, write it to the output
        stream.
        """
        self.output_buffer.append(s)
        self.output_size += len(s)
        if self.output_size >= BLKSIZE:
            self.writer("".join(self.output_buffer))
            self.output_buffer = []
            self.output_size = 0

    def flush(self):
        """
        At the end, flush any remaining data in the output buffer.
        """
        self.writer("".join(self.output_buffer))
        # Forget what has just been written; otherwise a second flush
        # or a later output() would emit the same data again.
        self.output_buffer = []
        self.output_size = 0

    def rewrite_part(self, s):
        """
        Rewrite a URL or hostname at the very start of s, if any, and
        return the resulting string. Strings that do not start with one
        and hostnames that already end in the proxy hostname are
        returned unchanged.
        """
        m = re_url.match(s)
        if not m:
            return s

        hostname = m.group(2)
        if hostname.lower().endswith(self.config.hostname):
            return s
        scheme = m.group(1) or ''

        if scheme:
            # A full URL: replace any port with the proxy port.
            if self.ssl:
                port = self.config.https_port
            else:
                port = self.config.http_port

            # Not necessary to use standard port numbers. Assume proxy
            # is not doing HTTP on 443 or HTTPS on 80.
            if port == 80 or port == 443:
                suffix = ''
            else:
                suffix = ':' + str(port)
        else:
            # A bare hostname: keep the port it came with, if any.
            suffix = m.group(4) or ''

        return "".join((scheme, hostname, ".", self.config.hostname,
                        suffix, s[m.end():]))

    def rewrite(self):
        """
        Process the input up to end of input or until the input buffer
        reaches config.max_page_size, then write out what is left in
        the input buffer and flush the output.
        """
        max_page_size = self.config.max_page_size

        # Read the first block to make sure there is some data to work
        # with.
        self.read_some()

        while True:
            s = self.input_buffer
            p = self.input_pos

            # Too much data without a quoted string match: stop and
            # flush the remainder.
            # NOTE(review): input the reader has not delivered yet is
            # not read after this point -- confirm that dropping it is
            # intended.
            if len(s) >= max_page_size:
                break

            # Find the next unescaped quote character.
            m = re_scan.match(s, p)
            if not m:
                # None found. If there is more input, read another
                # chunk of data and try again.
                if self.eof:
                    break
                self.read_some()
                continue

            # Rewrite a possible URL or hostname in the part, and
            # advance to the next position. For efficiency, the quote
            # is included in the string passed to rewrite_part, but
            # this is harmless because it will always be copied to the
            # output anyway.
            self.output(self.rewrite_part(m.group()))
            self.input_pos = m.end()

        # Write whatever is left in the input buffer, and flush the
        # output stream. s and p still describe the state seen at the
        # top of the iteration that left the loop.
        self.output(s[p:])
        self.flush()
Loading

0 comments on commit 4846de0

Please sign in to comment.