From 6f42a63775c648d3d14017e66daf600cee75b375 Mon Sep 17 00:00:00 2001 From: Michael Lynch Date: Tue, 28 Oct 2014 18:14:31 -0400 Subject: [PATCH 1/4] Removing trailing whitespace in generated BigQuery queries. This fixes issue #5 --- telescope/query.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/telescope/query.py b/telescope/query.py index 3e814d9..d6f9f99 100644 --- a/telescope/query.py +++ b/telescope/query.py @@ -150,22 +150,22 @@ def _create_query_string(self, mlab_project = 'ndt'): for field in non_null_fields: non_null_conditions.append('%s IS NOT NULL' % field) - select_list_string = ", \n\t".join(self._select_list) + select_list_string = ",\n\t".join(self._select_list) table_list_string = ',\n\t'.join(self._table_list) - conditional_list_string = " \n\tAND ".join(non_null_conditions + tool_specific_conditions) + conditional_list_string = "\n\tAND ".join(non_null_conditions + tool_specific_conditions) if self._conditional_dict.has_key('data_direction') is True: conditional_list_string += "\n\tAND {data_direction}".format( data_direction = self._conditional_dict['data_direction']) - log_times_joined = " OR \n\t".join(self._conditional_dict['log_time']) + log_times_joined = " OR\n\t".join(self._conditional_dict['log_time']) conditional_list_string += "\n\tAND ({log_times})".format(log_times = log_times_joined) - server_ips_joined = " OR \n\t\t".join(self._conditional_dict['server_ip']) + server_ips_joined = " OR\n\t\t".join(self._conditional_dict['server_ip']) conditional_list_string += "\n\tAND ({server_ips})".format(server_ips = server_ips_joined) - client_ips_joined = " OR \n\t\t".join(self._conditional_dict['client_network_block']) + client_ips_joined = " OR\n\t\t".join(self._conditional_dict['client_network_block']) conditional_list_string += "\n\tAND ({client_ips})".format(client_ips = client_ips_joined) built_query_string = built_query_format.format(select_list = select_list_string, From 
a04deb169414da188e6e9b4c1e9ca5fa4cb765e7 Mon Sep 17 00:00:00 2001 From: Michael Lynch Date: Wed, 5 Nov 2014 11:28:30 -0500 Subject: [PATCH 2/4] Fixing variable name --- main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 78ac6c7..04f1af0 100644 --- a/main.py +++ b/main.py @@ -128,14 +128,14 @@ def setup_logger(verbosity_level = 0): return logger -def create_directory_if_not_exists(passed_selector): - if not os.path.exists(passed_selector): +def create_directory_if_not_exists(directory_name): + if not os.path.exists(directory_name): try: - os.makedirs(passed_selector) + os.makedirs(directory_name) except OSError: raise argparse.ArgumentError(('{0} does not exist, is not readable or ' - 'could not be created.').format(passed_selector)) - return passed_selector + 'could not be created.').format(directory_name)) + return directory_name def write_metric_calculations_to_file(data_filepath, metric_calculations, should_write_header = False): From a131ebf139e0b0831c9b27ab9237b3d0784141d6 Mon Sep 17 00:00:00 2001 From: Michael Lynch Date: Thu, 6 Nov 2014 18:50:20 -0500 Subject: [PATCH 3/4] This moves some code from main.py to utils and strips special characters out of output filenames (fixes issue #6) --- main.py | 22 +++++++--------------- telescope/utils.py | 38 +++++++++++++++++++++++++++++++++----- telescope/utils_test.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 20 deletions(-) create mode 100644 telescope/utils_test.py diff --git a/main.py b/main.py index 04f1af0..5e5f9d6 100644 --- a/main.py +++ b/main.py @@ -29,12 +29,13 @@ from ssl import SSLError -import telescope.selector -import telescope.query +import telescope.external +import telescope.filters import telescope.metrics_math import telescope.mlab -import telescope.filters -import telescope.external +import telescope.query +import telescope.selector +import telescope.utils class NoClientNetworkBlocksFound(Exception): @@ 
-128,16 +129,6 @@ def setup_logger(verbosity_level = 0): return logger -def create_directory_if_not_exists(directory_name): - if not os.path.exists(directory_name): - try: - os.makedirs(directory_name) - except OSError: - raise argparse.ArgumentError(('{0} does not exist, is not readable or ' - 'could not be created.').format(directory_name)) - return directory_name - - def write_metric_calculations_to_file(data_filepath, metric_calculations, should_write_header = False): """ Writes metric data to a file in CSV format. @@ -217,6 +208,7 @@ def build_filename(resource_type, outpath, date, duration, site, client_provider client_provider = client_provider, metric = metric, extension = extensions[resource_type]) + filename = telescope.utils.strip_special_chars(filename) filepath = os.path.join(outpath, filename) return filepath @@ -550,7 +542,7 @@ def main(args): help="variable output verbosity (e.g., -vv is more than -v)") parser.add_argument('-o', '--output', default='processed/', help='Output file path. If the folder does not exist, it will be created.', - type=create_directory_if_not_exists) + type=telescope.utils.create_directory_if_not_exists) parser.add_argument('--maxminddir', default='resources/', help='MaxMind GeoLite ASN snapshot directory.') parser.add_argument('--savequery', default=False, action='store_true', help='Save the BigQuery statement to the [output] directory as a .sql') diff --git a/telescope/utils.py b/telescope/utils.py index 2eb9f67..2617eb0 100644 --- a/telescope/utils.py +++ b/telescope/utils.py @@ -28,23 +28,51 @@ def tzname(self, dt): def dst(self, dt): return datetime.timedelta(0) + def make_datetime_utc_aware(datetime_timestamp): return datetime_timestamp.replace(tzinfo = UTC()) + def unix_timestamp_to_utc_datetime(unix_timestamp): return datetime.datetime.fromtimestamp(unix_timestamp, tz = UTC()) + def check_for_valid_cache(cache_path, manifest_path = None): """ Checks for results file previously generated by this tool. 
- Args: - cache_path (str): Built path to cache file that we are interested in. - manifest_path (str, optional): Built path to cache file that we are interested in. Defaults to None. + Args: + cache_path (str): Built path to cache file that we are interested in. + manifest_path (str, optional): Built path to cache file that we are interested in. Defaults to None. - Returns: - bool: True if valid file, False otherwise. + Returns: + bool: True if valid file, False otherwise. """ does_file_exist_at_cache_path = os.path.exists(cache_path) return does_file_exist_at_cache_path + +def strip_special_chars(filename): + """ Removes shell special characters from a filename. + + Args: + filename (str): Filename to be sanitized. + + Returns: + (str) Sanitized version of filename. + """ + sanitized = filename + special_chars = '\\/"\'`<>|:;\t\n?#$^&*=' + for special_char in special_chars: + sanitized = sanitized.replace(special_char, '') + return sanitized + + +def create_directory_if_not_exists(directory_name): + if not os.path.exists(directory_name): + try: + os.makedirs(directory_name) + except OSError: + raise ArgumentError(('{0} does not exist, is not readable or ' + 'could not be created.').format(directory_name)) + return directory_name diff --git a/telescope/utils_test.py b/telescope/utils_test.py new file mode 100644 index 0000000..8617590 --- /dev/null +++ b/telescope/utils_test.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +# +# Copyright 2014 Measurement Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import utils +import unittest + +class UtilsTest(unittest.TestCase): + + + def test_(self): + self.assertEquals('att.csv', utils.strip_special_chars('at&t.csv')) + self.assertEquals('att.csv', utils.strip_special_chars('at&&&&&&&&t.csv')) + self.assertEquals('att.csv', utils.strip_special_chars('at&&/;$&&&&&&t.csv')) + self.assertEquals('maxmin-counts.csv', utils.strip_special_chars('max/min-counts.csv')) + self.assertEquals('namesplacesdates.csv', utils.strip_special_chars(r'names\places\dates.csv')) + self.assertEquals('spaces are okay.csv', utils.strip_special_chars('spaces are okay.csv')) + +if __name__ == '__main__': + unittest.main() From a96998ffb6c68bc4d2b3e3c224392f41deadaedb Mon Sep 17 00:00:00 2001 From: Michael Lynch Date: Thu, 6 Nov 2014 18:53:55 -0500 Subject: [PATCH 4/4] Adding some details to docstring for strip_special_chars --- telescope/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/telescope/utils.py b/telescope/utils.py index 2617eb0..f87fe82 100644 --- a/telescope/utils.py +++ b/telescope/utils.py @@ -56,7 +56,8 @@ def strip_special_chars(filename): """ Removes shell special characters from a filename. Args: - filename (str): Filename to be sanitized. + filename (str): Filename to be sanitized. Note that this should be a single + filename and not a full path, as this will strip path separators. Returns: (str) Sanitized version of filename.