diff --git a/main.py b/main.py index 78ac6c7..5e5f9d6 100644 --- a/main.py +++ b/main.py @@ -29,12 +29,13 @@ from ssl import SSLError -import telescope.selector -import telescope.query +import telescope.external +import telescope.filters import telescope.metrics_math import telescope.mlab -import telescope.filters -import telescope.external +import telescope.query +import telescope.selector +import telescope.utils class NoClientNetworkBlocksFound(Exception): @@ -128,16 +129,6 @@ def setup_logger(verbosity_level = 0): return logger -def create_directory_if_not_exists(passed_selector): - if not os.path.exists(passed_selector): - try: - os.makedirs(passed_selector) - except OSError: - raise argparse.ArgumentError(('{0} does not exist, is not readable or ' - 'could not be created.').format(passed_selector)) - return passed_selector - - def write_metric_calculations_to_file(data_filepath, metric_calculations, should_write_header = False): """ Writes metric data to a file in CSV format. @@ -217,6 +208,7 @@ def build_filename(resource_type, outpath, date, duration, site, client_provider client_provider = client_provider, metric = metric, extension = extensions[resource_type]) + filename = telescope.utils.strip_special_chars(filename) filepath = os.path.join(outpath, filename) return filepath @@ -550,7 +542,7 @@ def main(args): help="variable output verbosity (e.g., -vv is more than -v)") parser.add_argument('-o', '--output', default='processed/', help='Output file path. 
If the folder does not exist, it will be created.', - type=create_directory_if_not_exists) + type=telescope.utils.create_directory_if_not_exists) parser.add_argument('--maxminddir', default='resources/', help='MaxMind GeoLite ASN snapshot directory.') parser.add_argument('--savequery', default=False, action='store_true', help='Save the BigQuery statement to the [output] directory as a .sql') diff --git a/telescope/query.py b/telescope/query.py index 3e814d9..d6f9f99 100644 --- a/telescope/query.py +++ b/telescope/query.py @@ -150,22 +150,22 @@ def _create_query_string(self, mlab_project = 'ndt'): for field in non_null_fields: non_null_conditions.append('%s IS NOT NULL' % field) - select_list_string = ", \n\t".join(self._select_list) + select_list_string = ",\n\t".join(self._select_list) table_list_string = ',\n\t'.join(self._table_list) - conditional_list_string = " \n\tAND ".join(non_null_conditions + tool_specific_conditions) + conditional_list_string = "\n\tAND ".join(non_null_conditions + tool_specific_conditions) if self._conditional_dict.has_key('data_direction') is True: conditional_list_string += "\n\tAND {data_direction}".format( data_direction = self._conditional_dict['data_direction']) - log_times_joined = " OR \n\t".join(self._conditional_dict['log_time']) + log_times_joined = " OR\n\t".join(self._conditional_dict['log_time']) conditional_list_string += "\n\tAND ({log_times})".format(log_times = log_times_joined) - server_ips_joined = " OR \n\t\t".join(self._conditional_dict['server_ip']) + server_ips_joined = " OR\n\t\t".join(self._conditional_dict['server_ip']) conditional_list_string += "\n\tAND ({server_ips})".format(server_ips = server_ips_joined) - client_ips_joined = " OR \n\t\t".join(self._conditional_dict['client_network_block']) + client_ips_joined = " OR\n\t\t".join(self._conditional_dict['client_network_block']) conditional_list_string += "\n\tAND ({client_ips})".format(client_ips = client_ips_joined) built_query_string = 
def make_datetime_utc_aware(datetime_timestamp):
    """ Returns a copy of a datetime with its tzinfo replaced by UTC.

    Args:
        datetime_timestamp (datetime.datetime): Timestamp to tag. Only the
            tzinfo is replaced; the date and time fields are left as they are.

    Returns:
        datetime.datetime: Copy of the timestamp carrying a UTC() tzinfo.
    """
    return datetime_timestamp.replace(tzinfo = UTC())


def unix_timestamp_to_utc_datetime(unix_timestamp):
    """ Converts a Unix timestamp into a UTC-aware datetime.

    Args:
        unix_timestamp (int or float): Seconds since the Unix epoch.

    Returns:
        datetime.datetime: The corresponding datetime with a UTC() tzinfo.
    """
    return datetime.datetime.fromtimestamp(unix_timestamp, tz = UTC())


def check_for_valid_cache(cache_path, manifest_path = None):
    """ Checks for results file previously generated by this tool.

    Args:
        cache_path (str): Built path to cache file that we are interested in.
        manifest_path (str, optional): Accepted for interface compatibility
            but not consulted by the current implementation. Defaults to None.

    Returns:
        bool: True if something exists at cache_path, False otherwise.
    """
    does_file_exist_at_cache_path = os.path.exists(cache_path)
    return does_file_exist_at_cache_path


def strip_special_chars(filename):
    """ Removes shell special characters from a filename.

    Args:
        filename (str): Filename to be sanitized. Note that this should be a
            single filename and not a full path, as this will strip path
            separators.

    Returns:
        (str) Sanitized version of filename.
    """
    special_chars = '\\/"\'`<>|:;\t\n?#$^&*='
    # Single filtering pass instead of one str.replace() call per character.
    return ''.join(char for char in filename if char not in special_chars)


def create_directory_if_not_exists(directory_name):
    """ Ensures a directory exists, creating it (and any parents) if needed.

    Intended for use as an argparse ``type=`` callable, which is why failures
    are reported as argparse.ArgumentTypeError: argparse turns that exception
    into a regular usage error message for the user.

    Args:
        directory_name (str): Path of the directory that should exist.

    Returns:
        str: directory_name, unchanged, so argparse stores the path as given.

    Raises:
        argparse.ArgumentTypeError: If the path does not exist and could not
            be created.
    """
    # Imported locally so this helper does not depend on the module-level
    # import block already providing argparse.
    import argparse

    if not os.path.exists(directory_name):
        try:
            os.makedirs(directory_name)
        except OSError:
            # Another process may have created the directory between the
            # existence check and makedirs(); that outcome is a success.
            if not os.path.isdir(directory_name):
                raise argparse.ArgumentTypeError(
                    ('{0} does not exist, is not readable or '
                     'could not be created.').format(directory_name))
    return directory_name