Skip to content
This repository has been archived by the owner on Aug 31, 2021. It is now read-only.

Commit

Permalink
Merge pull request #16 from stasfilin/fix_styles
Browse files Browse the repository at this point in the history
Update styles PEP8
spencebeecher authored Jan 31, 2018

Verified

This commit was signed with the committer’s verified signature.
c0rydoras Arthur
2 parents 0f8f624 + c5b765d commit c299c82
Showing 4 changed files with 51 additions and 50 deletions.
3 changes: 2 additions & 1 deletion examples/pysparnn_utils.py
Original file line number Diff line number Diff line change
@@ -5,9 +5,10 @@
# LICENSE-examples file in the root directory of this source tree.
import numpy as np


# code that will measure query time and recall
def recall(query, full_set):
ret = []
ret = []
for r_items, t_items in zip(query, full_set):
result = 0.0
for r in np.unique(r_items):
23 changes: 10 additions & 13 deletions pysparnn/cluster_index.py
Original file line number Diff line number Diff line change
@@ -6,15 +6,16 @@
# of patent rights can be found in the PATENTS file in the same directory.
"""Defines a cluster pruning search structure to do K-NN Queries"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals

import collections as _collections
import random as _random

import numpy as _np

import pysparnn.matrix_distance


def _k_best(tuple_list, k):
"""For a list of tuples [(distance, value), ...] - Get the k-best tuples by
distance.
@@ -27,6 +28,7 @@ def _k_best(tuple_list, k):

return tuple_lst


def _filter_unique(tuple_list):
"""For a list of tuples [(distance, value), ...] - filter out duplicate
values.
@@ -111,7 +113,7 @@ def __init__(self, features, records_data,

self.matrix_size = matrix_size

num_levels = _np.log(num_records)/_np.log(self.matrix_size)
num_levels = _np.log(num_records) / _np.log(self.matrix_size)

if num_levels <= 1.4:
self.is_terminal = True
@@ -131,7 +133,7 @@ def __init__(self, features, records_data,
list(_np.arange(clusters_selection.shape[0])))

root.remove_near_duplicates()
root = distance_type(root.matrix,
root = distance_type(root.matrix,
list(_np.arange(root.matrix.shape[0])))

rng_step = self.matrix_size
@@ -162,7 +164,6 @@ def __init__(self, features, records_data,

self.root = distance_type(cluster_keeps, clusters)


def insert(self, feature, record):
"""Insert a single record into the index.
@@ -185,8 +186,6 @@ def insert(self, feature, record):

cluster_index._reindex(feature, record)



def _get_child_data(self):
"""Get all of the features and corresponding records represented in the
full tree structure.
@@ -229,8 +228,7 @@ def _reindex(self, feature=None, record=None):
flat_rec.append(record)

self.__init__(self.distance_type.vstack(features), flat_rec, self.distance_type,
self.desired_matrix_size, self.parent)

self.desired_matrix_size, self.parent)

def _search(self, features, k=1, k_clusters=1):
"""Find the closest item(s) for each feature_list in.
@@ -277,7 +275,7 @@ def _search(self, features, k=1, k_clusters=1):
return ret

def search(self, features, k=1, k_clusters=1,
return_distance=True):
return_distance=True):
"""Find the closest item(s) for each feature_list in the index.
Args:
@@ -423,7 +421,6 @@ class docstring for a description of the method.
at the cost of memory.
"""


self.indexes = []
for _ in range(num_indexes):
self.indexes.append((ClusterIndex(features, records_data,
23 changes: 13 additions & 10 deletions pysparnn/matrix_distance.py
Original file line number Diff line number Diff line change
@@ -6,15 +6,15 @@
# of patent rights can be found in the PATENTS file in the same directory.
"""Defines a distance search structure"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals

import abc as _abc

import numpy as _np
import scipy.sparse as _sparse
import scipy.spatial.distance as _spatial_distance


class MatrixMetricSearch(object):
"""A matrix representation out of features."""
__metaclass__ = _abc.ABCMeta
@@ -152,7 +152,7 @@ def __init__(self, features, records_data):
m_c = self.matrix.copy()
m_c.data **= 2
self.matrix_root_sum_square = \
_np.sqrt(_np.asarray(m_c.sum(axis=1)).reshape(-1))
_np.sqrt(_np.asarray(m_c.sum(axis=1)).reshape(-1))

@staticmethod
def features_to_matrix(features):
@@ -186,13 +186,14 @@ def _distance(self, a_matrix):
a_c.data **= 2
a_root_sum_square = _np.asarray(a_c.sum(axis=1)).reshape(-1)
a_root_sum_square = \
a_root_sum_square.reshape(len(a_root_sum_square), 1)
a_root_sum_square.reshape(len(a_root_sum_square), 1)
a_root_sum_square = _np.sqrt(a_root_sum_square)

magnitude = 1.0 / (a_root_sum_square * self.matrix_root_sum_square)

return 1 - dprod.multiply(magnitude).toarray()


class UnitCosineDistance(MatrixMetricSearch):
"""A matrix that implements cosine distance search against it.
@@ -210,7 +211,7 @@ class UnitCosineDistance(MatrixMetricSearch):
def __init__(self, features, records_data):
super(UnitCosineDistance, self).__init__(features, records_data)
self.matrix_root_sum_square = \
_np.sqrt(_np.asarray(self.matrix.sum(axis=1)).reshape(-1))
_np.sqrt(_np.asarray(self.matrix.sum(axis=1)).reshape(-1))

@staticmethod
def features_to_matrix(features):
@@ -242,13 +243,14 @@ def _distance(self, a_matrix):

a_root_sum_square = _np.asarray(a_matrix.sum(axis=1)).reshape(-1)
a_root_sum_square = \
a_root_sum_square.reshape(len(a_root_sum_square), 1)
a_root_sum_square.reshape(len(a_root_sum_square), 1)
a_root_sum_square = _np.sqrt(a_root_sum_square)

magnitude = 1.0 / (a_root_sum_square * self.matrix_root_sum_square)

return 1 - dprod.multiply(magnitude).toarray()


class SlowEuclideanDistance(MatrixMetricSearch):
"""A matrix that implements euclidean distance search against it.
WARNING: This is not optimized.
@@ -286,6 +288,7 @@ def _distance(self, a_matrix):

return _spatial_distance.cdist(a_matrix, self.matrix, 'euclidean')


class DenseCosineDistance(MatrixMetricSearch):
"""A matrix that implements cosine distance search against it.
@@ -300,7 +303,7 @@ def __init__(self, features, records_data):
super(DenseCosineDistance, self).__init__(features, records_data)

self.matrix_root_sum_square = \
_np.sqrt((self.matrix**2).sum(axis=1).reshape(-1))
_np.sqrt((self.matrix ** 2).sum(axis=1).reshape(-1))

@staticmethod
def features_to_matrix(features):
@@ -330,7 +333,7 @@ def _distance(self, a_matrix):
# what is the implementation of transpose? can I change the order?
dprod = self.matrix.dot(a_matrix.transpose()).transpose() * 1.0

a_root_sum_square = (a_matrix**2).sum(axis=1).reshape(-1)
a_root_sum_square = (a_matrix ** 2).sum(axis=1).reshape(-1)
a_root_sum_square = a_root_sum_square.reshape(len(a_root_sum_square), 1)
a_root_sum_square = _np.sqrt(a_root_sum_square)

52 changes: 26 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
from distutils.core import setup

#This is a list of files to install, and where
#(relative to the 'root' dir, where setup.py is)
#You could be more specific.
# This is a list of files to install, and where
# (relative to the 'root' dir, where setup.py is)
# You could be more specific.
files = []

setup(name = "pysparnn",
version = "0.4",
description = "Sparse (approximate) nearest neighbor search for python!",
author = "Spencer Beecher",
author_email = "[email protected]",
#url = "",
#Name the folder where your packages live:
#(If you have other packages (dirs) or modules (py files) then
#put them into the package directory - they will be found
#recursively.)
packages = ['pysparnn'],
#'package' package must contain files (see list above)
#I called the package 'package' thus cleverly confusing the whole issue...
#This dict maps the package name =to=> directories
#It says, package *needs* these files.
#package_data = {},
#'runner' is in the root.
#scripts = [],
long_description = """Sparse (approximate) nearest neighbor search for python!"""
#
#This next part is for the Cheese Shop, look a little down the page.
#classifiers = []
)
setup(name="pysparnn",
version="0.4",
description="Sparse (approximate) nearest neighbor search for python!",
author="Spencer Beecher",
author_email="[email protected]",
# url = "",
# Name the folder where your packages live:
# (If you have other packages (dirs) or modules (py files) then
# put them into the package directory - they will be found
# recursively.)
packages=['pysparnn'],
# 'package' package must contain files (see list above)
# I called the package 'package' thus cleverly confusing the whole issue...
# This dict maps the package name =to=> directories
# It says, package *needs* these files.
# package_data = {},
# 'runner' is in the root.
# scripts = [],
long_description="""Sparse (approximate) nearest neighbor search for python!"""
#
# This next part is for the Cheese Shop, look a little down the page.
# classifiers = []
)

0 comments on commit c299c82

Please sign in to comment.