performance tweaks of the batch correlate and inner product functions. (gwastro#1124)

* slightly increase performance

* add mfilter part

* minor style improvements

* fix start

* update unittest
ahnitz authored and cmbiwer committed Oct 5, 2016
1 parent 68d2fc2 commit 453ab80
Showing 7 changed files with 92 additions and 54 deletions.
16 changes: 9 additions & 7 deletions pycbc/filter/matchedfilter.py
@@ -1420,6 +1420,8 @@ def _process_batch(self):
self.corr[self.block_id].execute(stilde)
self.ifts[mid].execute()

self.block_id += 1

snr = numpy.zeros(len(tgroup), dtype=numpy.complex64)
time = numpy.zeros(len(tgroup), dtype=numpy.float64)
templates = numpy.zeros(len(tgroup), dtype=numpy.uint64)
@@ -1437,27 +1439,28 @@ def _process_batch(self):
# Find the peaks in our SNR times series from the various templates
i = 0
for htilde in tgroup:
m, l = htilde.out[seg].abs_max_loc()
l = htilde.out[seg].abs_arg_max()

sgm = htilde.sigmasq(psd)
norm = 4.0 * htilde.delta_f / (sgm ** 0.5)

l += valid_start
snrv = numpy.array([htilde.out[l]])

# If nothing is above threshold we can exit this template
s = m * norm
s = abs(snrv[0]) * norm
if s < self.snr_threshold:
continue

time[i] += float(l) / self.data.sample_rate
l += valid_start
time[i] += float(l - valid_start) / self.data.sample_rate

# We have an SNR so high that we will drop the entire analysis
# of this chunk of time!
if self.snr_abort_threshold is not None and s > self.snr_abort_threshold:
logging.info("We are seeing some *really* high SNRs, lets"
"assume they aren't signals and just give up")
" assume they aren't signals and just give up")
return False, []

snrv = numpy.array([htilde.out[l]])
veto_info.append((snrv, norm, l, htilde, stilde))

snr[i] = snrv[0] * norm
@@ -1481,7 +1484,6 @@ def _process_batch(self):
for key in tkeys:
result[key] = numpy.array(result[key])

self.block_id += 1
return result, veto_info

__all__ = ['match', 'matched_filter', 'sigmasq', 'sigma', 'get_cutoff_indices',
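For orientation, the reordered loop above now locates the peak with the new abs_arg_max(), normalizes that single sample, and only then applies the SNR threshold, so quiet templates exit early. A standalone NumPy sketch of that flow (illustrative names and toy numbers, not the actual MatchedFilterControl attributes; norm is assumed to be precomputed from sigmasq as in the diff):

import numpy

def loudest_peak(snr_series, valid_start, valid_end, norm,
                 sample_rate, snr_threshold):
    """Return (|SNR|, complex SNR, peak time in s) or None if below threshold."""
    segment = snr_series[valid_start:valid_end]
    # abs_arg_max equivalent: index of the largest complex magnitude.
    l = int(numpy.argmax(segment.real ** 2 + segment.imag ** 2))
    snrv = segment[l]
    s = abs(snrv) * norm
    if s < snr_threshold:           # nothing above threshold for this template
        return None
    return s, snrv * norm, float(l) / sample_rate

# toy usage with fabricated data
snr = numpy.zeros(4096, dtype=numpy.complex64)
snr[1234] = 80 + 15j
print(loudest_peak(snr, 100, 4000, norm=0.1, sample_rate=4096.0, snr_threshold=5.5))
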
1 change: 1 addition & 0 deletions pycbc/filter/matchedfilter_cpu.py
@@ -29,6 +29,7 @@
from .matchedfilter import _BaseCorrelator

batch_correlator_code = """
#pragma omp parallel for
for (int i=0; i<num_vectors; i++){
std::complex<float>* xp = (std::complex<float>*) x[i];
std::complex<float>* zp = (std::complex<float>*) z[i];
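The only functional change in this file is the #pragma omp parallel for, which spreads the per-template correlations of a batch across OpenMP threads. Loosely, each iteration is an elementwise frequency-domain correlation of one template against the data segment; a rough single-threaded NumPy equivalent (illustrative only, and assuming the kernel computes conj(template) * data like the scalar correlate kernel):

import numpy

def batch_correlate(templates, stilde, outputs):
    """Correlate one frequency-domain data segment against many templates."""
    # The C kernel runs this outer loop across OpenMP threads,
    # one template per iteration, writing into preallocated outputs.
    for htilde, out in zip(templates, outputs):
        numpy.multiply(numpy.conj(htilde), stilde, out=out)

# toy usage
stilde = numpy.array([1 + 1j, 2 - 1j, 0.5j], dtype=numpy.complex64)
templates = [numpy.array([1j, 1, 1 - 1j], dtype=numpy.complex64) for _ in range(4)]
outputs = [numpy.empty(3, dtype=numpy.complex64) for _ in range(4)]
batch_correlate(templates, stilde, outputs)
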
11 changes: 9 additions & 2 deletions pycbc/types/array.py
@@ -46,7 +46,7 @@
# we should restrict any functions that do not allow an
# array of uint32 integers
_ALLOWED_DTYPES = [_numpy.float32, _numpy.float64, _numpy.complex64,
_numpy.complex128, _numpy.uint32]
_numpy.complex128, _numpy.uint32, _numpy.int32, _numpy.int]
_ALLOWED_SCALARS = [int, long, float, complex] + _ALLOWED_DTYPES

def _convert_to_scheme(ary):
@@ -674,6 +674,11 @@ def max(self):
def max_loc(self):
"""Return the maximum value in the array along with the index location """

@_convert
@schemed(BACKEND_PREFIX)
def abs_arg_max(self):
""" Return location of the maximum argument max """

@_convert
@schemed(BACKEND_PREFIX)
def abs_max_loc(self):
@@ -808,8 +813,10 @@ def precision(self):
def kind(self):
if self.dtype == float32 or self.dtype == float64:
return 'real'
elif self.dtype == complex64 or self.dtype == complex128:
return 'complex'
else:
return 'complex'
return 'unknown'

@property
@_convert
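Two API-level points in this file: integer dtypes (int32 and the native int) join the allowed list, and kind now reports 'unknown' for them instead of falling through to 'complex'. A small sketch of the updated kind branching for plain NumPy arrays (not the pycbc property itself):

import numpy

def kind_of(a):
    """Mirror the updated Array.kind branching for a NumPy array (sketch only)."""
    if a.dtype in (numpy.float32, numpy.float64):
        return 'real'
    elif a.dtype in (numpy.complex64, numpy.complex128):
        return 'complex'
    else:
        return 'unknown'     # e.g. the newly allowed uint32/int32 arrays

assert kind_of(numpy.zeros(3, dtype=numpy.float64)) == 'real'
assert kind_of(numpy.zeros(3, dtype=numpy.complex64)) == 'complex'
assert kind_of(numpy.zeros(3, dtype=numpy.int32)) == 'unknown'

The abs_arg_max entry here is only a scheme-dispatched declaration; its CPU implementation is in pycbc/types/array_cpu.py below.
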
70 changes: 65 additions & 5 deletions pycbc/types/array_cpu.py
@@ -27,6 +27,10 @@
from array import common_kind, complex128, float64
import aligned as _algn
from scipy.linalg import blas
from scipy.weave import inline
from pycbc.opt import omp_libs, omp_flags
from pycbc import WEAVE_FLAGS
from pycbc.types import real_same_precision_as

def zeros(length, dtype=_np.float64):
return _algn.zeros(length, dtype=dtype)
@@ -41,12 +45,45 @@ def dot(self, other):
return _np.dot(self._data,other)

def min(self):
return self.data.min()
return self.data.min()

code_abs_arg_max = """
float val = 0;
int l = 0;
for (int i=0; i<N; i++){
float mag = data[i*2] * data[i*2] + data[i*2+1] * data[i*2+1];
if ( mag > val){
l = i;
val = mag;
}
}
loc[0] = l;
"""
code_flags = [WEAVE_FLAGS + '-march=native -O3 -w'] + omp_flags


def abs_arg_max(self):
if self.kind == 'real':
return _np.argmax(self.data)
else:
data = _np.array(self._data,
copy=False).view(real_same_precision_as(self))
loc = _np.array([0])
N = len(self)
inline(code_abs_arg_max, ['data', 'loc', 'N'], libraries=omp_libs,
extra_compile_args=code_flags)
return loc[0]

def abs_max_loc(self):
tmp = abs(self.data)
ind = _np.argmax(tmp)
return tmp[ind], ind
if self.kind == 'real':
tmp = abs(self.data)
ind = _np.argmax(tmp)
return tmp[ind], ind
else:
tmp = self.data.real ** 2.0
tmp += self.data.imag ** 2.0
ind = _np.argmax(tmp)
return tmp[ind] ** 0.5, ind

def cumsum(self):
return self.data.cumsum()
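
The weave kernel above scans |z|^2 = re^2 + im^2 rather than |z|, so the per-sample square root disappears and only the final abs_max_loc result takes one sqrt. For readers without scipy.weave, a plain NumPy stand-in for the complex branches (not the compiled kernel):

import numpy

def abs_arg_max_numpy(data):
    """Index of the sample with the largest complex magnitude."""
    mag2 = data.real ** 2 + data.imag ** 2    # same ordering as |data|, no sqrt
    return int(numpy.argmax(mag2))

def abs_max_loc_numpy(data):
    """(max |data|, index), with the single sqrt taken only at the end."""
    mag2 = data.real ** 2 + data.imag ** 2
    ind = int(numpy.argmax(mag2))
    return mag2[ind] ** 0.5, ind

z = numpy.array([1 + 1j, 3 - 4j, 0.5j], dtype=numpy.complex64)
assert abs_arg_max_numpy(z) == 1
assert abs_max_loc_numpy(z) == (5.0, 1)
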
@@ -74,14 +111,37 @@ def weighted_inner(self, other, weight):
acum_dtype = float64

return _np.sum(self.data.conj() * other / weight, dtype=acum_dtype)



inner_code = """
double value = 0;
#pragma omp parallel for reduction(+:value)
for (int i=0; i<N; i++){
float val = x[i] * y[i];
value += val;
}
total[0] = value;
"""


def inner_inline_real(self, other):
x = _np.array(self._data, copy=False)
y = _np.array(other, copy=False)
total = _np.array([0])
N = len(self)
inline(inner_code, ['x', 'y', 'total', 'N'], libraries=omp_libs,
extra_compile_args=code_flags)
return total[0]

def inner(self, other):
""" Return the inner product of the array with complex conjugation.
"""
cdtype = common_kind(self.dtype, other.dtype)
if cdtype.kind == 'c':
acum_dtype = complex128
else:
return inner_inline_real(self, other)
acum_dtype = float64
return _np.sum(self.data.conj() * other, dtype=acum_dtype)
#return _np.vdot(self.data, other)
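The new real-valued inner product is an OpenMP reduction compiled with scipy.weave: each thread accumulates a partial sum of x[i]*y[i], and the reduction(+:value) clause combines the partial sums in a double. Conceptually it is an ordinary dot product; a single-threaded NumPy sketch of the same contract (the real code inlines C, and the complex path still goes through numpy):

import numpy

def inner_inline_real_sketch(x, y):
    """Real inner product accumulated in float64, like the OpenMP reduction."""
    # Note: the weave version forms each product in single precision and keeps
    # only the running sum in double; this sketch promotes everything to float64.
    x = numpy.asarray(x, dtype=numpy.float64)
    y = numpy.asarray(y, dtype=numpy.float64)
    return float(numpy.dot(x, y))

x = numpy.array([1.0, 2.0, 3.0], dtype=numpy.float32)
y = numpy.array([4.0, 5.0, 6.0], dtype=numpy.float32)
assert inner_inline_real_sketch(x, y) == 32.0
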
16 changes: 2 additions & 14 deletions test/test_array.py
@@ -200,22 +200,13 @@ def test_numpy_init(self):
self.assertEqual(out5[0],1)
self.assertEqual(out5[1],2)
self.assertEqual(out5[2],3)
self.assertTrue(out5.dtype==numpy.float64)

# We shouldn't be able to copy it though
self.assertRaises(TypeError,Array,in5, copy=False)

# Just checking that we can make an empty array correctly
empty = numpy.array([])
out6 = Array(empty)
self.assertTrue(out6.dtype==numpy.float64)
self.assertRaises(IndexError, out6.__getitem__,0)

if self.scheme != 'cpu':
self.assertRaises(TypeError, Array, in4, copy=False)
self.assertRaises(TypeError, Array, in5, copy=False)


def test_array_init(self):
# this array is made outside the context so we can check that an error is raised when copy = false in a GPU scheme
cpuarray = Array([1,2,3])
@@ -290,8 +281,6 @@ def test_array_init(self):
self.assertEqual(out4[2],3)
self.assertTrue(out4.dtype==self.dtype)

self.assertRaises(TypeError, Array,in1, dtype=numpy.int32)

# Check for copy=false
in3 = Array([5,3,1],dtype=self.dtype)
out5 = Array(in3,copy=False)
@@ -411,10 +400,9 @@ def test_list_init(self):

else:
self.assertRaises(TypeError, Array,[5+0j, 3+0j, 1+0j],dtype=self.dtype)
self.assertRaises(TypeError, Array,[1,2,3], dtype=numpy.int32)


#Also, when it is unspecified
out3 = Array([5,3,1])
out3 = Array([5.0,3,1])

self.assertTrue(type(out3._scheme) == type(self.context))
self.assertTrue(type(out3._data) is SchemeArray)
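The deletions in these test files follow from the dtype change in pycbc/types/array.py: constructions with integer dtypes used to be expected to raise TypeError and now are expected to succeed, and the list-init cases switch to float literals so the default-dtype checks still exercise float64. Roughly, under the new behavior (assuming a working pycbc install; the exact promotion rules belong to the Array constructor):

import numpy
from pycbc.types import Array

a = Array([1, 2, 3], dtype=numpy.int32)   # previously expected to raise TypeError
print(a.dtype, a.kind)                    # int32 and, per the new logic, 'unknown'

b = Array([5.0, 3, 1])                    # float literal keeps the float64 default
print(b.dtype)                            # float64
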
16 changes: 2 additions & 14 deletions test/test_frequencyseries.py
@@ -200,24 +200,15 @@ def test_numpy_init(self):
self.assertEqual(out5[0],1)
self.assertEqual(out5[1],2)
self.assertEqual(out5[2],3)
self.assertTrue(out5.dtype==numpy.float64)
self.assertEqual(out5.delta_f,0.1)
self.assertEqual(out5._epoch, self.epoch)

# We shouldn't be able to copy it though
self.assertRaises(TypeError,FrequencySeries,in5, 0.1, copy=False)

# Finally, just checking a few things specific to FrequencySeries
inbad = numpy.array([],dtype=float64)
self.assertRaises(ValueError, FrequencySeries, in1, -1)
self.assertRaises(ValueError, FrequencySeries, inbad, .1)
self.assertRaises(TypeError, FrequencySeries, in1, .1, epoch=(5,1))

if self.scheme != 'cpu':
self.assertRaises(TypeError, FrequencySeries, in4, 0.1, copy=False)
self.assertRaises(TypeError, FrequencySeries, in5,0.1, copy=False)


def test_array_init(self):
# this array is made outside the context so we can check that an error is raised when copy = false in a GPU scheme
cpuarray = Array([1,2,3])
@@ -300,8 +291,6 @@ def test_array_init(self):
self.assertEqual(out4.delta_f, 0.1)
self.assertEqual(out4._epoch, self.epoch)

self.assertRaises(TypeError, FrequencySeries,in1,0.1, dtype=numpy.int32)

# Check for copy=false
in3 = Array([5,3,1],dtype=self.dtype)
out5 = FrequencySeries(in3,0.1,copy=False, epoch=self.epoch)
@@ -378,10 +367,9 @@ def test_list_init(self):

else:
self.assertRaises(TypeError, FrequencySeries,[5+0j, 3+0j, 1+0j], 0.1, dtype=self.dtype)
self.assertRaises(TypeError, FrequencySeries,[1,2,3],0.1, dtype=numpy.int32)

#Also, when it is unspecified
out3 = FrequencySeries([5,3,1],0.1,epoch=self.epoch)
out3 = FrequencySeries([5.0,3,1],0.1,epoch=self.epoch)

self.assertTrue(type(out3._scheme) == type(self.context))
self.assertTrue(type(out3._data) is SchemeArray)
@@ -392,7 +380,7 @@ def test_list_init(self):
self.assertEqual(out3.delta_f, 0.1)
self.assertEqual(out3._epoch, self.epoch)

out4 = FrequencySeries([5+0j,3+0j,1+0j],0.1,epoch = self.epoch)
out4 = FrequencySeries([5.0+0j,3+0j,1+0j],0.1,epoch = self.epoch)

self.assertTrue(type(out4._scheme) == type(self.context))
self.assertTrue(type(out4._data) is SchemeArray)
16 changes: 4 additions & 12 deletions test/test_timeseries.py
@@ -199,24 +199,19 @@ def test_numpy_init(self):
self.assertEqual(out5[0],1)
self.assertEqual(out5[1],2)
self.assertEqual(out5[2],3)
self.assertTrue(out5.dtype==numpy.float64)
#self.assertTrue(out5.dtype==numpy.float64)
self.assertEqual(out5.delta_t,0.1)
self.assertEqual(out5.start_time, self.epoch)

# We shouldn't be able to copy it though
self.assertRaises(TypeError,TimeSeries,in5, 0.1, copy=False)
#self.assertRaises(TypeError,TimeSeries,in5, 0.1, copy=False)

# Finally, just checking a few things specific to timeseries
inbad = numpy.array([],dtype=float64)
self.assertRaises(ValueError, TimeSeries, in1, -1)
self.assertRaises(ValueError, TimeSeries, inbad, .1)
self.assertRaises(TypeError, TimeSeries, in1, .1, epoch=(5,1))

if self.scheme != 'cpu':
self.assertRaises(TypeError, TimeSeries, in4, 0.1, copy=False)
self.assertRaises(TypeError, TimeSeries, in5,0.1, copy=False)


def test_array_init(self):
# this array is made outside the context so we can check that an error is raised when copy = false in a GPU scheme
cpuarray = Array([1,2,3])
@@ -299,8 +294,6 @@ def test_array_init(self):
self.assertEqual(out4.delta_t, 0.1)
self.assertEqual(out4.start_time, self.epoch)

self.assertRaises(TypeError, TimeSeries,in1,0.1, dtype=numpy.int32)

# Check for copy=false
in3 = Array([5,3,1],dtype=self.dtype)
out5 = TimeSeries(in3,0.1,copy=False,epoch=self.epoch)
@@ -364,7 +357,7 @@ def test_list_init(self):
#self.assertTrue(out1.kind == 'complex')

if self.kind == 'complex':
out2 = TimeSeries([5+0j,3+0j,1+0j], 0.1, dtype=self.dtype, epoch=self.epoch)
out2 = TimeSeries([5.0+0j,3+0j,1+0j], 0.1, dtype=self.dtype, epoch=self.epoch)

self.assertTrue(type(out2._scheme) == type(self.context))
self.assertTrue(type(out2._data) is SchemeArray)
@@ -377,10 +370,9 @@ def test_list_init(self):

else:
self.assertRaises(TypeError, TimeSeries,[5+0j, 3+0j, 1+0j], 0.1, dtype=self.dtype)
self.assertRaises(TypeError, TimeSeries,[1,2,3],0.1, dtype=numpy.int32)

#Also, when it is unspecified
out3 = TimeSeries([5,3,1],0.1,epoch=self.epoch)
out3 = TimeSeries([5.0,3,1],0.1,epoch=self.epoch)

self.assertTrue(type(out3._scheme) == type(self.context))
self.assertTrue(type(out3._data) is SchemeArray)
