diff --git a/neurokit2/complexity/entropy_fuzzy.py b/neurokit2/complexity/entropy_fuzzy.py
index a0e918e892..d4a3a334ea 100644
--- a/neurokit2/complexity/entropy_fuzzy.py
+++ b/neurokit2/complexity/entropy_fuzzy.py
@@ -3,7 +3,7 @@
 from .entropy_sample import entropy_sample
 
 
-def entropy_fuzzy(signal, delay=1, dimension=2, tolerance="sd", approximate=False, **kwargs):
+def entropy_fuzzy(signal, delay=1, dimension=2, tolerance="sd", approximate=False, func_name="exp", **kwargs):
     """**Fuzzy Entropy (FuzzyEn)**
 
     Fuzzy entropy (FuzzyEn) of a signal stems from the combination between information theory and
@@ -24,12 +24,25 @@ def entropy_fuzzy(signal, delay=1, dimension=2, tolerance="sd", approximate=Fals
     dimension : int
         Embedding Dimension (*m*, sometimes referred to as *d* or *order*). See
         :func:`complexity_dimension` to estimate the optimal value for this parameter.
-    tolerance : float
-        Tolerance (often denoted as *r*), distance to consider two data points as similar. If
-        ``"sd"`` (default), will be set to :math:`0.2 * SD_{signal}`. See
+    tolerance : float or list
+        Tolerance (often denoted as *r*): either a single distance threshold below which two
+        data points are considered similar, or a two-element ``[threshold, power]`` vector for
+        the membership functions that take a power (or slope) parameter. See
         :func:`complexity_tolerance` to estimate the optimal value for this parameter.
     approximate : bool
         If ``True``, will compute the fuzzy approximate entropy (FuzzyApEn).
+    func_name : str
+        The name of the fuzzy membership function used to weight the distances. Can be one of:
+
+        * ``"exp"`` : exponential (default)
+        * ``"gauss"`` : Gaussian
+        * ``"cgauss"`` : constant-Gaussian
+        * ``"bell"`` : bell-shaped
+        * ``"z"`` : z-shaped
+        * ``"trapez"`` : trapezoidal
+        * ``"tri"`` : triangular
+        * ``"sig"`` : sigmoid
+
     **kwargs
         Other arguments.
 
@@ -78,6 +91,7 @@ def entropy_fuzzy(signal, delay=1, dimension=2, tolerance="sd", approximate=Fals
             dimension=dimension,
             tolerance=tolerance,
             fuzzy=True,
+            func_name=func_name,
             **kwargs,
         )
     else:
diff --git a/neurokit2/complexity/entropy_sample.py b/neurokit2/complexity/entropy_sample.py
index debbf84367..756b79ea50 100644
--- a/neurokit2/complexity/entropy_sample.py
+++ b/neurokit2/complexity/entropy_sample.py
@@ -6,7 +6,7 @@
 from .utils_entropy import _phi, _phi_divide
 
 
-def entropy_sample(signal, delay=1, dimension=2, tolerance="sd", **kwargs):
+def entropy_sample(signal, delay=1, dimension=2, tolerance="sd", func_name="exp", **kwargs):
     """**Sample Entropy (SampEn)**
 
     Compute the sample entropy (SampEn) of a signal. SampEn is a modification
@@ -30,6 +30,16 @@ def entropy_sample(signal, delay=1, dimension=2, tolerance="sd", **kwargs):
         Tolerance (often denoted as *r*), distance to consider two data points as similar. If
         ``"sd"`` (default), will be set to :math:`0.2 * SD_{signal}`. See
         :func:`complexity_tolerance` to estimate the optimal value for this parameter.
+    func_name : str
+        The name of the fuzzy membership function (only used by the fuzzy variants, such as
+        :func:`entropy_fuzzy`). Can be one of:
+
+        * ``"exp"`` : exponential (default)
+        * ``"gauss"`` : Gaussian
+        * ``"cgauss"`` : constant-Gaussian
+        * ``"bell"`` : bell-shaped, ``"z"`` : z-shaped
+        * ``"trapez"`` : trapezoidal, ``"tri"`` : triangular
+        * ``"sig"`` : sigmoid
     **kwargs : optional
         Other arguments.
 
@@ -64,9 +74,7 @@ def entropy_sample(signal, delay=1, dimension=2, tolerance="sd", **kwargs):
     """
     # Sanity checks
     if isinstance(signal, (np.ndarray, pd.DataFrame)) and signal.ndim > 1:
-        raise ValueError(
-            "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet."
- ) + raise ValueError("Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet.") # Store parameters info = { @@ -87,6 +95,7 @@ def entropy_sample(signal, delay=1, dimension=2, tolerance="sd", **kwargs): dimension=dimension, tolerance=info["Tolerance"], approximate=False, + func_name=func_name, **kwargs ) diff --git a/neurokit2/complexity/optim_complexity_tolerance.py b/neurokit2/complexity/optim_complexity_tolerance.py index af8bb06504..da09624562 100644 --- a/neurokit2/complexity/optim_complexity_tolerance.py +++ b/neurokit2/complexity/optim_complexity_tolerance.py @@ -10,9 +10,7 @@ from .utils_entropy import _entropy_apen -def complexity_tolerance( - signal, method="maxApEn", r_range=None, delay=None, dimension=None, show=False -): +def complexity_tolerance(signal, method="maxApEn", r_range=None, delay=None, dimension=None, show=False): """**Automated selection of tolerance (r)** Estimate and select the optimal tolerance (*r*) parameter used by other entropy and other @@ -210,97 +208,87 @@ def complexity_tolerance( 54(5), 723-732. """ - if not isinstance(method, str): - return method, {"Method": "None"} - - # Method - method = method.lower() - if method in ["traditional", "sd", "std", "default"]: - r = 0.2 * np.std(signal, ddof=1) - info = {"Method": "20% SD"} - - elif method in ["adjusted_sd", "nolds"] and ( - isinstance(dimension, (int, float)) or dimension is None - ): - if dimension is None: - raise ValueError("'dimension' cannot be empty for the 'nolds' method.") - r = ( - 0.11604738531196232 - * np.std(signal, ddof=1) - * (0.5627 * np.log(dimension) + 1.3334) - ) - info = {"Method": "Adjusted 20% SD"} - - elif method in ["chon", "chon2009"] and ( - isinstance(dimension, (int, float)) or dimension is None - ): - if dimension is None: - raise ValueError("'dimension' cannot be empty for the 'chon2009' method.") - sd1 = np.std(np.diff(signal), ddof=1) # short-term variability - sd2 = np.std(signal, ddof=1) # long-term variability of the signal - - # Here are the 3 formulas from Chon (2009): - # For m=2: r =(−0.036 + 0.26 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4 - # For m=3: r =(−0.08 + 0.46 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4 - # For m=4: r =(−0.12 + 0.62 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4 - # For m=5: r =(−0.16 + 0.78 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4 - # For m=6: r =(−0.19 + 0.91 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4 - # For m=7: r =(−0.2 + 1 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4 - if dimension <= 2 and dimension <= 7: - x = [-0.036, -0.08, -0.12, -0.16, -0.19, -0.2][dimension - 2] - y = [0.26, 0.46, 0.62, 0.78, 0.91, 1][dimension - 2] - else: - # We need to extrapolate the 2 first numbers, x and y - # np.polyfit(np.log([2,3,4, 5, 6, 7]), [-0.036, -0.08, -0.12, -0.16, -0.19, -0.2], 1) - # np.polyfit([2,3,4, 5, 6, 7], [0.26, 0.46, 0.62, 0.78, 0.91, 1], 1) - x = -0.034 * dimension + 0.022 - y = 0.14885714 * dimension - 0.00180952 - - r = (x + y * np.sqrt(sd1 / sd2)) / (len(signal) / 1000) ** 1 / 4 - info = {"Method": "Chon (2009)"} - - elif method in ["neurokit", "makowski"] and ( - isinstance(dimension, (int, float)) or dimension is None - ): - # Method described in - # https://github.com/DominiqueMakowski/ComplexityTolerance - if dimension is None: - raise ValueError("'dimension' cannot be empty for the 'makowski' method.") - n = len(signal) - r = np.std(signal, ddof=1) * ( - 0.2811 * (dimension - 1) - + 0.0049 * np.log(n) - - 0.02 * ((dimension - 1) * np.log(n)) - ) + if isinstance(method, str): + + # 
Method given as a string.
+        method = method.lower()
+        if method in ["traditional", "sd", "std", "default"]:
+            r = 0.2 * np.std(signal, ddof=1)
+            info = {"Method": "20% SD"}
+
+        elif method in ["adjusted_sd", "nolds"] and (isinstance(dimension, (int, float)) or dimension is None):
+            if dimension is None:
+                raise ValueError("'dimension' cannot be empty for the 'nolds' method.")
+            r = 0.11604738531196232 * np.std(signal, ddof=1) * (0.5627 * np.log(dimension) + 1.3334)
+            info = {"Method": "Adjusted 20% SD"}
+
+        elif method in ["chon", "chon2009"] and (isinstance(dimension, (int, float)) or dimension is None):
+            if dimension is None:
+                raise ValueError("'dimension' cannot be empty for the 'chon2009' method.")
+            sd1 = np.std(np.diff(signal), ddof=1)  # short-term variability
+            sd2 = np.std(signal, ddof=1)  # long-term variability of the signal
+
+            # Here are the formulas from Chon (2009):
+            # For m=2: r =(−0.036 + 0.26 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4
+            # For m=3: r =(−0.08 + 0.46 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4
+            # For m=4: r =(−0.12 + 0.62 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4
+            # For m=5: r =(−0.16 + 0.78 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4
+            # For m=6: r =(−0.19 + 0.91 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4
+            # For m=7: r =(−0.2 + 1 * sqrt(sd1/sd2)) / (len(signal) / 1000)**1/4
+            if 2 <= dimension <= 7:
+                x = [-0.036, -0.08, -0.12, -0.16, -0.19, -0.2][dimension - 2]
+                y = [0.26, 0.46, 0.62, 0.78, 0.91, 1][dimension - 2]
+            else:
+                # Outside m=2..7, extrapolate the coefficients x and y:
+                # np.polyfit(np.log([2,3,4, 5, 6, 7]), [-0.036, -0.08, -0.12, -0.16, -0.19, -0.2], 1)
+                # np.polyfit([2,3,4, 5, 6, 7], [0.26, 0.46, 0.62, 0.78, 0.91, 1], 1)
+                x = -0.034 * dimension + 0.022
+                y = 0.14885714 * dimension - 0.00180952
+
+            r = (x + y * np.sqrt(sd1 / sd2)) / (len(signal) / 1000) ** (1 / 4)  # fourth root, as above
+            info = {"Method": "Chon (2009)"}
+
+        elif method in ["neurokit", "makowski"] and (isinstance(dimension, (int, float)) or dimension is None):
+            # Method described in
+            # https://github.com/DominiqueMakowski/ComplexityTolerance
+            if dimension is None:
+                raise ValueError("'dimension' cannot be empty for the 'makowski' method.")
+            n = len(signal)
+            r = np.std(signal, ddof=1) * (
+                0.2811 * (dimension - 1) + 0.0049 * np.log(n) - 0.02 * ((dimension - 1) * np.log(n))
+            )
 
-        info = {"Method": "Makowski"}
+            info = {"Method": "Makowski"}
 
-    elif method in ["maxapen", "optimize"]:
-        r, info = _optimize_tolerance_maxapen(
-            signal, r_range=r_range, delay=delay, dimension=dimension
-        )
-        info.update({"Method": "Max ApEn"})
+        elif method in ["maxapen", "optimize"]:
+            r, info = _optimize_tolerance_maxapen(signal, r_range=r_range, delay=delay, dimension=dimension)
+            info.update({"Method": "Max ApEn"})
 
-    elif method in ["recurrence", "rqa"]:
-        r, info = _optimize_tolerance_recurrence(
-            signal, r_range=r_range, delay=delay, dimension=dimension
-        )
-        info.update({"Method": "1% Recurrence Rate"})
+        elif method in ["recurrence", "rqa"]:
+            r, info = _optimize_tolerance_recurrence(signal, r_range=r_range, delay=delay, dimension=dimension)
+            info.update({"Method": "1% Recurrence Rate"})
 
-    elif method in ["neighbours", "neighbors", "nn"]:
-        r, info = _optimize_tolerance_neighbours(
-            signal, r_range=r_range, delay=delay, dimension=dimension
-        )
-        info.update({"Method": "2% Neighbours"})
+        elif method in ["neighbours", "neighbors", "nn"]:
+            r, info = _optimize_tolerance_neighbours(signal, r_range=r_range, delay=delay, dimension=dimension)
+            info.update({"Method": "2% Neighbours"})
 
-    elif method in ["bin", "bins", "singh", "singh2016"]:
-        r, info = _optimize_tolerance_bin(signal, delay=delay, dimension=dimension)
-        info.update({"Method": "bin"})
+        elif method in ["bin", "bins", "singh", "singh2016"]:
+            r, info = _optimize_tolerance_bin(signal, delay=delay, dimension=dimension)
+            info.update({"Method": "bin"})
+
+        else:
+            raise ValueError("NeuroKit error: complexity_tolerance(): 'method' not recognized.")
+
+    elif np.isscalar(method):
+        r = [method * np.std(signal, ddof=1)]  # interpreted as a multiplier of the SD -> [threshold]
+        info = {"Method": "fuzzy_entropy"}
+
+    elif isinstance(method, (list, np.ndarray)) and len(method) == 2:
+        r = [method[0] * np.std(signal, ddof=1), method[1]]  # [SD multiplier, power] -> [threshold, power]
+        info = {"Method": "fuzzy_entropy"}
 
     else:
-        raise ValueError(
-            "NeuroKit error: complexity_tolerance(): 'method' not recognized."
-        )
+        raise ValueError("NeuroKit error: complexity_tolerance(): 'method' must be a string, a scalar, or a two-element vector.")
 
     if show is True:
         _optimize_tolerance_plot(r, info, method=method, signal=signal)
@@ -315,9 +303,7 @@ def complexity_tolerance(
 def _optimize_tolerance_recurrence(signal, r_range=None, delay=None, dimension=None):
     # Optimize missing parameters
     if delay is None or dimension is None:
-        raise ValueError(
-            "If method='recurrence', both delay and dimension must be specified."
-        )
+        raise ValueError("If method='recurrence', both delay and dimension must be specified.")
 
     # Compute distance matrix
     emb = complexity_embedding(signal, delay=delay, dimension=dimension)
@@ -343,9 +329,7 @@ def _optimize_tolerance_recurrence(signal, r_range=None, delay=None, dimension=N
 def _optimize_tolerance_maxapen(signal, r_range=None, delay=None, dimension=None):
     # Optimize missing parameters
     if delay is None or dimension is None:
-        raise ValueError(
-            "If method='maxApEn', both delay and dimension must be specified."
-        )
+        raise ValueError("If method='maxApEn', both delay and dimension must be specified.")
 
     if r_range is None:
         r_range = 40
@@ -383,10 +367,7 @@ def _optimize_tolerance_neighbours(signal, r_range=None, delay=None, dimension=N
     kdtree = sklearn.neighbors.KDTree(embedded, metric="chebyshev")
     counts = np.array(
         [
-            np.mean(
-                kdtree.query_radius(embedded, r, count_only=True).astype(np.float64)
-                / embedded.shape[0]
-            )
+            np.mean(kdtree.query_radius(embedded, r, count_only=True).astype(np.float64) / embedded.shape[0])
             for r in r_range
         ]
     )
diff --git a/neurokit2/complexity/utils_entropy.py b/neurokit2/complexity/utils_entropy.py
index 444dfd8aea..e160ea98fc 100644
--- a/neurokit2/complexity/utils_entropy.py
+++ b/neurokit2/complexity/utils_entropy.py
@@ -2,6 +2,7 @@
 import numpy as np
 import sklearn.metrics
 import sklearn.neighbors
+
 from packaging import version
 
 from .utils_complexity_embedding import complexity_embedding
@@ -36,6 +37,7 @@ def _phi(
     distance="chebyshev",
     approximate=True,
     fuzzy=False,
+    func_name="exp",
     kdtree1=None,
     kdtree2=None,
     **kwargs,
@@ -51,6 +53,8 @@
         distance=distance,
         approximate=approximate,
         fuzzy=fuzzy,
+        func_name=func_name,
+        block_size=10,
         kdtree=kdtree1,
     )
 
@@ -62,6 +66,8 @@
         distance=distance,
         approximate=True,
         fuzzy=fuzzy,
+        func_name=func_name,
+        block_size=10,
         kdtree=kdtree2,
     )
 
@@ -108,15 +114,16 @@ def _get_count(
     distance="chebyshev",
     approximate=True,
     fuzzy=False,
+    func_name="exp",
     kdtree=None,
-    n=1,
     **kwargs,
 ):
-    """
-    This is usually the bottleneck for several complexity methods, in particular in the counting.
-    That's why we allow the possibility of giving kdtrees as pre-computed (used in the optimization
-    of tolerance via MaxApEn which computes iteratively the value with multiple tolerances).
-    However, more improvements are welcome!
+    """This is usually the bottleneck for several complexity methods, in particular in the counting.
+
+    That's why we allow the possibility of giving kdtrees as pre-computed (used in the optimization of tolerance
+    via MaxApEn which computes iteratively the value with multiple tolerances). However, more improvements are
+    welcome!
+    """
 
     # Get embedded
     # -------------------
@@ -133,10 +140,7 @@ def _get_count(
     else:
         valid_metrics = sklearn.neighbors.KDTree.valid_metrics + ["range"]
         if distance not in valid_metrics:
-            raise ValueError(
-                f"The given metric ({distance}) is not valid."
-                f" Valid metric names are: {valid_metrics}"
-            )
+            raise ValueError(f"The given metric ({distance}) is not valid. Valid metric names are: {valid_metrics}")
 
     if fuzzy is True:
         if distance == "range":
@@ -146,17 +150,18 @@ def _get_count(
         embedded -= np.mean(embedded, axis=1, keepdims=True)
 
         # TODO: it would be good to implement 'distrange' here to have fuzzy RangeEn
-        # TODO: also, different membership functions?
-        # https://github.com/HamedAzami/FuzzyEntropy_Matlab/blob/master/FuzEn_MFs.m
-        dist = sklearn.metrics.DistanceMetric.get_metric(distance)
-        dist = dist.pairwise(embedded)
-        # sklearn.metrics.pairwise_distances_chunked()
-        if n > 1:
-            sim = np.exp(-(dist**n) / tolerance)
-        else:
-            sim = np.exp(-dist / tolerance)
-        # Return the count
-        count = np.sum(sim, axis=0)
+
+        dist_metric = sklearn.metrics.DistanceMetric.get_metric(distance)
+
+        # Initialize count
+        count = np.zeros(len(embedded))
+        # Process in blocks so only a (block_size, N) slice of the distance matrix is in memory
+        block_size = kwargs.get("block_size", 10)
+        for i in range(0, len(embedded), block_size):
+            block = embedded[i : i + block_size]
+            dist = dist_metric.pairwise(block, embedded)
+            sim = member_func(func_name, dist, tolerance)
+            count[i : i + block_size] = np.sum(sim, axis=1)
 
     elif distance == "range":
         # internal function for distrange
@@ -167,19 +172,114 @@ def distrange(x, y):
             return np.divide(numerator[valid], denominator[valid])
 
         # Count for each row
-        count = np.array(
-            [
-                np.sum(distrange(embedded, embedded[i]) < tolerance)
-                for i in range(len(embedded))
-            ]
-        )
+        count = np.array([np.sum(distrange(embedded, embedded[i]) < tolerance) for i in range(len(embedded))])
 
     else:  # chebyshev and other sklearn methods
         # Perhaps scipy.spatial.KDTree would be faster? Especially since its query() method
        # has a `workers` argument to use multiple cores? Benchmark or opinion required!
         if kdtree is None:
             kdtree = sklearn.neighbors.KDTree(embedded, metric=distance)
-        count = kdtree.query_radius(embedded, tolerance, count_only=True).astype(
-            np.float64
-        )
+        count = kdtree.query_radius(embedded, tolerance, count_only=True).astype(np.float64)
 
     return embedded, count, kdtree
+
+
+# =============================================================================
+# Membership Functions
+# =============================================================================
+
+
+def member_func(func_name, dist, tolerance):
+    # `tolerance` may arrive as a bare scalar (e.g., derived from the default "sd") or as a
+    # one- or two-element vector. Normalize it to [threshold, power]; a default power of 1
+    # reproduces the previous exponential behaviour.
+    tolerance = np.atleast_1d(tolerance)
+    if len(tolerance) == 1:
+        tolerance = [tolerance[0], 1]
+    if func_name.lower() in membership_function:
+        return membership_function[func_name.lower()](dist, tolerance)
+    else:
+        raise ValueError(
+            "Invalid membership function!\n"
+            "Please choose one of the following:\n"
+            "    exp : exponential\n"
+            "    gauss : gaussian\n"
+            "    cgauss : constgaussian\n"
+            "    bell : bell\n"
+            "    z : z\n"
+            "    trapez : trapezoidal\n"
+            "    tri : triangular\n"
+            "    sig : sigmoid\n"
+        )
+
+
+# see Azami et al. 2019
+# https://doi.org/10.1109/access.2019.2930625
+def exponential(dist, tolerance):
+    # tolerance = [threshold, power]
+    sim = np.exp(-(dist ** tolerance[1]) / tolerance[0])
+    return sim
+
+
+def gaussian(dist, tolerance):  # tolerance[0] = sigma
+    # Only the threshold (sigma) is used; it must be > 0
+    sim = np.exp(-((dist**2) / (2 * (tolerance[0] ** 2))))
+    return sim
+
+
+def constgaussian(dist, tolerance):
+    # Only the threshold is used; it must be > 0
+    sim = np.ones(np.shape(dist))
+    sim[dist > tolerance[0]] = np.exp(-np.log(2) * ((dist[dist > tolerance[0]] - tolerance[0]) / tolerance[0]) ** 2)
+    return sim
+
+
+def bell(dist, tolerance):
+    # tolerance = [threshold, power]
+    sim = 1 / (1 + (abs(dist / tolerance[0]) ** (2 * tolerance[1])))
+    return sim
+
+
+def z(dist, tolerance):
+    # Only the threshold is used; it must be > 0
+    sim = np.zeros(np.shape(dist))
+    sim[dist <= 2 * tolerance[0]] = 2 * (((dist[dist <= 2 * tolerance[0]] - 2 * tolerance[0]) / tolerance[0]) ** 2)
+    sim[dist <= 1.5 * tolerance[0]] = 1 - (
+        2 * (((dist[dist <= 1.5 * tolerance[0]] - tolerance[0]) / tolerance[0]) ** 2)
+    )
+    sim[dist <= tolerance[0]] = 1
+    return sim
+
+
+def trapezoidal(dist, tolerance):
+    # Only the threshold is used; it must be > 0
+    sim = np.zeros(np.shape(dist))
+    sim[dist <= 2 * tolerance[0]] = -(dist[dist <= 2 * tolerance[0]] / tolerance[0]) + 2
+    sim[dist <= tolerance[0]] = 1
+    return sim
+
+
+def triangular(dist, tolerance):
+    # Only the threshold is used; it must be > 0
+    sim = 1 - (dist / tolerance[0])
+    sim[dist > tolerance[0]] = 0
+    return sim
+
+
+def sigmoid(dist, tolerance):
+    # see Zheng et al. 2018
+    # https://doi.org/10.1016/j.measurement.2018.07.045
+    # tolerance = [threshold, slope]
+    sim = 1 / (1 + np.exp(-tolerance[1] * (dist - tolerance[0])))
+    return sim
+
+
+membership_function = {
+    "exp": exponential,
+    "gauss": gaussian,
+    "cgauss": constgaussian,
+    "bell": bell,
+    "z": z,
+    "trapez": trapezoidal,
+    "tri": triangular,
+    "sig": sigmoid,
+}
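
Reviewer note: a minimal usage sketch of the new `func_name` parameter. The values are illustrative, and it assumes this branch is installed as `neurokit2`; only `signal_simulate` and `entropy_fuzzy` from the public API are used.

```python
import neurokit2 as nk

# Simulated test signal
signal = nk.signal_simulate(duration=2, frequency=5, noise=0.3)

# Exponential membership (default): tolerance given as a [SD multiplier, power] pair
fuzen, info = nk.entropy_fuzzy(signal, delay=1, dimension=2, tolerance=[0.2, 2], func_name="exp")

# Gaussian membership only uses the threshold part of the tolerance
fuzen_gauss, _ = nk.entropy_fuzzy(signal, delay=1, dimension=2, tolerance=0.2, func_name="gauss")
```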
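The block-wise counting in `_get_count` is the main memory change in this diff: instead of materializing the full (N, N) pairwise distance matrix, only a (block_size, N) slice is alive at any time. A self-contained sketch of the idea, with the exponential membership hard-coded and all names local to the example:

```python
import numpy as np
import sklearn.metrics

rng = np.random.default_rng(0)
embedded = rng.normal(size=(1000, 2))  # stand-in for the delay-embedded signal
threshold, power = 0.2, 2              # tolerance = [threshold, power]

metric = sklearn.metrics.DistanceMetric.get_metric("chebyshev")
count = np.zeros(len(embedded))
block_size = 10
for i in range(0, len(embedded), block_size):
    block = embedded[i : i + block_size]
    dist = metric.pairwise(block, embedded)     # (block_size, N) slice, not (N, N)
    sim = np.exp(-(dist**power) / threshold)    # exponential membership weights
    count[i : i + block_size] = sim.sum(axis=1)
```

Peak memory thus scales with `block_size * N` rather than `N**2`, at the cost of one `pairwise` call per block.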
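Finally, the new non-string branches of `complexity_tolerance` return the list-valued tolerance that the membership functions index into. A sketch of the intended behaviour on this branch (not the released API, where `method` must be a string):

```python
import neurokit2 as nk

signal = nk.signal_simulate(duration=2, frequency=5)

# A scalar is treated as an SD multiplier: r = [0.2 * SD(signal)]
r, info = nk.complexity_tolerance(signal, method=0.2)

# A two-element vector adds the power/slope term: r = [0.2 * SD(signal), 2]
r, info = nk.complexity_tolerance(signal, method=[0.2, 2])
```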