function nn = false_neighbors_kd(x, m, D, n, r, w, dc)
% FALSE_NEIGHBORS_KD estimate appropriate embedding dimension by estimating
% false nearest neighbors in a reconstructed phase space based on a
% delay coordinate embedding first advocated in:
% "Determining embedding dimension for phase-space
% reconstruction using a geometrical construction",
% M. B. Kennel, R. Brown, and H.D.I. Abarbanel,
% Physical Review A, Vol 45, No 6, 15 March 1992,
% pp 3403-3411.
% the method was further improved to account for (d+1) nn components
% that were larger than mean random distance, nn distances that were
% larger than mean random distances.
% for more details, please see this post:
% http://egr.uri.edu/nld/2013/12/16/notes-on-false-nearest-neighbors/
%
% CALL:
% nn = false_neighbors_kd(x, m, D, n, r, w, dc);
%
% INPUT:
% x - scalar time series
% m - delay time
% D - maximum embedding dimension to consider
% n - number of points to consider
% r - (optional, 1 default) number of temporarily uncorrelated nearest
% neighbors to consider for each phase space point
% w - (optional, 0 default) temporal correlation (Theiler) window
% dc - (optional, 0 default) set to 1 to remove linear correlations from
% the reconstructed phase space coordinates
%
% OUTPUT:
% nn.i - nxD stores true nearest neighbor index in each dimension
% nn.d - nxD stores distances to nearest neighbors in each dimension
% nn.z - nxD stores distances between d+1 coordinate of nn.i point
% nn.s - same as above but for the synthetic surrogates
%
% copyright 2013-2014 David Chelidze
if nargin < 7 || isempty(dc) % do not decorrelate the phase space
dc = 0;
end
if nargin < 6 || isempty(w) % do not remove temporal correlations
w = 0;
end
if nargin < 5 || isempty(r) % find only one nn
r = 1;
end
if nargin < 4 % need time series
error('ERROR: need at least four input variables')
elseif min(size(x)) == 1 % normalize input time series
x = (x(:)' - mean(x))/std(x);
nm = size(x, 2) - D*m;
n = min(nm, n);
else % cannot deal with vector-valued time series
error('ERROR: x needs to be a scalar time series')
end
% number of nearest neighbors to look up. should be larger than r.
knn = max(2*r, 6*m); % my heuristic number for nn look up
% reconstruct the phase space
y = zeros(D+1, n);
for d = 1:D+1
y(d, :) = x((1:n) + (d-1)*m);
end
si = randperm(n); % randomized indexes for the surrogate data analysis
sw = -w:w; % Theiler window indices
dn = zeros(1,r); % initialize distances to nns
% initialize output variables
nn.d = zeros(n, D); nn.z = nn.d; nn.s = nn.d; nn.i = nn.d;
for d = 1:D % find the false nearest neighbors for each embedding dimension
tic % start the timer
fprintf('Estimating false nearest neighbors for d = %d\n', d)
if dc % remove linear correlations
yd = decorr(y(1:d+1, :), d); % remove linear correlations
else % do not remove linear correlations
yd = y(1:d+1, :);
end
% partition all phase space points for fast nn searching
kdtree = KDTreeSearcher(yd(1:d, :)' ,...
'Distance', 'euclidean', 'BucketSize', 2*knn);
% find the nearest neighbor for each of the phase space point
[nni, nnd] = knnsearch(kdtree, yd(1:d, :)', 'k', knn);
ni = abs( nni - (1:n)'*ones(1, knn) ) > w; % remove temporal corrs
for k = 1:n % for each phase space point get the statistics on nns
nnitmp = nni(k, ni(k, :)); % uncorrelated nns
nndtmp = nnd(k, ni(k, :)); % the corresponding distances
bsi = k + sw; % base strand indices
is = (bsi>0 & bsi0 & nsi