From 3433d9add4ffb248f975f7ecdb08d34061e026ef Mon Sep 17 00:00:00 2001 From: DomenicoPerrottaJRC Date: Sat, 6 Apr 2024 15:30:09 +0200 Subject: [PATCH] Removed from Git, as generating errors that cannot be fixed promptly --- toolbox/utilities_stat/radiusCDF.m | 18 --- toolbox/utilities_stat/radiusDensity.m | 106 ---------------- toolbox/utilities_stat/radiusProcess.m | 27 ----- toolbox/utilities_stat/radiusQuantile.m | 154 ------------------------ 4 files changed, 305 deletions(-) delete mode 100644 toolbox/utilities_stat/radiusCDF.m delete mode 100644 toolbox/utilities_stat/radiusDensity.m delete mode 100644 toolbox/utilities_stat/radiusProcess.m delete mode 100644 toolbox/utilities_stat/radiusQuantile.m diff --git a/toolbox/utilities_stat/radiusCDF.m b/toolbox/utilities_stat/radiusCDF.m deleted file mode 100644 index 380deccb0..000000000 --- a/toolbox/utilities_stat/radiusCDF.m +++ /dev/null @@ -1,18 +0,0 @@ -function F_R = radiusCDF(x,v,nu) -% Distribution function of the radius for the Mahalanobis Squared Distance -% -%Link to the help function -% -% This is $F_{R}$. -% - -% Rbeta(x^2/(nu-2+x^2), p/2, nu/2) - -if nargin < 3 || isempty(nu) || nu <= 0 - F_R = chi2cdf(x^2,v); -else - F_R = betacdf(x^2/(nu-2+x^2), v/2, nu/2); -end -end - -%FScategory:UTISTAT diff --git a/toolbox/utilities_stat/radiusDensity.m b/toolbox/utilities_stat/radiusDensity.m deleted file mode 100644 index 44aa3806d..000000000 --- a/toolbox/utilities_stat/radiusDensity.m +++ /dev/null @@ -1,106 +0,0 @@ -function f_R = radiusDensity(r,v,nu) -%radiusDensity computes the non-squared Mahalanobis distance density -% -%Link to the help function -% -% Given the random variable $X$, with density $f_X(x)$ normally or -% $t$-distributed, $\mu=E[X]$, $\Sigma=Var[X]$, and radius $R = \sqrt -% \left( (X-\mu)' \Sigma^(-1) (X-\mu) \right)$, this function returns the -% radius density $f_R(r)$ for any $r>0$. -% -% Required input arguments: -% -% r : radius value. Scalar. The radius value, possibly computed -% from a multivariate sample $X$. -% -% v : Multivariate dimension. Scalar. Number of variables in the -% multivariate sample. -% Example - 'v',2 -% Data Types - double -% -% Optional input arguments: -% -% nu : Degrees of freedom. Scalar. If this optional argument is -% provided, then the sample is assumed to be heavy-tailed and -% modelled by a Student-t distribution with nu degrees of -% freedom. nu must be a positive value. -% Example - 'nu',5 -% Data Types - double -% Output: -% -% f_R : The radius density. -% -% Optional Output: -% -% See also: radiusQuantile -% -% References: -% -% Barabesi, L. and Cerioli, A. and García-Escudero, L.A. and Mayo-Iscar, A. -% (2023), Consistency factor for the MCD estimator at the Student-t -% distribution. Statistics and Computing. Vol. 33, Num. 132, 1-17. -% -% -% Copyright 2008-2023. -% Written by FSDA team -% -%Link to the help page for this function -% -%$LastChangedDate:: $: Date of the last commit -% -% Examples: -% -% -%{ - % Radius density for normal and t-distribution - n = 100; - v = 2; - nu = 3; - alpha = (n-(1:n)+1) / (n+1); - rN = chi2inv(alpha,v); - rt = (1 - betainv(alpha,v/2,nu/2)).^(-1); - rt = (rt - 1) * (nu - 2); - rt = sqrt(rt); - - dent = radiusDensity(rt , v, nu); - denN = radiusDensity(rN , v); - plot(rt,denN) - hold on; - plot(rN,dent) - ylabel('radius density','Fontsize',16); - xlabel('r','Fontsize',16); - hl=legend('$X \sim N$' , '$X \sim t$'); - set(hl,'Interpreter','Latex','Fontsize',20); - -%} - -if nargin<3 || isempty(nu) || nu <= 0 - % $f_X(x)$ is Normal. The squared Mahalanobis distance of a Gaussian - % distribution is Chi-Square distributed but here we need the - % non-squared distances. - A = r.^(v-1) .* exp(-(r.^2)/2); - B = 2^(v/2 - 1) * gamma(v); - f_R = A/B; -else - % $f_X(x)$ is T. The non-squared Mahalanobis distance of a T - % distribution follows this: - A = 2*(beta(nu/2,v/2)*(nu-2)^(v/2))^(-1); - B = r.^(v-1); - C = (1 + (r.^2/(nu-2))).^(-(nu+v)/2); - f_R = A .* B .* C; - - %{ - % This is equivalent to the above, using the gamma function - Ar = (2*gamma((nu+v)/2)) / (gamma(v/2)*gamma(nu/2)*(nu-2)^(v/2)); - Br = r.^(v-1); - Cr = (1 + 1/(nu-2) * r.^2) .^ (-(nu+v)/2); - f_R2 = Ar .* Br .* Cr; - %} - -end - -f_R = f_R(:); - -end -%FScategory:UTISTAT - diff --git a/toolbox/utilities_stat/radiusProcess.m b/toolbox/utilities_stat/radiusProcess.m deleted file mode 100644 index 965c9a9d0..000000000 --- a/toolbox/utilities_stat/radiusProcess.m +++ /dev/null @@ -1,27 +0,0 @@ -function C = radiusProcess(d,v,nu) - -% ellipsoids of decreasing radius define increasing levels of trimming - - -if nargin < 3 || nu <= 0 - nu = []; -end -d = d(:); -n = size(d(:),1); - -% sort Mahalanobis distances -d = sort(d,'ascend'); - -W = zeros(n,1); -for i = 1:n - alpha = (n-i+1) / (n+1); - % radius - r = radiusQuantile(alpha , v , nu); - % radius density - den = radiusDensity(r , v , nu); - W(i) = sqrt(n) * den * (d(n-i+1) - r); -end - -absW = abs(W); -C = max(absW); -end diff --git a/toolbox/utilities_stat/radiusQuantile.m b/toolbox/utilities_stat/radiusQuantile.m deleted file mode 100644 index edd5000d3..000000000 --- a/toolbox/utilities_stat/radiusQuantile.m +++ /dev/null @@ -1,154 +0,0 @@ -function F_R_inverse = radiusQuantile(C,v,nu) -% Generalised radius for the Mahalanobis Squared Distance -% -%Link to the help function -% -% This is $F_{R}^{-1}$. -% -% Critical threshold derived from the Mahalanobis Squared Distance -% distribution for normal or Student-t population. -% -% Required input arguments: -% -% C : Confidence level. Scalar. Number between 0 and 1 indicating -% the fraction of units not trimmed (1-\alpha). -% If the function is used to determine a cut-off value for the -% estimator, C is the confidence level. -% In this case, usually C = 0.95, 0.975 0.99 (individual alpha) -% or 1-0.05/n, 1-0.025/n, 1-0.01/n (simultaneous alpha). -% Default value is 0.975 -% Example - 'conflev',0.99 -% Data Types - double -% -% v : Multivariate dimension. Scalar. Number of variables in the -% multivariate sample. -% Example - 'v',2 -% Data Types - double -% -% Optional input arguments: -% -% nu : Degrees of freedom. Scalar. If this optional argument is -% provided, then the sample is assumed to be heavy-tailed and -% modelled by a Student-t distribution with nu degrees of -% freedom. nu must be a positive value. -% Example - 'nu',5 -% Data Types - double -% -% Output: -% -% F_R_inverse : Cutoff value. Scalar. The cutoff value for the Mahalanobis -% Squared Distances distribution. -% -% More About: -% -% Mahalanobis distances measure the distance of a sample unit from the mean -% of a distribution, taking into account the correlation between the units -% in the covariance matrix. If $x$ is an observation from a multivariate -% distribution with mean $\mu$ and covariance $\Sigma$, the Mahalanobis -% squared distance (MSD) of $x$ from $\mu$ is $D^{2}=(x − \mu)^{t} -% \Sigma^{-1} (x − \mu)$. When $x$ is from a $v$-dimensional multivariate -% normal with known mean and covariance, the population MSD is distributed -% as a chi-squared $\chi_{v}^{2}$ random variable with $\nu$ degrees of -% freedom (Mardia et al, 1979). Then, to test the deviation of an -% observation from the multivariate normal assumption we can compare its -% MSD with an appropriate quantile of the chi-squared distribution: the -% observation will be considered an outlier if the associated $D^{2}$ value -% is larger than the critical value of the chi-squared distribution. There -% are known limitations to the application of this cut-off, for example -% when the sample is high dimensional and its size is not sufficiently -% high. In this case the distribution of the sample MSD is a scaled Beta -% distribution (Gnanadesikan and Kettenring, 1972). For continuous -% Student-t samples, which account for heavy-tailed distributions, the -% appropriate cutoff value depends from a standard Beta distribution with -% shape parameters $v/2$ and $\nu/2$, as shown by Barabesi et al (2023). -% -% See also: mcd.m -% -% References: -% -% Gnanadesikan, R. and Kettenring, J. R. (1972), Robust estimates, -% residuals, and outlier detection with multiresponse data. Biometrics, -% 28:81–124. -% -% Barabesi, L. and Cerioli, A. and García-Escudero, L.A. and Mayo-Iscar, A. -% (2023), Consistency factor for the MCD estimator at the Student-t -% distribution. Statistics and Computing. Vol. 33, Num. 132, 1-17. -% -% Mardia, K. and Kent, J. and Bibby, J. (1979), Multivariate Analysis, -% Academic Press, New York. -% -% Rousseeuw, P.J. and Van Driessen, K. (1999), A fast algorithm for the -% minimum covariance determinant estimator, Technometrics, 41:212-223. -% -% Maronna, R.A., Martin D. and Yohai V.J. (2006), "Robust Statistics, -% Theory and Methods", Wiley, New York. -% -% -% Copyright 2008-2023. -% Written by FSDA team -% -% -%Link to the help page for this function -% -%$LastChangedDate:: $: Date of the last commit -% -% Examples: -% -%{ - % cutoff for a standard Normal. - n = 100; - v = 3; - conflev = 0.975; - cutoffN = msdcutoff(conflev,v); -%} - -%{ - % cutoff for a Student-t. - n = 100; - v = 3; - nu = 5; - cutoffT = msdcutoff(conflev,v,nu); -%} - -%{ - %% cutoff values for robust squared Mahalanobis distances. - - n = 100; - v = 3; - nu = 5; - conflev = 0.975; - - % sample from the T - Yt = random('T',nu,[n,v]); - Yn = random('Normal',0,1,[n,v]); - - % mcd with the T-model - RAWt = mcd(Yt,'modelT',nu,'plots',0); - - % mcd with the Normal-model - RAWn = mcd(Yn,'plots',0); - - % T-cutoff - cutoffT = msdcutoff(conflev,v,nu); - - % Normal-cutoff - cutoffN = msdcutoff(conflev,v); - - plot(1:n,RAWt.md,'xr' , 1:n,RAWn.md,'ob'); - hold on; - line([1 , n] , [cutoffT , cutoffT] , 'Color', 'r'); - line([1 , n] , [cutoffN , cutoffN] , 'Color', 'b'); - legend({'Student-t','Normal','cutoff-t','cutoff Normal'}); - -%} - -if nargin < 3 || isempty(nu) || nu <= 0 - F_R_inverse=chi2inv(C,v); -else - F_R_inverse = (1 - betainv(C,v/2,nu/2)).^(-1); - F_R_inverse = (F_R_inverse - 1) * (nu - 2); -end -F_R_inverse = sqrt(F_R_inverse); -end - -%FScategory:UTISTAT