Skip to content

Commit

Permalink
Improved KernelSmoothing
Browse files Browse the repository at this point in the history
Now, a log transformation can be applied to ease the reconstruction of skewed distributions.
  • Loading branch information
regislebrun committed Jun 2, 2024
1 parent b5fd833 commit e5167f1
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 18 deletions.
18 changes: 11 additions & 7 deletions lib/etc/openturns.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -576,13 +576,14 @@
<KernelMixture-SmallSize value_int="50" />

<!-- OT::KernelSmoothing parameters -->
<KernelSmoothing-AbsolutePrecision value_float="0.0" />
<KernelSmoothing-CutOffPlugin value_float="5.0" />
<KernelSmoothing-RelativePrecision value_float="1.0e-5" />
<KernelSmoothing-ResidualPrecision value_float="1.0e-10" />
<KernelSmoothing-BinNumber value_int="1024" />
<KernelSmoothing-MaximumIteration value_int="50" />
<KernelSmoothing-SmallSize value_int="250" />
<KernelSmoothing-AbsolutePrecision value_float="0.0" />
<KernelSmoothing-CutOffPlugin value_float="5.0" />
<KernelSmoothing-RelativePrecision value_float="1.0e-5" />
<KernelSmoothing-ResidualPrecision value_float="1.0e-10" />
<KernelSmoothing-DefaultShiftScale value_float="1.0e-5" />
<KernelSmoothing-BinNumber value_int="1024" />
<KernelSmoothing-MaximumIteration value_int="50" />
<KernelSmoothing-SmallSize value_int="250" />

<!-- OT::LogNormal parameters -->
<LogNormal-CharacteristicFunctionSmallSigmaThreshold value_float="0.2" />
Expand All @@ -601,6 +602,9 @@
<MarginalDistribution-MaximumSubIntervals value_int="128" />
<MarginalDistribution-Rule value_str="G15K31" />

<!-- MarginalUniformOrderStatistics parameters -->
<MarginalUniformOrderStatistics-LargeCaseCDF value_int="1000" />

<!-- Meixner parameters -->
<MeixnerDistribution-MaximumAbsoluteError value_float="1.0e-12" />
<MeixnerDistribution-MaximumConstraintError value_float="1.0e-12" />
Expand Down
4 changes: 4 additions & 0 deletions lib/src/Base/Common/ResourceMap.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,7 @@ void ResourceMap::loadDefaultConfiguration()
addAsScalar("KernelSmoothing-CutOffPlugin", 5.0);
addAsScalar("KernelSmoothing-RelativePrecision", 1.0e-5);
addAsScalar("KernelSmoothing-ResidualPrecision", 1.0e-10);
addAsScalar("KernelSmoothing-DefaultShiftScale", 1.0e-5);
addAsUnsignedInteger("KernelSmoothing-BinNumber", 1024);
addAsUnsignedInteger("KernelSmoothing-MaximumIteration", 50);
addAsUnsignedInteger("KernelSmoothing-SmallSize", 250);
Expand All @@ -1230,6 +1231,9 @@ void ResourceMap::loadDefaultConfiguration()
addAsString("MarginalDistribution-Rule", "G15K31");
addAsUnsignedInteger("MarginalDistribution-MaximumSubIntervals", 128);

// MarginalUniformOrderStatistics //
addAsUnsignedInteger("MarginalUniformOrderStatistics-LargeCaseCDF", 1000);

// Meixner parameters //
addAsScalar("MeixnerDistribution-MaximumAbsoluteError", 1.0e-12);
addAsScalar("MeixnerDistribution-MaximumConstraintError", 1.0e-12);
Expand Down
85 changes: 81 additions & 4 deletions lib/src/Uncertainty/Distribution/KernelSmoothing.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "openturns/PersistentObjectFactory.hxx"
#include "openturns/Brent.hxx"
#include "openturns/MethodBoundEvaluation.hxx"
#include "openturns/SymbolicFunction.hxx"
#include "openturns/Function.hxx"
#include "openturns/HermiteFactory.hxx"
#include "openturns/UniVariatePolynomial.hxx"
Expand All @@ -33,6 +34,7 @@
#include "openturns/SobolSequence.hxx"
#include "openturns/ResourceMap.hxx"
#include "openturns/JointDistribution.hxx"
#include "openturns/CompositeDistribution.hxx"
#include "openturns/BlockIndependentDistribution.hxx"

BEGIN_NAMESPACE_OPENTURNS
Expand Down Expand Up @@ -256,8 +258,41 @@ Distribution KernelSmoothing::build(const Sample & sample) const
{
// For 1D sample, use the rule that give the best tradeoff between speed and precision
if (sample.getDimension() == 1)
return build(sample, computeMixedBandwidth(sample));

{
if (useLogTransform_)
{
const Scalar skewness = sample.computeSkewness()[0];
const Scalar xMin = sample.getMin()[0];
const Scalar xMax = sample.getMax()[0];
const Scalar delta = (xMax - xMin) * std::max(SpecFunc::Precision, ResourceMap::GetAsScalar("KernelSmoothing-DefaultShiftScale"));
SymbolicFunction transform;
SymbolicFunction inverseTransform;
OSS oss(true);
// To get the full double precision
oss.setPrecision(17);
if (skewness >= 0.0)
{
const Scalar shift = delta - xMin;
transform = SymbolicFunction("x", String(oss << "log(x+(" << shift << "))"));
oss.clear();
inverseTransform = SymbolicFunction("y", String(oss << "exp(y)-(" << shift << ")"));
}
else
{
const Scalar shift = xMax + delta;
transform = SymbolicFunction("x", String(oss << "log((" << shift << ")-x)"));
oss.clear();
inverseTransform = SymbolicFunction("y", String(oss << "(" << shift << ") - exp(y)"));
}
const Sample transformedSample(transform(sample));
const Distribution transformedDistribution(build(transformedSample, computeMixedBandwidth(transformedSample)));
CompositeDistribution fitted(inverseTransform, transformedDistribution);
fitted.setDescription(sample.getDescription());
return fitted;
} // useLogTransform
else
return build(sample, computeMixedBandwidth(sample));
} // dimension 1
// For nD sample, use the only available rule
return build(sample, computeSilvermanBandwidth(sample));
}
Expand All @@ -279,8 +314,8 @@ Distribution KernelSmoothing::build(const Sample & sample,
if (xmin == xmax)
{
bandwidth_ = bandwidth;
KernelSmoothing::Implementation result(new Dirac(xmin));
result->setDescription(sample.getDescription());
Dirac result(xmin);
result.setDescription(sample.getDescription());
return result;
}
Indices degenerateIndices;
Expand Down Expand Up @@ -549,6 +584,7 @@ TruncatedDistribution KernelSmoothing::buildAsTruncatedDistribution(const Sample
// Now, work on the extended sample
SampleImplementation newSample(newSampleData.getSize(), 1);
newSample.setData(newSampleData);
newSample.setDescription(sample.getDescription());
size = newSample.getSize();
const Bool mustBin = binned_ && (dimension * std::log(1.0 * binNumber_) < std::log(1.0 * size));
if (binned_ != mustBin) LOGINFO("Will not bin the data because the bin number is greater than the sample size");
Expand Down Expand Up @@ -585,6 +621,11 @@ void KernelSmoothing::setBoundaryCorrection(const Bool boundaryCorrection)
boundingOption_ = (boundaryCorrection ? BOTH : NONE);
}

Bool KernelSmoothing::getBoundaryCorrection() const
{
return (boundingOption_ != NONE);
}

/* Boundary correction accessor */
void KernelSmoothing::setBoundingOption(const BoundingOption boundingOption)
{
Expand Down Expand Up @@ -615,6 +656,38 @@ void KernelSmoothing::setAutomaticUpperBound(const Bool automaticUpperBound)
automaticUpperBound_ = automaticUpperBound;
}

/* Binning accessors */
void KernelSmoothing::setBinning(const Bool binned)
{
binned_ = binned;
}

Bool KernelSmoothing::getBinning() const
{
return binned_;
}

/* Bin number accessor */
void KernelSmoothing::setBinNumber(const UnsignedInteger binNumber)
{
if (binNumber_ < 2) throw InvalidArgumentException(HERE) << "Error: The number of bins=" << binNumber << " is less than 2.";
}

UnsignedInteger KernelSmoothing::getBinNumber() const
{
return binNumber_;
}

/* Use log transform accessor */
void KernelSmoothing::setUseLogTransform(const Bool useLog)
{
useLogTransform_ = useLog;
}

Bool KernelSmoothing::getUseLogTransform() const
{
return useLogTransform_;
}

/* Method save() stores the object through the StorageManager */
void KernelSmoothing::save(Advocate & adv) const
Expand Down Expand Up @@ -646,6 +719,10 @@ void KernelSmoothing::load(Advocate & adv)
adv.loadAttribute("automaticLowerBound_", automaticLowerBound_);
adv.loadAttribute("upperBound_", upperBound_);
adv.loadAttribute("automaticUpperBound_", automaticUpperBound_);
if (adv.hasAttribute("useLogTransform_"))
adv.loadAttribute("useLogTransform_", useLogTransform_);
else
useLogTransform_ = false;
}

END_NAMESPACE_OPENTURNS
29 changes: 22 additions & 7 deletions lib/src/Uncertainty/Distribution/openturns/KernelSmoothing.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,33 @@ public:
/** Kernel accessor */
Distribution getKernel() const;

/* Boundary correction accessor, shortcut for setBoundingOption(NONE) or setBoundingOption(BOTH) */
/** Boundary correction accessor, shortcut for setBoundingOption(NONE) or setBoundingOption(BOTH) */
void setBoundaryCorrection(const Bool boundaryCorrection);
Bool getBoundaryCorrection() const;

/* Boundary correction accessor */
/** Boundary correction accessor */
void setBoundingOption(const BoundingOption boundingOption);

/* Boundary accessor */
/** Boundary accessor */
void setLowerBound(const Scalar lowerBound);
void setUpperBound(const Scalar upperBound);

/* Automatic boundary accessor */
/** Automatic boundary accessor */
void setAutomaticLowerBound(const Bool automaticLowerBound);
void setAutomaticUpperBound(const Bool automaticUpperBound);

/** Binning accessors */
void setBinning(const Bool binned);
Bool getBinning() const;

/** Bin number accessor */
void setBinNumber(const UnsignedInteger binNumber);
UnsignedInteger getBinNumber() const;

/** Use log transform accessor */
void setUseLogTransform(const Bool useLog);
Bool getUseLogTransform() const;

/** Compute the bandwidth according to Silverman's rule */
Point computeSilvermanBandwidth(const Sample & sample) const;

Expand Down Expand Up @@ -124,20 +137,22 @@ private:
Distribution kernel_;

// Flag to tell if we compute a binned version of the estimator
Bool binned_;
Bool binned_ = false;

// Number of bins in each dimension
UnsignedInteger binNumber_;
UnsignedInteger binNumber_ = ResourceMap::GetAsUnsignedInteger("KernelSmoothing-BinNumber");

// Direction of the boundary treatment
BoundingOption boundingOption_;
BoundingOption boundingOption_ = NONE;

// Known bounds
Scalar lowerBound_;
Bool automaticLowerBound_;
Scalar upperBound_;
Bool automaticUpperBound_;

// Use log transform
Bool useLogTransform_ = false;
}; /* class KernelSmoothing */


Expand Down

0 comments on commit e5167f1

Please sign in to comment.