120 lines
3.6 KiB
C++
120 lines
3.6 KiB
C++
#include "LowessSmoother.h"
|
|
#include <numeric>
|
|
|
|
namespace Lowess {
|
|
|
|
void validateConfig(const Config& config) {
|
|
if (config.smoothingFactor <= 0 || config.smoothingFactor > 1) {
|
|
throw std::invalid_argument("Smoothing factor must be between 0 and 1");
|
|
}
|
|
if (config.robustnessIterations < 1) {
|
|
throw std::invalid_argument("Robustness iterations must be at least 1");
|
|
}
|
|
}
|
|
|
|
double weightFunction(double x) {
|
|
if (x < 0 || x > 1) return 0;
|
|
return std::pow(1 - std::pow(x, 3), 3);
|
|
}
|
|
|
|
double medianAbsoluteDeviation(const std::vector<double>& residuals) {
|
|
if (residuals.empty()) return 0;
|
|
|
|
std::vector<double> absResiduals(residuals.size());
|
|
for (size_t i = 0; i < residuals.size(); ++i) {
|
|
absResiduals[i] = std::abs(residuals[i]);
|
|
}
|
|
|
|
std::sort(absResiduals.begin(), absResiduals.end());
|
|
return absResiduals[absResiduals.size() / 2];
|
|
}
|
|
|
|
std::vector<double> computeBandwidths(const std::vector<double>& x, double smoothingFactor) {
|
|
size_t n = x.size();
|
|
size_t r = static_cast<size_t>(std::ceil(smoothingFactor * n));
|
|
std::vector<double> h(n);
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
std::vector<double> distances(n);
|
|
for (size_t j = 0; j < n; ++j) {
|
|
distances[j] = std::abs(x[i] - x[j]);
|
|
}
|
|
std::sort(distances.begin(), distances.end());
|
|
h[i] = distances[r];
|
|
}
|
|
|
|
return h;
|
|
}
|
|
|
|
std::vector<double> smooth(
|
|
const std::vector<double>& x,
|
|
const std::vector<double>& y,
|
|
const Config& config
|
|
) {
|
|
validateConfig(config);
|
|
|
|
if (x.size() != y.size()) {
|
|
throw std::invalid_argument("x and y must have the same size");
|
|
}
|
|
if (x.size() < 2) {
|
|
throw std::invalid_argument("At least 2 data points required");
|
|
}
|
|
|
|
size_t n = x.size();
|
|
std::vector<double> yest(n, 0);
|
|
std::vector<double> delta(n, 1);
|
|
std::vector<double> residuals(n);
|
|
|
|
// 计算带宽
|
|
std::vector<double> h = computeBandwidths(x, config.smoothingFactor);
|
|
|
|
for (int iteration = 0; iteration < config.robustnessIterations; ++iteration) {
|
|
for (size_t i = 0; i < n; ++i) {
|
|
// 计算权重
|
|
std::vector<double> weights(n);
|
|
for (size_t j = 0; j < n; ++j) {
|
|
double u = std::abs(x[j] - x[i]) / h[i];
|
|
weights[j] = delta[j] * weightFunction(u);
|
|
}
|
|
|
|
// 加权最小二乘
|
|
double sumW = 0, sumWx = 0, sumWx2 = 0, sumWy = 0, sumWxy = 0;
|
|
for (size_t j = 0; j < n; ++j) {
|
|
sumW += weights[j];
|
|
sumWx += weights[j] * x[j];
|
|
sumWx2 += weights[j] * x[j] * x[j];
|
|
sumWy += weights[j] * y[j];
|
|
sumWxy += weights[j] * x[j] * y[j];
|
|
}
|
|
|
|
double det = sumW * sumWx2 - sumWx * sumWx;
|
|
if (std::abs(det) < 1e-10) { // 防止奇异矩阵
|
|
yest[i] = sumWy / sumW; // 退化为加权平均
|
|
} else {
|
|
double a = (sumWx2 * sumWy - sumWx * sumWxy) / det;
|
|
double b = (sumW * sumWxy - sumWx * sumWy) / det;
|
|
yest[i] = a + b * x[i];
|
|
}
|
|
}
|
|
|
|
// 计算残差和鲁棒权重
|
|
for (size_t i = 0; i < n; ++i) {
|
|
residuals[i] = y[i] - yest[i];
|
|
}
|
|
|
|
double s = medianAbsoluteDeviation(residuals);
|
|
if (std::abs(s) < 1e-10) s = 1; // 防止除以0
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
double u = residuals[i] / (6.0 * s);
|
|
if (u < -1) u = -1;
|
|
if (u > 1) u = 1;
|
|
delta[i] = std::pow(1 - u * u, 2);
|
|
}
|
|
}
|
|
|
|
return yest;
|
|
}
|
|
|
|
} // namespace Lowess
|