Skip to content

Commit a069989

Browse files
ppavlidisarteymix
authored andcommitted
refactoring and tuning of the voom implementation, new rigorous test
avoid unnecessary call to mv();
1 parent fd3dea2 commit a069989

9 files changed

Lines changed: 920 additions & 393 deletions

File tree

src/ubic/basecode/math/Smooth.java

Lines changed: 146 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,52 @@
11
/*
22
* The baseCode project
3-
*
3+
*
44
* Copyright (c) 2011 University of British Columbia
5-
*
5+
*
66
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
77
* the License. You may obtain a copy of the License at
8-
*
8+
*
99
* http://www.apache.org/licenses/LICENSE-2.0
10-
*
10+
*
1111
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
1212
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
1313
* specific language governing permissions and limitations under the License.
1414
*/
1515
package ubic.basecode.math;
1616

1717
import java.util.LinkedList;
18+
import java.util.Map;
1819
import java.util.Queue;
20+
import java.util.TreeMap;
1921

22+
import cern.colt.list.DoubleArrayList;
2023
import cern.colt.matrix.DoubleMatrix1D;
24+
import cern.colt.matrix.DoubleMatrix2D;
25+
import cern.colt.matrix.impl.DenseDoubleMatrix2D;
26+
import cern.jet.stat.Descriptive;
27+
import org.apache.commons.lang3.ArrayUtils;
28+
import org.apache.commons.math3.analysis.interpolation.LinearInterpolator;
29+
import org.apache.commons.math3.analysis.interpolation.LoessInterpolator;
30+
import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction;
31+
import org.apache.commons.math3.exception.OutOfRangeException;
32+
import ubic.basecode.math.linearmodels.MeanVarianceEstimator;
2133

2234
/**
23-
* Methods for moving averages etc.
24-
*
35+
* Methods for moving averages, loess
36+
*
2537
* @author paul
38+
* @author ptan
2639
*/
2740
public class Smooth {
2841

2942
/**
3043
* Simple moving average that sums the points "backwards".
31-
*
44+
*
3245
* @param m
3346
* @param windowSize
3447
* @return
3548
*/
36-
public static DoubleMatrix1D movingAverage( DoubleMatrix1D m, int windowSize ) {
49+
public static DoubleMatrix1D movingAverage(DoubleMatrix1D m, int windowSize) {
3750

3851
Queue<Double> window = new LinkedList<>();
3952

@@ -42,25 +55,143 @@ public static DoubleMatrix1D movingAverage( DoubleMatrix1D m, int windowSize ) {
4255
assert windowSize > 0;
4356

4457
DoubleMatrix1D result = m.like();
45-
for ( int i = 0; i < m.size(); i++ ) {
58+
for (int i = 0; i < m.size(); i++) {
4659

47-
double num = m.get( i );
60+
double num = m.get(i);
4861
sum += num;
49-
window.add( num );
50-
if ( window.size() > windowSize ) {
62+
window.add(num);
63+
if (window.size() > windowSize) {
5164

5265
sum -= window.remove();
5366
}
5467

55-
if ( !window.isEmpty() ) {
68+
if (!window.isEmpty()) {
5669
// if ( window.size() == windowSize ) {
57-
result.set( i, sum / window.size() );
70+
result.set(i, sum / window.size());
5871
} else {
59-
result.set( i, Double.NaN );
72+
result.set(i, Double.NaN);
6073
}
6174
}
6275

6376
return result;
6477

6578
}
79+
80+
/**
81+
* Default loess span (This is the default value used by limma-voom)
82+
*/
83+
static final double BANDWIDTH = 0.5;
84+
85+
/**
86+
* Default number of loess robustness iterations; 0 is probably fine.
87+
*/
88+
static final int ROBUSTNESS_ITERS = 3;
89+
90+
/**
91+
* @param xy
92+
* @return loessFit with default bandwitdh
93+
*/
94+
public static DoubleMatrix2D loessFit(DoubleMatrix2D xy) {
95+
return loessFit(xy, BANDWIDTH);
96+
}
97+
98+
/**
99+
* Computes a loess regression line to fit the data
100+
*
101+
* @param xy data to be fit
102+
* @param bandwidth the span of the smoother (from 2/n to 1 where n is the number of points in xy)
103+
* @return loessFit (same dimensions as xy) or null if there are less than 3 data points
104+
*/
105+
public static DoubleMatrix2D loessFit(DoubleMatrix2D xy, double bandwidth) {
106+
assert xy != null;
107+
108+
DoubleMatrix1D sx = xy.viewColumn(0);
109+
DoubleMatrix1D sy = xy.viewColumn(1);
110+
Map<Double, Double> map = new TreeMap<>();// to enforce monotonicity
111+
for (int i = 0; i < sx.size(); i++) {
112+
if (Double.isNaN(sx.get(i)) || Double.isInfinite(sx.get(i)) || Double.isNaN(sy.get(i))
113+
|| Double.isInfinite(sy.get(i))) {
114+
continue;
115+
}
116+
map.put(sx.get(i), sy.get(i));
117+
}
118+
DoubleMatrix2D xyChecked = new DenseDoubleMatrix2D(map.size(), 2);
119+
xyChecked.viewColumn(0).assign(ArrayUtils.toPrimitive(map.keySet().toArray(new Double[0])));
120+
xyChecked.viewColumn(1).assign(ArrayUtils.toPrimitive(map.values().toArray(new Double[0])));
121+
122+
// in R:
123+
// loess(c(1:5),c(1:5)^2,f=0.5,iter=3)
124+
// Note: we start to lose some precision here in comparison with R's loess FIXME why? does it matter?
125+
DoubleMatrix2D loessFit = new DenseDoubleMatrix2D(xyChecked.rows(), xyChecked.columns());
126+
127+
// fit a loess curve
128+
LoessInterpolator loessInterpolator = new LoessInterpolator(bandwidth,
129+
ROBUSTNESS_ITERS);
130+
131+
double[] loessY = loessInterpolator.smooth(xyChecked.viewColumn(0).toArray(),
132+
xyChecked.viewColumn(1).toArray());
133+
134+
loessFit.viewColumn(0).assign(xyChecked.viewColumn(0));
135+
loessFit.viewColumn(1).assign(loessY);
136+
137+
return loessFit;
138+
}
139+
140+
141+
142+
/**
143+
* Linearlly interpolate values from a given data set
144+
*
145+
* Similar implementation of R's stats.approxfun(..., rule = 2) where values outside the interval ['min(x)',
146+
* 'max(x)'] get the value at the closest data extreme. Also performs sorting based on xTrain.
147+
*
148+
* @param x the training set of x values
149+
* @param y the training set of y values
150+
* @param xInterpolate the set of x values to interpolate
151+
* @return yInterpolate the interpolated set of y values
152+
*/
153+
public static double[] interpolate( double[] x, double[] y, double[] xInterpolate ) {
154+
155+
assert x != null;
156+
assert y != null;
157+
assert xInterpolate != null;
158+
assert x.length == y.length;
159+
160+
double[] yInterpolate = new double[xInterpolate.length];
161+
LinearInterpolator linearInterpolator = new LinearInterpolator();
162+
163+
// make sure that x is strictly increasing
164+
DoubleMatrix2D matrix = new DenseDoubleMatrix2D( x.length, 2 );
165+
matrix.viewColumn( 0 ).assign( x );
166+
matrix.viewColumn( 1 ).assign( y );
167+
matrix = matrix.viewSorted( 0 );
168+
double[] sortedX = matrix.viewColumn( 0 ).toArray();
169+
double[] sortedY = matrix.viewColumn( 1 ).toArray();
170+
171+
// make sure x is within the domain
172+
DoubleArrayList xList = new DoubleArrayList( sortedX );
173+
double x3ListMin = Descriptive.min( xList );
174+
double x3ListMax = Descriptive.max( xList );
175+
PolynomialSplineFunction fun = linearInterpolator.interpolate( sortedX, sortedY );
176+
for ( int i = 0; i < xInterpolate.length; i++ ) {
177+
try {
178+
// approx(...,rule=2)
179+
if ( xInterpolate[i] > x3ListMax ) {
180+
yInterpolate[i] = fun.value( x3ListMax );
181+
} else if ( xInterpolate[i] < x3ListMin ) {
182+
yInterpolate[i] = fun.value( x3ListMin );
183+
} else {
184+
yInterpolate[i] = fun.value( xInterpolate[i] );
185+
}
186+
} catch ( OutOfRangeException e ) {
187+
// this shouldn't happen anymore
188+
yInterpolate[i] = Double.NaN;
189+
}
190+
}
191+
192+
return yInterpolate;
193+
}
194+
195+
196+
66197
}

0 commit comments

Comments
 (0)