Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,38 +86,26 @@ public void terminate(PointCollector collector) throws Exception {
case DOUBLE:
collector.putDouble(0, res);
break;
case TIMESTAMP:
case DATE:
case TEXT:
case STRING:
case BLOB:
case BOOLEAN:
default:
break;
}
}

private long dataToLong(Object data) {
long result;
private long dataToLong(double res) {
switch (dataType) {
case INT32:
return (int) data;
return (int) res;
case FLOAT:
result = Float.floatToIntBits((float) data);
return (float) data >= 0f ? result : result ^ Long.MAX_VALUE;
float f = (float) res;
long flBits = Float.floatToIntBits(f);
return f >= 0f ? flBits : flBits ^ Long.MAX_VALUE;
case INT64:
return (long) data;
return (long) res;
case DOUBLE:
result = Double.doubleToLongBits((double) data);
return (double) data >= 0d ? result : result ^ Long.MAX_VALUE;
case BLOB:
case BOOLEAN:
case STRING:
case TEXT:
case DATE:
case TIMESTAMP:
long d = Double.doubleToLongBits(res);
return res >= 0d ? d : d ^ Long.MAX_VALUE;
default:
return (long) data;
return (long) res;
}
}

Expand All @@ -131,12 +119,6 @@ private double longToResult(long result) {
return Double.longBitsToDouble(result);
case INT64:
case INT32:
case DATE:
case TEXT:
case STRING:
case BOOLEAN:
case BLOB:
case TIMESTAMP:
default:
return (result);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@
import java.io.IOException;
import java.util.NoSuchElementException;

/** Util for computing median, MAD, percentile. */
/**
* Util for computing median, MAD, percentile.
*
* <p>Percentile / quantile ({@link #getPercentile}) uses <b>discrete nearest-rank</b>: for sorted
* size {@code n} and {@code phi} in (0, 1], take 1-based rank {@code k = ceil(n * phi)} and 0-based
* index {@code k - 1}, clamped to {@code [0, n - 1]}. No interpolation; {@code phi = 0.5} is not
* required to match {@link #getMedian}.
*/
public class ExactOrderStatistics {

private final Type dataType;
Expand All @@ -55,12 +62,6 @@ public ExactOrderStatistics(Type type) throws UDFInputSeriesDataTypeNotValidExce
case DOUBLE:
doubleArrayList = new DoubleArrayList();
break;
case STRING:
case TEXT:
case BOOLEAN:
case BLOB:
case DATE:
case TIMESTAMP:
default:
// This will not happen.
throw new UDFInputSeriesDataTypeNotValidException(
Expand Down Expand Up @@ -88,12 +89,6 @@ public void insert(Row row) throws UDFInputSeriesDataTypeNotValidException, IOEx
doubleArrayList.add(vd);
}
break;
case DATE:
case TIMESTAMP:
case BLOB:
case BOOLEAN:
case TEXT:
case STRING:
default:
// This will not happen.
throw new UDFInputSeriesDataTypeNotValidException(
Expand All @@ -111,12 +106,6 @@ public double getMedian() throws UDFInputSeriesDataTypeNotValidException {
return getMedian(floatArrayList);
case DOUBLE:
return getMedian(doubleArrayList);
case TEXT:
case STRING:
case BOOLEAN:
case BLOB:
case TIMESTAMP:
case DATE:
default:
// This will not happen.
throw new UDFInputSeriesDataTypeNotValidException(
Expand Down Expand Up @@ -199,12 +188,6 @@ public double getMad() throws UDFInputSeriesDataTypeNotValidException {
return getMad(floatArrayList);
case DOUBLE:
return getMad(doubleArrayList);
case TIMESTAMP:
case DATE:
case BLOB:
case BOOLEAN:
case STRING:
case TEXT:
default:
// This will not happen.
throw new UDFInputSeriesDataTypeNotValidException(
Expand Down Expand Up @@ -251,12 +234,18 @@ public static double getMad(LongArrayList nums) {
}
}

/**
 * Maps a quantile fraction to a 0-based position in ascending-sorted data of length {@code n},
 * using the discrete nearest-rank rule (no interpolation).
 *
 * <p>The 1-based rank is {@code ceil(n * phi)}; the result is that rank minus one, clamped to
 * {@code [0, n - 1]}. The clamp is applied to the rank while it is still a {@code double}, so a
 * very negative (or infinite, or NaN) product cannot wrap around during {@code int} arithmetic
 * and end up at the wrong end of the range.
 *
 * @param n number of sorted elements
 * @param phi quantile fraction; values that map outside the data are clamped
 * @return 0-based index in {@code [0, n - 1]}, or {@code 0} when {@code n <= 0}
 */
private static int discreteNearestRankIndex(int n, double phi) {
  if (n <= 0) {
    return 0;
  }
  final double rank = Math.ceil(n * phi);
  // Negated comparison so that NaN also falls through to the lowest index.
  if (!(rank >= 1d)) {
    return 0;
  }
  if (rank >= n) {
    return n - 1;
  }
  // Here 1 <= rank < n, so the narrowing cast is exact and the subtraction cannot overflow.
  return (int) rank - 1;
}

/**
 * Returns the element of {@code nums} selected by the discrete nearest-rank rule for {@code phi}.
 *
 * <p>Calls {@code nums.sortThis()} before indexing, so the list passed in is reordered as a side
 * effect. The index comes from {@code discreteNearestRankIndex}, which keeps it inside the list;
 * the earlier direct use of {@code ceil(size * phi)} as an index was one position too high and
 * ran past the end for {@code phi = 1}.
 *
 * @param nums values to select from; must not be empty
 * @param phi quantile fraction
 * @return the selected element
 * @throws NoSuchElementException if {@code nums} is empty
 */
public static float getPercentile(FloatArrayList nums, double phi) {
  if (nums.isEmpty()) {
    throw new NoSuchElementException();
  }
  nums.sortThis();
  return nums.get(discreteNearestRankIndex(nums.size(), phi));
}

Expand All @@ -265,7 +254,7 @@ public static double getPercentile(DoubleArrayList nums, double phi) {
throw new NoSuchElementException();
} else {
nums.sortThis();
return nums.get((int) Math.ceil(nums.size() * phi));
return nums.get(discreteNearestRankIndex(nums.size(), phi));
}
}

Expand All @@ -279,12 +268,6 @@ public String getPercentile(double phi) throws UDFInputSeriesDataTypeNotValidExc
return Float.toString(getPercentile(floatArrayList, phi));
case DOUBLE:
return Double.toString(getPercentile(doubleArrayList, phi));
case STRING:
case TEXT:
case BOOLEAN:
case BLOB:
case DATE:
case TIMESTAMP:
default:
// This will not happen.
throw new UDFInputSeriesDataTypeNotValidException(
Expand All @@ -297,7 +280,7 @@ public static int getPercentile(IntArrayList nums, double phi) {
throw new NoSuchElementException();
} else {
nums.sortThis();
return nums.get((int) Math.ceil(nums.size() * phi));
return nums.get(discreteNearestRankIndex(nums.size(), phi));
}
}

Expand All @@ -306,7 +289,7 @@ public static long getPercentile(LongArrayList nums, double phi) {
throw new NoSuchElementException();
} else {
nums.sortThis();
return nums.get((int) Math.ceil(nums.size() * phi));
return nums.get(discreteNearestRankIndex(nums.size(), phi));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,19 @@ private void compress(List<Tuple> additionalEntries) {

i++;

} else if (i >= additionalEntries.size()) {
// Only sketch entries left (must check before comparing additionalEntries.get(i)).
if (j + 1 < entries.size()
&& entries.get(j).g + entries.get(j + 1).g + entries.get(j + 1).delta
<= removalThreshold) {
// Removable from sketch.
entries.get(j + 1).g += entries.get(j).g;
} else {
mergedEntries.add(entries.get(j));
}

j++;

} else if (additionalEntries.get(i).v < entries.get(j).v) {
if (additionalEntries.get(i).g + entries.get(j).g + entries.get(j).delta
<= removalThreshold) {
Expand All @@ -136,7 +149,7 @@ private void compress(List<Tuple> additionalEntries) {

i++;

} else { // the same as i == additionalEntries.size()
} else {
if (j + 1 < entries.size()
&& entries.get(j).g + entries.get(j + 1).g + entries.get(j + 1).delta
<= removalThreshold) {
Expand Down