-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathColumnStats.scala
More file actions
56 lines (47 loc) · 1.8 KB
/
ColumnStats.scala
File metadata and controls
56 lines (47 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
package com.cloudera.sa.examples.tablestats.model
/**
 * Mutable accumulator of per-column statistics, fed one `(value, count)` pair at a time
 * and mergeable with another accumulator.
 *
 * Created by ted.malaska on 6/29/15.
 *
 * NOTE(review): `nulls`, `empties` and `sumLong` are incremented once per call to
 * `+=(colValue, colCount)` and are NOT weighted by `colCount`, while `totalCount` is.
 * As a result `avgLong` is "sum of the Long values seen / total row count", not a
 * count-weighted mean. That behaviour is preserved here; confirm it is intended.
 *
 * @param nulls        number of `+=` calls whose value was `null`
 * @param empties      number of `+=` calls whose value was an empty `String`
 * @param totalCount   sum of every `colCount` passed to `+=`
 * @param uniqueValues number of `+=(colValue, colCount)` calls (one per value passed in)
 * @param maxLong      largest `Long` value seen; `Long.MinValue` until one is seen
 * @param minLong      smallest `Long` value seen; `Long.MaxValue` until one is seen
 * @param sumLong      sum of the `Long` values seen (each added once, regardless of count)
 * @param topNValues   top-N tracker that receives every `(value, count)` pair
 */
class ColumnStats(var nulls: Long = 0L,
                  var empties: Long = 0L,
                  var totalCount: Long = 0L,
                  var uniqueValues: Long = 0L,
                  var maxLong: Long = Long.MinValue,
                  var minLong: Long = Long.MaxValue,
                  var sumLong: Long = 0L,
                  val topNValues: TopNList = new TopNList(10)) extends Serializable {

  /**
   * Integer quotient `sumLong / totalCount`.
   *
   * @return 0 when nothing has been counted yet (`totalCount == 0`), so that an empty
   *         accumulator (and its `toString`) never throws `ArithmeticException`
   */
  def avgLong: Long = if (totalCount == 0L) 0L else sumLong / totalCount

  //Part B.1.1
  /**
   * Folds a single value and its occurrence count into the statistics.
   *
   * @param colValue the column value; may be `null`. Only `null`, `String` and `Long`
   *                 values affect the type-specific counters; any other type still
   *                 contributes to `totalCount`, `uniqueValues` and `topNValues`
   * @param colCount how many times `colValue` occurred
   */
  def +=(colValue: Any, colCount: Long): Unit = {
    totalCount += colCount
    uniqueValues += 1

    colValue match {
      // `null` must be tested first: type patterns below never match null.
      case null =>
        nulls += 1
      case text: String =>
        if (text.isEmpty) empties += 1
      case number: Long =>
        maxLong = maxLong.max(number)
        minLong = minLong.min(number)
        sumLong += number
      case _ =>
        // Other value types have no dedicated counters.
    }

    topNValues.add(colValue, colCount)
  }

  //Part B.1.2
  /**
   * Merges another accumulator into this one: counters and sums are added, the
   * min/max bounds are widened, and every entry of the other top-N list is re-added
   * to this instance's `topNValues`.
   *
   * @param columnStats the accumulator to merge in; it is read but not modified here
   */
  def +=(columnStats: ColumnStats): Unit = {
    totalCount += columnStats.totalCount
    uniqueValues += columnStats.uniqueValues
    nulls += columnStats.nulls
    empties += columnStats.empties
    sumLong += columnStats.sumLong
    maxLong = maxLong.max(columnStats.maxLong)
    minLong = minLong.min(columnStats.minLong)

    columnStats.topNValues.topNCountsForColumnArray.foreach { entry =>
      topNValues.add(entry._1, entry._2)
    }
  }

  /** Renders every counter plus the derived `avgLong`. */
  override def toString: String = s"ColumnStats(nulls=$nulls, empties=$empties, totalCount=$totalCount, uniqueValues=$uniqueValues, maxLong=$maxLong, minLong=$minLong, sumLong=$sumLong, topNValues=$topNValues, avgLong=$avgLong)"
}