-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathColumnStats.scala
More file actions
54 lines (45 loc) · 1.59 KB
/
ColumnStats.scala
File metadata and controls
54 lines (45 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
package com.cloudera.sa.examples.tablestats.model
/**
 * Created by ted.malaska on 6/29/15.
 *
 * Mutable accumulator of summary statistics for a single column.
 *
 * Statistics are fed in one (value, count) pair at a time through
 * `+=(colValue, colCount)`, and two accumulators can be combined through
 * `+=(columnStats)`.
 *
 * @param nulls        number of null values folded in (incremented by one per null pair)
 * @param empties      number of empty-string values folded in (incremented by one per pair)
 * @param totalCount   sum of the `colCount` of every pair folded in
 * @param uniqueValues number of (value, count) pairs folded in
 * @param maxLong      largest `Long` value seen; `Long.MinValue` until one is seen
 * @param minLong      smallest `Long` value seen; `Long.MaxValue` until one is seen
 * @param sumLong      sum of the `Long` values seen (each value added once per pair)
 * @param topNValues   top-N tracker that receives every (value, count) pair
 */
class ColumnStats(var nulls: Long = 0L,
                  var empties: Long = 0L,
                  var totalCount: Long = 0L,
                  var uniqueValues: Long = 0L,
                  var maxLong: Long = Long.MinValue,
                  var minLong: Long = Long.MaxValue,
                  var sumLong: Long = 0L,
                  val topNValues: TopNList = new TopNList(10)) extends Serializable {

  /**
   * `sumLong` divided by `totalCount` as a floating-point value.
   *
   * The division is done in `Double`, so a zero `totalCount` yields `NaN`
   * (or an infinity if `sumLong` is non-zero) instead of throwing.
   */
  def avg: Double = sumLong / totalCount.toDouble

  //Part C.B
  /**
   * Folds one (value, count) pair into the statistics.
   *
   * @param colValue the column value; may be null
   * @param colCount the count associated with `colValue`
   */
  def +=(colValue: Any, colCount: Long): Unit = {
    totalCount += colCount
    uniqueValues += 1

    // NOTE(review): nulls, empties and sumLong are advanced once per pair, not
    // weighted by colCount, while totalCount is weighted -- confirm this is intended.
    colValue match {
      case null =>
        nulls += 1
      case s: String =>
        if (s.isEmpty) empties += 1
      case l: Long =>
        if (maxLong < l) maxLong = l
        if (minLong > l) minLong = l
        sumLong += l
      case _ =>
        // Any other type carries no type-specific statistic. Without this case the
        // match threw scala.MatchError after the counters above were already mutated.
        ()
    }

    topNValues.add(colValue, colCount)
  }

  //Part C.C
  /**
   * Merges another accumulator into this one.
   *
   * Counters and sums are added, `maxLong`/`minLong` are combined as the overall
   * maximum/minimum, and every entry of the other accumulator's top-N list is
   * re-added to this accumulator's top-N list.
   *
   * @param columnStats the accumulator to merge in; it is only read here
   */
  def +=(columnStats: ColumnStats): Unit = {
    totalCount += columnStats.totalCount
    uniqueValues += columnStats.uniqueValues
    nulls += columnStats.nulls
    empties += columnStats.empties
    sumLong += columnStats.sumLong
    maxLong = maxLong.max(columnStats.maxLong)
    // The merged minimum is the smaller of the two minima (this previously used max).
    minLong = minLong.min(columnStats.minLong)
    columnStats.topNValues.topNCountsForColumnArray.foreach { r =>
      topNValues.add(r._1, r._2)
    }
  }

  override def toString: String = s"ColumnStats(nulls=$nulls, empties=$empties, totalCount=$totalCount, uniqueValues=$uniqueValues, maxLong=$maxLong, minLong=$minLong, sumLong=$sumLong, topNValues=$topNValues, avgLong=$avg)"
}