Skip to content

Commit 4d60e09

Browse files
committed
Column arithmetics review fixes (tests for functions on ColumnAccessor, expr usage)
1 parent 1434413 commit 4d60e09

3 files changed

Lines changed: 68 additions & 26 deletions

File tree

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,14 @@ class DataColumnArithmeticsTests {
2222

2323
@Test
2424
fun `not on ColumnAccessor`() {
25-
val isClosed = column<Boolean>("isClosed")
2625
val df = dataFrameOf("isClosed")(true, false)
27-
df.getColumn { !isClosed } shouldBe columnOf(false, true).named("isClosed")
26+
df.getColumn { !col<Boolean>("isClosed") } shouldBe columnOf(false, true).named("isClosed")
2827
}
2928

3029
@Test
3130
fun `not on ColumnAccessor with null`() {
32-
val isClosed = column<Boolean?>("isClosed")
3331
val df = dataFrameOf("isClosed")(true, null)
34-
df.getColumn { !isClosed } shouldBe columnOf(false, null).named("isClosed")
32+
df.getColumn { !col<Boolean?>("isClosed") } shouldBe columnOf(false, null).named("isClosed")
3533
}
3634

3735
// endregion
@@ -59,11 +57,10 @@ class DataColumnArithmeticsTests {
5957

6058
@Test
6159
fun `plus on ColumnAccessor of Int`() {
62-
val age = column<Int>("age")
6360
val df = dataFrameOf("age")(1, 2, 3)
6461
val expected = columnOf(2, 3, 4).named("age")
65-
df.getColumn { age + 1 } shouldBe expected
66-
df.getColumn { 1 + age } shouldBe expected
62+
df.getColumn { col<Int>("age") + 1 } shouldBe expected
63+
df.getColumn { 1 + col<Int>("age") } shouldBe expected
6764
}
6865

6966
@Test
@@ -123,10 +120,9 @@ class DataColumnArithmeticsTests {
123120

124121
@Test
125122
fun `plus on ColumnAccessor with String`() {
126-
val age = column<Int>("age")
127123
val df = dataFrameOf("age")(1, 2, 3)
128124
val expected = columnOf("11", "21", "31").named("age")
129-
df.getColumn { age + "1" } shouldBe expected
125+
df.getColumn { col<Int>("age") + "1" } shouldBe expected
130126
}
131127

132128
// endregion
@@ -142,10 +138,9 @@ class DataColumnArithmeticsTests {
142138

143139
@Test
144140
fun `ColumnAccessor of Int minus Int`() {
145-
val age = column<Int>("age")
146141
val df = dataFrameOf("age")(2, 3, 4)
147142
val expected = columnOf(1, 2, 3).named("age")
148-
df.getColumn { age - 1 } shouldBe expected
143+
df.getColumn { col<Int>("age") - 1 } shouldBe expected
149144
}
150145

151146
@Test
@@ -210,10 +205,9 @@ class DataColumnArithmeticsTests {
210205

211206
@Test
212207
fun `Int minus ColumnAccessor of Int`() {
213-
val age = column<Int>("age")
214208
val df = dataFrameOf("age")(2, 3, 4)
215209
val expected = columnOf(8, 7, 6).named("age")
216-
df.getColumn { 10 - age } shouldBe expected
210+
df.getColumn { 10 - col<Int>("age") } shouldBe expected
217211
}
218212

219213
@Test
@@ -278,10 +272,9 @@ class DataColumnArithmeticsTests {
278272

279273
@Test
280274
fun `unary minus on ColumnAccessor of Int`() {
281-
val age = column<Int>("age")
282275
val df = dataFrameOf("age")(1, 2, 3)
283276
val expected = columnOf(-1, -2, -3).named("age")
284-
df.getColumn { -age } shouldBe expected
277+
df.getColumn { -col<Int>("age") } shouldBe expected
285278
}
286279

287280
@Test
@@ -332,10 +325,9 @@ class DataColumnArithmeticsTests {
332325

333326
@Test
334327
fun `times on ColumnAccessor of Int`() {
335-
val age = column<Int>("age")
336328
val df = dataFrameOf("age")(1, 2, 3)
337329
val expected = columnOf(2, 4, 6).named("age")
338-
df.getColumn { age * 2 } shouldBe expected
330+
df.getColumn { col<Int>("age") * 2 } shouldBe expected
339331
}
340332

341333
@Test
@@ -402,10 +394,9 @@ class DataColumnArithmeticsTests {
402394

403395
@Test
404396
fun `ColumnAccessor of Int div Int`() {
405-
val age = column<Int>("age")
406397
val df = dataFrameOf("age")(2, 4, 6)
407398
val expected = columnOf(1, 2, 3).named("age")
408-
df.getColumn { age / 2 } shouldBe expected
399+
df.getColumn { col<Int>("age") / 2 } shouldBe expected
409400
}
410401

411402
@Test
@@ -470,9 +461,8 @@ class DataColumnArithmeticsTests {
470461

471462
@Test
472463
fun `ColumnAccessor of Int div zero`() {
473-
val age = column<Int>("age")
474464
val df = dataFrameOf("age")(2, 4, 6)
475-
shouldThrow<ArithmeticException> { df.getColumn { age / 0 } }
465+
shouldThrow<ArithmeticException> { df.getColumn { col<Int>("age") / 0 } }
476466
}
477467

478468
@Test
@@ -527,10 +517,9 @@ class DataColumnArithmeticsTests {
527517

528518
@Test
529519
fun `Int div ColumnAccessor of Int`() {
530-
val age = column<Int>("age")
531520
val df = dataFrameOf("age")(2, 4, 6)
532521
val expected = columnOf(6, 3, 2).named("age")
533-
df.getColumn { 12 / age } shouldBe expected
522+
df.getColumn { 12 / col<Int>("age") } shouldBe expected
534523
}
535524

536525
@Test
@@ -590,9 +579,8 @@ class DataColumnArithmeticsTests {
590579

591580
@Test
592581
fun `Int div ColumnAccessor with zero`() {
593-
val age = column<Int>("age")
594582
val df = dataFrameOf("age")(2, 0, 6)
595-
shouldThrow<ArithmeticException> { df.getColumn { 10 / age } }
583+
shouldThrow<ArithmeticException> { df.getColumn { 10 / col<Int>("age") } }
596584
}
597585

598586
@Test

docs/StardustDocs/topics/columnArithmetics.md

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,35 @@ to each cell in a column, multiplying a column by a value, and comparing element
99
## When useful
1010

1111
In most transformations, these column operations are not the preferred approach in Kotlin DataFrame,
12-
because the library provides row-based APIs such as [`add`](add.md), [`update`](update.md), [`map`](map.md), and `expr`,
12+
because the library provides row-based APIs such as [`add`](add.md), [`update`](update.md), and [`map`](map.md),
1313
which are usually recommended.
1414

15+
Also, the [`expr`](ColumnSelectors.md#expr-column-expression) function
16+
is particularly useful in this context, as it allows you to write row expressions
17+
inside the [`Columns Selection DSL`](ColumnSelectors.md).
18+
In other words, [`expr`](ColumnSelectors.md#expr-column-expression) works as an adapter
19+
between a column selector and a row expression.
20+
21+
For example,
22+
<!---FUN columnArithmetics_groupBy_without_expr-->
23+
24+
```kotlin
25+
orders.groupBy { status + " orders" }
26+
```
27+
28+
<!---END-->
29+
is equivalent to
30+
<!---FUN columnArithmetics_groupBy_with_expr-->
31+
32+
```kotlin
33+
orders.groupBy { expr("status") { status + " orders" } }
34+
```
35+
36+
<!---END-->
37+
38+
but in the first case, `status` is used as a [`DataColumn`](DataColumn.md) of `String` values,
39+
and in the second case, `status` is treated as a single `String` value taken from the current row.
40+
1541
However, column arithmetics might still be useful in some cases.
1642
One example is building temporary plotting expressions.
1743
If distance is stored in meters, but you need to [`plot`](https://kotlin.github.io/kandy/welcome.html) it in kilometers,

samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import org.jetbrains.kotlinx.dataframe.api.columnOf
77
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
88
import org.jetbrains.kotlinx.dataframe.api.div
99
import org.jetbrains.kotlinx.dataframe.api.eq
10+
import org.jetbrains.kotlinx.dataframe.api.expr
1011
import org.jetbrains.kotlinx.dataframe.api.format
1112
import org.jetbrains.kotlinx.dataframe.api.getColumnIndex
13+
import org.jetbrains.kotlinx.dataframe.api.groupBy
1214
import org.jetbrains.kotlinx.dataframe.api.gt
1315
import org.jetbrains.kotlinx.dataframe.api.linearBg
1416
import org.jetbrains.kotlinx.dataframe.api.lt
@@ -299,4 +301,30 @@ class Modify : DataFrameSampleHelper("operations", "modify") {
299301
orders.cost lt 20.0
300302
// SampleEnd
301303
}
304+
305+
@Test
306+
fun columnArithmetics_groupBy_without_expr() {
307+
val orders = dataFrameOf(
308+
"id" to columnOf("1", "2", "3"),
309+
"status" to columnOf("completed", "completed", "canceled"),
310+
"cost" to columnOf(10.0, 200.0, 1500.0),
311+
)
312+
313+
// SampleStart
314+
orders.groupBy { status + " orders" }
315+
// SampleEnd
316+
}
317+
318+
@Test
319+
fun columnArithmetics_groupBy_with_expr() {
320+
val orders = dataFrameOf(
321+
"id" to columnOf("1", "2", "3"),
322+
"status" to columnOf("completed", "completed", "canceled"),
323+
"cost" to columnOf(10.0, 200.0, 1500.0),
324+
)
325+
326+
// SampleStart
327+
orders.groupBy { expr("status") { status + " orders" } }
328+
// SampleEnd
329+
}
302330
}

0 commit comments

Comments
 (0)