Skip to content

Commit 11633a1

Browse files
[MOD] XQuery: faster set operations for integer sequences
1 parent 99e9d0b commit 11633a1

2 files changed

Lines changed: 62 additions & 65 deletions

File tree

basex-core/src/main/java/org/basex/query/func/fn/FnDistinctValues.java

Lines changed: 30 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -33,17 +33,15 @@ public final class FnDistinctValues extends StandardFunc {
3333
public Iter iter(final QueryContext qc) throws QueryException {
3434
final Iter values = arg(0).atomIter(qc, info);
3535
final Collation collation = toCollation(arg(1), qc);
36-
37-
final ItemSet set = ItemSet.get(collation, info);
38-
final IntSet ints = new IntSet();
39-
4036
return new Iter() {
41-
boolean intseq = seqType().instanceOf(SeqType.INTEGER_ZM);
37+
final IntSet ints = new IntSet();
38+
ItemSet set;
4239

4340
@Override
4441
public Item next() throws QueryException {
45-
for(Item item; (item = qc.next(values)) != null;) {
46-
if(intseq) {
42+
if(set == null) {
43+
// try to parse input as 32-bit integer sequence
44+
for(Item item; (item = qc.next(values)) != null;) {
4745
if(item.type == AtomType.INTEGER) {
4846
final long l = item.itr(info);
4947
final int i = (int) l;
@@ -52,10 +50,14 @@ public Item next() throws QueryException {
5250
continue;
5351
}
5452
}
55-
// fallback (input is no 32bit integer)
56-
intseq = false;
53+
set = ItemSet.get(collation, info);
5754
for(final int i : ints.toArray()) set.add(Int.get(i));
55+
if(set.add(item)) return item;
56+
break;
5857
}
58+
}
59+
// generic fallback
60+
for(Item item; (item = qc.next(values)) != null;) {
5961
if(set.add(item)) return item;
6062
}
6163
return null;
@@ -68,31 +70,28 @@ public Value value(final QueryContext qc) throws QueryException {
6870
final Iter values = arg(0).atomIter(qc, info);
6971
final Collation collation = toCollation(arg(1), qc);
7072

71-
final ItemSet set = ItemSet.get(collation, info);
72-
final IntSet ints = new IntSet();
73-
74-
final ValueBuilder vb = new ValueBuilder(qc);
73+
// try to parse input as 32-bit integer sequence
7574
final LongList list = new LongList();
75+
final IntSet ints = new IntSet();
76+
Item item = null;
77+
while((item = qc.next(values)) != null) {
78+
if(item.type != AtomType.INTEGER) break;
79+
final long l = item.itr(info);
80+
final int i = (int) l;
81+
if(i != l) break;
82+
if(ints.add(i)) list.add(i);
83+
}
84+
final Value intseq = IntSeq.get(list.finish());
85+
if(item == null) return intseq;
7686

77-
boolean intseq = seqType().instanceOf(SeqType.INTEGER_ZM);
78-
for(Item item; (item = qc.next(values)) != null;) {
79-
if(intseq) {
80-
if(item.type == AtomType.INTEGER) {
81-
final long l = item.itr(info);
82-
final int i = (int) l;
83-
if(i == l) {
84-
if(ints.add(i)) list.add(i);
85-
continue;
86-
}
87-
}
88-
// fallback (input is no 32bit integer)
89-
intseq = false;
90-
for(final int i : ints.toArray()) set.add(Int.get(i));
91-
for(final long l : list.finish()) vb.add(Int.get(l));
92-
}
87+
// generic fallback
88+
final ValueBuilder vb = new ValueBuilder(qc).add(intseq);
89+
final ItemSet set = ItemSet.get(collation, info);
90+
for(final int i : ints.toArray()) set.add(Int.get(i));
91+
do {
9392
if(set.add(item)) vb.add(item);
94-
}
95-
return intseq ? IntSeq.get(list.finish()) : vb.value(this);
93+
} while((item = qc.next(values)) != null);
94+
return vb.value(this);
9695
}
9796

9897
@Override

basex-core/src/main/java/org/basex/query/func/fn/FnDuplicateValues.java

Lines changed: 32 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -24,18 +24,15 @@ public final class FnDuplicateValues extends StandardFunc {
2424
public Iter iter(final QueryContext qc) throws QueryException {
2525
final Iter values = arg(0).atomIter(qc, info);
2626
final Collation collation = toCollation(arg(1), qc);
27-
28-
final ItemSet set1 = ItemSet.get(collation, info);
29-
final ItemSet set2 = ItemSet.get(collation, info);
30-
final IntSet ints1 = new IntSet(), ints2 = new IntSet();
31-
3227
return new Iter() {
33-
boolean intseq = seqType().eq(SeqType.INTEGER_ZM);
28+
final IntSet ints1 = new IntSet(), ints2 = new IntSet();
29+
ItemSet set1, set2;
3430

3531
@Override
3632
public Item next() throws QueryException {
37-
for(Item item; (item = qc.next(values)) != null;) {
38-
if(intseq) {
33+
if(set1 == null) {
34+
// try to parse input as 32-bit integer sequence
35+
for(Item item; (item = qc.next(values)) != null;) {
3936
if(item.type == AtomType.INTEGER) {
4037
final long l = item.itr(info);
4138
final int i = (int) l;
@@ -44,11 +41,16 @@ public Item next() throws QueryException {
4441
continue;
4542
}
4643
}
47-
// fallback (input is no 32bit integer)
48-
intseq = false;
44+
set1 = ItemSet.get(collation, info);
45+
set2 = ItemSet.get(collation, info);
4946
for(final int i : ints1.toArray()) set1.add(Int.get(i));
5047
for(final int i : ints2.toArray()) set2.add(Int.get(i));
48+
if(!set1.add(item) && set2.add(item)) return item;
49+
break;
5150
}
51+
}
52+
// generic fallback
53+
for(Item item; (item = qc.next(values)) != null;) {
5254
if(!set1.add(item) && set2.add(item)) return item;
5355
}
5456
return null;
@@ -61,33 +63,29 @@ public Value value(final QueryContext qc) throws QueryException {
6163
final Iter values = arg(0).atomIter(qc, info);
6264
final Collation collation = toCollation(arg(1), qc);
6365

64-
final ItemSet set1 = ItemSet.get(collation, info);
65-
final ItemSet set2 = ItemSet.get(collation, info);
66-
final IntSet ints1 = new IntSet(), ints2 = new IntSet();
67-
68-
final ValueBuilder vb = new ValueBuilder(qc);
66+
// try to parse input as 32-bit integer sequence
6967
final LongList list = new LongList();
68+
final IntSet ints1 = new IntSet(), ints2 = new IntSet();
69+
Item item = null;
70+
while((item = qc.next(values)) != null) {
71+
if(item.type != AtomType.INTEGER) break;
72+
final long l = item.itr(info);
73+
final int i = (int) l;
74+
if(i != l) break;
75+
if(!ints1.add(i) && ints2.add(i)) list.add(i);
76+
}
77+
final Value intseq = IntSeq.get(list.finish());
78+
if(item == null) return intseq;
7079

71-
boolean intseq = seqType().eq(SeqType.INTEGER_ZM);
72-
for(Item item; (item = qc.next(values)) != null;) {
73-
if(intseq) {
74-
if(item.type == AtomType.INTEGER) {
75-
final long l = item.itr(info);
76-
final int i = (int) l;
77-
if(i == l) {
78-
if(!ints1.add(i) && ints2.add(i)) list.add(i);
79-
continue;
80-
}
81-
}
82-
// fallback (input is no 32bit integer)
83-
intseq = false;
84-
for(final int i : ints1.toArray()) set1.add(Int.get(i));
85-
for(final int i : ints2.toArray()) set2.add(Int.get(i));
86-
for(final long l : list.finish()) vb.add(Int.get(l));
87-
}
80+
// generic fallback
81+
final ValueBuilder vb = new ValueBuilder(qc).add(intseq);
82+
final ItemSet set1 = ItemSet.get(collation, info), set2 = ItemSet.get(collation, info);
83+
for(final int i : ints1.toArray()) set1.add(Int.get(i));
84+
for(final int i : ints2.toArray()) set2.add(Int.get(i));
85+
do {
8886
if(!set1.add(item) && set2.add(item)) vb.add(item);
89-
}
90-
return intseq ? IntSeq.get(list.finish()) : vb.value(this);
87+
} while((item = qc.next(values)) != null);
88+
return vb.value(this);
9189
}
9290

9391
@Override

0 commit comments

Comments
 (0)