Skip to content

Commit 4ded014

Browse files
committed
UsingText example
1 parent e048e89 commit 4ded014

1 file changed

Lines changed: 101 additions & 0 deletions

File tree

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import com.nuix.enginebaseline.NuixEngine;
2+
import nuix.*;
3+
import org.junit.jupiter.api.Test;
4+
5+
import java.io.BufferedReader;
6+
import java.io.File;
7+
import java.io.IOException;
8+
import java.io.Reader;
9+
import java.util.List;
10+
import java.util.Map;
11+
import java.util.Set;
12+
import java.util.concurrent.atomic.AtomicLong;
13+
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
16+
/***
17+
* Demonstration of the usingText method, which allows for getting at very large item text that would otherwise cause
18+
* memory issues or issues allocating a single string which is too large (has more chars than char array max length).
19+
*/
20+
public class ExampleUsingText extends CommonTestFunctionality {
21+
@Test
22+
public void ExampleUsingText() throws Exception {
23+
File caseDirectory = new File(testOutputDirectory, "ExampleUsingText_Case");
24+
File dataDirectory = new File(testOutputDirectory, "ExampleUsingText_Natives");
25+
26+
List<CommonTestFunctionality.TermCount> termCounts = createSearchableTestData(dataDirectory, 5000);
27+
28+
NuixEngine nuixEngine = constructNuixEngine();
29+
nuixEngine.run((utilities -> {
30+
// Create a new case
31+
Map<String, Object> caseSettings = Map.of(
32+
"compound", false,
33+
"name", "ExampleUsingText",
34+
"description", "A Nuix case created using the Nuix Java Engine API",
35+
"investigator", "Test"
36+
);
37+
SimpleCase nuixCase = (SimpleCase) utilities.getCaseFactory().create(caseDirectory, caseSettings);
38+
39+
log.info("Queuing data for processing...");
40+
Processor processor = nuixCase.createProcessor();
41+
EvidenceContainer evidenceContainer = processor.newEvidenceContainer("SearchTestData");
42+
evidenceContainer.addFile(dataDirectory);
43+
evidenceContainer.save();
44+
45+
// Periodically log progress
46+
final long[] lastProgressTime = {0};
47+
int updateIntervalSeconds = 10;
48+
AtomicLong itemCount = new AtomicLong(0);
49+
processor.whenItemProcessed(info -> {
50+
long currentItemCount = itemCount.addAndGet(1);
51+
if (System.currentTimeMillis() - lastProgressTime[0] > updateIntervalSeconds * 1000) {
52+
lastProgressTime[0] = System.currentTimeMillis();
53+
log.info(String.format("%s items processed", currentItemCount));
54+
}
55+
});
56+
57+
log.info("Processing starting...");
58+
processor.process();
59+
log.info("Processing completed");
60+
61+
// Contrived example where we will iterate each line of the item's text and when a given
62+
// line is blank after trimming whitespace, we add 1 to our blank line count, ultimately
63+
// returning the number of blank lines we encountered.
64+
ReaderReadLogic<Integer> textOperation = new ReaderReadLogic<Integer>() {
65+
@Override
66+
public Integer withReader(Reader reader) throws IOException {
67+
int blankLineCount = 0;
68+
BufferedReader buffer = new BufferedReader(reader);
69+
String line;
70+
while((line = buffer.readLine()) != null) {
71+
if(line.trim().isEmpty()) {
72+
blankLineCount++;
73+
}
74+
}
75+
return blankLineCount;
76+
}
77+
};
78+
79+
String query = "flag:audited AND content:*";
80+
log.info(String.format("Searching for: %s", query));
81+
Set<Item> items = nuixCase.searchUnsorted(query);
82+
log.info(String.format("%s items responsive", items.size()));
83+
84+
for(Item item : items) {
85+
Text itemTextObject = item.getTextObject();
86+
// Have our text operation do something with the items text. Since this operation is handed a
87+
// Reader rather than attempting to construct one solitary string in memory, this operation should
88+
// behave better when an item has an especially large text value.
89+
int blankLineCount = itemTextObject.usingText(textOperation);
90+
91+
// Record the number of blank lines we encountered as custom metadata
92+
item.getCustomMetadata().putInteger("ContentBlankLines", blankLineCount);
93+
94+
log.info(String.format("%s has %s blank lines in its content text", item.getGuid(), blankLineCount));
95+
}
96+
97+
log.info("Closing case");
98+
nuixCase.close();
99+
}));
100+
}
101+
}

0 commit comments

Comments
 (0)