Skip to content

Commit fb29950

Browse files
committed
WIP sync up
1 parent 79255fa commit fb29950

6 files changed

Lines changed: 304 additions & 117 deletions

File tree

IntelliJ/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies {
4141
"**/*joda*.jar",
4242
"**/*commons*.jar",
4343
"**/*guava*.jar",
44+
"**/*gson*.jar",
4445
// All Nuix API jars
4546
"**/nuix-*.jar"
4647
)

IntelliJ/src/test/java/BasicTests.java

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import org.junit.jupiter.api.Test;
44

55
import java.io.File;
6-
import java.util.HashMap;
76
import java.util.List;
87
import java.util.Map;
98
import java.util.Set;
@@ -72,9 +71,8 @@ public void CreateAndOpenSimpleCase() throws Exception {
7271
@Test
7372
public void LoadDataIntoSimpleCase() throws Exception {
7473
File caseDirectory = new File(testOutputDirectory, "LoadDataIntoSimpleCase_Case");
75-
File dataDirectory = new File(testOutputDirectory, "LoadDataIntoSimpleCase_Natives");
76-
77-
List<TermCount> termCounts = createSearchableTestData(dataDirectory, 1000);
74+
File textFilesDirectory = TestData.getTestDataTextFilesDirectory();
75+
Map<String, Long> termCounts = TestData.getTestDataTextFileTermCounts();
7876

7977
NuixEngine nuixEngine = constructNuixEngine();
8078
nuixEngine.run((utilities -> {
@@ -90,17 +88,19 @@ public void LoadDataIntoSimpleCase() throws Exception {
9088
log.info("Queuing data for processing...");
9189
Processor processor = nuixCase.createProcessor();
9290
EvidenceContainer evidenceContainer = processor.newEvidenceContainer("SearchTestData");
93-
evidenceContainer.addFile(dataDirectory);
91+
evidenceContainer.addFile(textFilesDirectory);
9492
evidenceContainer.save();
9593
log.info("Processing starting...");
9694
processor.process();
9795
log.info("Processing completed");
9896

9997
log.info("Validating search counts...");
100-
for (TermCount termCount : termCounts) {
101-
long hitCount = nuixCase.count(termCount.term);
102-
assertEquals(termCount.count, hitCount, String.format("For term %s, expect %s but got %s",
103-
termCount.term, termCount.count, hitCount));
98+
for (Map.Entry<String, Long> termCount : termCounts.entrySet()) {
99+
String term = termCount.getKey();
100+
Long count = termCount.getValue() + 1; // Add 1 for hit on term counts JSON
101+
long hitCount = nuixCase.count(term);
102+
assertEquals(count, hitCount, String.format("For term %s, expect %s but got %s",
103+
term, count, hitCount));
104104
}
105105

106106
log.info("Closing case");
@@ -111,9 +111,8 @@ public void LoadDataIntoSimpleCase() throws Exception {
111111
@Test
112112
public void SearchAndTag() throws Exception {
113113
File caseDirectory = new File(testOutputDirectory, "SearchAndTag_Case");
114-
File dataDirectory = new File(testOutputDirectory, "SearchAndTag_Natives");
115-
116-
List<TermCount> termCounts = createSearchableTestData(dataDirectory, 5000);
114+
File textFilesDirectory = TestData.getTestDataTextFilesDirectory();
115+
Map<String, Long> termCounts = TestData.getTestDataTextFileTermCounts();
117116

118117
NuixEngine nuixEngine = constructNuixEngine();
119118
nuixEngine.run((utilities -> {
@@ -129,7 +128,7 @@ public void SearchAndTag() throws Exception {
129128
log.info("Queuing data for processing...");
130129
Processor processor = nuixCase.createProcessor();
131130
EvidenceContainer evidenceContainer = processor.newEvidenceContainer("SearchTestData");
132-
evidenceContainer.addFile(dataDirectory);
131+
evidenceContainer.addFile(textFilesDirectory);
133132
evidenceContainer.save();
134133

135134
// Periodically log progress
@@ -149,21 +148,24 @@ public void SearchAndTag() throws Exception {
149148
log.info("Processing completed");
150149

151150
log.info("Applying Tags...");
152-
for (TermCount termCount : termCounts) {
153-
String tag = "Terms|" + termCount.term;
154-
Set<Item> responsiveItems = nuixCase.searchUnsorted(termCount.term);
151+
for (Map.Entry<String, Long> termCount : termCounts.entrySet()) {
152+
String term = termCount.getKey();
153+
String tag = "Terms|" + term;
154+
Set<Item> responsiveItems = nuixCase.searchUnsorted(term);
155155
log.info(String.format("Tagging %s items with tag '%s'",
156156
responsiveItems.size(), tag));
157157
utilities.getBulkAnnotater().addTag(tag, responsiveItems);
158158
}
159159

160160
log.info("Validating tag counts...");
161-
for (TermCount termCount : termCounts) {
162-
String tag = "Terms|" + termCount.term;
161+
for (Map.Entry<String, Long> termCount : termCounts.entrySet()) {
162+
String term = termCount.getKey();
163+
Long count = termCount.getValue() + 1; // Add 1 for hit on term counts JSON
164+
String tag = "Terms|" + term;
163165
String query = "tag:\"" + tag + "\"";
164166
long hitCount = nuixCase.count(query);
165-
assertEquals(termCount.count, hitCount, String.format("For term %s, expect %s tagged items, but got %s",
166-
termCount.term, termCount.count, hitCount));
167+
assertEquals(count, hitCount, String.format("For term %s, expect %s tagged items, but got %s",
168+
term, count, hitCount));
167169
}
168170

169171
log.info("Closing case");
@@ -174,9 +176,8 @@ public void SearchAndTag() throws Exception {
174176
@Test
175177
public void CreateProductionSet() throws Exception {
176178
File caseDirectory = new File(testOutputDirectory, "CreateProductionSet_Case");
177-
File dataDirectory = new File(testOutputDirectory, "CreateProductionSet_Natives");
178-
179-
List<TermCount> termCounts = createSearchableTestData(dataDirectory, 5000);
179+
File textFilesDirectory = TestData.getTestDataTextFilesDirectory();
180+
Map<String, Long> termCounts = TestData.getTestDataTextFileTermCounts();
180181

181182
NuixEngine nuixEngine = constructNuixEngine();
182183
nuixEngine.run((utilities -> {
@@ -192,7 +193,7 @@ public void CreateProductionSet() throws Exception {
192193
log.info("Queuing data for processing...");
193194
Processor processor = nuixCase.createProcessor();
194195
EvidenceContainer evidenceContainer = processor.newEvidenceContainer("SearchTestData");
195-
evidenceContainer.addFile(dataDirectory);
196+
evidenceContainer.addFile(textFilesDirectory);
196197
evidenceContainer.save();
197198

198199
// Periodically log progress
@@ -242,11 +243,10 @@ public void CreateProductionSet() throws Exception {
242243
@Test
243244
public void Export() throws Exception {
244245
File caseDirectory = new File(testOutputDirectory, "ExportTest_Case");
245-
File dataDirectory = new File(testOutputDirectory, "ExportTest_Natives");
246+
File textFilesDirectory = TestData.getTestDataTextFilesDirectory();
247+
Map<String, Long> termCounts = TestData.getTestDataTextFileTermCounts();
246248
File exportDirectory = new File(testOutputDirectory, "ExportTest_Export");
247249

248-
List<TermCount> termCounts = createSearchableTestData(dataDirectory, 5000);
249-
250250
NuixEngine nuixEngine = constructNuixEngine();
251251
nuixEngine.run((utilities -> {
252252
// Create a new case
@@ -261,7 +261,7 @@ public void Export() throws Exception {
261261
log.info("Queuing data for processing...");
262262
Processor processor = nuixCase.createProcessor();
263263
EvidenceContainer evidenceContainer = processor.newEvidenceContainer("SearchTestData");
264-
evidenceContainer.addFile(dataDirectory);
264+
evidenceContainer.addFile(textFilesDirectory);
265265
evidenceContainer.save();
266266

267267
// Periodically log progress
@@ -339,7 +339,7 @@ public void Export() throws Exception {
339339
// ParallelProcessingConfigurable.setParallelProcessingSettings for list of settings and what they do.
340340
exporter.setParallelProcessingSettings(Map.of(
341341
"workerCount", utilities.getLicence().getWorkers(),
342-
"workerTemp", new File(testOutputDirectory,"WorkerTemp").getAbsolutePath()
342+
"workerTemp", new File(testOutputDirectory, "WorkerTemp").getAbsolutePath()
343343
));
344344

345345
// Track error count
@@ -348,6 +348,7 @@ public void Export() throws Exception {
348348
exporter.whenItemEventOccurs(new ItemEventCallback() {
349349
// Use this to track when we reported progress last
350350
long lastProgressMillis = System.currentTimeMillis();
351+
351352
@Override
352353
public void itemProcessed(ItemEventInfo info) {
353354
// Report progress if it has been at least 5 seconds (5000 milliseconds) since

IntelliJ/src/test/java/CommonTestFunctionality.java

Lines changed: 1 addition & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import com.esotericsoftware.minlog.Log;
21
import com.nuix.enginebaseline.NuixEngine;
32
import com.nuix.enginebaseline.NuixLicenseResolver;
4-
import net.datafaker.Faker;
53
import org.apache.commons.io.FileUtils;
64
import org.apache.logging.log4j.LogManager;
75
import org.apache.logging.log4j.Logger;
@@ -12,7 +10,6 @@
1210
import java.io.IOException;
1311
import java.lang.management.ManagementFactory;
1412
import java.lang.management.RuntimeMXBean;
15-
import java.nio.charset.StandardCharsets;
1613
import java.util.*;
1714

1815
public class CommonTestFunctionality {
@@ -38,30 +35,16 @@ public TermCount(String term, long count) {
3835
// place inputs for tests.
3936
protected static File testDataDirectory;
4037

41-
// A seeded Random instance so that we get repeatable random results in a few places
42-
// where randomness is used, for example while generating test source data.
43-
protected static Random rand = new Random(1234567890);
44-
45-
// Instance of Faker library to assist with fake data generation for tests. We provide it
46-
// seeded Random instance so that when repeatedly running the same test or tests, the results will hopefully
47-
// be consistent between runs.
48-
protected static Faker faker = new Faker(rand);
49-
5038
// When true, the testOutputDirectory used during tests will be deleted
5139
// upon test completion. Set this to false if you wish to manually review the output
5240
// of tests afterwards.
5341
protected static boolean deleteTestOutputOnCompletion = true;
5442

55-
5643
@BeforeAll
5744
public static void setup() throws Exception {
45+
TestData.init();
5846
log = LogManager.getLogger("Tests");
59-
6047
testOutputDirectory = new File(System.getenv("TEST_OUTPUT_DIRECTORY"));
61-
testDataDirectory = new File(System.getenv("TEST_DATA_DIRECTORY"));
62-
log.info("TEST_OUTPUT_DIRECTORY: " + testOutputDirectory.getAbsolutePath());
63-
log.info("TEST_DATA_DIRECTORY: " + testDataDirectory.getAbsolutePath());
64-
6548
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
6649
if (deleteTestOutputOnCompletion) {
6750
try {
@@ -117,69 +100,4 @@ public NuixEngine constructNuixEngine(String... additionalRequiredFeatures) {
117100
.setEngineDistributionDirectoryFromEnvVar()
118101
.setLogDirectory(new File(testOutputDirectory, "Logs_"+System.currentTimeMillis()));
119102
}
120-
121-
/***
122-
* Creates a series of random text files at a specified location, returning details about the expected terms
123-
* and their counts so a test may later ingest and verify the counts.
124-
* @param outputDirectory Where the text files should be written to
125-
* @param itemsToGenerate The number of text files desired
126-
* @return A List of {@link TermCount} objects
127-
*/
128-
public List<TermCount> createSearchableTestData(File outputDirectory, int itemsToGenerate) {
129-
log.info(String.format("Generating %s random text files, for use as test data, to directory %s",
130-
itemsToGenerate, outputDirectory));
131-
Map<String, TermCount> overallTermCounts = new HashMap<>();
132-
133-
List<String> termPool = new ArrayList<>();
134-
135-
// Pre generate pool of 2000 terms
136-
for (int i = 0; i < 2000; i++) {
137-
String term = faker.text().text(4, 8);
138-
termPool.add(term);
139-
}
140-
141-
// Will hold terms to be written to each generated text file
142-
Set<String> textFileTerms = new HashSet<>();
143-
144-
// Iteratively generate test text files
145-
for (int i = 0; i < itemsToGenerate; i++) {
146-
if (i + 1 % 1000 == 0) {
147-
Log.info(String.format("Generated %s fake text files so far...", i + 1));
148-
}
149-
textFileTerms.clear();
150-
151-
// Randomly determine how many terms to write
152-
int targetTermCount = faker.random().nextInt(5, 10);
153-
154-
// Determine output file name
155-
File textFile = new File(outputDirectory, String.format("%08d.txt", i));
156-
157-
for (int t = 0; t < targetTermCount; t++) {
158-
// Grab term from pool
159-
String term = termPool.get(faker.random().nextInt(0, termPool.size() - 1));
160-
161-
// We will only write each term once per file for simplicity
162-
if (textFileTerms.contains(term)) {
163-
continue;
164-
} else {
165-
textFileTerms.add(term);
166-
}
167-
168-
// Track this term in overall counts for later verification
169-
if (!overallTermCounts.containsKey(term)) {
170-
overallTermCounts.put(term, new TermCount(term, 1));
171-
} else {
172-
overallTermCounts.get(term).count++;
173-
}
174-
}
175-
176-
try {
177-
FileUtils.writeStringToFile(textFile, String.join(" ", textFileTerms), StandardCharsets.UTF_8);
178-
} catch (IOException e) {
179-
throw new RuntimeException(e);
180-
}
181-
}
182-
183-
return new ArrayList<>(overallTermCounts.values());
184-
}
185103
}

IntelliJ/src/test/java/ExampleUsingText.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import java.io.File;
77
import java.io.IOException;
88
import java.io.Reader;
9-
import java.util.List;
109
import java.util.Map;
1110
import java.util.Set;
1211
import java.util.concurrent.atomic.AtomicLong;
@@ -21,9 +20,8 @@ public class ExampleUsingText extends CommonTestFunctionality {
2120
@Test
2221
public void ExampleUsingText() throws Exception {
2322
File caseDirectory = new File(testOutputDirectory, "ExampleUsingText_Case");
24-
File dataDirectory = new File(testOutputDirectory, "ExampleUsingText_Natives");
25-
26-
List<CommonTestFunctionality.TermCount> termCounts = createSearchableTestData(dataDirectory, 5000);
23+
File textFilesDirectory = TestData.getTestDataTextFilesDirectory();
24+
Map<String,Long> termCounts = TestData.getTestDataTextFileTermCounts();
2725

2826
NuixEngine nuixEngine = constructNuixEngine();
2927
nuixEngine.run((utilities -> {
@@ -39,7 +37,7 @@ public void ExampleUsingText() throws Exception {
3937
log.info("Queuing data for processing...");
4038
Processor processor = nuixCase.createProcessor();
4139
EvidenceContainer evidenceContainer = processor.newEvidenceContainer("SearchTestData");
42-
evidenceContainer.addFile(dataDirectory);
40+
evidenceContainer.addFile(textFilesDirectory);
4341
evidenceContainer.save();
4442

4543
// Periodically log progress

0 commit comments

Comments
 (0)