Skip to content

Commit df7dfe3

Browse files
jnioche and claude authored
Add lowercaseElementNames unit test and make method public (#1860)
* Add XPath JSoup filter tests and make lowercaseElementNames public

  Tests cover attribute extraction (/@attr), tidyText(), allText(), html() functions, uppercase element name handling, fallback expression logic, and the lowercaseElementNames() utility method.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Formatting fix again

  Signed-off-by: Julien Nioche <julien@digitalpebble.com>

---------

Signed-off-by: Julien Nioche <julien@digitalpebble.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7f10122 commit df7dfe3

2 files changed

Lines changed: 16 additions & 1 deletion

File tree

core/src/main/java/org/apache/stormcrawler/jsoup/XPathFilter.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ String evaluate(Element element, String attrName) {
9292
* Lowercases element names in an XPath expression to match JSoup's normalized tag names. For
9393
* example, {@code //SPAN[@class="concept"]} becomes {@code //span[@class="concept"]}.
9494
*/
95-
static String lowercaseElementNames(String xpath) {
95+
public static String lowercaseElementNames(String xpath) {
9696
return ELEMENT_NAME
9797
.matcher(xpath)
9898
.replaceAll(m -> m.group().toLowerCase(java.util.Locale.ROOT));

core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,4 +155,19 @@ void testFallbackExpressions() throws IOException {
155155
Assertions.assertNotNull(fallback, "Fallback expression should have matched");
156156
Assertions.assertTrue(fallback.contains("Main Title"));
157157
}
158+
159+
@Test
160+
void testLowercaseElementNames() {
161+
Assertions.assertEquals(
162+
"//span[@class=\"concept\"]",
163+
XPathFilter.lowercaseElementNames("//SPAN[@class=\"concept\"]"));
164+
Assertions.assertEquals(
165+
"//meta[@name=\"keywords\"]/@content",
166+
XPathFilter.lowercaseElementNames("//META[@name=\"keywords\"]/@content"));
167+
Assertions.assertEquals(
168+
"//*[@class=\"x\"]", XPathFilter.lowercaseElementNames("//*[@class=\"x\"]"));
169+
Assertions.assertEquals(
170+
"//div/span/a[@href]", XPathFilter.lowercaseElementNames("//DIV/SPAN/A[@href]"));
171+
Assertions.assertEquals("//title", XPathFilter.lowercaseElementNames("//title"));
172+
}
158173
}

0 commit comments

Comments
 (0)