Skip to content

Commit 03d9cb3

Browse files
author
eisenbahnplatte
committed
updated Collection Download
1 parent af59f46 commit 03d9cb3

4 files changed

Lines changed: 44 additions & 18 deletions

File tree

src/main/scala/org/dbpedia/databus/client/filehandling/SourceHandler.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import org.apache.http.client.ResponseHandler
66
import org.apache.http.client.methods.HttpGet
77
import org.apache.http.impl.client.{BasicResponseHandler, HttpClientBuilder}
88
import org.dbpedia.databus.client.filehandling.download.Downloader
9+
import org.dbpedia.databus.client.sparql.QueryHandler
10+
import org.dbpedia.databus.client.sparql.queries.DatabusQueries
911
import org.slf4j.LoggerFactory
1012

1113

@@ -53,13 +55,16 @@ object SourceHandler {
5355
*/
5456
def handleQuery(query: String, target: File, cache: File, format: String, compression: String, overwrite: Boolean=false):Unit = {
5557

56-
val queryStr = {
58+
var queryStr = {
5759
if (isCollection(query)) getQueryOfCollection(query)
5860
else query
5961
}
6062

6163
printTask("query", queryStr, target.pathAsString)
6264

65+
// necessary because collection queries return the permanent DBpedia URIs, not the actual download links
66+
if(isCollection(query)) queryStr = DatabusQueries.queryDownloadURLOfDatabusFiles(QueryHandler.executeDownloadQuery(queryStr))
67+
6368
println("DOWNLOAD TOOL:")
6469

6570
val allSHAs = Downloader.downloadWithQuery(queryStr, cache, overwrite)

src/main/scala/org/dbpedia/databus/client/filehandling/download/Downloader.scala

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,24 @@ object Downloader {
2929
results.foreach(fileIRI => {
3030
val fileSHA = QueryHandler.getSHA256Sum(fileIRI)
3131

32-
if (overwrite) {
33-
downloadFile(fileIRI, fileSHA, targetdir) match {
34-
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
35-
case None => ""
36-
}
37-
}
38-
else {
39-
if (!FileUtil.checkIfFileInCache(targetdir, fileSHA)) {
32+
if (fileSHA != "") {
33+
if (overwrite) {
4034
downloadFile(fileIRI, fileSHA, targetdir) match {
41-
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
42-
case None => ""
35+
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
36+
case None => ""
4337
}
4438
}
4539
else {
46-
println(s"$fileIRI --> already exists in Cache")
47-
allSHAs = allSHAs :+ fileSHA
40+
if (!FileUtil.checkIfFileInCache(targetdir, fileSHA)) {
41+
downloadFile(fileIRI, fileSHA, targetdir) match {
42+
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
43+
case None => ""
44+
}
45+
}
46+
else {
47+
println(s"$fileIRI --> already exists in Cache")
48+
allSHAs = allSHAs :+ fileSHA
49+
}
4850
}
4951
}
5052
})
@@ -80,7 +82,7 @@ object Downloader {
8082
if (!correctFileTransfer) {
8183
println("file download had issues")
8284
LoggerFactory.getLogger("Download-Logger").error(s"couldn't download file $url properly")
83-
file.delete(true)
85+
file.delete(swallowIOExceptions = true)
8486
return None
8587
}
8688

src/main/scala/org/dbpedia/databus/client/sparql/QueryHandler.scala

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@ package org.dbpedia.databus.client.sparql
22

33
import java.io.FileNotFoundException
44
import java.net.URL
5-
65
import better.files.File
76
import org.apache.commons.io.FileUtils
87
import org.apache.jena.JenaRuntime
98
import org.apache.jena.query._
109
import org.apache.jena.rdf.model.{Model, ModelFactory}
1110
import org.apache.jena.riot.{RDFDataMgr, RDFLanguages}
1211
import org.dbpedia.databus.client.sparql.queries.{DataIdQueries, DatabusQueries, MappingQueries}
13-
import org.slf4j.LoggerFactory
12+
import org.slf4j.{Logger, LoggerFactory}
1413

1514
object QueryHandler {
1615

1716
val service = "https://databus.dbpedia.org/repo/sparql"
17+
val logger: Logger = LoggerFactory.getLogger(getClass)
1818

1919
def executeQuery(queryString: String, model:Model = ModelFactory.createDefaultModel()): Seq[QuerySolution] = {
2020

@@ -54,9 +54,15 @@ object QueryHandler {
5454
def getSHA256Sum(url: String): String = {
5555

5656
val results = executeQuery(DatabusQueries.querySha256(url))
57-
val sparqlVar = results.head.varNames().next()
5857

59-
results.head.getLiteral(sparqlVar).getString
58+
try{
59+
val sparqlVar = results.head.varNames().next()
60+
results.head.getLiteral(sparqlVar).getString
61+
} catch {
62+
case noSuchElementException: NoSuchElementException =>
63+
logger.error(s"No Sha Sum found for $url")
64+
""
65+
}
6066

6167
}
6268

src/main/scala/org/dbpedia/databus/client/sparql/queries/DatabusQueries.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,17 @@ object DatabusQueries {
5353
| ?mapping <http://tmp-namespace.org/databusFixRequired> ?file .
5454
|}
5555
|""".stripMargin
56+
57+
/**
 * Builds a SPARQL SELECT query that looks up the `dcat:downloadURL` values for the given file IRIs.
 *
 * Each entry of `files` is wrapped as `(<iri>)` and placed in a `VALUES (?databusfile)` clause; the
 * query then selects the distinct `?file` values of every `?distribution` that points to one of those
 * IRIs through any predicate and has a `dcat:downloadURL`.
 *
 * NOTE(review): the IRIs are interpolated into the query text as-is, without escaping or validation.
 * NOTE(review): for an empty `files`, `mkString` yields `(<>)`, i.e. a VALUES row holding an empty
 * IRI — confirm callers never pass an empty sequence.
 *
 * @param files IRIs to look up, given as plain strings without angle brackets
 * @return the SPARQL query string
 */
def queryDownloadURLOfDatabusFiles(files: Seq[String]): String = {
58+
// renders the sequence as "(<iri1>) (<iri2>) ...", one single-column VALUES row per IRI
val databusFilesString = files.mkString("(<",">) (<",">)")
59+
s"""
60+
|PREFIX dcat: <http://www.w3.org/ns/dcat#>
61+
|
62+
|SELECT DISTINCT ?file WHERE {
63+
| VALUES (?databusfile) {$databusFilesString}
64+
| ?distribution ?o ?databusfile .
65+
| ?distribution dcat:downloadURL ?file .
66+
|}
67+
|""".stripMargin
68+
}
5669
}

0 commit comments

Comments
 (0)