Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/release-notes/12454-S3Fixes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### Support for Backblaze B2 as an S3 store, improved support for Storj

An improvement to the handling of the `dataverse.files.<id>.disable-tagging=true` setting for S3 stores now allows the use of Backblaze B2 as an S3 implementation (and may help other stores that do not support tagging).

The `/api/datasets/<id>/cleanStorage` endpoint now works for datasets with more than 1000 files when Storj is used as the S3 store.
6 changes: 6 additions & 0 deletions doc/sphinx-guides/source/installation/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,12 @@ You may provide the values for these via any `supported MicroProfile Config API
Reported Working S3-Compatible Storage
######################################


`Backblaze B2 <https://www.backblaze.com/cloud-storage>`_
(as of 6/11/2026)
Set ``dataverse.files.<id>.disable-tagging=true``, as B2 does not support tagging (and will fail without this setting).
Tested with ``.path-style-access=true``, ``.download-redirect=true``, and ``.upload-redirect=true``.

`Minio v2018-09-12 <https://minio.io>`_
Set ``dataverse.files.<id>.path-style-access=true``, as Minio works path-based. Works smoothly and is easy to set up.
**Can be used for quick testing, too:** just use the example values above. Uses the public (read: insecure and
Expand Down
18 changes: 6 additions & 12 deletions src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,23 +77,17 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException
ListObjectsV2Request listReq = ListObjectsV2Request.builder()
.bucket(dcmBucketName)
.prefix(dcmDatasetKey)
.maxKeys(1000)
.build();

ListObjectsV2Response listRes;
List<S3Object> storedDcmDatasetFilesSummary = new ArrayList<>();
try {
listRes = s3.listObjectsV2(listReq);
s3.listObjectsV2Paginator(listReq).stream()
.flatMap(r -> r.contents().stream())
.forEach(storedDcmDatasetFilesSummary::add);
} catch (S3Exception se) {
logger.info("Caught an S3Exception in s3ImportUtil: " + se.getMessage());
throw new IOException("S3 listAuxObjects: failed to get a listing for " + dcmDatasetKey);
}

List<S3Object> storedDcmDatasetFilesSummary = new ArrayList<>(listRes.contents());

while (listRes.isTruncated()) {
logger.fine("S3 listAuxObjects: going to next page of list");
listReq = listReq.toBuilder().continuationToken(listRes.nextContinuationToken()).build();
listRes = s3.listObjectsV2(listReq);
storedDcmDatasetFilesSummary.addAll(listRes.contents());
throw new IOException("S3 listObjects: failed to get a listing for " + dcmDatasetKey);
}

for (S3Object item : storedDcmDatasetFilesSummary) {
Expand Down
150 changes: 51 additions & 99 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -696,56 +696,53 @@ private File createTempFile(Path path, InputStream inputStream) throws IOExcepti
return targetFile;
}

@Override
public List<String> listAuxObjects() throws IOException {
if (!this.canWrite()) {
open();
}
String prefix = getDestinationKey("");

List<String> ret = new ArrayList<>();
ListObjectsV2Request listObjectsReqManual = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix)
private List<S3Object> listObjects(String prefix, String methodName) throws IOException {
List<S3Object> objects = new ArrayList<>();
ListObjectsV2Request listRequest = ListObjectsV2Request.builder()
.bucket(bucketName)
.prefix(prefix)
.maxKeys(1000) // Required for storJ
.build();

ListObjectsV2Response listObjectsResponse = null;
try {
listObjectsResponse = s3.listObjectsV2(listObjectsReqManual).get();
} catch (InterruptedException | ExecutionException e) {
throw new IOException("S3 listAuxObjects: failed to get a listing for " + prefix, e);
}

if (listObjectsResponse == null) {
return ret;
}

List<S3Object> storedAuxFilesSummary = new ArrayList<>(listObjectsResponse.contents());

try {
String nextContinuationToken = listObjectsResponse.nextContinuationToken();
while (nextContinuationToken != null) {
logger.fine("S3 listAuxObjects: going to next page of list");
ListObjectsV2Request nextReq = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix)
.continuationToken(nextContinuationToken).build();

ListObjectsV2Response nextResponse = s3.listObjectsV2(nextReq).get();
if (nextResponse != null) {
storedAuxFilesSummary.addAll(nextResponse.contents());
nextContinuationToken = nextResponse.nextContinuationToken();
} else {
nextContinuationToken = null;
ListObjectsV2Response listResponse;
String nextToken = null;
do {
ListObjectsV2Request.Builder reqBuilder = listRequest.toBuilder();
if (nextToken != null) {
reqBuilder = reqBuilder.continuationToken(nextToken);
}
}
ListObjectsV2Request req = reqBuilder.build();
listResponse = s3.listObjectsV2(req).get();
objects.addAll(listResponse.contents());
nextToken = listResponse.nextContinuationToken();
if (listResponse.isTruncated() && nextToken == null) {
logger.warning("S3 " + methodName + ": list is truncated but nextContinuationToken is null; stopping to avoid infinite loop");
break;
}
} while (listResponse.isTruncated());
} catch (InterruptedException | ExecutionException e) {
throw new IOException("S3AccessIO: Failed to get aux objects for listing.", e);
throw new IOException("S3AccessIO: Failed to get objects for listing in " + methodName + ".", e);
}
return objects;
}

for (S3Object item : storedAuxFilesSummary) {
String destinationKey = item.key();
String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1);
logger.fine("S3 cached aux object fileName: " + fileName);
ret.add(fileName);
@Override
public List<String> listAuxObjects() throws IOException {
if (!this.canWrite()) {
open();
}
return ret;
String prefix = getDestinationKey("");
List<S3Object> contents = listObjects(prefix, "listAuxObjects");

return contents.stream()
.map(item -> {
String destinationKey = item.key();
String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1);
logger.fine("S3 cached aux object fileName: " + fileName);
return fileName;
})
.collect(Collectors.toList());
}

@Override
Expand Down Expand Up @@ -773,22 +770,7 @@ public void deleteAllAuxObjects() throws IOException {
}

String prefix = getDestinationKey("");

List<S3Object> storedAuxFilesSummary = new ArrayList<>();
try {
ListObjectsV2Request listRequest = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix).build();

ListObjectsV2Response listResponse;
do {
listResponse = s3.listObjectsV2(listRequest).get();
storedAuxFilesSummary.addAll(listResponse.contents());

listRequest = listRequest.toBuilder().continuationToken(listResponse.nextContinuationToken()).build();
} while (listResponse.isTruncated());

} catch (InterruptedException | ExecutionException e) {
throw new IOException("S3AccessIO: Failed to get aux objects for listing to delete.", e);
}
List<S3Object> storedAuxFilesSummary = listObjects(prefix, "deleteAllAuxObjects");

if (storedAuxFilesSummary.isEmpty()) {
logger.fine("S3AccessIO: No auxiliary objects to delete.");
Expand Down Expand Up @@ -986,7 +968,7 @@ public boolean downloadRedirectEnabled(String auxObjectTag) {
* @param auxiliaryFileName (optional) - file name, if different from the main
* file label.
* @return redirect url
* @throws IOException.
* @throws IOException
*/
public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName)
throws IOException {
Expand Down Expand Up @@ -1314,6 +1296,12 @@ private static AwsCredentialsProvider getCredentialsProvider(String driverId) {
}

public void removeTempTag() throws IOException {
final boolean taggingDisabled = JvmSettings.DISABLE_S3_TAGGING.lookupOptional(Boolean.class, this.driverId)
.orElse(false);
if (taggingDisabled) {
logger.fine("S3 tagging disabled for storage driver " + driverId + "; skipping temp tag removal.");
return;
}
if (!(dvObject instanceof DataFile)) {
logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId());
throw new IOException("Attempt to remove temp tag from non-file S3 Object");
Expand Down Expand Up @@ -1453,47 +1441,11 @@ private List<String> listAllFiles() throws IOException {
}
String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/";

List<String> ret = new ArrayList<>();
ListObjectsV2Request listObjectsReqManual = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix)
.build();

ListObjectsV2Response listObjectsResponse = null;
try {
listObjectsResponse = s3.listObjectsV2(listObjectsReqManual).get();
} catch (InterruptedException | ExecutionException e) {
throw new IOException("S3 listObjects: failed to get a listing for " + prefix, e);
}

if (listObjectsResponse == null) {
return ret;
}

List<S3Object> storedFilesSummary = new ArrayList<>(listObjectsResponse.contents());

try {
String nextContinuationToken = listObjectsResponse.nextContinuationToken();
while (nextContinuationToken != null) {
logger.fine("S3 listObjects: going to next page of list");
ListObjectsV2Request nextReq = ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix)
.continuationToken(nextContinuationToken).build();

ListObjectsV2Response nextResponse = s3.listObjectsV2(nextReq).get();
if (nextResponse != null) {
storedFilesSummary.addAll(nextResponse.contents());
nextContinuationToken = nextResponse.nextContinuationToken();
} else {
nextContinuationToken = null;
}
}
} catch (InterruptedException | ExecutionException e) {
throw new IOException("S3AccessIO: Failed to get objects for listing.", e);
}
List<S3Object> contents = listObjects(prefix, "listAllFiles");

for (S3Object item : storedFilesSummary) {
String fileName = item.key().substring(prefix.length());
ret.add(fileName);
}
return ret;
return contents.stream()
.map(item -> item.key().substring(prefix.length()))
.collect(Collectors.toList());
}

private void deleteFile(String fileName) throws IOException {
Expand Down