Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ public class ShapefileHandler{
public static final List<String> SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj");
public static final String SHP_XML_EXTENSION = "shp.xml";
public static final String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__";
public static final String SKIP_PREFIX_1 = "__";
public static final String SKIP_PREFIX_2 = "._";
public static final String SKIP_PREFIX_3 = "..";
public static final String SKIP_SUFFIX_1 = ".DS_Store";
public static final List<String> SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION);
private final File zipFile;
public boolean DEBUG = false;
Expand Down Expand Up @@ -282,14 +286,38 @@ private boolean unzipFilesToDirectory(ZipFile zipfileInput, File target_director
return false;
}

List<String> unzippedFileNames = new ArrayList<>();
String canonicalTargetDir;
try {
canonicalTargetDir = target_directory.getCanonicalPath();

if (!canonicalTargetDir.endsWith(File.separator)) {
canonicalTargetDir += File.separator;
}
} catch (IOException e) {
this.addErrorMessage("Failed to get canonical path for target directory: " + target_directory.getAbsolutePath());
return false;
}

List<String> unzippedFileNames = new ArrayList<>();


try {
for(var origEntry : Collections.list(zipfileInput.entries())){

String zentryFileName = origEntry.getName();
logger.fine("\nOriginal entry name: " + origEntry);

// validate zip entry:
try {
File targetFile = new File(target_directory, zentryFileName);
if (!targetFile.getCanonicalPath().startsWith(canonicalTargetDir)) {
logger.warning("Skipping invalid zip entry: " + zentryFileName);
continue;
}
} catch (IOException e) {
logger.warning("Failed to get canonical path for zip entry: " + zentryFileName);
continue;
}

if (this.isFileToSkip(zentryFileName)){
logger.fine("Skip file");
Expand Down Expand Up @@ -629,16 +657,28 @@ private boolean isFileToSkip(String fname){
return true;
}

if (fname.startsWith("__")){
// null bytes can be a problem:
if (fname.indexOf('\0') >= 0) {
return true;
}

if (fname.startsWith(SKIP_PREFIX_1)){
return true;
}

if (fname.startsWith(SKIP_PREFIX_2)){
return true;
}

if (fname.startsWith("._")){
// normalize, re-check:
String fnameNormalized = fname.replace('\\', File.separatorChar);
if (fnameNormalized.startsWith(SKIP_PREFIX_3 + File.separator)
|| fnameNormalized.contains(File.separator + SKIP_PREFIX_3 + File.separator)) {
return true;
}

File fnameFile = new File(fname);
if (fnameFile.getName().endsWith(".DS_Store")){
if (fnameFile.getName().endsWith(SKIP_SUFFIX_1)){
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
//import edu.harvard.iq.dataverse.util.ZipMaker;
import edu.harvard.iq.dataverse.util.ShapefileHandler;
import static edu.harvard.iq.dataverse.util.ShapefileHandler.SHP_XML_EXTENSION;
import static edu.harvard.iq.dataverse.util.ShapefileHandler.SKIP_PREFIX_1;
import static edu.harvard.iq.dataverse.util.ShapefileHandler.SKIP_PREFIX_2;
import static edu.harvard.iq.dataverse.util.ShapefileHandler.SKIP_PREFIX_3;
import static edu.harvard.iq.dataverse.util.ShapefileHandler.SKIP_SUFFIX_1;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -55,15 +59,6 @@ public void msgt(String s){
msg("------------------------------------------------------------");
}



/**
 * Creates a new, empty file named {@code filename} under {@code tempFolder}.
 *
 * @param filename name to resolve against {@code tempFolder}; may be null
 * @return the created file as a {@link File}, or null when {@code filename} is null
 * @throws IOException if {@link Files#createFile} fails
 */
private File createBlankFile(String filename) throws IOException {
    if (filename != null) {
        // Resolve first, then create, so the new file lands inside tempFolder.
        Path blankFilePath = Files.createFile(tempFolder.resolve(filename));
        return blankFilePath.toFile();
    }
    // No name given: nothing is created.
    return null;
}

private FileInputStream createZipReturnFilestream(List<String> file_names, String zipfile_name) throws IOException{

File zip_file_obj = this.createAndZipFiles(file_names, zipfile_name);
Expand All @@ -88,30 +83,20 @@ private File createAndZipFiles(List<String> file_names, String zipfile_name) thr
return null;
}

// Create blank files based on a list of file names
//
Collection<File> fileCollection = new ArrayList<>();
for (String fname : file_names) {
File file_obj = this.createBlankFile(fname);
fileCollection.add(file_obj);
//msg("File created: " + file_obj.getName());
}

// Create blank zip entries based on a list of file names

Path zip_file_obj = this.tempFolder.resolve(zipfile_name);
try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj.toFile()))) {

// Iterate through File objects and add them to the ZipOutputStream
for (File file_obj : fileCollection) {
this.addToZipFile(file_obj.getName(), file_obj, zip_stream);
// Iterate through file names and add them to the ZipOutputStream
for (String fname : file_names) {
ZipEntry zipEntry = new ZipEntry(fname);
zip_stream.putNextEntry(zipEntry);
zip_stream.write(new byte[]{0});
zip_stream.closeEntry();
}
}
/* -----------------------------------
Cleanup: Delete single files that were added to .zip
----------------------------------- */
for (File file_obj : fileCollection) {
file_obj.delete();
}


return zip_file_obj.toFile();

} // end createAndZipFiles
Expand Down Expand Up @@ -236,12 +221,17 @@ public void testZippedTwoShapefiles() throws IOException{


@Test
// Testing the handling of a shape set with "extra files" - files that
// do not belong to any of the shape sets inside. Generally Dataverse will
// unzip and add them as standalone files. Some files however should be
// skipped, such as files with certain system prefixes.
public void testZippedShapefileWithExtraFiles() throws IOException{
msgt("(3) testZippedShapefileWithExtraFiles");

// Create files and put them in a .zip
List<String> file_names = Arrays.asList("shape1.shp", "shape1.shx", "shape1.dbf", "shape1.prj", "shape1.pdf", "shape1.cpg", "shape1." + SHP_XML_EXTENSION, "README.md", "shape_notes.txt");
File zipfile_obj = createAndZipFiles(file_names, "shape-plus.zip");
List<String> file_names = Arrays.asList("shape1.shp", "shape1.shx", "shape1.dbf", "shape1.prj", "shape1.pdf", "shape1.cpg", "shape1." + SHP_XML_EXTENSION, "README.md", "shape_notes.txt",
SKIP_PREFIX_1 + "extra", SKIP_PREFIX_2 + "extra", SKIP_PREFIX_3 + "/extra.txt", "extra" + SKIP_SUFFIX_1);
File zipfile_obj = createAndZipFiles(file_names, "shape-plus.zip");

// Pass the .zip to the ShapefileHandler
ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj);
Expand Down Expand Up @@ -273,6 +263,8 @@ public void testZippedShapefileWithExtraFiles() throws IOException{
rezipped_filenames.addAll(Arrays.asList(unzip2Folder.list()));

msg("rezipped_filenames: " + rezipped_filenames);
// It is just as important to check that the expected files have been processed,
// as it is to confirm that the unwanted files have been skipped:
List<String> expected_filenames = Arrays.asList("shape1.zip", "scratch-for-unzip-12345", "shape1.pdf", "README.md", "shape_notes.txt");

assertTrue(expected_filenames.containsAll(rezipped_filenames), "verify that all files exist");
Expand Down
Loading