Skip to content

Commit 5796c41

Browse files
committed
Added script to parse a DID for a dataset and produce a txt file containing all file paths for files in the dataset.
1 parent b461105 commit 5796c41

1 file changed

Lines changed: 57 additions & 0 deletions

File tree

files/DIDlist_Parse.sh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/bin/bash
#
# DIDlist_Parse.sh - list the file paths of every file in a rucio dataset.
#
# Usage: DIDlist_Parse.sh <DID>
#
#   <DID>  Name of a single dataset (no wildcards). The scope is always
#          "epic", so the DID queried is "epic:<DID>".
#
# For each entry that `rucio did content list --short` reports for the
# dataset, the output of
#   rucio replica list file --protocols root --pfns --rses isopenaccess <entry>
# is appended to the file DID_Pathlist in the current directory. An existing
# DID_Pathlist is appended to, not truncated.
#
# Exit status:
#    0  success
#    1  the DID contains a wildcard
#    2  rucio returned nothing for the DID
#    3  the DID is not a dataset
#    5  a temporary file could not be created, or the dataset contents
#       could not be listed
#   64  wrong number of arguments

set -u

readonly SCOPE="epic"            # Scope will always be epic
readonly PATHLIST="DID_Pathlist" # Output file holding the list of paths

# If no arguments or more than one given, complain.
if [[ "$#" -ne 1 ]]; then
  {
    echo ""
    echo "!!! ERROR !!! - Expected 1 argument - !!! ERROR !!!"
    echo "Expect - DID dataset to parse"
    echo "!!! ERROR !!! - Expected 1 argument - !!! ERROR !!!"
    echo ""
  } >&2
  exit 64
fi

DID=$1
full_did="${SCOPE}:${DID}"

# Check DID is singular, do not allow wildcards.
if [[ "$DID" == *'*'* ]]; then
  echo "Provided DID string includes a wild card. Please provide a singular DID dataset path only!" >&2
  exit 1
fi

# Check DID is valid: an empty listing is treated as "no such DID".
did_check=$(rucio did list --short "$full_did")
if [[ -z "$did_check" ]]; then
  echo "DID provided is blank! Please provide the DID for a dataset!" >&2
  exit 2
fi

# Scratch file for rucio output. It is removed on every exit path by the trap,
# so early error exits no longer leave it behind.
tmp_file=$(mktemp "${TMPDIR:-/tmp}/DIDlist_Parse_tmp.XXXXXX") || {
  echo "Could not create a temporary file!" >&2
  exit 5
}
trap 'rm -f -- "$tmp_file"' EXIT

# Check DID is a dataset: the full listing must mention DATASET.
rucio did list "$full_did" > "$tmp_file"
if ! grep -q DATASET "$tmp_file"; then
  echo "DID is not a dataset! Please provide the DID for a dataset!" >&2
  exit 3
fi

# Checks passed, now dump the list of files in the dataset to the scratch file.
if ! rucio did content list --short "$full_did" > "$tmp_file"; then
  echo "Could not list the contents of ${full_did}!" >&2
  exit 5
fi

touch "$PATHLIST" # Make sure the path list exists even for an empty dataset

# Number of entries to process. `grep -c ''` also counts a final line that
# lacks a trailing newline, matching what the read loop below processes.
total=$(grep -c '' "$tmp_file")

# Report progress roughly every 10 %. The step is at least 1 so that small
# datasets never cause a modulo-by-zero.
step=$(( (total + 9) / 10 ))
if (( step < 1 )); then
  step=1
fi

echo "Processing ${total} files in dataset ${full_did} and creating file with list of paths."

# The list is read on fd 3 so that rucio cannot consume it through stdin.
i=0
while IFS='' read -r -u 3 line || [[ -n "$line" ]]; do
  i=$(( i + 1 ))
  if [[ -n "$line" ]]; then
    # Best effort: a failure for one entry is reported but does not abort.
    rucio replica list file --protocols root --pfns --rses isopenaccess "$line" >> "$PATHLIST" \
      || echo "Warning: could not list replicas for ${line}" >&2
  fi
  if (( i % step == 0 || i == total )); then
    echo "$(( 100 * i / total )) % of file list processed"
  fi
done 3< "$tmp_file"

echo "Parsed provided DID - $DID"
# TODO: name the output file after the dataset instead of a fixed name.
echo "All files locations within this dataset have been printed to ${PATHLIST}"

exit 0

0 commit comments

Comments
 (0)