Skip to content

Commit 53862ec

Browse files
committed
Add gzip support for ExpandBED input and output
Add support for gzipped input and output BED files in the ExpandBED script (#91). - Decorate classes with Javadoc tags - Reorganize argument validation in ExpandBEDCLI to support the -z flag and to clarify the output filename logic - Update the BufferedReader and output stream objects to detect and support gzipped files - Add input validation logic to the script (report lines with fewer than 3 columns) - Update the ExpandBED window with an "Output GZIP" checkbox whose state is passed to the script call - Update the file selector to include gzipped file extensions - Rename the "Convert" button field to btnExecute and relabel the button "Expand" - Reorganize the default output filepath determination logic
1 parent e5a07fc commit 53862ec

3 files changed

Lines changed: 117 additions & 54 deletions

File tree

src/cli/Coordinate_Manipulation/BED_Manipulation/ExpandBEDCLI.java

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ public class ExpandBEDCLI implements Callable<Integer> {
3232
private File output = null;
3333
@Option(names = {"-s", "--stdout"}, description = "output bed to STDOUT")
3434
private boolean stdout = false;
35+
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
36+
private boolean gzOutput = false;
3537

3638
@ArgGroup(validate = false, heading = "%nType of Expansion%n")
3739
ExpandType expandType = new ExpandType();
@@ -55,25 +57,22 @@ public Integer call() throws Exception {
5557
System.exit(1);
5658
}
5759

58-
ExpandBED.expandBEDBorders(output, bedFile, SIZE, byCenter);
60+
ExpandBED.expandBEDBorders(output, bedFile, SIZE, byCenter, gzOutput);
5961

6062
System.err.println("Expansion Complete");
6163
return(0);
6264
}
6365

6466
private String validateInput() throws IOException {
6567
String r = "";
66-
68+
6769
//check inputs exist
6870
if(!bedFile.exists()){
6971
r += "(!)BED file does not exist: " + bedFile.getName() + "\n";
7072
return(r);
7173
}
72-
//check input extensions
73-
if(!"bed".equals(ExtensionFileFilter.getExtension(bedFile))){
74-
r += "(!)Is this a BED file? Check extension: " + bedFile.getName() + "\n";
75-
}
76-
74+
if(!"".equals(r)){ return(r); }
75+
7776
// Define default behavior
7877
if(expandType.center==-999 && expandType.border==-999){
7978
SIZE = 250;
@@ -91,27 +90,28 @@ private String validateInput() throws IOException {
9190
if(SIZE<=0){
9291
r += "(!) Invalid size input. Must be a positive integer greater than 0.";
9392
}
94-
93+
94+
//check stdout and gzip not both selected
95+
if (stdout && gzOutput) {
96+
r += "(!) Cannot use -s flag with -z.\n";
97+
}
9598
//set default output filename
96-
if(output==null && !stdout){
97-
if(byCenter){ output = new File(ExtensionFileFilter.stripExtension(bedFile) + "_" + Integer.toString(SIZE) + "bp.bed"); }
98-
else{ output = new File(ExtensionFileFilter.stripExtension(bedFile) + "_border_" + Integer.toString(SIZE) + "bp.bed"); }
99-
//check stdout and output not both selected
100-
}else if(stdout){
101-
if(output!=null){ r += "(!)Cannot use -s flag with -o.\n"; }
102-
//check output filename is valid
103-
}else{
104-
//check ext
105-
try{
106-
if(!"bed".equals(ExtensionFileFilter.getExtension(output))){
107-
r += "(!)Use BED extension for output filename. Try: " + ExtensionFileFilter.stripExtension(output) + ".bed\n";
108-
}
109-
} catch( NullPointerException e){ r += "(!)Output filename must have extension: use BED extension for output filename. Try: " + output + ".bed\n"; }
99+
if (output == null) {
100+
if (!stdout) {
101+
String NAME = ExtensionFileFilter.stripExtension(bedFile);
102+
NAME += byCenter ? "_" + Integer.toString(SIZE) + "bp.bed" : "_border_" + Integer.toString(SIZE) + "bp.bed";
103+
NAME += gzOutput ? ".gz" : "";
104+
output = new File(NAME);
105+
//check stdout and output not both selected
106+
} else {
107+
r += "(!) Cannot use -s flag with -o.\n";
108+
}
109+
} else {
110110
//check directory
111-
if(output.getParent()==null){
112-
// System.err.println("default to current directory");
113-
} else if(!new File(output.getParent()).exists()){
114-
r += "(!)Check output directory exists: " + output.getParent() + "\n";
111+
if (output.getParent() != null) {
112+
if (!new File(output.getParent()).exists()) {
113+
r += "(!) Check output directory exists: " + output.getParent() + "\n";
114+
}
115115
}
116116
}
117117

src/scripts/Coordinate_Manipulation/BED_Manipulation/ExpandBED.java

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,57 @@
11
package scripts.Coordinate_Manipulation.BED_Manipulation;
22

3+
import java.io.BufferedOutputStream;
4+
import java.io.BufferedReader;
35
import java.io.File;
6+
import java.io.FileInputStream;
7+
import java.io.FileOutputStream;
48
import java.io.IOException;
9+
import java.io.InputStreamReader;
510
import java.io.PrintStream;
611
import java.util.Arrays;
7-
import java.util.Scanner;
12+
import java.util.zip.GZIPInputStream;
13+
import java.util.zip.GZIPOutputStream;
814

9-
public class ExpandBED {
10-
public static void expandBEDBorders(File out_filepath, File input, int SIZE, boolean ExCenter) throws IOException {
15+
import util.GZipUtilities;
1116

12-
Scanner scan = new Scanner(input);
17+
/**
18+
* Class that contains method for expanding (BED) coordinate intervals from the center/border by a user-defined direction and distance.
19+
*
20+
* @author William KM Lai
21+
*
22+
*/
23+
public class ExpandBED {
24+
/**
25+
* Self-contained method for expanding the BED-formatted intervals in a BED file by user-specified distance and strategy. This method accounts for even-sized BED interval expansion in midpoint calculations by using the strand-aware downstream nucleotide between the two center nucleotides.
26+
*
27+
* @param out_filepath Filepath to save expanded BED-formatted files. If null, outputs to STDOUT.
28+
* @param input Filepath to starting BED-formatted coordinates we want to shift. Supports automatic detection and handling of GZipped BED-formatted files. Must have at least 3 tab-delimited columns per BED specifications.
29+
* @param SIZE Integer value indicating number of nucleotides to expand by (must be a positive integer).
30+
* @param ExCenter Specifies expansion strategy: if true, size expansion will be performed from the midpoint of each BED interval, if false, size expansion will be performed from the border/edges of the BED intervals (default=true).
31+
* @param gzOutput If this is true, the output file will be gzipped.
32+
* @throws IOException
33+
*/
34+
public static void expandBEDBorders(File out_filepath, File input, int SIZE, boolean ExCenter, boolean gzOutput ) throws IOException {
35+
// Initialize output writer
1336
PrintStream OUT = System.out;
14-
if (out_filepath != null)
15-
OUT = new PrintStream(out_filepath);
16-
17-
while (scan.hasNextLine()) {
18-
String[] temp = scan.nextLine().split("\t");
37+
if (out_filepath != null) {
38+
if (gzOutput) {
39+
OUT = new PrintStream(new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(out_filepath))));
40+
} else {
41+
OUT = new PrintStream(new BufferedOutputStream(new FileOutputStream(out_filepath)));
42+
}
43+
}
44+
// Check if file is gzipped and instantiate appropriate BufferedReader
45+
BufferedReader br;
46+
if(GZipUtilities.isGZipped(input)) {
47+
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(input)), "UTF-8"));
48+
} else {
49+
br = new BufferedReader(new InputStreamReader(new FileInputStream(input), "UTF-8"));
50+
}
51+
// Initialize line variable to loop through
52+
String line = br.readLine();
53+
while (line != null) {
54+
String[] temp = line.split("\t");
1955
if (temp[0].toLowerCase().contains("track") || temp[0].startsWith("#")) {
2056
OUT.println(String.join("\t", temp));
2157
} else {
@@ -41,12 +77,15 @@ public static void expandBEDBorders(File out_filepath, File input, int SIZE, boo
4177
}
4278
OUT.println();
4379
} else {
44-
System.out.println("Invalid Coordinate in File!!!\n" + Arrays.toString(temp));
80+
System.out.println("Invalid Coordinate in File!!! (coordinate must be >= 0)\n" + Arrays.toString(temp));
4581
}
82+
} else {
83+
System.out.println("Invalid Coordinate in File!!! (must have at least 3 columns)\n" + Arrays.toString(temp));
4684
}
4785
}
86+
line = br.readLine();
4887
}
49-
scan.close();
88+
br.close();
5089
OUT.close();
5190
}
5291
}

src/window_interface/Coordinate_Manipulation/BED_Manipulation/ExpandBEDWindow.java

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import javax.swing.ButtonGroup;
1212
import javax.swing.DefaultListModel;
1313
import javax.swing.JButton;
14+
import javax.swing.JCheckBox;
1415
import javax.swing.JOptionPane;
1516
import javax.swing.ListSelectionModel;
1617
import javax.swing.SpringLayout;
@@ -34,8 +35,14 @@
3435
import java.beans.PropertyChangeListener;
3536

3637
import scripts.Coordinate_Manipulation.BED_Manipulation.ExpandBED;
38+
import util.ExtensionFileFilter;
3739
import util.FileSelection;
38-
40+
/**
41+
* Graphical interface window for the size expansion of BED coordinate interval files.
42+
*
43+
* @author William KM Lai
44+
*
45+
*/
3946
@SuppressWarnings("serial")
4047
public class ExpandBEDWindow extends JFrame implements ActionListener, PropertyChangeListener {
4148
private JPanel contentPane;
@@ -49,7 +56,7 @@ public class ExpandBEDWindow extends JFrame implements ActionListener, PropertyC
4956

5057
private JButton btnLoad;
5158
private JButton btnRemoveBED;
52-
private JButton btnConvert;
59+
private JButton btnExecute;
5360

5461
public Task task;
5562
private JLabel lblCurrent;
@@ -59,6 +66,7 @@ public class ExpandBEDWindow extends JFrame implements ActionListener, PropertyC
5966

6067
private static JRadioButton rdbtnExpandFromCenter;
6168
private static JRadioButton rdbtnAddToBorder;
69+
private static JCheckBox chckbxGzipOutput;
6270

6371
class Task extends SwingWorker<Void, Void> {
6472
@Override
@@ -70,16 +78,24 @@ public Void doInBackground() throws IOException {
7078
} else {
7179
setProgress(0);
7280
for (int x = 0; x < BEDFiles.size(); x++) {
81+
// Save current BED to temp variable
7382
File XBED = BEDFiles.get(x);
74-
// Set outfilepath
75-
String OUTPUT = (XBED.getName()).substring(0, XBED.getName().length() - 4) + "_"
76-
+ Integer.toString(SIZE) + "bp.bed";
83+
System.out.println("Input: " + XBED.getName());
84+
// Set output filepath with name and output directory
85+
String OUTPUT = ExtensionFileFilter.stripExtension(XBED);
7786
if (OUT_DIR != null) {
7887
OUTPUT = OUT_DIR + File.separator + OUTPUT;
7988
}
89+
// Strip second extension if input has ".gz" first extension
90+
if (XBED.getName().endsWith(".bed.gz")) {
91+
OUTPUT = ExtensionFileFilter.stripExtensionPath(new File(OUTPUT)) ;
92+
}
93+
// Add suffix
94+
OUTPUT += "_" + Integer.toString(SIZE) + "bp.bed";
95+
OUTPUT += chckbxGzipOutput.isSelected() ? ".gz" : "";
8096

8197
// Execute expansion and update progress
82-
ExpandBED.expandBEDBorders(new File(OUTPUT), XBED, SIZE, rdbtnExpandFromCenter.isSelected());
98+
ExpandBED.expandBEDBorders(new File(OUTPUT), XBED, SIZE, rdbtnExpandFromCenter.isSelected(), chckbxGzipOutput.isSelected());
8399
int percentComplete = (int) (((double) (x + 1) / BEDFiles.size()) * 100);
84100
setProgress(percentComplete);
85101
}
@@ -98,6 +114,9 @@ public void done() {
98114
}
99115
}
100116

117+
/**
118+
* Instantiate window with graphical interface design.
119+
*/
101120
public ExpandBEDWindow() {
102121
setTitle("Expand BED File");
103122
setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
@@ -125,7 +144,7 @@ public ExpandBEDWindow() {
125144
sl_contentPane.putConstraint(SpringLayout.SOUTH, btnLoad, -6, SpringLayout.NORTH, scrollPane);
126145
btnLoad.addActionListener(new ActionListener() {
127146
public void actionPerformed(ActionEvent e) {
128-
File[] newBEDFiles = FileSelection.getFiles(fc, "bed");
147+
File[] newBEDFiles = FileSelection.getFiles(fc, "bed", true);
129148
if (newBEDFiles != null) {
130149
for (int x = 0; x < newBEDFiles.length; x++) {
131150
BEDFiles.add(newBEDFiles[x]);
@@ -149,19 +168,19 @@ public void actionPerformed(ActionEvent arg0) {
149168
});
150169
contentPane.add(btnRemoveBED);
151170

152-
btnConvert = new JButton("Convert");
153-
sl_contentPane.putConstraint(SpringLayout.WEST, btnConvert, 167, SpringLayout.WEST, contentPane);
154-
sl_contentPane.putConstraint(SpringLayout.SOUTH, btnConvert, 0, SpringLayout.SOUTH, contentPane);
155-
sl_contentPane.putConstraint(SpringLayout.EAST, btnConvert, -175, SpringLayout.EAST, contentPane);
156-
contentPane.add(btnConvert);
171+
btnExecute = new JButton("Expand");
172+
sl_contentPane.putConstraint(SpringLayout.WEST, btnExecute, 167, SpringLayout.WEST, contentPane);
173+
sl_contentPane.putConstraint(SpringLayout.SOUTH, btnExecute, 0, SpringLayout.SOUTH, contentPane);
174+
sl_contentPane.putConstraint(SpringLayout.EAST, btnExecute, -175, SpringLayout.EAST, contentPane);
175+
contentPane.add(btnExecute);
157176

158177
progressBar = new JProgressBar();
159-
sl_contentPane.putConstraint(SpringLayout.NORTH, progressBar, 3, SpringLayout.NORTH, btnConvert);
178+
sl_contentPane.putConstraint(SpringLayout.NORTH, progressBar, 3, SpringLayout.NORTH, btnExecute);
160179
sl_contentPane.putConstraint(SpringLayout.EAST, progressBar, -5, SpringLayout.EAST, contentPane);
161180
progressBar.setStringPainted(true);
162181
contentPane.add(progressBar);
163182

164-
btnConvert.setActionCommand("start");
183+
btnExecute.setActionCommand("start");
165184

166185
lblCurrent = new JLabel("Current Output:");
167186
sl_contentPane.putConstraint(SpringLayout.WEST, lblCurrent, 10, SpringLayout.WEST, contentPane);
@@ -175,8 +194,7 @@ public void actionPerformed(ActionEvent arg0) {
175194
contentPane.add(lblDefaultToLocal);
176195

177196
btnOutput = new JButton("Output Directory");
178-
sl_contentPane.putConstraint(SpringLayout.WEST, btnOutput, 143, SpringLayout.WEST, contentPane);
179-
sl_contentPane.putConstraint(SpringLayout.EAST, btnOutput, -157, SpringLayout.EAST, contentPane);
197+
sl_contentPane.putConstraint(SpringLayout.WEST, btnOutput, 10, SpringLayout.WEST, contentPane);
180198
btnOutput.addActionListener(new ActionListener() {
181199
public void actionPerformed(ActionEvent e) {
182200
OUT_DIR = FileSelection.getOutputDir(fc);
@@ -186,6 +204,11 @@ public void actionPerformed(ActionEvent e) {
186204
}
187205
});
188206
contentPane.add(btnOutput);
207+
208+
chckbxGzipOutput = new JCheckBox("Output GZIP");
209+
sl_contentPane.putConstraint(SpringLayout.NORTH, chckbxGzipOutput, 0, SpringLayout.NORTH, btnOutput);
210+
sl_contentPane.putConstraint(SpringLayout.EAST, chckbxGzipOutput, -10, SpringLayout.EAST, contentPane);
211+
contentPane.add(chckbxGzipOutput);
189212

190213
rdbtnExpandFromCenter = new JRadioButton("Expand from Center");
191214
sl_contentPane.putConstraint(SpringLayout.NORTH, rdbtnExpandFromCenter, 6, SpringLayout.SOUTH, scrollPane);
@@ -218,7 +241,8 @@ public void actionPerformed(ActionEvent e) {
218241
sl_contentPane.putConstraint(SpringLayout.WEST, lblSizeOfExpansion, 100, SpringLayout.WEST, contentPane);
219242
sl_contentPane.putConstraint(SpringLayout.NORTH, lblCurrent, 40, SpringLayout.SOUTH, lblSizeOfExpansion);
220243
contentPane.add(lblSizeOfExpansion);
221-
btnConvert.addActionListener(this);
244+
245+
btnExecute.addActionListener(this);
222246
}
223247

224248
@Override

0 commit comments

Comments
 (0)