Skip to content

Commit b5e6e5c

Browse files
committed
Improve CellRangerVDJ import code
1 parent 2e809bc commit b5e6e5c

1 file changed

Lines changed: 57 additions & 21 deletions

File tree

tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -89,22 +89,28 @@ public void importAssayData(PipelineJob job, AnalysisModel model, File vLoupeFil
8989
return;
9090
}
9191

92-
File consensusCsv = new File(cellRangerOutDir, "consensus_annotations.csv");
93-
if (!consensusCsv .exists())
92+
File consensusFastaAB = new File(cellRangerOutDir, "consensus.fasta");
93+
if (!consensusFastaAB.exists())
9494
{
95-
throw new PipelineJobException("unable to find consensus contigs: " + consensusCsv .getPath());
95+
throw new PipelineJobException("unable to find FASTA: " + consensusFastaAB.getPath());
9696
}
9797

98-
File consensusFasta = new File(cellRangerOutDir, "consensus.fasta");
99-
if (!consensusFasta.exists())
98+
File allFastaAB = new File(cellRangerOutDir, "all_contig.fasta");
99+
if (!allFastaAB.exists())
100100
{
101-
throw new PipelineJobException("unable to find FASTA: " + consensusFasta.getPath());
101+
throw new PipelineJobException("unable to find FASTA: " + allFastaAB.getPath());
102102
}
103103

104-
File allFasta = new File(cellRangerOutDir, "all_contig.fasta");
105-
if (!allFasta.exists())
104+
File consensusFastaGD = new File(cellRangerOutDir, "../vdj_t_gd/consensus.fasta");
105+
if (!consensusFastaGD.exists())
106106
{
107-
throw new PipelineJobException("unable to find FASTA: " + allFasta.getPath());
107+
throw new PipelineJobException("unable to find FASTA: " + consensusFastaGD.getPath());
108+
}
109+
110+
File allFastaGD = new File(cellRangerOutDir, "../vdj_t_gd/all_contig.fasta");
111+
if (!allFastaGD.exists())
112+
{
113+
throw new PipelineJobException("unable to find FASTA: " + allFastaGD.getPath());
108114
}
109115

110116
_log.info("loading results into assay: " + assayId);
@@ -276,12 +282,9 @@ else if ("Low Counts".equals(hto))
276282
_log.info("processing clonotype CSV: " + allCsv.getPath());
277283

278284
// use unfiltered data so we can apply demultiplex and also apply alternate filtering logic.
279-
// also, 10x no longer reports TRD/TRG data in their consensus file
280-
//header for 3.x and lower:
281-
// barcode is_cell contig_id high_confidence length chain v_gene d_gene j_gene c_gene full_length productive cdr3 cdr3_nt reads umis raw_clonotype_id raw_consensus_id
282-
283-
//header for 6.x:
284-
// barcode is_cell contig_id high_confidence length chain v_gene d_gene j_gene c_gene full_length productive fwr1 fwr1_nt cdr1 cdr1_nt fwr2 fwr2_nt cdr2 cdr2_nt fwr3 fwr3_nt cdr3 cdr3_nt fwr4 fwr4_nt reads umis raw_clonotype_id raw_consensus_id exact_subclonotype_id
285+
//header for >=6.x:
286+
// NOTE: chain_type is added by CellRangerVDJWrapper when merging a/b and g/d
287+
// barcode is_cell contig_id high_confidence length chain v_gene d_gene j_gene c_gene full_length productive fwr1 fwr1_nt cdr1 cdr1_nt fwr2 fwr2_nt cdr2 cdr2_nt fwr3 fwr3_nt cdr3 cdr3_nt fwr4 fwr4_nt reads umis raw_clonotype_id raw_consensus_id exact_subclonotype_id chain_type
285288
try (CSVReader reader = new CSVReader(Readers.getReader(allCsv), ','))
286289
{
287290
String[] line;
@@ -380,7 +383,12 @@ else if (discordantBarcodes.contains(barcode))
380383
}
381384

382385
//Preferentially use raw_consensus_id, but fall back to contig_id
386+
String chainType = extractField(line, headerToIdx.get(HEADER_FIELD.CHAIN_TYPE));
383387
String coalescedContigName = extractField(line, headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)) == null ? extractField(line, headerToIdx.get(HEADER_FIELD.CONTIG_ID)) : extractField(line, headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID));
388+
if (coalescedContigName != null)
389+
{
390+
coalescedContigName = chainType + "<>" + coalescedContigName;
391+
}
384392

385393
//NOTE: chimeras with a TRDV / TRAJ / TRAC are relatively common. categorize as TRA for reporting ease
386394
String locus = line[headerToIdx.get(HEADER_FIELD.CHAIN)];
@@ -465,7 +473,7 @@ else if (discordantBarcodes.contains(barcode))
465473

466474
//build map of distinct FL sequences:
467475
Map<String, String> sequenceMap = new HashMap<>();
468-
for (File f : Arrays.asList(consensusFasta, allFasta))
476+
for (File f : Arrays.asList(consensusFastaAB, allFastaAB))
469477
{
470478
_log.info("processing FASTA: " + f.getPath());
471479
try (FastaDataLoader loader = new FastaDataLoader(f, false))
@@ -479,7 +487,34 @@ else if (discordantBarcodes.contains(barcode))
479487
String header = (String) fastaRecord.get("header");
480488
if (uniqueContigNames.contains(header))
481489
{
482-
sequenceMap.put(header, (String) fastaRecord.get("sequence"));
490+
sequenceMap.put("vdj_t<>" + header, (String) fastaRecord.get("sequence"));
491+
}
492+
}
493+
}
494+
}
495+
catch (IOException e)
496+
{
497+
throw new PipelineJobException(e);
498+
}
499+
500+
_log.info("total A/B sequences: " + sequenceMap.size());
501+
}
502+
503+
for (File f : Arrays.asList(consensusFastaGD, allFastaGD))
504+
{
505+
_log.info("processing G/D FASTA: " + f.getPath());
506+
try (FastaDataLoader loader = new FastaDataLoader(f, false))
507+
{
508+
loader.setCharacterFilter(new FastaLoader.UpperAndLowercaseCharacterFilter());
509+
try (CloseableIterator<Map<String, Object>> i = loader.iterator())
510+
{
511+
while (i.hasNext())
512+
{
513+
Map<String, Object> fastaRecord = i.next();
514+
String header = (String) fastaRecord.get("header");
515+
if (uniqueContigNames.contains(header))
516+
{
517+
sequenceMap.put("vdj_t_gd<>" + header, (String) fastaRecord.get("sequence"));
483518
}
484519
}
485520
}
@@ -489,7 +524,7 @@ else if (discordantBarcodes.contains(barcode))
489524
throw new PipelineJobException(e);
490525
}
491526

492-
_log.info("total sequences: " + sequenceMap.size());
527+
_log.info("total sequences after G/D: " + sequenceMap.size());
493528
}
494529

495530
List<Map<String, Object>> assayRows = new ArrayList<>();
@@ -714,9 +749,9 @@ public static void deleteExistingData(AssayProvider ap, ExpProtocol protocol, Co
714749
}
715750
}
716751

717-
public static File getPerCellCsv(File cellRangerOutDir)
752+
public static File getPerCellCsv(File dirContainingABVLoupe)
718753
{
719-
return new File(cellRangerOutDir, "all_contig_annotations.csv");
754+
return new File(dirContainingABVLoupe, "all_contig_annotations_combined.csv");
720755
}
721756

722757
private Map<HEADER_FIELD, Integer> inferFieldIdx(String[] line)
@@ -751,7 +786,8 @@ private enum HEADER_FIELD
751786
CDR3("cdr3"),
752787
CDR3_NT("cdr3_nt"),
753788
RAW_CLONOTYPE_ID("raw_clonotype_id"),
754-
RAW_CONSENSUS_ID("raw_consensus_id");
789+
RAW_CONSENSUS_ID("raw_consensus_id"),
790+
CHAIN_TYPE("chain_type");
755791

756792
String headerStr;
757793

0 commit comments

Comments
 (0)