22
33import java .util .ArrayList ;
44import java .util .HashMap ;
5+ import java .util .HashSet ;
56import java .util .List ;
67import java .util .Map ;
8+ import java .util .Set ;
79
810import mesquite .categ .lib .CategoricalData ;
11+ import mesquite .categ .lib .CategoricalState ;
912import mesquite .categ .lib .DNAData ;
1013import mesquite .categ .lib .ProteinData ;
1114import mesquite .categ .lib .RNAData ;
2225import org .nexml .model .CategoricalMatrix ;
2326import org .nexml .model .Character ;
2427import org .nexml .model .CharacterStateSet ;
28+ import org .nexml .model .CompoundCharacterState ;
2529import org .nexml .model .Document ;
2630import org .nexml .model .Matrix ;
2731import org .nexml .model .MatrixCell ;
2832import org .nexml .model .MolecularMatrix ;
2933import org .nexml .model .NexmlWritable ;
3034import org .nexml .model .OTU ;
3135import org .nexml .model .OTUs ;
36+ import org .nexml .model .PolymorphicCharacterState ;
37+ import org .nexml .model .UncertainCharacterState ;
3238
3339public class NexmlCharactersBlockWriter extends NexmlBlockWriter {
34-
40+
/**
 * Next symbol to assign to a newly created uncertain or polymorphic state.
 * Starts at CategoricalState.getMaxPossibleStateStatic() + 1 so that generated
 * symbols don't conflict with existing state symbols.
 */
private int nextMultipleStateSymbol = CategoricalState.getMaxPossibleStateStatic() + 1;

/**
 * Maps the Mesquite data type name of each supported molecular matrix
 * (DNA, RNA, protein) to the corresponding NeXML molecular matrix type.
 * Filled in a static initializer rather than with double-brace initialization,
 * which would create an anonymous HashMap subclass.
 */
private static final Map<String, String> xmlMolecularDataTypeFor = new HashMap<String, String>();
static {
    xmlMolecularDataTypeFor.put(DNAData.DATATYPENAME, MolecularMatrix.DNA);
    xmlMolecularDataTypeFor.put(RNAData.DATATYPENAME, MolecularMatrix.RNA);
    xmlMolecularDataTypeFor.put(ProteinData.DATATYPENAME, MolecularMatrix.Protein);
}
41-
52+
4253 /**
4354 *
4455 * @param employerEmployee
@@ -57,66 +68,101 @@ protected Annotatable writeBlock(Document xmlProject, FileElement mesBlock) {
5768 Taxa mesTaxa = mesData .getTaxa ();
5869 OTUs xmlTaxa = findEquivalentTaxa (mesTaxa ,xmlProject );
5970 org .nexml .model .Matrix <?> xmlMatrix = null ;
60- CharacterStateSet xmlCharacterStateSet = null ;
6171 String mesDataType = mesData .getDataTypeName ();
6272 if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
6373 xmlMatrix = xmlProject .createMolecularMatrix (xmlTaxa ,xmlMolecularDataTypeFor .get (mesDataType ));
64- xmlCharacterStateSet = ((MolecularMatrix )xmlMatrix ).getCharacterStateSet ();
6574 }
6675 else if ( mesDataType .equalsIgnoreCase (CategoricalData .DATATYPENAME ) ) {
6776 xmlMatrix = xmlProject .createCategoricalMatrix (xmlTaxa );
68- xmlCharacterStateSet = ((CategoricalMatrix )xmlMatrix ).createCharacterStateSet ();
6977 }
7078 else if ( mesDataType .equalsIgnoreCase (ContinuousData .DATATYPENAME ) ) {
7179 xmlMatrix = xmlProject .createContinuousMatrix (xmlTaxa );
7280 }
7381 else {
7482 MesquiteMessage .warnProgrammer ("Can't write data type " +mesDataType );
75- }
76- writeCharacterStates (mesData , xmlMatrix , xmlCharacterStateSet );
83+ }
84+ writeCharacterStates (mesData , xmlMatrix );
7785 return xmlMatrix ;
7886 }
79-
87+
8088 /**
8189 *
8290 * @param mesData
8391 * @param xmlMatrix
8492 * @param xmlCharacterStateSet
8593 */
8694 @ SuppressWarnings ("unchecked" )
87- private void writeCharacterStates (CharacterData mesData , org .nexml .model .Matrix <?> xmlMatrix , CharacterStateSet xmlCharacterStateSet ) {
95+ private void writeCharacterStates (CharacterData mesData , org .nexml .model .Matrix <?> xmlMatrix ) {
8896 String mesDataType = mesData .getDataTypeName ();
8997 int mesNchar = mesData .getNumChars ();
9098 List <Character > xmlCharacters = new ArrayList <Character >(mesNchar );
91- for ( int j = 0 ; j < mesNchar ; j ++ ) {
99+ for ( int characterIndex = 0 ; characterIndex < mesNchar ; characterIndex ++ ) {
100+ CharacterStateSet xmlCharacterStateSet = null ;
101+ if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
102+ xmlCharacterStateSet = ((MolecularMatrix )xmlMatrix ).getCharacterStateSet ();
103+ }
104+ else if ( mesDataType .equalsIgnoreCase (CategoricalData .DATATYPENAME ) ) {
105+ xmlCharacterStateSet = ((CategoricalMatrix )xmlMatrix ).createCharacterStateSet ();
106+ }
92107 Character xmlChar = xmlMatrix .createCharacter (xmlCharacterStateSet );
93- String mesCharacterName = mesData .getCharacterName (j );
108+ String mesCharacterName = mesData .getCharacterName (characterIndex );
94109 if ( null != mesCharacterName && ! mesCharacterName .equals ("" ) ) {
95110 xmlChar .setLabel (mesCharacterName );
96111 }
112+ if ( mesDataType .equalsIgnoreCase (CategoricalData .DATATYPENAME ) ) {
113+ CategoricalData data = ((CategoricalData )mesData );
114+ int maxStateIndex = data .maxStateWithName (characterIndex );
115+ for (int stateIndex = 0 ; stateIndex <= maxStateIndex ; stateIndex ++) {
116+ String symbol = String .valueOf (data .getSymbol (stateIndex ));
117+ org .nexml .model .CharacterState state = xmlChar .getCharacterStateSet ().createCharacterState (symbol );
118+ state .setSymbol (symbol );
119+ if (data .hasStateName (characterIndex , stateIndex )) {
120+ String stateLabel = data .getStateName (characterIndex , stateIndex );
121+ state .setLabel (stateLabel );
122+ }
123+ }
124+ }
97125 xmlCharacters .add (xmlChar );
98126 }
99- for ( int j = 0 ; j < mesData .getNumTaxa (); j ++ ) {
100- CharacterState [] mesChars = mesData .getCharacterStateArray (j , 0 , mesNchar );
101- Taxon mesTaxon = mesData .getTaxa ().getTaxon (j );
127+ for (int taxonIndex = 0 ; taxonIndex < mesData .getNumTaxa (); taxonIndex ++ ) {
128+ CharacterState [] mesCharStates = mesData .getCharacterStateArray (taxonIndex , 0 , mesNchar );
129+ Taxon mesTaxon = mesData .getTaxa ().getTaxon (taxonIndex );
102130 OTU xmlTaxon = findEquivalentTaxon (mesTaxon ,xmlMatrix .getOTUs ());
103- for ( int k = 0 ; k < mesNchar ; k ++ ) {
104- Character xmlChar = xmlCharacters .get (k );
105- String mesCharString = mesChars [k ].toDisplayString ();
106- if ( mesCharString != null && !mesCharString .equals ("-" ) ) {
107- if ( mesDataType .equalsIgnoreCase (ContinuousData .DATATYPENAME ) ) {
108- MatrixCell <Double > xmlCell = (MatrixCell <Double >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
109- xmlCell .setValue ((Double )xmlMatrix .parseSymbol (mesCharString ));
131+ for ( int characterIndex = 0 ; characterIndex < mesNchar ; characterIndex ++ ) {
132+ Character xmlChar = xmlCharacters .get (characterIndex );
133+ CharacterState mesState = mesCharStates [characterIndex ];
134+ if (mesDataType .equalsIgnoreCase (CategoricalData .DATATYPENAME )) {
135+ CharacterStateSet xmlStateSet = xmlChar .getCharacterStateSet ();
136+ CategoricalData categoricalData = (CategoricalData )mesData ;
137+ long stateAssignment = categoricalData .getState (characterIndex , taxonIndex );
138+ org .nexml .model .CharacterState xmlCharacterState = null ;
139+ if (CategoricalState .hasMultipleStates (stateAssignment )) {
140+ Set <String > symbols = new HashSet <String >();
141+ for (int mesStateCode : CategoricalState .expand (stateAssignment )) {
142+ symbols .add (String .valueOf (categoricalData .getSymbol (mesStateCode )));
143+ }
144+ if (CategoricalState .isUncertain (stateAssignment )) {
145+ xmlCharacterState = findOrCreateUncertainStateSet (xmlStateSet , symbols );
146+ } else { //polymorphic
147+ xmlCharacterState = findOrCreatePolymorphicStateSet (xmlStateSet , symbols );
148+ }
149+ } else { // single state
150+ if ((!CategoricalState .isUnassigned (stateAssignment )) && (!CategoricalState .isInapplicable (stateAssignment ))) {
151+ String symbol = String .valueOf (categoricalData .getSymbol (CategoricalState .getOnlyElement (stateAssignment )));
152+ xmlCharacterState = xmlStateSet .lookupCharacterStateBySymbol (symbol );
153+ }
110154 }
111- else if ( mesDataType . equalsIgnoreCase ( CategoricalData . DATATYPENAME ) ) {
112- MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
113- xmlCell .setValue (( org . nexml . model . CharacterState ) xmlMatrix . parseSymbol ( mesCharString ) );
155+ if (xmlCharacterState != null ) {
156+ MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon , xmlChar );
157+ xmlCell .setValue (xmlCharacterState );
114158 }
115- else if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
116- MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
117- xmlCell .setValue ((org .nexml .model .CharacterState )((MolecularMatrix )xmlMatrix ).parseSymbol (mesCharString ,xmlMolecularDataTypeFor .get (mesDataType )));
118- }
119- }
159+ } else if (mesDataType .equalsIgnoreCase (ContinuousData .DATATYPENAME )) {
160+ MatrixCell <Double > xmlCell = (MatrixCell <Double >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
161+ xmlCell .setValue ((Double )xmlMatrix .parseSymbol (mesState .toDisplayString (), xmlChar ));
162+ } else if ( xmlMolecularDataTypeFor .containsKey (mesDataType ) ) {
163+ MatrixCell <org .nexml .model .CharacterState > xmlCell = (MatrixCell <org .nexml .model .CharacterState >) xmlMatrix .getCell (xmlTaxon ,xmlChar );
164+ xmlCell .setValue ((org .nexml .model .CharacterState )((MolecularMatrix )xmlMatrix ).parseSymbol (mesState .toDisplayString (), xmlMolecularDataTypeFor .get (mesDataType )));
165+ }
120166 }
121167 }
122168 }
@@ -131,4 +177,51 @@ protected Annotatable getThingInXmlBlock(NexmlWritable xmlBlock, int index) {
131177 return xmlMatrix .getCharacters ().get (index );
132178 }
133179
180+ private UncertainCharacterState findOrCreateUncertainStateSet (CharacterStateSet containingStateSet , Set <String > symbols ) {
181+ for (org .nexml .model .CharacterState state : containingStateSet .getCharacterStates ()) {
182+ if (state instanceof UncertainCharacterState ) {
183+ UncertainCharacterState uncertainState = (UncertainCharacterState )state ;
184+ if (containsMatchingStates (uncertainState , symbols )) {
185+ return uncertainState ;
186+ }
187+ }
188+ }
189+ Set <org .nexml .model .CharacterState > memberStates = collectMatchingStates (containingStateSet , symbols );
190+ return containingStateSet .createUncertainCharacterState (this .nextMultipleStateSymbol ++, memberStates );
191+ }
192+
193+ private PolymorphicCharacterState findOrCreatePolymorphicStateSet (CharacterStateSet containingStateSet , Set <String > symbols ) {
194+ for (org .nexml .model .CharacterState state : containingStateSet .getCharacterStates ()) {
195+ if (state instanceof PolymorphicCharacterState ) {
196+ PolymorphicCharacterState polymorphicState = (PolymorphicCharacterState )state ;
197+ if (containsMatchingStates (polymorphicState , symbols )) {
198+ return polymorphicState ;
199+ }
200+ }
201+ }
202+ Set <org .nexml .model .CharacterState > memberStates = collectMatchingStates (containingStateSet , symbols );
203+ return containingStateSet .createPolymorphicCharacterState (this .nextMultipleStateSymbol ++, memberStates );
204+ }
205+
206+ private boolean containsMatchingStates (CompoundCharacterState state , Set <String > symbols ) {
207+ Set <String > containedSymbols = new HashSet <String >();
208+ for (org .nexml .model .CharacterState containedState : state .getStates ()) {
209+ containedSymbols .add (containedState .getSymbol ().toString ());
210+ }
211+ return containedSymbols .equals (symbols );
212+ }
213+
214+ private Set <org .nexml .model .CharacterState > collectMatchingStates (CharacterStateSet containingStateSet , Set <String > symbols ) {
215+ Set <org .nexml .model .CharacterState > memberStates = new HashSet <org .nexml .model .CharacterState >();
216+ for (String symbol : symbols ) {
217+ org .nexml .model .CharacterState member = containingStateSet .lookupCharacterStateBySymbol (symbol );
218+ if ( null != member ) {
219+ memberStates .add (member );
220+ } else {
221+ memberStates .add (containingStateSet .createCharacterState (symbol ));
222+ }
223+ }
224+ return memberStates ;
225+ }
226+
134227}
0 commit comments