@@ -1059,20 +1059,19 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"):
10591059 raise ValueError ("Parquet metadata does not contain 'lambda'." )
10601060 if not is_mbar :
10611061 try :
1062- # Normalise to :.5f strings to match sire energy trajectory column names.
1063- lambda_grad = [f"{ float (v ):.5f} " for v in metadata ["lambda_grad" ]]
1062+ # Normalise to floats to match the DataFrame column type expected
1063+ # by alchemlyb (handles both old float and new string metadata).
1064+ lambda_grad = [float (v ) for v in metadata ["lambda_grad" ]]
10641065 except :
10651066 raise ValueError ("Parquet metadata does not contain 'lambda grad'" )
10661067 else :
10671068 try :
1068- # Normalise to :.5f strings to match sire energy trajectory column names.
1069- lambda_grad = [f"{ float (v ):.5f} " for v in metadata ["lambda_grad" ]]
1069+ # Normalise to floats to match the DataFrame column type expected
1070+ # by alchemlyb (handles both old float and new string metadata).
1071+ lambda_grad = [float (v ) for v in metadata ["lambda_grad" ]]
10701072 except :
10711073 lambda_grad = []
10721074
1073- # Key used to index the simulated lambda column in the dataframe.
1074- lam_key = f"{ lam :.5f} "
1075-
10761075 # Make sure that the temperature is correct.
10771076 if not T == temperature :
10781077 raise ValueError (
@@ -1083,16 +1082,15 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"):
10831082 # Convert to a pandas dataframe.
10841083 df = table .to_pandas ()
10851084
1086- # Normalise column names to :.5f string format so that comparisons are
1087- # consistent regardless of whether the parquet was written with float keys
1088- # (old sire) or formatted string keys (new sire).
1085+ # Normalise column names to floats so that comparisons are consistent
1086+ # regardless of whether the parquet was written with float keys (old
1087+ # sire) or formatted string keys (new sire). float("0.10000") and
1088+ # float("0.1") give the same IEEE754 value, so old and new files are
1089+ # handled identically and the alchemlyb index check passes.
10891090 df .columns = [
1090- f"{ float (c ):.5f} "
1091- if isinstance (c , (int , float ))
1092- or (
1093- isinstance (c , str )
1094- and c .replace ("." , "" , 1 ).replace ("-" , "" , 1 ).isdigit ()
1095- )
1091+ float (c )
1092+ if isinstance (c , str )
1093+ and c .replace ("." , "" , 1 ).replace ("-" , "" , 1 ).isdigit ()
10961094 else c
10971095 for c in df .columns
10981096 ]
@@ -1102,7 +1100,7 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"):
11021100 df = df [[x for x in df .columns if x not in lambda_grad ]]
11031101
11041102 # Subtract the potential at the simulated lambda.
1105- df = df .subtract (df [lam_key ], axis = 0 )
1103+ df = df .subtract (df [lam ], axis = 0 )
11061104
11071105 # Apply the existing attributes.
11081106 df .attrs = attrs
@@ -1115,19 +1113,19 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"):
11151113 lam_delta = lambda_grad [0 ]
11161114
11171115 # Forward difference.
1118- if float ( lam_delta ) > lam :
1119- incr = float ( lam_delta ) - lam
1120- grad = (df [lam_delta ] - df [lam_key ]) / incr
1116+ if lam_delta > lam :
1117+ incr = lam_delta - lam
1118+ grad = (df [lam_delta ] - df [lam ]) / incr
11211119
11221120 # Backward difference.
11231121 else :
1124- incr = lam - float ( lam_delta )
1125- grad = (df [lam_key ] - df [lam_delta ]) / incr
1122+ incr = lam - lam_delta
1123+ grad = (df [lam ] - df [lam_delta ]) / incr
11261124
11271125 # Central difference.
11281126 else :
11291127 lam_below , lam_above = lambda_grad
1130- double_incr = float ( lam_above ) - float ( lam_below )
1128+ double_incr = lam_above - lam_below
11311129 grad = (df [lam_above ] - df [lam_below ]) / double_incr
11321130
11331131 # Create a DataFrame with the multi-index
0 commit comments