From fa5cf48dbd74e6cbf62472b30ad456c7d8fe7530 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 15:59:30 -0400 Subject: [PATCH 01/52] regex splitting logic --- src/pre_tokenizers/split_bigsmiles.rs | 83 +++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/pre_tokenizers/split_bigsmiles.rs diff --git a/src/pre_tokenizers/split_bigsmiles.rs b/src/pre_tokenizers/split_bigsmiles.rs new file mode 100644 index 0000000..6e75d14 --- /dev/null +++ b/src/pre_tokenizers/split_bigsmiles.rs @@ -0,0 +1,83 @@ +use const_format::formatcp; + +const BRACKETED_SYMBOL: &'static str = concat!( + r"A(?:c|g|l|m|r|s|t|u)|", + r"B(?:a|e|h|i|k|r)?|", + r"C(?:a|d|e|f|l|m|n|o|r|s|u)?|", + r"D(?:b|s|y)|", + r"E(?:r|s|u)|", + r"F(?:e|l|m|r)?|", + r"G(?:a|d|e)|", + r"H(?:e|f|g|o|s)?|", + r"I(?:n|r)?|", + r"Kr?|", + r"L(?:a|i|r|u|v)|", + r"M(?:c|d|g|n|o|t)|", + r"N(?:a|b|d|e|h|i|o|p)?|", + r"O(?:g|s)?|", + r"P(?:a|b|d|m|o|r|t|u)?|", + r"R(?:a|b|e|f|g|h|n|u)|", + r"S(?:b|c|e|g|i|m|n|r)?|", + r"T(?:a|b|c|e|h|i|l|m|s)|", + r"U|", + r"V|", + r"W|", + r"Xe|", + r"Yb?|", + r"Z(?:n|r)|", + r"as|", + r"b|", + r"c|", + r"n|", + r"o|", + r"p|", + r"se?|", + r"\*", +); + +const CHIRAL: &'static str = r"@(?:@|AL|OH|SP|T(?:B|H))?"; + +pub const MATCH_OUTER_BIGSMILES: &'static str = concat!( + r"Br?|Cl?|F|I|N|O|P|S|", // organic subset elements + r"b|c|n|o|p|s|", // Aromatic organic subset + r"\*|", // Wildcard + r"[\.\-=\#\$:/\\]|", // Bonds + r"\d|%|", // Ring closures + r"\(|\)|", + r"\{|\}|", // Stochastic object delimiters + r",|;|", // Repeat unit separator and end group separator + r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", // Bracketed atoms/descriptors +); + +pub const MATCH_INNER_BIGSMILES: &'static str = formatcp!(concat!( + r"^(?:", + r"", + r"|", + r"(\$|<|>)(\d+)?", + r"|", + r"(\$|<|>)(\d+)?(\[)(\$|<|>)(\d+)?(\])(\d+)", + r"|", + r"(#)([!-~]+)", + r"|", + r"(\d+)?", + r"({BRACKETED_SYMBOL})", + r"(?:({CHIRAL})(\d{{1,2}})?)?", + r"(?:(H)(\d)?)?", + 
r"(?:([+-]{{1,2}})(\d{{1,2}})?)?", + r"(?:(:)(\d+))?", + r")$", +)); + +pub const BONDING_DESCRIPTOR: &'static str = concat!( + r"(\$|<|>)", // Descriptor type + r"(\d+)?", // Optional index +); + +pub const LADDER_BONDING_DESCRIPTOR: &'static str = concat!( + r"(\$|<|>)", // Outer descriptor type + r"(\d+)?", // Outer descriptor id + r"(\[)(\$|<|>)(\d+)?(\])", // Inner descriptor + r"(\d+)", // Group id +); + +pub const FRAGMENT_REFERENCE: &'static str = r"(#)([!-~]+)"; From 95e834dd85ed8b61ed01996d3feafcf531de1dd7 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 15:59:56 -0400 Subject: [PATCH 02/52] BigSMILES vocab file add --- python/smirk/vocab_bigsmiles.json | 167 ++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 python/smirk/vocab_bigsmiles.json diff --git a/python/smirk/vocab_bigsmiles.json b/python/smirk/vocab_bigsmiles.json new file mode 100644 index 0000000..1ec1537 --- /dev/null +++ b/python/smirk/vocab_bigsmiles.json @@ -0,0 +1,167 @@ +{ + "[UNK]": 0, + "#": 1, + "$": 2, + "%": 3, + "(": 4, + ")": 5, + "*": 6, + "+": 7, + "-": 8, + ".": 9, + "/": 10, + "0": 11, + "1": 12, + "2": 13, + "3": 14, + "4": 15, + "5": 16, + "6": 17, + "7": 18, + "8": 19, + "9": 20, + ":": 21, + "=": 22, + "@": 23, + "@@": 24, + "@AL": 25, + "@OH": 26, + "@SP": 27, + "@TB": 28, + "@TH": 29, + "Ac": 30, + "Ag": 31, + "Al": 32, + "Am": 33, + "Ar": 34, + "As": 35, + "At": 36, + "Au": 37, + "B": 38, + "Ba": 39, + "Be": 40, + "Bh": 41, + "Bi": 42, + "Bk": 43, + "Br": 44, + "C": 45, + "Ca": 46, + "Cd": 47, + "Ce": 48, + "Cf": 49, + "Cl": 50, + "Cm": 51, + "Cn": 52, + "Co": 53, + "Cr": 54, + "Cs": 55, + "Cu": 56, + "Db": 57, + "Ds": 58, + "Dy": 59, + "Er": 60, + "Es": 61, + "Eu": 62, + "F": 63, + "Fe": 64, + "Fl": 65, + "Fm": 66, + "Fr": 67, + "Ga": 68, + "Gd": 69, + "Ge": 70, + "H": 71, + "He": 72, + "Hf": 73, + "Hg": 74, + "Ho": 75, + "Hs": 76, + "I": 77, + "In": 78, + "Ir": 79, + "K": 80, + "Kr": 81, + "La": 82, + "Li": 83, + 
"Lr": 84, + "Lu": 85, + "Lv": 86, + "Mc": 87, + "Md": 88, + "Mg": 89, + "Mn": 90, + "Mo": 91, + "Mt": 92, + "N": 93, + "Na": 94, + "Nb": 95, + "Nd": 96, + "Ne": 97, + "Nh": 98, + "Ni": 99, + "No": 100, + "Np": 101, + "O": 102, + "Og": 103, + "Os": 104, + "P": 105, + "Pa": 106, + "Pb": 107, + "Pd": 108, + "Pm": 109, + "Po": 110, + "Pr": 111, + "Pt": 112, + "Pu": 113, + "Ra": 114, + "Rb": 115, + "Re": 116, + "Rf": 117, + "Rg": 118, + "Rh": 119, + "Rn": 120, + "Ru": 121, + "S": 122, + "Sb": 123, + "Sc": 124, + "Se": 125, + "Sg": 126, + "Si": 127, + "Sm": 128, + "Sn": 129, + "Sr": 130, + "Ta": 131, + "Tb": 132, + "Tc": 133, + "Te": 134, + "Th": 135, + "Ti": 136, + "Tl": 137, + "Tm": 138, + "Ts": 139, + "U": 140, + "V": 141, + "W": 142, + "Xe": 143, + "Y": 144, + "Yb": 145, + "Zn": 146, + "Zr": 147, + "[": 148, + "\\": 149, + "]": 150, + "as": 151, + "b": 152, + "c": 153, + "n": 154, + "o": 155, + "p": 156, + "s": 157, + "se": 158, + "{": 159, + "}": 160, + ",": 161, + ";": 162, + "<": 163, + ">": 164 +} From 821f97eec49d11eb3b0aed213471c4f5bd2bfc57 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 16:00:31 -0400 Subject: [PATCH 03/52] add vocab_bigsmiles to includes --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5c847b5..68f258d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ docs = [ "torch~=2.0", "rdkit==2024.9.5", "transformers~=4.48.2", + "notebook>=7.5.5", ] [build-system] @@ -53,4 +54,4 @@ requires = ["maturin~=1.7"] [tool.maturin] python-source = "python" -include = [ "python/smirk/vocab_smiles.json", "python/smirk/vocab_selfies.json" ] +include = [ "python/smirk/vocab_smiles.json", "python/smirk/vocab_bigsmiles.json", "python/smirk/vocab_selfies.json" ] From b656eddd9790538d00088e4460a87205ff611469 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 16:01:43 -0400 Subject: [PATCH 04/52] fist pass big smiles tokenizer --- 
src/pre_tokenizers/bigsmirk.rs | 586 +++++++++++++++++++++++++++++++++ 1 file changed, 586 insertions(+) create mode 100644 src/pre_tokenizers/bigsmirk.rs diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs new file mode 100644 index 0000000..53d2318 --- /dev/null +++ b/src/pre_tokenizers/bigsmirk.rs @@ -0,0 +1,586 @@ +use super::split_bigsmiles::{MATCH_INNER_BIGSMILES, MATCH_OUTER_BIGSMILES}; +use once_cell::sync::Lazy; +use regex::{Match, Regex}; +use serde::de::Visitor; +use serde::ser::SerializeStruct; +use serde::{Deserialize, Serialize}; +use std::fmt; +use tokenizers::tokenizer::pattern::Pattern; +use tokenizers::tokenizer::{ + Offsets, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior, +}; + + +#[derive(Clone)] +pub struct BigSmirkPreTokenizer { + outer: Regex, + inner: Regex, +} + +impl BigSmirkPreTokenizer { + pub fn new(outer: &str, inner: &str) -> Self { + Self { + outer: Regex::new(&outer).unwrap(), + inner: Regex::new(&inner).unwrap(), + } + } + + pub fn split(&self, text: &String) -> Vec { + self.find_matches(text) + .unwrap() + .into_iter() + .map(|(offset, _)| text.get(offset.0..offset.1).unwrap().to_owned()) + .filter(|tok| !tok.is_empty()) + .collect() + } +} + +impl Default for BigSmirkPreTokenizer { + fn default() -> Self { + BigSmirkPreTokenizer::new(MATCH_OUTER_BIGSMILES, MATCH_INNER_BIGSMILES) + } +} + +impl PartialEq for BigSmirkPreTokenizer { + fn eq(&self, other: &Self) -> bool { + self.outer.as_str() == other.outer.as_str() && self.inner.as_str() == other.inner.as_str() + } +} + +impl fmt::Debug for BigSmirkPreTokenizer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("BigSmirkPreTokenizer") + .field("outer", &format_args!("'{}'", &self.outer.as_str())) + .field("inner", &format_args!("'{}'", &self.inner.as_str())) + .finish() + } +} + +impl Serialize for BigSmirkPreTokenizer { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, 
+ { + let mut state = serializer.serialize_struct("BigSmirkPreTokenizer", 3)?; + state.serialize_field("type", "BigSmirkPreTokenizer")?; + state.serialize_field("outer", self.outer.as_str())?; + state.serialize_field("inner", self.inner.as_str())?; + state.end() + } +} + +impl<'de> Deserialize<'de> for BigSmirkPreTokenizer { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + deserializer.deserialize_struct( + "BigSmirkPreTokenizer", + &["type", "outer", "inner"], + BigSmirkPreTokenizerVisitor, + ) + } +} + +struct BigSmirkPreTokenizerVisitor; +impl<'de> Visitor<'de> for BigSmirkPreTokenizerVisitor { + type Value = BigSmirkPreTokenizer; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "struct BigSmirkPreTokenizer with type field") + } + + fn visit_map(self, mut map: A) -> std::result::Result + where + A: serde::de::MapAccess<'de>, + { + let mut outer: Option = None; + let mut inner: Option = None; + let mut type_field: Option = None; + while let Some(key) = map.next_key::()? { + match key.as_ref() { + "type" => { + type_field = Some(map.next_value()?); + } + "outer" => { + if let Some(x) = map.next_value()? { + outer = Some(x); + } + } + "inner" => { + if let Some(x) = map.next_value()? 
{ + inner = Some(x); + } + } + _ => { + let _: serde::de::IgnoredAny = map.next_value()?; + } + } + } + match type_field.as_deref() { + Some("BigSmirkPreTokenizer") => {} + _ => { + return Err(serde::de::Error::custom( + "Missing or invalid type field for BigSmirkPreTokenizer", + )); + } + } + Ok(BigSmirkPreTokenizer::new( + outer.expect("Missing `outer`").as_str(), + inner.expect("Missing `inner`").as_str(), + )) + } +} + +impl PreTokenizer for BigSmirkPreTokenizer { + fn pre_tokenize(&self, pretokenized: &mut PreTokenizedString) -> Result<()> { + pretokenized.split(|_, s| s.split(self.to_owned(), SplitDelimiterBehavior::Isolated)) + } +} + +fn append_split(splits: &mut Vec<(Offsets, bool)>, prev: &mut usize, m: Match, offset: usize) { + let start = m.start() + offset; + let end = m.end() + offset; + if *prev != start { + splits.push(((*prev, start), false)); + } + splits.push(((start, end), true)); + *prev = end; +} + +impl Pattern for BigSmirkPreTokenizer { + fn find_matches(&self, inside: &str) -> Result> { + let mut splits = Vec::with_capacity(inside.len()); + let mut prev = 0; + let n_inner_groups = self.inner.captures_len(); + static IS_NUMBER: Lazy = Lazy::new(|| Regex::new(r"^\d+$").unwrap()); + static IS_BONDING_DESC: Lazy = Lazy::new(|| Regex::new(r"^[\$<>]$").unwrap()); + for m_outer in self.outer.find_iter(inside) { + // Check for Brackets + if m_outer.as_str().starts_with("[") { + // Record opening [ + splits.push(((m_outer.start(), m_outer.start() + 1), true)); + prev = m_outer.start() + 1; + + // Record contents between brackets + let bracketed = &inside[(m_outer.start() + 1)..(m_outer.end() - 1)]; + + // Try to match with inner pattern + if let Some(capture) = self.inner.captures(&bracketed) { + // Unpack bracketed atoms + for i in 1..n_inner_groups { + if let Some(m) = capture.get(i) { + let matched_str = m.as_str(); + if matched_str.is_empty() { + continue; + } + if IS_NUMBER.is_match(matched_str) { + // Tokenize numbers as digits + for d in 
m.range() { + let s = d + m_outer.start() + 1; + splits.push(((s, s + 1), true)); + prev = s + 1; + } + } else if IS_BONDING_DESC.is_match(matched_str) { + // Bonding descriptor ($, <, >) - keep as single token + append_split(&mut splits, &mut prev, m, m_outer.start() + 1) + } else { + append_split(&mut splits, &mut prev, m, m_outer.start() + 1) + } + } + } + } + + // Check for trailing unmatched characters within the brackets + if prev != (m_outer.end() - 1) { + splits.push(((prev, m_outer.end() - 1), false)); + prev = m_outer.end() - 1; + } + + // Record closing ] + assert!(m_outer.as_str().ends_with("]")); + splits.push(((prev, m_outer.end()), true)); + prev = m_outer.end(); + } else { + append_split(&mut splits, &mut prev, m_outer, 0); + } + } + if prev != inside.len() { + splits.push(((prev, inside.len()), false)); + } + Ok(splits) + } +} + +#[cfg(test)] +pub mod tests { + use std::fs; + use std::path::PathBuf; + + use super::*; + use crate::test_utils::check_serde; + use tokenizers::tokenizer::{OffsetReferential, OffsetType}; + + #[test] + fn serialize_default() { + let default = BigSmirkPreTokenizer::default(); + check_serde(&default); + } + + #[test] + fn serialize_pretok() { + let pretok = BigSmirkPreTokenizer::new(r".|\[.*?]", "."); + check_serde(&pretok); + } + + fn all_matches(tok: &BigSmirkPreTokenizer, bigsmiles: &str) -> bool { + let splits = tok.find_matches(bigsmiles).unwrap(); + splits.into_iter().all(|(_s, m)| m) + } + + fn get_matched_pretokens(tok: &BigSmirkPreTokenizer, bigsmiles: &str) -> Vec { + tok.find_matches(bigsmiles) + .unwrap() + .into_iter() + .filter(|(_, m)| *m) + .map(|(o, _)| bigsmiles[o.0..o.1].into()) + .collect() + } + + fn get_split_tokens(tok: &BigSmirkPreTokenizer, bigsmiles: &str) -> Vec { + let mut bigsmiles = PreTokenizedString::from(bigsmiles); + tok.pre_tokenize(&mut bigsmiles).unwrap(); + bigsmiles + .get_splits(OffsetReferential::Original, OffsetType::Byte) + .into_iter() + .map(|(s, _, _)| s.to_string()) + 
.collect() + } + + #[test] + fn test_standard_smiles_basic() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "OC[C@@H]"), + ["O", "C", "[", "C", "@@", "H", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, "OC[C@@H][OH]"), + ["O", "C", "[", "C", "@@", "H", "]", "[", "O", "H", "]"] + ); + } + + #[test] + fn test_standard_smiles_chirality() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!(get_split_tokens(&pretok, "[C@]"), ["[", "C", "@", "]"]); + assert_eq!( + get_split_tokens(&pretok, "[C@H]"), + ["[", "C", "@", "H", "]"] + ); + assert_eq!(get_split_tokens(&pretok, "[C@@]"), ["[", "C", "@@", "]"]); + assert_eq!( + get_split_tokens(&pretok, "[Fe@TB3+3]"), + ["[", "Fe", "@TB", "3", "+", "3", "]"] + ); + } + + #[test] + fn test_standard_smiles_isotopes_charges() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "[16C]"), + ["[", "1", "6", "C", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, "[C+12]"), + ["[", "C", "+", "1", "2", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, "[CH4:200]"), + ["[", "C", "H", "4", ":", "2", "0", "0", "]"] + ); + } + + #[test] + fn test_standard_smiles_rings_bonds() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "C1CCCC2C1CCCC2"), + ["C", "1", "C", "C", "C", "C", "2", "C", "1", "C", "C", "C", "C", "2"] + ); + assert_eq!(get_split_tokens(&pretok, "C%12"), ["C", "%", "1", "2"]); + assert_eq!( + get_split_tokens(&pretok, "F/C=C/F"), + ["F", "/", "C", "=", "C", "/", "F"] + ); + assert_eq!( + get_split_tokens(&pretok, r"F/C=C\F"), + ["F", "/", "C", "=", "C", "\\", "F"] + ); + } + + #[test] + fn test_standard_smiles_complex() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "[Na+].[Cl-]"), + ["[", "Na", "+", "]", ".", "[", "Cl", "-", "]"] + ); + assert_eq!(get_split_tokens(&pretok, "CC-O"), ["C", "C", "-", "O"]); + 
assert_eq!(get_split_tokens(&pretok, "O=C=O"), ["O", "=", "C", "=", "O"]); + assert_eq!(get_split_tokens(&pretok, "C#N"), ["C", "#", "N"]); + assert_eq!( + get_split_tokens(&pretok, "c1ccccc1"), + ["c", "1", "c", "c", "c", "c", "c", "1"] + ); + assert_eq!( + get_split_tokens(&pretok, "FC(Br)(Cl)F"), + ["F", "C", "(", "Br", ")", "(", "Cl", ")", "F"] + ); + assert!(all_matches( + &pretok, + "OC[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[C@H](O)1" + )); + } + + #[test] + fn test_stochastic_object_simple() { + let pretok = BigSmirkPreTokenizer::default(); + // Simple polymer repeat unit with AA-type bonding + assert_eq!( + get_split_tokens(&pretok, "{[$]CC[$]}"), + ["{", "[", "$", "]", "C", "C", "[", "$", "]", "}"] + ); + } + + #[test] + fn test_stochastic_object_multiple_units() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "{[$]CC[$],[$]C(C)C[$]}"), + [ + "{", "[", "$", "]", "C", "C", "[", "$", "]", ",", "[", "$", "]", "C", "(", "C", + ")", "C", "[", "$", "]", "}" + ] + ); + } + + #[test] + fn test_ab_type_descriptors() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "{[<]CC[>]}"), + ["{", "[", "<", "]", "C", "C", "[", ">", "]", "}"] + ); + assert_eq!( + get_split_tokens(&pretok, "{[>]CCCCCC(=O)[<],[>]NCCCCCCN[<]}"), + [ + "{", "[", ">", "]", "C", "C", "C", "C", "C", "C", "(", "=", "O", ")", "[", + "<", "]", ",", "[", ">", "]", "N", "C", "C", "C", "C", "C", "C", "N", "[", + "<", "]", "}" + ] + ); + } + + #[test] + fn test_indexed_bonding_descriptors() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "[$1]"), + ["[", "$", "1", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, "[$2]"), + ["[", "$", "2", "]"] + ); + + assert_eq!(get_split_tokens(&pretok, "[<1]"), ["[", "<", "1", "]"]); + assert_eq!(get_split_tokens(&pretok, "[>1]"), ["[", ">", "1", "]"]); + + assert_eq!( + get_split_tokens(&pretok, "{[$1]CC[$1],[$2]C(C)C[$2]}"), + [ + "{", "[", 
"$", "1", "]", "C", "C", "[", "$", "1", "]", ",", "[", "$", "2", + "]", "C", "(", "C", ")", "C", "[", "$", "2", "]", "}" + ] + ); + } + + #[test] + fn test_ladder_descriptors() { + let pretok = BigSmirkPreTokenizer::default(); + + assert_eq!( + get_split_tokens(&pretok, "[<1[<1]1]"), + ["[", "<", "1", "[", "<", "1", "]", "1", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, "[$1[$2]3]"), + ["[", "$", "1", "[", "$", "2", "]", "3", "]"] + ); + + assert_eq!( + get_split_tokens(&pretok, "{[<1[<1]1]CC[>1[>1]1]}"), + [ + "{", "[", "<", "1", "[", "<", "1", "]", "1", "]", "C", "C", "[", ">", "1", + "[", ">", "1", "]", "1", "]", "}" + ] + ); + } + + #[test] + fn test_external_bond_order_with_descriptors() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "C=[$2]"), + ["C", "=", "[", "$", "2", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, r"C/[>1]"), + ["C", "/", "[", ">", "1", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, r"C\[<1]"), + ["C", "\\", "[", "<", "1", "]"] + ); + } + + #[test] + fn test_empty_terminal() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!(get_split_tokens(&pretok, "[]"), ["[", "]"]); + + assert_eq!( + get_split_tokens(&pretok, "{[]CC[$]}"), + ["{", "[", "]", "C", "C", "[", "$", "]", "}"] + ); + + assert_eq!( + get_split_tokens(&pretok, "{[]CC[]}"), + ["{", "[", "]", "C", "C", "[", "]", "}"] + ); + } + + #[test] + fn test_end_groups_semicolon() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "{[$]CC[$];[H][$],[$]O}"), + [ + "{", "[", "$", "]", "C", "C", "[", "$", "]", ";", "[", "H", "]", "[", "$", "]", + ",", "[", "$", "]", "O", "}" + ] + ); + assert_eq!( + get_split_tokens(&pretok, "{[$]CC[$];C[$],[$]C}"), + [ + "{", "[", "$", "]", "C", "C", "[", "$", "]", ";", "C", "[", "$", "]", ",", "[", + "$", "]", "C", "}" + ] + ); + } + + #[test] + fn test_block_copolymer() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + 
get_split_tokens(&pretok, "{[$]CC[$]}{[$]CC(C)[$]}"), + [ + "{", "[", "$", "]", "C", "C", "[", "$", "]", "}", "{", "[", "$", "]", "C", "C", + "(", "C", ")", "[", "$", "]", "}" + ] + ); + assert_eq!( + get_split_tokens(&pretok, "CC{[$]CC[$]}CC"), + ["C", "C", "{", "[", "$", "]", "C", "C", "[", "$", "]", "}", "C", "C"] + ); + } + + #[test] + fn test_graft_copolymer_nested() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "{[$]CC(C{[<]CC[>]})[$]}"), + [ + "{", "[", "$", "]", "C", "C", "(", "C", "{", "[", "<", "]", "C", "C", "[", ">", + "]", "}", ")", "[", "$", "]", "}" + ] + ); + assert_eq!( + get_split_tokens(&pretok, "{[$]CC{[<]C[>]}[$]}"), + [ + "{", "[", "$", "]", "C", "C", "{", "[", "<", "]", "C", "[", ">", "]", "}", "[", + "$", "]", "}" + ] + ); + } + + #[test] + fn test_fragment_reference() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + get_split_tokens(&pretok, "[#PEG]"), + ["[", "#", "PEG", "]"] + ); + assert_eq!( + get_split_tokens(&pretok, "[#Styrene]"), + ["[", "#", "Styrene", "]"] + ); + assert_eq!(get_split_tokens(&pretok, "[#+]"), ["[", "#", "+", "]"]); + assert_eq!( + get_split_tokens(&pretok, "[#PEG-1]"), + ["[", "#", "PEG-1", "]"] + ); + assert_eq!(get_split_tokens(&pretok, "[#A]"), ["[", "#", "A", "]"]); + assert_eq!( + get_split_tokens(&pretok, "{[$][#Styrene][$]}"), + ["{", "[", "$", "]", "[", "#", "Styrene", "]", "[", "$", "]", "}"] + ); + } + + #[test] + fn test_reject_invalid_bracket_symbol_forms() { + let pretok = BigSmirkPreTokenizer::default(); + assert!(!all_matches(&pretok, "[B|]")); + assert!(!all_matches(&pretok, "[C@@Hextra]")); + assert!(!all_matches(&pretok, "[$=]")); + assert!(!all_matches(&pretok, "[>#]")); + assert!(!all_matches(&pretok, "[$/]")); + assert!(!all_matches(&pretok, r"[$\]")); + assert!(!all_matches(&pretok, "[#PEG 1]")); + } + + #[test] + fn test_mixed_smiles_bigsmiles() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!( + 
get_split_tokens(&pretok, "CCCC{[$]CC(c1ccccc1)[$]}CCCC"), + [ + "C", "C", "C", "C", "{", "[", "$", "]", "C", "C", "(", "c", "1", "c", "c", "c", + "c", "c", "1", ")", "[", "$", "]", "}", "C", "C", "C", "C" + ] + ); + assert_eq!( + get_split_tokens(&pretok, "CCCC{[$]CC[$]}CCCC.NCC"), + [ + "C", "C", "C", "C", "{", "[", "$", "]", "C", "C", "[", "$", "]", "}", "C", "C", + "C", "C", ".", "N", "C", "C" + ] + ); + } + + #[test] + fn test_opensmiles_spec() { + let pretok = BigSmirkPreTokenizer::default(); + let mut opensmiles_examples = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + opensmiles_examples.push("test"); + opensmiles_examples.push("opensmiles.smi"); + let examples = fs::read_to_string(opensmiles_examples.as_path()) + .expect("failed to open opensmiles.smi"); + for line in examples.lines().filter(|x| !x.starts_with("#")) { + dbg!(&line); + assert!(all_matches(&pretok, line)); + } + } +} From b3f7eacc050f2ed046e895d7cbe4c6605f103509 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 16:36:17 -0400 Subject: [PATCH 05/52] add big smiles tokenizer to wrapper --- python/smirk/__init__.py | 36 ++++++++++++++++++++++++++++++------ src/pre_tokenizers/mod.rs | 3 +++ src/tokenizer.rs | 22 +++++++++++++++++----- src/wrapper.rs | 16 +++++++++++++++- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/python/smirk/__init__.py b/python/smirk/__init__.py index 196af33..61bb6da 100644 --- a/python/smirk/__init__.py +++ b/python/smirk/__init__.py @@ -15,7 +15,6 @@ TruncationStrategy, ) from transformers.tokenization_utils_fast import TOKENIZER_FILE -from transformers.utils import add_code_sample_docstrings from . import smirk as rs_smirk @@ -28,8 +27,8 @@ "cls_token": "[CLS]", "mask_token": "[MASK]", } -""" Default Special tokens used by the :py:class:`SmirkTokenizerFast` -and :py:func:`SmirkSelfiesFast` tokenizers. +""" Default special tokens used by :py:class:`SmirkTokenizerFast`, +:py:class:`SmirkBigSmilesFast`, and :py:func:`SmirkSelfiesFast`. 
""" @@ -162,9 +161,9 @@ def num_special_tokens_to_add(self, pair: bool = False) -> int: return len(self.build_inputs_with_special_tokens([], [] if pair else None)) def __check_encode_kwargs(self, kwargs): - assert ( - kwargs.pop("return_overflowing_tokens", False) is False - ), "Not implemented" + assert kwargs.pop("return_overflowing_tokens", False) is False, ( + "Not implemented" + ) assert kwargs.pop("split_special_tokens", False) is False, "Not implemented" assert kwargs.pop("is_split_into_words", False) is False, "Not implemented" @@ -342,6 +341,31 @@ def _save_pretrained( AutoTokenizer.register("SmirkTokenizer", fast_tokenizer_class=SmirkTokenizerFast) +class SmirkBigSmilesFast(SmirkTokenizerFast): + def __init__(self, tokenizer_file: Optional[os.PathLike] = None, **kwargs): + """ + A Chemically-Complete Tokenizer for core BigSMILES line notation. + + :param tokenizer_file: Path to a JSON serialize SmirkTokenizerFast tokenizers + :param kwargs: Additional kwargs are passed to :py:class:`SmirkTokenizerFast` + """ + default_vocab_file = files("smirk").joinpath("vocab_bigsmiles.json") + if tokenizer := kwargs.pop("tokenizer", None): + tokenizer = tokenizer + elif tokenizer_file: + tokenizer = rs_smirk.SmirkTokenizer.from_file(str(tokenizer_file)) + kwargs["tokenizer_file"] = str(tokenizer_file) + elif vocab_file := kwargs.pop("vocab_file", default_vocab_file): + tokenizer = rs_smirk.SmirkTokenizer.from_vocab( + str(vocab_file), bigsmiles=True + ) + kwargs["vocab_file"] = str(vocab_file) + else: + tokenizer = rs_smirk.SmirkTokenizer(bigsmiles=True) + + super().__init__(tokenizer=tokenizer, **kwargs) + + def SmirkSelfiesFast( vocab_file: Optional[os.PathLike] = None, unk_token="[UNK]", **kwargs ) -> PreTrainedTokenizerFast: diff --git a/src/pre_tokenizers/mod.rs b/src/pre_tokenizers/mod.rs index d2db9db..ded1c4d 100644 --- a/src/pre_tokenizers/mod.rs +++ b/src/pre_tokenizers/mod.rs @@ -1,9 +1,12 @@ +mod bigsmirk; mod smirk; +mod split_bigsmiles; mod split_smiles; 
use tokenizers::pre_tokenizers::split::{Split, SplitPattern}; use tokenizers::SplitDelimiterBehavior; +pub use bigsmirk::BigSmirkPreTokenizer; pub use smirk::SmirkPreTokenizer; pub fn split_structure() -> Split { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 41c61f8..4184840 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,7 +1,7 @@ use std::collections::{HashMap, HashSet}; use crate::gpe::{GpeTrainer, GPE}; -use crate::pre_tokenizers::{split_structure, SmirkPreTokenizer}; +use crate::pre_tokenizers::{split_structure, BigSmirkPreTokenizer, SmirkPreTokenizer}; use crate::wrapper::{ModelWrapper, PreTokenizerWrapper, TrainerWrapper}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -51,7 +51,13 @@ fn normalizer() -> normalizers::Sequence { #[pymethods] impl SmirkTokenizer { #[new] - fn __new__() -> Self { + #[pyo3(signature = (bigsmiles = false))] + fn __new__(bigsmiles: bool) -> Self { + let pre_tokenizer: PreTokenizerWrapper = if bigsmiles { + BigSmirkPreTokenizer::default().into() + } else { + SmirkPreTokenizer::default().into() + }; let tokenizer: Tokenizer = TokenizerBuilder::new() .with_model( WordLevel::builder() @@ -60,7 +66,7 @@ impl SmirkTokenizer { .unwrap() .into(), ) - .with_pre_tokenizer(Some(SmirkPreTokenizer::default().into())) + .with_pre_tokenizer(Some(pre_tokenizer)) .with_normalizer(Some(normalizer().into())) .with_decoder(Some(Fuse::default().into())) .build() @@ -77,11 +83,17 @@ impl SmirkTokenizer { } #[staticmethod] - fn from_vocab(file: &str) -> Self { + #[pyo3(signature = (file, bigsmiles = false))] + fn from_vocab(file: &str, bigsmiles: bool) -> Self { + let pre_tokenizer: PreTokenizerWrapper = if bigsmiles { + BigSmirkPreTokenizer::default().into() + } else { + SmirkPreTokenizer::default().into() + }; let model = WordLevel::from_file(file, "[UNK]".to_string()).unwrap(); let tokenizer = TokenizerBuilder::new() .with_model(model.into()) - .with_pre_tokenizer(Some(SmirkPreTokenizer::default().into())) + 
.with_pre_tokenizer(Some(pre_tokenizer)) .with_normalizer(Some(normalizer().into())) .with_decoder(Some(Fuse::new().into())) .build() diff --git a/src/wrapper.rs b/src/wrapper.rs index ec6e626..4ebfb6d 100644 --- a/src/wrapper.rs +++ b/src/wrapper.rs @@ -2,12 +2,13 @@ use serde::{Deserialize, Serialize}; use tokenizers::tokenizer::{Model, PreTokenizedString, PreTokenizer, Result, Trainer}; use crate::gpe::{GpeTrainer, GPE}; -use crate::pre_tokenizers::SmirkPreTokenizer; +use crate::pre_tokenizers::{BigSmirkPreTokenizer, SmirkPreTokenizer}; #[derive(Deserialize, Serialize, Clone, Debug, PartialEq)] #[serde(untagged)] pub enum PreTokenizerWrapper { PreTokenizer(tokenizers::PreTokenizerWrapper), + BigSmirkPreTokenizer(BigSmirkPreTokenizer), SmirkPreTokenizer(SmirkPreTokenizer), } @@ -15,6 +16,7 @@ impl PreTokenizer for PreTokenizerWrapper { fn pre_tokenize(&self, pretokenized: &mut PreTokenizedString) -> Result<()> { match self { Self::PreTokenizer(t) => t.pre_tokenize(pretokenized), + Self::BigSmirkPreTokenizer(t) => t.pre_tokenize(pretokenized), Self::SmirkPreTokenizer(t) => t.pre_tokenize(pretokenized), } } @@ -26,6 +28,12 @@ impl From for PreTokenizerWrapper { } } +impl From for PreTokenizerWrapper { + fn from(value: BigSmirkPreTokenizer) -> Self { + Self::BigSmirkPreTokenizer(value) + } +} + impl From for PreTokenizerWrapper { fn from(value: tokenizers::PreTokenizerWrapper) -> Self { Self::PreTokenizer(value) @@ -201,4 +209,10 @@ mod test { check_serde(&pretok.0.clone()); check_serde(&pretok); } + + #[test] + fn serialize_bigsmirk_pretok() { + let pretok = PreTokenizerWrapper::BigSmirkPreTokenizer(BigSmirkPreTokenizer::default()); + check_serde(&pretok); + } } From 56002c683093313c74acf7667449eae5c7cbf39c Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 16:39:03 -0400 Subject: [PATCH 06/52] add python tests --- test/test_tokenize_bigsmiles.py | 95 +++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 
test/test_tokenize_bigsmiles.py diff --git a/test/test_tokenize_bigsmiles.py b/test/test_tokenize_bigsmiles.py new file mode 100644 index 0000000..705c941 --- /dev/null +++ b/test/test_tokenize_bigsmiles.py @@ -0,0 +1,95 @@ +import json +from tempfile import NamedTemporaryFile + +import pytest + +import smirk +from smirk.smirk import SmirkTokenizer + + +def _assert_pretokenize( + tokenizer: SmirkTokenizer, text: str, expected_tokens: list[str] +) -> None: + assert tokenizer.pretokenize(text) == expected_tokens + + +def _tokens(spec: str) -> list[str]: + return spec.split() + + +@pytest.fixture +def bigsmiles_tokenizer() -> SmirkTokenizer: + return SmirkTokenizer(bigsmiles=True) + + +@pytest.fixture +def smiles_tokenizer() -> SmirkTokenizer: + return SmirkTokenizer(bigsmiles=False) + + +@pytest.mark.parametrize( + "bigsmiles_batch", + [ + ["{[$]CC[$]}"], + ["{[$]CC[$],[$]C(C)C[$]}", "{[<]CC[>]}"], + ["[$1]", "[<2]", "[]", "{[]CC[$]}", "{[$]CC[$];C[$],[$]C}"], + ["CC{[$]CC[$]}CC", "{[$]CC(c1ccccc1)[$]}"], + ["{[>]CCCCCC(=O)[<],[>]NCCCCCCN[<]}", "{[$]CC[$]}{[$]CC(C)[$]}"], + ], +) +def test_bigsmiles_roundtrip_batch_decode(bigsmiles_batch): + bigsmirk = smirk.SmirkBigSmilesFast() + encoded = bigsmirk(bigsmiles_batch, add_special_tokens=False) + decoded = bigsmirk.batch_decode(encoded["input_ids"], skip_special_tokens=True) + assert decoded == bigsmiles_batch + + +@pytest.mark.parametrize( + ("text", "expected_tokens"), + [ + ("OC[C@@H]", _tokens("O C [ C @@ H ]")), + ("C[C@H](N)C(=O)O", _tokens("C [ C @ H ] ( N ) C ( = O ) O")), + ], +) +def test_smiles_tokens_match_between_modes( + bigsmiles_tokenizer, smiles_tokenizer, text, expected_tokens +): + _assert_pretokenize(bigsmiles_tokenizer, text, expected_tokens) + _assert_pretokenize(smiles_tokenizer, text, expected_tokens) + + +@pytest.mark.parametrize( + "bigsmiles,expected_type", + [ + (True, "BigSmirkPreTokenizer"), + (False, None), + ], +) +def test_tokenizer_serialize_pretokenizer_type(bigsmiles, expected_type): 
+ tokenizer = SmirkTokenizer(bigsmiles=bigsmiles) + config = json.loads(tokenizer.to_str()) + assert "pre_tokenizer" in config + + if expected_type is None: + assert "type" not in config["pre_tokenizer"] + else: + assert config["pre_tokenizer"].get("type") == expected_type + + +@pytest.mark.parametrize( + "text", + [ + "{[$]CC[$]}", + "{[<]CC[>]}", + "{[]CC[$]}", + "{[$]CC[$],[$]C(C)C[$]}", + ], +) +def test_bigsmiles_tokenizer_save_load(bigsmiles_tokenizer, text): + with NamedTemporaryFile("w", suffix=".json", delete=False) as file: + bigsmiles_tokenizer.save(file.name) + loaded = SmirkTokenizer.from_file(file.name) + + original_splits = bigsmiles_tokenizer.pretokenize(text) + loaded_splits = loaded.pretokenize(text) + assert original_splits == loaded_splits From 26d284b17c17657ab95b5a287621f7c51428f08a Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 16:50:03 -0400 Subject: [PATCH 07/52] formatting and first pass demo --- docs/big_smirk_demo.ipynb | 393 ++++++++++++++++++++++++++ python/smirk/__init__.py | 6 +- src/pre_tokenizers/bigsmirk.rs | 35 +-- src/pre_tokenizers/split_bigsmiles.rs | 34 +-- test/test_tokenize_bigsmiles.py | 1 - 5 files changed, 427 insertions(+), 42 deletions(-) create mode 100644 docs/big_smirk_demo.ipynb diff --git a/docs/big_smirk_demo.ipynb b/docs/big_smirk_demo.ipynb new file mode 100644 index 0000000..c824e9b --- /dev/null +++ b/docs/big_smirk_demo.ipynb @@ -0,0 +1,393 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# BigSMILES and BigSmirk\n", + "\n", + "\n", + " \"Open\n", + "\n", + "\n", + " \"Binder\"\n", + "\n", + "\n", + "\n", + "\n", + "BigSmirk tokenizes the [BigSMILES] encoding for macromolecules all the way down to their constituent elements.\n", + "\n", + "Let's see it in action!\n", + "\n", + "[BigSMILES]: https://olsenlabmit.github.io/BigSMILES/docs/#the-bigsmiles-project" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1", + 
"metadata": {}, + "source": [ + "🐍 Installation is easy with pre-build binaries on [PyPI](https://pypi.org/project/smirk/) and [GitHub](https://github.com/BattModels/smirk/releases). Just run: `pip install smirk`\n", + "\n", + "> Installing from source? See [installing from source](./developer.md#installing-from-source) for instructions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "!python -m pip install smirk transformers" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "## First steps\n", + "\n", + "🤗 smirk subclasses Hugging Face's [PreTrainedTokenizerBase](#transformers.PreTrainedTokenizerBase) for seamless compatibility and leverages [Tokenizers] for raw rust-powered speed. No need to learn another framework; everything works out of the box 🎁\n", + "\n", + "[Tokenizers]: https://huggingface.co/docs/tokenizers/index" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': [159, 148, 150, 148, 2, 150, 45, 45, 148, 2, 150, 161, 148, 2, 150, 45, 45, 4, 45, 45, 5, 148, 2, 150, 148, 150, 160], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from smirk import SmirkBigSmilesFast\n", + "\n", + "# Just import, set the bigsmiles argument and tokenize!\n", + "bigsmirk = SmirkBigSmilesFast()\n", + "bigsmirk(\"{[][$]CC[$],[$]CC(CC)[$][]}\") # ethylene butene copolymer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': [[148, 71, 150, 102, 159, 148, 164, 150, 148, 163, 150, 45, 4, 22, 102, 5, 45, 45, 45, 45, 45, 4, 22, 102, 5, 148, 163, 150, 161, 148, 
164, 150, 93, 45, 45, 45, 45, 45, 45, 93, 148, 164, 150, 148, 163, 150, 160, 148, 71, 150], [159, 148, 150, 148, 163, 150, 102, 45, 45, 148, 164, 150, 148, 163, 150, 160, 159, 148, 164, 150, 148, 163, 150, 102, 45, 4, 45, 5, 45, 148, 164, 150, 148, 150, 160, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168], [159, 148, 150, 148, 163, 150, 45, 4, 22, 102, 5, 153, 12, 153, 153, 153, 4, 153, 153, 12, 5, 45, 4, 22, 102, 5, 148, 163, 150, 161, 148, 164, 150, 102, 45, 45, 102, 148, 164, 150, 148, 150, 160, 168, 168, 168, 168, 168, 168, 168], [159, 163, 10, 45, 22, 45, 10, 153, 12, 153, 153, 4, 45, 45, 45, 45, 45, 45, 5, 153, 4, 153, 153, 12, 45, 45, 45, 45, 45, 45, 5, 10, 45, 22, 45, 10, 153, 13, 153, 153, 153, 4, 153, 153, 13, 5, 164, 160, 168, 168]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Batch Tokenization with Padding\n", + "batch = bigsmirk([\n", + " \"[H]O{[>][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>][<]}[H]\", # nylon-6,6\n", + " \"{[][<]OCC[>][<]}{[>][<]OC(C)C[>][]}\", # block copolymer\n", + " \"{[][<]C(=O)c1ccc(cc1)C(=O)[<],[>]OCCO[>][]}\", # alternation co-polymer\n", + "], padding=\"longest\")\n", + "batch" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"['[H]O{[>][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>][<]}[H]',\n", + " '{[][<]OCC[>][<]}{[>][<]OC(C)C[>][]}',\n", + " '{[][<]C(=O)c1ccc(cc1)C(=O)[<],[>]OCCO[>][]}',\n", + " '{}']" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Back to polymers!\n", + "bigsmirk.batch_decode(batch[\"input_ids\"], skip_special_tokens=True)" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## Zero to Polymer Foundation Model with Smirk!" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Let's train a small [RoBERTa] model on polymers from [S. Choi et al., 2024] using Hugging Face and smirk.\n", + "\n", + "[RoBERTa]: https://doi.org/10.48550/ARXIV.1907.11692\n", + "[S. Choi et al., 2024]:https://www.nature.com/articles/s41597-024-03212-4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-output", + "remove-stderr" + ] + }, + "outputs": [], + "source": [ + "!python -m pip install accelerate datasets torch" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### Dataset Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0a1e9444", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "202 text/html; charset=UTF-8 https://springernature.figshare.com/ndownloader/files/42507037 \n" + ] + } + ], + "source": [ + "print(r.status_code, r.headers.get(\"Content-Type\"), r.url, r.text[:500])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "15", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-stderr" + ] + 
}, + "outputs": [ + { + "ename": "BadZipFile", + "evalue": "File is not a zip file", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mBadZipFile\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 17\u001b[39m\n\u001b[32m 13\u001b[39m r.raise_for_status()\n\u001b[32m 14\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m open(z, \u001b[33m\"wb\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[32m 15\u001b[39m f.write(r.content)\n\u001b[32m 16\u001b[39m \n\u001b[32m---> \u001b[39m\u001b[32m17\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m zipfile.ZipFile(z) \u001b[38;5;28;01mas\u001b[39;00m zf:\n\u001b[32m 18\u001b[39m zf.extractall(d)\n\u001b[32m 19\u001b[39m dataset=load_dataset(\u001b[33m\"csv\"\u001b[39m, data_files=[os.path.join(d,\u001b[33m\"JCIM_sup_bigsmiles.csv\"\u001b[39m), os.path.join(d,\u001b[33m\"Bicerano_bigsmiles.csv\"\u001b[39m)])[\u001b[33m\"train\"\u001b[39m].select_columns([\u001b[33m\"BigSMILES\"\u001b[39m]).train_test_split(test_size=\u001b[32m0.2\u001b[39m)\n\u001b[32m 20\u001b[39m dataset=dataset.map(bigsmirk, input_columns=[\u001b[33m\"BigSMILES\"\u001b[39m], desc=\u001b[33m\"Tokenizing\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/zipfile/__init__.py:1349\u001b[39m, in \u001b[36mZipFile.__init__\u001b[39m\u001b[34m(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps, metadata_encoding)\u001b[39m\n\u001b[32m 1347\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1348\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m mode == \u001b[33m'\u001b[39m\u001b[33mr\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m-> \u001b[39m\u001b[32m1349\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_RealGetContents\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m mode \u001b[38;5;129;01min\u001b[39;00m (\u001b[33m'\u001b[39m\u001b[33mw\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mx\u001b[39m\u001b[33m'\u001b[39m):\n\u001b[32m 1351\u001b[39m \u001b[38;5;66;03m# set the modified flag so central directory gets written\u001b[39;00m\n\u001b[32m 1352\u001b[39m \u001b[38;5;66;03m# even if no files are added to the archive\u001b[39;00m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28mself\u001b[39m._didModify = \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/zipfile/__init__.py:1416\u001b[39m, in \u001b[36mZipFile._RealGetContents\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1414\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[33m\"\u001b[39m\u001b[33mFile is not a zip file\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 1415\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m endrec:\n\u001b[32m-> \u001b[39m\u001b[32m1416\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[33m\"\u001b[39m\u001b[33mFile is not a zip file\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 1417\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.debug > \u001b[32m1\u001b[39m:\n\u001b[32m 1418\u001b[39m \u001b[38;5;28mprint\u001b[39m(endrec)\n", + "\u001b[31mBadZipFile\u001b[39m: File is not a zip file" + ] + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "import tempfile\n", + "import zipfile\n", + "import requests\n", + "import os\n", + "d=tempfile.mkdtemp()\n", + "z=os.path.join(d,\"with_Tg.zip\")\n", + "url = \"https://springernature.figshare.com/ndownloader/files/42507037\"\n", + "headers = {\"User-Agent\": \"Mozilla/5.0\"}\n", + "\n", + "r = 
requests.get(url, headers=headers, allow_redirects=True, timeout=60)\n", + "r.raise_for_status()\n", + "with open(z, \"wb\") as f:\n", + " f.write(r.content)\n", + "\n", + "with zipfile.ZipFile(z) as zf:\n", + " zf.extractall(d)\n", + "dataset=load_dataset(\"csv\", data_files=[os.path.join(d,\"JCIM_sup_bigsmiles.csv\"), os.path.join(d,\"Bicerano_bigsmiles.csv\")])[\"train\"].select_columns([\"BigSMILES\"]).train_test_split(test_size=0.2)\n", + "dataset=dataset.map(bigsmirk, input_columns=[\"BigSMILES\"], desc=\"Tokenizing\")" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "> 💡 huggingface/tokenizers may raise a warning about being forked as we've already used our tokenizers (this isn't a smirk issue).\n", + "> It's harmless, but when actually training it's best to avoid tokenization until after the fork to benefit from the rust-level parallelism\n", + "\n", + "🎉 That's it! We've tokenized all of QM9 using smirk!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "dataset[\"train\"].to_pandas().head()" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### Training\n", + "Once we've tokenized the dataset, training the model is just a matter of configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-stderr" + ] + }, + "outputs": [], + "source": [ + "from accelerate import Accelerator\n", + "from transformers import Trainer, TrainingArguments, RobertaForMaskedLM, RobertaConfig, DataCollatorForLanguageModeling\n", + "\n", + "# A very small model for demonstrating training a molecular foundation model with smirk \n", + "config = RobertaConfig(\n", + " vocab_size=len(smirk),\n", + " hidden_size=256,\n", + " intermediate_size=1024,\n", + " num_hidden_layers=4,\n", + " num_attention_heads=4,\n", + ")\n", + "model = RobertaForMaskedLM(config)\n", + "\n", + "# Setup up the trainer to use our dataset\n", + "trainer = Trainer(\n", + " model=model,\n", + " train_dataset=dataset[\"train\"],\n", + " eval_dataset=dataset[\"test\"],\n", + " processing_class=smirk,\n", + " data_collator=DataCollatorForLanguageModeling(smirk), # The data collator needs to know about our tokenizer\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "skip-execution" + ] + }, + "outputs": [], + "source": [ + "trainer.train()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/smirk/__init__.py b/python/smirk/__init__.py index 61bb6da..edd28af 100644 --- a/python/smirk/__init__.py +++ b/python/smirk/__init__.py @@ -161,9 +161,9 @@ def num_special_tokens_to_add(self, pair: bool = False) -> int: return 
len(self.build_inputs_with_special_tokens([], [] if pair else None)) def __check_encode_kwargs(self, kwargs): - assert kwargs.pop("return_overflowing_tokens", False) is False, ( - "Not implemented" - ) + assert ( + kwargs.pop("return_overflowing_tokens", False) is False + ), "Not implemented" assert kwargs.pop("split_special_tokens", False) is False, "Not implemented" assert kwargs.pop("is_split_into_words", False) is False, "Not implemented" diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index 53d2318..d2fd673 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -10,7 +10,6 @@ use tokenizers::tokenizer::{ Offsets, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior, }; - #[derive(Clone)] pub struct BigSmirkPreTokenizer { outer: Regex, @@ -328,7 +327,10 @@ pub mod tests { ["[", "Na", "+", "]", ".", "[", "Cl", "-", "]"] ); assert_eq!(get_split_tokens(&pretok, "CC-O"), ["C", "C", "-", "O"]); - assert_eq!(get_split_tokens(&pretok, "O=C=O"), ["O", "=", "C", "=", "O"]); + assert_eq!( + get_split_tokens(&pretok, "O=C=O"), + ["O", "=", "C", "=", "O"] + ); assert_eq!(get_split_tokens(&pretok, "C#N"), ["C", "#", "N"]); assert_eq!( get_split_tokens(&pretok, "c1ccccc1"), @@ -376,9 +378,9 @@ pub mod tests { assert_eq!( get_split_tokens(&pretok, "{[>]CCCCCC(=O)[<],[>]NCCCCCCN[<]}"), [ - "{", "[", ">", "]", "C", "C", "C", "C", "C", "C", "(", "=", "O", ")", "[", - "<", "]", ",", "[", ">", "]", "N", "C", "C", "C", "C", "C", "C", "N", "[", - "<", "]", "}" + "{", "[", ">", "]", "C", "C", "C", "C", "C", "C", "(", "=", "O", ")", "[", "<", + "]", ",", "[", ">", "]", "N", "C", "C", "C", "C", "C", "C", "N", "[", "<", "]", + "}" ] ); } @@ -386,14 +388,8 @@ pub mod tests { #[test] fn test_indexed_bonding_descriptors() { let pretok = BigSmirkPreTokenizer::default(); - assert_eq!( - get_split_tokens(&pretok, "[$1]"), - ["[", "$", "1", "]"] - ); - assert_eq!( - get_split_tokens(&pretok, "[$2]"), - ["[", "$", "2", "]"] - 
); + assert_eq!(get_split_tokens(&pretok, "[$1]"), ["[", "$", "1", "]"]); + assert_eq!(get_split_tokens(&pretok, "[$2]"), ["[", "$", "2", "]"]); assert_eq!(get_split_tokens(&pretok, "[<1]"), ["[", "<", "1", "]"]); assert_eq!(get_split_tokens(&pretok, "[>1]"), ["[", ">", "1", "]"]); @@ -401,8 +397,8 @@ pub mod tests { assert_eq!( get_split_tokens(&pretok, "{[$1]CC[$1],[$2]C(C)C[$2]}"), [ - "{", "[", "$", "1", "]", "C", "C", "[", "$", "1", "]", ",", "[", "$", "2", - "]", "C", "(", "C", ")", "C", "[", "$", "2", "]", "}" + "{", "[", "$", "1", "]", "C", "C", "[", "$", "1", "]", ",", "[", "$", "2", "]", + "C", "(", "C", ")", "C", "[", "$", "2", "]", "}" ] ); } @@ -423,8 +419,8 @@ pub mod tests { assert_eq!( get_split_tokens(&pretok, "{[<1[<1]1]CC[>1[>1]1]}"), [ - "{", "[", "<", "1", "[", "<", "1", "]", "1", "]", "C", "C", "[", ">", "1", - "[", ">", "1", "]", "1", "]", "}" + "{", "[", "<", "1", "[", "<", "1", "]", "1", "]", "C", "C", "[", ">", "1", "[", + ">", "1", "]", "1", "]", "}" ] ); } @@ -519,10 +515,7 @@ pub mod tests { #[test] fn test_fragment_reference() { let pretok = BigSmirkPreTokenizer::default(); - assert_eq!( - get_split_tokens(&pretok, "[#PEG]"), - ["[", "#", "PEG", "]"] - ); + assert_eq!(get_split_tokens(&pretok, "[#PEG]"), ["[", "#", "PEG", "]"]); assert_eq!( get_split_tokens(&pretok, "[#Styrene]"), ["[", "#", "Styrene", "]"] diff --git a/src/pre_tokenizers/split_bigsmiles.rs b/src/pre_tokenizers/split_bigsmiles.rs index 6e75d14..ae8bacf 100644 --- a/src/pre_tokenizers/split_bigsmiles.rs +++ b/src/pre_tokenizers/split_bigsmiles.rs @@ -39,13 +39,13 @@ const CHIRAL: &'static str = r"@(?:@|AL|OH|SP|T(?:B|H))?"; pub const MATCH_OUTER_BIGSMILES: &'static str = concat!( r"Br?|Cl?|F|I|N|O|P|S|", // organic subset elements - r"b|c|n|o|p|s|", // Aromatic organic subset - r"\*|", // Wildcard - r"[\.\-=\#\$:/\\]|", // Bonds - r"\d|%|", // Ring closures + r"b|c|n|o|p|s|", // Aromatic organic subset + r"\*|", // Wildcard + r"[\.\-=\#\$:/\\]|", // Bonds + r"\d|%|", // 
Ring closures r"\(|\)|", - r"\{|\}|", // Stochastic object delimiters - r",|;|", // Repeat unit separator and end group separator + r"\{|\}|", // Stochastic object delimiters + r",|;|", // Repeat unit separator and end group separator r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", // Bracketed atoms/descriptors ); @@ -59,25 +59,25 @@ pub const MATCH_INNER_BIGSMILES: &'static str = formatcp!(concat!( r"|", r"(#)([!-~]+)", r"|", - r"(\d+)?", - r"({BRACKETED_SYMBOL})", - r"(?:({CHIRAL})(\d{{1,2}})?)?", - r"(?:(H)(\d)?)?", - r"(?:([+-]{{1,2}})(\d{{1,2}})?)?", - r"(?:(:)(\d+))?", + r"(\d+)?", + r"({BRACKETED_SYMBOL})", + r"(?:({CHIRAL})(\d{{1,2}})?)?", + r"(?:(H)(\d)?)?", + r"(?:([+-]{{1,2}})(\d{{1,2}})?)?", + r"(?:(:)(\d+))?", r")$", )); pub const BONDING_DESCRIPTOR: &'static str = concat!( - r"(\$|<|>)", // Descriptor type - r"(\d+)?", // Optional index + r"(\$|<|>)", // Descriptor type + r"(\d+)?", // Optional index ); pub const LADDER_BONDING_DESCRIPTOR: &'static str = concat!( - r"(\$|<|>)", // Outer descriptor type - r"(\d+)?", // Outer descriptor id + r"(\$|<|>)", // Outer descriptor type + r"(\d+)?", // Outer descriptor id r"(\[)(\$|<|>)(\d+)?(\])", // Inner descriptor - r"(\d+)", // Group id + r"(\d+)", // Group id ); pub const FRAGMENT_REFERENCE: &'static str = r"(#)([!-~]+)"; diff --git a/test/test_tokenize_bigsmiles.py b/test/test_tokenize_bigsmiles.py index 705c941..e2a8f7b 100644 --- a/test/test_tokenize_bigsmiles.py +++ b/test/test_tokenize_bigsmiles.py @@ -2,7 +2,6 @@ from tempfile import NamedTemporaryFile import pytest - import smirk from smirk.smirk import SmirkTokenizer From c860816093bff067aafc0e659916b7378789a404 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 19:27:46 -0400 Subject: [PATCH 08/52] add additional tokenization and MLM demo in nbk --- docs/big_smirk_demo.ipynb | 249 ++++++++++++++++++++++---------------- 1 file changed, 146 insertions(+), 103 deletions(-) diff --git a/docs/big_smirk_demo.ipynb b/docs/big_smirk_demo.ipynb 
index c824e9b..3cbf79d 100644 --- a/docs/big_smirk_demo.ipynb +++ b/docs/big_smirk_demo.ipynb @@ -7,10 +7,10 @@ "source": [ "# BigSMILES and BigSmirk\n", "\n", - "\n", + "\n", " \"Open\n", "\n", - "\n", + "\n", " \"Binder\"\n", "\n", "\n", @@ -62,25 +62,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "4", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'input_ids': [159, 148, 150, 148, 2, 150, 45, 45, 148, 2, 150, 161, 148, 2, 150, 45, 45, 4, 45, 45, 5, 148, 2, 150, 148, 150, 160], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from smirk import SmirkBigSmilesFast\n", "\n", - "# Just import, set the bigsmiles argument and tokenize!\n", + "# Just import and tokenize!\n", "bigsmirk = SmirkBigSmilesFast()\n", "bigsmirk(\"{[][$]CC[$],[$]CC(CC)[$][]}\") # ethylene butene copolymer" ] @@ -90,18 +79,7 @@ "execution_count": null, "id": "5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'input_ids': [[148, 71, 150, 102, 159, 148, 164, 150, 148, 163, 150, 45, 4, 22, 102, 5, 45, 45, 45, 45, 45, 4, 22, 102, 5, 148, 163, 150, 161, 148, 164, 150, 93, 45, 45, 45, 45, 45, 45, 93, 148, 164, 150, 148, 163, 150, 160, 148, 71, 150], [159, 148, 150, 148, 163, 150, 102, 45, 45, 148, 164, 150, 148, 163, 150, 160, 159, 148, 164, 150, 148, 163, 150, 102, 45, 4, 45, 5, 45, 148, 164, 150, 148, 150, 160, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168], [159, 148, 150, 148, 163, 150, 45, 4, 22, 102, 5, 153, 12, 153, 153, 153, 4, 153, 153, 12, 5, 45, 4, 22, 102, 5, 148, 163, 150, 161, 148, 164, 150, 102, 45, 45, 102, 148, 164, 150, 148, 150, 160, 168, 168, 168, 168, 168, 168, 168], [159, 163, 10, 45, 22, 45, 10, 153, 12, 153, 153, 4, 45, 45, 45, 45, 45, 45, 5, 153, 4, 153, 153, 12, 45, 45, 45, 45, 45, 45, 5, 
10, 45, 22, 45, 10, 153, 13, 153, 153, 153, 4, 153, 153, 13, 5, 164, 160, 168, 168]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]}" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Batch Tokenization with Padding\n", "batch = bigsmirk([\n", @@ -114,29 +92,69 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['[H]O{[>][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>][<]}[H]',\n", - " '{[][<]OCC[>][<]}{[>][<]OC(C)C[>][]}',\n", - " '{[][<]C(=O)c1ccc(cc1)C(=O)[<],[>]OCCO[>][]}',\n", - " '{}']" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Back to polymers!\n", "bigsmirk.batch_decode(batch[\"input_ids\"], skip_special_tokens=True)" ] }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "### Token Coloring Render\n", + "\n", + "Visualize BigSMILES token boundaries for PVC () and sPP (Syndiotactic Polypropylene) by coloring each token in sequence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "import hashlib\n", + "from html import escape\n", + "from IPython.display import HTML\n", + "\n", + "\n", + "def render_colored_tokens(text, tokenizer=bigsmirk):\n", + " tokens = tokenizer.tokenize(text)\n", + " palette = [\n", + " \"#f94144\", \"#f3722c\", \"#f8961e\", \"#f9844a\", \"#f9c74f\",\n", + " \"#90be6d\", \"#43aa8b\", \"#4d908e\", \"#577590\", \"#277da1\",\n", + " ]\n", + " spans = []\n", + " for tok in tokens:\n", + " digest = hashlib.sha1(tok.encode(\"utf-8\")).digest()\n", + " color = palette[int.from_bytes(digest[:2], \"big\") % len(palette)]\n", + " label = escape(tok)\n", + " spans.append(\n", + " f\"{label}\"\n", + " )\n", + " return HTML(\"
\" + \"\".join(spans) + \"
\")\n", + "\n", + "\n", + "pvc = \"{[][$]CC(Cl)[$][]}\"\n", + "render_colored_tokens(pvc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35086304", + "metadata": {}, + "outputs": [], + "source": [ + "spp = \"CC{[>][<]C[C@@H](C)C[C@H](C)[>];[<]C=CC,[<]C[C@H](C)C=CC[]}\"\n", + "render_colored_tokens(spp)" + ] + }, { "cell_type": "markdown", "id": "11", @@ -192,30 +210,22 @@ "tags": [] }, "source": [ - "### Dataset Preprocessing" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "0a1e9444", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "202 text/html; charset=UTF-8 https://springernature.figshare.com/ndownloader/files/42507037 \n" - ] - } - ], - "source": [ - "print(r.status_code, r.headers.get(\"Content-Type\"), r.url, r.text[:500])" + "### Dataset Preprocessing\n", + "\n", + "Download the dataset for generated by [Choi et al] from [figshare] and unzip it:\n", + "\n", + "```\n", + "curl -L -o with_Tg.zip \"https://springernature.figshare.com/ndownloader/files/42507037\"\n", + "unzip with_Tg.zip -d with_Tg\n", + "```\n", + "\n", + "[Choi et al]: https://www.nature.com/articles/s41597-024-03212-4#citeas\n", + "[figshare]: \"https://springernature.figshare.com/ndownloader/files/42507037\"" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "15", "metadata": { "editable": true, @@ -226,41 +236,14 @@ "remove-stderr" ] }, - "outputs": [ - { - "ename": "BadZipFile", - "evalue": "File is not a zip file", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mBadZipFile\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 17\u001b[39m\n\u001b[32m 13\u001b[39m r.raise_for_status()\n\u001b[32m 14\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m open(z, 
\u001b[33m\"wb\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[32m 15\u001b[39m f.write(r.content)\n\u001b[32m 16\u001b[39m \n\u001b[32m---> \u001b[39m\u001b[32m17\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m zipfile.ZipFile(z) \u001b[38;5;28;01mas\u001b[39;00m zf:\n\u001b[32m 18\u001b[39m zf.extractall(d)\n\u001b[32m 19\u001b[39m dataset=load_dataset(\u001b[33m\"csv\"\u001b[39m, data_files=[os.path.join(d,\u001b[33m\"JCIM_sup_bigsmiles.csv\"\u001b[39m), os.path.join(d,\u001b[33m\"Bicerano_bigsmiles.csv\"\u001b[39m)])[\u001b[33m\"train\"\u001b[39m].select_columns([\u001b[33m\"BigSMILES\"\u001b[39m]).train_test_split(test_size=\u001b[32m0.2\u001b[39m)\n\u001b[32m 20\u001b[39m dataset=dataset.map(bigsmirk, input_columns=[\u001b[33m\"BigSMILES\"\u001b[39m], desc=\u001b[33m\"Tokenizing\"\u001b[39m)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/zipfile/__init__.py:1349\u001b[39m, in \u001b[36mZipFile.__init__\u001b[39m\u001b[34m(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps, metadata_encoding)\u001b[39m\n\u001b[32m 1347\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1348\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m mode == \u001b[33m'\u001b[39m\u001b[33mr\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m-> \u001b[39m\u001b[32m1349\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_RealGetContents\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m mode \u001b[38;5;129;01min\u001b[39;00m (\u001b[33m'\u001b[39m\u001b[33mw\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mx\u001b[39m\u001b[33m'\u001b[39m):\n\u001b[32m 1351\u001b[39m \u001b[38;5;66;03m# set the modified flag so central directory gets written\u001b[39;00m\n\u001b[32m 1352\u001b[39m \u001b[38;5;66;03m# even if no files are added to the archive\u001b[39;00m\n\u001b[32m 1353\u001b[39m 
\u001b[38;5;28mself\u001b[39m._didModify = \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/zipfile/__init__.py:1416\u001b[39m, in \u001b[36mZipFile._RealGetContents\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1414\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[33m\"\u001b[39m\u001b[33mFile is not a zip file\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 1415\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m endrec:\n\u001b[32m-> \u001b[39m\u001b[32m1416\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m BadZipFile(\u001b[33m\"\u001b[39m\u001b[33mFile is not a zip file\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 1417\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.debug > \u001b[32m1\u001b[39m:\n\u001b[32m 1418\u001b[39m \u001b[38;5;28mprint\u001b[39m(endrec)\n", - "\u001b[31mBadZipFile\u001b[39m: File is not a zip file" - ] - } - ], + "outputs": [], "source": [ "from datasets import load_dataset\n", - "\n", - "import tempfile\n", - "import zipfile\n", - "import requests\n", "import os\n", - "d=tempfile.mkdtemp()\n", - "z=os.path.join(d,\"with_Tg.zip\")\n", - "url = \"https://springernature.figshare.com/ndownloader/files/42507037\"\n", - "headers = {\"User-Agent\": \"Mozilla/5.0\"}\n", - "\n", - "r = requests.get(url, headers=headers, allow_redirects=True, timeout=60)\n", - "r.raise_for_status()\n", - "with open(z, \"wb\") as f:\n", - " f.write(r.content)\n", - "\n", - "with zipfile.ZipFile(z) as zf:\n", - " zf.extractall(d)\n", - "dataset=load_dataset(\"csv\", data_files=[os.path.join(d,\"JCIM_sup_bigsmiles.csv\"), os.path.join(d,\"Bicerano_bigsmiles.csv\")])[\"train\"].select_columns([\"BigSMILES\"]).train_test_split(test_size=0.2)\n", + "\n", + "# Location to unzipped data\n", + "data_dir = \"with_Tg\"\n", + "dataset=load_dataset(\"csv\", 
data_files=[os.path.join(data_dir,\"JCIM_sup_bigsmiles.csv\")])[\"train\"].select_columns([\"BigSMILES\"]).train_test_split(test_size=0.2)\n", "dataset=dataset.map(bigsmirk, input_columns=[\"BigSMILES\"], desc=\"Tokenizing\")" ] }, @@ -275,10 +258,10 @@ "tags": [] }, "source": [ - "> 💡 huggingface/tokenizers may raise a warning about being forked as we've already used our tokenizers (this isn't a smirk issue).\n", + "> 💡 Hugging Face/ Tokenizers may raise a warning about being forked as we've already used our tokenizers (this isn't a smirk issue).\n", "> It's harmless, but when actually training it's best to avoid tokenization until after the fork to benefit from the rust-level parallelism\n", "\n", - "🎉 That's it! We've tokenized all of QM9 using smirk!" + "🎉 That's it! We've tokenized all of the BigSMILES dataset using smirk!" ] }, { @@ -327,12 +310,11 @@ }, "outputs": [], "source": [ - "from accelerate import Accelerator\n", - "from transformers import Trainer, TrainingArguments, RobertaForMaskedLM, RobertaConfig, DataCollatorForLanguageModeling\n", + "from transformers import Trainer, RobertaForMaskedLM, RobertaConfig, DataCollatorForLanguageModeling\n", "\n", "# A very small model for demonstrating training a molecular foundation model with smirk \n", "config = RobertaConfig(\n", - " vocab_size=len(smirk),\n", + " vocab_size=len(bigsmirk),\n", " hidden_size=256,\n", " intermediate_size=1024,\n", " num_hidden_layers=4,\n", @@ -345,8 +327,8 @@ " model=model,\n", " train_dataset=dataset[\"train\"],\n", " eval_dataset=dataset[\"test\"],\n", - " processing_class=smirk,\n", - " data_collator=DataCollatorForLanguageModeling(smirk), # The data collator needs to know about our tokenizer\n", + " processing_class=bigsmirk,\n", + " data_collator=DataCollatorForLanguageModeling(bigsmirk), # The data collator needs to know about our tokenizer\n", ")" ] }, @@ -367,6 +349,67 @@ "source": [ "trainer.train()" ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, 
+ "source": [ + "### MLM Example: Predict a Masked Token\n", + "\n", + "Mask one token in a BigSMILES string and ask the trained model for top predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "inference_model = trainer.model\n", + "inference_model.eval()\n", + "device = next(inference_model.parameters()).device\n", + "\n", + "sample = dataset[\"test\"][5][\"BigSMILES\"]\n", + "\n", + "# Encode and choose a position to mask\n", + "encoded = bigsmirk(sample, add_special_tokens=False)\n", + "input_ids = encoded[\"input_ids\"]\n", + "tokens = bigsmirk.convert_ids_to_tokens(input_ids)\n", + "mask_pos = len(tokens) // 2\n", + "\n", + "masked_ids = input_ids.copy()\n", + "masked_ids[mask_pos] = bigsmirk.mask_token_id\n", + "masked_tokens = bigsmirk.convert_ids_to_tokens(masked_ids)\n", + "\n", + "inputs = {\n", + " \"input_ids\": torch.tensor([masked_ids],device=device),\n", + " \"attention_mask\": torch.ones((1, len(masked_ids)), device=device),\n", + "}\n", + "\n", + "with torch.no_grad():\n", + " logits = inference_model(**inputs).logits[0, mask_pos].detach().cpu()\n", + " probs = torch.softmax(logits, dim=-1)\n", + "\n", + "top_k = 5\n", + "top_ids = torch.topk(probs, k=top_k).indices.tolist()\n", + "top_tokens = bigsmirk.convert_ids_to_tokens(top_ids)\n", + "\n", + "print(\"Original:\", sample)\n", + "print(\"Masked :\", \"\".join(masked_tokens))\n", + "print(f\"Masked token index: {mask_pos} (original token: {tokens[mask_pos]})\")\n", + "print(\"\\nTop predictions:\")\n", + "\n", + "for rank, (tok_id, tok) in enumerate(zip(top_ids, top_tokens), start=1):\n", + " candidate_ids = masked_ids.copy()\n", + " candidate_ids[mask_pos] = tok_id\n", + " candidate = bigsmirk.decode(candidate_ids, skip_special_tokens=True)\n", + " score = probs[tok_id].item()\n", + " print(f\"{rank}. 
token={tok!r:>4} p={score:.4f} -> {candidate}\")" + ] } ], "metadata": { From ab85a89b40107131789d83288f4759fc57f67a3a Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Thu, 23 Apr 2026 19:41:11 -0400 Subject: [PATCH 09/52] add API ref --- docs/api.rst | 8 +++++++- docs/big_smirk_demo.ipynb | 30 +++++++++++++++--------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 5a8a09e..4b67cab 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -33,7 +33,13 @@ API Reference .. seealso:: :py:meth:`transformers.PreTrainedTokenizerBase.batch_decode` for the 🤗 documentation -.. autofunction:: smirk.SmirkSelfiesFast +.. autoclass:: smirk.SmirkSelfiesFast + :special-members: + :members: + +.. autoclass:: smirk.SmirkBigSmilesFast + :special-members: + :members: .. autodata:: smirk.SPECIAL_TOKENS diff --git a/docs/big_smirk_demo.ipynb b/docs/big_smirk_demo.ipynb index 3cbf79d..33a1555 100644 --- a/docs/big_smirk_demo.ipynb +++ b/docs/big_smirk_demo.ipynb @@ -103,7 +103,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "7", "metadata": {}, "source": [ "### Token Coloring Render\n", @@ -114,7 +114,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35086304", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -157,7 +157,7 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "10", "metadata": {}, "source": [ "## Zero to Polymer Foundation Model with Smirk!" 
@@ -165,7 +165,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "11", "metadata": { "editable": true, "slideshow": { @@ -183,7 +183,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "12", "metadata": { "editable": true, "slideshow": { @@ -201,7 +201,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "13", "metadata": { "editable": true, "slideshow": { @@ -226,7 +226,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "14", "metadata": { "editable": true, "slideshow": { @@ -249,7 +249,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "15", "metadata": { "editable": true, "slideshow": { @@ -267,7 +267,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "16", "metadata": { "editable": true, "slideshow": { @@ -282,7 +282,7 @@ }, { "cell_type": "markdown", - "id": "18", + "id": "17", "metadata": { "editable": true, "slideshow": { @@ -298,7 +298,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "18", "metadata": { "editable": true, "slideshow": { @@ -335,7 +335,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "19", "metadata": { "editable": true, "slideshow": { @@ -352,7 +352,7 @@ }, { "cell_type": "markdown", - "id": "23", + "id": "20", "metadata": {}, "source": [ "### MLM Example: Predict a Masked Token\n", @@ -363,7 +363,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "21", "metadata": {}, "outputs": [], "source": [ From d7eb1a8ed54a894553a33905bebe3c950c5ca074 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sat, 25 Apr 2026 22:11:27 -0400 Subject: [PATCH 10/52] docs: add path to BigSMILES spec --- python/smirk/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/smirk/__init__.py b/python/smirk/__init__.py index edd28af..5a05d4e 100644 --- a/python/smirk/__init__.py +++ b/python/smirk/__init__.py @@ -161,9 +161,9 @@ def num_special_tokens_to_add(self, 
pair: bool = False) -> int: return len(self.build_inputs_with_special_tokens([], [] if pair else None)) def __check_encode_kwargs(self, kwargs): - assert ( - kwargs.pop("return_overflowing_tokens", False) is False - ), "Not implemented" + assert kwargs.pop("return_overflowing_tokens", False) is False, ( + "Not implemented" + ) assert kwargs.pop("split_special_tokens", False) is False, "Not implemented" assert kwargs.pop("is_split_into_words", False) is False, "Not implemented" @@ -345,7 +345,8 @@ class SmirkBigSmilesFast(SmirkTokenizerFast): def __init__(self, tokenizer_file: Optional[os.PathLike] = None, **kwargs): """ A Chemically-Complete Tokenizer for core BigSMILES line notation. - + For a specification of of the reference see: + https://olsenlabmit.github.io/BigSMILES/docs/line_notation.html. :param tokenizer_file: Path to a JSON serialize SmirkTokenizerFast tokenizers :param kwargs: Additional kwargs are passed to :py:class:`SmirkTokenizerFast` """ From 3da58ecc8d5bc7ff70917a53532aea3ecc73a1f8 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 12:12:17 -0400 Subject: [PATCH 11/52] add updated vocab generator --- opt/build_vocab.py | 167 ++++++++++++++++++++++++++++++++++++--- python/smirk/__init__.py | 6 +- 2 files changed, 157 insertions(+), 16 deletions(-) diff --git a/opt/build_vocab.py b/opt/build_vocab.py index d157a6e..4d6fac1 100755 --- a/opt/build_vocab.py +++ b/opt/build_vocab.py @@ -29,6 +29,7 @@ CHIRAL_CONFIG = ["TH", "AL", "SP", "TB", "OH"] BONDS = [".", "-", "=", "#", "$", ":", "/", "\\"] DIGITS = [str(x) for x in range(10)] +BIGSMILES_EXTRA_TOKENS = ["{", "}", ",", ";", "<", ">"] def build_smiles_alphabet(): @@ -59,17 +60,23 @@ def build_smiles_alphabet(): return vocab -def const_str(name, regex, comment=None, public=False): +def const_str(name, regex, comment=None, public=False, separator="|", wrapper=None): out = f"const {name}: &'static str =" if isinstance(regex, list): - out += " concat!(\n" + if wrapper: + out += f" 
{wrapper}!(concat!(\n" + else: + out += " concat!(\n" for idx, r in enumerate(regex): out += f' r"{r}' if idx < len(regex) - 1: - out += "|" + out += separator out += '",\n' - out += ");" + if wrapper: + out += "));" + else: + out += ");" else: out += f' r"{regex}";' @@ -110,6 +117,35 @@ def merge_tokens(tokens): return sorted(out) +def merge_tokens_grouped(tokens): + branches = defaultdict(set) + for token in tokens: + assert len(token) in [1, 2] + if len(token) == 1: + branches[token[0]] |= {None} + else: + branches[token[0]].add(token[1]) + + out = [] + for leader, tail in branches.items(): + if None in tail: + tail -= {None} + if len(tail) == 0: + cr = leader + elif len(tail) == 1: + cr = f"{leader}{tail.pop()}?" + else: + cr = f"{leader}(?:{'|'.join(sorted(tail))})?" + else: + if len(tail) == 1: + cr = f"{leader}{tail.pop()}" + else: + cr = f"{leader}(?:{'|'.join(sorted(tail))})" + + out.append(cr) + return sorted(out) + + def match_chars(chars: list[str]): """Combine chars into a regex: `[chars]`, adding escapes as needed""" return "[" + re.escape("".join(chars)) + "]" @@ -145,6 +181,89 @@ def build_smiles_pretokenizer(): print(const_str("CHIRAL", f"@(?:@|{chiral})?")) +def build_bigsmiles_pretokenizer(): + print( + const_str( + "BRACKETED_SYMBOL", + [ + *merge_tokens_grouped(ELEMENT_SYMBOLS), + *merge_tokens_grouped(AROMATIC_SYMBOLS), + r"\*", + ], + ) + ) + chiral = "|".join(merge_tokens_grouped(CHIRAL_CONFIG)) + print(const_str("CHIRAL", f"@(?:@|{chiral})?")) + print( + const_str( + "MATCH_OUTER_BIGSMILES", + [ + "|".join(merge_tokens(ALIPHATIC_ORGANIC)), # organic subset elements + "|".join(merge_tokens(AROMATIC_ORGANIC)), # aromatic organic subset + r"\*", # wildcard + match_chars(BONDS), # bonds + r"\d|%", # ring closures + r"\(|\)", # branches + r"\{|\}", # stochastic object delimiters + r",|;", # repeat unit separator and end group separator + r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", # bracketed atoms/descriptors + ], + public=True, + ) + ) + print( + 
const_str( + "MATCH_INNER_BIGSMILES", + [ + r"^(?:", + r"", + r"|", + r"(\$|<|>)(\d+)?", + r"|", + r"(\$|<|>)(\d+)?(\[)(\$|<|>)(\d+)?(\])(\d+)", + r"|", + r"(#)([!-~]+)", + r"|", + r"(\d+)?", + r"({BRACKETED_SYMBOL})", + r"(?:({CHIRAL})(\d{{1,2}})?)?", + r"(?:(H)(\d)?)?", + r"(?:([+-]{{1,2}})(\d{{1,2}})?)?", + r"(?:(:)(\d+))?", + r")$", + ], + public=True, + separator="", + wrapper="formatcp", + ) + ) + print( + const_str( + "BONDING_DESCRIPTOR", + [ + r"(\$|<|>)", # descriptor type + r"(\d+)?", # optional index + ], + public=True, + separator="", + ) + ) + print( + const_str( + "LADDER_BONDING_DESCRIPTOR", + [ + r"(\$|<|>)", # outer descriptor type + r"(\d+)?", # outer descriptor id + r"(\[)(\$|<|>)(\d+)?(\])", # inner descriptor + r"(\d+)", # group id + ], + public=True, + separator="", + ) + ) + print(const_str("FRAGMENT_REFERENCE", r"(#)([!-~]+)", public=True)) + + def build_selfies_pretokenizer(): print( "|".join( @@ -171,35 +290,57 @@ def build_selfies_alphabet(): return vocab +def build_bigsmiles_alphabet(): + vocab = build_smiles_alphabet() + vocab.update(BIGSMILES_EXTRA_TOKENS) + + return vocab + + def build_vocab(tokens: set): tokens = ["[UNK]", *sorted(tokens)] return {token: id for id, token in enumerate(tokens)} +def build_bigsmiles_vocab(): + vocab = build_vocab(build_smiles_alphabet()) + for token in BIGSMILES_EXTRA_TOKENS: + vocab[token] = len(vocab) + return vocab + + if __name__ == "__main__": p = argparse.ArgumentParser() p.add_argument("-t", "--type", choices=["vocab", "regex"], default="vocab") - p.add_argument("-f", "--format", choices=["smiles", "selfies"], default="smiles") + p.add_argument( + "-f", "--format", choices=["smiles", "bigsmiles", "selfies"], default="smiles" + ) p.add_argument("output", type=argparse.FileType("w"), default=sys.stdout, nargs="?") args = p.parse_args() if args.type == "vocab": - if args.format == "smiles": - alphabet = build_smiles_alphabet() - elif args.format == "selfies": - alphabet = build_selfies_alphabet() + 
if args.format == "bigsmiles": + vocab = build_bigsmiles_vocab() else: - # Argparse should catch this sooner - raise RuntimeError("Unknown format", args.format) + if args.format == "smiles": + alphabet = build_smiles_alphabet() + elif args.format == "selfies": + alphabet = build_selfies_alphabet() + else: + # Argparse should catch this sooner + raise RuntimeError("Unknown format", args.format) - # Convert enumerated glyphs to a vocab - vocab = build_vocab(alphabet) + # Convert enumerated glyphs to a vocab + vocab = build_vocab(alphabet) json.dump(vocab, args.output, indent=4) + args.output.write("\n") elif args.type == "regex": if args.format == "smiles": build_smiles_pretokenizer() + elif args.format == "bigsmiles": + build_bigsmiles_pretokenizer() elif args.format == "selfies": build_selfies_pretokenizer() else: diff --git a/python/smirk/__init__.py b/python/smirk/__init__.py index 5a05d4e..ba40f96 100644 --- a/python/smirk/__init__.py +++ b/python/smirk/__init__.py @@ -161,9 +161,9 @@ def num_special_tokens_to_add(self, pair: bool = False) -> int: return len(self.build_inputs_with_special_tokens([], [] if pair else None)) def __check_encode_kwargs(self, kwargs): - assert kwargs.pop("return_overflowing_tokens", False) is False, ( - "Not implemented" - ) + assert ( + kwargs.pop("return_overflowing_tokens", False) is False + ), "Not implemented" assert kwargs.pop("split_special_tokens", False) is False, "Not implemented" assert kwargs.pop("is_split_into_words", False) is False, "Not implemented" From ba53f9f86247ec9581e7ab8410031e462b704f62 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 12:55:38 -0400 Subject: [PATCH 12/52] add support for abstarct labels and test for spec egs --- opt/build_vocab.py | 6 ++- src/pre_tokenizers/bigsmirk.rs | 25 ++++++++++++ src/pre_tokenizers/split_bigsmiles.rs | 11 +++--- test/bigsmiles.smi | 57 +++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 7 deletions(-) create mode 100644 test/bigsmiles.smi 
diff --git a/opt/build_vocab.py b/opt/build_vocab.py index 4d6fac1..d9b98d5 100755 --- a/opt/build_vocab.py +++ b/opt/build_vocab.py @@ -30,6 +30,7 @@ BONDS = [".", "-", "=", "#", "$", ":", "/", "\\"] DIGITS = [str(x) for x in range(10)] BIGSMILES_EXTRA_TOKENS = ["{", "}", ",", ";", "<", ">"] +BIGSMILES_LABEL = r"[A-Z][A-Za-z0-9']*" def build_smiles_alphabet(): @@ -206,6 +207,7 @@ def build_bigsmiles_pretokenizer(): r"\(|\)", # branches r"\{|\}", # stochastic object delimiters r",|;", # repeat unit separator and end group separator + BIGSMILES_LABEL, # fragment and abstract spec labels r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", # bracketed atoms/descriptors ], public=True, @@ -220,7 +222,7 @@ def build_bigsmiles_pretokenizer(): r"|", r"(\$|<|>)(\d+)?", r"|", - r"(\$|<|>)(\d+)?(\[)(\$|<|>)(\d+)?(\])(\d+)", + rf"(\$|<|>)(\d+)?(\[)(\$|<|>|{BIGSMILES_LABEL})(\d+)?(\])(\d+)", r"|", r"(#)([!-~]+)", r"|", @@ -254,7 +256,7 @@ def build_bigsmiles_pretokenizer(): [ r"(\$|<|>)", # outer descriptor type r"(\d+)?", # outer descriptor id - r"(\[)(\$|<|>)(\d+)?(\])", # inner descriptor + rf"(\[)(\$|<|>|{BIGSMILES_LABEL})(\d+)?(\])", # inner descriptor r"(\d+)", # group id ], public=True, diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index d2fd673..300a4cb 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -576,4 +576,29 @@ pub mod tests { assert!(all_matches(&pretok, line)); } } + + #[test] + fn test_bigsmiles_spec() { + let pretok = BigSmirkPreTokenizer::default(); + let mut bigsmiles_examples = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + bigsmiles_examples.push("test"); + bigsmiles_examples.push("bigsmiles.smi"); + let examples = + fs::read_to_string(bigsmiles_examples.as_path()).expect("failed to open bigsmiles.smi"); + let mut failures = Vec::new(); + for (idx, line) in examples + .lines() + .enumerate() + .filter(|(_, x)| !x.starts_with("#") && !x.is_empty()) + { + if !all_matches(&pretok, line) { + 
failures.push(format!("line {}: {}", idx + 1, line)); + } + } + assert!( + failures.is_empty(), + "failed to tokenize BigSMILES fixtures:\n{}", + failures.join("\n") + ); + } } diff --git a/src/pre_tokenizers/split_bigsmiles.rs b/src/pre_tokenizers/split_bigsmiles.rs index ae8bacf..82d2ca3 100644 --- a/src/pre_tokenizers/split_bigsmiles.rs +++ b/src/pre_tokenizers/split_bigsmiles.rs @@ -46,6 +46,7 @@ pub const MATCH_OUTER_BIGSMILES: &'static str = concat!( r"\(|\)|", r"\{|\}|", // Stochastic object delimiters r",|;|", // Repeat unit separator and end group separator + r"[A-Z][A-Za-z0-9']*|", // Fragment and abstract spec labels r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", // Bracketed atoms/descriptors ); @@ -55,7 +56,7 @@ pub const MATCH_INNER_BIGSMILES: &'static str = formatcp!(concat!( r"|", r"(\$|<|>)(\d+)?", r"|", - r"(\$|<|>)(\d+)?(\[)(\$|<|>)(\d+)?(\])(\d+)", + r"(\$|<|>)(\d+)?(\[)(\$|<|>|[A-Z][A-Za-z0-9']*)(\d+)?(\])(\d+)", r"|", r"(#)([!-~]+)", r"|", @@ -74,10 +75,10 @@ pub const BONDING_DESCRIPTOR: &'static str = concat!( ); pub const LADDER_BONDING_DESCRIPTOR: &'static str = concat!( - r"(\$|<|>)", // Outer descriptor type - r"(\d+)?", // Outer descriptor id - r"(\[)(\$|<|>)(\d+)?(\])", // Inner descriptor - r"(\d+)", // Group id + r"(\$|<|>)", // Outer descriptor type + r"(\d+)?", // Outer descriptor id + r"(\[)(\$|<|>|[A-Z][A-Za-z0-9']*)(\d+)?(\])", // Inner descriptor + r"(\d+)", // Group id ); pub const FRAGMENT_REFERENCE: &'static str = r"(#)([!-~]+)"; diff --git a/test/bigsmiles.smi b/test/bigsmiles.smi new file mode 100644 index 0000000..cbeef35 --- /dev/null +++ b/test/bigsmiles.smi @@ -0,0 +1,57 @@ +# Example BigSMILES String pulled from https://olsenlabmit.github.io/BigSMILES/docs/line_notation.html#the-bigsmiles-line-notation +[$]-CC-[$] +[$]CC[$] +[$]CC(CC)[$] +C([$])C([$])CC +[<]C(=O)CCCCC(=O)[<] +[>]NCCCCCCN[>] +[<]C(=O)CCCCC(=O)NCCCCCCN[>] +[<]CCO[>] +[>]CCO[<] +[$1]-CC-[$2] +[$1]CC[$2] +C([$1])C[$2] +[$]-CC-[$] +[$]-CC([$])-[$] +[$]=CCC=[$] 
+{[][$]CC[$],[$]CC(CC)[$][]} +{[][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>][]} +{[][<]C(=O)CCCCC(=O)NCCCCCCN[>][]} +{[][$]CC[$],[$]CC(CC)[$][]} +C{[$][$]CC[$],[$]CC(CC)[$][$]} +[H]O{[>][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>][<]}[H] +{[][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>];[>]O[H],[<][H][]} +[H]O{[>][<]C(=O)CCCCC(=O)[<],[>]NCCCCCCN[>];[<][H][]} +{[][<]c1cc([>])cc([>]c1);[<]Br,[>]B(O)O[]} +OB(O){[>][<]c1cc([>])cc([>]c1);[<]Br[]} +{[][$]CC(C#N)[$],[$]CC(c1ccccc1)[$][]} +{[][<]OCC[>][<]}{[>][<]OC(C)C[>][]} +{[][<]C(=O)c1ccc(cc1)C(=O)[<],[>]OCCO[>][]} +{[][$]CC[$],[$]CC([$])[$][]} +{[][$]CC(C)(C)[$],[$]CC(c1ccc(cc1)C{[$][$]CC(C)(C(=O)OC)[$][$]}Br)[$][]} +{[][>]C(=O)Nc1ccc(C)c(c1)NC(=O)[>],[<]OCC{[<][>]OCC[<][>]}O[<],[<]OCCCO[<][]} +{[][$]CC[$],[$]CC(CC)[$][]} +{[]CC,CC(CC)[]} +O{[>][<]C(=O)C(C)N[>],[<]C(=O)CN[>][<]} +O{[>]C(=O)C(C)N,C(=O)CN[<]} +{[<][<]C(=O)C(C)N[>],[<]C(=O)CN[>][>]}O +{[<][>]NC(C)C(=O)[<],[>]NCC(=O)[<][>]}O +{[<]NC(C)C(=O),NCC(=O)[>]}O +{[][$]CC(C)([#R])[$][]}.{#R=C(=O)OCC12CC(C3)CC(C1)CC3C2} +C([#Arm])([#Arm])([#Arm])[#Arm].{#Arm=CO{[<][>]CCO[<][>]}} +A([<1[Inner]1])R(A[<1[Inner]1])(B[>1[Inner]2])B[>1[Inner]2] +A([<1[<1]1])R(A[<1[<1]1])(B[>1[>1]2])B[>1[>1]2] +A([$1[Inner]1])R(A'[$1[Inner]1])(A[$1[Inner]2])A'[$1[Inner]2] +A([$1[$1]1])R(A'[$1[$1]1])(A[$1[$1]2])A'[$1[$1]2] +A([$1[$1]1])R(A'[$1[$2]1])(A[$1[$1]2])A'[$1[$2]2] +A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2] +CC{[>][<]CC(C)[>][<]}CC(C)=C +C{[>][<]C[C@@H](C)[>][<]}CC(C)=C +CC{[>][<]C[C@@H](C)C[C@H](C)[>];[<]C=CC,[<]C[C@H](C)C=CC[]} +{[][$]CC(c1ccncc1)[$],[$]CC(c1cc[n+](C)cc1)[$].[I-][]} +{[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>][]} +{[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>];[>][#E1],[<][#E2][]} +{[][>]COC(=O){[$][$]COC[$][$]}C(=O)OC[>],c1([<])cc([#L]2)cc([#L]3)c1.c4([<])cc([#L]5)cc([#L]6)c4.c7([<])cc([#L]8)cc([#L]9)c7.C%10([<])cc([#L]%11)cc([#L]%12)c%10.[Pd++]258%11.[Pd++]369%12}.{#L=c(c1)cccn1} 
+S1C(c2ccccc2){[$][$]CC(c1ccccc1)[$][$]}C(=O)OC(=C3)N=NN3CC(O)COC(=O)C(C)C1 +C1CCC{[$1][$1]=CCCCCCCC=[$1][$1]}CCCC1 +{[][$1]=CCCCCCCC=[$1][]} \ No newline at end of file From 3905621f1e0be7baf170a26b30f156dfd14674b6 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:04:38 -0400 Subject: [PATCH 13/52] fix unk handling and add test --- src/pre_tokenizers/bigsmirk.rs | 36 +++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index 300a4cb..bbdc95d 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -14,6 +14,7 @@ use tokenizers::tokenizer::{ pub struct BigSmirkPreTokenizer { outer: Regex, inner: Regex, + inner_partial: Regex, } impl BigSmirkPreTokenizer { @@ -21,6 +22,7 @@ impl BigSmirkPreTokenizer { Self { outer: Regex::new(&outer).unwrap(), inner: Regex::new(&inner).unwrap(), + inner_partial: Regex::new(partial_inner_pattern(inner)).unwrap(), } } @@ -147,11 +149,18 @@ fn append_split(splits: &mut Vec<(Offsets, bool)>, prev: &mut usize, m: Match, o *prev = end; } +fn partial_inner_pattern(inner: &str) -> &str { + let pattern = inner + .strip_prefix("^(?:") + .and_then(|pattern| pattern.strip_suffix(")$")) + .unwrap_or(inner); + pattern.strip_prefix('|').unwrap_or(pattern) +} + impl Pattern for BigSmirkPreTokenizer { fn find_matches(&self, inside: &str) -> Result> { let mut splits = Vec::with_capacity(inside.len()); let mut prev = 0; - let n_inner_groups = self.inner.captures_len(); static IS_NUMBER: Lazy = Lazy::new(|| Regex::new(r"^\d+$").unwrap()); static IS_BONDING_DESC: Lazy = Lazy::new(|| Regex::new(r"^[\$<>]$").unwrap()); for m_outer in self.outer.find_iter(inside) { @@ -165,9 +174,13 @@ impl Pattern for BigSmirkPreTokenizer { let bracketed = &inside[(m_outer.start() + 1)..(m_outer.end() - 1)]; // Try to match with inner pattern - if let Some(capture) = self.inner.captures(&bracketed) { + if let 
Some(capture) = self + .inner + .captures(bracketed) + .or_else(|| self.inner_partial.captures(bracketed)) + { // Unpack bracketed atoms - for i in 1..n_inner_groups { + for i in 1..capture.len() { if let Some(m) = capture.get(i) { let matched_str = m.as_str(); if matched_str.is_empty() { @@ -256,6 +269,23 @@ pub mod tests { .collect() } + #[test] + fn check_unknown() { + let pretok = BigSmirkPreTokenizer::default(); + assert_eq!(get_split_tokens(&pretok, "C🤷"), ["C", "🤷"]); + assert_eq!(get_split_tokens(&pretok, "🤷"), ["🤷"]); + assert_eq!(get_split_tokens(&pretok, "🤷C"), ["🤷", "C"]); + assert_eq!( + get_split_tokens(&pretok, "C[H🤷]"), + ["C", "[", "H", "🤷", "]"] + ); + assert_eq!(get_split_tokens(&pretok, "[🤷]"), ["[", "🤷", "]"]); + assert_eq!( + get_split_tokens(&pretok, "[🤷H]C"), + ["[", "🤷", "H", "]", "C"] + ); + } + #[test] fn test_standard_smiles_basic() { let pretok = BigSmirkPreTokenizer::default(); From 789b9fb092b5785b1cacd49840979ed73f8f231b Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:08:10 -0400 Subject: [PATCH 14/52] add missing test for clone --- src/wrapper.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wrapper.rs b/src/wrapper.rs index 4ebfb6d..3b33992 100644 --- a/src/wrapper.rs +++ b/src/wrapper.rs @@ -213,6 +213,7 @@ mod test { #[test] fn serialize_bigsmirk_pretok() { let pretok = PreTokenizerWrapper::BigSmirkPreTokenizer(BigSmirkPreTokenizer::default()); + check_serde(&pretok.clone()); check_serde(&pretok); } } From e553b1560e6c27347384f26f4787afd3a85c3502 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:09:23 -0400 Subject: [PATCH 15/52] formatting --- test/bigsmiles.smi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/bigsmiles.smi b/test/bigsmiles.smi index cbeef35..28fe243 100644 --- a/test/bigsmiles.smi +++ b/test/bigsmiles.smi @@ -54,4 +54,4 @@ CC{[>][<]C[C@@H](C)C[C@H](C)[>];[<]C=CC,[<]C[C@H](C)C=CC[]} 
{[][>]COC(=O){[$][$]COC[$][$]}C(=O)OC[>],c1([<])cc([#L]2)cc([#L]3)c1.c4([<])cc([#L]5)cc([#L]6)c4.c7([<])cc([#L]8)cc([#L]9)c7.C%10([<])cc([#L]%11)cc([#L]%12)c%10.[Pd++]258%11.[Pd++]369%12}.{#L=c(c1)cccn1} S1C(c2ccccc2){[$][$]CC(c1ccccc1)[$][$]}C(=O)OC(=C3)N=NN3CC(O)COC(=O)C(C)C1 C1CCC{[$1][$1]=CCCCCCCC=[$1][$1]}CCCC1 -{[][$1]=CCCCCCCC=[$1][]} \ No newline at end of file +{[][$1]=CCCCCCCC=[$1][]} From 6a039e32218fbc1d92e7a1de93cad17806ab184d Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:26:58 -0400 Subject: [PATCH 16/52] add rust level test for splitting and init --- src/pre_tokenizers/bigsmirk.rs | 9 ++++++ src/tokenizer.rs | 56 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index bbdc95d..ad108e6 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -269,6 +269,15 @@ pub mod tests { .collect() } + #[test] + fn check_bigsmiles_splits() { + let pretok = BigSmirkPreTokenizer::default(); + let bigsmiles = "{[$]CC[$]}".to_string(); + let split = ["{", "[", "$", "]", "C", "C", "[", "$", "]", "}"]; + assert_eq!(get_split_tokens(&pretok, bigsmiles.as_str()), split); + assert_eq!(pretok.split(&bigsmiles), split); + } + #[test] fn check_unknown() { let pretok = BigSmirkPreTokenizer::default(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 4184840..7d6b3c9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -475,3 +475,59 @@ impl From for Encoding { } } } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use tokenizers::PreTokenizer; + + use super::*; + + fn assert_bigsmiles_pre_tokenizer(tokenizer: &SmirkTokenizer) { + assert!(matches!( + tokenizer.tokenizer.get_pre_tokenizer(), + Some(PreTokenizerWrapper::BigSmirkPreTokenizer(_)) + )); + } + + fn get_splits(tokenizer: &SmirkTokenizer, text: &str) -> Vec { + let mut pretokenized = PreTokenizedString::from(text); + tokenizer + .tokenizer + 
.get_pre_tokenizer() + .unwrap() + .pre_tokenize(&mut pretokenized) + .unwrap(); + pretokenized + .get_splits(OffsetReferential::Original, OffsetType::Byte) + .into_iter() + .map(|(s, _, _)| s.to_string()) + .collect() + } + + #[test] + fn new_selects_bigsmiles_pre_tokenizer() { + let tokenizer = SmirkTokenizer::__new__(true); + assert_bigsmiles_pre_tokenizer(&tokenizer); + assert_eq!( + get_splits(&tokenizer, "{[$]CC[$]}"), + ["{", "[", "$", "]", "C", "C", "[", "$", "]", "}"] + ); + } + + #[test] + fn from_vocab_selects_bigsmiles_pre_tokenizer() { + let mut vocab_file = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + vocab_file.push("python"); + vocab_file.push("smirk"); + vocab_file.push("vocab_bigsmiles.json"); + + let tokenizer = SmirkTokenizer::from_vocab(vocab_file.to_str().unwrap(), true); + assert_bigsmiles_pre_tokenizer(&tokenizer); + assert_eq!( + get_splits(&tokenizer, "{[$]CC[$]}"), + ["{", "[", "$", "]", "C", "C", "[", "$", "]", "}"] + ); + } +} From e765b394c19ac0d17a652a18fb07c5c90d2db50a Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:31:17 -0400 Subject: [PATCH 17/52] remove redundant jupyter notebook dep --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 68f258d..510439a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,6 @@ docs = [ "torch~=2.0", "rdkit==2024.9.5", "transformers~=4.48.2", - "notebook>=7.5.5", ] [build-system] From 023b549ac6f317c5439bebcbd68feeab7e60c899 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:37:08 -0400 Subject: [PATCH 18/52] add BigSMILES tokenizer to changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6596ee..5bc46ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- `SmirkBigSmilesFast` Tokenizer for 
BigSMILES line notation representation of polymers ([#8](https://github.com/BattModels/smirk/pull/8)) + ### Fixed -- Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) +- Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) ## [v0.2.0](https://github.com/BattModels/smirk/tree/v0.2.0) From b20b142ce4cbde4da09c02a7a742003b113a44b5 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Mon, 27 Apr 2026 21:45:52 -0400 Subject: [PATCH 19/52] serialize BigSMILES version and test save --- src/pre_tokenizers/bigsmirk.rs | 19 +++++++++++++++++-- test/test_tokenize_bigsmiles.py | 5 +++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index ad108e6..87b9bcd 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -18,6 +18,8 @@ pub struct BigSmirkPreTokenizer { } impl BigSmirkPreTokenizer { + pub const BIGSMILES_VERSION: &'static str = "1.1"; + pub fn new(outer: &str, inner: &str) -> Self { Self { outer: Regex::new(&outer).unwrap(), @@ -62,8 +64,9 @@ impl Serialize for BigSmirkPreTokenizer { where S: serde::Serializer, { - let mut state = serializer.serialize_struct("BigSmirkPreTokenizer", 3)?; + let mut state = serializer.serialize_struct("BigSmirkPreTokenizer", 4)?; state.serialize_field("type", "BigSmirkPreTokenizer")?; + state.serialize_field("bigsmiles_version", Self::BIGSMILES_VERSION)?; state.serialize_field("outer", self.outer.as_str())?; state.serialize_field("inner", self.inner.as_str())?; state.end() @@ -77,7 +80,7 @@ impl<'de> Deserialize<'de> for BigSmirkPreTokenizer { { deserializer.deserialize_struct( "BigSmirkPreTokenizer", - &["type", "outer", "inner"], + &["type", "bigsmiles_version", "outer", "inner"], BigSmirkPreTokenizerVisitor, ) } @@ -103,6 +106,9 @@ impl<'de> Visitor<'de> for BigSmirkPreTokenizerVisitor { "type" => { type_field = 
Some(map.next_value()?); } + "bigsmiles_version" => { + let _: serde::de::IgnoredAny = map.next_value()?; + } "outer" => { if let Some(x) = map.next_value()? { outer = Some(x); @@ -239,6 +245,15 @@ pub mod tests { check_serde(&default); } + #[test] + fn serializes_bigsmiles_version() { + let value = serde_json::to_value(BigSmirkPreTokenizer::default()).unwrap(); + assert_eq!( + value.get("bigsmiles_version").and_then(|v| v.as_str()), + Some(BigSmirkPreTokenizer::BIGSMILES_VERSION) + ); + } + #[test] fn serialize_pretok() { let pretok = BigSmirkPreTokenizer::new(r".|\[.*?]", "."); diff --git a/test/test_tokenize_bigsmiles.py b/test/test_tokenize_bigsmiles.py index e2a8f7b..23a4b37 100644 --- a/test/test_tokenize_bigsmiles.py +++ b/test/test_tokenize_bigsmiles.py @@ -71,8 +71,10 @@ def test_tokenizer_serialize_pretokenizer_type(bigsmiles, expected_type): if expected_type is None: assert "type" not in config["pre_tokenizer"] + assert "bigsmiles_version" not in config["pre_tokenizer"] else: assert config["pre_tokenizer"].get("type") == expected_type + assert config["pre_tokenizer"].get("bigsmiles_version") == "1.1" @pytest.mark.parametrize( @@ -87,8 +89,11 @@ def test_tokenizer_serialize_pretokenizer_type(bigsmiles, expected_type): def test_bigsmiles_tokenizer_save_load(bigsmiles_tokenizer, text): with NamedTemporaryFile("w", suffix=".json", delete=False) as file: bigsmiles_tokenizer.save(file.name) + with open(file.name) as saved: + config = json.load(saved) loaded = SmirkTokenizer.from_file(file.name) + assert config["pre_tokenizer"].get("bigsmiles_version") == "1.1" original_splits = bigsmiles_tokenizer.pretokenize(text) loaded_splits = loaded.pretokenize(text) assert original_splits == loaded_splits From 601b36c9d7f18b6040f1b857c6a0a12256cb7559 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Tue, 28 Apr 2026 17:14:34 -0400 Subject: [PATCH 20/52] fix typo in demo notebook --- docs/big_smirk_demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/docs/big_smirk_demo.ipynb b/docs/big_smirk_demo.ipynb index 33a1555..c29bea5 100644 --- a/docs/big_smirk_demo.ipynb +++ b/docs/big_smirk_demo.ipynb @@ -108,7 +108,7 @@ "source": [ "### Token Coloring Render\n", "\n", - "Visualize BigSMILES token boundaries for PVC () and sPP (Syndiotactic Polypropylene) by coloring each token in sequence." + "Visualize BigSMILES token boundaries for PVC (Polyvinyl chloride ) and sPP (Syndiotactic Polypropylene) by coloring each token in sequence." ] }, { From a3f14cfe69e02c6fb388eaa843bdfcfcf232e67e Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Tue, 28 Apr 2026 17:42:29 -0400 Subject: [PATCH 21/52] only support abstract labels when fragment def is provided --- src/pre_tokenizers/bigsmirk.rs | 183 ++++++++++++++++++++++++-- src/pre_tokenizers/split_bigsmiles.rs | 8 +- 2 files changed, 178 insertions(+), 13 deletions(-) diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index 87b9bcd..d60c0c6 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -4,6 +4,7 @@ use regex::{Match, Regex}; use serde::de::Visitor; use serde::ser::SerializeStruct; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::fmt; use tokenizers::tokenizer::pattern::Pattern; use tokenizers::tokenizer::{ @@ -141,10 +142,141 @@ impl<'de> Visitor<'de> for BigSmirkPreTokenizerVisitor { impl PreTokenizer for BigSmirkPreTokenizer { fn pre_tokenize(&self, pretokenized: &mut PreTokenizedString) -> Result<()> { + pretokenized.normalize(|normalized| { + if let Some(expanded) = expand_fragment_definitions(normalized.get()) { + static MATCH_FULL_STRING: Lazy = + Lazy::new(|| Regex::new(r"(?s)^.*$").unwrap()); + normalized.replace(&*MATCH_FULL_STRING, &expanded)?; + } + Ok(()) + })?; pretokenized.split(|_, s| s.split(self.to_owned(), SplitDelimiterBehavior::Isolated)) } } +fn expand_fragment_definitions(input: &str) -> Option { + let (main, definitions) = 
split_fragment_definitions(input)?; + let mut expanded = main.to_string(); + + for _ in 0..=definitions.len() { + let (next, changed) = expand_fragment_references_once(&expanded, &definitions); + expanded = next; + if !changed { + break; + } + } + + Some(expanded) +} + +fn split_fragment_definitions(input: &str) -> Option<(&str, HashMap)> { + for (start, _) in input.match_indices(".{#") { + if let Some(definitions) = parse_fragment_definition_suffix(&input[start..]) { + return Some((&input[..start], definitions)); + } + } + None +} + +fn parse_fragment_definition_suffix(suffix: &str) -> Option> { + let mut definitions = HashMap::new(); + let mut pos = 0; + + while pos < suffix.len() { + if !suffix[pos..].starts_with(".{#") { + return None; + } + pos += ".{#".len(); + + let name_start = pos; + while pos < suffix.len() { + let c = suffix[pos..].chars().next().unwrap(); + if c == '=' { + break; + } + pos += c.len_utf8(); + } + let name = &suffix[name_start..pos]; + if name.is_empty() || !is_fragment_definition_name(name) { + return None; + } + if !suffix[pos..].starts_with('=') { + return None; + } + pos += '='.len_utf8(); + + let value_start = pos; + let mut depth = 1; + while pos < suffix.len() { + let c = suffix[pos..].chars().next().unwrap(); + match c { + '{' => depth += 1, + '}' => { + depth -= 1; + if depth == 0 { + definitions.insert(name.to_string(), suffix[value_start..pos].to_string()); + pos += c.len_utf8(); + break; + } + } + _ => {} + } + pos += c.len_utf8(); + } + + if depth != 0 { + return None; + } + } + + Some(definitions) +} + +fn is_fragment_definition_name(name: &str) -> bool { + name.chars() + .all(|c| matches!(c, '!'..='~') && !matches!(c, '=' | '{' | '}' | '[' | ']')) +} + +fn expand_fragment_references_once( + input: &str, + definitions: &HashMap, +) -> (String, bool) { + let mut expanded = String::with_capacity(input.len()); + let mut changed = false; + let mut pos = 0; + + while pos < input.len() { + let rest = &input[pos..]; + if let 
Some((name, len)) = bracketed_fragment_reference(rest) { + if let Some(replacement) = definitions.get(name) { + expanded.push_str(replacement); + pos += len; + changed = true; + continue; + } + } + + let c = rest.chars().next().unwrap(); + expanded.push(c); + pos += c.len_utf8(); + } + + (expanded, changed) +} + +fn bracketed_fragment_reference(input: &str) -> Option<(&str, usize)> { + if !input.starts_with("[#") { + return None; + } + + let end = input.find(']')?; + let name = &input[2..end]; + if name.is_empty() { + return None; + } + Some((name, end + ']'.len_utf8())) +} + fn append_split(splits: &mut Vec<(Offsets, bool)>, prev: &mut usize, m: Match, offset: usize) { let start = m.start() + offset; let end = m.end() + offset; @@ -265,6 +397,11 @@ pub mod tests { splits.into_iter().all(|(_s, m)| m) } + fn all_matches_after_fragment_expansion(tok: &BigSmirkPreTokenizer, bigsmiles: &str) -> bool { + let expanded = expand_fragment_definitions(bigsmiles).unwrap_or_else(|| bigsmiles.into()); + all_matches(tok, &expanded) + } + fn get_matched_pretokens(tok: &BigSmirkPreTokenizer, bigsmiles: &str) -> Vec { tok.find_matches(bigsmiles) .unwrap() @@ -569,21 +706,49 @@ pub mod tests { #[test] fn test_fragment_reference() { let pretok = BigSmirkPreTokenizer::default(); - assert_eq!(get_split_tokens(&pretok, "[#PEG]"), ["[", "#", "PEG", "]"]); + assert_eq!(get_split_tokens(&pretok, "[#PEG]"), ["[", "#PEG", "]"]); assert_eq!( get_split_tokens(&pretok, "[#Styrene]"), - ["[", "#", "Styrene", "]"] + ["[", "#Styrene", "]"] ); - assert_eq!(get_split_tokens(&pretok, "[#+]"), ["[", "#", "+", "]"]); + assert_eq!(get_split_tokens(&pretok, "[#+]"), ["[", "#+", "]"]); + assert_eq!(get_split_tokens(&pretok, "[#PEG-1]"), ["[", "#PEG-1", "]"]); + assert_eq!(get_split_tokens(&pretok, "[#A]"), ["[", "#A", "]"]); assert_eq!( - get_split_tokens(&pretok, "[#PEG-1]"), - ["[", "#", "PEG-1", "]"] + get_split_tokens(&pretok, "{[$][#Styrene][$]}"), + ["{", "[", "$", "]", "[", "#Styrene", "]", "[", 
"$", "]", "}"] ); - assert_eq!(get_split_tokens(&pretok, "[#A]"), ["[", "#", "A", "]"]); + } + + #[test] + fn test_fragment_definition_expansion() { + let pretok = BigSmirkPreTokenizer::default(); assert_eq!( - get_split_tokens(&pretok, "{[$][#Styrene][$]}"), - ["{", "[", "$", "]", "[", "#", "Styrene", "]", "[", "$", "]", "}"] + get_split_tokens(&pretok, "C([#R]).{#R=CO}"), + ["C", "(", "C", "O", ")"] + ); + assert_eq!( + get_split_tokens(&pretok, "{[#A]CC[#B]}.{#A=[<]}.{#B=[>]}"), + ["{", "[", "<", "]", "C", "C", "[", ">", "]", "}"] + ); + } + + #[test] + fn test_fragment_definitions_do_not_expand_bare_labels() { + let pretok = BigSmirkPreTokenizer::default(); + let tokens = get_split_tokens( + &pretok, + "A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2].{#A=C}.{#A'=C}.{#R=C}", + ); + + assert_eq!( + tokens + .iter() + .filter(|token| matches!(token.as_str(), "A" | "A'" | "R")) + .count(), + 5 ); + assert!(!tokens.iter().any(|token| token == "{" || token == "=")); } #[test] @@ -645,7 +810,7 @@ pub mod tests { .enumerate() .filter(|(_, x)| !x.starts_with("#") && !x.is_empty()) { - if !all_matches(&pretok, line) { + if !all_matches_after_fragment_expansion(&pretok, line) { failures.push(format!("line {}: {}", idx + 1, line)); } } diff --git a/src/pre_tokenizers/split_bigsmiles.rs b/src/pre_tokenizers/split_bigsmiles.rs index 82d2ca3..0b6bf4d 100644 --- a/src/pre_tokenizers/split_bigsmiles.rs +++ b/src/pre_tokenizers/split_bigsmiles.rs @@ -43,10 +43,10 @@ pub const MATCH_OUTER_BIGSMILES: &'static str = concat!( r"\*|", // Wildcard r"[\.\-=\#\$:/\\]|", // Bonds r"\d|%|", // Ring closures - r"\(|\)|", + r"\(|\)|", // Branch delimiters in SMILES and BigSMILES r"\{|\}|", // Stochastic object delimiters r",|;|", // Repeat unit separator and end group separator - r"[A-Z][A-Za-z0-9']*|", // Fragment and abstract spec labels + r"[A-Z][A-Za-z0-9']*|", // Bare spec labels r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", // Bracketed atoms/descriptors ); @@ -58,7 +58,7 @@ pub const 
MATCH_INNER_BIGSMILES: &'static str = formatcp!(concat!( r"|", r"(\$|<|>)(\d+)?(\[)(\$|<|>|[A-Z][A-Za-z0-9']*)(\d+)?(\])(\d+)", r"|", - r"(#)([!-~]+)", + r"(#[!-~]+)", r"|", r"(\d+)?", r"({BRACKETED_SYMBOL})", @@ -81,4 +81,4 @@ pub const LADDER_BONDING_DESCRIPTOR: &'static str = concat!( r"(\d+)", // Group id ); -pub const FRAGMENT_REFERENCE: &'static str = r"(#)([!-~]+)"; +pub const FRAGMENT_REFERENCE: &'static str = r"(#[!-~]+)"; From 5bf0d10307df3eaf6c2a8a8c319900fe7c57ef81 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Tue, 28 Apr 2026 17:43:12 -0400 Subject: [PATCH 22/52] regex generator updated for new abstract label logic --- opt/build_vocab.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opt/build_vocab.py b/opt/build_vocab.py index d9b98d5..dfb2b86 100755 --- a/opt/build_vocab.py +++ b/opt/build_vocab.py @@ -207,7 +207,7 @@ def build_bigsmiles_pretokenizer(): r"\(|\)", # branches r"\{|\}", # stochastic object delimiters r",|;", # repeat unit separator and end group separator - BIGSMILES_LABEL, # fragment and abstract spec labels + BIGSMILES_LABEL, # bare spec labels r"\[(?:[^\[\]]+|\[[^\[\]]*\])*\]", # bracketed atoms/descriptors ], public=True, @@ -224,7 +224,7 @@ def build_bigsmiles_pretokenizer(): r"|", rf"(\$|<|>)(\d+)?(\[)(\$|<|>|{BIGSMILES_LABEL})(\d+)?(\])(\d+)", r"|", - r"(#)([!-~]+)", + r"(#[!-~]+)", r"|", r"(\d+)?", r"({BRACKETED_SYMBOL})", @@ -263,7 +263,7 @@ def build_bigsmiles_pretokenizer(): separator="", ) ) - print(const_str("FRAGMENT_REFERENCE", r"(#)([!-~]+)", public=True)) + print(const_str("FRAGMENT_REFERENCE", r"(#[!-~]+)", public=True)) def build_selfies_pretokenizer(): From 0af2abca6be9205e1619c39655d420ab9d07021b Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Tue, 28 Apr 2026 17:43:40 -0400 Subject: [PATCH 23/52] expand test and test all smiles/ bigsmiles for round trip --- test/bigsmiles.csv | 664 ++++++++++++++++++++++++++++++++ test/bigsmiles.smi | 17 +- test/test_tokenize_bigsmiles.py | 
216 ++++++++++- 3 files changed, 881 insertions(+), 16 deletions(-) create mode 100644 test/bigsmiles.csv diff --git a/test/bigsmiles.csv b/test/bigsmiles.csv new file mode 100644 index 0000000..bade9a3 --- /dev/null +++ b/test/bigsmiles.csv @@ -0,0 +1,664 @@ +# Data from https://doi.org/10.1038/s41597-024-03212-4 +,SMILES,BigSMILES,Tg (C) +0,*C1COC2C1OCC2Oc1ccc(cc1)CNC(=O)CCCCCCC(=O)NCc1ccc(cc1)O*,{},21.58173134 +1,*OC(CCC(OC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)*)C)C,{},63.5893379 +2,*OC(=O)c1ccc(cc1)C(=O)OCCCC(=O)NCc1ccc(cc1)CNC(=O)CCC*,{},53.55726117 +3,*OC(=O)NCCNC(=O)OCC*,{},5.896093021 +4,*SCCCCC*,{},-55.37860961 +5,*Oc1ccc(cc1)C(=O)OC(=O)c1ccc(cc1)OCCCCCC*,{},64.73496741 +6,*c1[nH]c(cc1CC(=O)OCCCCCCCC)*,{},-4.076963699 +7,*C(C*)(CC(=O)OCCCCCCCCCC)C(=O)OCCCCCCCCCC,{$CC(C(=O)OCCCCCCCCCC)(CC(=O)OCCCCCCCCCC)$},75.04044311 +8,*OCC1C(C1)C*,{},-28.98581149 +9,*N(C(=O)CCCCCCCCCCCCCCCCC(=O)N(CCCCCC*)C)C,{},49.34222836 +10,*O[Si](*)(CCC(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C,{},-68.87734458 +11,*S(=O)(=O)c1ccc(cc1)C(=O)CNc1ccc(cc1)NCC(=O)c1ccc(cc1)*,{},198.9026743 +12,*S(=O)(=O)c1ccc(cc1)C(=O)NCCNC(=O)c1ccc(cc1)*,{},205.0712987 +13,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)CCCCC)cc1,{},204.7640603 +14,*NC(=O)C(=O)NCCCCCCCCCCNC(=O)CCCCCCCCC(=O)NCCCCCCCCCC*,{},82.2677155 +15,*OC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCCCCCCCCCCCC*,{},17.27783213 +16,*Oc1ccc(cc1)C(=O)Oc1ccc(cc1)Cc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},88.16409459 +17,*Oc1ccc(cc1)SSCCCCSSc1ccc(cc1)*,{},102.260186 +18,*Oc1ccc(cc1)C(=O)Nc1c(cc(cc1)c1cc(c(cc1)NC(=O)c1ccc(cc1)OCCCCCC*)Cl)Cl,{},133.9866306 +19,*c1c(cc(c(c1)C=Cc1ccc(cc1)OCCC(CCCC(C)C)C)C=C*)C=Cc1ccc(cc1)OCCC(CCCC(C)C)C,{$C=Cc1cc(C=Cc2ccc(OCCC(C)CCCC(C)C)cc2)c(cc1C=Cc1ccc(OCCC(C)CCCC(C)C)cc1)$},2.868925186 +20,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)CCCCC(=O)*)CC1,{},197.4539358 +21,*SC(=O)CCCCC(=O)SCc1ccc(cc1)c1ccc(cc1)C*,{},-14.32353196 
+22,*C(C*)(C(=O)OCCNC(=O)N(CC(C(C(OC1OC(C(C(C1O)O)O)CO)C(CO)O)O)O)CCCCCC)C,{$CC(C(=O)OCCNC(=O)N(CCCCCC)CC(O)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO)(C)$},23.2402228 +23,*Oc1ccc(cc1)OC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(N1C(=O)c2c(C1=O)cccc2)C)C(=O)*,{},58.7319127 +24,*OC(=O)Nc1ccc(cc1)C(=O)OCC1CCC(CC1)C*,{},148.8519878 +25,*Nc1c(cccc1)NC(=O)c1ccc(cc1)C(=O)*,{},231.8080905 +26,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C(=O)Oc1ccc(cc1)OCCCCCC)C,{$CC(C(=O)OCCCCCCOc1ccc(C(=O)Oc2ccc(OCCCCCC)cc2)cc1)(C)$},14.67015775 +27,*Oc1ccc(cc1)S(=O)(=O)c1c2c(ccc1)c(ccc2)S(=O)(=O)c1ccc(cc1)Oc1c(cc(c(c1C)C)c1c(c(c(c(c1)C)*)C)C)C,{},300.6954413 +28,*OC(=O)NC1CCC(CC1)CC1CCC(CC1)NC(=O)OCC(C*)(C)C,{},15.9792949 +29,*OC(=O)NCCCCCC(=O)OCCCC*,{},47.28425166 +30,*N(c1c(cc(cc1)Cc1cc(c(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CCC)C)C)CCC,{},179.9023847 +31,*Nc1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)C(CCSC)N1C(=O)c2c(C1=O)cccc2)C(=O)*,{},230.3191562 +32,*N(c1c(cc(cc1)Cc1cc(c(cc1)N(C(=O)c1ccc(cc1)C(=O)*)C)C)C)C,{},167.9642319 +33,*OC(=O)CCCCCCC(=O)OCCc1ccc(cc1)N1ON1c1ccc(cc1)CC*,{},17.91289325 +34,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*)(C(F)(F)F)C(F)(F)F,"{C(=O)c3ccc(cc3)C(=O)>}",150.9011854 +35,*c1ccc2[nH]c3c(c2c1)cc(cc3)*,{},246.5898577 +36,*C(C*)C(=O)NCCCC,{$CC(C(=O)NCCCC)$},-84.72234668 +37,*C(=C*)CCCCCCCCCCOc1ccc(cc1)c1ccc(cc1)OCC(CCCCCC)F,{$C=C(CCCCCCCCCCOc1ccc(c2ccc(OCC(F)CCCCCC)cc2)cc1)$},27.34580481 +38,*NC(=O)NCCCCNC(=O)NCCCCCC*,{},60.10434296 +39,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OC(=O)OCCCCCOC(=O)*)C,{},39.38257392 +40,*N1CCN(CC1)C(=O)CCN1CCN(CC1)CCC(=O)*,{},40.58060093 +41,*c1c(cc(cc1)C#C*)SCCCCCCCCCCCC,{$C#Cc1ccc(c(SCCCCCCCCCCCC)c1)$},53.41644401 +42,*OC(=O)Nc1cc(ccc1)C(=O)OCCCCCCCCCC*,{},45.09440244 +43,*Oc1c(cc(cc1)C(=O)*)CCCCCC,{},85.05865655 +44,*Nc1ccc(cc1)NC(=O)C=CC(=O)*,{},115.1255691 +45,*C(C*)(C(=O)OC(Oc1ccccc1)C)C,{$CC(C(=O)OC(C)Oc1ccccc1)(C)$},31.196908 +46,*C(*)C,{$C(C)$},122.3867673 +47,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2c(c3c(C(CC3)(C)C)c(c2C)*)C)cc1,{},325.2939228 
+48,*Nc1ccc(cc1)C(=O)*,{},92.05287264 +49,*C(=C*)C,{$C=C(C)$},4.023221393 +50,*c1nc(nc(n1)Oc1c2c(ccc1C(=O)Oc1ccc(cc1)OC(=O)c1c(c3c(cc1)cccc3)O*)cccc2)N1CCCCC1,{},288.7077709 +51,*OS(=O)(=O)c1ccc(cc1)Sc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)C1(CCCCC1)c1ccc(cc1)*,{},157.8104834 +52,*c1c(c(c(c(c1Cl)Cl)CC*)Cl)Cl,{$CCc1c(Cl)c(Cl)c(c(Cl)c1Cl)$},56.1987128 +53,*OC(=O)c1ccc(cc1)C(=O)OCCCC(=O)NCCCCCCNC(=O)CCC*,{},60.02078862 +54,*c1c(c(cc(c1)*)C=Nc1ccc(cc1)F)O,{$c1cc(c(O)c(C=Nc2ccc(F)cc2)c1)$},-121.5212841 +55,*N=C1C=CC(=NC2=CC(=O)C(=CC2=O)*)C=C1,{},212.9967182 +56,*OC(=O)NCCSCCCCCSCCNC(=O)OCC1CCC(CC1)C*,{},77.84678315 +57,*OS(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)C1(CCCCC1)c1ccc(cc1)*,{},149.9071313 +58,*c1nc(nc(n1)Oc1cc2c(cc1)cc(cc2)C(=O)Oc1c(cccc1)OC(=O)c1cc2c(cc1)cc(cc2)O*)N1CCN(CC1)C,{},54.90728348 +59,*OC(=O)CC(=O)OCC*,{},-90.52916041 +60,*OC(=O)CCCCCCCC(=O)OCC(C*)(C)C,{},17.01342593 +61,*OC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)OCCN(CC*)CCCC(=O)Nc1ccc(cc1)N=Nc1ccccc1,{},-32.25789051 +62,*[Si](*)(CCCCCC)CCCCCC,{$[Si](CCCCCC)(CCCCCC)$},-11.37918107 +63,*c1n(c(cc1)C=C*)CCCCCC,{},-15.03969582 +64,*NC1CC(CC(C1)(CNC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(CC(C)C)N1C(=O)c2c(C1=O)cccc2)C(=O)*)C)(C)C,{},195.2683571 +65,*Nc1cc(cc(c1)C(=O)Nc1ccccc1)NC(=O)c1cc(ccc1)C(=O)*,{},233.599405 +66,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Oc2ccc(cc2)OC(=O)*)cc1,{},-71.68107141 +67,*OC(=O)CCCCCC(=O)OCCC*,{},-71.28231613 +68,*N1C(=O)C(CC1=O)SCCOCCSC1C(=O)N(C(=O)C1)c1ccc(cc1)C(=O)OCCCCCCOC(=O)c1ccc(cc1)*,{},19.42674542 +69,*NC(=O)CCCCCCCCC(=O)NCCCP(CCC*)c1ccccc1,{},56.20366062 +70,*c1nc2c([nH]1)cc(cc2)S(=O)(=O)c1cc2c(nc([nH]2)c2ccc(cc2)Oc2ccc(cc2)*)cc1,{},315.1120806 +71,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C=Nc1ccc(cc1)CCCC)C,{$CC(C(=O)OCCCCCCOc1ccc(C=Nc2ccc(CCCC)cc2)cc1)(C)$},12.41121342 +72,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)C(=O)Nc1ccc(cc1)*,{},315.5911196 +73,*c1ccc(cc1)/C=C/c1c(cc(c(c1)CCCCCC)/C=C/*)CCCCCC,{},51.04706134 
+74,*Oc1ccc(cc1)OC(=O)*,"{C(=O)>}",105.0499992 +75,*C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*,{},292.5925873 +76,*c1c(cc(c(c1)OCCOCCOCCOCCC(=O)O[Na])C#Cc1c(cc(c(c1)OC(COCCOCCOCCOC)COCCOCCOCCOC)C#C*)OC(COCCOCCOCCOC)COCCOCCOCCOC)OCCOCCOCCOCCC(=O)O[Na],{},-43.36051195 +77,*N(CC*)C(=O)CCCCCCCC,{},-49.55374382 +78,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},232.2309602 +79,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCC(=O)Oc1cc2c(cc1)cc(cc2)OC(=O)CCCCC*,{},-10.30414379 +80,*NC(=O)CCP(CCC(=O)NCCCCCC*)c1ccccc1,{},9.575432391 +81,*c1cc2C(c3c(c2cc1)ccc(c3)c1cc2C(c3c(c2cc1)ccc(c3)*)(CCCCCCCCCCCC)CCCCCCCCCCCC)(CCCCCCCC)CCCCCCCC,{},37.51200969 +82,*C1OC(=O)C(C1)CCC1C(=O)OC(C1)CCCCCCCCCC*,{},84.1573412 +83,*C(C*)C(=O)OC(CC(C)C)C,{$CC(C(=O)OC(C)CC(C)C)$},11.47675289 +84,*OC(C*)COc1ccc(cc1)C,{},-55.31696784 +85,*N(C(=O)CCCCCCCCCCCCCCC(=O)N(Cc1ccc(cc1)C*)C)C,{},-14.32362541 +86,*C1OC(OC(C1)*)O,{},136.0523749 +87,*c1c(cc(c(c1)OCCCCCCCCCC)c1ccc(cc1)*)OCCCCCCCCCC,{},61.07976291 +88,*c1c2c(nccc2)c(cc1)OCCOc1c2ncccc2c(cc1)C*,{},74.04018308 +89,*NC(CC(=O)*)C(=O)OCCCCCC,{},0.713924343 +90,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)*)C,{},81.15934241 +91,*n1c(=O)c2ccc3c(=O)n(c(=O)c4ccc(c1=O)c2c34)CCCCCCCCCCCC*,{},101.4116865 +92,*Oc1c(cc(cc1)C=C1C(=O)C(=Cc2cc(c(cc2)OC(=O)c2ccc(cc2)C(=O)*)OC)CCC1)OC,{},105.2246285 +93,*Nc1ccc(cc1)NC(=O)C1C(C(=CC(C1)C(C(=O)*)CC(=O)O)C)C(=O)O,{},149.7687276 +94,*Oc1ccc(cc1)OC(=O)c1c(cccc1)C=Cc1ccc(cc1)C=Cc1c(cccc1)C(=O)*,{},86.85824281 +95,*OC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)OCC*,{},14.08686284 +96,*OC(=O)N(c1ccc(cc1)N(C(=O)OCC(C*)CCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)C)C,{},78.11061669 +97,*C1C(C(C(C1)C=C*)C(=O)OCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)C(=O)OCCCCCOc1ccc(cc1)c1ccc(cc1)C#N,{},27.68661296 +98,*C#CC(=C(*)CCCCOC(=O)NCCCCCC)CCCCOC(=O)NCCCCCC,{$C#CC(CCCCOC(=O)NCCCCCC)=C(CCCCOC(=O)NCCCCCC)$},5.219913288 +99,*c1ccc(cc1)/C=C/*,{},43.3930774 
+100,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2ccc3S(=O)(=O)c4c(c3c2)cc(cc4)*)c1,{},355.5073659 +101,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)C(c1ccc(cc1)*)C,{},307.2645731 +102,*Oc1c(cc(c(c1)OC(=O)c1ccc(cc1)C=Nc1cc(c(cc1)OCCCCCCOc1c(cc(cc1)N=Cc1ccc(cc1)C(=O)*)Cl)Cl)Cl)Cl,{},82.06188226 +103,*c1ccc2n(c3c(c2c1)cc(cc3)/C=C/c1ccc(cc1)Oc1c(cc(cc1)c1ccc(cc1)c1ccc(cc1)c1cc(c(cc1)Oc1ccc(cc1)/C=C/*)C(F)(F)F)C(F)(F)F)CC(CCCC)CC,{},187.4626146 +104,*Oc1ccc(cc1)Oc1ccc(cc1)C(=Nc1ccc(cc1)Oc1ccc(cc1)N=C(c1ccc(cc1)*)C)C,{},117.5082044 +105,*Oc1ccc(cc1)NC(=O)NCCCCCCNC(=O)*,{},122.4088725 +106,*C(C*)OC(=O)CC(=O)C,{$CC(OC(=O)CC(C)=O)$},9.819405608 +107,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)OCCOc1ccc(cc1)C(=O)*)C,{},68.01369706 +108,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)*,{},176.5386789 +109,*N1C(=S)SC(=CC=C2SC(=S)N(C2=O)CC*)C1=O,{},164.0140939 +110,*C1NC(=O)C(NC1=O)CCC(=O)OCCCC=CCCCOC(=O)CC*,{},25.37727633 +111,*Oc1ccc(cc1)N=Cc1cc(c(cc1)OC(=O)CCCCC(=O)Oc1c(cc(cc1)C=Nc1ccc(cc1)*)OC)OC,{},47.38250675 +112,*c1n(c(nn1)c1ccc(cc1)c1ccc(cc1)c1n(c(nn1)COc1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)OC*)CC1)c1ccccc1)c1ccccc1,{},140.326419 +113,*N1CCN(CC1)C(=O)C1C(C1)C(=O)*,{},100.5840527 +114,*C(=C*)c1c(sc(c1)C(F)(F)F)C(F)(F)F,{$C=C(c1cc(C(F)(F)F)sc1C(F)(F)F)$},54.57900551 +115,*N1C(=O)c2c(C1=O)cc(cc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)*)cc1,{},421.9822435 +116,*c1cc2c(C(=O)N(C2=O)c2c(c(c(c(c2F)F)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)F)F)cc1,{},388.0137456 +117,*C#CC(=C(*)OS(=O)(=O)c1ccc(cc1)C)OS(=O)(=O)c1ccc(cc1)C,{$C#CC(OS(=O)(=O)c1ccc(C)cc1)=C(OS(=O)(=O)c1ccc(C)cc1)$},164.1018919 +118,*OC(=O)CCC(=O)OCCOCCOCC*,{},14.21396213 +119,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1cc(ccc1)C(=O)*)(CC)C,"{C(=O)c3cccc(c3)C(=O)>}",194.1629808 +120,*n1c(=O)c2c(c3c(cc2c1=O)c(=O)n(c3=O)CCCCCCCCCC*)Br,{},137.1087261 +121,*C(C*)(C(=O)OCCCCCCCCOC(=O)OC1CC2=CCC3C(CCC4(C(CCC34)C(CCCC(C)C)C)C)C2(CC1)C)C,{$CC(C(=O)OCCCCCCCCOC(=O)OC1CCC2(C)C(=CCC3C2CCC2(C)C(C(C)CCCC(C)C)CCC32)C1)(C)$},47.48888608 
+122,*OC(=O)CCCCCCCC(=O)OCCCCCCCCC*,{},3.667443154 +123,*Oc1ccc(cc1)c1c(cc(c(c1)OCCCCCC)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*)OCCCCCC,{},81.55306128 +124,*c1c2c(nccc2)c(cc1)CC*,{},70.93028064 +125,*c1n(c(cc1)CCC(=O)OC(=O)CC*)C,{},78.11959859 +126,*N1C(=O)C2OC3C(C2C1=O)C(=O)N(C3=O)c1ccc(cc1)c1c(nc2c(n1)cc(cc2)S(=O)(=O)c1cc2c(nc(c(n2)c2ccc(cc2)*)c2ccccc2)cc1)c1ccccc1,{},373.476202 +127,*c1oc(cc1)Sc1oc(cc1)C=C1C(=O)C(=C*)CC1,{},102.2883172 +128,*Oc1c(c(ccc1)Oc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)*)C(=O)Nc1ccc(cc1)N=Nc1ccccc1,{},238.4094841 +129,*=C=C=C(C(=*)CO)CO,{$=C=C=C(CO)C(CO)=$},42.01413885 +130,*c1oc(cc1)Sc1oc(cc1)C=NCCN=C*,{},95.6213855 +131,*NC(=O)/C=C/CC/C=C/C(=O)NCCCCCC*,{},2.127453311 +132,*Nc1c(cc(c(c1)SCCC#N)NC(=O)c1ccc(cc1)C(=O)*)SCCC#N,{},214.7483216 +133,*c1c(cc(cc1)CC*)C(=O)C,{$CCc1ccc(c(C(C)=O)c1)$},8.997925789 +134,*Oc1ccc(cc1)C(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)C(=O)c1ccc(cc1)*,{},138.8344532 +135,*c1sc(nn1)c1cc(ccc1)OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCCOc1cc(ccc1)*,{},35.51374296 +136,*Nc1c(cc(c(c1)OC)*)OC,{},168.2573371 +137,*SCSCCCC*,{},1.410869343 +138,*c1nc(sc1)NC(=O)NCCCCCCNC(=O)Nc1nc(cs1)c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)*)CCC1,{},47.02441211 +139,*N(CC*)C(=O)c1ccc(cc1)C,{},87.3045689 +140,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCC(=O)Oc1ccc(cc1)c1ccc(cc1)OC(=O)CCCCC*,{},60.41501019 +141,*Oc1c(c(c(c(c1[2H])[2H])C(c1c(c(c(c(c1[2H])[2H])OC(=O)*)[2H])[2H])(C([2H])([2H])[2H])C([2H])([2H])[2H])[2H])[2H],{},230.9913822 +142,*Oc1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)c1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)Oc1c(cc(cc1)C(=O)c1cc(c(cc1)*)S(=O)(=O)O)S(=O)(=O)O,{},151.7193386 +143,*c1c2c(nsn2)c(cc1)c1sc(c(c1)CCCCCC)C=Cc1sc(c(c1CCCCCC)CCCCCC)C=Cc1sc(cc1CCCCCC)*,{},17.9542177 +144,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(=O)Nc2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)NC(=O)c2ccc(cc2)*)cc1,{},178.6163373 +145,*c1cc2n(c3c(c2cc1CCCCCC)cc(c(c3)C=C*)CCCCCC)CCCCCCCC,{},45.45691415 
+146,*C#CC(=C(*)c1cncnc1)CCCCOC(=O)NCC(=O)OCCCC,{$C#CC(CCCCOC(=O)NCC(=O)OCCCC)=C(c1cncnc1)$},73.99227071 +147,*OC(=O)c1ccc(cc1)C(=O)NCCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},76.72929425 +148,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1)C(c1cc(c(cc1)*)[N-][N+]#N)(C)C)[N-][N+]#N,{},181.1592195 +149,*c1sc(cc1CCCCCCCC)c1sc(cc1CCCCCC(C(C(C(F)(F)F)(F)F)(F)F)(F)F)*,{},6.476080227 +150,*c1c2C(=O)N(C(=O)c2c(c2ccccc12)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)*)c1c(cccc1)F,{},314.5599695 +151,*NC(=O)CCCCCCCCCCC(=O)NCC1CC(CCC1)C*,{},26.18556129 +152,*Nc1ccc(cc1)NC(=O)c1cc(cc(c1)C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)C(=O)*,{},235.6895845 +153,*c1c(cc(c(c1)O)O)*,{$c1cc(O)c(O)cc1$},158.9171015 +154,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OCCCCCCC*)C,{},36.76976951 +155,*C(=C(*)C)[Si](CC)(C)C,{$C(C)=C([Si](C)(C)CC)$},157.1009863 +156,*OC(=O)CCCCCCCC(=O)*,{},-68.69432556 +157,*OC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)OCCCCCCCCCCCC*,{},-26.12929226 +158,*c1c(cc(c(c1)OCCCCCCCCCC)*)OCCCCCCCCCC,{},5.423112055 +159,*C(C(=O)*)(C)C,{$C(=O)C(C)(C)$},105.8292511 +160,*C#CC(=C(*)Cn1c2ccc(cc2c2cc(ccc12)CCCCCCCCCCCCCCCC)CCCCCCCCCCCCCCCC)Cn1c2ccc(cc2c2cc(ccc12)CCCCCCCCCCCCCCCC)CCCCCCCCCCCCCCCC,{$C#CC(Cn1c2ccc(CCCCCCCCCCCCCCCC)cc2c2cc(CCCCCCCCCCCCCCCC)ccc21)=C(Cn1c2ccc(CCCCCCCCCCCCCCCC)cc2c2cc(CCCCCCCCCCCCCCCC)ccc21)$},-2.237818484 +161,*C(C*)CCCC(C)C,{$CC(CCCC(C)C)$},5.980301023 +162,*N1C(=O)C(CC1=O)Oc1ccc(cc1)N=Cc1ccc(cc1)OC1C(=O)N(C(=O)C1)c1ccc(cc1)Oc1ccc(cc1)*,{},154.6342574 +163,*c1nc(nc(n1)Oc1c(cccc1)C(=O)Oc1ccc(cc1)OC(=O)c1c(cccc1)O*)N(CC)c1ccccc1,{},146.7365026 +164,*c1nc2c(n1CCCS(=O)(=O)O)ccc(c2)c1ccc2c(nc(n2CCCS(=O)(=O)O)c2cc(ccc2)*)c1,{},208.6363648 +165,*OC(=O)C1C(=O)CC(C(=O)C1)C(=O)OCCCCCC*,{},-36.66445232 +166,*Oc1ccc(cc1)C(c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)*)c1c(cccc1)C(=O)O[Na],{},169.1341304 +167,*N(C(=O)CCCCCCCCCCCCCCCCC(=O)N(Cc1ccc(cc1)C*)CC)CC,{},4.279694171 +168,*c1nc(nc(n1)Oc1ccc(cc1)C(c1ccc(cc1)O*)(C)C)OC,{},121.2094568 
+169,*OC(=O)NCc1ccc(cc1)CNC(=O)OCCCCCCCC*,{},-25.31417235 +170,*NC(=O)CCCCCCCC(=O)NCCCCCCCCC*,{},9.904824538 +171,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(ccc2)Oc2ccc(cc2)S(=O)(=O)c2ccc(cc2)Oc2cc(ccc2)*)cc1,{},199.6619548 +172,*c1n(c(cc1)*)CC,{},112.6054346 +173,*OC(=O)CCCCS(=O)(=O)CCCCC(=O)OCCCCCCCCCC*,{},37.87494117 +174,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C3(C(=O)N(C2=O)C(=O)*)CCCCC3)cc1,{},277.0792131 +175,*C(=C*)c1c(cccc1)C,{$C=C(c1ccccc1C)$},261.6623551 +176,*Oc1cc2C(c3c(c2cc1)ccc(c3)Oc1ccc(cc1)c1c2cc(ccc2c(c2ccccc12)c1ccc(cc1)*)CCC)(c1ccc(cc1)N(c1ccccc1)c1ccccc1)c1ccc(cc1)N(c1ccccc1)c1ccccc1,{},187.3941077 +177,*SC(=O)CCCCC(=O)SCC*,{},-21.91188888 +178,*OC(CC*)(C(F)(F)F)C(F)(F)F,{},-8.026293088 +179,*C(C(*)c1ccccc1)[N+](=O)[O-],{$C(c1ccccc1)C([N+](=O)[O-])$},173.9913454 +180,*N1C(=O)c2c(C1=O)cc(cc2)C(c1cc2c(C(=O)N(C2=O)CC(=O)NNC(=O)c2ccc(cc2)C(=O)NNC(=O)C*)cc1)(C(F)(F)F)C(F)(F)F,{},226.7602549 +181,*C(C*)CCCCCCCCCCCCCCCCCCCC,{$CC(CCCCCCCCCCCCCCCCCCCC)$},21.77937998 +182,*OS(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)c1ccc(cc1)*,{},190.021745 +183,*SCCCCCC(=O)NCCc1ccc(cc1)CCNC(=O)CCCCC*,{},-18.01175462 +184,*N1CCC(CC1)C(=O)c1c(cc(cc1)C(=O)N1CCC(CC1)CCC*)Oc1ccccc1,{},131.1821319 +185,*C(C(CC*)(F)F)(Cl)F,{$CCC(F)(F)C(Cl)(F)$},-41.84388225 +186,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CC(=O)OCCCCCCOC(=O)C*,{},55.49864746 +187,*c1oc(cc1)Sc1oc(cc1)C=Nc1cc(ccc1)N=C*,{},122.5813993 +188,*C(C*)S(=O)c1ccccc1,{$CC(S(=O)c1ccccc1)$},137.9705527 +189,*OC(=O)c1cc(c(cc1)C(=O)Nc1ccc(cc1)C(c1ccc(cc1)C(c1ccc(cc1)NC(=O)c1c(ccc(c1)C(=O)OCCN(CC*)CCCCOc1ccc(cc1)N=Nc1ccc(cc1)CCCCCC)C(=O)O)(C)C)(C)C)C(=O)O,{},82.18427933 +190,*=C=C=C(C(=*)COC(=O)NCC)COC(=O)NCC,{$=C=C=C(COC(=O)NCC)C(COC(=O)NCC)=$},172.0633872 +191,*Oc1ccc(cc1)c1ccc(cc1)OCCCCCCOC(=O)Nc1c(ccc(c1)NC(=O)OCCCCOC(=O)Nc1cc(c(cc1)C)NC(=O)OCCCCCC*)C,{},41.49405569 +192,*c1sc(c(c1C#N)C#N)N=Cc1ccc(cc1)C=N*,{},204.4271705 
+193,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},126.2551791 +194,*c1cc2c(C(=O)N(C2=O)c2cc(ccc2)P(=O)(c2cc(ccc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(=O)C2(C)C)C(=O)*)c2ccccc2)cc1,{},138.7903633 +195,*OCOCCCCCC*,{},-41.68464364 +196,*Oc1ccc(cc1)OC(=O)c1c(cc(c(c1)OCCCCCCCCCCCCCCCC)C(=O)*)OCCCCCCCCCCCCCCCC,{},77.98044592 +197,*c1sc(cc1)/C=C/c1ccc(cc1)N(c1ccc(cc1)N(c1ccc(cc1)/C=C/c1sc(cc1)/C=C(/C(=O)Nc1cc(ccc1)NC(=O)/C(=C/*)/C#N)\C#N)c1ccccc1)c1ccccc1,{},243.9892983 +198,*N(c1ccc(cc1)Cc1ccc(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CCC)CCC,{},173.7390032 +199,*Oc1ccc(cc1)C(c1ccc(cc1)OCC#CC#CC*)(C)C,{},48.046331 +200,*NC(=O)C(C(C(=O)NCCCCCC*)O)O,{},149.1553302 +201,*NC(=O)CCCCCC(=O)NCc1ccc(cc1)C*,{},61.73836642 +202,*Nc1c(c(c(cc1)*)Cl)Cl,{},115.5762842 +203,*NC(=O)C(C(=O)NCCCCCCCCCC*)CCCCCCCCCCCC,{},-0.868265132 +204,*N1C(=O)c2c(C1=O)c(c(c(c2F)F)Oc1c(c(c(c(c1F)F)Oc1c(c2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)*)c(c1F)F)F)F)F)F,{},283.2696326 +205,*C1(c2c(C(=O)O1)cccc2)c1cc(c(c(c1)Br)OC(=O)c1cc(ccc1)C(=O)Oc1c(cc(cc1Br)*)Br)Br,{},245.9668197 +206,*OC(=O)CCCCSCCCCC(=O)OCCCCCCCCCC*,{},-22.30064955 +207,*OC(=O)c1c(cccc1)C(=O)OCc1ccc(cc1)C*,{},95.33964688 +208,*C=CCC(CC*)(C(=O)OC)C,{$C=CCC(C)(C(=O)OC)CC$},-28.15217408 +209,*O[Si](*)(CCCCCOc1ccc(cc1)OC(=O)c1ccc(cc1)OCCCC)C,{},-64.12566312 +210,*Nc1ccc(cc1)CCc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)*,{},245.2414672 +211,*=NN=CC#C[Si](C#CC=*)(c1ccccc1)c1ccccc1,{<=CC#C[Si](c1ccccc1)(c1ccccc1)C#CC=NN=>},131.2577432 +212,*Oc1ccc(cc1)C(=O)OCCOCCOCCOC(=O)c1ccc(cc1)OC(=O)Nc1c(ccc(c1)NC(=O)*)C,{},100.4455143 +213,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)Oc1ccc(cc1)C(c1ccccc1)(C)C,"{C(=O)c3ccc(cc3Oc3ccc(C(C)(C)c4ccccc4)cc3)C(=O)>}",135.0490958 +214,*OC(=O)NCCSCCCCCSCCNC(=O)OCCCC*,{},3.000872148 +215,*Oc1cc(ccc1)OC(=O)c1cc(c(c(c1)C(C)(C)C)OC(=O)c1ccc(cc1)C(=O)Oc1c(cc(cc1C(C)(C)C)C(=O)*)C(C)(C)C)C(C)(C)C,{},135.2011857 +216,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)*)(CCCC)C,"{C(=O)>}",118.8361345 
+217,*c1cc2n3c(nc2cc1)c1ccc(cc1C3=O)Oc1cc2c3n(c4ccc(cc4n3)O*)C(=O)c2cc1,{},291.281809 +218,*Oc1c(cc(cc1)OC(=O)Oc1ccc(cc1)OC(=O)*)C,{},120.5520321 +219,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)OCCCCCCCCCOc2ccc(cc2)*)cc1,{},47.98864595 +220,*C1OC(CO1)COCC1OC(OC1)CCCCCCCC(=O)OCCOC(=O)CCCCCCC*,{},-2.553202642 +221,*Oc1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},136.3718208 +222,*C(C(=O)C*)c1ccc(cc1)C,{$CC(=O)C(c1ccc(C)cc1)$},67.57313464 +223,*C(C*)C(=O)OCC1(COC(OC1)(C)C)C,{$CC(C(=O)OCC1(C)COC(C)(C)OC1)$},95.74104893 +224,*Oc1c(cc(cc1)C=C1C(=O)C(=Cc2cc(c(cc2)OCCCC*)OC)CC1)OC,{},36.19808899 +225,*N=C1c2ccccc2C(=Nc2ccc(cc2)*)c2ccccc12,{},158.2579262 +226,*NC(=O)CCCCCCCCCCCCCCCC*,{},15.38660594 +227,*Nc1cc(ccc1)NC(=O)CCCCCC(=O)*,{},-12.07623639 +228,*c1cc2c(cc1)cc(cc2)*,{},239.5402998 +229,*c1ncc(cc1)c1ccc(nc1)*,{},138.2497069 +230,*C(C*)n1c2ccc(cc2c2cc(ccc12)Br)Br,{$CC(n1c2ccc(Br)cc2c2cc(Br)ccc21)$},115.6170166 +231,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)c2ccc(cc2)c2ccc(cc2)*)cc1,{},357.2965065 +232,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)cc1,{},302.652739 +233,*OC(=O)NC1C(C1)NC(=O)OCCCC*,{},47.31586772 +234,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1C)C1C(CC(CC1)C(c1cc(c(c(c1)C)*)C)(C)C)C)C,{},250.477212 +235,*C1C(=O)N(C(=O)C1*)c1ccccc1,{},197.1821336 +236,*[Si](c1ccc(cc1)*)(C)C,{},126.7080905 +237,*Nc1c(c(c(c(c1[2H])[2H])*)[2H])[2H],{},235.370823 +238,*C1c2c(C(C=C1)C=C*)cc(cc2)CCCCCC,{$C=CC1C=CC(c2ccc(CCCCCC)cc21)$},9.704073219 +239,*c1ccc2n(c3c(c2c1)cc(cc3)/C=C/c1ccc(cc1)Oc1c(cc(cc1)c1ccc(cc1)c1ccc(cc1)c1cc(c(cc1)Oc1ccc(cc1)/C=C/*)C#N)C#N)CC(CCCC)CC,{},129.2206103 +240,*c1sc(c(c1OCCCCCCC)C)*,{},32.04437108 +241,*OC(=O)C(=O)OCCCCCCCCCC*,{},-65.08541819 +242,*C=CC(C(*)C)C,{$C=CC(C)C(C)$},78.17880502 
+243,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(CC2(C)C)(c2ccc(cc2)Oc2cc3c(C(=O)N(C3=O)c3cc(ccc3)N3C(=O)c4c(C3=O)cc(cc4)Oc3ccc(cc3)C3(CC(c4c3cc(cc4)Oc3cc4c(C(=O)N(C4=O)c4cc(ccc4)*)cc3)(C)C)C)cc2)C)cc1,{},231.956044 +244,*=C1SC(=S)N(C1=O)c1ccc(cc1)N1C(=S)SC(=Cc2ccc(cc2)C=*)C1=O,{<=Cc1ccc(cc1)C=C2SC(=S)N(C2=O)c3ccc(cc3)N4C(=O)C(SC4=S)=>},187.1036635 +245,*Nc1c(cc(c(c1)C(=O)O)*)OC,{},93.18891585 +246,*NNC(=O)CCCCC(=O)NNC(=O)CCCCCCCC(=O)*,{},101.0022623 +247,*n1c(=O)c2sc3c(sc2c1=O)c(=O)n(c3=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)NC(=O)c1ccc(cc1)*,{},327.2441987 +248,*C1CC(CC1)C*,{},42.35931484 +249,*Nc1ccc(cc1)NC(=O)c1c(cc(c(c1)C(=O)O)C(=O)*)C(=O)O,{},240.3936293 +250,*NNC(=O)c1cc(ccc1)C(=O)*,{},232.2266265 +251,*c1sc(c(c1CCCCCCCCCCCC)CCCCCCCCCCCC)c1sc(cc1)c1sc(cc1)*,{},14.60597757 +252,*C(C*)C(=O)NCC,{$CC(C(=O)NCC)$},55.10481694 +253,*Oc1c(cc(cc1C(C)(C)C)C(=O)*)C(C)(C)C,{},206.7951116 +254,*OS(=O)(=O)c1ccc(cc1)c1ccc(cc1)S(=O)(=O)Oc1c(cc(cc1Br)C1(CCCCC1)c1cc(c(c(c1)Br)*)Br)Br,{},268.4647521 +255,*C(C*)C(=O)NCCCCCCCCCCCC,{$CC(C(=O)NCCCCCCCCCCCC)$},21.73577755 +256,*N(c1ccc(cc1)*)CCCCCC,{},-128.6299242 +257,*Oc1c(cc(cc1)C(c1cc(c(cc1)OC(=O)c1cc(ccc1)C(=O)*)[N+](=O)[O-])(CCC(=O)O)C)[N+](=O)[O-],{},127.0784722 +258,*c1ncc(cc1)C(=O)NNC(=O)*,{},135.1836518 +259,*c1n(c(cc1)*)C(C(=O)NO)n1ccc2c1cccc2,{},180.3082327 +260,*N=Nc1ccc(cc1)*,{},144.6951233 +261,*c1sc2c(c1)sc(c2)c1sc(cc1CCCCCCCCCCBr)c1sc(c(c1)CCCCCCCCCCBr)*,{},24.95900908 +262,*C(CC(C*)c1ccccc1)(C(=O)OC)C#N,{$CC(c1ccccc1)CC(C(=O)OC)(C#N)$},127.0156605 +263,*=C=C=C(C(=*)COS(=O)(=O)c1ccc(cc1)OC)COS(=O)(=O)c1ccc(cc1)OC,{$=C=C=C(COS(=O)(=O)c1ccc(OC)cc1)C(COS(=O)(=O)c1ccc(OC)cc1)=$},55.30555285 +264,*c1c(cc(c(c1)OCCCCCCCCCC)C#C*)OCCCCCCCCCC,{},61.85443636 +265,*OCCCC(=O)NCCCCCCNC(=O)CCCCCC*,{},1.467345964 +266,*c1cncc(c1)C(=O)NCCCCCCCCCCNC(=O)*,{},85.60365033 +267,*C(C(C(C(*)(F)F)(F)F)(F)F)(C(F)(F)F)F,{$C(F)(F)C(F)(F)C(F)(F)C(C(F)(F)F)(F)$},-86.88823628 +268,*Oc1cc(ccc1)C(=O)NNC(=O)c1cc(ccc1)C(=O)NNC(=O)CC*,{},234.2133464 
+269,*Oc1cc(ccc1)NC(=O)c1ccc(cc1)C(c1ccc(cc1)C(=O)Nc1ccc(cc1)*)(C(F)(F)F)C(F)(F)F,{},157.6228264 +270,*c1n(c(nn1)CCCCCCCC*)N,{},-41.92176029 +271,*c1nc2c([nH]1)cc(cc2)c1cc2c(nc([nH]2)c2ccc(cc2)NC(=O)c2ccc(cc2)C(=O)Nc2ccc(cc2)*)cc1,{},358.667269 +272,*Nc1c(cc(cc1Cl)*)Cl,{},-0.214281278 +273,*c1ccc2n(c3c(c2c1)cc(cc3)N=Cc1sc(cc1)c1sc(cc1)C=N*)CCCCCC,{},95.07860424 +274,*N1C2(CCCC2)C(=O)N(C1=O)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},17.25361376 +275,*P(=Nc1ccc(cc1)N=P(CC*)(Cl)Cl)(Cl)Cl,{},-27.34500252 +276,*Nc1c(cc(c(c1)SCCC#N)NC(=O)c1cc(ccc1)C(=O)*)SCCC#N,{},38.16065966 +277,*C(C*)(c1ccc(cc1)OC(=O)CC)OC(=O)C,{$CC(c1ccc(OC(=O)CC)cc1)(OC(C)=O)$},2.162388076 +278,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)CCCCCCCC,{},105.6965321 +279,*c1sc2cc3c(cc2n1)sc(n3)CCCCC*,{},87.36313445 +280,*NC(C(=O)*)CCC(=O)OCCCCCCCCCCCC,{},30.09586697 +281,*NC(C(=O)*)CO,{},84.57547927 +282,*Nc1c(cccc1)NC(=O)CCCCCCC(=O)*,{},71.92438381 +283,*Oc1c(cc(cc1)C=Cc1ccc(cc1)C=Cc1cc(c(cc1)OC(=O)CCCCCCCCC(=O)*)C)C,{},35.47523522 +284,*c1c2c(nccn2)c(cc1)*,{},216.378732 +285,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)c1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)*,{},165.0428244 +286,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)F,{},-7.2122695 +287,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)OCCCOc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)S(=O)(=O)c2ccc(cc2)NC(=O)*)cc1,{},163.1829015 +288,*OC(=O)c1ccc(cc1)C(=O)NCCCCCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},-20.66610996 +289,*NNC(=O)c1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)C(=O)*,{},133.1528291 +290,*N=P(*)(OCCC(=O)C=C)OCCC(=O)C=C,{},-42.12432011 +291,*S(=O)(=O)c1ccc(cc1)c1ccc(cc1)*,{},229.0539301 +292,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)S(=O)(=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(=O)C2(CC)C)C(=O)*)cc1,{},242.6534046 +293,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)C,{},5.817024886 +294,*NC(CCCCNC(=O)NCCCCNC(=O)*)C(=O)OC,{},0.336556425 
+295,*C(=C*)c1nc2c(n1C)cccc2,{$C=C(c1nc2ccccc2n1C)$},-6.104199835 +296,*Oc1ccc(cc1)N=Nc1ccc(cc1)*,{},116.9759489 +297,*c1nc(nc(n1)NC(=O)c1c(cc(c(c1)C(=O)N*)C(=O)O)C(=O)OC(=O)Nc1c(ccc(c1)NC(=O)OCCCCCCCC)C)c1ccccc1,{},-1.691479041 +298,*c1[nH]c2cc3c(cc2n1)nc([nH]3)c1ccc(cc1)*,{},340.5865983 +299,*OC(=O)Cc1ccc(cc1)CC(=O)OCCCC*,{},-34.10658315 +300,*SC(=O)NCCCCCCNC(=O)SCCCC*,{},-33.13797704 +301,*c1c(cc(c(c1)OC)*)OC,{},63.90036252 +302,*N(CC*)C(=O)CCC(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F,{},-52.64825421 +303,*OC(=O)c1c(cccc1)NC(=O)c1ccc(cc1)C(=O)Nc1c(cccc1)C(=O)OC(=O)c1cc(ccc1)C(=O)*,{},185.0999075 +304,*c1sc(cc1CCCCCCCC)Nc1ccc(cc1)*,{},49.50827798 +305,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)c1ccc(cc1)c1ccc(cc1)*,{},278.5221779 +306,*n1c(=O)c2c(c3c(c(c2c1=O)F)c(=O)n(c3=O)c1ccc(cc1)*)F,{},337.816724 +307,*OC(=O)c1ccc(cc1)C(=O)NCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},4.250402609 +308,*NC(=O)NC(=O)NCc1c(c(cc(c1)Cc1c(c(cc(c1)C*)C=O)O)C=O)O,{},190.6072154 +309,*OC(=O)C(C*)(CCCC)CCCC,{},-17.98562642 +310,*C1(C(=O)C(CCC1)C*)CO,{},71.70468573 +311,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)*)cc1,"{N4C(=O)c5ccc(cc5C4=O)Oc6ccc(cc6)Oc7ccc8c(c7)C(=O)N(C8=O)>}",191.2304459 +312,*C(=C*)c1cc(c(c(c1)CO)OCc1ccc(cc1)CNC(COCCCCCCCC)C)CO,{$C=C(c1cc(CO)c(OCc2ccc(CNC(C)COCCCCCCCC)cc2)c(CO)c1)$},65.78481038 +313,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCCCCCCCC*,{},73.37639604 +314,*c1sc2cc3c(cc2n1)sc(n3)c1ccc(cc1)c1ccc(cc1)*,{},343.9030515 +315,*c1sc(cc1COCCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)*,{},59.88076613 +316,*Nc1ccc(cc1)CC(=O)*,{},63.68828699 +317,*C(C*)C(=O)N(c1ccccc1)O,{$CC(C(=O)N(O)c1ccccc1)$},141.1832818 +318,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)C(=O)*)c1ccccc1,"{C(=O)c2ccc(cc2)C(=O)>}",65.88587862 +319,*OC(CC(=O)*)C(C)C,{},-24.82439314 +320,*NC(C(=O)*)CC(=O)OCc1ccccc1,{},70.10647363 +321,*c1c(c(cc(c1)N=Nc1ccc(cc1)[N+](=O)[O-])*)O,{},165.6801535 +322,*OCCCCC(=O)NCCCCCCNC(=O)CCCC*,{},-56.49395983 
+323,*=C=C=C(C(=*)CCCCOC(=O)NC(=O)OCCCC)CCCCOC(=O)NC(=O)OCCCC,{$=C=C=C(CCCCOC(=O)NC(=O)OCCCC)C(CCCCOC(=O)NC(=O)OCCCC)=$},27.51087357 +324,*Oc1cc(c(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)*)(C)C)[N-][N+]#N,{},196.3053138 +325,*c1sc(nn1)SCC(=O)NN=Cc1ccc(cc1)OCCCCOc1ccc(cc1)C=NNC(=O)CS*,{},39.79677892 +326,*OC(=O)NCCSCCCCSCCNC(=O)OCC*,{},26.25521298 +327,*C(C*)C(=O)N1CC[N+](CC1)(CCCCCCCCCCCC)C,{$CC(C(=O)N1CC[N+](C)(CCCCCCCCCCCC)CC1)$},-81.38297384 +328,*OC(=O)c1cc(ccc1)c1cc(ccc1)C(=O)OCCCCCCCCCC*,{},8.531981028 +329,*C1(C(=O)OC(=O)C1)C*,{},143.854555 +330,*Oc1cc(ccc1)OC(=O)c1ccc(cc1)C(=O)Oc1cc(ccc1)OCCCCCCCCCC*,{},20.92794041 +331,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)Oc1ccc(cc1)OC(=O)c1cc(ccc1)C(=O)*,{},119.4238893 +332,*=C1CN(CC(=Cc2oc(cc2)C(=O)CCCCC(=O)c2oc(cc2)C=*)C1=O)C,{<=Cc1ccc(o1)C(=O)CCCCC(=O)c2ccc(o2)C=C3CN(C)CC(C3=O)=>},95.14114513 +333,*Oc1ccc(cc1)NC(=O)c1c(c(c(c(c1F)F)C(=O)Nc1ccc(cc1)*)F)F,{},168.6168885 +334,*C(=C(CC*)C)C,{$CCC(C)=C(C)$},46.49641909 +335,*S(=O)(=O)NCCNS(=O)(=O)c1ccc(cc1)*,{},7.54081281 +336,*N(C(=O)*)c1ccccc1,{},150.7130657 +337,*OC(=O)C(C(=O)OCCCCCC*)CCCCCCOc1ccc(cc1)c1ccc(cc1)OCc1ccc(cc1)[N+](=O)[O-],{},17.95743357 +338,*N1C(=S)SC(=Cc2ccc(cc2)C=C2SC(=S)N(C2=O)CCCCCC*)C1=O,{},35.37444876 +339,*c1oc(cc1)Sc1oc(cc1)C=Nc1ccc(cc1)N=C*,{},95.64631957 +340,*OC(=O)c1ccc(cc1)S(=O)(=O)CCCCCCS(=O)(=O)c1ccc(cc1)C(=O)OCCCCCC*,{},35.86441642 +341,*OS(=O)(=O)c1cc(ccc1)S(=O)(=O)Oc1cc(ccc1)*,{},29.19231194 +342,*SSC(=O)N(c1ccc(cc1)Cc1ccc(cc1)N(C(=O)SSCCCC*)C)C,{},20.68923822 +343,*C(C*)(c1ccc(cc1)OC(=O)C)OC(=O)C,{$CC(c1ccc(OC(C)=O)cc1)(OC(C)=O)$},74.85696518 +344,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OC(=O)CCCCCCCCCCC(=O)*)C,{},6.458707655 +345,*N(C(=O)CCCCC(=O)N(CC(C(C(C*)(F)F)(F)F)(F)F)CC)CC,{},10.59395774 +346,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)c2ccc(cc2)C(=O)*)CC1,{},192.2096838 +347,*Oc1cc(c(cc1)N1ON1c1c(cc(cc1)OC(=O)CCCCCCCCCCC(=O)*)C)C,{},50.67469492 
+348,*c1c(cc(c(c1)Oc1ccc(cc1)S(=O)(=O)O[Na])c1ccc(cc1)*)Oc1ccc(cc1)S(=O)(=O)O[Na],{},172.5717242 +349,*OC(=O)NCCCCC*,{},-13.55087665 +350,*N1C(=O)C(CC1=O)Nc1ccc(cc1)NC1C(=O)N(C(=O)C1)c1ccc(cc1)Cc1ccc(cc1)*,{},248.5034267 +351,*C(=C*)c1ccc(cc1)OCCCCCC(=O)Oc1c(c(c(c(c1F)F)F)F)F,{$C=C(c1ccc(OCCCCCC(=O)Oc2c(F)c(F)c(F)c(F)c2F)cc1)$},73.83198457 +352,*Oc1cc(ccc1)OC(=O)c1ccc(cc1)C=Nc1ccc(cc1)OCCCCCCOc1ccc(cc1)N=Cc1ccc(cc1)C(=O)*,{},83.83402436 +353,*C1=NC2=CC(C=CC2=C1)*,{},103.156476 +354,*C=CCCCCCCCC*,{$C=CCCCCCCCC$},-17.2820223 +355,*C(CCC*)Cl,{$CCCC(Cl)$},-30.93658282 +356,*C#CC(=C(*)CCCCOC(=O)NCCC)CCCCOC(=O)NCCC,{$C#CC(CCCCOC(=O)NCCC)=C(CCCCOC(=O)NCCC)$},40.70123878 +357,*NC(CC(=O)*)c1ccccc1,{},-30.79261317 +358,*S(=O)(=O)C(C=CC(*)C)C,{},33.82674704 +359,*C(C(*)(C([2H])([2H])[2H])C([2H])([2H])[2H])([2H])[2H],{$C([2H])([2H])C(C([2H])([2H])[2H])(C([2H])([2H])[2H])$},85.2671101 +360,*c1sc(cc1)C(=O)NCCCCCCNC(=O)*,{},42.90894132 +361,*SC(=O)c1ccc(cc1)C(=O)SCc1c(c(c(c(c1C)C)C*)C)C,{},127.8963011 +362,*c1ccc2c(c1)c(=O)oc(n2)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)c1oc(=O)c2c(ccc(c2)C*)n1,{},378.8956296 +363,*Nc1ccc(cc1)NC(=O)c1c(cc(c(c1)SCCCCCCCC)C(=O)*)SCCCCCCCC,{},69.67482713 +364,*Oc1c(cc(cc1)C=Nc1ccc(cc1)N=Cc1cc(c(cc1)OC(=O)CCCCC(=O)*)OC)OC,{},-41.85748469 +365,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)CCCCCCC(=O)*,"{C(=O)c2ccc(cc2)OC(=O)CCCCCCC(=O)>}",72.32081554 +366,*c1nc(sc1)N=Cc1cc(c(cc1)OCCCCOc1c(cc(cc1)C=Nc1nc(cs1)c1ccc(cc1)Oc1ccc(cc1)*)OC)OC,{},104.0626058 +367,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)NNC(=O)c1cc(c(cc1)NC(=O)c1ccc(cc1)*)O,{},111.457133 +368,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1cc(ccc1)Oc1ccc(cc1)C(=O)*,"{C(=O)c3cccc(c3)Oc4ccc(cc4)C(=O)>}",89.5930824 +369,*C(C*)OCCC,{$CC(OCCC)$},-17.16506937 +370,*OC(=O)CCCC(=O)OCC(C(C(C(C*)(F)F)(F)F)(F)F)(F)F,{},-64.11657159 +371,*OC1CCC(CC1)OC(=O)CCCCCCC(=O)*,"{C(=O)CCCCCCC(=O)>}",-35.83964331 +372,*N1C(=O)N(C(=O)C1(C)C)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},302.3547996 
+373,*Oc1ccc(cc1)OCC(=O)OC(=O)c1ccc(cc1)C(=O)OC(=O)C*,{},80.44433982 +374,*c1c2c(c(cc1)*)cccc2,{},189.0821827 +375,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)c1ccc(cc1)Cl,{},168.825845 +376,*[Si](c1ccc(cc1)*)(OCC)OCC,{},7.02246836 +377,*NC(CNC(=O)NCCCCCCNC(=O)*)C,{},55.35434565 +378,*c1nc(cs1)c1ccc(cc1)c1nc(sc1)CCCC*,{},110.5188915 +379,*Oc1ccc(cc1)OC(=O)CCCCCCCC(=O)*,"{C(=O)CCCCCCCC(=O)>}",-6.058372606 +380,*C(C*)OC(=O)C(CC)(CC)CC,{$CC(OC(=O)C(CC)(CC)CC)$},20.41787387 +381,*NC(=O)CCCCCCCCCCCCCCC(=O)NCCc1ccc(cc1)CC*,{},65.25488595 +382,*C1CCN(CC1)SC(=O)OCCCCOC(=O)SN1CCC(CC1)CCC*,{},-6.032415023 +383,*Oc1cc2c(C(CC2(C)C)(c2ccc(cc2)Oc2ccc(cc2)C2(CC(c3c2cc(cc3)*)(C)C)C)C)cc1,{},308.5311609 +384,*OC(=O)c1ccc(cc1)CCc1ccc(cc1)C(=O)OCC*,{},53.2795594 +385,*c1oc2c(n1)cc(cc2)c1cc2c(oc(n2)CCCCCCCC*)cc1,{},61.93714913 +386,*C(=C*)CNS(=O)(=O)CC,{$C=C(CNS(=O)(=O)CC)$},44.748248 +387,*c1nc(ccc1)Oc1ccc(cc1)Oc1ccc(cc1)O*,{},-19.11943844 +388,*C(=C(*)C)[Si](CCCC)(C)C,{$C(C)=C([Si](C)(C)CCCC)$},152.5242236 +389,*OC(=O)CCSCCC(=O)*,{},6.950276755 +390,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=C(C#N)C#N)c1ccc(cc1)c1ccc(cc1)C(=C(C#N)C#N)Nc1ccc(cc1)*,{},270.2905197 +391,*Oc1cc(ccc1)OC(=O)Oc1ccc(cc1)OC(=O)*,{},-104.3379932 +392,*OC(=O)c1ccc(cc1)NC(=O)CCCCC(=O)Nc1ccc(cc1)C(=O)OCCC*,{},83.99780359 +393,*c1nc2c([nH]1)cc(cc2)NC(=NO)C(=NO)Nc1ccc2c(nc([nH]2)CCCC*)c1,{},170.1130329 +394,*Nc1c(c(c(c(c1C)C)NC(=O)c1ccc(cc1)C(=O)*)C)C,{},205.6552433 +395,*OC(=O)Nc1ccc(cc1)NC(=O)OCC*,{},134.2908917 +396,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)C(=O)*)Cl,{},119.6241841 +397,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1c(cc(c(c1)C(=O)OCCCCCC)C(=O)*)C(=O)OCCCCCC,{},77.13970172 +398,*N1CCN(CC1)CCC(=O)N(CCN(C(=O)CC*)C(C)C)C(C)C,{},-43.71593166 +399,*c1ccc2n(c3c(c2c1)cc(cc3)C=NN(c1ccc(cc1)S(=O)(=O)c1ccc(cc1)N(N=C*)CCCC)CCCC)CC,{},139.604302 +400,*Sc1ccc(cc1)c1ccc(cc1)SC(=O)CCCCC(=O)*,{},13.2621556 +401,*N(c1ccc(cc1)c1ccc(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CC)CC,{},62.56896406 +402,*S(=O)(=O)CCCC*,{},-14.41108992 
+403,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)c2ccc(cc2)C(=O)*)CCC1,{},240.3798309 +404,*c1ccc2n(c3c(c2c1)cc(cc3)*)CC,{},206.6525359 +405,*OC(C(C(=O)*)(C)C)c1ccccc1,{},65.42132156 +406,*c1nc2c([nH]1)cc(cc2)c1ccc2c(nc([nH]2)c2ccc(cc2)C(=O)c2ccc(cc2)*)c1,{},300.9001313 +407,*Nc1ccc(cc1)NC(=S)NC(=O)c1ccc(cc1)C(=O)NC(=S)*,{},220.8197438 +408,*=c1c2cc3c(cc2c(=O)o1)c(=O)oc3=Nc1cc(ccc1)Oc1cc(ccc1)Oc1cc(ccc1)N=*,{<=Nc1cccc(c1)Oc2cccc(c2)Oc3cccc(c3)N=C4OC(=O)c5cc6c(cc54)C(OC6=O)=>},132.5253262 +409,*OS(=O)(=O)c1ccc(cc1)S(=O)(=O)c1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)*,{},194.2678014 +410,*OC(CCOC(=O)c1cc(ccc1)C(=O)*)C,"{C(=O)c1cccc(c1)C(=O)>}",-42.28619333 +411,*NC(=O)CCCCCCCCC(=O)NC*,{},66.76854223 +412,*NC(=O)CCP(=O)(CCC(=O)NCC*)C,{},143.8466204 +413,*C(C*)c1c(cccc1)OC,{$CC(c1ccccc1OC)$},160.10962 +414,*OCCCCCOCCCCCCOCCCCCC*,{},-72.01996519 +415,*NC(=O)C(=O)NCCNC(=O)CCCCCCCC(=O)NCC*,{},112.5256843 +416,*OC(=O)c1ccc(cc1)C(=O)OCC(C*)(CCl)CCl,{},11.46555203 +417,*OC(COC(=O)CCCCC(=O)*)C,"{C(=O)CCCCC(=O)>}",-15.95518318 +418,*c1ccc(cc1)c1ccc(cc1)C(*)(C)C,{},210.9469969 +419,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*,{},203.5999878 +420,*Oc1c(cc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)*)CBr,{},98.31320425 +421,*Oc1cc(ccc1)C(=O)OC(=O)c1cc(ccc1)OCC*,{},3.697542049 +422,*C(=C*)c1ccc(cc1)[N+](=O)[O-],{$C=C(c1ccc([N+](=O)[O-])cc1)$},-12.60583746 +423,*c1sc(cc1)[Si](c1sc(cc1)[SiH](*)C)(C)C,{},60.04219897 +424,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)*)C,"{C(=O)c3ccc(cc3)Oc4ccc(cc4)C(=O)>}",76.95210942 +425,*C(C*)C(=O)n1sc2c(c1=O)cccc2,{$CC(C(=O)n1sc2ccccc2c1=O)$},48.67425788 +426,*Nc1cc(cc(c1)C(=O)OCCN(c1ccc(cc1)S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C)NC(=O)c1cc(cc(c1)OCCN(c1ccc(cc1)S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C)C(=O)*,{},172.7344272 +427,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1c(cc(cc1)c1cc(c(cc1)*)C)C,{},271.2577887 
+428,*c1c(cc(c(c1)OCCCCCCOc1ccc(cc1)C1CCC(CC1)CCCCC)C=C*)OCCCCCCOc1ccc(cc1)C1CCC(CC1)CCCCC,{},36.67582945 +429,*OC(=O)SSC(=O)OCCCC*,{},-58.94578339 +430,*NC(C(=O)NCC(=O)*)C,{},19.96697332 +431,*NC(=O)CNC(=O)CC*,{},70.42884183 +432,*SC(=O)CCCCC(=O)SCc1c(c(c(c(c1C)C)C*)C)C,{},44.80678229 +433,*C(=C*)c1ccccc1,{$C=C(c1ccccc1)$},66.12991425 +434,*c1cc2c(C(=O)N(C2=O)c2cc(ccc2)NC(=O)c2cc(ccc2)C(=O)Nc2cc(ccc2)NC(=O)*)cc1,{},213.4893774 +435,*Oc1c(cc(cc1Br)C(c1cc(c(c(c1)Br)OC(=O)c1cc(ccc1)C(=O)*)Br)(CCC(=O)O)C)Br,{},175.2273279 +436,*OC(=O)CCCCC(=O)OCC(C*)(CCl)CCl,{},2.83724036 +437,*N1CCN(CC1)C(=O)SSCCCCSSC(=O)*,{},-3.571974592 +438,*N1C(=O)N(C(=O)C1(c1ccccc1)c1ccccc1)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},183.844758 +439,*c1cc2n(c3c(c2cc1)ccc(c3)C#CC#C*)CCCCCCCCCCCCCCCC,{},68.35869686 +440,*c1c2c(nccc2)c(cc1)OCc1ccc(cc1)COc1c2ncccc2c(cc1)C*,{},200.3538526 +441,*OC(=O)c1ccc(cc1)C(=O)OCCOCCOCC*,{},36.06576581 +442,*c1c2c(c(s1)*)sc(n2)CCCCCCCCC,{},56.24069524 +443,*=C=C=C(C(=*)COS(=O)(=O)c1ccc(cc1)C)COS(=O)(=O)c1ccc(cc1)C,{$=C=C=C(COS(=O)(=O)c1ccc(C)cc1)C(COS(=O)(=O)c1ccc(C)cc1)=$},76.80290526 +444,*Oc1c(cc(cc1)C=CC=Cc1cc(c(cc1)OCCCCCCC*)C)C,{},41.89270134 +445,*Oc1ccc(cc1)C(=O)OCCCCOC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},68.79315579 +446,*Oc1c(c(ccc1)Oc1ccc(cc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)*)C#N,{},160.9324159 +447,*C1C(=O)N(C(=O)C1*)c1ccc(cc1)COC(C)(C)C,{},122.5873684 +448,*c1ccc2n(c3c(c2c1)cc(cc3)C(=O)Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)*)(C)C)C,{},73.83261176 +449,*/C=C/*,{$/C=C/$},59.5588378 +450,*c1ncc(cc1)c1n(c(cc1)c1n(c(cc1)*)C)C,{},256.5965094 +451,*c1ncc(cc1)*,{},322.0959561 +452,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)NC(=O)*)cc1,{},48.20867337 +453,*N(C(=O)*)CCCCCC,{},14.34014558 +454,*Nc1cc(ccc1)C#Cc1cc(ccc1)NC(=O)c1c(cc(cc1)C(=O)*)C(=O)O,{},187.6187871 +455,*c1c(cc(c(c1)C*)C)O,{$Cc1cc(c(O)cc1C)$},12.40187977 +456,*Oc1cc(c(cc1)C(=O)Nc1ccc(cc1)NC(=O)c1c(cc(cc1)*)C(=O)O)C(=O)O,{},216.6231471 
+457,*c1nc(nc(n1)Oc1cc(ccc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1cc(ccc1)O*)Sc1ccccc1,{},184.9515774 +458,*=C1c2c(C(=O)O1)cc(cc2)c1cc2c(C(=O)OC2=Nc2cc(ccc2)Oc2cc(ccc2)Oc2cc(ccc2)N=*)cc1,{<=Nc1cccc(c1)Oc2cccc(c2)Oc3cccc(c3)N=C4OC(=O)c5ccc(cc54)c6ccc7c(c6)C(=O)OC7=>},193.7356518 +459,*Nc1c(cc(cc1)*)CC,{},162.1855704 +460,*OC(=O)NCCCCCCNC(=O)OCCN(CC*)c1ccc(cc1)N=Nc1ccc(cc1)C,{},24.09354398 +461,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2ccc(cc2)*)c1,{},423.6341908 +462,*c1ncc(cc1)C(=O)OC(=O)COc1ccc(cc1)OCC(=O)OC(=O)*,{},155.9709567 +463,*C(C*)C(=O)OCCN(S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C,{$CC(C(=O)OCCN(C)S(=O)(=O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F)$},-148.0297376 +464,*OC(=O)c1cc(ccc1)C(=O)OCC1C(C1)C*,{},10.23490017 +465,*NC(C(=O)*)C(C)C,{},153.0278775 +466,*C(C*)(C(=O)OCC(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)F,{$CC(C(=O)OCC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F)(F)$},-98.70472035 +467,*C(*)C(=O)OC(C)(C)C,{$C(C(=O)OC(C)(C)C)$},12.73837737 +468,*OC(=O)c1c(cccc1)c1c(cccc1)C(=O)OCCCC*,{},71.34273106 +469,*Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)Nc1ccc(cc1)CCc1ccc(cc1)NC(=O)*,{},183.1975298 +470,*C1C(=O)N(C(=O)C1C(C*)OC(=O)C)c1ccccc1,{},157.3759721 +471,*OC(CC(=O)*)C(Cl)Cl,{},110.2326669 +472,*OC(=O)NCCCCCCCCCCNC(=O)OCCCCCCCC*,{},24.78605928 +473,*C1C=CC(CC1)*,{},103.377349 +474,*NC(=O)NCCCP(CCC*)c1ccccc1,{},-49.19787298 +475,*c1oc(nn1)CCCCCCCC*,{},-48.88416733 +476,*OC(=O)NCCCCCCNC(=O)OCC(C*)(C)C,{},47.60248847 +477,*N=P(*)(OCC(C(C(F)(F)F)(F)F)(F)F)OCC(C(C(F)(F)F)(F)F)(F)F,{},-77.91107652 +478,*OC(=O)c1ccc(cc1)C(=O)OCCCCCC(=O)NCCNC(=O)CCCCC*,{},81.65245915 +479,*c1cc2c(C(=O)OC2=Nc2cc(ccc2)N=C2OC(=O)c3c2cc(cc3)C(=O)*)cc1,{},246.6584182 +480,*C*,{$C$},-2.526682925 +481,*c1c(nnc(n1)c1nc(ccc1)c1nc(c(nn1)c1ccccc1)c1ccc(cc1)Sc1ccc(cc1)*)c1ccccc1,{},419.5781202 +482,*c1cc2c(C(=O)N(C2=O)c2c(cc(cc2)c2cc(c(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Oc2ccc(cc2)C(c2ccc(cc2)OC(=O)*)(C)C)C)C)cc1,{},87.17702398 
+483,*=c1sc(cs1)c1ccc(cc1)C=*,{<=Cc1ccc(cc1)C2=CSC(S2)=>},105.3372816 +484,*Oc1ccc(cc1)N=Cc1ccc(cc1)OC(=O)c1ccc(cc1)C=Nc1ccc(cc1)OCCCCOC(=O)NCCCCCCNC(=O)OCCCC*,{},-34.25655466 +485,*Oc1ccc(cc1)C(=O)CNc1ccc(cc1)NCC(=O)c1ccc(cc1)*,{},158.6944649 +486,*OC(=O)N(c1c(ccc(c1)N(C(=O)OCC*)C)C)C,{},42.46691504 +487,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(C(=C2C)C(=O)OCC)c2ccc(cc2)Cl)C(=O)*)cc1,{},170.3261787 +488,*C(C*)(C(=O)Oc1ccc(cc1)C)C,{$CC(C(=O)Oc1ccc(C)cc1)(C)$},126.1154692 +489,*c1sc2c(n1)ccc(c2)OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCCOc1ccc(cc1)*,{},-17.21827415 +490,*c1nc2c(c(c1)OCCO*)cc(cc2)C,{},88.77435741 +491,*Oc1ccc(cc1)C(=O)CCCCCCCCC(=O)c1ccc(cc1)OC(=O)*,{},6.568396994 +492,*c1ccc2c(nc([nH]2)c2cc(ccc2)c2nc3c([nH]2)ccc(c3)C(=O)Nc2ccc(cc2)NC(=O)*)c1,{},309.3374253 +493,*OCC1(C2CCC(C1)CC2)C*,{},81.08225465 +494,*c1nc(ccc1)C=Nc1ccc(cc1)N=C*,{},139.1234551 +495,*/C(=C(/*)\c1ccccc1)/c1ccccc1,{$/C(c1ccccc1)=C(c1ccccc1)/$},206.5698859 +496,*N1C(CN(C(C1)C)C(=O)OCCOC(=O)*)C,{},60.10633691 +497,*C1C(CC1)*,{$C1CCC1$},69.57488221 +498,*OC(C*)CCCCCCCCOc1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)OCC(CC)C,{},-0.17829817 +499,*C(C*)(C(=O)OCCOC(=O)c1cc(cc(c1)OC(=O)c1ccc(cc1)N=Nc1ccc(cc1)OCCCCCCC)OC(=O)c1ccc(cc1)N=Nc1ccc(cc1)OCCCCCCC)C,{$CC(C(=O)OCCOC(=O)c1cc(OC(=O)c2ccc(N=Nc3ccc(OCCCCCCC)cc3)cc2)cc(OC(=O)c2ccc(N=Nc3ccc(OCCCCCCC)cc3)cc2)c1)(C)$},9.014452923 +500,*C(C*)C(=O)OCCOC(C(F)(F)F)(C(F)(F)F)F,{$CC(C(=O)OCCOC(F)(C(F)(F)F)C(F)(F)F)$},-51.63721715 +501,*Sc1ccc(cc1)*,{},64.586946 +502,*Oc1ccc(cc1)CCCNC(=O)CCCCC(=O)NCCCc1ccc(cc1)OCCCCC*,{},28.2211431 +503,*SSCCCCSSCCCCCC*,{},-41.26672381 +504,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)OCCN(CCOC(=O)*)c2ccc(cc2)N=Nc2ccc(cc2)[N+](=O)[O-])cc1,{},145.3751112 +505,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)C(C(=O)N1C(=O)N(C(C1=O)(C)C)C(=O)C(*)C)C,{},252.5865186 +506,*OC(=O)CCCCC(=O)OCc1ccc(cc1)C*,{},-4.158432897 +507,*Nc1c(cc(cc1)NC(=O)c1c(cc(c(c1)C(=O)*)C(=O)O)C(=O)O)S(=O)(=O)O[Na],{},129.970858 
+508,*Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)C(CC(C)C)N1C(=O)c2c(C1=O)cccc2)C(=O)*,{},36.87047274 +509,*N1C(CN(C(C1)C)SC(=O)OCCCCOC(=O)S*)C,{},-30.512105 +510,*N(C(=O)*)CC=C,{},164.8639006 +511,*[Si](c1ccc(cc1)*)(c1ccc(cc1)CN(C)C)C,{},12.90627629 +512,*c1sc(cc1)C#CC#C*,{},49.13696662 +513,*c1ncnc(c1)C=Cc1ccc(cc1)C=C*,{},84.80053451 +514,*c1nc(nc(n1)Oc1c2c(ccc1C(=O)Nc1ccc(cc1)NC(=O)c1c(c3c(cc1)cccc3)O*)cccc2)N1CCCCC1,{},162.6671353 +515,*OC(=O)CCC(=O)OCCCCCCCCCC*,{},1.783806133 +516,*c1cc2n3c(=O)c4cc5c(cc4c3nc2cc1)c(=O)n1c2ccc(cc2nc51)*,{},384.637936 +517,*c1ccc2n(c3c(c2c1)cc(cc3)C(=O)Oc1ccc(cc1)C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)OC(=O)*)C,{},262.5942508 +518,*=Nc1ccc(cc1)N=C(Nc1c(cc(cc1)c1cc(c(cc1)NC(=*)C)C(=O)O)C(=O)O)C,{<=Nc1ccc(cc1)N=C(C)Nc2ccc(cc2C(=O)O)c3ccc(c(C(=O)O)c3)NC(C)=>},89.38045943 +519,*OC(=O)C=C(CC(=O)OCC*)c1ccc(cc1)OCC,{},1.131191733 +520,*OC(=O)Nc1c(ccc(c1)NC(=O)OCCOCCOCCC*)C,{},-26.75831261 +521,*Nc1c(cccc1)CCc1c(cccc1)NC(=O)*,{},207.655323 +522,*SC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)SCc1ccc(cc1)C*,{},90.77725081 +523,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)NCCC[Si](O[Si](CCCNC(=O)*)(C)C)(C)C)cc1,{},143.0502256 +524,*OC(=O)c1cc(ccc1)C(=O)OCCNC(=O)c1ccc(cc1)C(=O)NCC*,{},121.124261 +525,*C(C*)(C(=O)OCCCCCCCCCCOc1ccc(cc1)N1C(=O)C(=Cc2c(c3c(n2C)cccc3)C)C(=C(C)C)C1=O)C,{$CC(C(=O)OCCCCCCCCCCOc1ccc(N2C(=O)C(=Cc3c(C)c4ccccc4n3C)C(=C(C)C)C2=O)cc1)(C)$},-17.96880959 +526,*SC(=O)CCCCCCCCC(=O)SCCCCCC*,{},-39.13613776 +527,*c1nc(ccc1)C(=O)NCCCCCCCCCCNC(=O)*,{},49.59402876 +528,*NC(=O)c1ccc(cc1)C(=O)NCCCCCCCCCC*,{},50.1155014 +529,*Nc1ccc(cc1)NC(=O)c1ccc(cc1)NC(=O)C=Cc1ccc(cc1)C=CC(=O)Nc1ccc(cc1)C(=O)*,{},169.7759737 +530,*NNC(=O)CCCCCCCCC(=O)NNC(=S)c1cc(ccc1)C(=S)*,{},32.4414411 +531,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)NC(=O)c2cc(ccc2)C(=O)Nc2ccc(cc2)*)cc1,{},155.8375326 +532,*NC(=S)C=Cc1ccc(cc1)Cc1ccc(cc1)C=CC(=S)NCC*,{},99.87529352 
+533,*c1ccc2[nH]c3c(c2c1)cc(cc3)C(=O)c1c(cc(c(c1)C(=O)*)C(=O)O)C(=O)O,{},106.1795053 +534,*c1nc(sc1)NC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)Nc1nc(cs1)c1ccc(cc1)Oc1ccc(cc1)*,{},221.6298798 +535,*OC(=O)c1ccc(cc1)N1ON1c1ccc(cc1)C(=O)OCCOCC*,{},15.34426557 +536,*c1ccc2nc3c(Sc4cc(ccc4N3)c3cc4Sc5c(Nc4cc3)nc3ccc(cc3n5)O*)nc2c1,{},383.4 +537,*c1cc2Sc3c(=Nc2cc1)[nH]c1ccc(cc1n3)c1ccc2[nH]c3=Nc4c(Sc3nc2c1)cc(cc4)*,{},418.69 +538,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2oc(cc2)*)c1,{},419.98 +539,*=C1OC(=c2cc3ccc4=CC(=*)C=c5ccc(c2)c3c45)c2c3c4c1ccc1cccc(c41)c1c3c(ccc1)cc2,{<=c1cc2ccc3cc(cc4ccc(c1)c2c34)=c4oc(c5ccc6cccc7c8cccc9ccc4c(c98)c5c67)=>},432.43 +540,*c1cc2c3n(c4ccc(cc4n3)Oc3ccc4n5c(nc4c3)c3ccc(cc3C5=O)C(*)(C(F)(F)F)C(F)(F)F)C(=O)c2cc1,{},395.15 +541,*c1cc2n3c(=O)c4cc5c(cc4c3nc2cc1)c(=O)n1c2ccc(cc2nc51)O*,{},416.53 +542,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)c1c(cc(cc1c1ccc(cc1)c1ccccc1)c1ccc(cc1)c1cc(c(c(c1)c1ccc(cc1)c1ccccc1)c1ccc(cc1)*)c1ccc(cc1)c1ccccc1)c1ccc(cc1)c1ccccc1,{},435 +543,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc2c(nc([nH]2)c2ccc(cc2)*)c1,{},456.35 +544,*N1C(=O)c2cc3C(c4c(Oc3cc2C1=O)cc1C(=O)N(C(=O)c1c4)c1cc(c(cc1)c1c(cc(cc1)*)C(F)(F)F)C(F)(F)F)(C(F)(F)F)C(F)(F)F,{},472.25 +545,*c1cc2nc3c4c5c6c(c3nc2cc1)cccc6c1nc2ccc(cc2nc1c5ccc4)*,{},411.97 +546,*=C1C=c2ccc3cc(=C4c5ccccc5C(=*)c5ccccc45)cc4ccc(=C1)c2c34,{<=c1cc2ccc3cc(cc4ccc(c1)c2c34)=c4c5ccccc5c(c5ccccc45)=>},437.49 +547,*c1n(c(cc1)*)C(C(=O)OC)C,{},279.4452403 +548,*NC(C(=O)NCC(=O)NCC(=O)*)C,{},208.6397491 +549,*c1sc2cc3c(cc2n1)sc(n3)c1c(cc(c(c1)OCCCCCC)*)OCCCCCC,{},168.5263131 +550,*C(*)C(=O)OC(CC)(C)C,{$C(C(=O)OC(C)(C)CC)$},136.5678336 +551,*N(c1ccc(cc1)*)CCCCCCC,{},110.7170963 +552,*Oc1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)c1ccccc1,"{C(=O)c2ccc(cc2c2ccccc2)C(=O)>}",227.700588 +553,*S(=O)(=O)NCCNS(=O)(=O)c1ccc(cc1)c1ccc(cc1)*,{},173.2454244 +554,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)*,{},213.4133554 +555,*c1[nH]c(cc1c1ccccc1)*,{},120.4503456 
+556,*c1ccc(cc1)C1C(C(C1C(=O)OCC)*)C(=O)OCC,{},164.3224631 +557,*C(C(*)O)C,{$C(C)C(O)$},113.5665564 +558,*C(C*)C(CC)CC,{$CC(C(CC)CC)$},38.96888215 +559,*OC(=O)CCC(=O)OCCCCCCCCCCCCCCCCCCCC*,{},-32.76938912 +560,*C(C*)C(=O)N(CC)CC,{$CC(C(=O)N(CC)CC)$},56.77009786 +561,*NNC(=O)CCC(=O)NNC(=O)CCCCCCCCC(=O)*,{},64.69850401 +562,*NC(=O)CCC(=O)NCCCCCCCC*,{},69.22130195 +563,*NC(C(C(=O)*)(C)C)c1ccccc1,{},154.3595069 +564,*OC(=O)C/C=C/CC(=O)OCCCCCCCCCCCCCC*,{},-41.10158883 +565,*C(C*)(C(=O)OCC)CO,{$CC(C(=O)OCC)(CO)$},22.36004964 +566,*O[Si](CCCN=C1c2c(ccc(c2C(=NCCC[Si](*)(C)C)c2ccccc12)O)O)(C)C,{},51.3 +567,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)*)(c1ccc(cc1)O)C,{},255.52 +568,*OP(=O)(N=Nc1ccc(cc1)COC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)OCc1ccc(cc1)N=NP(=O)(OCCCCCC*)OC)OC,{},62.39 +569,*c1cc2c(C(=O)N(C2=O)c2c(ccc(c2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)CP(=O)(OCC)OCC)cc1,{},264.06 +570,*Nc1c(cc(cc1)NC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)*)C(=O)OCCCCCCCCCCCCCCCC,{},187.43 +571,*C(C*)(C(=O)OCCF)C,{$CC(C(=O)OCCF)(C)$},76.42 +572,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)Oc1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Sc2ccc(cc2)Oc2ccc(cc2)*)cc1,{},225.33 +573,*C(C*)c1ccc(cc1)C(=O)N(C)C,{$CC(c1ccc(C(=O)N(C)C)cc1)$},146.9 +574,*OC(=O)c1ccc(cc1)C(c1ccc(cc1)C(=O)*)(C)C,{},46.39 +575,*Oc1cc2c(cc1)ccc(c2)Oc1ccc(cc1)C(=O)Nc1cc(ccc1)NC(=O)c1ccc(cc1)*,{},204.47 +576,*SCC(=O)NCCCCCCNC(=O)C*,{},29.78 +577,*c1cc2C(c3c(c2cc1)ccc(c3)c1ccc(cc1)c1c(c(cc(c1)c1ccc(cc1)OCC(CCCC)CC)c1ccc(cc1)*)c1ccc(cc1)OCC(CCCC)CC)(CCCCCC)CCCCCC,{},123.52 +578,*OC(=O)COCC(=O)OCCCC*,{},27.22 +579,*C1(CCN(CC1)C(=O)C(CC(=O)N1CCC(CC1)(CCC*)C)C)C,{},111.94 +580,*c1nc2c(nc1)cc(cc2)Oc1cc2c(nc(cn2)c2ccc(cc2)*)cc1,{},381.02 +581,*C(C*)OC(=O)c1c(cccc1)C,{$CC(OC(=O)c1ccccc1C)$},81.84 +582,*C1(CC(c2c1cc(cc2)*)(C)C)C,{},261.41 +583,*c1cc2c(C(=O)N(C2=O)c2ccc3Cc4c(c3c2)cc(cc4)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)cc1,{},442.63 
+584,*c1cc2c(C(=O)N(C2=O)c2c3c(ccc2)c(ccc3)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2nc(nc(n2)NC(=O)*)c2ccccc2)cc1,{},330.59 +585,*Oc1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)Sc1ccc(cc1)Cl,{},146.43 +586,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)CCCCCCCCCCCC*)cc1,{},80.63 +587,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)OCCCCC*,{},89.92 +588,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)CCCCCCCCCCCC,{},70.24 +589,*OC(=O)Nc1c(ccc(c1)NC(=O)OCCCCCCCC*)C,{},117.25 +590,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C(=O)Oc1ccc2c(c1)oc(=O)cc2)C,{$CC(C(=O)OCCCCCCOc1ccc(C(=O)Oc2ccc3ccc(=O)oc3c2)cc1)(C)$},118.96 +591,*Oc1ccc(cc1)CC(NC(=O)Cc1ccc(cc1)OC(=O)CCCCCCC(=O)*)C(=O)OCC,{},66.4 +592,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(ccc2)C(c2cc(ccc2)*)O[Si](O[Si](O[Si](C)(C)C)(C)C)(C)C)cc1,{},196.68 +593,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(=O)OCCOCCOCCOC(=O)*)cc1,{},79.22 +594,*Oc1ccc(cc1)C1(c2ccccc2c2ccccc12)c1ccc(cc1)Oc1c(cc(cc1)C(=O)c1ccc(cc1)C(=O)c1cc(c(cc1)*)C(F)(F)F)C(F)(F)F,{},265.8 +595,*O[Si](O[Si](O[Si](O[Si](CC[Si](O[Si](O[Si](O[Si](O[Si](CC[Si](*)(c1ccccc1)c1ccccc1)(C)C)(C)C)(C)C)(C)C)(C)C)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1,{},11.62 +596,*N1C(=O)c2c(C1=O)c(ccc2)Oc1c(c(cc(c1)C(C)(C)C)C(C)(C)C)Oc1c2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)*)ccc1,"{N3C(=O)c4cccc(c4C3=O)Oc5c(cc(C(C)(C)C)cc5C(C)(C)C)Oc6cccc7c6C(=O)N(C7=O)>}",301.26 +597,*Oc1ccc(cc1)c1ccc(cc1)Oc1c(c(c(c(c1F)F)COC(c1cc(ccc1)C(OCc1c(c(c(c(c1F)F)*)F)F)(C(F)(F)F)C(F)(F)F)(C(F)(F)F)C(F)(F)F)F)F,{},139.73 +598,*S(=O)(=O)c1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(N1C(=O)c2c(C1=O)cccc2)C)C(=O)Nc1ccc(cc1)*,{},272.65 +599,*NC(=O)NCc1ccc(cc1)CNC(=O)NCCCCCCCCCCCCCCCCCC*,{},62.4 +600,*C1CC2CC(CC(C1)O2)OC(=O)O*,{},196.52 +601,*C(C(*)C(=O)OC(C)(C)C)C(=O)OC,{$C(C(=O)OC)C(C(=O)OC(C)(C)C)$},148.77 +602,*Oc1ccc(cc1)c1ccc(cc1)C(=O)OCC(COC(=O)c1ccc(cc1)c1ccc(cc1)OC(CC*)C)C,{},116.96 +603,*N1C(=O)c2c(C1=O)cc(cc2)c1cc2c(C(=O)N(C2=O)c2c(cc(cc2C)C(c2cc(c(c(c2)C)*)C)c2c3c(ccc2)cccc3)C)cc1,{},369 
+604,*c1cc2c(nc(c(n2)c2ccccc2)c2ccc(cc2)c2c(nc3c(n2)cc(cc3)C(=O)*)c2ccccc2)cc1,{},326.27 +605,*C(C*)(C(=O)OCC)F,{$CC(C(=O)OCC)(F)$},124.69 +606,*NC(=O)CCCCC(=O)NCC(CC(CC*)(C)C)C,{},54.82 +607,*c1c2C(=O)N(C(=O)c2c(c2ccccc12)c1ccc(cc1)Oc1ccc(cc1)C(=O)c1cc(ccc1)C(=O)c1ccc(cc1)Oc1ccc(cc1)*)CCCCCCCCCCCC,{},161.91 +608,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=S)*)C,{},101.06 +609,*C(C*)OCC(CC)(C)C,{$CC(OCC(C)(C)CC)$},12.2 +610,*c1sc(cc1)C(=O)Oc1ccc(cc1)[Si](c1ccc(cc1)OC(=O)*)(CC)CC,{},-36.93 +611,*Oc1c(cc(cc1C)*)C(CCCCCCCCCCCC)C,{},44.22 +612,*Oc1cc(ccc1)C(C(C(c1cc(ccc1)OC(=O)c1cc(ccc1)C(C(C(c1cc(ccc1)C(=O)*)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F,{},105.91 +613,*C(C*)C(=O)Oc1ccc(cc1)C(=O)Oc1ccc(cc1)OC(=O)c1ccc(cc1)OCCCC,{$CC(C(=O)Oc1ccc(C(=O)Oc2ccc(OC(=O)c3ccc(OCCCC)cc3)cc2)cc1)$},42.05 +614,*C(C*)c1c(cccc1)C(=O)OCCC,{$CC(c1ccccc1C(=O)OCCC)$},135.21 +615,*N=P(*)(OCc1ccc(cc1)c1ccccc1)OCc1ccc(cc1)c1ccccc1,{},51.23 +616,*c1oc(nn1)c1ccc(cc1)C(=O)OCCCCCCOc1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)OCCCCCCOC(=O)c2ccc(cc2)*)CCC1,{},90.11 +617,*OC(=O)NCCCCCCNC(=O)OCCCCCCCCCCCC*,{},35.91 +618,*c1oc(nc1)c1cc(c(cc1)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1)*)C(F)(F)F)C(F)(F)F,{},267.06 +619,*c1nc(nc(n1)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)Oc1ccc(cc1)*)(C(F)(F)F)C(F)(F)F)c1ccccc1,{},303.48 +620,*Oc1c(c(c(c(c1C)C)Oc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*)C)C,{},337.62 +621,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2cc(ccc2)*)cc1,"{N2C(=O)c3ccc(cc3C2=O)c4cccc5c4C(=O)N(C5=O)>}",348.28 +622,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(c(c(c2)Br)Oc2c(cc(cc2Br)*)Br)Br)cc1,{},302.26 +623,*N1C(=O)C2C3C4C(C(C2C1=O)C=C3)C(=O)N(C4=O)c1ccc(cc1)Sc1cc2c(C(=O)N(C2=O)c2ccc(cc2)CC)cc1Sc1ccc(cc1)*,{},307.1 +624,*C1C(C(C(C1)C=C*)(F)F)(C(F)(F)F)C(F)(F)F,{$C=CC1CC(C(C(F)(F)F)(C(F)(F)F)C1(F)F)$},163.71 +625,*OC(CCC(OC(=O)CCCCCCCCC(=O)*)C)C,"{C(=O)CCCCCCCCC(=O)>}",-18.21 +626,*c1ccc2c(nc([nH]2)c2cc(ccc2)c2nc3c([nH]2)ccc(c3)C(=O)Nc2cc(cc(c2)c2nc3c([nH]2)cccc3)NC(=O)*)c1,{},348.55 
+627,*C(C*)C(=O)c1ccc(cc1)CC,{$CC(C(=O)c1ccc(CC)cc1)$},68.42 +628,*C1C(=O)N(C(=O)C1*)CCOc1ccc(cc1)c1ccc(cc1)C#N,{},158.36 +629,*C1COC2C1OCC2OC(=O)CCC(=O)O*,{},86.76 +630,*OC(=O)c1ccc(cc1)C(=O)OCCCCOC(=O)CCCCC(=O)OCCCC*,{},33.64 +631,*C(C*)c1ccc(cc1)COCC(CCCC)CC,{$CC(c1ccc(COCC(CC)CCCC)cc1)$},13.66 +632,*C(C*)(C(=O)Oc1cc(c(cc1)C(=O)c1ccccc1)O)C,{$CC(C(=O)Oc1ccc(C(=O)c2ccccc2)c(O)c1)(C)$},186.45 +633,*C(C*)C(=O)OCCOC(C(F)F)(F)F,{$CC(C(=O)OCCOC(F)(F)C(F)F)$},11.16 +634,*OC(=O)OCC(C(C*)O)O,{},98.39 +635,*Oc1ccc(cc1)NC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)Nc1ccc(cc1)OCCCCCC*,{},158.08 +636,*N(c1ccc(cc1)C(c1ccc(cc1)*)c1ccccc1)c1ccc(cc1)C,{},263.16 +637,*Nc1ccc(cc1)C(c1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)C2C(C1=O)CC=CC2)C(=O)*)(C)C,{},358.53 +638,*C(C*)(CC(=O)OCCCc1ccccc1)C(=O)OCCCc1ccccc1,{$CC(C(=O)OCCCc1ccccc1)(CC(=O)OCCCc1ccccc1)$},29.9 +639,*C=CCCCC(CCC*)Cl,{$C=CCCCC(Cl)CCC$},-9.32 +640,*C1C(=O)N(C(=O)C1C(C*)(C)C)c1c(cccc1)C,{},286.47 +641,*C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)Oc1c(c(c(c(c1F)F)C(=O)c1c(c(c(c(c1F)F)Oc1ccc(cc1)*)F)F)F)F,{},276.34 +642,*Nc1cc(ccc1)NC(=O)CCCCCCC(=O)*,{},128.17 +643,*C(C*)(C(=O)OCCOc1ccc(cc1)N=Nc1ccc(cc1)C#N)C,{$CC(C(=O)OCCOc1ccc(N=Nc2ccc(C#N)cc2)cc1)(C)$},141.68 +644,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)C1(CCC(CC1)c1ccccc1)c1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Cc2ccc(cc2)*)cc1,{},249.05 +645,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)c1c(c(c(c(c1c1ccc(cc1)F)c1ccc(cc1)F)c1ccc(cc1)F)c1ccc(cc1)F)C(=O)c1ccc(cc1)*,{},252.56 +646,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)NC(=O)Nc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)c2ccc(cc2)NC(=O)*)cc1,{},253.71 +647,*OC(C*)CCl,"{,}",2.36 +648,*NC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)NCC(CCC(C*)C)C,{},157.37 +649,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)C(=O)c2cc(ccc2)C(=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},240.35 +650,*Oc1c(cc(cc1OC)C=Cc1ccc(cc1)C=Cc1cc(c(c(c1)OC)OCCCCCCCC*)OC)OC,{},65.39 +651,*Nc1c2c(ccc1)c(ccc2)NC(=O)c1cc(ccc1)C(=O)*,{},339.66 +652,*OS(=O)(=O)c1ccc(cc1)*,{},140.64 
+653,*c1ccc2ccc3c(c2n1)nc(cc3)c1ccc(cc1)c1c(cc(c(c1)CCCCCC)c1ccc(cc1)*)CCCCCC,{},160.44 +654,*C(C*)(C(=O)OCCCCCCCCCCn1c2ccc(cc2c2ccccc12)N=Nc1ccc(cc1)[N+](=O)[O-])C,{$CC(C(=O)OCCCCCCCCCCn1c2ccccc2c2cc(N=Nc3ccc([N+](=O)[O-])cc3)ccc21)(C)$},60.81 +655,*N=Nc1ccc(cc1)NC(=O)CCC(=O)Nc1ccc(cc1)*,{},208.85 +656,*C(C*)c1cc(ccc1)Cl,{$CC(c1cccc(Cl)c1)$},122.57 +657,*Oc1ccc(cc1)C1(c2cc(ccc2c2ccc(cc12)[N+](=O)[O-])OC)c1ccc(cc1)OC(=O)CCCC(=O)*,{},195.71 +658,*c1cc2c(C(=O)N(C2=O)c2c(cc(c(c2C)C(=O)c2cc(ccc2)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)C)C)cc1,{},280.09 +659,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)c1cc(ccc1)NC(=O)c1cc(ccc1)C(=O)Nc1cc(ccc1)C(=O)c1ccc(cc1)*,{},222.22 +660,*Oc1ccc(cc1)CC(NC(=O)CCc1ccc(cc1)OC(=O)CCCC(=O)*)C(=O)OCCOCCOCC,{},42.34 +661,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)C#N,{},51.61 diff --git a/test/bigsmiles.smi b/test/bigsmiles.smi index 28fe243..dd5de8b 100644 --- a/test/bigsmiles.smi +++ b/test/bigsmiles.smi @@ -37,20 +37,25 @@ O{[>]C(=O)C(C)N,C(=O)CN[<]} {[<][<]C(=O)C(C)N[>],[<]C(=O)CN[>][>]}O {[<][>]NC(C)C(=O)[<],[>]NCC(=O)[<][>]}O {[<]NC(C)C(=O),NCC(=O)[>]}O +{[][$]CC(C)([#R])[$][]} {[][$]CC(C)([#R])[$][]}.{#R=C(=O)OCC12CC(C3)CC(C1)CC3C2} +C([#Arm])([#Arm])([#Arm])[#Arm] C([#Arm])([#Arm])([#Arm])[#Arm].{#Arm=CO{[<][>]CCO[<][>]}} -A([<1[Inner]1])R(A[<1[Inner]1])(B[>1[Inner]2])B[>1[Inner]2] -A([<1[<1]1])R(A[<1[<1]1])(B[>1[>1]2])B[>1[>1]2] -A([$1[Inner]1])R(A'[$1[Inner]1])(A[$1[Inner]2])A'[$1[Inner]2] -A([$1[$1]1])R(A'[$1[$1]1])(A[$1[$1]2])A'[$1[$1]2] -A([$1[$1]1])R(A'[$1[$2]1])(A[$1[$1]2])A'[$1[$2]2] -A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2] +A([<1[Inner]1])R(A[<1[Inner]1])(B[>1[Inner]2])B[>1[Inner]2].{#A=C}.{#R=C}.{#B=C}.{#Inner=<} +A([<1[<1]1])R(A[<1[<1]1])(B[>1[>1]2])B[>1[>1]2].{#A=C}.{#R=C}.{#B=C} +A([$1[Inner]1])R(A'[$1[Inner]1])(A[$1[Inner]2])A'[$1[Inner]2].{#A=C}.{#A'=C}.{#R=C}.{#Inner=$} +A([$1[$1]1])R(A'[$1[$1]1])(A[$1[$1]2])A'[$1[$1]2].{#A=C}.{#A'=C}.{#R=C} 
+A([$1[$1]1])R(A'[$1[$2]1])(A[$1[$1]2])A'[$1[$2]2].{#A=C}.{#A'=C}.{#R=C} +A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2].{#A=C}.{#A'=C}.{#R=C} CC{[>][<]CC(C)[>][<]}CC(C)=C C{[>][<]C[C@@H](C)[>][<]}CC(C)=C CC{[>][<]C[C@@H](C)C[C@H](C)[>];[<]C=CC,[<]C[C@H](C)C=CC[]} {[][$]CC(c1ccncc1)[$],[$]CC(c1cc[n+](C)cc1)[$].[I-][]} {[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>][]} +{[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>][]}.{#A=C}.{#R=C}.{#B=C}.{#R'=C} {[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>];[>][#E1],[<][#E2][]} +{[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>];[>][#E1],[<][#E2][]}.{#A=C}.{#R=C}.{#B=C}.{#R'=C}.{#E1=C}.{#E2=C} +{[][>]COC(=O){[$][$]COC[$][$]}C(=O)OC[>],c1([<])cc([#L]2)cc([#L]3)c1.c4([<])cc([#L]5)cc([#L]6)c4.c7([<])cc([#L]8)cc([#L]9)c7.C%10([<])cc([#L]%11)cc([#L]%12)c%10.[Pd++]258%11.[Pd++]369%12} {[][>]COC(=O){[$][$]COC[$][$]}C(=O)OC[>],c1([<])cc([#L]2)cc([#L]3)c1.c4([<])cc([#L]5)cc([#L]6)c4.c7([<])cc([#L]8)cc([#L]9)c7.C%10([<])cc([#L]%11)cc([#L]%12)c%10.[Pd++]258%11.[Pd++]369%12}.{#L=c(c1)cccn1} S1C(c2ccccc2){[$][$]CC(c1ccccc1)[$][$]}C(=O)OC(=C3)N=NN3CC(O)COC(=O)C(C)C1 C1CCC{[$1][$1]=CCCCCCCC=[$1][$1]}CCCC1 diff --git a/test/test_tokenize_bigsmiles.py b/test/test_tokenize_bigsmiles.py index 23a4b37..9633ce6 100644 --- a/test/test_tokenize_bigsmiles.py +++ b/test/test_tokenize_bigsmiles.py @@ -1,4 +1,6 @@ +import csv import json +from pathlib import Path from tempfile import NamedTemporaryFile import pytest @@ -16,6 +18,139 @@ def _tokens(spec: str) -> list[str]: return spec.split() +def _smi_fixture(filename: str) -> list[tuple[str, str]]: + path = Path(__file__).with_name(filename) + return [ + (f"{filename}:{idx}", line) + for idx, line in enumerate(path.read_text().splitlines(), start=1) + if line and not line.startswith("#") + ] + + +def _bigsmiles_fixture() -> list[tuple[str, str]]: + return _smi_fixture("bigsmiles.smi") + + +def _bigsmiles_csv_fixture() -> list[tuple[str, str]]: + path = 
Path(__file__).with_name("bigsmiles.csv") + data_lines = [ + (idx, line) + for idx, line in enumerate(path.read_text().splitlines(), start=1) + if line and not line.startswith("#") + ] + reader = csv.reader(line for _, line in data_lines) + header = next(reader) + bigsmiles_index = header.index("BigSMILES") + return [ + (f"bigsmiles.csv:{line_no}", row[bigsmiles_index]) + for (line_no, _), row in zip(data_lines[1:], reader) + if row[bigsmiles_index] + ] + + +INLINE_ROUNDTRIP_BIGSMILES = [ + "{[$]CC[$]}", + "{[$]CC[$],[$]C(C)C[$]}", + "{[<]CC[>]}", + "[$1]", + "[<2]", + "[]", + "{[]CC[$]}", + "{[$]CC[$];C[$],[$]C}", + "CC{[$]CC[$]}CC", + "{[$]CC(c1ccccc1)[$]}", + "{[>]CCCCCC(=O)[<],[>]NCCCCCCN[<]}", + "{[$]CC[$]}{[$]CC(C)[$]}", +] + + +def _roundtrip_fixtures() -> list[tuple[str, list[tuple[str, str]]]]: + return [ + ( + "inline", + [ + (f"inline:{idx}", text) + for idx, text in enumerate(INLINE_ROUNDTRIP_BIGSMILES, start=1) + ], + ), + ("bigsmiles.smi", _bigsmiles_fixture()), + ("opensmiles.smi", _smi_fixture("opensmiles.smi")), + ("bigsmiles.csv", _bigsmiles_csv_fixture()), + ] + + +UNDEFINED_FRAGMENT_PLACEHOLDER_CASES = [ + ("{[][$]CC(C)([#R])[$][]}", ["#R"]), + ("C([#Arm])([#Arm])([#Arm])[#Arm]", ["#Arm", "#Arm", "#Arm", "#Arm"]), + ( + "{[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>][]}", + ["#A", "#R", "#A", "#B", "#R'", "#B", "#B", "#B"], + ), + ( + "{[][<][#A][#R][#A][<],[>][#B][#R']([#B][>])([#B][>])[#B][>];" + "[>][#E1],[<][#E2][]}", + ["#A", "#R", "#A", "#B", "#R'", "#B", "#B", "#B", "#E1", "#E2"], + ), + ( + "{[][>]COC(=O){[$][$]COC[$][$]}C(=O)OC[>],c1([<])cc([#L]2)cc([#L]3)c1." + "c4([<])cc([#L]5)cc([#L]6)c4.c7([<])cc([#L]8)cc([#L]9)c7." 
+ "C%10([<])cc([#L]%11)cc([#L]%12)c%10.[Pd++]258%11.[Pd++]369%12}", + ["#L", "#L", "#L", "#L", "#L", "#L", "#L", "#L"], + ), +] + +BARE_LABEL_BIGSMILES_CASES = [ + ( + "A([<1[Inner]1])R(A[<1[Inner]1])(B[>1[Inner]2])B[>1[Inner]2]", + 7, + ), + ( + "A([<1[<1]1])R(A[<1[<1]1])(B[>1[>1]2])B[>1[>1]2]", + 3, + ), + ( + "A([$1[Inner]1])R(A'[$1[Inner]1])(A[$1[Inner]2])A'[$1[Inner]2]", + 9, + ), + ( + "A([$1[$1]1])R(A'[$1[$1]1])(A[$1[$1]2])A'[$1[$1]2]", + 5, + ), + ( + "A([$1[$1]1])R(A'[$1[$2]1])(A[$1[$1]2])A'[$1[$2]2]", + 5, + ), + ( + "A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2]", + 5, + ), +] +BARE_LABEL_BIGSMILES_WITH_DEFINITIONS = [ + ( + "A([<1[Inner]1])R(A[<1[Inner]1])(B[>1[Inner]2])B[>1[Inner]2]." + "{#A=C}.{#R=C}.{#B=C}.{#Inner=<}" + ), + "A([<1[<1]1])R(A[<1[<1]1])(B[>1[>1]2])B[>1[>1]2].{#A=C}.{#R=C}.{#B=C}", + ( + "A([$1[Inner]1])R(A'[$1[Inner]1])(A[$1[Inner]2])A'[$1[Inner]2]." + "{#A=C}.{#A'=C}.{#R=C}.{#Inner=$}" + ), + "A([$1[$1]1])R(A'[$1[$1]1])(A[$1[$1]2])A'[$1[$1]2].{#A=C}.{#A'=C}.{#R=C}", + "A([$1[$1]1])R(A'[$1[$2]1])(A[$1[$1]2])A'[$1[$2]2].{#A=C}.{#A'=C}.{#R=C}", + "A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2].{#A=C}.{#A'=C}.{#R=C}", +] +EXPECTED_UNKNOWN_BIGSMILES_TEXTS = ( + {text for text, _ in UNDEFINED_FRAGMENT_PLACEHOLDER_CASES} + | {text for text, _ in BARE_LABEL_BIGSMILES_CASES} + | set(BARE_LABEL_BIGSMILES_WITH_DEFINITIONS) +) +NON_EXACT_ROUNDTRIP_TEXTS = EXPECTED_UNKNOWN_BIGSMILES_TEXTS | {"[Cu++]"} + + +def _is_lossless_roundtrip_text(text: str) -> bool: + return ".{#" not in text and text not in NON_EXACT_ROUNDTRIP_TEXTS + + @pytest.fixture def bigsmiles_tokenizer() -> SmirkTokenizer: return SmirkTokenizer(bigsmiles=True) @@ -26,21 +161,82 @@ def smiles_tokenizer() -> SmirkTokenizer: return SmirkTokenizer(bigsmiles=False) +ROUNDTRIP_FIXTURES = _roundtrip_fixtures() + + @pytest.mark.parametrize( - "bigsmiles_batch", - [ - ["{[$]CC[$]}"], - ["{[$]CC[$],[$]C(C)C[$]}", "{[<]CC[>]}"], - ["[$1]", "[<2]", "[]", "{[]CC[$]}", 
"{[$]CC[$];C[$],[$]C}"], - ["CC{[$]CC[$]}CC", "{[$]CC(c1ccccc1)[$]}"], - ["{[>]CCCCCC(=O)[<],[>]NCCCCCCN[<]}", "{[$]CC[$]}{[$]CC(C)[$]}"], - ], + ("fixture_name", "fixture_rows"), + ROUNDTRIP_FIXTURES, + ids=[name for name, _ in ROUNDTRIP_FIXTURES], ) -def test_bigsmiles_roundtrip_batch_decode(bigsmiles_batch): +def test_bigsmiles_roundtrip_batch_decode(fixture_name, fixture_rows): bigsmirk = smirk.SmirkBigSmilesFast() + bigsmiles_batch = [text for _, text in fixture_rows] encoded = bigsmirk(bigsmiles_batch, add_special_tokens=False) decoded = bigsmirk.batch_decode(encoded["input_ids"], skip_special_tokens=True) - assert decoded == bigsmiles_batch + itemwise_decoded = [ + bigsmirk.decode(ids, skip_special_tokens=True) for ids in encoded["input_ids"] + ] + exact_failures = [ + f"{source}: expected {text!r}, got {decoded_text!r}" + for (source, text), decoded_text in zip(fixture_rows, decoded) + if _is_lossless_roundtrip_text(text) and decoded_text != text + ] + + assert decoded == itemwise_decoded + assert len(decoded) == len(bigsmiles_batch) + assert not exact_failures, ( + f"{fixture_name} exact roundtrip mismatches:\n" + "\n".join(exact_failures) + ) + + +def test_bigsmiles_fixture_has_no_unknown_tokens(): + bigsmirk = smirk.SmirkBigSmilesFast() + failures = [] + + for line_no, text in _bigsmiles_fixture(): + if text in EXPECTED_UNKNOWN_BIGSMILES_TEXTS: + continue + + tokens = bigsmirk.tokenize(text, add_special_tokens=False) + if bigsmirk.unk_token in tokens: + failures.append(f"line {line_no}: {text}") + + assert not failures, "unknown tokens in BigSMILES fixtures:\n" + "\n".join(failures) + + +def _unknown_spans(bigsmirk: smirk.SmirkBigSmilesFast, text: str) -> list[str]: + encoding = bigsmirk._tokenizer.encode(text, add_special_tokens=False) + unk_token_id = bigsmirk._tokenizer.token_to_id(bigsmirk.unk_token) + return [ + text[start:end] + for token_id, (start, end) in zip(encoding["input_ids"], encoding["offsets"]) + if token_id == unk_token_id + ] + + 
+@pytest.mark.parametrize( + ("text", "unknown_spans"), + UNDEFINED_FRAGMENT_PLACEHOLDER_CASES, +) +def test_bigsmiles_undefined_fragment_placeholders_return_unknowns(text, unknown_spans): + bigsmirk = smirk.SmirkBigSmilesFast() + tokens = bigsmirk.tokenize(text, add_special_tokens=False) + actual_unknown_spans = _unknown_spans(bigsmirk, text) + + assert actual_unknown_spans == unknown_spans + assert tokens.count(bigsmirk.unk_token) == len(actual_unknown_spans) + + +@pytest.mark.parametrize( + ("text", "unknown_count"), + BARE_LABEL_BIGSMILES_CASES, +) +def test_bigsmiles_bare_labels_return_unknowns(text, unknown_count): + bigsmirk = smirk.SmirkBigSmilesFast() + tokens = bigsmirk.tokenize(text, add_special_tokens=False) + + assert tokens.count(bigsmirk.unk_token) == unknown_count @pytest.mark.parametrize( From 7ef3e88ecfef66f4f569f9c5933971704e38b815 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Tue, 28 Apr 2026 17:48:41 -0400 Subject: [PATCH 24/52] clean up test formatting --- test/bigsmiles.csv | 1328 +++++++++++++++---------------- test/test_tokenize_bigsmiles.py | 12 - 2 files changed, 664 insertions(+), 676 deletions(-) diff --git a/test/bigsmiles.csv b/test/bigsmiles.csv index bade9a3..12b2a19 100644 --- a/test/bigsmiles.csv +++ b/test/bigsmiles.csv @@ -1,664 +1,664 @@ -# Data from https://doi.org/10.1038/s41597-024-03212-4 -,SMILES,BigSMILES,Tg (C) -0,*C1COC2C1OCC2Oc1ccc(cc1)CNC(=O)CCCCCCC(=O)NCc1ccc(cc1)O*,{},21.58173134 -1,*OC(CCC(OC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)*)C)C,{},63.5893379 -2,*OC(=O)c1ccc(cc1)C(=O)OCCCC(=O)NCc1ccc(cc1)CNC(=O)CCC*,{},53.55726117 -3,*OC(=O)NCCNC(=O)OCC*,{},5.896093021 -4,*SCCCCC*,{},-55.37860961 -5,*Oc1ccc(cc1)C(=O)OC(=O)c1ccc(cc1)OCCCCCC*,{},64.73496741 -6,*c1[nH]c(cc1CC(=O)OCCCCCCCC)*,{},-4.076963699 -7,*C(C*)(CC(=O)OCCCCCCCCCC)C(=O)OCCCCCCCCCC,{$CC(C(=O)OCCCCCCCCCC)(CC(=O)OCCCCCCCCCC)$},75.04044311 -8,*OCC1C(C1)C*,{},-28.98581149 -9,*N(C(=O)CCCCCCCCCCCCCCCCC(=O)N(CCCCCC*)C)C,{},49.34222836 
-10,*O[Si](*)(CCC(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C,{},-68.87734458 -11,*S(=O)(=O)c1ccc(cc1)C(=O)CNc1ccc(cc1)NCC(=O)c1ccc(cc1)*,{},198.9026743 -12,*S(=O)(=O)c1ccc(cc1)C(=O)NCCNC(=O)c1ccc(cc1)*,{},205.0712987 -13,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)CCCCC)cc1,{},204.7640603 -14,*NC(=O)C(=O)NCCCCCCCCCCNC(=O)CCCCCCCCC(=O)NCCCCCCCCCC*,{},82.2677155 -15,*OC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCCCCCCCCCCCC*,{},17.27783213 -16,*Oc1ccc(cc1)C(=O)Oc1ccc(cc1)Cc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},88.16409459 -17,*Oc1ccc(cc1)SSCCCCSSc1ccc(cc1)*,{},102.260186 -18,*Oc1ccc(cc1)C(=O)Nc1c(cc(cc1)c1cc(c(cc1)NC(=O)c1ccc(cc1)OCCCCCC*)Cl)Cl,{},133.9866306 -19,*c1c(cc(c(c1)C=Cc1ccc(cc1)OCCC(CCCC(C)C)C)C=C*)C=Cc1ccc(cc1)OCCC(CCCC(C)C)C,{$C=Cc1cc(C=Cc2ccc(OCCC(C)CCCC(C)C)cc2)c(cc1C=Cc1ccc(OCCC(C)CCCC(C)C)cc1)$},2.868925186 -20,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)CCCCC(=O)*)CC1,{},197.4539358 -21,*SC(=O)CCCCC(=O)SCc1ccc(cc1)c1ccc(cc1)C*,{},-14.32353196 -22,*C(C*)(C(=O)OCCNC(=O)N(CC(C(C(OC1OC(C(C(C1O)O)O)CO)C(CO)O)O)O)CCCCCC)C,{$CC(C(=O)OCCNC(=O)N(CCCCCC)CC(O)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO)(C)$},23.2402228 -23,*Oc1ccc(cc1)OC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(N1C(=O)c2c(C1=O)cccc2)C)C(=O)*,{},58.7319127 -24,*OC(=O)Nc1ccc(cc1)C(=O)OCC1CCC(CC1)C*,{},148.8519878 -25,*Nc1c(cccc1)NC(=O)c1ccc(cc1)C(=O)*,{},231.8080905 -26,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C(=O)Oc1ccc(cc1)OCCCCCC)C,{$CC(C(=O)OCCCCCCOc1ccc(C(=O)Oc2ccc(OCCCCCC)cc2)cc1)(C)$},14.67015775 -27,*Oc1ccc(cc1)S(=O)(=O)c1c2c(ccc1)c(ccc2)S(=O)(=O)c1ccc(cc1)Oc1c(cc(c(c1C)C)c1c(c(c(c(c1)C)*)C)C)C,{},300.6954413 -28,*OC(=O)NC1CCC(CC1)CC1CCC(CC1)NC(=O)OCC(C*)(C)C,{},15.9792949 -29,*OC(=O)NCCCCCC(=O)OCCCC*,{},47.28425166 -30,*N(c1c(cc(cc1)Cc1cc(c(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CCC)C)C)CCC,{},179.9023847 -31,*Nc1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)C(CCSC)N1C(=O)c2c(C1=O)cccc2)C(=O)*,{},230.3191562 -32,*N(c1c(cc(cc1)Cc1cc(c(cc1)N(C(=O)c1ccc(cc1)C(=O)*)C)C)C)C,{},167.9642319 
-33,*OC(=O)CCCCCCC(=O)OCCc1ccc(cc1)N1ON1c1ccc(cc1)CC*,{},17.91289325 -34,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*)(C(F)(F)F)C(F)(F)F,"{C(=O)c3ccc(cc3)C(=O)>}",150.9011854 -35,*c1ccc2[nH]c3c(c2c1)cc(cc3)*,{},246.5898577 -36,*C(C*)C(=O)NCCCC,{$CC(C(=O)NCCCC)$},-84.72234668 -37,*C(=C*)CCCCCCCCCCOc1ccc(cc1)c1ccc(cc1)OCC(CCCCCC)F,{$C=C(CCCCCCCCCCOc1ccc(c2ccc(OCC(F)CCCCCC)cc2)cc1)$},27.34580481 -38,*NC(=O)NCCCCNC(=O)NCCCCCC*,{},60.10434296 -39,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OC(=O)OCCCCCOC(=O)*)C,{},39.38257392 -40,*N1CCN(CC1)C(=O)CCN1CCN(CC1)CCC(=O)*,{},40.58060093 -41,*c1c(cc(cc1)C#C*)SCCCCCCCCCCCC,{$C#Cc1ccc(c(SCCCCCCCCCCCC)c1)$},53.41644401 -42,*OC(=O)Nc1cc(ccc1)C(=O)OCCCCCCCCCC*,{},45.09440244 -43,*Oc1c(cc(cc1)C(=O)*)CCCCCC,{},85.05865655 -44,*Nc1ccc(cc1)NC(=O)C=CC(=O)*,{},115.1255691 -45,*C(C*)(C(=O)OC(Oc1ccccc1)C)C,{$CC(C(=O)OC(C)Oc1ccccc1)(C)$},31.196908 -46,*C(*)C,{$C(C)$},122.3867673 -47,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2c(c3c(C(CC3)(C)C)c(c2C)*)C)cc1,{},325.2939228 -48,*Nc1ccc(cc1)C(=O)*,{},92.05287264 -49,*C(=C*)C,{$C=C(C)$},4.023221393 -50,*c1nc(nc(n1)Oc1c2c(ccc1C(=O)Oc1ccc(cc1)OC(=O)c1c(c3c(cc1)cccc3)O*)cccc2)N1CCCCC1,{},288.7077709 -51,*OS(=O)(=O)c1ccc(cc1)Sc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)C1(CCCCC1)c1ccc(cc1)*,{},157.8104834 -52,*c1c(c(c(c(c1Cl)Cl)CC*)Cl)Cl,{$CCc1c(Cl)c(Cl)c(c(Cl)c1Cl)$},56.1987128 -53,*OC(=O)c1ccc(cc1)C(=O)OCCCC(=O)NCCCCCCNC(=O)CCC*,{},60.02078862 -54,*c1c(c(cc(c1)*)C=Nc1ccc(cc1)F)O,{$c1cc(c(O)c(C=Nc2ccc(F)cc2)c1)$},-121.5212841 -55,*N=C1C=CC(=NC2=CC(=O)C(=CC2=O)*)C=C1,{},212.9967182 -56,*OC(=O)NCCSCCCCCSCCNC(=O)OCC1CCC(CC1)C*,{},77.84678315 -57,*OS(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)C1(CCCCC1)c1ccc(cc1)*,{},149.9071313 -58,*c1nc(nc(n1)Oc1cc2c(cc1)cc(cc2)C(=O)Oc1c(cccc1)OC(=O)c1cc2c(cc1)cc(cc2)O*)N1CCN(CC1)C,{},54.90728348 -59,*OC(=O)CC(=O)OCC*,{},-90.52916041 -60,*OC(=O)CCCCCCCC(=O)OCC(C*)(C)C,{},17.01342593 -61,*OC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)OCCN(CC*)CCCC(=O)Nc1ccc(cc1)N=Nc1ccccc1,{},-32.25789051 
-62,*[Si](*)(CCCCCC)CCCCCC,{$[Si](CCCCCC)(CCCCCC)$},-11.37918107 -63,*c1n(c(cc1)C=C*)CCCCCC,{},-15.03969582 -64,*NC1CC(CC(C1)(CNC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(CC(C)C)N1C(=O)c2c(C1=O)cccc2)C(=O)*)C)(C)C,{},195.2683571 -65,*Nc1cc(cc(c1)C(=O)Nc1ccccc1)NC(=O)c1cc(ccc1)C(=O)*,{},233.599405 -66,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Oc2ccc(cc2)OC(=O)*)cc1,{},-71.68107141 -67,*OC(=O)CCCCCC(=O)OCCC*,{},-71.28231613 -68,*N1C(=O)C(CC1=O)SCCOCCSC1C(=O)N(C(=O)C1)c1ccc(cc1)C(=O)OCCCCCCOC(=O)c1ccc(cc1)*,{},19.42674542 -69,*NC(=O)CCCCCCCCC(=O)NCCCP(CCC*)c1ccccc1,{},56.20366062 -70,*c1nc2c([nH]1)cc(cc2)S(=O)(=O)c1cc2c(nc([nH]2)c2ccc(cc2)Oc2ccc(cc2)*)cc1,{},315.1120806 -71,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C=Nc1ccc(cc1)CCCC)C,{$CC(C(=O)OCCCCCCOc1ccc(C=Nc2ccc(CCCC)cc2)cc1)(C)$},12.41121342 -72,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)C(=O)Nc1ccc(cc1)*,{},315.5911196 -73,*c1ccc(cc1)/C=C/c1c(cc(c(c1)CCCCCC)/C=C/*)CCCCCC,{},51.04706134 -74,*Oc1ccc(cc1)OC(=O)*,"{C(=O)>}",105.0499992 -75,*C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*,{},292.5925873 -76,*c1c(cc(c(c1)OCCOCCOCCOCCC(=O)O[Na])C#Cc1c(cc(c(c1)OC(COCCOCCOCCOC)COCCOCCOCCOC)C#C*)OC(COCCOCCOCCOC)COCCOCCOCCOC)OCCOCCOCCOCCC(=O)O[Na],{},-43.36051195 -77,*N(CC*)C(=O)CCCCCCCC,{},-49.55374382 -78,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},232.2309602 -79,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCC(=O)Oc1cc2c(cc1)cc(cc2)OC(=O)CCCCC*,{},-10.30414379 -80,*NC(=O)CCP(CCC(=O)NCCCCCC*)c1ccccc1,{},9.575432391 -81,*c1cc2C(c3c(c2cc1)ccc(c3)c1cc2C(c3c(c2cc1)ccc(c3)*)(CCCCCCCCCCCC)CCCCCCCCCCCC)(CCCCCCCC)CCCCCCCC,{},37.51200969 -82,*C1OC(=O)C(C1)CCC1C(=O)OC(C1)CCCCCCCCCC*,{},84.1573412 -83,*C(C*)C(=O)OC(CC(C)C)C,{$CC(C(=O)OC(C)CC(C)C)$},11.47675289 -84,*OC(C*)COc1ccc(cc1)C,{},-55.31696784 -85,*N(C(=O)CCCCCCCCCCCCCCC(=O)N(Cc1ccc(cc1)C*)C)C,{},-14.32362541 -86,*C1OC(OC(C1)*)O,{},136.0523749 
-87,*c1c(cc(c(c1)OCCCCCCCCCC)c1ccc(cc1)*)OCCCCCCCCCC,{},61.07976291 -88,*c1c2c(nccc2)c(cc1)OCCOc1c2ncccc2c(cc1)C*,{},74.04018308 -89,*NC(CC(=O)*)C(=O)OCCCCCC,{},0.713924343 -90,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)*)C,{},81.15934241 -91,*n1c(=O)c2ccc3c(=O)n(c(=O)c4ccc(c1=O)c2c34)CCCCCCCCCCCC*,{},101.4116865 -92,*Oc1c(cc(cc1)C=C1C(=O)C(=Cc2cc(c(cc2)OC(=O)c2ccc(cc2)C(=O)*)OC)CCC1)OC,{},105.2246285 -93,*Nc1ccc(cc1)NC(=O)C1C(C(=CC(C1)C(C(=O)*)CC(=O)O)C)C(=O)O,{},149.7687276 -94,*Oc1ccc(cc1)OC(=O)c1c(cccc1)C=Cc1ccc(cc1)C=Cc1c(cccc1)C(=O)*,{},86.85824281 -95,*OC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)OCC*,{},14.08686284 -96,*OC(=O)N(c1ccc(cc1)N(C(=O)OCC(C*)CCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)C)C,{},78.11061669 -97,*C1C(C(C(C1)C=C*)C(=O)OCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)C(=O)OCCCCCOc1ccc(cc1)c1ccc(cc1)C#N,{},27.68661296 -98,*C#CC(=C(*)CCCCOC(=O)NCCCCCC)CCCCOC(=O)NCCCCCC,{$C#CC(CCCCOC(=O)NCCCCCC)=C(CCCCOC(=O)NCCCCCC)$},5.219913288 -99,*c1ccc(cc1)/C=C/*,{},43.3930774 -100,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2ccc3S(=O)(=O)c4c(c3c2)cc(cc4)*)c1,{},355.5073659 -101,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)C(c1ccc(cc1)*)C,{},307.2645731 -102,*Oc1c(cc(c(c1)OC(=O)c1ccc(cc1)C=Nc1cc(c(cc1)OCCCCCCOc1c(cc(cc1)N=Cc1ccc(cc1)C(=O)*)Cl)Cl)Cl)Cl,{},82.06188226 -103,*c1ccc2n(c3c(c2c1)cc(cc3)/C=C/c1ccc(cc1)Oc1c(cc(cc1)c1ccc(cc1)c1ccc(cc1)c1cc(c(cc1)Oc1ccc(cc1)/C=C/*)C(F)(F)F)C(F)(F)F)CC(CCCC)CC,{},187.4626146 -104,*Oc1ccc(cc1)Oc1ccc(cc1)C(=Nc1ccc(cc1)Oc1ccc(cc1)N=C(c1ccc(cc1)*)C)C,{},117.5082044 -105,*Oc1ccc(cc1)NC(=O)NCCCCCCNC(=O)*,{},122.4088725 -106,*C(C*)OC(=O)CC(=O)C,{$CC(OC(=O)CC(C)=O)$},9.819405608 -107,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)OCCOc1ccc(cc1)C(=O)*)C,{},68.01369706 -108,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)*,{},176.5386789 -109,*N1C(=S)SC(=CC=C2SC(=S)N(C2=O)CC*)C1=O,{},164.0140939 -110,*C1NC(=O)C(NC1=O)CCC(=O)OCCCC=CCCCOC(=O)CC*,{},25.37727633 
-111,*Oc1ccc(cc1)N=Cc1cc(c(cc1)OC(=O)CCCCC(=O)Oc1c(cc(cc1)C=Nc1ccc(cc1)*)OC)OC,{},47.38250675 -112,*c1n(c(nn1)c1ccc(cc1)c1ccc(cc1)c1n(c(nn1)COc1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)OC*)CC1)c1ccccc1)c1ccccc1,{},140.326419 -113,*N1CCN(CC1)C(=O)C1C(C1)C(=O)*,{},100.5840527 -114,*C(=C*)c1c(sc(c1)C(F)(F)F)C(F)(F)F,{$C=C(c1cc(C(F)(F)F)sc1C(F)(F)F)$},54.57900551 -115,*N1C(=O)c2c(C1=O)cc(cc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)*)cc1,{},421.9822435 -116,*c1cc2c(C(=O)N(C2=O)c2c(c(c(c(c2F)F)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)F)F)cc1,{},388.0137456 -117,*C#CC(=C(*)OS(=O)(=O)c1ccc(cc1)C)OS(=O)(=O)c1ccc(cc1)C,{$C#CC(OS(=O)(=O)c1ccc(C)cc1)=C(OS(=O)(=O)c1ccc(C)cc1)$},164.1018919 -118,*OC(=O)CCC(=O)OCCOCCOCC*,{},14.21396213 -119,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1cc(ccc1)C(=O)*)(CC)C,"{C(=O)c3cccc(c3)C(=O)>}",194.1629808 -120,*n1c(=O)c2c(c3c(cc2c1=O)c(=O)n(c3=O)CCCCCCCCCC*)Br,{},137.1087261 -121,*C(C*)(C(=O)OCCCCCCCCOC(=O)OC1CC2=CCC3C(CCC4(C(CCC34)C(CCCC(C)C)C)C)C2(CC1)C)C,{$CC(C(=O)OCCCCCCCCOC(=O)OC1CCC2(C)C(=CCC3C2CCC2(C)C(C(C)CCCC(C)C)CCC32)C1)(C)$},47.48888608 -122,*OC(=O)CCCCCCCC(=O)OCCCCCCCCC*,{},3.667443154 -123,*Oc1ccc(cc1)c1c(cc(c(c1)OCCCCCC)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*)OCCCCCC,{},81.55306128 -124,*c1c2c(nccc2)c(cc1)CC*,{},70.93028064 -125,*c1n(c(cc1)CCC(=O)OC(=O)CC*)C,{},78.11959859 -126,*N1C(=O)C2OC3C(C2C1=O)C(=O)N(C3=O)c1ccc(cc1)c1c(nc2c(n1)cc(cc2)S(=O)(=O)c1cc2c(nc(c(n2)c2ccc(cc2)*)c2ccccc2)cc1)c1ccccc1,{},373.476202 -127,*c1oc(cc1)Sc1oc(cc1)C=C1C(=O)C(=C*)CC1,{},102.2883172 -128,*Oc1c(c(ccc1)Oc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)*)C(=O)Nc1ccc(cc1)N=Nc1ccccc1,{},238.4094841 -129,*=C=C=C(C(=*)CO)CO,{$=C=C=C(CO)C(CO)=$},42.01413885 -130,*c1oc(cc1)Sc1oc(cc1)C=NCCN=C*,{},95.6213855 -131,*NC(=O)/C=C/CC/C=C/C(=O)NCCCCCC*,{},2.127453311 -132,*Nc1c(cc(c(c1)SCCC#N)NC(=O)c1ccc(cc1)C(=O)*)SCCC#N,{},214.7483216 -133,*c1c(cc(cc1)CC*)C(=O)C,{$CCc1ccc(c(C(C)=O)c1)$},8.997925789 
-134,*Oc1ccc(cc1)C(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)C(=O)c1ccc(cc1)*,{},138.8344532 -135,*c1sc(nn1)c1cc(ccc1)OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCCOc1cc(ccc1)*,{},35.51374296 -136,*Nc1c(cc(c(c1)OC)*)OC,{},168.2573371 -137,*SCSCCCC*,{},1.410869343 -138,*c1nc(sc1)NC(=O)NCCCCCCNC(=O)Nc1nc(cs1)c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)*)CCC1,{},47.02441211 -139,*N(CC*)C(=O)c1ccc(cc1)C,{},87.3045689 -140,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCC(=O)Oc1ccc(cc1)c1ccc(cc1)OC(=O)CCCCC*,{},60.41501019 -141,*Oc1c(c(c(c(c1[2H])[2H])C(c1c(c(c(c(c1[2H])[2H])OC(=O)*)[2H])[2H])(C([2H])([2H])[2H])C([2H])([2H])[2H])[2H])[2H],{},230.9913822 -142,*Oc1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)c1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)Oc1c(cc(cc1)C(=O)c1cc(c(cc1)*)S(=O)(=O)O)S(=O)(=O)O,{},151.7193386 -143,*c1c2c(nsn2)c(cc1)c1sc(c(c1)CCCCCC)C=Cc1sc(c(c1CCCCCC)CCCCCC)C=Cc1sc(cc1CCCCCC)*,{},17.9542177 -144,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(=O)Nc2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)NC(=O)c2ccc(cc2)*)cc1,{},178.6163373 -145,*c1cc2n(c3c(c2cc1CCCCCC)cc(c(c3)C=C*)CCCCCC)CCCCCCCC,{},45.45691415 -146,*C#CC(=C(*)c1cncnc1)CCCCOC(=O)NCC(=O)OCCCC,{$C#CC(CCCCOC(=O)NCC(=O)OCCCC)=C(c1cncnc1)$},73.99227071 -147,*OC(=O)c1ccc(cc1)C(=O)NCCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},76.72929425 -148,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1)C(c1cc(c(cc1)*)[N-][N+]#N)(C)C)[N-][N+]#N,{},181.1592195 -149,*c1sc(cc1CCCCCCCC)c1sc(cc1CCCCCC(C(C(C(F)(F)F)(F)F)(F)F)(F)F)*,{},6.476080227 -150,*c1c2C(=O)N(C(=O)c2c(c2ccccc12)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)*)c1c(cccc1)F,{},314.5599695 -151,*NC(=O)CCCCCCCCCCC(=O)NCC1CC(CCC1)C*,{},26.18556129 -152,*Nc1ccc(cc1)NC(=O)c1cc(cc(c1)C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)C(=O)*,{},235.6895845 -153,*c1c(cc(c(c1)O)O)*,{$c1cc(O)c(O)cc1$},158.9171015 -154,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OCCCCCCC*)C,{},36.76976951 -155,*C(=C(*)C)[Si](CC)(C)C,{$C(C)=C([Si](C)(C)CC)$},157.1009863 -156,*OC(=O)CCCCCCCC(=O)*,{},-68.69432556 
-157,*OC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)OCCCCCCCCCCCC*,{},-26.12929226 -158,*c1c(cc(c(c1)OCCCCCCCCCC)*)OCCCCCCCCCC,{},5.423112055 -159,*C(C(=O)*)(C)C,{$C(=O)C(C)(C)$},105.8292511 -160,*C#CC(=C(*)Cn1c2ccc(cc2c2cc(ccc12)CCCCCCCCCCCCCCCC)CCCCCCCCCCCCCCCC)Cn1c2ccc(cc2c2cc(ccc12)CCCCCCCCCCCCCCCC)CCCCCCCCCCCCCCCC,{$C#CC(Cn1c2ccc(CCCCCCCCCCCCCCCC)cc2c2cc(CCCCCCCCCCCCCCCC)ccc21)=C(Cn1c2ccc(CCCCCCCCCCCCCCCC)cc2c2cc(CCCCCCCCCCCCCCCC)ccc21)$},-2.237818484 -161,*C(C*)CCCC(C)C,{$CC(CCCC(C)C)$},5.980301023 -162,*N1C(=O)C(CC1=O)Oc1ccc(cc1)N=Cc1ccc(cc1)OC1C(=O)N(C(=O)C1)c1ccc(cc1)Oc1ccc(cc1)*,{},154.6342574 -163,*c1nc(nc(n1)Oc1c(cccc1)C(=O)Oc1ccc(cc1)OC(=O)c1c(cccc1)O*)N(CC)c1ccccc1,{},146.7365026 -164,*c1nc2c(n1CCCS(=O)(=O)O)ccc(c2)c1ccc2c(nc(n2CCCS(=O)(=O)O)c2cc(ccc2)*)c1,{},208.6363648 -165,*OC(=O)C1C(=O)CC(C(=O)C1)C(=O)OCCCCCC*,{},-36.66445232 -166,*Oc1ccc(cc1)C(c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)*)c1c(cccc1)C(=O)O[Na],{},169.1341304 -167,*N(C(=O)CCCCCCCCCCCCCCCCC(=O)N(Cc1ccc(cc1)C*)CC)CC,{},4.279694171 -168,*c1nc(nc(n1)Oc1ccc(cc1)C(c1ccc(cc1)O*)(C)C)OC,{},121.2094568 -169,*OC(=O)NCc1ccc(cc1)CNC(=O)OCCCCCCCC*,{},-25.31417235 -170,*NC(=O)CCCCCCCC(=O)NCCCCCCCCC*,{},9.904824538 -171,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(ccc2)Oc2ccc(cc2)S(=O)(=O)c2ccc(cc2)Oc2cc(ccc2)*)cc1,{},199.6619548 -172,*c1n(c(cc1)*)CC,{},112.6054346 -173,*OC(=O)CCCCS(=O)(=O)CCCCC(=O)OCCCCCCCCCC*,{},37.87494117 -174,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C3(C(=O)N(C2=O)C(=O)*)CCCCC3)cc1,{},277.0792131 -175,*C(=C*)c1c(cccc1)C,{$C=C(c1ccccc1C)$},261.6623551 -176,*Oc1cc2C(c3c(c2cc1)ccc(c3)Oc1ccc(cc1)c1c2cc(ccc2c(c2ccccc12)c1ccc(cc1)*)CCC)(c1ccc(cc1)N(c1ccccc1)c1ccccc1)c1ccc(cc1)N(c1ccccc1)c1ccccc1,{},187.3941077 -177,*SC(=O)CCCCC(=O)SCC*,{},-21.91188888 -178,*OC(CC*)(C(F)(F)F)C(F)(F)F,{},-8.026293088 -179,*C(C(*)c1ccccc1)[N+](=O)[O-],{$C(c1ccccc1)C([N+](=O)[O-])$},173.9913454 
-180,*N1C(=O)c2c(C1=O)cc(cc2)C(c1cc2c(C(=O)N(C2=O)CC(=O)NNC(=O)c2ccc(cc2)C(=O)NNC(=O)C*)cc1)(C(F)(F)F)C(F)(F)F,{},226.7602549 -181,*C(C*)CCCCCCCCCCCCCCCCCCCC,{$CC(CCCCCCCCCCCCCCCCCCCC)$},21.77937998 -182,*OS(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)c1ccc(cc1)*,{},190.021745 -183,*SCCCCCC(=O)NCCc1ccc(cc1)CCNC(=O)CCCCC*,{},-18.01175462 -184,*N1CCC(CC1)C(=O)c1c(cc(cc1)C(=O)N1CCC(CC1)CCC*)Oc1ccccc1,{},131.1821319 -185,*C(C(CC*)(F)F)(Cl)F,{$CCC(F)(F)C(Cl)(F)$},-41.84388225 -186,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CC(=O)OCCCCCCOC(=O)C*,{},55.49864746 -187,*c1oc(cc1)Sc1oc(cc1)C=Nc1cc(ccc1)N=C*,{},122.5813993 -188,*C(C*)S(=O)c1ccccc1,{$CC(S(=O)c1ccccc1)$},137.9705527 -189,*OC(=O)c1cc(c(cc1)C(=O)Nc1ccc(cc1)C(c1ccc(cc1)C(c1ccc(cc1)NC(=O)c1c(ccc(c1)C(=O)OCCN(CC*)CCCCOc1ccc(cc1)N=Nc1ccc(cc1)CCCCCC)C(=O)O)(C)C)(C)C)C(=O)O,{},82.18427933 -190,*=C=C=C(C(=*)COC(=O)NCC)COC(=O)NCC,{$=C=C=C(COC(=O)NCC)C(COC(=O)NCC)=$},172.0633872 -191,*Oc1ccc(cc1)c1ccc(cc1)OCCCCCCOC(=O)Nc1c(ccc(c1)NC(=O)OCCCCOC(=O)Nc1cc(c(cc1)C)NC(=O)OCCCCCC*)C,{},41.49405569 -192,*c1sc(c(c1C#N)C#N)N=Cc1ccc(cc1)C=N*,{},204.4271705 -193,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},126.2551791 -194,*c1cc2c(C(=O)N(C2=O)c2cc(ccc2)P(=O)(c2cc(ccc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(=O)C2(C)C)C(=O)*)c2ccccc2)cc1,{},138.7903633 -195,*OCOCCCCCC*,{},-41.68464364 -196,*Oc1ccc(cc1)OC(=O)c1c(cc(c(c1)OCCCCCCCCCCCCCCCC)C(=O)*)OCCCCCCCCCCCCCCCC,{},77.98044592 -197,*c1sc(cc1)/C=C/c1ccc(cc1)N(c1ccc(cc1)N(c1ccc(cc1)/C=C/c1sc(cc1)/C=C(/C(=O)Nc1cc(ccc1)NC(=O)/C(=C/*)/C#N)\C#N)c1ccccc1)c1ccccc1,{},243.9892983 -198,*N(c1ccc(cc1)Cc1ccc(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CCC)CCC,{},173.7390032 -199,*Oc1ccc(cc1)C(c1ccc(cc1)OCC#CC#CC*)(C)C,{},48.046331 -200,*NC(=O)C(C(C(=O)NCCCCCC*)O)O,{},149.1553302 -201,*NC(=O)CCCCCC(=O)NCc1ccc(cc1)C*,{},61.73836642 -202,*Nc1c(c(c(cc1)*)Cl)Cl,{},115.5762842 -203,*NC(=O)C(C(=O)NCCCCCCCCCC*)CCCCCCCCCCCC,{},-0.868265132 
-204,*N1C(=O)c2c(C1=O)c(c(c(c2F)F)Oc1c(c(c(c(c1F)F)Oc1c(c2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)*)c(c1F)F)F)F)F)F,{},283.2696326 -205,*C1(c2c(C(=O)O1)cccc2)c1cc(c(c(c1)Br)OC(=O)c1cc(ccc1)C(=O)Oc1c(cc(cc1Br)*)Br)Br,{},245.9668197 -206,*OC(=O)CCCCSCCCCC(=O)OCCCCCCCCCC*,{},-22.30064955 -207,*OC(=O)c1c(cccc1)C(=O)OCc1ccc(cc1)C*,{},95.33964688 -208,*C=CCC(CC*)(C(=O)OC)C,{$C=CCC(C)(C(=O)OC)CC$},-28.15217408 -209,*O[Si](*)(CCCCCOc1ccc(cc1)OC(=O)c1ccc(cc1)OCCCC)C,{},-64.12566312 -210,*Nc1ccc(cc1)CCc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)*,{},245.2414672 -211,*=NN=CC#C[Si](C#CC=*)(c1ccccc1)c1ccccc1,{<=CC#C[Si](c1ccccc1)(c1ccccc1)C#CC=NN=>},131.2577432 -212,*Oc1ccc(cc1)C(=O)OCCOCCOCCOC(=O)c1ccc(cc1)OC(=O)Nc1c(ccc(c1)NC(=O)*)C,{},100.4455143 -213,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)Oc1ccc(cc1)C(c1ccccc1)(C)C,"{C(=O)c3ccc(cc3Oc3ccc(C(C)(C)c4ccccc4)cc3)C(=O)>}",135.0490958 -214,*OC(=O)NCCSCCCCCSCCNC(=O)OCCCC*,{},3.000872148 -215,*Oc1cc(ccc1)OC(=O)c1cc(c(c(c1)C(C)(C)C)OC(=O)c1ccc(cc1)C(=O)Oc1c(cc(cc1C(C)(C)C)C(=O)*)C(C)(C)C)C(C)(C)C,{},135.2011857 -216,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)*)(CCCC)C,"{C(=O)>}",118.8361345 -217,*c1cc2n3c(nc2cc1)c1ccc(cc1C3=O)Oc1cc2c3n(c4ccc(cc4n3)O*)C(=O)c2cc1,{},291.281809 -218,*Oc1c(cc(cc1)OC(=O)Oc1ccc(cc1)OC(=O)*)C,{},120.5520321 -219,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)OCCCCCCCCCOc2ccc(cc2)*)cc1,{},47.98864595 -220,*C1OC(CO1)COCC1OC(OC1)CCCCCCCC(=O)OCCOC(=O)CCCCCCC*,{},-2.553202642 -221,*Oc1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},136.3718208 -222,*C(C(=O)C*)c1ccc(cc1)C,{$CC(=O)C(c1ccc(C)cc1)$},67.57313464 -223,*C(C*)C(=O)OCC1(COC(OC1)(C)C)C,{$CC(C(=O)OCC1(C)COC(C)(C)OC1)$},95.74104893 -224,*Oc1c(cc(cc1)C=C1C(=O)C(=Cc2cc(c(cc2)OCCCC*)OC)CC1)OC,{},36.19808899 -225,*N=C1c2ccccc2C(=Nc2ccc(cc2)*)c2ccccc12,{},158.2579262 -226,*NC(=O)CCCCCCCCCCCCCCCC*,{},15.38660594 -227,*Nc1cc(ccc1)NC(=O)CCCCCC(=O)*,{},-12.07623639 -228,*c1cc2c(cc1)cc(cc2)*,{},239.5402998 -229,*c1ncc(cc1)c1ccc(nc1)*,{},138.2497069 
-230,*C(C*)n1c2ccc(cc2c2cc(ccc12)Br)Br,{$CC(n1c2ccc(Br)cc2c2cc(Br)ccc21)$},115.6170166 -231,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)c2ccc(cc2)c2ccc(cc2)*)cc1,{},357.2965065 -232,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)cc1,{},302.652739 -233,*OC(=O)NC1C(C1)NC(=O)OCCCC*,{},47.31586772 -234,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1C)C1C(CC(CC1)C(c1cc(c(c(c1)C)*)C)(C)C)C)C,{},250.477212 -235,*C1C(=O)N(C(=O)C1*)c1ccccc1,{},197.1821336 -236,*[Si](c1ccc(cc1)*)(C)C,{},126.7080905 -237,*Nc1c(c(c(c(c1[2H])[2H])*)[2H])[2H],{},235.370823 -238,*C1c2c(C(C=C1)C=C*)cc(cc2)CCCCCC,{$C=CC1C=CC(c2ccc(CCCCCC)cc21)$},9.704073219 -239,*c1ccc2n(c3c(c2c1)cc(cc3)/C=C/c1ccc(cc1)Oc1c(cc(cc1)c1ccc(cc1)c1ccc(cc1)c1cc(c(cc1)Oc1ccc(cc1)/C=C/*)C#N)C#N)CC(CCCC)CC,{},129.2206103 -240,*c1sc(c(c1OCCCCCCC)C)*,{},32.04437108 -241,*OC(=O)C(=O)OCCCCCCCCCC*,{},-65.08541819 -242,*C=CC(C(*)C)C,{$C=CC(C)C(C)$},78.17880502 -243,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(CC2(C)C)(c2ccc(cc2)Oc2cc3c(C(=O)N(C3=O)c3cc(ccc3)N3C(=O)c4c(C3=O)cc(cc4)Oc3ccc(cc3)C3(CC(c4c3cc(cc4)Oc3cc4c(C(=O)N(C4=O)c4cc(ccc4)*)cc3)(C)C)C)cc2)C)cc1,{},231.956044 -244,*=C1SC(=S)N(C1=O)c1ccc(cc1)N1C(=S)SC(=Cc2ccc(cc2)C=*)C1=O,{<=Cc1ccc(cc1)C=C2SC(=S)N(C2=O)c3ccc(cc3)N4C(=O)C(SC4=S)=>},187.1036635 -245,*Nc1c(cc(c(c1)C(=O)O)*)OC,{},93.18891585 -246,*NNC(=O)CCCCC(=O)NNC(=O)CCCCCCCC(=O)*,{},101.0022623 -247,*n1c(=O)c2sc3c(sc2c1=O)c(=O)n(c3=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)NC(=O)c1ccc(cc1)*,{},327.2441987 -248,*C1CC(CC1)C*,{},42.35931484 -249,*Nc1ccc(cc1)NC(=O)c1c(cc(c(c1)C(=O)O)C(=O)*)C(=O)O,{},240.3936293 -250,*NNC(=O)c1cc(ccc1)C(=O)*,{},232.2266265 -251,*c1sc(c(c1CCCCCCCCCCCC)CCCCCCCCCCCC)c1sc(cc1)c1sc(cc1)*,{},14.60597757 -252,*C(C*)C(=O)NCC,{$CC(C(=O)NCC)$},55.10481694 -253,*Oc1c(cc(cc1C(C)(C)C)C(=O)*)C(C)(C)C,{},206.7951116 -254,*OS(=O)(=O)c1ccc(cc1)c1ccc(cc1)S(=O)(=O)Oc1c(cc(cc1Br)C1(CCCCC1)c1cc(c(c(c1)Br)*)Br)Br,{},268.4647521 
-255,*C(C*)C(=O)NCCCCCCCCCCCC,{$CC(C(=O)NCCCCCCCCCCCC)$},21.73577755 -256,*N(c1ccc(cc1)*)CCCCCC,{},-128.6299242 -257,*Oc1c(cc(cc1)C(c1cc(c(cc1)OC(=O)c1cc(ccc1)C(=O)*)[N+](=O)[O-])(CCC(=O)O)C)[N+](=O)[O-],{},127.0784722 -258,*c1ncc(cc1)C(=O)NNC(=O)*,{},135.1836518 -259,*c1n(c(cc1)*)C(C(=O)NO)n1ccc2c1cccc2,{},180.3082327 -260,*N=Nc1ccc(cc1)*,{},144.6951233 -261,*c1sc2c(c1)sc(c2)c1sc(cc1CCCCCCCCCCBr)c1sc(c(c1)CCCCCCCCCCBr)*,{},24.95900908 -262,*C(CC(C*)c1ccccc1)(C(=O)OC)C#N,{$CC(c1ccccc1)CC(C(=O)OC)(C#N)$},127.0156605 -263,*=C=C=C(C(=*)COS(=O)(=O)c1ccc(cc1)OC)COS(=O)(=O)c1ccc(cc1)OC,{$=C=C=C(COS(=O)(=O)c1ccc(OC)cc1)C(COS(=O)(=O)c1ccc(OC)cc1)=$},55.30555285 -264,*c1c(cc(c(c1)OCCCCCCCCCC)C#C*)OCCCCCCCCCC,{},61.85443636 -265,*OCCCC(=O)NCCCCCCNC(=O)CCCCCC*,{},1.467345964 -266,*c1cncc(c1)C(=O)NCCCCCCCCCCNC(=O)*,{},85.60365033 -267,*C(C(C(C(*)(F)F)(F)F)(F)F)(C(F)(F)F)F,{$C(F)(F)C(F)(F)C(F)(F)C(C(F)(F)F)(F)$},-86.88823628 -268,*Oc1cc(ccc1)C(=O)NNC(=O)c1cc(ccc1)C(=O)NNC(=O)CC*,{},234.2133464 -269,*Oc1cc(ccc1)NC(=O)c1ccc(cc1)C(c1ccc(cc1)C(=O)Nc1ccc(cc1)*)(C(F)(F)F)C(F)(F)F,{},157.6228264 -270,*c1n(c(nn1)CCCCCCCC*)N,{},-41.92176029 -271,*c1nc2c([nH]1)cc(cc2)c1cc2c(nc([nH]2)c2ccc(cc2)NC(=O)c2ccc(cc2)C(=O)Nc2ccc(cc2)*)cc1,{},358.667269 -272,*Nc1c(cc(cc1Cl)*)Cl,{},-0.214281278 -273,*c1ccc2n(c3c(c2c1)cc(cc3)N=Cc1sc(cc1)c1sc(cc1)C=N*)CCCCCC,{},95.07860424 -274,*N1C2(CCCC2)C(=O)N(C1=O)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},17.25361376 -275,*P(=Nc1ccc(cc1)N=P(CC*)(Cl)Cl)(Cl)Cl,{},-27.34500252 -276,*Nc1c(cc(c(c1)SCCC#N)NC(=O)c1cc(ccc1)C(=O)*)SCCC#N,{},38.16065966 -277,*C(C*)(c1ccc(cc1)OC(=O)CC)OC(=O)C,{$CC(c1ccc(OC(=O)CC)cc1)(OC(C)=O)$},2.162388076 -278,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)CCCCCCCC,{},105.6965321 -279,*c1sc2cc3c(cc2n1)sc(n3)CCCCC*,{},87.36313445 -280,*NC(C(=O)*)CCC(=O)OCCCCCCCCCCCC,{},30.09586697 -281,*NC(C(=O)*)CO,{},84.57547927 -282,*Nc1c(cccc1)NC(=O)CCCCCCC(=O)*,{},71.92438381 -283,*Oc1c(cc(cc1)C=Cc1ccc(cc1)C=Cc1cc(c(cc1)OC(=O)CCCCCCCCC(=O)*)C)C,{},35.47523522 
-284,*c1c2c(nccn2)c(cc1)*,{},216.378732 -285,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)c1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)*,{},165.0428244 -286,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)F,{},-7.2122695 -287,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)OCCCOc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)S(=O)(=O)c2ccc(cc2)NC(=O)*)cc1,{},163.1829015 -288,*OC(=O)c1ccc(cc1)C(=O)NCCCCCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},-20.66610996 -289,*NNC(=O)c1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)C(=O)*,{},133.1528291 -290,*N=P(*)(OCCC(=O)C=C)OCCC(=O)C=C,{},-42.12432011 -291,*S(=O)(=O)c1ccc(cc1)c1ccc(cc1)*,{},229.0539301 -292,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)S(=O)(=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(=O)C2(CC)C)C(=O)*)cc1,{},242.6534046 -293,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)C,{},5.817024886 -294,*NC(CCCCNC(=O)NCCCCNC(=O)*)C(=O)OC,{},0.336556425 -295,*C(=C*)c1nc2c(n1C)cccc2,{$C=C(c1nc2ccccc2n1C)$},-6.104199835 -296,*Oc1ccc(cc1)N=Nc1ccc(cc1)*,{},116.9759489 -297,*c1nc(nc(n1)NC(=O)c1c(cc(c(c1)C(=O)N*)C(=O)O)C(=O)OC(=O)Nc1c(ccc(c1)NC(=O)OCCCCCCCC)C)c1ccccc1,{},-1.691479041 -298,*c1[nH]c2cc3c(cc2n1)nc([nH]3)c1ccc(cc1)*,{},340.5865983 -299,*OC(=O)Cc1ccc(cc1)CC(=O)OCCCC*,{},-34.10658315 -300,*SC(=O)NCCCCCCNC(=O)SCCCC*,{},-33.13797704 -301,*c1c(cc(c(c1)OC)*)OC,{},63.90036252 -302,*N(CC*)C(=O)CCC(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F,{},-52.64825421 -303,*OC(=O)c1c(cccc1)NC(=O)c1ccc(cc1)C(=O)Nc1c(cccc1)C(=O)OC(=O)c1cc(ccc1)C(=O)*,{},185.0999075 -304,*c1sc(cc1CCCCCCCC)Nc1ccc(cc1)*,{},49.50827798 -305,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)c1ccc(cc1)c1ccc(cc1)*,{},278.5221779 -306,*n1c(=O)c2c(c3c(c(c2c1=O)F)c(=O)n(c3=O)c1ccc(cc1)*)F,{},337.816724 -307,*OC(=O)c1ccc(cc1)C(=O)NCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},4.250402609 -308,*NC(=O)NC(=O)NCc1c(c(cc(c1)Cc1c(c(cc(c1)C*)C=O)O)C=O)O,{},190.6072154 -309,*OC(=O)C(C*)(CCCC)CCCC,{},-17.98562642 
-310,*C1(C(=O)C(CCC1)C*)CO,{},71.70468573 -311,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)*)cc1,"{N4C(=O)c5ccc(cc5C4=O)Oc6ccc(cc6)Oc7ccc8c(c7)C(=O)N(C8=O)>}",191.2304459 -312,*C(=C*)c1cc(c(c(c1)CO)OCc1ccc(cc1)CNC(COCCCCCCCC)C)CO,{$C=C(c1cc(CO)c(OCc2ccc(CNC(C)COCCCCCCCC)cc2)c(CO)c1)$},65.78481038 -313,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCCCCCCCC*,{},73.37639604 -314,*c1sc2cc3c(cc2n1)sc(n3)c1ccc(cc1)c1ccc(cc1)*,{},343.9030515 -315,*c1sc(cc1COCCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)*,{},59.88076613 -316,*Nc1ccc(cc1)CC(=O)*,{},63.68828699 -317,*C(C*)C(=O)N(c1ccccc1)O,{$CC(C(=O)N(O)c1ccccc1)$},141.1832818 -318,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)C(=O)*)c1ccccc1,"{C(=O)c2ccc(cc2)C(=O)>}",65.88587862 -319,*OC(CC(=O)*)C(C)C,{},-24.82439314 -320,*NC(C(=O)*)CC(=O)OCc1ccccc1,{},70.10647363 -321,*c1c(c(cc(c1)N=Nc1ccc(cc1)[N+](=O)[O-])*)O,{},165.6801535 -322,*OCCCCC(=O)NCCCCCCNC(=O)CCCC*,{},-56.49395983 -323,*=C=C=C(C(=*)CCCCOC(=O)NC(=O)OCCCC)CCCCOC(=O)NC(=O)OCCCC,{$=C=C=C(CCCCOC(=O)NC(=O)OCCCC)C(CCCCOC(=O)NC(=O)OCCCC)=$},27.51087357 -324,*Oc1cc(c(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)*)(C)C)[N-][N+]#N,{},196.3053138 -325,*c1sc(nn1)SCC(=O)NN=Cc1ccc(cc1)OCCCCOc1ccc(cc1)C=NNC(=O)CS*,{},39.79677892 -326,*OC(=O)NCCSCCCCSCCNC(=O)OCC*,{},26.25521298 -327,*C(C*)C(=O)N1CC[N+](CC1)(CCCCCCCCCCCC)C,{$CC(C(=O)N1CC[N+](C)(CCCCCCCCCCCC)CC1)$},-81.38297384 -328,*OC(=O)c1cc(ccc1)c1cc(ccc1)C(=O)OCCCCCCCCCC*,{},8.531981028 -329,*C1(C(=O)OC(=O)C1)C*,{},143.854555 -330,*Oc1cc(ccc1)OC(=O)c1ccc(cc1)C(=O)Oc1cc(ccc1)OCCCCCCCCCC*,{},20.92794041 -331,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)Oc1ccc(cc1)OC(=O)c1cc(ccc1)C(=O)*,{},119.4238893 -332,*=C1CN(CC(=Cc2oc(cc2)C(=O)CCCCC(=O)c2oc(cc2)C=*)C1=O)C,{<=Cc1ccc(o1)C(=O)CCCCC(=O)c2ccc(o2)C=C3CN(C)CC(C3=O)=>},95.14114513 -333,*Oc1ccc(cc1)NC(=O)c1c(c(c(c(c1F)F)C(=O)Nc1ccc(cc1)*)F)F,{},168.6168885 -334,*C(=C(CC*)C)C,{$CCC(C)=C(C)$},46.49641909 -335,*S(=O)(=O)NCCNS(=O)(=O)c1ccc(cc1)*,{},7.54081281 
-336,*N(C(=O)*)c1ccccc1,{},150.7130657 -337,*OC(=O)C(C(=O)OCCCCCC*)CCCCCCOc1ccc(cc1)c1ccc(cc1)OCc1ccc(cc1)[N+](=O)[O-],{},17.95743357 -338,*N1C(=S)SC(=Cc2ccc(cc2)C=C2SC(=S)N(C2=O)CCCCCC*)C1=O,{},35.37444876 -339,*c1oc(cc1)Sc1oc(cc1)C=Nc1ccc(cc1)N=C*,{},95.64631957 -340,*OC(=O)c1ccc(cc1)S(=O)(=O)CCCCCCS(=O)(=O)c1ccc(cc1)C(=O)OCCCCCC*,{},35.86441642 -341,*OS(=O)(=O)c1cc(ccc1)S(=O)(=O)Oc1cc(ccc1)*,{},29.19231194 -342,*SSC(=O)N(c1ccc(cc1)Cc1ccc(cc1)N(C(=O)SSCCCC*)C)C,{},20.68923822 -343,*C(C*)(c1ccc(cc1)OC(=O)C)OC(=O)C,{$CC(c1ccc(OC(C)=O)cc1)(OC(C)=O)$},74.85696518 -344,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OC(=O)CCCCCCCCCCC(=O)*)C,{},6.458707655 -345,*N(C(=O)CCCCC(=O)N(CC(C(C(C*)(F)F)(F)F)(F)F)CC)CC,{},10.59395774 -346,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)c2ccc(cc2)C(=O)*)CC1,{},192.2096838 -347,*Oc1cc(c(cc1)N1ON1c1c(cc(cc1)OC(=O)CCCCCCCCCCC(=O)*)C)C,{},50.67469492 -348,*c1c(cc(c(c1)Oc1ccc(cc1)S(=O)(=O)O[Na])c1ccc(cc1)*)Oc1ccc(cc1)S(=O)(=O)O[Na],{},172.5717242 -349,*OC(=O)NCCCCC*,{},-13.55087665 -350,*N1C(=O)C(CC1=O)Nc1ccc(cc1)NC1C(=O)N(C(=O)C1)c1ccc(cc1)Cc1ccc(cc1)*,{},248.5034267 -351,*C(=C*)c1ccc(cc1)OCCCCCC(=O)Oc1c(c(c(c(c1F)F)F)F)F,{$C=C(c1ccc(OCCCCCC(=O)Oc2c(F)c(F)c(F)c(F)c2F)cc1)$},73.83198457 -352,*Oc1cc(ccc1)OC(=O)c1ccc(cc1)C=Nc1ccc(cc1)OCCCCCCOc1ccc(cc1)N=Cc1ccc(cc1)C(=O)*,{},83.83402436 -353,*C1=NC2=CC(C=CC2=C1)*,{},103.156476 -354,*C=CCCCCCCCC*,{$C=CCCCCCCCC$},-17.2820223 -355,*C(CCC*)Cl,{$CCCC(Cl)$},-30.93658282 -356,*C#CC(=C(*)CCCCOC(=O)NCCC)CCCCOC(=O)NCCC,{$C#CC(CCCCOC(=O)NCCC)=C(CCCCOC(=O)NCCC)$},40.70123878 -357,*NC(CC(=O)*)c1ccccc1,{},-30.79261317 -358,*S(=O)(=O)C(C=CC(*)C)C,{},33.82674704 -359,*C(C(*)(C([2H])([2H])[2H])C([2H])([2H])[2H])([2H])[2H],{$C([2H])([2H])C(C([2H])([2H])[2H])(C([2H])([2H])[2H])$},85.2671101 -360,*c1sc(cc1)C(=O)NCCCCCCNC(=O)*,{},42.90894132 -361,*SC(=O)c1ccc(cc1)C(=O)SCc1c(c(c(c(c1C)C)C*)C)C,{},127.8963011 -362,*c1ccc2c(c1)c(=O)oc(n2)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)c1oc(=O)c2c(ccc(c2)C*)n1,{},378.8956296 
-363,*Nc1ccc(cc1)NC(=O)c1c(cc(c(c1)SCCCCCCCC)C(=O)*)SCCCCCCCC,{},69.67482713 -364,*Oc1c(cc(cc1)C=Nc1ccc(cc1)N=Cc1cc(c(cc1)OC(=O)CCCCC(=O)*)OC)OC,{},-41.85748469 -365,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)CCCCCCC(=O)*,"{C(=O)c2ccc(cc2)OC(=O)CCCCCCC(=O)>}",72.32081554 -366,*c1nc(sc1)N=Cc1cc(c(cc1)OCCCCOc1c(cc(cc1)C=Nc1nc(cs1)c1ccc(cc1)Oc1ccc(cc1)*)OC)OC,{},104.0626058 -367,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)NNC(=O)c1cc(c(cc1)NC(=O)c1ccc(cc1)*)O,{},111.457133 -368,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1cc(ccc1)Oc1ccc(cc1)C(=O)*,"{C(=O)c3cccc(c3)Oc4ccc(cc4)C(=O)>}",89.5930824 -369,*C(C*)OCCC,{$CC(OCCC)$},-17.16506937 -370,*OC(=O)CCCC(=O)OCC(C(C(C(C*)(F)F)(F)F)(F)F)(F)F,{},-64.11657159 -371,*OC1CCC(CC1)OC(=O)CCCCCCC(=O)*,"{C(=O)CCCCCCC(=O)>}",-35.83964331 -372,*N1C(=O)N(C(=O)C1(C)C)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},302.3547996 -373,*Oc1ccc(cc1)OCC(=O)OC(=O)c1ccc(cc1)C(=O)OC(=O)C*,{},80.44433982 -374,*c1c2c(c(cc1)*)cccc2,{},189.0821827 -375,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)c1ccc(cc1)Cl,{},168.825845 -376,*[Si](c1ccc(cc1)*)(OCC)OCC,{},7.02246836 -377,*NC(CNC(=O)NCCCCCCNC(=O)*)C,{},55.35434565 -378,*c1nc(cs1)c1ccc(cc1)c1nc(sc1)CCCC*,{},110.5188915 -379,*Oc1ccc(cc1)OC(=O)CCCCCCCC(=O)*,"{C(=O)CCCCCCCC(=O)>}",-6.058372606 -380,*C(C*)OC(=O)C(CC)(CC)CC,{$CC(OC(=O)C(CC)(CC)CC)$},20.41787387 -381,*NC(=O)CCCCCCCCCCCCCCC(=O)NCCc1ccc(cc1)CC*,{},65.25488595 -382,*C1CCN(CC1)SC(=O)OCCCCOC(=O)SN1CCC(CC1)CCC*,{},-6.032415023 -383,*Oc1cc2c(C(CC2(C)C)(c2ccc(cc2)Oc2ccc(cc2)C2(CC(c3c2cc(cc3)*)(C)C)C)C)cc1,{},308.5311609 -384,*OC(=O)c1ccc(cc1)CCc1ccc(cc1)C(=O)OCC*,{},53.2795594 -385,*c1oc2c(n1)cc(cc2)c1cc2c(oc(n2)CCCCCCCC*)cc1,{},61.93714913 -386,*C(=C*)CNS(=O)(=O)CC,{$C=C(CNS(=O)(=O)CC)$},44.748248 -387,*c1nc(ccc1)Oc1ccc(cc1)Oc1ccc(cc1)O*,{},-19.11943844 -388,*C(=C(*)C)[Si](CCCC)(C)C,{$C(C)=C([Si](C)(C)CCCC)$},152.5242236 -389,*OC(=O)CCSCCC(=O)*,{},6.950276755 -390,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=C(C#N)C#N)c1ccc(cc1)c1ccc(cc1)C(=C(C#N)C#N)Nc1ccc(cc1)*,{},270.2905197 
-391,*Oc1cc(ccc1)OC(=O)Oc1ccc(cc1)OC(=O)*,{},-104.3379932 -392,*OC(=O)c1ccc(cc1)NC(=O)CCCCC(=O)Nc1ccc(cc1)C(=O)OCCC*,{},83.99780359 -393,*c1nc2c([nH]1)cc(cc2)NC(=NO)C(=NO)Nc1ccc2c(nc([nH]2)CCCC*)c1,{},170.1130329 -394,*Nc1c(c(c(c(c1C)C)NC(=O)c1ccc(cc1)C(=O)*)C)C,{},205.6552433 -395,*OC(=O)Nc1ccc(cc1)NC(=O)OCC*,{},134.2908917 -396,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)C(=O)*)Cl,{},119.6241841 -397,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1c(cc(c(c1)C(=O)OCCCCCC)C(=O)*)C(=O)OCCCCCC,{},77.13970172 -398,*N1CCN(CC1)CCC(=O)N(CCN(C(=O)CC*)C(C)C)C(C)C,{},-43.71593166 -399,*c1ccc2n(c3c(c2c1)cc(cc3)C=NN(c1ccc(cc1)S(=O)(=O)c1ccc(cc1)N(N=C*)CCCC)CCCC)CC,{},139.604302 -400,*Sc1ccc(cc1)c1ccc(cc1)SC(=O)CCCCC(=O)*,{},13.2621556 -401,*N(c1ccc(cc1)c1ccc(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CC)CC,{},62.56896406 -402,*S(=O)(=O)CCCC*,{},-14.41108992 -403,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)c2ccc(cc2)C(=O)*)CCC1,{},240.3798309 -404,*c1ccc2n(c3c(c2c1)cc(cc3)*)CC,{},206.6525359 -405,*OC(C(C(=O)*)(C)C)c1ccccc1,{},65.42132156 -406,*c1nc2c([nH]1)cc(cc2)c1ccc2c(nc([nH]2)c2ccc(cc2)C(=O)c2ccc(cc2)*)c1,{},300.9001313 -407,*Nc1ccc(cc1)NC(=S)NC(=O)c1ccc(cc1)C(=O)NC(=S)*,{},220.8197438 -408,*=c1c2cc3c(cc2c(=O)o1)c(=O)oc3=Nc1cc(ccc1)Oc1cc(ccc1)Oc1cc(ccc1)N=*,{<=Nc1cccc(c1)Oc2cccc(c2)Oc3cccc(c3)N=C4OC(=O)c5cc6c(cc54)C(OC6=O)=>},132.5253262 -409,*OS(=O)(=O)c1ccc(cc1)S(=O)(=O)c1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)*,{},194.2678014 -410,*OC(CCOC(=O)c1cc(ccc1)C(=O)*)C,"{C(=O)c1cccc(c1)C(=O)>}",-42.28619333 -411,*NC(=O)CCCCCCCCC(=O)NC*,{},66.76854223 -412,*NC(=O)CCP(=O)(CCC(=O)NCC*)C,{},143.8466204 -413,*C(C*)c1c(cccc1)OC,{$CC(c1ccccc1OC)$},160.10962 -414,*OCCCCCOCCCCCCOCCCCCC*,{},-72.01996519 -415,*NC(=O)C(=O)NCCNC(=O)CCCCCCCC(=O)NCC*,{},112.5256843 -416,*OC(=O)c1ccc(cc1)C(=O)OCC(C*)(CCl)CCl,{},11.46555203 -417,*OC(COC(=O)CCCCC(=O)*)C,"{C(=O)CCCCC(=O)>}",-15.95518318 -418,*c1ccc(cc1)c1ccc(cc1)C(*)(C)C,{},210.9469969 -419,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*,{},203.5999878 
-420,*Oc1c(cc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)*)CBr,{},98.31320425 -421,*Oc1cc(ccc1)C(=O)OC(=O)c1cc(ccc1)OCC*,{},3.697542049 -422,*C(=C*)c1ccc(cc1)[N+](=O)[O-],{$C=C(c1ccc([N+](=O)[O-])cc1)$},-12.60583746 -423,*c1sc(cc1)[Si](c1sc(cc1)[SiH](*)C)(C)C,{},60.04219897 -424,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)*)C,"{C(=O)c3ccc(cc3)Oc4ccc(cc4)C(=O)>}",76.95210942 -425,*C(C*)C(=O)n1sc2c(c1=O)cccc2,{$CC(C(=O)n1sc2ccccc2c1=O)$},48.67425788 -426,*Nc1cc(cc(c1)C(=O)OCCN(c1ccc(cc1)S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C)NC(=O)c1cc(cc(c1)OCCN(c1ccc(cc1)S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C)C(=O)*,{},172.7344272 -427,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1c(cc(cc1)c1cc(c(cc1)*)C)C,{},271.2577887 -428,*c1c(cc(c(c1)OCCCCCCOc1ccc(cc1)C1CCC(CC1)CCCCC)C=C*)OCCCCCCOc1ccc(cc1)C1CCC(CC1)CCCCC,{},36.67582945 -429,*OC(=O)SSC(=O)OCCCC*,{},-58.94578339 -430,*NC(C(=O)NCC(=O)*)C,{},19.96697332 -431,*NC(=O)CNC(=O)CC*,{},70.42884183 -432,*SC(=O)CCCCC(=O)SCc1c(c(c(c(c1C)C)C*)C)C,{},44.80678229 -433,*C(=C*)c1ccccc1,{$C=C(c1ccccc1)$},66.12991425 -434,*c1cc2c(C(=O)N(C2=O)c2cc(ccc2)NC(=O)c2cc(ccc2)C(=O)Nc2cc(ccc2)NC(=O)*)cc1,{},213.4893774 -435,*Oc1c(cc(cc1Br)C(c1cc(c(c(c1)Br)OC(=O)c1cc(ccc1)C(=O)*)Br)(CCC(=O)O)C)Br,{},175.2273279 -436,*OC(=O)CCCCC(=O)OCC(C*)(CCl)CCl,{},2.83724036 -437,*N1CCN(CC1)C(=O)SSCCCCSSC(=O)*,{},-3.571974592 -438,*N1C(=O)N(C(=O)C1(c1ccccc1)c1ccccc1)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},183.844758 -439,*c1cc2n(c3c(c2cc1)ccc(c3)C#CC#C*)CCCCCCCCCCCCCCCC,{},68.35869686 -440,*c1c2c(nccc2)c(cc1)OCc1ccc(cc1)COc1c2ncccc2c(cc1)C*,{},200.3538526 -441,*OC(=O)c1ccc(cc1)C(=O)OCCOCCOCC*,{},36.06576581 -442,*c1c2c(c(s1)*)sc(n2)CCCCCCCCC,{},56.24069524 -443,*=C=C=C(C(=*)COS(=O)(=O)c1ccc(cc1)C)COS(=O)(=O)c1ccc(cc1)C,{$=C=C=C(COS(=O)(=O)c1ccc(C)cc1)C(COS(=O)(=O)c1ccc(C)cc1)=$},76.80290526 -444,*Oc1c(cc(cc1)C=CC=Cc1cc(c(cc1)OCCCCCCC*)C)C,{},41.89270134 
-445,*Oc1ccc(cc1)C(=O)OCCCCOC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},68.79315579 -446,*Oc1c(c(ccc1)Oc1ccc(cc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)*)C#N,{},160.9324159 -447,*C1C(=O)N(C(=O)C1*)c1ccc(cc1)COC(C)(C)C,{},122.5873684 -448,*c1ccc2n(c3c(c2c1)cc(cc3)C(=O)Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)*)(C)C)C,{},73.83261176 -449,*/C=C/*,{$/C=C/$},59.5588378 -450,*c1ncc(cc1)c1n(c(cc1)c1n(c(cc1)*)C)C,{},256.5965094 -451,*c1ncc(cc1)*,{},322.0959561 -452,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)NC(=O)*)cc1,{},48.20867337 -453,*N(C(=O)*)CCCCCC,{},14.34014558 -454,*Nc1cc(ccc1)C#Cc1cc(ccc1)NC(=O)c1c(cc(cc1)C(=O)*)C(=O)O,{},187.6187871 -455,*c1c(cc(c(c1)C*)C)O,{$Cc1cc(c(O)cc1C)$},12.40187977 -456,*Oc1cc(c(cc1)C(=O)Nc1ccc(cc1)NC(=O)c1c(cc(cc1)*)C(=O)O)C(=O)O,{},216.6231471 -457,*c1nc(nc(n1)Oc1cc(ccc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1cc(ccc1)O*)Sc1ccccc1,{},184.9515774 -458,*=C1c2c(C(=O)O1)cc(cc2)c1cc2c(C(=O)OC2=Nc2cc(ccc2)Oc2cc(ccc2)Oc2cc(ccc2)N=*)cc1,{<=Nc1cccc(c1)Oc2cccc(c2)Oc3cccc(c3)N=C4OC(=O)c5ccc(cc54)c6ccc7c(c6)C(=O)OC7=>},193.7356518 -459,*Nc1c(cc(cc1)*)CC,{},162.1855704 -460,*OC(=O)NCCCCCCNC(=O)OCCN(CC*)c1ccc(cc1)N=Nc1ccc(cc1)C,{},24.09354398 -461,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2ccc(cc2)*)c1,{},423.6341908 -462,*c1ncc(cc1)C(=O)OC(=O)COc1ccc(cc1)OCC(=O)OC(=O)*,{},155.9709567 -463,*C(C*)C(=O)OCCN(S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C,{$CC(C(=O)OCCN(C)S(=O)(=O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F)$},-148.0297376 -464,*OC(=O)c1cc(ccc1)C(=O)OCC1C(C1)C*,{},10.23490017 -465,*NC(C(=O)*)C(C)C,{},153.0278775 -466,*C(C*)(C(=O)OCC(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)F,{$CC(C(=O)OCC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F)(F)$},-98.70472035 -467,*C(*)C(=O)OC(C)(C)C,{$C(C(=O)OC(C)(C)C)$},12.73837737 -468,*OC(=O)c1c(cccc1)c1c(cccc1)C(=O)OCCCC*,{},71.34273106 -469,*Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)Nc1ccc(cc1)CCc1ccc(cc1)NC(=O)*,{},183.1975298 
-470,*C1C(=O)N(C(=O)C1C(C*)OC(=O)C)c1ccccc1,{},157.3759721 -471,*OC(CC(=O)*)C(Cl)Cl,{},110.2326669 -472,*OC(=O)NCCCCCCCCCCNC(=O)OCCCCCCCC*,{},24.78605928 -473,*C1C=CC(CC1)*,{},103.377349 -474,*NC(=O)NCCCP(CCC*)c1ccccc1,{},-49.19787298 -475,*c1oc(nn1)CCCCCCCC*,{},-48.88416733 -476,*OC(=O)NCCCCCCNC(=O)OCC(C*)(C)C,{},47.60248847 -477,*N=P(*)(OCC(C(C(F)(F)F)(F)F)(F)F)OCC(C(C(F)(F)F)(F)F)(F)F,{},-77.91107652 -478,*OC(=O)c1ccc(cc1)C(=O)OCCCCCC(=O)NCCNC(=O)CCCCC*,{},81.65245915 -479,*c1cc2c(C(=O)OC2=Nc2cc(ccc2)N=C2OC(=O)c3c2cc(cc3)C(=O)*)cc1,{},246.6584182 -480,*C*,{$C$},-2.526682925 -481,*c1c(nnc(n1)c1nc(ccc1)c1nc(c(nn1)c1ccccc1)c1ccc(cc1)Sc1ccc(cc1)*)c1ccccc1,{},419.5781202 -482,*c1cc2c(C(=O)N(C2=O)c2c(cc(cc2)c2cc(c(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Oc2ccc(cc2)C(c2ccc(cc2)OC(=O)*)(C)C)C)C)cc1,{},87.17702398 -483,*=c1sc(cs1)c1ccc(cc1)C=*,{<=Cc1ccc(cc1)C2=CSC(S2)=>},105.3372816 -484,*Oc1ccc(cc1)N=Cc1ccc(cc1)OC(=O)c1ccc(cc1)C=Nc1ccc(cc1)OCCCCOC(=O)NCCCCCCNC(=O)OCCCC*,{},-34.25655466 -485,*Oc1ccc(cc1)C(=O)CNc1ccc(cc1)NCC(=O)c1ccc(cc1)*,{},158.6944649 -486,*OC(=O)N(c1c(ccc(c1)N(C(=O)OCC*)C)C)C,{},42.46691504 -487,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(C(=C2C)C(=O)OCC)c2ccc(cc2)Cl)C(=O)*)cc1,{},170.3261787 -488,*C(C*)(C(=O)Oc1ccc(cc1)C)C,{$CC(C(=O)Oc1ccc(C)cc1)(C)$},126.1154692 -489,*c1sc2c(n1)ccc(c2)OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCCOc1ccc(cc1)*,{},-17.21827415 -490,*c1nc2c(c(c1)OCCO*)cc(cc2)C,{},88.77435741 -491,*Oc1ccc(cc1)C(=O)CCCCCCCCC(=O)c1ccc(cc1)OC(=O)*,{},6.568396994 -492,*c1ccc2c(nc([nH]2)c2cc(ccc2)c2nc3c([nH]2)ccc(c3)C(=O)Nc2ccc(cc2)NC(=O)*)c1,{},309.3374253 -493,*OCC1(C2CCC(C1)CC2)C*,{},81.08225465 -494,*c1nc(ccc1)C=Nc1ccc(cc1)N=C*,{},139.1234551 -495,*/C(=C(/*)\c1ccccc1)/c1ccccc1,{$/C(c1ccccc1)=C(c1ccccc1)/$},206.5698859 -496,*N1C(CN(C(C1)C)C(=O)OCCOC(=O)*)C,{},60.10633691 -497,*C1C(CC1)*,{$C1CCC1$},69.57488221 -498,*OC(C*)CCCCCCCCOc1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)OCC(CC)C,{},-0.17829817 
-499,*C(C*)(C(=O)OCCOC(=O)c1cc(cc(c1)OC(=O)c1ccc(cc1)N=Nc1ccc(cc1)OCCCCCCC)OC(=O)c1ccc(cc1)N=Nc1ccc(cc1)OCCCCCCC)C,{$CC(C(=O)OCCOC(=O)c1cc(OC(=O)c2ccc(N=Nc3ccc(OCCCCCCC)cc3)cc2)cc(OC(=O)c2ccc(N=Nc3ccc(OCCCCCCC)cc3)cc2)c1)(C)$},9.014452923 -500,*C(C*)C(=O)OCCOC(C(F)(F)F)(C(F)(F)F)F,{$CC(C(=O)OCCOC(F)(C(F)(F)F)C(F)(F)F)$},-51.63721715 -501,*Sc1ccc(cc1)*,{},64.586946 -502,*Oc1ccc(cc1)CCCNC(=O)CCCCC(=O)NCCCc1ccc(cc1)OCCCCC*,{},28.2211431 -503,*SSCCCCSSCCCCCC*,{},-41.26672381 -504,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)OCCN(CCOC(=O)*)c2ccc(cc2)N=Nc2ccc(cc2)[N+](=O)[O-])cc1,{},145.3751112 -505,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)C(C(=O)N1C(=O)N(C(C1=O)(C)C)C(=O)C(*)C)C,{},252.5865186 -506,*OC(=O)CCCCC(=O)OCc1ccc(cc1)C*,{},-4.158432897 -507,*Nc1c(cc(cc1)NC(=O)c1c(cc(c(c1)C(=O)*)C(=O)O)C(=O)O)S(=O)(=O)O[Na],{},129.970858 -508,*Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)C(CC(C)C)N1C(=O)c2c(C1=O)cccc2)C(=O)*,{},36.87047274 -509,*N1C(CN(C(C1)C)SC(=O)OCCCCOC(=O)S*)C,{},-30.512105 -510,*N(C(=O)*)CC=C,{},164.8639006 -511,*[Si](c1ccc(cc1)*)(c1ccc(cc1)CN(C)C)C,{},12.90627629 -512,*c1sc(cc1)C#CC#C*,{},49.13696662 -513,*c1ncnc(c1)C=Cc1ccc(cc1)C=C*,{},84.80053451 -514,*c1nc(nc(n1)Oc1c2c(ccc1C(=O)Nc1ccc(cc1)NC(=O)c1c(c3c(cc1)cccc3)O*)cccc2)N1CCCCC1,{},162.6671353 -515,*OC(=O)CCC(=O)OCCCCCCCCCC*,{},1.783806133 -516,*c1cc2n3c(=O)c4cc5c(cc4c3nc2cc1)c(=O)n1c2ccc(cc2nc51)*,{},384.637936 -517,*c1ccc2n(c3c(c2c1)cc(cc3)C(=O)Oc1ccc(cc1)C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)OC(=O)*)C,{},262.5942508 -518,*=Nc1ccc(cc1)N=C(Nc1c(cc(cc1)c1cc(c(cc1)NC(=*)C)C(=O)O)C(=O)O)C,{<=Nc1ccc(cc1)N=C(C)Nc2ccc(cc2C(=O)O)c3ccc(c(C(=O)O)c3)NC(C)=>},89.38045943 -519,*OC(=O)C=C(CC(=O)OCC*)c1ccc(cc1)OCC,{},1.131191733 -520,*OC(=O)Nc1c(ccc(c1)NC(=O)OCCOCCOCCC*)C,{},-26.75831261 -521,*Nc1c(cccc1)CCc1c(cccc1)NC(=O)*,{},207.655323 -522,*SC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)SCc1ccc(cc1)C*,{},90.77725081 
-523,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)NCCC[Si](O[Si](CCCNC(=O)*)(C)C)(C)C)cc1,{},143.0502256 -524,*OC(=O)c1cc(ccc1)C(=O)OCCNC(=O)c1ccc(cc1)C(=O)NCC*,{},121.124261 -525,*C(C*)(C(=O)OCCCCCCCCCCOc1ccc(cc1)N1C(=O)C(=Cc2c(c3c(n2C)cccc3)C)C(=C(C)C)C1=O)C,{$CC(C(=O)OCCCCCCCCCCOc1ccc(N2C(=O)C(=Cc3c(C)c4ccccc4n3C)C(=C(C)C)C2=O)cc1)(C)$},-17.96880959 -526,*SC(=O)CCCCCCCCC(=O)SCCCCCC*,{},-39.13613776 -527,*c1nc(ccc1)C(=O)NCCCCCCCCCCNC(=O)*,{},49.59402876 -528,*NC(=O)c1ccc(cc1)C(=O)NCCCCCCCCCC*,{},50.1155014 -529,*Nc1ccc(cc1)NC(=O)c1ccc(cc1)NC(=O)C=Cc1ccc(cc1)C=CC(=O)Nc1ccc(cc1)C(=O)*,{},169.7759737 -530,*NNC(=O)CCCCCCCCC(=O)NNC(=S)c1cc(ccc1)C(=S)*,{},32.4414411 -531,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)NC(=O)c2cc(ccc2)C(=O)Nc2ccc(cc2)*)cc1,{},155.8375326 -532,*NC(=S)C=Cc1ccc(cc1)Cc1ccc(cc1)C=CC(=S)NCC*,{},99.87529352 -533,*c1ccc2[nH]c3c(c2c1)cc(cc3)C(=O)c1c(cc(c(c1)C(=O)*)C(=O)O)C(=O)O,{},106.1795053 -534,*c1nc(sc1)NC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)Nc1nc(cs1)c1ccc(cc1)Oc1ccc(cc1)*,{},221.6298798 -535,*OC(=O)c1ccc(cc1)N1ON1c1ccc(cc1)C(=O)OCCOCC*,{},15.34426557 -536,*c1ccc2nc3c(Sc4cc(ccc4N3)c3cc4Sc5c(Nc4cc3)nc3ccc(cc3n5)O*)nc2c1,{},383.4 -537,*c1cc2Sc3c(=Nc2cc1)[nH]c1ccc(cc1n3)c1ccc2[nH]c3=Nc4c(Sc3nc2c1)cc(cc4)*,{},418.69 -538,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2oc(cc2)*)c1,{},419.98 -539,*=C1OC(=c2cc3ccc4=CC(=*)C=c5ccc(c2)c3c45)c2c3c4c1ccc1cccc(c41)c1c3c(ccc1)cc2,{<=c1cc2ccc3cc(cc4ccc(c1)c2c34)=c4oc(c5ccc6cccc7c8cccc9ccc4c(c98)c5c67)=>},432.43 -540,*c1cc2c3n(c4ccc(cc4n3)Oc3ccc4n5c(nc4c3)c3ccc(cc3C5=O)C(*)(C(F)(F)F)C(F)(F)F)C(=O)c2cc1,{},395.15 -541,*c1cc2n3c(=O)c4cc5c(cc4c3nc2cc1)c(=O)n1c2ccc(cc2nc51)O*,{},416.53 -542,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)c1c(cc(cc1c1ccc(cc1)c1ccccc1)c1ccc(cc1)c1cc(c(c(c1)c1ccc(cc1)c1ccccc1)c1ccc(cc1)*)c1ccc(cc1)c1ccccc1)c1ccc(cc1)c1ccccc1,{},435 -543,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc2c(nc([nH]2)c2ccc(cc2)*)c1,{},456.35 
-544,*N1C(=O)c2cc3C(c4c(Oc3cc2C1=O)cc1C(=O)N(C(=O)c1c4)c1cc(c(cc1)c1c(cc(cc1)*)C(F)(F)F)C(F)(F)F)(C(F)(F)F)C(F)(F)F,{},472.25 -545,*c1cc2nc3c4c5c6c(c3nc2cc1)cccc6c1nc2ccc(cc2nc1c5ccc4)*,{},411.97 -546,*=C1C=c2ccc3cc(=C4c5ccccc5C(=*)c5ccccc45)cc4ccc(=C1)c2c34,{<=c1cc2ccc3cc(cc4ccc(c1)c2c34)=c4c5ccccc5c(c5ccccc45)=>},437.49 -547,*c1n(c(cc1)*)C(C(=O)OC)C,{},279.4452403 -548,*NC(C(=O)NCC(=O)NCC(=O)*)C,{},208.6397491 -549,*c1sc2cc3c(cc2n1)sc(n3)c1c(cc(c(c1)OCCCCCC)*)OCCCCCC,{},168.5263131 -550,*C(*)C(=O)OC(CC)(C)C,{$C(C(=O)OC(C)(C)CC)$},136.5678336 -551,*N(c1ccc(cc1)*)CCCCCCC,{},110.7170963 -552,*Oc1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)c1ccccc1,"{C(=O)c2ccc(cc2c2ccccc2)C(=O)>}",227.700588 -553,*S(=O)(=O)NCCNS(=O)(=O)c1ccc(cc1)c1ccc(cc1)*,{},173.2454244 -554,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)*,{},213.4133554 -555,*c1[nH]c(cc1c1ccccc1)*,{},120.4503456 -556,*c1ccc(cc1)C1C(C(C1C(=O)OCC)*)C(=O)OCC,{},164.3224631 -557,*C(C(*)O)C,{$C(C)C(O)$},113.5665564 -558,*C(C*)C(CC)CC,{$CC(C(CC)CC)$},38.96888215 -559,*OC(=O)CCC(=O)OCCCCCCCCCCCCCCCCCCCC*,{},-32.76938912 -560,*C(C*)C(=O)N(CC)CC,{$CC(C(=O)N(CC)CC)$},56.77009786 -561,*NNC(=O)CCC(=O)NNC(=O)CCCCCCCCC(=O)*,{},64.69850401 -562,*NC(=O)CCC(=O)NCCCCCCCC*,{},69.22130195 -563,*NC(C(C(=O)*)(C)C)c1ccccc1,{},154.3595069 -564,*OC(=O)C/C=C/CC(=O)OCCCCCCCCCCCCCC*,{},-41.10158883 -565,*C(C*)(C(=O)OCC)CO,{$CC(C(=O)OCC)(CO)$},22.36004964 -566,*O[Si](CCCN=C1c2c(ccc(c2C(=NCCC[Si](*)(C)C)c2ccccc12)O)O)(C)C,{},51.3 -567,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)*)(c1ccc(cc1)O)C,{},255.52 -568,*OP(=O)(N=Nc1ccc(cc1)COC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)OCc1ccc(cc1)N=NP(=O)(OCCCCCC*)OC)OC,{},62.39 -569,*c1cc2c(C(=O)N(C2=O)c2c(ccc(c2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)CP(=O)(OCC)OCC)cc1,{},264.06 -570,*Nc1c(cc(cc1)NC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)*)C(=O)OCCCCCCCCCCCCCCCC,{},187.43 -571,*C(C*)(C(=O)OCCF)C,{$CC(C(=O)OCCF)(C)$},76.42 
-572,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)Oc1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Sc2ccc(cc2)Oc2ccc(cc2)*)cc1,{},225.33 -573,*C(C*)c1ccc(cc1)C(=O)N(C)C,{$CC(c1ccc(C(=O)N(C)C)cc1)$},146.9 -574,*OC(=O)c1ccc(cc1)C(c1ccc(cc1)C(=O)*)(C)C,{},46.39 -575,*Oc1cc2c(cc1)ccc(c2)Oc1ccc(cc1)C(=O)Nc1cc(ccc1)NC(=O)c1ccc(cc1)*,{},204.47 -576,*SCC(=O)NCCCCCCNC(=O)C*,{},29.78 -577,*c1cc2C(c3c(c2cc1)ccc(c3)c1ccc(cc1)c1c(c(cc(c1)c1ccc(cc1)OCC(CCCC)CC)c1ccc(cc1)*)c1ccc(cc1)OCC(CCCC)CC)(CCCCCC)CCCCCC,{},123.52 -578,*OC(=O)COCC(=O)OCCCC*,{},27.22 -579,*C1(CCN(CC1)C(=O)C(CC(=O)N1CCC(CC1)(CCC*)C)C)C,{},111.94 -580,*c1nc2c(nc1)cc(cc2)Oc1cc2c(nc(cn2)c2ccc(cc2)*)cc1,{},381.02 -581,*C(C*)OC(=O)c1c(cccc1)C,{$CC(OC(=O)c1ccccc1C)$},81.84 -582,*C1(CC(c2c1cc(cc2)*)(C)C)C,{},261.41 -583,*c1cc2c(C(=O)N(C2=O)c2ccc3Cc4c(c3c2)cc(cc4)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)cc1,{},442.63 -584,*c1cc2c(C(=O)N(C2=O)c2c3c(ccc2)c(ccc3)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2nc(nc(n2)NC(=O)*)c2ccccc2)cc1,{},330.59 -585,*Oc1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)Sc1ccc(cc1)Cl,{},146.43 -586,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)CCCCCCCCCCCC*)cc1,{},80.63 -587,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)OCCCCC*,{},89.92 -588,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)CCCCCCCCCCCC,{},70.24 -589,*OC(=O)Nc1c(ccc(c1)NC(=O)OCCCCCCCC*)C,{},117.25 -590,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C(=O)Oc1ccc2c(c1)oc(=O)cc2)C,{$CC(C(=O)OCCCCCCOc1ccc(C(=O)Oc2ccc3ccc(=O)oc3c2)cc1)(C)$},118.96 -591,*Oc1ccc(cc1)CC(NC(=O)Cc1ccc(cc1)OC(=O)CCCCCCC(=O)*)C(=O)OCC,{},66.4 -592,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(ccc2)C(c2cc(ccc2)*)O[Si](O[Si](O[Si](C)(C)C)(C)C)(C)C)cc1,{},196.68 -593,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(=O)OCCOCCOCCOC(=O)*)cc1,{},79.22 -594,*Oc1ccc(cc1)C1(c2ccccc2c2ccccc12)c1ccc(cc1)Oc1c(cc(cc1)C(=O)c1ccc(cc1)C(=O)c1cc(c(cc1)*)C(F)(F)F)C(F)(F)F,{},265.8 
-595,*O[Si](O[Si](O[Si](O[Si](CC[Si](O[Si](O[Si](O[Si](O[Si](CC[Si](*)(c1ccccc1)c1ccccc1)(C)C)(C)C)(C)C)(C)C)(C)C)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1,{},11.62 -596,*N1C(=O)c2c(C1=O)c(ccc2)Oc1c(c(cc(c1)C(C)(C)C)C(C)(C)C)Oc1c2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)*)ccc1,"{N3C(=O)c4cccc(c4C3=O)Oc5c(cc(C(C)(C)C)cc5C(C)(C)C)Oc6cccc7c6C(=O)N(C7=O)>}",301.26 -597,*Oc1ccc(cc1)c1ccc(cc1)Oc1c(c(c(c(c1F)F)COC(c1cc(ccc1)C(OCc1c(c(c(c(c1F)F)*)F)F)(C(F)(F)F)C(F)(F)F)(C(F)(F)F)C(F)(F)F)F)F,{},139.73 -598,*S(=O)(=O)c1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(N1C(=O)c2c(C1=O)cccc2)C)C(=O)Nc1ccc(cc1)*,{},272.65 -599,*NC(=O)NCc1ccc(cc1)CNC(=O)NCCCCCCCCCCCCCCCCCC*,{},62.4 -600,*C1CC2CC(CC(C1)O2)OC(=O)O*,{},196.52 -601,*C(C(*)C(=O)OC(C)(C)C)C(=O)OC,{$C(C(=O)OC)C(C(=O)OC(C)(C)C)$},148.77 -602,*Oc1ccc(cc1)c1ccc(cc1)C(=O)OCC(COC(=O)c1ccc(cc1)c1ccc(cc1)OC(CC*)C)C,{},116.96 -603,*N1C(=O)c2c(C1=O)cc(cc2)c1cc2c(C(=O)N(C2=O)c2c(cc(cc2C)C(c2cc(c(c(c2)C)*)C)c2c3c(ccc2)cccc3)C)cc1,{},369 -604,*c1cc2c(nc(c(n2)c2ccccc2)c2ccc(cc2)c2c(nc3c(n2)cc(cc3)C(=O)*)c2ccccc2)cc1,{},326.27 -605,*C(C*)(C(=O)OCC)F,{$CC(C(=O)OCC)(F)$},124.69 -606,*NC(=O)CCCCC(=O)NCC(CC(CC*)(C)C)C,{},54.82 -607,*c1c2C(=O)N(C(=O)c2c(c2ccccc12)c1ccc(cc1)Oc1ccc(cc1)C(=O)c1cc(ccc1)C(=O)c1ccc(cc1)Oc1ccc(cc1)*)CCCCCCCCCCCC,{},161.91 -608,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=S)*)C,{},101.06 -609,*C(C*)OCC(CC)(C)C,{$CC(OCC(C)(C)CC)$},12.2 -610,*c1sc(cc1)C(=O)Oc1ccc(cc1)[Si](c1ccc(cc1)OC(=O)*)(CC)CC,{},-36.93 -611,*Oc1c(cc(cc1C)*)C(CCCCCCCCCCCC)C,{},44.22 -612,*Oc1cc(ccc1)C(C(C(c1cc(ccc1)OC(=O)c1cc(ccc1)C(C(C(c1cc(ccc1)C(=O)*)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F,{},105.91 -613,*C(C*)C(=O)Oc1ccc(cc1)C(=O)Oc1ccc(cc1)OC(=O)c1ccc(cc1)OCCCC,{$CC(C(=O)Oc1ccc(C(=O)Oc2ccc(OC(=O)c3ccc(OCCCC)cc3)cc2)cc1)$},42.05 -614,*C(C*)c1c(cccc1)C(=O)OCCC,{$CC(c1ccccc1C(=O)OCCC)$},135.21 -615,*N=P(*)(OCc1ccc(cc1)c1ccccc1)OCc1ccc(cc1)c1ccccc1,{},51.23 
-616,*c1oc(nn1)c1ccc(cc1)C(=O)OCCCCCCOc1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)OCCCCCCOC(=O)c2ccc(cc2)*)CCC1,{},90.11 -617,*OC(=O)NCCCCCCNC(=O)OCCCCCCCCCCCC*,{},35.91 -618,*c1oc(nc1)c1cc(c(cc1)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1)*)C(F)(F)F)C(F)(F)F,{},267.06 -619,*c1nc(nc(n1)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)Oc1ccc(cc1)*)(C(F)(F)F)C(F)(F)F)c1ccccc1,{},303.48 -620,*Oc1c(c(c(c(c1C)C)Oc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*)C)C,{},337.62 -621,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2cc(ccc2)*)cc1,"{N2C(=O)c3ccc(cc3C2=O)c4cccc5c4C(=O)N(C5=O)>}",348.28 -622,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(c(c(c2)Br)Oc2c(cc(cc2Br)*)Br)Br)cc1,{},302.26 -623,*N1C(=O)C2C3C4C(C(C2C1=O)C=C3)C(=O)N(C4=O)c1ccc(cc1)Sc1cc2c(C(=O)N(C2=O)c2ccc(cc2)CC)cc1Sc1ccc(cc1)*,{},307.1 -624,*C1C(C(C(C1)C=C*)(F)F)(C(F)(F)F)C(F)(F)F,{$C=CC1CC(C(C(F)(F)F)(C(F)(F)F)C1(F)F)$},163.71 -625,*OC(CCC(OC(=O)CCCCCCCCC(=O)*)C)C,"{C(=O)CCCCCCCCC(=O)>}",-18.21 -626,*c1ccc2c(nc([nH]2)c2cc(ccc2)c2nc3c([nH]2)ccc(c3)C(=O)Nc2cc(cc(c2)c2nc3c([nH]2)cccc3)NC(=O)*)c1,{},348.55 -627,*C(C*)C(=O)c1ccc(cc1)CC,{$CC(C(=O)c1ccc(CC)cc1)$},68.42 -628,*C1C(=O)N(C(=O)C1*)CCOc1ccc(cc1)c1ccc(cc1)C#N,{},158.36 -629,*C1COC2C1OCC2OC(=O)CCC(=O)O*,{},86.76 -630,*OC(=O)c1ccc(cc1)C(=O)OCCCCOC(=O)CCCCC(=O)OCCCC*,{},33.64 -631,*C(C*)c1ccc(cc1)COCC(CCCC)CC,{$CC(c1ccc(COCC(CC)CCCC)cc1)$},13.66 -632,*C(C*)(C(=O)Oc1cc(c(cc1)C(=O)c1ccccc1)O)C,{$CC(C(=O)Oc1ccc(C(=O)c2ccccc2)c(O)c1)(C)$},186.45 -633,*C(C*)C(=O)OCCOC(C(F)F)(F)F,{$CC(C(=O)OCCOC(F)(F)C(F)F)$},11.16 -634,*OC(=O)OCC(C(C*)O)O,{},98.39 -635,*Oc1ccc(cc1)NC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)Nc1ccc(cc1)OCCCCCC*,{},158.08 -636,*N(c1ccc(cc1)C(c1ccc(cc1)*)c1ccccc1)c1ccc(cc1)C,{},263.16 -637,*Nc1ccc(cc1)C(c1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)C2C(C1=O)CC=CC2)C(=O)*)(C)C,{},358.53 -638,*C(C*)(CC(=O)OCCCc1ccccc1)C(=O)OCCCc1ccccc1,{$CC(C(=O)OCCCc1ccccc1)(CC(=O)OCCCc1ccccc1)$},29.9 -639,*C=CCCCC(CCC*)Cl,{$C=CCCCC(Cl)CCC$},-9.32 -640,*C1C(=O)N(C(=O)C1C(C*)(C)C)c1c(cccc1)C,{},286.47 
-641,*C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)Oc1c(c(c(c(c1F)F)C(=O)c1c(c(c(c(c1F)F)Oc1ccc(cc1)*)F)F)F)F,{},276.34 -642,*Nc1cc(ccc1)NC(=O)CCCCCCC(=O)*,{},128.17 -643,*C(C*)(C(=O)OCCOc1ccc(cc1)N=Nc1ccc(cc1)C#N)C,{$CC(C(=O)OCCOc1ccc(N=Nc2ccc(C#N)cc2)cc1)(C)$},141.68 -644,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)C1(CCC(CC1)c1ccccc1)c1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Cc2ccc(cc2)*)cc1,{},249.05 -645,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)c1c(c(c(c(c1c1ccc(cc1)F)c1ccc(cc1)F)c1ccc(cc1)F)c1ccc(cc1)F)C(=O)c1ccc(cc1)*,{},252.56 -646,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)NC(=O)Nc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)c2ccc(cc2)NC(=O)*)cc1,{},253.71 -647,*OC(C*)CCl,"{,}",2.36 -648,*NC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)NCC(CCC(C*)C)C,{},157.37 -649,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)C(=O)c2cc(ccc2)C(=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},240.35 -650,*Oc1c(cc(cc1OC)C=Cc1ccc(cc1)C=Cc1cc(c(c(c1)OC)OCCCCCCCC*)OC)OC,{},65.39 -651,*Nc1c2c(ccc1)c(ccc2)NC(=O)c1cc(ccc1)C(=O)*,{},339.66 -652,*OS(=O)(=O)c1ccc(cc1)*,{},140.64 -653,*c1ccc2ccc3c(c2n1)nc(cc3)c1ccc(cc1)c1c(cc(c(c1)CCCCCC)c1ccc(cc1)*)CCCCCC,{},160.44 -654,*C(C*)(C(=O)OCCCCCCCCCCn1c2ccc(cc2c2ccccc12)N=Nc1ccc(cc1)[N+](=O)[O-])C,{$CC(C(=O)OCCCCCCCCCCn1c2ccccc2c2cc(N=Nc3ccc([N+](=O)[O-])cc3)ccc21)(C)$},60.81 -655,*N=Nc1ccc(cc1)NC(=O)CCC(=O)Nc1ccc(cc1)*,{},208.85 -656,*C(C*)c1cc(ccc1)Cl,{$CC(c1cccc(Cl)c1)$},122.57 -657,*Oc1ccc(cc1)C1(c2cc(ccc2c2ccc(cc12)[N+](=O)[O-])OC)c1ccc(cc1)OC(=O)CCCC(=O)*,{},195.71 -658,*c1cc2c(C(=O)N(C2=O)c2c(cc(c(c2C)C(=O)c2cc(ccc2)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)C)C)cc1,{},280.09 -659,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)c1cc(ccc1)NC(=O)c1cc(ccc1)C(=O)Nc1cc(ccc1)C(=O)c1ccc(cc1)*,{},222.22 -660,*Oc1ccc(cc1)CC(NC(=O)CCc1ccc(cc1)OC(=O)CCCC(=O)*)C(=O)OCCOCCOCC,{},42.34 -661,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)C#N,{},51.61 +# Data from https://doi.org/10.1038/s41597-024-03212-4 +,SMILES,BigSMILES,Tg (C) 
+0,*C1COC2C1OCC2Oc1ccc(cc1)CNC(=O)CCCCCCC(=O)NCc1ccc(cc1)O*,{},21.58173134 +1,*OC(CCC(OC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)*)C)C,{},63.5893379 +2,*OC(=O)c1ccc(cc1)C(=O)OCCCC(=O)NCc1ccc(cc1)CNC(=O)CCC*,{},53.55726117 +3,*OC(=O)NCCNC(=O)OCC*,{},5.896093021 +4,*SCCCCC*,{},-55.37860961 +5,*Oc1ccc(cc1)C(=O)OC(=O)c1ccc(cc1)OCCCCCC*,{},64.73496741 +6,*c1[nH]c(cc1CC(=O)OCCCCCCCC)*,{},-4.076963699 +7,*C(C*)(CC(=O)OCCCCCCCCCC)C(=O)OCCCCCCCCCC,{$CC(C(=O)OCCCCCCCCCC)(CC(=O)OCCCCCCCCCC)$},75.04044311 +8,*OCC1C(C1)C*,{},-28.98581149 +9,*N(C(=O)CCCCCCCCCCCCCCCCC(=O)N(CCCCCC*)C)C,{},49.34222836 +10,*O[Si](*)(CCC(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C,{},-68.87734458 +11,*S(=O)(=O)c1ccc(cc1)C(=O)CNc1ccc(cc1)NCC(=O)c1ccc(cc1)*,{},198.9026743 +12,*S(=O)(=O)c1ccc(cc1)C(=O)NCCNC(=O)c1ccc(cc1)*,{},205.0712987 +13,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)CCCCC)cc1,{},204.7640603 +14,*NC(=O)C(=O)NCCCCCCCCCCNC(=O)CCCCCCCCC(=O)NCCCCCCCCCC*,{},82.2677155 +15,*OC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCCCCCCCCCCCC*,{},17.27783213 +16,*Oc1ccc(cc1)C(=O)Oc1ccc(cc1)Cc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},88.16409459 +17,*Oc1ccc(cc1)SSCCCCSSc1ccc(cc1)*,{},102.260186 +18,*Oc1ccc(cc1)C(=O)Nc1c(cc(cc1)c1cc(c(cc1)NC(=O)c1ccc(cc1)OCCCCCC*)Cl)Cl,{},133.9866306 +19,*c1c(cc(c(c1)C=Cc1ccc(cc1)OCCC(CCCC(C)C)C)C=C*)C=Cc1ccc(cc1)OCCC(CCCC(C)C)C,{$C=Cc1cc(C=Cc2ccc(OCCC(C)CCCC(C)C)cc2)c(cc1C=Cc1ccc(OCCC(C)CCCC(C)C)cc1)$},2.868925186 +20,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)CCCCC(=O)*)CC1,{},197.4539358 +21,*SC(=O)CCCCC(=O)SCc1ccc(cc1)c1ccc(cc1)C*,{},-14.32353196 +22,*C(C*)(C(=O)OCCNC(=O)N(CC(C(C(OC1OC(C(C(C1O)O)O)CO)C(CO)O)O)O)CCCCCC)C,{$CC(C(=O)OCCNC(=O)N(CCCCCC)CC(O)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO)(C)$},23.2402228 +23,*Oc1ccc(cc1)OC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(N1C(=O)c2c(C1=O)cccc2)C)C(=O)*,{},58.7319127 +24,*OC(=O)Nc1ccc(cc1)C(=O)OCC1CCC(CC1)C*,{},148.8519878 +25,*Nc1c(cccc1)NC(=O)c1ccc(cc1)C(=O)*,{},231.8080905 
+26,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C(=O)Oc1ccc(cc1)OCCCCCC)C,{$CC(C(=O)OCCCCCCOc1ccc(C(=O)Oc2ccc(OCCCCCC)cc2)cc1)(C)$},14.67015775 +27,*Oc1ccc(cc1)S(=O)(=O)c1c2c(ccc1)c(ccc2)S(=O)(=O)c1ccc(cc1)Oc1c(cc(c(c1C)C)c1c(c(c(c(c1)C)*)C)C)C,{},300.6954413 +28,*OC(=O)NC1CCC(CC1)CC1CCC(CC1)NC(=O)OCC(C*)(C)C,{},15.9792949 +29,*OC(=O)NCCCCCC(=O)OCCCC*,{},47.28425166 +30,*N(c1c(cc(cc1)Cc1cc(c(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CCC)C)C)CCC,{},179.9023847 +31,*Nc1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)C(CCSC)N1C(=O)c2c(C1=O)cccc2)C(=O)*,{},230.3191562 +32,*N(c1c(cc(cc1)Cc1cc(c(cc1)N(C(=O)c1ccc(cc1)C(=O)*)C)C)C)C,{},167.9642319 +33,*OC(=O)CCCCCCC(=O)OCCc1ccc(cc1)N1ON1c1ccc(cc1)CC*,{},17.91289325 +34,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*)(C(F)(F)F)C(F)(F)F,"{C(=O)c3ccc(cc3)C(=O)>}",150.9011854 +35,*c1ccc2[nH]c3c(c2c1)cc(cc3)*,{},246.5898577 +36,*C(C*)C(=O)NCCCC,{$CC(C(=O)NCCCC)$},-84.72234668 +37,*C(=C*)CCCCCCCCCCOc1ccc(cc1)c1ccc(cc1)OCC(CCCCCC)F,{$C=C(CCCCCCCCCCOc1ccc(c2ccc(OCC(F)CCCCCC)cc2)cc1)$},27.34580481 +38,*NC(=O)NCCCCNC(=O)NCCCCCC*,{},60.10434296 +39,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OC(=O)OCCCCCOC(=O)*)C,{},39.38257392 +40,*N1CCN(CC1)C(=O)CCN1CCN(CC1)CCC(=O)*,{},40.58060093 +41,*c1c(cc(cc1)C#C*)SCCCCCCCCCCCC,{$C#Cc1ccc(c(SCCCCCCCCCCCC)c1)$},53.41644401 +42,*OC(=O)Nc1cc(ccc1)C(=O)OCCCCCCCCCC*,{},45.09440244 +43,*Oc1c(cc(cc1)C(=O)*)CCCCCC,{},85.05865655 +44,*Nc1ccc(cc1)NC(=O)C=CC(=O)*,{},115.1255691 +45,*C(C*)(C(=O)OC(Oc1ccccc1)C)C,{$CC(C(=O)OC(C)Oc1ccccc1)(C)$},31.196908 +46,*C(*)C,{$C(C)$},122.3867673 +47,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2c(c3c(C(CC3)(C)C)c(c2C)*)C)cc1,{},325.2939228 +48,*Nc1ccc(cc1)C(=O)*,{},92.05287264 +49,*C(=C*)C,{$C=C(C)$},4.023221393 +50,*c1nc(nc(n1)Oc1c2c(ccc1C(=O)Oc1ccc(cc1)OC(=O)c1c(c3c(cc1)cccc3)O*)cccc2)N1CCCCC1,{},288.7077709 +51,*OS(=O)(=O)c1ccc(cc1)Sc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)C1(CCCCC1)c1ccc(cc1)*,{},157.8104834 +52,*c1c(c(c(c(c1Cl)Cl)CC*)Cl)Cl,{$CCc1c(Cl)c(Cl)c(c(Cl)c1Cl)$},56.1987128 
+53,*OC(=O)c1ccc(cc1)C(=O)OCCCC(=O)NCCCCCCNC(=O)CCC*,{},60.02078862 +54,*c1c(c(cc(c1)*)C=Nc1ccc(cc1)F)O,{$c1cc(c(O)c(C=Nc2ccc(F)cc2)c1)$},-121.5212841 +55,*N=C1C=CC(=NC2=CC(=O)C(=CC2=O)*)C=C1,{},212.9967182 +56,*OC(=O)NCCSCCCCCSCCNC(=O)OCC1CCC(CC1)C*,{},77.84678315 +57,*OS(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)C1(CCCCC1)c1ccc(cc1)*,{},149.9071313 +58,*c1nc(nc(n1)Oc1cc2c(cc1)cc(cc2)C(=O)Oc1c(cccc1)OC(=O)c1cc2c(cc1)cc(cc2)O*)N1CCN(CC1)C,{},54.90728348 +59,*OC(=O)CC(=O)OCC*,{},-90.52916041 +60,*OC(=O)CCCCCCCC(=O)OCC(C*)(C)C,{},17.01342593 +61,*OC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)OCCN(CC*)CCCC(=O)Nc1ccc(cc1)N=Nc1ccccc1,{},-32.25789051 +62,*[Si](*)(CCCCCC)CCCCCC,{$[Si](CCCCCC)(CCCCCC)$},-11.37918107 +63,*c1n(c(cc1)C=C*)CCCCCC,{},-15.03969582 +64,*NC1CC(CC(C1)(CNC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(CC(C)C)N1C(=O)c2c(C1=O)cccc2)C(=O)*)C)(C)C,{},195.2683571 +65,*Nc1cc(cc(c1)C(=O)Nc1ccccc1)NC(=O)c1cc(ccc1)C(=O)*,{},233.599405 +66,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Oc2ccc(cc2)OC(=O)*)cc1,{},-71.68107141 +67,*OC(=O)CCCCCC(=O)OCCC*,{},-71.28231613 +68,*N1C(=O)C(CC1=O)SCCOCCSC1C(=O)N(C(=O)C1)c1ccc(cc1)C(=O)OCCCCCCOC(=O)c1ccc(cc1)*,{},19.42674542 +69,*NC(=O)CCCCCCCCC(=O)NCCCP(CCC*)c1ccccc1,{},56.20366062 +70,*c1nc2c([nH]1)cc(cc2)S(=O)(=O)c1cc2c(nc([nH]2)c2ccc(cc2)Oc2ccc(cc2)*)cc1,{},315.1120806 +71,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C=Nc1ccc(cc1)CCCC)C,{$CC(C(=O)OCCCCCCOc1ccc(C=Nc2ccc(CCCC)cc2)cc1)(C)$},12.41121342 +72,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)C(=O)Nc1ccc(cc1)*,{},315.5911196 +73,*c1ccc(cc1)/C=C/c1c(cc(c(c1)CCCCCC)/C=C/*)CCCCCC,{},51.04706134 +74,*Oc1ccc(cc1)OC(=O)*,"{C(=O)>}",105.0499992 +75,*C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*,{},292.5925873 +76,*c1c(cc(c(c1)OCCOCCOCCOCCC(=O)O[Na])C#Cc1c(cc(c(c1)OC(COCCOCCOCCOC)COCCOCCOCCOC)C#C*)OC(COCCOCCOCCOC)COCCOCCOCCOC)OCCOCCOCCOCCC(=O)O[Na],{},-43.36051195 +77,*N(CC*)C(=O)CCCCCCCC,{},-49.55374382 
+78,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},232.2309602 +79,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCC(=O)Oc1cc2c(cc1)cc(cc2)OC(=O)CCCCC*,{},-10.30414379 +80,*NC(=O)CCP(CCC(=O)NCCCCCC*)c1ccccc1,{},9.575432391 +81,*c1cc2C(c3c(c2cc1)ccc(c3)c1cc2C(c3c(c2cc1)ccc(c3)*)(CCCCCCCCCCCC)CCCCCCCCCCCC)(CCCCCCCC)CCCCCCCC,{},37.51200969 +82,*C1OC(=O)C(C1)CCC1C(=O)OC(C1)CCCCCCCCCC*,{},84.1573412 +83,*C(C*)C(=O)OC(CC(C)C)C,{$CC(C(=O)OC(C)CC(C)C)$},11.47675289 +84,*OC(C*)COc1ccc(cc1)C,{},-55.31696784 +85,*N(C(=O)CCCCCCCCCCCCCCC(=O)N(Cc1ccc(cc1)C*)C)C,{},-14.32362541 +86,*C1OC(OC(C1)*)O,{},136.0523749 +87,*c1c(cc(c(c1)OCCCCCCCCCC)c1ccc(cc1)*)OCCCCCCCCCC,{},61.07976291 +88,*c1c2c(nccc2)c(cc1)OCCOc1c2ncccc2c(cc1)C*,{},74.04018308 +89,*NC(CC(=O)*)C(=O)OCCCCCC,{},0.713924343 +90,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)*)C,{},81.15934241 +91,*n1c(=O)c2ccc3c(=O)n(c(=O)c4ccc(c1=O)c2c34)CCCCCCCCCCCC*,{},101.4116865 +92,*Oc1c(cc(cc1)C=C1C(=O)C(=Cc2cc(c(cc2)OC(=O)c2ccc(cc2)C(=O)*)OC)CCC1)OC,{},105.2246285 +93,*Nc1ccc(cc1)NC(=O)C1C(C(=CC(C1)C(C(=O)*)CC(=O)O)C)C(=O)O,{},149.7687276 +94,*Oc1ccc(cc1)OC(=O)c1c(cccc1)C=Cc1ccc(cc1)C=Cc1c(cccc1)C(=O)*,{},86.85824281 +95,*OC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)OCC*,{},14.08686284 +96,*OC(=O)N(c1ccc(cc1)N(C(=O)OCC(C*)CCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)C)C,{},78.11061669 +97,*C1C(C(C(C1)C=C*)C(=O)OCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)C(=O)OCCCCCOc1ccc(cc1)c1ccc(cc1)C#N,{},27.68661296 +98,*C#CC(=C(*)CCCCOC(=O)NCCCCCC)CCCCOC(=O)NCCCCCC,{$C#CC(CCCCOC(=O)NCCCCCC)=C(CCCCOC(=O)NCCCCCC)$},5.219913288 +99,*c1ccc(cc1)/C=C/*,{},43.3930774 +100,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2ccc3S(=O)(=O)c4c(c3c2)cc(cc4)*)c1,{},355.5073659 +101,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)C(c1ccc(cc1)*)C,{},307.2645731 +102,*Oc1c(cc(c(c1)OC(=O)c1ccc(cc1)C=Nc1cc(c(cc1)OCCCCCCOc1c(cc(cc1)N=Cc1ccc(cc1)C(=O)*)Cl)Cl)Cl)Cl,{},82.06188226 
+103,*c1ccc2n(c3c(c2c1)cc(cc3)/C=C/c1ccc(cc1)Oc1c(cc(cc1)c1ccc(cc1)c1ccc(cc1)c1cc(c(cc1)Oc1ccc(cc1)/C=C/*)C(F)(F)F)C(F)(F)F)CC(CCCC)CC,{},187.4626146 +104,*Oc1ccc(cc1)Oc1ccc(cc1)C(=Nc1ccc(cc1)Oc1ccc(cc1)N=C(c1ccc(cc1)*)C)C,{},117.5082044 +105,*Oc1ccc(cc1)NC(=O)NCCCCCCNC(=O)*,{},122.4088725 +106,*C(C*)OC(=O)CC(=O)C,{$CC(OC(=O)CC(C)=O)$},9.819405608 +107,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)OCCOc1ccc(cc1)C(=O)*)C,{},68.01369706 +108,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)*,{},176.5386789 +109,*N1C(=S)SC(=CC=C2SC(=S)N(C2=O)CC*)C1=O,{},164.0140939 +110,*C1NC(=O)C(NC1=O)CCC(=O)OCCCC=CCCCOC(=O)CC*,{},25.37727633 +111,*Oc1ccc(cc1)N=Cc1cc(c(cc1)OC(=O)CCCCC(=O)Oc1c(cc(cc1)C=Nc1ccc(cc1)*)OC)OC,{},47.38250675 +112,*c1n(c(nn1)c1ccc(cc1)c1ccc(cc1)c1n(c(nn1)COc1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)OC*)CC1)c1ccccc1)c1ccccc1,{},140.326419 +113,*N1CCN(CC1)C(=O)C1C(C1)C(=O)*,{},100.5840527 +114,*C(=C*)c1c(sc(c1)C(F)(F)F)C(F)(F)F,{$C=C(c1cc(C(F)(F)F)sc1C(F)(F)F)$},54.57900551 +115,*N1C(=O)c2c(C1=O)cc(cc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)*)cc1,{},421.9822435 +116,*c1cc2c(C(=O)N(C2=O)c2c(c(c(c(c2F)F)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)F)F)cc1,{},388.0137456 +117,*C#CC(=C(*)OS(=O)(=O)c1ccc(cc1)C)OS(=O)(=O)c1ccc(cc1)C,{$C#CC(OS(=O)(=O)c1ccc(C)cc1)=C(OS(=O)(=O)c1ccc(C)cc1)$},164.1018919 +118,*OC(=O)CCC(=O)OCCOCCOCC*,{},14.21396213 +119,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1cc(ccc1)C(=O)*)(CC)C,"{C(=O)c3cccc(c3)C(=O)>}",194.1629808 +120,*n1c(=O)c2c(c3c(cc2c1=O)c(=O)n(c3=O)CCCCCCCCCC*)Br,{},137.1087261 +121,*C(C*)(C(=O)OCCCCCCCCOC(=O)OC1CC2=CCC3C(CCC4(C(CCC34)C(CCCC(C)C)C)C)C2(CC1)C)C,{$CC(C(=O)OCCCCCCCCOC(=O)OC1CCC2(C)C(=CCC3C2CCC2(C)C(C(C)CCCC(C)C)CCC32)C1)(C)$},47.48888608 +122,*OC(=O)CCCCCCCC(=O)OCCCCCCCCC*,{},3.667443154 +123,*Oc1ccc(cc1)c1c(cc(c(c1)OCCCCCC)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*)OCCCCCC,{},81.55306128 +124,*c1c2c(nccc2)c(cc1)CC*,{},70.93028064 +125,*c1n(c(cc1)CCC(=O)OC(=O)CC*)C,{},78.11959859 
+126,*N1C(=O)C2OC3C(C2C1=O)C(=O)N(C3=O)c1ccc(cc1)c1c(nc2c(n1)cc(cc2)S(=O)(=O)c1cc2c(nc(c(n2)c2ccc(cc2)*)c2ccccc2)cc1)c1ccccc1,{},373.476202 +127,*c1oc(cc1)Sc1oc(cc1)C=C1C(=O)C(=C*)CC1,{},102.2883172 +128,*Oc1c(c(ccc1)Oc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)Nc1ccc(cc1)*)C(=O)Nc1ccc(cc1)N=Nc1ccccc1,{},238.4094841 +129,*=C=C=C(C(=*)CO)CO,{$=C=C=C(CO)C(CO)=$},42.01413885 +130,*c1oc(cc1)Sc1oc(cc1)C=NCCN=C*,{},95.6213855 +131,*NC(=O)/C=C/CC/C=C/C(=O)NCCCCCC*,{},2.127453311 +132,*Nc1c(cc(c(c1)SCCC#N)NC(=O)c1ccc(cc1)C(=O)*)SCCC#N,{},214.7483216 +133,*c1c(cc(cc1)CC*)C(=O)C,{$CCc1ccc(c(C(C)=O)c1)$},8.997925789 +134,*Oc1ccc(cc1)C(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)C(=O)c1ccc(cc1)*,{},138.8344532 +135,*c1sc(nn1)c1cc(ccc1)OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCCOc1cc(ccc1)*,{},35.51374296 +136,*Nc1c(cc(c(c1)OC)*)OC,{},168.2573371 +137,*SCSCCCC*,{},1.410869343 +138,*c1nc(sc1)NC(=O)NCCCCCCNC(=O)Nc1nc(cs1)c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)*)CCC1,{},47.02441211 +139,*N(CC*)C(=O)c1ccc(cc1)C,{},87.3045689 +140,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCC(=O)Oc1ccc(cc1)c1ccc(cc1)OC(=O)CCCCC*,{},60.41501019 +141,*Oc1c(c(c(c(c1[2H])[2H])C(c1c(c(c(c(c1[2H])[2H])OC(=O)*)[2H])[2H])(C([2H])([2H])[2H])C([2H])([2H])[2H])[2H])[2H],{},230.9913822 +142,*Oc1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)c1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)Oc1c(cc(cc1)C(=O)c1cc(c(cc1)*)S(=O)(=O)O)S(=O)(=O)O,{},151.7193386 +143,*c1c2c(nsn2)c(cc1)c1sc(c(c1)CCCCCC)C=Cc1sc(c(c1CCCCCC)CCCCCC)C=Cc1sc(cc1CCCCCC)*,{},17.9542177 +144,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(=O)Nc2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)NC(=O)c2ccc(cc2)*)cc1,{},178.6163373 +145,*c1cc2n(c3c(c2cc1CCCCCC)cc(c(c3)C=C*)CCCCCC)CCCCCCCC,{},45.45691415 +146,*C#CC(=C(*)c1cncnc1)CCCCOC(=O)NCC(=O)OCCCC,{$C#CC(CCCCOC(=O)NCC(=O)OCCCC)=C(c1cncnc1)$},73.99227071 +147,*OC(=O)c1ccc(cc1)C(=O)NCCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},76.72929425 +148,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1)C(c1cc(c(cc1)*)[N-][N+]#N)(C)C)[N-][N+]#N,{},181.1592195 
+149,*c1sc(cc1CCCCCCCC)c1sc(cc1CCCCCC(C(C(C(F)(F)F)(F)F)(F)F)(F)F)*,{},6.476080227 +150,*c1c2C(=O)N(C(=O)c2c(c2ccccc12)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)*)c1c(cccc1)F,{},314.5599695 +151,*NC(=O)CCCCCCCCCCC(=O)NCC1CC(CCC1)C*,{},26.18556129 +152,*Nc1ccc(cc1)NC(=O)c1cc(cc(c1)C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)C(=O)*,{},235.6895845 +153,*c1c(cc(c(c1)O)O)*,{$c1cc(O)c(O)cc1$},158.9171015 +154,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OCCCCCCC*)C,{},36.76976951 +155,*C(=C(*)C)[Si](CC)(C)C,{$C(C)=C([Si](C)(C)CC)$},157.1009863 +156,*OC(=O)CCCCCCCC(=O)*,{},-68.69432556 +157,*OC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)OCCCCCCCCCCCC*,{},-26.12929226 +158,*c1c(cc(c(c1)OCCCCCCCCCC)*)OCCCCCCCCCC,{},5.423112055 +159,*C(C(=O)*)(C)C,{$C(=O)C(C)(C)$},105.8292511 +160,*C#CC(=C(*)Cn1c2ccc(cc2c2cc(ccc12)CCCCCCCCCCCCCCCC)CCCCCCCCCCCCCCCC)Cn1c2ccc(cc2c2cc(ccc12)CCCCCCCCCCCCCCCC)CCCCCCCCCCCCCCCC,{$C#CC(Cn1c2ccc(CCCCCCCCCCCCCCCC)cc2c2cc(CCCCCCCCCCCCCCCC)ccc21)=C(Cn1c2ccc(CCCCCCCCCCCCCCCC)cc2c2cc(CCCCCCCCCCCCCCCC)ccc21)$},-2.237818484 +161,*C(C*)CCCC(C)C,{$CC(CCCC(C)C)$},5.980301023 +162,*N1C(=O)C(CC1=O)Oc1ccc(cc1)N=Cc1ccc(cc1)OC1C(=O)N(C(=O)C1)c1ccc(cc1)Oc1ccc(cc1)*,{},154.6342574 +163,*c1nc(nc(n1)Oc1c(cccc1)C(=O)Oc1ccc(cc1)OC(=O)c1c(cccc1)O*)N(CC)c1ccccc1,{},146.7365026 +164,*c1nc2c(n1CCCS(=O)(=O)O)ccc(c2)c1ccc2c(nc(n2CCCS(=O)(=O)O)c2cc(ccc2)*)c1,{},208.6363648 +165,*OC(=O)C1C(=O)CC(C(=O)C1)C(=O)OCCCCCC*,{},-36.66445232 +166,*Oc1ccc(cc1)C(c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)*)c1c(cccc1)C(=O)O[Na],{},169.1341304 +167,*N(C(=O)CCCCCCCCCCCCCCCCC(=O)N(Cc1ccc(cc1)C*)CC)CC,{},4.279694171 +168,*c1nc(nc(n1)Oc1ccc(cc1)C(c1ccc(cc1)O*)(C)C)OC,{},121.2094568 +169,*OC(=O)NCc1ccc(cc1)CNC(=O)OCCCCCCCC*,{},-25.31417235 +170,*NC(=O)CCCCCCCC(=O)NCCCCCCCCC*,{},9.904824538 +171,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(ccc2)Oc2ccc(cc2)S(=O)(=O)c2ccc(cc2)Oc2cc(ccc2)*)cc1,{},199.6619548 +172,*c1n(c(cc1)*)CC,{},112.6054346 +173,*OC(=O)CCCCS(=O)(=O)CCCCC(=O)OCCCCCCCCCC*,{},37.87494117 
+174,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C3(C(=O)N(C2=O)C(=O)*)CCCCC3)cc1,{},277.0792131 +175,*C(=C*)c1c(cccc1)C,{$C=C(c1ccccc1C)$},261.6623551 +176,*Oc1cc2C(c3c(c2cc1)ccc(c3)Oc1ccc(cc1)c1c2cc(ccc2c(c2ccccc12)c1ccc(cc1)*)CCC)(c1ccc(cc1)N(c1ccccc1)c1ccccc1)c1ccc(cc1)N(c1ccccc1)c1ccccc1,{},187.3941077 +177,*SC(=O)CCCCC(=O)SCC*,{},-21.91188888 +178,*OC(CC*)(C(F)(F)F)C(F)(F)F,{},-8.026293088 +179,*C(C(*)c1ccccc1)[N+](=O)[O-],{$C(c1ccccc1)C([N+](=O)[O-])$},173.9913454 +180,*N1C(=O)c2c(C1=O)cc(cc2)C(c1cc2c(C(=O)N(C2=O)CC(=O)NNC(=O)c2ccc(cc2)C(=O)NNC(=O)C*)cc1)(C(F)(F)F)C(F)(F)F,{},226.7602549 +181,*C(C*)CCCCCCCCCCCCCCCCCCCC,{$CC(CCCCCCCCCCCCCCCCCCCC)$},21.77937998 +182,*OS(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)c1ccc(cc1)*,{},190.021745 +183,*SCCCCCC(=O)NCCc1ccc(cc1)CCNC(=O)CCCCC*,{},-18.01175462 +184,*N1CCC(CC1)C(=O)c1c(cc(cc1)C(=O)N1CCC(CC1)CCC*)Oc1ccccc1,{},131.1821319 +185,*C(C(CC*)(F)F)(Cl)F,{$CCC(F)(F)C(Cl)(F)$},-41.84388225 +186,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CC(=O)OCCCCCCOC(=O)C*,{},55.49864746 +187,*c1oc(cc1)Sc1oc(cc1)C=Nc1cc(ccc1)N=C*,{},122.5813993 +188,*C(C*)S(=O)c1ccccc1,{$CC(S(=O)c1ccccc1)$},137.9705527 +189,*OC(=O)c1cc(c(cc1)C(=O)Nc1ccc(cc1)C(c1ccc(cc1)C(c1ccc(cc1)NC(=O)c1c(ccc(c1)C(=O)OCCN(CC*)CCCCOc1ccc(cc1)N=Nc1ccc(cc1)CCCCCC)C(=O)O)(C)C)(C)C)C(=O)O,{},82.18427933 +190,*=C=C=C(C(=*)COC(=O)NCC)COC(=O)NCC,{$=C=C=C(COC(=O)NCC)C(COC(=O)NCC)=$},172.0633872 +191,*Oc1ccc(cc1)c1ccc(cc1)OCCCCCCOC(=O)Nc1c(ccc(c1)NC(=O)OCCCCOC(=O)Nc1cc(c(cc1)C)NC(=O)OCCCCCC*)C,{},41.49405569 +192,*c1sc(c(c1C#N)C#N)N=Cc1ccc(cc1)C=N*,{},204.4271705 +193,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},126.2551791 +194,*c1cc2c(C(=O)N(C2=O)c2cc(ccc2)P(=O)(c2cc(ccc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(=O)C2(C)C)C(=O)*)c2ccccc2)cc1,{},138.7903633 +195,*OCOCCCCCC*,{},-41.68464364 +196,*Oc1ccc(cc1)OC(=O)c1c(cc(c(c1)OCCCCCCCCCCCCCCCC)C(=O)*)OCCCCCCCCCCCCCCCC,{},77.98044592 
+197,*c1sc(cc1)/C=C/c1ccc(cc1)N(c1ccc(cc1)N(c1ccc(cc1)/C=C/c1sc(cc1)/C=C(/C(=O)Nc1cc(ccc1)NC(=O)/C(=C/*)/C#N)\C#N)c1ccccc1)c1ccccc1,{},243.9892983 +198,*N(c1ccc(cc1)Cc1ccc(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CCC)CCC,{},173.7390032 +199,*Oc1ccc(cc1)C(c1ccc(cc1)OCC#CC#CC*)(C)C,{},48.046331 +200,*NC(=O)C(C(C(=O)NCCCCCC*)O)O,{},149.1553302 +201,*NC(=O)CCCCCC(=O)NCc1ccc(cc1)C*,{},61.73836642 +202,*Nc1c(c(c(cc1)*)Cl)Cl,{},115.5762842 +203,*NC(=O)C(C(=O)NCCCCCCCCCC*)CCCCCCCCCCCC,{},-0.868265132 +204,*N1C(=O)c2c(C1=O)c(c(c(c2F)F)Oc1c(c(c(c(c1F)F)Oc1c(c2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)*)c(c1F)F)F)F)F)F,{},283.2696326 +205,*C1(c2c(C(=O)O1)cccc2)c1cc(c(c(c1)Br)OC(=O)c1cc(ccc1)C(=O)Oc1c(cc(cc1Br)*)Br)Br,{},245.9668197 +206,*OC(=O)CCCCSCCCCC(=O)OCCCCCCCCCC*,{},-22.30064955 +207,*OC(=O)c1c(cccc1)C(=O)OCc1ccc(cc1)C*,{},95.33964688 +208,*C=CCC(CC*)(C(=O)OC)C,{$C=CCC(C)(C(=O)OC)CC$},-28.15217408 +209,*O[Si](*)(CCCCCOc1ccc(cc1)OC(=O)c1ccc(cc1)OCCCC)C,{},-64.12566312 +210,*Nc1ccc(cc1)CCc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)*,{},245.2414672 +211,*=NN=CC#C[Si](C#CC=*)(c1ccccc1)c1ccccc1,{<=CC#C[Si](c1ccccc1)(c1ccccc1)C#CC=NN=>},131.2577432 +212,*Oc1ccc(cc1)C(=O)OCCOCCOCCOC(=O)c1ccc(cc1)OC(=O)Nc1c(ccc(c1)NC(=O)*)C,{},100.4455143 +213,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)Oc1ccc(cc1)C(c1ccccc1)(C)C,"{C(=O)c3ccc(cc3Oc3ccc(C(C)(C)c4ccccc4)cc3)C(=O)>}",135.0490958 +214,*OC(=O)NCCSCCCCCSCCNC(=O)OCCCC*,{},3.000872148 +215,*Oc1cc(ccc1)OC(=O)c1cc(c(c(c1)C(C)(C)C)OC(=O)c1ccc(cc1)C(=O)Oc1c(cc(cc1C(C)(C)C)C(=O)*)C(C)(C)C)C(C)(C)C,{},135.2011857 +216,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)*)(CCCC)C,"{C(=O)>}",118.8361345 +217,*c1cc2n3c(nc2cc1)c1ccc(cc1C3=O)Oc1cc2c3n(c4ccc(cc4n3)O*)C(=O)c2cc1,{},291.281809 +218,*Oc1c(cc(cc1)OC(=O)Oc1ccc(cc1)OC(=O)*)C,{},120.5520321 +219,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2ccc(cc2)OCCCCCCCCCOc2ccc(cc2)*)cc1,{},47.98864595 +220,*C1OC(CO1)COCC1OC(OC1)CCCCCCCC(=O)OCCOC(=O)CCCCCCC*,{},-2.553202642 +221,*Oc1ccc(cc1)Oc1ccc(cc1)Oc1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},136.3718208 
+222,*C(C(=O)C*)c1ccc(cc1)C,{$CC(=O)C(c1ccc(C)cc1)$},67.57313464 +223,*C(C*)C(=O)OCC1(COC(OC1)(C)C)C,{$CC(C(=O)OCC1(C)COC(C)(C)OC1)$},95.74104893 +224,*Oc1c(cc(cc1)C=C1C(=O)C(=Cc2cc(c(cc2)OCCCC*)OC)CC1)OC,{},36.19808899 +225,*N=C1c2ccccc2C(=Nc2ccc(cc2)*)c2ccccc12,{},158.2579262 +226,*NC(=O)CCCCCCCCCCCCCCCC*,{},15.38660594 +227,*Nc1cc(ccc1)NC(=O)CCCCCC(=O)*,{},-12.07623639 +228,*c1cc2c(cc1)cc(cc2)*,{},239.5402998 +229,*c1ncc(cc1)c1ccc(nc1)*,{},138.2497069 +230,*C(C*)n1c2ccc(cc2c2cc(ccc12)Br)Br,{$CC(n1c2ccc(Br)cc2c2cc(Br)ccc21)$},115.6170166 +231,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)c2ccc(cc2)c2ccc(cc2)*)cc1,{},357.2965065 +232,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)c2ccc(cc2)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)cc1,{},302.652739 +233,*OC(=O)NC1C(C1)NC(=O)OCCCC*,{},47.31586772 +234,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1C)C1C(CC(CC1)C(c1cc(c(c(c1)C)*)C)(C)C)C)C,{},250.477212 +235,*C1C(=O)N(C(=O)C1*)c1ccccc1,{},197.1821336 +236,*[Si](c1ccc(cc1)*)(C)C,{},126.7080905 +237,*Nc1c(c(c(c(c1[2H])[2H])*)[2H])[2H],{},235.370823 +238,*C1c2c(C(C=C1)C=C*)cc(cc2)CCCCCC,{$C=CC1C=CC(c2ccc(CCCCCC)cc21)$},9.704073219 +239,*c1ccc2n(c3c(c2c1)cc(cc3)/C=C/c1ccc(cc1)Oc1c(cc(cc1)c1ccc(cc1)c1ccc(cc1)c1cc(c(cc1)Oc1ccc(cc1)/C=C/*)C#N)C#N)CC(CCCC)CC,{},129.2206103 +240,*c1sc(c(c1OCCCCCCC)C)*,{},32.04437108 +241,*OC(=O)C(=O)OCCCCCCCCCC*,{},-65.08541819 +242,*C=CC(C(*)C)C,{$C=CC(C)C(C)$},78.17880502 +243,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(CC2(C)C)(c2ccc(cc2)Oc2cc3c(C(=O)N(C3=O)c3cc(ccc3)N3C(=O)c4c(C3=O)cc(cc4)Oc3ccc(cc3)C3(CC(c4c3cc(cc4)Oc3cc4c(C(=O)N(C4=O)c4cc(ccc4)*)cc3)(C)C)C)cc2)C)cc1,{},231.956044 +244,*=C1SC(=S)N(C1=O)c1ccc(cc1)N1C(=S)SC(=Cc2ccc(cc2)C=*)C1=O,{<=Cc1ccc(cc1)C=C2SC(=S)N(C2=O)c3ccc(cc3)N4C(=O)C(SC4=S)=>},187.1036635 +245,*Nc1c(cc(c(c1)C(=O)O)*)OC,{},93.18891585 +246,*NNC(=O)CCCCC(=O)NNC(=O)CCCCCCCC(=O)*,{},101.0022623 +247,*n1c(=O)c2sc3c(sc2c1=O)c(=O)n(c3=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)NC(=O)c1ccc(cc1)*,{},327.2441987 
+248,*C1CC(CC1)C*,{},42.35931484 +249,*Nc1ccc(cc1)NC(=O)c1c(cc(c(c1)C(=O)O)C(=O)*)C(=O)O,{},240.3936293 +250,*NNC(=O)c1cc(ccc1)C(=O)*,{},232.2266265 +251,*c1sc(c(c1CCCCCCCCCCCC)CCCCCCCCCCCC)c1sc(cc1)c1sc(cc1)*,{},14.60597757 +252,*C(C*)C(=O)NCC,{$CC(C(=O)NCC)$},55.10481694 +253,*Oc1c(cc(cc1C(C)(C)C)C(=O)*)C(C)(C)C,{},206.7951116 +254,*OS(=O)(=O)c1ccc(cc1)c1ccc(cc1)S(=O)(=O)Oc1c(cc(cc1Br)C1(CCCCC1)c1cc(c(c(c1)Br)*)Br)Br,{},268.4647521 +255,*C(C*)C(=O)NCCCCCCCCCCCC,{$CC(C(=O)NCCCCCCCCCCCC)$},21.73577755 +256,*N(c1ccc(cc1)*)CCCCCC,{},-128.6299242 +257,*Oc1c(cc(cc1)C(c1cc(c(cc1)OC(=O)c1cc(ccc1)C(=O)*)[N+](=O)[O-])(CCC(=O)O)C)[N+](=O)[O-],{},127.0784722 +258,*c1ncc(cc1)C(=O)NNC(=O)*,{},135.1836518 +259,*c1n(c(cc1)*)C(C(=O)NO)n1ccc2c1cccc2,{},180.3082327 +260,*N=Nc1ccc(cc1)*,{},144.6951233 +261,*c1sc2c(c1)sc(c2)c1sc(cc1CCCCCCCCCCBr)c1sc(c(c1)CCCCCCCCCCBr)*,{},24.95900908 +262,*C(CC(C*)c1ccccc1)(C(=O)OC)C#N,{$CC(c1ccccc1)CC(C(=O)OC)(C#N)$},127.0156605 +263,*=C=C=C(C(=*)COS(=O)(=O)c1ccc(cc1)OC)COS(=O)(=O)c1ccc(cc1)OC,{$=C=C=C(COS(=O)(=O)c1ccc(OC)cc1)C(COS(=O)(=O)c1ccc(OC)cc1)=$},55.30555285 +264,*c1c(cc(c(c1)OCCCCCCCCCC)C#C*)OCCCCCCCCCC,{},61.85443636 +265,*OCCCC(=O)NCCCCCCNC(=O)CCCCCC*,{},1.467345964 +266,*c1cncc(c1)C(=O)NCCCCCCCCCCNC(=O)*,{},85.60365033 +267,*C(C(C(C(*)(F)F)(F)F)(F)F)(C(F)(F)F)F,{$C(F)(F)C(F)(F)C(F)(F)C(C(F)(F)F)(F)$},-86.88823628 +268,*Oc1cc(ccc1)C(=O)NNC(=O)c1cc(ccc1)C(=O)NNC(=O)CC*,{},234.2133464 +269,*Oc1cc(ccc1)NC(=O)c1ccc(cc1)C(c1ccc(cc1)C(=O)Nc1ccc(cc1)*)(C(F)(F)F)C(F)(F)F,{},157.6228264 +270,*c1n(c(nn1)CCCCCCCC*)N,{},-41.92176029 +271,*c1nc2c([nH]1)cc(cc2)c1cc2c(nc([nH]2)c2ccc(cc2)NC(=O)c2ccc(cc2)C(=O)Nc2ccc(cc2)*)cc1,{},358.667269 +272,*Nc1c(cc(cc1Cl)*)Cl,{},-0.214281278 +273,*c1ccc2n(c3c(c2c1)cc(cc3)N=Cc1sc(cc1)c1sc(cc1)C=N*)CCCCCC,{},95.07860424 +274,*N1C2(CCCC2)C(=O)N(C1=O)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},17.25361376 +275,*P(=Nc1ccc(cc1)N=P(CC*)(Cl)Cl)(Cl)Cl,{},-27.34500252 
+276,*Nc1c(cc(c(c1)SCCC#N)NC(=O)c1cc(ccc1)C(=O)*)SCCC#N,{},38.16065966 +277,*C(C*)(c1ccc(cc1)OC(=O)CC)OC(=O)C,{$CC(c1ccc(OC(=O)CC)cc1)(OC(C)=O)$},2.162388076 +278,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)CCCCCCCC,{},105.6965321 +279,*c1sc2cc3c(cc2n1)sc(n3)CCCCC*,{},87.36313445 +280,*NC(C(=O)*)CCC(=O)OCCCCCCCCCCCC,{},30.09586697 +281,*NC(C(=O)*)CO,{},84.57547927 +282,*Nc1c(cccc1)NC(=O)CCCCCCC(=O)*,{},71.92438381 +283,*Oc1c(cc(cc1)C=Cc1ccc(cc1)C=Cc1cc(c(cc1)OC(=O)CCCCCCCCC(=O)*)C)C,{},35.47523522 +284,*c1c2c(nccn2)c(cc1)*,{},216.378732 +285,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)c1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)*,{},165.0428244 +286,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)F,{},-7.2122695 +287,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)OCCCOc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)S(=O)(=O)c2ccc(cc2)NC(=O)*)cc1,{},163.1829015 +288,*OC(=O)c1ccc(cc1)C(=O)NCCCCCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},-20.66610996 +289,*NNC(=O)c1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)C(=O)*,{},133.1528291 +290,*N=P(*)(OCCC(=O)C=C)OCCC(=O)C=C,{},-42.12432011 +291,*S(=O)(=O)c1ccc(cc1)c1ccc(cc1)*,{},229.0539301 +292,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)S(=O)(=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(=O)C2(CC)C)C(=O)*)cc1,{},242.6534046 +293,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)C,{},5.817024886 +294,*NC(CCCCNC(=O)NCCCCNC(=O)*)C(=O)OC,{},0.336556425 +295,*C(=C*)c1nc2c(n1C)cccc2,{$C=C(c1nc2ccccc2n1C)$},-6.104199835 +296,*Oc1ccc(cc1)N=Nc1ccc(cc1)*,{},116.9759489 +297,*c1nc(nc(n1)NC(=O)c1c(cc(c(c1)C(=O)N*)C(=O)O)C(=O)OC(=O)Nc1c(ccc(c1)NC(=O)OCCCCCCCC)C)c1ccccc1,{},-1.691479041 +298,*c1[nH]c2cc3c(cc2n1)nc([nH]3)c1ccc(cc1)*,{},340.5865983 +299,*OC(=O)Cc1ccc(cc1)CC(=O)OCCCC*,{},-34.10658315 +300,*SC(=O)NCCCCCCNC(=O)SCCCC*,{},-33.13797704 +301,*c1c(cc(c(c1)OC)*)OC,{},63.90036252 +302,*N(CC*)C(=O)CCC(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F,{},-52.64825421 
+303,*OC(=O)c1c(cccc1)NC(=O)c1ccc(cc1)C(=O)Nc1c(cccc1)C(=O)OC(=O)c1cc(ccc1)C(=O)*,{},185.0999075 +304,*c1sc(cc1CCCCCCCC)Nc1ccc(cc1)*,{},49.50827798 +305,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)c1ccc(cc1)c1ccc(cc1)*,{},278.5221779 +306,*n1c(=O)c2c(c3c(c(c2c1=O)F)c(=O)n(c3=O)c1ccc(cc1)*)F,{},337.816724 +307,*OC(=O)c1ccc(cc1)C(=O)NCCNC(=O)c1ccc(cc1)C(=O)OCCCCCCCCCC*,{},4.250402609 +308,*NC(=O)NC(=O)NCc1c(c(cc(c1)Cc1c(c(cc(c1)C*)C=O)O)C=O)O,{},190.6072154 +309,*OC(=O)C(C*)(CCCC)CCCC,{},-17.98562642 +310,*C1(C(=O)C(CCC1)C*)CO,{},71.70468573 +311,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Oc2ccc(cc2)*)cc1,"{N4C(=O)c5ccc(cc5C4=O)Oc6ccc(cc6)Oc7ccc8c(c7)C(=O)N(C8=O)>}",191.2304459 +312,*C(=C*)c1cc(c(c(c1)CO)OCc1ccc(cc1)CNC(COCCCCCCCC)C)CO,{$C=C(c1cc(CO)c(OCc2ccc(CNC(C)COCCCCCCCC)cc2)c(CO)c1)$},65.78481038 +313,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)CCCCCCCCCCCC*,{},73.37639604 +314,*c1sc2cc3c(cc2n1)sc(n3)c1ccc(cc1)c1ccc(cc1)*,{},343.9030515 +315,*c1sc(cc1COCCCCCCOc1ccc(cc1)c1ccc(cc1)C#N)*,{},59.88076613 +316,*Nc1ccc(cc1)CC(=O)*,{},63.68828699 +317,*C(C*)C(=O)N(c1ccccc1)O,{$CC(C(=O)N(O)c1ccccc1)$},141.1832818 +318,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)C(=O)*)c1ccccc1,"{C(=O)c2ccc(cc2)C(=O)>}",65.88587862 +319,*OC(CC(=O)*)C(C)C,{},-24.82439314 +320,*NC(C(=O)*)CC(=O)OCc1ccccc1,{},70.10647363 +321,*c1c(c(cc(c1)N=Nc1ccc(cc1)[N+](=O)[O-])*)O,{},165.6801535 +322,*OCCCCC(=O)NCCCCCCNC(=O)CCCC*,{},-56.49395983 +323,*=C=C=C(C(=*)CCCCOC(=O)NC(=O)OCCCC)CCCCOC(=O)NC(=O)OCCCC,{$=C=C=C(CCCCOC(=O)NC(=O)OCCCC)C(CCCCOC(=O)NC(=O)OCCCC)=$},27.51087357 +324,*Oc1cc(c(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)*)(C)C)[N-][N+]#N,{},196.3053138 +325,*c1sc(nn1)SCC(=O)NN=Cc1ccc(cc1)OCCCCOc1ccc(cc1)C=NNC(=O)CS*,{},39.79677892 +326,*OC(=O)NCCSCCCCSCCNC(=O)OCC*,{},26.25521298 +327,*C(C*)C(=O)N1CC[N+](CC1)(CCCCCCCCCCCC)C,{$CC(C(=O)N1CC[N+](C)(CCCCCCCCCCCC)CC1)$},-81.38297384 +328,*OC(=O)c1cc(ccc1)c1cc(ccc1)C(=O)OCCCCCCCCCC*,{},8.531981028 +329,*C1(C(=O)OC(=O)C1)C*,{},143.854555 
+330,*Oc1cc(ccc1)OC(=O)c1ccc(cc1)C(=O)Oc1cc(ccc1)OCCCCCCCCCC*,{},20.92794041 +331,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)Oc1ccc(cc1)OC(=O)c1cc(ccc1)C(=O)*,{},119.4238893 +332,*=C1CN(CC(=Cc2oc(cc2)C(=O)CCCCC(=O)c2oc(cc2)C=*)C1=O)C,{<=Cc1ccc(o1)C(=O)CCCCC(=O)c2ccc(o2)C=C3CN(C)CC(C3=O)=>},95.14114513 +333,*Oc1ccc(cc1)NC(=O)c1c(c(c(c(c1F)F)C(=O)Nc1ccc(cc1)*)F)F,{},168.6168885 +334,*C(=C(CC*)C)C,{$CCC(C)=C(C)$},46.49641909 +335,*S(=O)(=O)NCCNS(=O)(=O)c1ccc(cc1)*,{},7.54081281 +336,*N(C(=O)*)c1ccccc1,{},150.7130657 +337,*OC(=O)C(C(=O)OCCCCCC*)CCCCCCOc1ccc(cc1)c1ccc(cc1)OCc1ccc(cc1)[N+](=O)[O-],{},17.95743357 +338,*N1C(=S)SC(=Cc2ccc(cc2)C=C2SC(=S)N(C2=O)CCCCCC*)C1=O,{},35.37444876 +339,*c1oc(cc1)Sc1oc(cc1)C=Nc1ccc(cc1)N=C*,{},95.64631957 +340,*OC(=O)c1ccc(cc1)S(=O)(=O)CCCCCCS(=O)(=O)c1ccc(cc1)C(=O)OCCCCCC*,{},35.86441642 +341,*OS(=O)(=O)c1cc(ccc1)S(=O)(=O)Oc1cc(ccc1)*,{},29.19231194 +342,*SSC(=O)N(c1ccc(cc1)Cc1ccc(cc1)N(C(=O)SSCCCC*)C)C,{},20.68923822 +343,*C(C*)(c1ccc(cc1)OC(=O)C)OC(=O)C,{$CC(c1ccc(OC(C)=O)cc1)(OC(C)=O)$},74.85696518 +344,*Oc1ccc(cc1)C(=Cc1ccc(cc1)OC(=O)CCCCCCCCCCC(=O)*)C,{},6.458707655 +345,*N(C(=O)CCCCC(=O)N(CC(C(C(C*)(F)F)(F)F)(F)F)CC)CC,{},10.59395774 +346,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)c2ccc(cc2)C(=O)*)CC1,{},192.2096838 +347,*Oc1cc(c(cc1)N1ON1c1c(cc(cc1)OC(=O)CCCCCCCCCCC(=O)*)C)C,{},50.67469492 +348,*c1c(cc(c(c1)Oc1ccc(cc1)S(=O)(=O)O[Na])c1ccc(cc1)*)Oc1ccc(cc1)S(=O)(=O)O[Na],{},172.5717242 +349,*OC(=O)NCCCCC*,{},-13.55087665 +350,*N1C(=O)C(CC1=O)Nc1ccc(cc1)NC1C(=O)N(C(=O)C1)c1ccc(cc1)Cc1ccc(cc1)*,{},248.5034267 +351,*C(=C*)c1ccc(cc1)OCCCCCC(=O)Oc1c(c(c(c(c1F)F)F)F)F,{$C=C(c1ccc(OCCCCCC(=O)Oc2c(F)c(F)c(F)c(F)c2F)cc1)$},73.83198457 +352,*Oc1cc(ccc1)OC(=O)c1ccc(cc1)C=Nc1ccc(cc1)OCCCCCCOc1ccc(cc1)N=Cc1ccc(cc1)C(=O)*,{},83.83402436 +353,*C1=NC2=CC(C=CC2=C1)*,{},103.156476 +354,*C=CCCCCCCCC*,{$C=CCCCCCCCC$},-17.2820223 +355,*C(CCC*)Cl,{$CCCC(Cl)$},-30.93658282 
+356,*C#CC(=C(*)CCCCOC(=O)NCCC)CCCCOC(=O)NCCC,{$C#CC(CCCCOC(=O)NCCC)=C(CCCCOC(=O)NCCC)$},40.70123878 +357,*NC(CC(=O)*)c1ccccc1,{},-30.79261317 +358,*S(=O)(=O)C(C=CC(*)C)C,{},33.82674704 +359,*C(C(*)(C([2H])([2H])[2H])C([2H])([2H])[2H])([2H])[2H],{$C([2H])([2H])C(C([2H])([2H])[2H])(C([2H])([2H])[2H])$},85.2671101 +360,*c1sc(cc1)C(=O)NCCCCCCNC(=O)*,{},42.90894132 +361,*SC(=O)c1ccc(cc1)C(=O)SCc1c(c(c(c(c1C)C)C*)C)C,{},127.8963011 +362,*c1ccc2c(c1)c(=O)oc(n2)c1cc(cc(c1)N1C(=O)c2c(C1=O)c(c(c(c2Cl)Cl)Cl)Cl)c1oc(=O)c2c(ccc(c2)C*)n1,{},378.8956296 +363,*Nc1ccc(cc1)NC(=O)c1c(cc(c(c1)SCCCCCCCC)C(=O)*)SCCCCCCCC,{},69.67482713 +364,*Oc1c(cc(cc1)C=Nc1ccc(cc1)N=Cc1cc(c(cc1)OC(=O)CCCCC(=O)*)OC)OC,{},-41.85748469 +365,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)CCCCCCC(=O)*,"{C(=O)c2ccc(cc2)OC(=O)CCCCCCC(=O)>}",72.32081554 +366,*c1nc(sc1)N=Cc1cc(c(cc1)OCCCCOc1c(cc(cc1)C=Nc1nc(cs1)c1ccc(cc1)Oc1ccc(cc1)*)OC)OC,{},104.0626058 +367,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)NNC(=O)c1cc(c(cc1)NC(=O)c1ccc(cc1)*)O,{},111.457133 +368,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1cc(ccc1)Oc1ccc(cc1)C(=O)*,"{C(=O)c3cccc(c3)Oc4ccc(cc4)C(=O)>}",89.5930824 +369,*C(C*)OCCC,{$CC(OCCC)$},-17.16506937 +370,*OC(=O)CCCC(=O)OCC(C(C(C(C*)(F)F)(F)F)(F)F)(F)F,{},-64.11657159 +371,*OC1CCC(CC1)OC(=O)CCCCCCC(=O)*,"{C(=O)CCCCCCC(=O)>}",-35.83964331 +372,*N1C(=O)N(C(=O)C1(C)C)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},302.3547996 +373,*Oc1ccc(cc1)OCC(=O)OC(=O)c1ccc(cc1)C(=O)OC(=O)C*,{},80.44433982 +374,*c1c2c(c(cc1)*)cccc2,{},189.0821827 +375,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)c1ccc(cc1)Cl,{},168.825845 +376,*[Si](c1ccc(cc1)*)(OCC)OCC,{},7.02246836 +377,*NC(CNC(=O)NCCCCCCNC(=O)*)C,{},55.35434565 +378,*c1nc(cs1)c1ccc(cc1)c1nc(sc1)CCCC*,{},110.5188915 +379,*Oc1ccc(cc1)OC(=O)CCCCCCCC(=O)*,"{C(=O)CCCCCCCC(=O)>}",-6.058372606 +380,*C(C*)OC(=O)C(CC)(CC)CC,{$CC(OC(=O)C(CC)(CC)CC)$},20.41787387 +381,*NC(=O)CCCCCCCCCCCCCCC(=O)NCCc1ccc(cc1)CC*,{},65.25488595 +382,*C1CCN(CC1)SC(=O)OCCCCOC(=O)SN1CCC(CC1)CCC*,{},-6.032415023 
+383,*Oc1cc2c(C(CC2(C)C)(c2ccc(cc2)Oc2ccc(cc2)C2(CC(c3c2cc(cc3)*)(C)C)C)C)cc1,{},308.5311609 +384,*OC(=O)c1ccc(cc1)CCc1ccc(cc1)C(=O)OCC*,{},53.2795594 +385,*c1oc2c(n1)cc(cc2)c1cc2c(oc(n2)CCCCCCCC*)cc1,{},61.93714913 +386,*C(=C*)CNS(=O)(=O)CC,{$C=C(CNS(=O)(=O)CC)$},44.748248 +387,*c1nc(ccc1)Oc1ccc(cc1)Oc1ccc(cc1)O*,{},-19.11943844 +388,*C(=C(*)C)[Si](CCCC)(C)C,{$C(C)=C([Si](C)(C)CCCC)$},152.5242236 +389,*OC(=O)CCSCCC(=O)*,{},6.950276755 +390,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=C(C#N)C#N)c1ccc(cc1)c1ccc(cc1)C(=C(C#N)C#N)Nc1ccc(cc1)*,{},270.2905197 +391,*Oc1cc(ccc1)OC(=O)Oc1ccc(cc1)OC(=O)*,{},-104.3379932 +392,*OC(=O)c1ccc(cc1)NC(=O)CCCCC(=O)Nc1ccc(cc1)C(=O)OCCC*,{},83.99780359 +393,*c1nc2c([nH]1)cc(cc2)NC(=NO)C(=NO)Nc1ccc2c(nc([nH]2)CCCC*)c1,{},170.1130329 +394,*Nc1c(c(c(c(c1C)C)NC(=O)c1ccc(cc1)C(=O)*)C)C,{},205.6552433 +395,*OC(=O)Nc1ccc(cc1)NC(=O)OCC*,{},134.2908917 +396,*Oc1c(cc(cc1)OC(=O)c1ccc(cc1)C(=O)*)Cl,{},119.6241841 +397,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1c(cc(c(c1)C(=O)OCCCCCC)C(=O)*)C(=O)OCCCCCC,{},77.13970172 +398,*N1CCN(CC1)CCC(=O)N(CCN(C(=O)CC*)C(C)C)C(C)C,{},-43.71593166 +399,*c1ccc2n(c3c(c2c1)cc(cc3)C=NN(c1ccc(cc1)S(=O)(=O)c1ccc(cc1)N(N=C*)CCCC)CCCC)CC,{},139.604302 +400,*Sc1ccc(cc1)c1ccc(cc1)SC(=O)CCCCC(=O)*,{},13.2621556 +401,*N(c1ccc(cc1)c1ccc(cc1)N(C(=O)c1ccc(cc1)C(=O)*)CC)CC,{},62.56896406 +402,*S(=O)(=O)CCCC*,{},-14.41108992 +403,*c1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)C(=O)c2ccc(cc2)C(=O)*)CCC1,{},240.3798309 +404,*c1ccc2n(c3c(c2c1)cc(cc3)*)CC,{},206.6525359 +405,*OC(C(C(=O)*)(C)C)c1ccccc1,{},65.42132156 +406,*c1nc2c([nH]1)cc(cc2)c1ccc2c(nc([nH]2)c2ccc(cc2)C(=O)c2ccc(cc2)*)c1,{},300.9001313 +407,*Nc1ccc(cc1)NC(=S)NC(=O)c1ccc(cc1)C(=O)NC(=S)*,{},220.8197438 +408,*=c1c2cc3c(cc2c(=O)o1)c(=O)oc3=Nc1cc(ccc1)Oc1cc(ccc1)Oc1cc(ccc1)N=*,{<=Nc1cccc(c1)Oc2cccc(c2)Oc3cccc(c3)N=C4OC(=O)c5cc6c(cc54)C(OC6=O)=>},132.5253262 +409,*OS(=O)(=O)c1ccc(cc1)S(=O)(=O)c1ccc(cc1)S(=O)(=O)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)*,{},194.2678014 
+410,*OC(CCOC(=O)c1cc(ccc1)C(=O)*)C,"{C(=O)c1cccc(c1)C(=O)>}",-42.28619333 +411,*NC(=O)CCCCCCCCC(=O)NC*,{},66.76854223 +412,*NC(=O)CCP(=O)(CCC(=O)NCC*)C,{},143.8466204 +413,*C(C*)c1c(cccc1)OC,{$CC(c1ccccc1OC)$},160.10962 +414,*OCCCCCOCCCCCCOCCCCCC*,{},-72.01996519 +415,*NC(=O)C(=O)NCCNC(=O)CCCCCCCC(=O)NCC*,{},112.5256843 +416,*OC(=O)c1ccc(cc1)C(=O)OCC(C*)(CCl)CCl,{},11.46555203 +417,*OC(COC(=O)CCCCC(=O)*)C,"{C(=O)CCCCC(=O)>}",-15.95518318 +418,*c1ccc(cc1)c1ccc(cc1)C(*)(C)C,{},210.9469969 +419,*Oc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*,{},203.5999878 +420,*Oc1c(cc(cc1)Oc1ccc(cc1)C(=O)c1ccc(cc1)*)CBr,{},98.31320425 +421,*Oc1cc(ccc1)C(=O)OC(=O)c1cc(ccc1)OCC*,{},3.697542049 +422,*C(=C*)c1ccc(cc1)[N+](=O)[O-],{$C=C(c1ccc([N+](=O)[O-])cc1)$},-12.60583746 +423,*c1sc(cc1)[Si](c1sc(cc1)[SiH](*)C)(C)C,{},60.04219897 +424,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)c1ccc(cc1)Oc1ccc(cc1)C(=O)*)C,"{C(=O)c3ccc(cc3)Oc4ccc(cc4)C(=O)>}",76.95210942 +425,*C(C*)C(=O)n1sc2c(c1=O)cccc2,{$CC(C(=O)n1sc2ccccc2c1=O)$},48.67425788 +426,*Nc1cc(cc(c1)C(=O)OCCN(c1ccc(cc1)S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C)NC(=O)c1cc(cc(c1)OCCN(c1ccc(cc1)S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C)C(=O)*,{},172.7344272 +427,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1c(cc(cc1)c1cc(c(cc1)*)C)C,{},271.2577887 +428,*c1c(cc(c(c1)OCCCCCCOc1ccc(cc1)C1CCC(CC1)CCCCC)C=C*)OCCCCCCOc1ccc(cc1)C1CCC(CC1)CCCCC,{},36.67582945 +429,*OC(=O)SSC(=O)OCCCC*,{},-58.94578339 +430,*NC(C(=O)NCC(=O)*)C,{},19.96697332 +431,*NC(=O)CNC(=O)CC*,{},70.42884183 +432,*SC(=O)CCCCC(=O)SCc1c(c(c(c(c1C)C)C*)C)C,{},44.80678229 +433,*C(=C*)c1ccccc1,{$C=C(c1ccccc1)$},66.12991425 +434,*c1cc2c(C(=O)N(C2=O)c2cc(ccc2)NC(=O)c2cc(ccc2)C(=O)Nc2cc(ccc2)NC(=O)*)cc1,{},213.4893774 +435,*Oc1c(cc(cc1Br)C(c1cc(c(c(c1)Br)OC(=O)c1cc(ccc1)C(=O)*)Br)(CCC(=O)O)C)Br,{},175.2273279 +436,*OC(=O)CCCCC(=O)OCC(C*)(CCl)CCl,{},2.83724036 +437,*N1CCN(CC1)C(=O)SSCCCCSSC(=O)*,{},-3.571974592 
+438,*N1C(=O)N(C(=O)C1(c1ccccc1)c1ccccc1)C(=O)c1ccc(cc1)N=Nc1ccc(cc1)C(=O)*,{},183.844758 +439,*c1cc2n(c3c(c2cc1)ccc(c3)C#CC#C*)CCCCCCCCCCCCCCCC,{},68.35869686 +440,*c1c2c(nccc2)c(cc1)OCc1ccc(cc1)COc1c2ncccc2c(cc1)C*,{},200.3538526 +441,*OC(=O)c1ccc(cc1)C(=O)OCCOCCOCC*,{},36.06576581 +442,*c1c2c(c(s1)*)sc(n2)CCCCCCCCC,{},56.24069524 +443,*=C=C=C(C(=*)COS(=O)(=O)c1ccc(cc1)C)COS(=O)(=O)c1ccc(cc1)C,{$=C=C=C(COS(=O)(=O)c1ccc(C)cc1)C(COS(=O)(=O)c1ccc(C)cc1)=$},76.80290526 +444,*Oc1c(cc(cc1)C=CC=Cc1cc(c(cc1)OCCCCCCC*)C)C,{},41.89270134 +445,*Oc1ccc(cc1)C(=O)OCCCCOC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)*,{},68.79315579 +446,*Oc1c(c(ccc1)Oc1ccc(cc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1ccc(cc1)*)C#N,{},160.9324159 +447,*C1C(=O)N(C(=O)C1*)c1ccc(cc1)COC(C)(C)C,{},122.5873684 +448,*c1ccc2n(c3c(c2c1)cc(cc3)C(=O)Oc1ccc(cc1)C(c1ccc(cc1)OC(=O)*)(C)C)C,{},73.83261176 +449,*/C=C/*,{$/C=C/$},59.5588378 +450,*c1ncc(cc1)c1n(c(cc1)c1n(c(cc1)*)C)C,{},256.5965094 +451,*c1ncc(cc1)*,{},322.0959561 +452,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)NC(=O)*)cc1,{},48.20867337 +453,*N(C(=O)*)CCCCCC,{},14.34014558 +454,*Nc1cc(ccc1)C#Cc1cc(ccc1)NC(=O)c1c(cc(cc1)C(=O)*)C(=O)O,{},187.6187871 +455,*c1c(cc(c(c1)C*)C)O,{$Cc1cc(c(O)cc1C)$},12.40187977 +456,*Oc1cc(c(cc1)C(=O)Nc1ccc(cc1)NC(=O)c1c(cc(cc1)*)C(=O)O)C(=O)O,{},216.6231471 +457,*c1nc(nc(n1)Oc1cc(ccc1)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)NC(=O)c1cc(ccc1)O*)Sc1ccccc1,{},184.9515774 +458,*=C1c2c(C(=O)O1)cc(cc2)c1cc2c(C(=O)OC2=Nc2cc(ccc2)Oc2cc(ccc2)Oc2cc(ccc2)N=*)cc1,{<=Nc1cccc(c1)Oc2cccc(c2)Oc3cccc(c3)N=C4OC(=O)c5ccc(cc54)c6ccc7c(c6)C(=O)OC7=>},193.7356518 +459,*Nc1c(cc(cc1)*)CC,{},162.1855704 +460,*OC(=O)NCCCCCCNC(=O)OCCN(CC*)c1ccc(cc1)N=Nc1ccc(cc1)C,{},24.09354398 +461,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2ccc(cc2)*)c1,{},423.6341908 +462,*c1ncc(cc1)C(=O)OC(=O)COc1ccc(cc1)OCC(=O)OC(=O)*,{},155.9709567 
+463,*C(C*)C(=O)OCCN(S(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)C,{$CC(C(=O)OCCN(C)S(=O)(=O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F)$},-148.0297376 +464,*OC(=O)c1cc(ccc1)C(=O)OCC1C(C1)C*,{},10.23490017 +465,*NC(C(=O)*)C(C)C,{},153.0278775 +466,*C(C*)(C(=O)OCC(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)F,{$CC(C(=O)OCC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F)(F)$},-98.70472035 +467,*C(*)C(=O)OC(C)(C)C,{$C(C(=O)OC(C)(C)C)$},12.73837737 +468,*OC(=O)c1c(cccc1)c1c(cccc1)C(=O)OCCCC*,{},71.34273106 +469,*Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)Nc1ccc(cc1)CCc1ccc(cc1)NC(=O)*,{},183.1975298 +470,*C1C(=O)N(C(=O)C1C(C*)OC(=O)C)c1ccccc1,{},157.3759721 +471,*OC(CC(=O)*)C(Cl)Cl,{},110.2326669 +472,*OC(=O)NCCCCCCCCCCNC(=O)OCCCCCCCC*,{},24.78605928 +473,*C1C=CC(CC1)*,{},103.377349 +474,*NC(=O)NCCCP(CCC*)c1ccccc1,{},-49.19787298 +475,*c1oc(nn1)CCCCCCCC*,{},-48.88416733 +476,*OC(=O)NCCCCCCNC(=O)OCC(C*)(C)C,{},47.60248847 +477,*N=P(*)(OCC(C(C(F)(F)F)(F)F)(F)F)OCC(C(C(F)(F)F)(F)F)(F)F,{},-77.91107652 +478,*OC(=O)c1ccc(cc1)C(=O)OCCCCCC(=O)NCCNC(=O)CCCCC*,{},81.65245915 +479,*c1cc2c(C(=O)OC2=Nc2cc(ccc2)N=C2OC(=O)c3c2cc(cc3)C(=O)*)cc1,{},246.6584182 +480,*C*,{$C$},-2.526682925 +481,*c1c(nnc(n1)c1nc(ccc1)c1nc(c(nn1)c1ccccc1)c1ccc(cc1)Sc1ccc(cc1)*)c1ccccc1,{},419.5781202 +482,*c1cc2c(C(=O)N(C2=O)c2c(cc(cc2)c2cc(c(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Oc2ccc(cc2)C(c2ccc(cc2)OC(=O)*)(C)C)C)C)cc1,{},87.17702398 +483,*=c1sc(cs1)c1ccc(cc1)C=*,{<=Cc1ccc(cc1)C2=CSC(S2)=>},105.3372816 +484,*Oc1ccc(cc1)N=Cc1ccc(cc1)OC(=O)c1ccc(cc1)C=Nc1ccc(cc1)OCCCCOC(=O)NCCCCCCNC(=O)OCCCC*,{},-34.25655466 +485,*Oc1ccc(cc1)C(=O)CNc1ccc(cc1)NCC(=O)c1ccc(cc1)*,{},158.6944649 +486,*OC(=O)N(c1c(ccc(c1)N(C(=O)OCC*)C)C)C,{},42.46691504 +487,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)N2C(=O)N(C(C(=C2C)C(=O)OCC)c2ccc(cc2)Cl)C(=O)*)cc1,{},170.3261787 +488,*C(C*)(C(=O)Oc1ccc(cc1)C)C,{$CC(C(=O)Oc1ccc(C)cc1)(C)$},126.1154692 
+489,*c1sc2c(n1)ccc(c2)OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCCOc1ccc(cc1)*,{},-17.21827415 +490,*c1nc2c(c(c1)OCCO*)cc(cc2)C,{},88.77435741 +491,*Oc1ccc(cc1)C(=O)CCCCCCCCC(=O)c1ccc(cc1)OC(=O)*,{},6.568396994 +492,*c1ccc2c(nc([nH]2)c2cc(ccc2)c2nc3c([nH]2)ccc(c3)C(=O)Nc2ccc(cc2)NC(=O)*)c1,{},309.3374253 +493,*OCC1(C2CCC(C1)CC2)C*,{},81.08225465 +494,*c1nc(ccc1)C=Nc1ccc(cc1)N=C*,{},139.1234551 +495,*/C(=C(/*)\c1ccccc1)/c1ccccc1,{$/C(c1ccccc1)=C(c1ccccc1)/$},206.5698859 +496,*N1C(CN(C(C1)C)C(=O)OCCOC(=O)*)C,{},60.10633691 +497,*C1C(CC1)*,{$C1CCC1$},69.57488221 +498,*OC(C*)CCCCCCCCOc1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)OCC(CC)C,{},-0.17829817 +499,*C(C*)(C(=O)OCCOC(=O)c1cc(cc(c1)OC(=O)c1ccc(cc1)N=Nc1ccc(cc1)OCCCCCCC)OC(=O)c1ccc(cc1)N=Nc1ccc(cc1)OCCCCCCC)C,{$CC(C(=O)OCCOC(=O)c1cc(OC(=O)c2ccc(N=Nc3ccc(OCCCCCCC)cc3)cc2)cc(OC(=O)c2ccc(N=Nc3ccc(OCCCCCCC)cc3)cc2)c1)(C)$},9.014452923 +500,*C(C*)C(=O)OCCOC(C(F)(F)F)(C(F)(F)F)F,{$CC(C(=O)OCCOC(F)(C(F)(F)F)C(F)(F)F)$},-51.63721715 +501,*Sc1ccc(cc1)*,{},64.586946 +502,*Oc1ccc(cc1)CCCNC(=O)CCCCC(=O)NCCCc1ccc(cc1)OCCCCC*,{},28.2211431 +503,*SSCCCCSSCCCCCC*,{},-41.26672381 +504,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)OCCN(CCOC(=O)*)c2ccc(cc2)N=Nc2ccc(cc2)[N+](=O)[O-])cc1,{},145.3751112 +505,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)C(C(=O)N1C(=O)N(C(C1=O)(C)C)C(=O)C(*)C)C,{},252.5865186 +506,*OC(=O)CCCCC(=O)OCc1ccc(cc1)C*,{},-4.158432897 +507,*Nc1c(cc(cc1)NC(=O)c1c(cc(c(c1)C(=O)*)C(=O)O)C(=O)O)S(=O)(=O)O[Na],{},129.970858 +508,*Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)C(CC(C)C)N1C(=O)c2c(C1=O)cccc2)C(=O)*,{},36.87047274 +509,*N1C(CN(C(C1)C)SC(=O)OCCCCOC(=O)S*)C,{},-30.512105 +510,*N(C(=O)*)CC=C,{},164.8639006 +511,*[Si](c1ccc(cc1)*)(c1ccc(cc1)CN(C)C)C,{},12.90627629 +512,*c1sc(cc1)C#CC#C*,{},49.13696662 +513,*c1ncnc(c1)C=Cc1ccc(cc1)C=C*,{},84.80053451 +514,*c1nc(nc(n1)Oc1c2c(ccc1C(=O)Nc1ccc(cc1)NC(=O)c1c(c3c(cc1)cccc3)O*)cccc2)N1CCCCC1,{},162.6671353 +515,*OC(=O)CCC(=O)OCCCCCCCCCC*,{},1.783806133 
+516,*c1cc2n3c(=O)c4cc5c(cc4c3nc2cc1)c(=O)n1c2ccc(cc2nc51)*,{},384.637936 +517,*c1ccc2n(c3c(c2c1)cc(cc3)C(=O)Oc1ccc(cc1)C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)OC(=O)*)C,{},262.5942508 +518,*=Nc1ccc(cc1)N=C(Nc1c(cc(cc1)c1cc(c(cc1)NC(=*)C)C(=O)O)C(=O)O)C,{<=Nc1ccc(cc1)N=C(C)Nc2ccc(cc2C(=O)O)c3ccc(c(C(=O)O)c3)NC(C)=>},89.38045943 +519,*OC(=O)C=C(CC(=O)OCC*)c1ccc(cc1)OCC,{},1.131191733 +520,*OC(=O)Nc1c(ccc(c1)NC(=O)OCCOCCOCCC*)C,{},-26.75831261 +521,*Nc1c(cccc1)CCc1c(cccc1)NC(=O)*,{},207.655323 +522,*SC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)SCc1ccc(cc1)C*,{},90.77725081 +523,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)NCCC[Si](O[Si](CCCNC(=O)*)(C)C)(C)C)cc1,{},143.0502256 +524,*OC(=O)c1cc(ccc1)C(=O)OCCNC(=O)c1ccc(cc1)C(=O)NCC*,{},121.124261 +525,*C(C*)(C(=O)OCCCCCCCCCCOc1ccc(cc1)N1C(=O)C(=Cc2c(c3c(n2C)cccc3)C)C(=C(C)C)C1=O)C,{$CC(C(=O)OCCCCCCCCCCOc1ccc(N2C(=O)C(=Cc3c(C)c4ccccc4n3C)C(=C(C)C)C2=O)cc1)(C)$},-17.96880959 +526,*SC(=O)CCCCCCCCC(=O)SCCCCCC*,{},-39.13613776 +527,*c1nc(ccc1)C(=O)NCCCCCCCCCCNC(=O)*,{},49.59402876 +528,*NC(=O)c1ccc(cc1)C(=O)NCCCCCCCCCC*,{},50.1155014 +529,*Nc1ccc(cc1)NC(=O)c1ccc(cc1)NC(=O)C=Cc1ccc(cc1)C=CC(=O)Nc1ccc(cc1)C(=O)*,{},169.7759737 +530,*NNC(=O)CCCCCCCCC(=O)NNC(=S)c1cc(ccc1)C(=S)*,{},32.4414411 +531,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)NC(=O)c2cc(ccc2)C(=O)Nc2ccc(cc2)*)cc1,{},155.8375326 +532,*NC(=S)C=Cc1ccc(cc1)Cc1ccc(cc1)C=CC(=S)NCC*,{},99.87529352 +533,*c1ccc2[nH]c3c(c2c1)cc(cc3)C(=O)c1c(cc(c(c1)C(=O)*)C(=O)O)C(=O)O,{},106.1795053 +534,*c1nc(sc1)NC(=O)Nc1ccc(cc1)Cc1ccc(cc1)NC(=O)Nc1nc(cs1)c1ccc(cc1)Oc1ccc(cc1)*,{},221.6298798 +535,*OC(=O)c1ccc(cc1)N1ON1c1ccc(cc1)C(=O)OCCOCC*,{},15.34426557 +536,*c1ccc2nc3c(Sc4cc(ccc4N3)c3cc4Sc5c(Nc4cc3)nc3ccc(cc3n5)O*)nc2c1,{},383.4 +537,*c1cc2Sc3c(=Nc2cc1)[nH]c1ccc(cc1n3)c1ccc2[nH]c3=Nc4c(Sc3nc2c1)cc(cc4)*,{},418.69 +538,*c1nc2c([nH]1)ccc(c2)c1ccc2c(nc([nH]2)c2oc(cc2)*)c1,{},419.98 
+539,*=C1OC(=c2cc3ccc4=CC(=*)C=c5ccc(c2)c3c45)c2c3c4c1ccc1cccc(c41)c1c3c(ccc1)cc2,{<=c1cc2ccc3cc(cc4ccc(c1)c2c34)=c4oc(c5ccc6cccc7c8cccc9ccc4c(c98)c5c67)=>},432.43 +540,*c1cc2c3n(c4ccc(cc4n3)Oc3ccc4n5c(nc4c3)c3ccc(cc3C5=O)C(*)(C(F)(F)F)C(F)(F)F)C(=O)c2cc1,{},395.15 +541,*c1cc2n3c(=O)c4cc5c(cc4c3nc2cc1)c(=O)n1c2ccc(cc2nc51)O*,{},416.53 +542,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc(cc1)c1c(cc(cc1c1ccc(cc1)c1ccccc1)c1ccc(cc1)c1cc(c(c(c1)c1ccc(cc1)c1ccccc1)c1ccc(cc1)*)c1ccc(cc1)c1ccccc1)c1ccc(cc1)c1ccccc1,{},435 +543,*n1c(=O)c2cc3c(cc2c1=O)c(=O)n(c3=O)c1ccc2c(nc([nH]2)c2ccc(cc2)*)c1,{},456.35 +544,*N1C(=O)c2cc3C(c4c(Oc3cc2C1=O)cc1C(=O)N(C(=O)c1c4)c1cc(c(cc1)c1c(cc(cc1)*)C(F)(F)F)C(F)(F)F)(C(F)(F)F)C(F)(F)F,{},472.25 +545,*c1cc2nc3c4c5c6c(c3nc2cc1)cccc6c1nc2ccc(cc2nc1c5ccc4)*,{},411.97 +546,*=C1C=c2ccc3cc(=C4c5ccccc5C(=*)c5ccccc45)cc4ccc(=C1)c2c34,{<=c1cc2ccc3cc(cc4ccc(c1)c2c34)=c4c5ccccc5c(c5ccccc45)=>},437.49 +547,*c1n(c(cc1)*)C(C(=O)OC)C,{},279.4452403 +548,*NC(C(=O)NCC(=O)NCC(=O)*)C,{},208.6397491 +549,*c1sc2cc3c(cc2n1)sc(n3)c1c(cc(c(c1)OCCCCCC)*)OCCCCCC,{},168.5263131 +550,*C(*)C(=O)OC(CC)(C)C,{$C(C(=O)OC(C)(C)CC)$},136.5678336 +551,*N(c1ccc(cc1)*)CCCCCCC,{},110.7170963 +552,*Oc1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)c1ccccc1,"{C(=O)c2ccc(cc2c2ccccc2)C(=O)>}",227.700588 +553,*S(=O)(=O)NCCNS(=O)(=O)c1ccc(cc1)c1ccc(cc1)*,{},173.2454244 +554,*Oc1ccc(cc1)c1ccc(cc1)OC(=O)c1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)C(=O)*,{},213.4133554 +555,*c1[nH]c(cc1c1ccccc1)*,{},120.4503456 +556,*c1ccc(cc1)C1C(C(C1C(=O)OCC)*)C(=O)OCC,{},164.3224631 +557,*C(C(*)O)C,{$C(C)C(O)$},113.5665564 +558,*C(C*)C(CC)CC,{$CC(C(CC)CC)$},38.96888215 +559,*OC(=O)CCC(=O)OCCCCCCCCCCCCCCCCCCCC*,{},-32.76938912 +560,*C(C*)C(=O)N(CC)CC,{$CC(C(=O)N(CC)CC)$},56.77009786 +561,*NNC(=O)CCC(=O)NNC(=O)CCCCCCCCC(=O)*,{},64.69850401 +562,*NC(=O)CCC(=O)NCCCCCCCC*,{},69.22130195 +563,*NC(C(C(=O)*)(C)C)c1ccccc1,{},154.3595069 +564,*OC(=O)C/C=C/CC(=O)OCCCCCCCCCCCCCC*,{},-41.10158883 
+565,*C(C*)(C(=O)OCC)CO,{$CC(C(=O)OCC)(CO)$},22.36004964 +566,*O[Si](CCCN=C1c2c(ccc(c2C(=NCCC[Si](*)(C)C)c2ccccc12)O)O)(C)C,{},51.3 +567,*Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)*)(c1ccc(cc1)O)C,{},255.52 +568,*OP(=O)(N=Nc1ccc(cc1)COC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)OCc1ccc(cc1)N=NP(=O)(OCCCCCC*)OC)OC,{},62.39 +569,*c1cc2c(C(=O)N(C2=O)c2c(ccc(c2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)CP(=O)(OCC)OCC)cc1,{},264.06 +570,*Nc1c(cc(cc1)NC(=O)c1ccc(cc1)NC(=O)CCCCCCCCCCC(=O)Nc1ccc(cc1)C(=O)*)C(=O)OCCCCCCCCCCCCCCCC,{},187.43 +571,*C(C*)(C(=O)OCCF)C,{$CC(C(=O)OCCF)(C)$},76.42 +572,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)Oc1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)Sc2ccc(cc2)Oc2ccc(cc2)*)cc1,{},225.33 +573,*C(C*)c1ccc(cc1)C(=O)N(C)C,{$CC(c1ccc(C(=O)N(C)C)cc1)$},146.9 +574,*OC(=O)c1ccc(cc1)C(c1ccc(cc1)C(=O)*)(C)C,{},46.39 +575,*Oc1cc2c(cc1)ccc(c2)Oc1ccc(cc1)C(=O)Nc1cc(ccc1)NC(=O)c1ccc(cc1)*,{},204.47 +576,*SCC(=O)NCCCCCCNC(=O)C*,{},29.78 +577,*c1cc2C(c3c(c2cc1)ccc(c3)c1ccc(cc1)c1c(c(cc(c1)c1ccc(cc1)OCC(CCCC)CC)c1ccc(cc1)*)c1ccc(cc1)OCC(CCCC)CC)(CCCCCC)CCCCCC,{},123.52 +578,*OC(=O)COCC(=O)OCCCC*,{},27.22 +579,*C1(CCN(CC1)C(=O)C(CC(=O)N1CCC(CC1)(CCC*)C)C)C,{},111.94 +580,*c1nc2c(nc1)cc(cc2)Oc1cc2c(nc(cn2)c2ccc(cc2)*)cc1,{},381.02 +581,*C(C*)OC(=O)c1c(cccc1)C,{$CC(OC(=O)c1ccccc1C)$},81.84 +582,*C1(CC(c2c1cc(cc2)*)(C)C)C,{},261.41 +583,*c1cc2c(C(=O)N(C2=O)c2ccc3Cc4c(c3c2)cc(cc4)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)cc1,{},442.63 +584,*c1cc2c(C(=O)N(C2=O)c2c3c(ccc2)c(ccc3)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2nc(nc(n2)NC(=O)*)c2ccccc2)cc1,{},330.59 +585,*Oc1ccc(cc1)OC(=O)c1c(cc(cc1)C(=O)*)Sc1ccc(cc1)Cl,{},146.43 +586,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)CCCCCCCCCCCC*)cc1,{},80.63 +587,*Oc1ccc(cc1)OC(=O)c1ccc(cc1)C(=O)Oc1ccc(cc1)OCCCCC*,{},89.92 +588,*C1C(=O)N(C(=O)C1C(C*)c1ccccc1)CCCCCCCCCCCC,{},70.24 +589,*OC(=O)Nc1c(ccc(c1)NC(=O)OCCCCCCCC*)C,{},117.25 
+590,*C(C*)(C(=O)OCCCCCCOc1ccc(cc1)C(=O)Oc1ccc2c(c1)oc(=O)cc2)C,{$CC(C(=O)OCCCCCCOc1ccc(C(=O)Oc2ccc3ccc(=O)oc3c2)cc1)(C)$},118.96 +591,*Oc1ccc(cc1)CC(NC(=O)Cc1ccc(cc1)OC(=O)CCCCCCC(=O)*)C(=O)OCC,{},66.4 +592,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(ccc2)C(c2cc(ccc2)*)O[Si](O[Si](O[Si](C)(C)C)(C)C)(C)C)cc1,{},196.68 +593,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)C(=O)OCCOCCOCCOC(=O)*)cc1,{},79.22 +594,*Oc1ccc(cc1)C1(c2ccccc2c2ccccc12)c1ccc(cc1)Oc1c(cc(cc1)C(=O)c1ccc(cc1)C(=O)c1cc(c(cc1)*)C(F)(F)F)C(F)(F)F,{},265.8 +595,*O[Si](O[Si](O[Si](O[Si](CC[Si](O[Si](O[Si](O[Si](O[Si](CC[Si](*)(c1ccccc1)c1ccccc1)(C)C)(C)C)(C)C)(C)C)(C)C)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1)(c1ccccc1)c1ccccc1,{},11.62 +596,*N1C(=O)c2c(C1=O)c(ccc2)Oc1c(c(cc(c1)C(C)(C)C)C(C)(C)C)Oc1c2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)*)ccc1,"{N3C(=O)c4cccc(c4C3=O)Oc5c(cc(C(C)(C)C)cc5C(C)(C)C)Oc6cccc7c6C(=O)N(C7=O)>}",301.26 +597,*Oc1ccc(cc1)c1ccc(cc1)Oc1c(c(c(c(c1F)F)COC(c1cc(ccc1)C(OCc1c(c(c(c(c1F)F)*)F)F)(C(F)(F)F)C(F)(F)F)(C(F)(F)F)C(F)(F)F)F)F,{},139.73 +598,*S(=O)(=O)c1ccc(cc1)NC(=O)c1cc(cc(c1)NC(=O)c1ccc(cc1)NC(=O)C(N1C(=O)c2c(C1=O)cccc2)C)C(=O)Nc1ccc(cc1)*,{},272.65 +599,*NC(=O)NCc1ccc(cc1)CNC(=O)NCCCCCCCCCCCCCCCCCC*,{},62.4 +600,*C1CC2CC(CC(C1)O2)OC(=O)O*,{},196.52 +601,*C(C(*)C(=O)OC(C)(C)C)C(=O)OC,{$C(C(=O)OC)C(C(=O)OC(C)(C)C)$},148.77 +602,*Oc1ccc(cc1)c1ccc(cc1)C(=O)OCC(COC(=O)c1ccc(cc1)c1ccc(cc1)OC(CC*)C)C,{},116.96 +603,*N1C(=O)c2c(C1=O)cc(cc2)c1cc2c(C(=O)N(C2=O)c2c(cc(cc2C)C(c2cc(c(c(c2)C)*)C)c2c3c(ccc2)cccc3)C)cc1,{},369 +604,*c1cc2c(nc(c(n2)c2ccccc2)c2ccc(cc2)c2c(nc3c(n2)cc(cc3)C(=O)*)c2ccccc2)cc1,{},326.27 +605,*C(C*)(C(=O)OCC)F,{$CC(C(=O)OCC)(F)$},124.69 +606,*NC(=O)CCCCC(=O)NCC(CC(CC*)(C)C)C,{},54.82 +607,*c1c2C(=O)N(C(=O)c2c(c2ccccc12)c1ccc(cc1)Oc1ccc(cc1)C(=O)c1cc(ccc1)C(=O)c1ccc(cc1)Oc1ccc(cc1)*)CCCCCCCCCCCC,{},161.91 +608,*Oc1ccc(cc1)C(c1ccc(cc1)OC(=S)*)C,{},101.06 +609,*C(C*)OCC(CC)(C)C,{$CC(OCC(C)(C)CC)$},12.2 
+610,*c1sc(cc1)C(=O)Oc1ccc(cc1)[Si](c1ccc(cc1)OC(=O)*)(CC)CC,{},-36.93 +611,*Oc1c(cc(cc1C)*)C(CCCCCCCCCCCC)C,{},44.22 +612,*Oc1cc(ccc1)C(C(C(c1cc(ccc1)OC(=O)c1cc(ccc1)C(C(C(c1cc(ccc1)C(=O)*)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F,{},105.91 +613,*C(C*)C(=O)Oc1ccc(cc1)C(=O)Oc1ccc(cc1)OC(=O)c1ccc(cc1)OCCCC,{$CC(C(=O)Oc1ccc(C(=O)Oc2ccc(OC(=O)c3ccc(OCCCC)cc3)cc2)cc1)$},42.05 +614,*C(C*)c1c(cccc1)C(=O)OCCC,{$CC(c1ccccc1C(=O)OCCC)$},135.21 +615,*N=P(*)(OCc1ccc(cc1)c1ccccc1)OCc1ccc(cc1)c1ccccc1,{},51.23 +616,*c1oc(nn1)c1ccc(cc1)C(=O)OCCCCCCOc1ccc(cc1)C=C1C(=O)C(=Cc2ccc(cc2)OCCCCCCOC(=O)c2ccc(cc2)*)CCC1,{},90.11 +617,*OC(=O)NCCCCCCNC(=O)OCCCCCCCCCCCC*,{},35.91 +618,*c1oc(nc1)c1cc(c(cc1)Oc1ccc(cc1)S(=O)(=O)c1ccc(cc1)Oc1c(cc(cc1)*)C(F)(F)F)C(F)(F)F,{},267.06 +619,*c1nc(nc(n1)c1ccc(cc1)Oc1ccc(cc1)C(c1ccc(cc1)Oc1ccc(cc1)*)(C(F)(F)F)C(F)(F)F)c1ccccc1,{},303.48 +620,*Oc1c(c(c(c(c1C)C)Oc1ccc(cc1)NC(=O)c1ccc(cc1)C(=O)Nc1ccc(cc1)*)C)C,{},337.62 +621,*N1C(=O)c2c(C1=O)c(ccc2)c1cc2c(C(=O)N(C2=O)c2cc(ccc2)*)cc1,"{N2C(=O)c3ccc(cc3C2=O)c4cccc5c4C(=O)N(C5=O)>}",348.28 +622,*N1C(=O)c2c(C1=O)cc(cc2)Oc1cc2c(C(=O)N(C2=O)c2cc(c(c(c2)Br)Oc2c(cc(cc2Br)*)Br)Br)cc1,{},302.26 +623,*N1C(=O)C2C3C4C(C(C2C1=O)C=C3)C(=O)N(C4=O)c1ccc(cc1)Sc1cc2c(C(=O)N(C2=O)c2ccc(cc2)CC)cc1Sc1ccc(cc1)*,{},307.1 +624,*C1C(C(C(C1)C=C*)(F)F)(C(F)(F)F)C(F)(F)F,{$C=CC1CC(C(C(F)(F)F)(C(F)(F)F)C1(F)F)$},163.71 +625,*OC(CCC(OC(=O)CCCCCCCCC(=O)*)C)C,"{C(=O)CCCCCCCCC(=O)>}",-18.21 +626,*c1ccc2c(nc([nH]2)c2cc(ccc2)c2nc3c([nH]2)ccc(c3)C(=O)Nc2cc(cc(c2)c2nc3c([nH]2)cccc3)NC(=O)*)c1,{},348.55 +627,*C(C*)C(=O)c1ccc(cc1)CC,{$CC(C(=O)c1ccc(CC)cc1)$},68.42 +628,*C1C(=O)N(C(=O)C1*)CCOc1ccc(cc1)c1ccc(cc1)C#N,{},158.36 +629,*C1COC2C1OCC2OC(=O)CCC(=O)O*,{},86.76 +630,*OC(=O)c1ccc(cc1)C(=O)OCCCCOC(=O)CCCCC(=O)OCCCC*,{},33.64 +631,*C(C*)c1ccc(cc1)COCC(CCCC)CC,{$CC(c1ccc(COCC(CC)CCCC)cc1)$},13.66 +632,*C(C*)(C(=O)Oc1cc(c(cc1)C(=O)c1ccccc1)O)C,{$CC(C(=O)Oc1ccc(C(=O)c2ccccc2)c(O)c1)(C)$},186.45 
+633,*C(C*)C(=O)OCCOC(C(F)F)(F)F,{$CC(C(=O)OCCOC(F)(F)C(F)F)$},11.16 +634,*OC(=O)OCC(C(C*)O)O,{},98.39 +635,*Oc1ccc(cc1)NC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)Nc1ccc(cc1)OCCCCCC*,{},158.08 +636,*N(c1ccc(cc1)C(c1ccc(cc1)*)c1ccccc1)c1ccc(cc1)C,{},263.16 +637,*Nc1ccc(cc1)C(c1ccc(cc1)NC(=O)c1cc(cc(c1)N1C(=O)C2C(C1=O)CC=CC2)C(=O)*)(C)C,{},358.53 +638,*C(C*)(CC(=O)OCCCc1ccccc1)C(=O)OCCCc1ccccc1,{$CC(C(=O)OCCCc1ccccc1)(CC(=O)OCCCc1ccccc1)$},29.9 +639,*C=CCCCC(CCC*)Cl,{$C=CCCCC(Cl)CCC$},-9.32 +640,*C1C(=O)N(C(=O)C1C(C*)(C)C)c1c(cccc1)C,{},286.47 +641,*C1(c2c(C(=O)O1)cccc2)c1ccc(cc1)Oc1c(c(c(c(c1F)F)C(=O)c1c(c(c(c(c1F)F)Oc1ccc(cc1)*)F)F)F)F,{},276.34 +642,*Nc1cc(ccc1)NC(=O)CCCCCCC(=O)*,{},128.17 +643,*C(C*)(C(=O)OCCOc1ccc(cc1)N=Nc1ccc(cc1)C#N)C,{$CC(C(=O)OCCOc1ccc(N=Nc2ccc(C#N)cc2)cc1)(C)$},141.68 +644,*N1C(=O)c2c(C1=O)cc(cc2)Oc1ccc(cc1)C1(CCC(CC1)c1ccccc1)c1ccc(cc1)Oc1cc2c(C(=O)N(C2=O)c2ccc(cc2)Cc2ccc(cc2)*)cc1,{},249.05 +645,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)c1c(c(c(c(c1c1ccc(cc1)F)c1ccc(cc1)F)c1ccc(cc1)F)c1ccc(cc1)F)C(=O)c1ccc(cc1)*,{},252.56 +646,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)NC(=O)Nc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)Nc2ccc(cc2)c2ccc(cc2)NC(=O)*)cc1,{},253.71 +647,*OC(C*)CCl,"{,}",2.36 +648,*NC(=O)c1cc(cc(c1)C(C)(C)C)C(=O)NCC(CCC(C*)C)C,{},157.37 +649,*c1cc2c(C(=O)N(C2=O)c2ccc(cc2)Oc2ccc(cc2)C(=O)c2cc(ccc2)C(=O)c2ccc(cc2)Oc2ccc(cc2)N2C(=O)c3c(C2=O)cc(cc3)C(=O)*)cc1,{},240.35 +650,*Oc1c(cc(cc1OC)C=Cc1ccc(cc1)C=Cc1cc(c(c(c1)OC)OCCCCCCCC*)OC)OC,{},65.39 +651,*Nc1c2c(ccc1)c(ccc2)NC(=O)c1cc(ccc1)C(=O)*,{},339.66 +652,*OS(=O)(=O)c1ccc(cc1)*,{},140.64 +653,*c1ccc2ccc3c(c2n1)nc(cc3)c1ccc(cc1)c1c(cc(c(c1)CCCCCC)c1ccc(cc1)*)CCCCCC,{},160.44 +654,*C(C*)(C(=O)OCCCCCCCCCCn1c2ccc(cc2c2ccccc12)N=Nc1ccc(cc1)[N+](=O)[O-])C,{$CC(C(=O)OCCCCCCCCCCn1c2ccccc2c2cc(N=Nc3ccc([N+](=O)[O-])cc3)ccc21)(C)$},60.81 +655,*N=Nc1ccc(cc1)NC(=O)CCC(=O)Nc1ccc(cc1)*,{},208.85 +656,*C(C*)c1cc(ccc1)Cl,{$CC(c1cccc(Cl)c1)$},122.57 +657,*Oc1ccc(cc1)C1(c2cc(ccc2c2ccc(cc12)[N+](=O)[O-])OC)c1ccc(cc1)OC(=O)CCCC(=O)*,{},195.71 
+658,*c1cc2c(C(=O)N(C2=O)c2c(cc(c(c2C)C(=O)c2cc(ccc2)N2C(=O)c3c(C2=O)cc(cc3)C(*)(C(F)(F)F)C(F)(F)F)C)C)cc1,{},280.09 +659,*Oc1ccc(cc1)Oc1ccc(cc1)C(=O)c1cc(ccc1)NC(=O)c1cc(ccc1)C(=O)Nc1cc(ccc1)C(=O)c1ccc(cc1)*,{},222.22 +660,*Oc1ccc(cc1)CC(NC(=O)CCc1ccc(cc1)OC(=O)CCCC(=O)*)C(=O)OCCOCCOCC,{},42.34 +661,*OP(=O)(OCCCCCCCCCCOc1ccc(cc1)C=Cc1ccc(cc1)OCCCCCCCCCC*)OCCCCCCCCCCOc1ccc(cc1)N=Nc1ccc(cc1)C#N,{},51.61 diff --git a/test/test_tokenize_bigsmiles.py b/test/test_tokenize_bigsmiles.py index 9633ce6..f455aa8 100644 --- a/test/test_tokenize_bigsmiles.py +++ b/test/test_tokenize_bigsmiles.py @@ -112,18 +112,6 @@ def _roundtrip_fixtures() -> list[tuple[str, list[tuple[str, str]]]]: "A([$1[Inner]1])R(A'[$1[Inner]1])(A[$1[Inner]2])A'[$1[Inner]2]", 9, ), - ( - "A([$1[$1]1])R(A'[$1[$1]1])(A[$1[$1]2])A'[$1[$1]2]", - 5, - ), - ( - "A([$1[$1]1])R(A'[$1[$2]1])(A[$1[$1]2])A'[$1[$2]2]", - 5, - ), - ( - "A([$1[<1]1])R(A'[$1[>1]1])(A[$1[<1]2])A'[$1[>1]2]", - 5, - ), ] BARE_LABEL_BIGSMILES_WITH_DEFINITIONS = [ ( From a6e503a32689a7b66ff9274e0efdd8ed8b503df7 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Tue, 28 Apr 2026 18:22:52 -0400 Subject: [PATCH 25/52] add fragment label handling in docs notebook --- docs/big_smirk_demo.ipynb | 74 ++++++++++++++++++++++----- src/pre_tokenizers/split_bigsmiles.rs | 12 ++--- 2 files changed, 66 insertions(+), 20 deletions(-) diff --git a/docs/big_smirk_demo.ipynb b/docs/big_smirk_demo.ipynb index c29bea5..0024dea 100644 --- a/docs/big_smirk_demo.ipynb +++ b/docs/big_smirk_demo.ipynb @@ -106,9 +106,7 @@ "id": "7", "metadata": {}, "source": [ - "### Token Coloring Render\n", - "\n", - "Visualize BigSMILES token boundaries for PVC (Polyvinyl chloride ) and sPP (Syndiotactic Polypropylene) by coloring each token in sequence." + "Let's visualize BigSMILES token boundaries for PVC (Polyvinyl chloride ) and sPP (Syndiotactic Polypropylene) by coloring each token in sequence." 
] }, { @@ -159,13 +157,61 @@ "cell_type": "markdown", "id": "10", "metadata": {}, + "source": [ + "### Handling the Fragment Name Definition Notation\n", + "\n", + "The BigSMILES line notation allows some portions of the BigSMILES representations be replaced by more abstract but compact proxy [fragment names], for example, the names of repeating units.\n", + "The smirk BigSMILES tokenizers handles this `'[' + '#' + fragment_name + ']'` syntax by replacing the fragment name with its definition and then tokenizing the expanded BigSMILES.\n", + "The definition should be provided within the BigSMILES as specified by the line notation i.e.:\n", + "`BigSMILES_string + '[' + '#' + fragment_name + ']' + BigSMILES_string + '.' + '{' + '#' + fragment_name + '=' + BigSMILES_string + '}'`\n", + "\n", + "For example:\n", + "```\n", + "C([#Arm1])([#Arm2]).{#Arm1=CO}.{#Arm2=N} --> C(CO)(N) ---> 'C', '(', 'C', 'O', ')', '(', 'N', ')'\n", + "```\n", + "More examples of valid and invalid use of the fragment name definition notation are provided below.\n", + "[fragment names]: (https://olsenlabmit.github.io/BigSMILES/docs/line_notation.html#simplifications-and-abbreviations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "abstract_label_examples = {\n", + " \"defined [#label] placeholder\": \"C([#Arm])([#Arm])([#Arm])[#Arm].{#Arm=CO{[<][>]CCO[<][>]}}\",\n", + " \"multiple [#label] placeholders\": \"C([#Arm1])([#Arm2]).{#Arm1=CO}.{#Arm2=N}\",\n", + " \"[#label] in stochastic object\": \"{[][$]CC(C)([#Side])[$][]}.{#Side=C(=O)OCC}\",\n", + " \"undefined [#label] placeholder\": \"C([#Arm])([#Arm])\",\n", + " \"bare labels stay unknown\": r\"A([$1[<1]1])R(A'[$1[>1]1]).{#A=C}.{#R=C}\",\n", + "}\n", + "\n", + "for label, text in abstract_label_examples.items():\n", + " encoded = bigsmirk(text, add_special_tokens=False)\n", + " tokens = bigsmirk.convert_ids_to_tokens(encoded[\"input_ids\"])\n", + " decoded = 
bigsmirk.decode(encoded[\"input_ids\"], skip_special_tokens=True)\n", + "\n", + " print(label)\n", + " print(\"input:\", text)\n", + " print(\"tokens:\", tokens)\n", + " print(\"unknowns:\", tokens.count(bigsmirk.unk_token))\n", + " print(\"decoded:\", decoded)\n", + " print()\n" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, "source": [ "## Zero to Polymer Foundation Model with Smirk!" ] }, { "cell_type": "markdown", - "id": "11", + "id": "13", "metadata": { "editable": true, "slideshow": { @@ -183,7 +229,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "14", "metadata": { "editable": true, "slideshow": { @@ -201,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "13", + "id": "15", "metadata": { "editable": true, "slideshow": { @@ -226,7 +272,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "16", "metadata": { "editable": true, "slideshow": { @@ -249,7 +295,7 @@ }, { "cell_type": "markdown", - "id": "15", + "id": "17", "metadata": { "editable": true, "slideshow": { @@ -267,7 +313,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "18", "metadata": { "editable": true, "slideshow": { @@ -282,7 +328,7 @@ }, { "cell_type": "markdown", - "id": "17", + "id": "19", "metadata": { "editable": true, "slideshow": { @@ -298,7 +344,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "20", "metadata": { "editable": true, "slideshow": { @@ -335,7 +381,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "21", "metadata": { "editable": true, "slideshow": { @@ -352,7 +398,7 @@ }, { "cell_type": "markdown", - "id": "20", + "id": "22", "metadata": {}, "source": [ "### MLM Example: Predict a Masked Token\n", @@ -363,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "23", "metadata": {}, "outputs": [], "source": [ diff --git a/src/pre_tokenizers/split_bigsmiles.rs b/src/pre_tokenizers/split_bigsmiles.rs index 
0b6bf4d..6f20fb0 100644 --- a/src/pre_tokenizers/split_bigsmiles.rs +++ b/src/pre_tokenizers/split_bigsmiles.rs @@ -38,12 +38,12 @@ const BRACKETED_SYMBOL: &'static str = concat!( const CHIRAL: &'static str = r"@(?:@|AL|OH|SP|T(?:B|H))?"; pub const MATCH_OUTER_BIGSMILES: &'static str = concat!( - r"Br?|Cl?|F|I|N|O|P|S|", // organic subset elements - r"b|c|n|o|p|s|", // Aromatic organic subset - r"\*|", // Wildcard - r"[\.\-=\#\$:/\\]|", // Bonds - r"\d|%|", // Ring closures - r"\(|\)|", // Branch delimiters in SMILES and BigSMILES + r"Br?|Cl?|F|I|N|O|P|S|", // organic subset elements + r"b|c|n|o|p|s|", // Aromatic organic subset + r"\*|", // Wildcard + r"[\.\-=\#\$:/\\]|", // Bonds + r"\d|%|", // Ring closures + r"\(|\)|", // Branch delimiters in SMILES and BigSMILES r"\{|\}|", // Stochastic object delimiters r",|;|", // Repeat unit separator and end group separator r"[A-Z][A-Za-z0-9']*|", // Bare spec labels From 45e0e2a86e0b1d4d7389f6a7e0ba040a998294d5 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Wed, 6 May 2026 14:34:35 -0400 Subject: [PATCH 26/52] fix: link to fragment names in notebook md --- docs/big_smirk_demo.ipynb | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/docs/big_smirk_demo.ipynb b/docs/big_smirk_demo.ipynb index 0024dea..70512ce 100644 --- a/docs/big_smirk_demo.ipynb +++ b/docs/big_smirk_demo.ipynb @@ -170,7 +170,8 @@ "C([#Arm1])([#Arm2]).{#Arm1=CO}.{#Arm2=N} --> C(CO)(N) ---> 'C', '(', 'C', 'O', ')', '(', 'N', ')'\n", "```\n", "More examples of valid and invalid use of the fragment name definition notation are provided below.\n", - "[fragment names]: (https://olsenlabmit.github.io/BigSMILES/docs/line_notation.html#simplifications-and-abbreviations)" + "\n", + "[fragment names]: https://olsenlabmit.github.io/BigSMILES/docs/line_notation.html#simplifications-and-abbreviations" ] }, { @@ -463,18 +464,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": 
{ - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.8" } }, "nbformat": 4, From 0594550dcfcfe151b38f90bdd6e577f182b246cf Mon Sep 17 00:00:00 2001 From: Alexius Wadell Date: Sun, 10 May 2026 17:27:46 -0600 Subject: [PATCH 27/52] fix dependabot --- .github/dependabot.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml index 8244ed7..141a33f 100644 --- a/.github/dependabot.yaml +++ b/.github/dependabot.yaml @@ -4,8 +4,12 @@ updates: directory: / schedule: interval: monthly - versioning-strategy: increase + versioning-strategy: increase-if-necessary - package-ecosystem: pip directory: / schedule: interval: monthly + - package-ecosystem: github-actions + directory: / + schedule: + interval: monthly From 8cdf3336ae6c65e4a39ad4c22d3293edf6514f04 Mon Sep 17 00:00:00 2001 From: Alexius Wadell Date: Sun, 10 May 2026 17:52:11 -0600 Subject: [PATCH 28/52] use default versioning-strategy --- .github/dependabot.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml index 141a33f..619bbe7 100644 --- a/.github/dependabot.yaml +++ b/.github/dependabot.yaml @@ -4,7 +4,6 @@ updates: directory: / schedule: interval: monthly - versioning-strategy: increase-if-necessary - package-ecosystem: pip directory: / schedule: From 01e6fca9a9fca954727bf2984470aa5aa2fdbd73 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:40 +0000 Subject: [PATCH 29/52] Bump rdkit from 2024.9.5 to 2026.3.1 Bumps [rdkit](https://github.com/rdkit/rdkit) from 2024.9.5 to 2026.3.1. 
- [Release notes](https://github.com/rdkit/rdkit/releases) - [Changelog](https://github.com/rdkit/rdkit/blob/master/ReleaseNotes.md) - [Commits](https://github.com/rdkit/rdkit/commits) --- updated-dependencies: - dependency-name: rdkit dependency-version: 2026.3.1 dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 510439a..7b69c99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ docs = [ "accelerate~=1.3", "datasets~=3.3", "torch~=2.0", - "rdkit==2024.9.5", + "rdkit==2026.3.1", "transformers~=4.48.2", ] From 8925a271927082b484b1e4d96ab95ca1bce7c046 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:52:59 +0000 Subject: [PATCH 30/52] Bump actions/upload-artifact from 5 to 7 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 5 to 7. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v5...v7) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/CD.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CD.yml b/.github/workflows/CD.yml index 07bad33..870f73d 100644 --- a/.github/workflows/CD.yml +++ b/.github/workflows/CD.yml @@ -43,7 +43,7 @@ jobs: sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: auto - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v7 with: name: wheels-linux-${{ matrix.platform.target }} path: dist @@ -71,7 +71,7 @@ jobs: args: --release --out dist --find-interpreter sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v7 with: name: wheels-windows-${{ matrix.platform.target }} path: dist @@ -96,7 +96,7 @@ jobs: args: --release --out dist --find-interpreter sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v7 with: name: wheels-macos-${{ matrix.platform.target }} path: dist @@ -110,7 +110,7 @@ jobs: command: sdist args: --out dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v7 with: name: wheels-sdist path: dist From a9032f5d7ee437617708bc8d0462b29ff2fbce5d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:02 +0000 Subject: [PATCH 31/52] Bump actions/attest-build-provenance from 3 to 4 Bumps [actions/attest-build-provenance](https://github.com/actions/attest-build-provenance) from 3 to 4. 
- [Release notes](https://github.com/actions/attest-build-provenance/releases) - [Changelog](https://github.com/actions/attest-build-provenance/blob/main/RELEASE.md) - [Commits](https://github.com/actions/attest-build-provenance/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/attest-build-provenance dependency-version: '4' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/CD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CD.yml b/.github/workflows/CD.yml index 870f73d..468932e 100644 --- a/.github/workflows/CD.yml +++ b/.github/workflows/CD.yml @@ -133,7 +133,7 @@ jobs: steps: - uses: actions/download-artifact@v6 - name: Generate artifact attestation - uses: actions/attest-build-provenance@v3 + uses: actions/attest-build-provenance@v4 with: subject-path: wheels-*/* - name: Install uv From 973612835769649b1fc29fa19092c43d394e1e0b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:05 +0000 Subject: [PATCH 32/52] Bump actions/upload-pages-artifact from 4 to 5 Bumps [actions/upload-pages-artifact](https://github.com/actions/upload-pages-artifact) from 4 to 5. - [Release notes](https://github.com/actions/upload-pages-artifact/releases) - [Commits](https://github.com/actions/upload-pages-artifact/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/upload-pages-artifact dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index aecb352..2feb601 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,7 +17,7 @@ jobs: cache-dependency-glob: pyproject.toml - run: uv sync --group docs - run: uv run --group docs --with pip sphinx-build docs build/html - - uses: actions/upload-pages-artifact@v4 + - uses: actions/upload-pages-artifact@v5 with: path: build/html/ deploy: From 4b32738dbfcfac9d62469f955add94cabfe17ac8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:08 +0000 Subject: [PATCH 33/52] Bump actions/checkout from 3 to 6 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 6. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/CI.yaml | 6 +++--- .github/workflows/docs.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 42e2f40..ff14454 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -9,14 +9,14 @@ jobs: name: Check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - run: cargo check test: name: Test Suite runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - run: cargo test --benches --verbose --all @@ -24,7 +24,7 @@ jobs: name: Test Python runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install uv and set the python version uses: astral-sh/setup-uv@v5 with: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2feb601..f15c863 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,7 +9,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v5 with: python-version: "3.10" diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 89ff056..13047a9 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -8,7 +8,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 - uses: actions/setup-python@v3 - uses: astral-sh/setup-uv@v5 with: From 46bf9a1c20d6b4711eddd6965aa7b0aefe1e9612 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:11 +0000 Subject: [PATCH 34/52] Bump actions/deploy-pages from 4 to 5 Bumps 
[actions/deploy-pages](https://github.com/actions/deploy-pages) from 4 to 5. - [Release notes](https://github.com/actions/deploy-pages/releases) - [Commits](https://github.com/actions/deploy-pages/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/deploy-pages dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f15c863..0914d91 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -33,4 +33,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v4 + uses: actions/deploy-pages@v5 From 6e2b2f0e8c7449119cc7db756dee134c1690eac0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:33 +0000 Subject: [PATCH 35/52] Bump furo from 2024.8.6 to 2025.12.19 Bumps [furo](https://github.com/pradyunsg/furo) from 2024.8.6 to 2025.12.19. - [Release notes](https://github.com/pradyunsg/furo/releases) - [Changelog](https://github.com/pradyunsg/furo/blob/main/docs/changelog.md) - [Commits](https://github.com/pradyunsg/furo/compare/2024.08.06...2025.12.19) --- updated-dependencies: - dependency-name: furo dependency-version: 2025.12.19 dependency-type: direct:development update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7b69c99..f268996 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dev = [ "maturin~=1.7", ] docs = [ - "furo==2024.8.6", + "furo==2025.12.19", "jupyter~=1.1", "myst-nb~=1.2.0", "sphinx~=7.4", From cb4d2d046134ecbb5e3e81caab2c880f6e048c9e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 00:20:37 +0000 Subject: [PATCH 36/52] Update datasets requirement from ~=3.3 to >=3.3,<5.0 Updates the requirements on [datasets](https://github.com/huggingface/datasets) to permit the latest version. - [Release notes](https://github.com/huggingface/datasets/releases) - [Commits](https://github.com/huggingface/datasets/compare/3.3.0...4.5.0) --- updated-dependencies: - dependency-name: datasets dependency-version: 4.5.0 dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f268996..16f967a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ docs = [ "sphinx-autobuild==2024.10.3", "sphinx-copybutton==0.5.2", "accelerate~=1.3", - "datasets~=3.3", + "datasets>=3.3,<5.0", "torch~=2.0", "rdkit==2026.3.1", "transformers~=4.48.2", From e53c4260257a9c1bc49029e0e26f93bc674dba16 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 00:20:25 +0000 Subject: [PATCH 37/52] Update transformers requirement from ~=4.48.2 to >=4.48.2,<4.58.0 Updates the requirements on [transformers](https://github.com/huggingface/transformers) to permit the latest version. 
- [Release notes](https://github.com/huggingface/transformers/releases) - [Commits](https://github.com/huggingface/transformers/compare/v4.48.2...v4.57.6) --- updated-dependencies: - dependency-name: transformers dependency-version: 4.57.6 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 16f967a..5e3dc37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ docs = [ "datasets>=3.3,<5.0", "torch~=2.0", "rdkit==2026.3.1", - "transformers~=4.48.2", + "transformers>=4.48.2,<4.58.0", ] [build-system] From b8c7f43bb0e8f2e42217a8dc423a891ba98b13f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 23:53:17 +0000 Subject: [PATCH 38/52] Update pyo3 requirement from ^0.27 to ^0.28 Updates the requirements on [pyo3](https://github.com/pyo3/pyo3) to permit the latest version. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/main/CHANGELOG.md) - [Commits](https://github.com/pyo3/pyo3/compare/v0.27.0...v0.28.3) --- updated-dependencies: - dependency-name: pyo3 dependency-version: 0.28.3 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 84e8226..c00e7ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ either = "1.13.0" macro_rules_attribute = "0.2.0" once_cell = "1.19.0" paste = "1.0.14" -pyo3 = { version = "^0.27", features = ["extension-module"] } +pyo3 = { version = "^0.28", features = ["extension-module"] } regex = "1.10.3" serde = "1.0.197" serde_json = "1.0.114" From 9e57f5192c2bf1cd9228abf36bc846f1191423d5 Mon Sep 17 00:00:00 2001 From: Alexius Wadell Date: Sun, 10 May 2026 18:57:11 -0600 Subject: [PATCH 39/52] fix: renamed allow_threads -> detach --- src/tokenizer.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7d6b3c9..17d8632 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -143,7 +143,7 @@ impl SmirkTokenizer { .map(|x| EncodeInput::from(x.to_string())) .collect(); // Release the GIL while tokenizing batch - let out = py.allow_threads(|| { + let out = py.detach(|| { self.tokenizer .encode_batch_char_offsets(inputs, add_special_tokens) .unwrap() @@ -161,7 +161,7 @@ impl SmirkTokenizer { ids: Vec>, skip_special_tokens: bool, ) -> PyResult> { - py.allow_threads(|| { + py.detach(|| { let sequences = ids.iter().map(|x| &x[..]).collect::>(); Ok(self .tokenizer @@ -446,7 +446,7 @@ impl SmirkTokenizer { // Train tokenizer let mut trainer: TrainerWrapper = builder.build().unwrap().into(); - let _ = py.allow_threads(|| tokenizer.train_from_files(&mut trainer, files).unwrap()); + let _ = py.detach(|| tokenizer.train_from_files(&mut trainer, files).unwrap()); Ok(SmirkTokenizer::new(tokenizer)) } } From 2003b9c91fd8198688a62ba2a566b77060f9c795 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 01:05:44 +0000 Subject: [PATCH 40/52] Update tokenizers requirement from ^0.21 to ^0.23 Updates the requirements 
on [tokenizers](https://github.com/huggingface/tokenizers) to permit the latest version. - [Release notes](https://github.com/huggingface/tokenizers/releases) - [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md) - [Commits](https://github.com/huggingface/tokenizers/compare/v0.21.0...v0.23.1) --- updated-dependencies: - dependency-name: tokenizers dependency-version: 0.23.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c00e7ad..78db2dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ regex = "1.10.3" serde = "1.0.197" serde_json = "1.0.114" serde_with = "3.8.0" -tokenizers = { version = "^0.21"} +tokenizers = { version = "^0.23"} [dev-dependencies] tempfile = "3.10.1" From 9c9014b1e240cff4a1cdf55238c0dc760c3c8c8c Mon Sep 17 00:00:00 2001 From: Alexius Wadell Date: Sun, 10 May 2026 19:11:24 -0600 Subject: [PATCH 41/52] fix: token addition for tokenizer v0.23 --- src/tokenizer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 17d8632..82bfcae 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -361,7 +361,9 @@ impl SmirkTokenizer { }) }) .collect::, PyErr>>()?; - Ok(self.tokenizer.add_tokens(&tokens)) + self.tokenizer + .add_tokens(tokens) + .map_err(|e| PyValueError::new_err(e.to_string())) } #[pyo3(signature = (files, **kwargs))] From fb1d4bb70bdea7fc2b0f9f4c0d278e73c35f9d80 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 01:19:35 +0000 Subject: [PATCH 42/52] Bump actions/download-artifact from 6 to 8 Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 6 to 8. 
- [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v6...v8) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-version: '8' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/CD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CD.yml b/.github/workflows/CD.yml index 468932e..5365fd3 100644 --- a/.github/workflows/CD.yml +++ b/.github/workflows/CD.yml @@ -131,7 +131,7 @@ jobs: # Used to generate artifact attestation attestations: write steps: - - uses: actions/download-artifact@v6 + - uses: actions/download-artifact@v8 - name: Generate artifact attestation uses: actions/attest-build-provenance@v4 with: From cebed2102987d41995395dd40b1ed7c9bc9b6a2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 01:19:39 +0000 Subject: [PATCH 43/52] Bump astral-sh/setup-uv from 5 to 7 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 5 to 7. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/v5...v7) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/CI.yaml | 2 +- .github/workflows/docs.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index ff14454..4c2778e 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -26,7 +26,7 @@ jobs: steps: - uses: actions/checkout@v6 - name: Install uv and set the python version - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v7 with: python-version: "3.10" enable-cache: true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0914d91..f67969a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v7 with: python-version: "3.10" enable-cache: true diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 13047a9..b8a774d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -10,7 +10,7 @@ jobs: steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v3 - - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: pyproject.toml From 7787b31deb5fde711616990174226bc1a33dd9c0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 01:19:43 +0000 Subject: [PATCH 44/52] Update myst-nb requirement from ~=1.2.0 to >=1.2,<1.4 Updates the requirements on [myst-nb](https://github.com/executablebooks/myst-nb) to permit the latest version. 
- [Release notes](https://github.com/executablebooks/myst-nb/releases) - [Changelog](https://github.com/executablebooks/MyST-NB/blob/main/CHANGELOG.md) - [Commits](https://github.com/executablebooks/myst-nb/compare/v1.2.0...v1.3.0) --- updated-dependencies: - dependency-name: myst-nb dependency-version: 1.3.0 dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5e3dc37..4c7bf64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dev = [ docs = [ "furo==2025.12.19", "jupyter~=1.1", - "myst-nb~=1.2.0", + "myst-nb>=1.2,<1.4", "sphinx~=7.4", "sphinx-argparse==0.4.0", "sphinx-autobuild==2024.10.3", From fc69a89670347f94d6a8b51bc03d260be0e7ecbb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 01:19:54 +0000 Subject: [PATCH 45/52] Update pre-commit requirement from ~=4.1.0 to >=4.1,<4.4 Updates the requirements on [pre-commit](https://github.com/pre-commit/pre-commit) to permit the latest version. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v4.1.0...v4.3.0) --- updated-dependencies: - dependency-name: pre-commit dependency-version: 4.3.0 dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4c7bf64..a31bf08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dev = [ "ruff~=0.9.4", "torch~=2.0", "numpy~=2.0", - "pre-commit~=4.1.0", + "pre-commit>=4.1,<4.4", "maturin~=1.7", ] docs = [ From 79f7f09cb32f85647a45595a0cc1006de81f612f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 03:20:56 +0000 Subject: [PATCH 46/52] Bump actions/setup-python from 3 to 6 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 3 to 6. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v3...v6) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/pre-commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index b8a774d..5ba8c99 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v6 - uses: astral-sh/setup-uv@v7 with: enable-cache: true From ace7e79f3f684250bb817061f0a7344cb0b07beb Mon Sep 17 00:00:00 2001 From: Alexius Wadell Date: Sun, 10 May 2026 21:27:15 -0600 Subject: [PATCH 47/52] doc: dep bump changelog entry --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bc46ff..2d06d72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased 
-### Added +### Changed -- `SmirkBigSmilesFast` Tokenizer for BigSMILES line notation representation of polymers ([#8](https://github.com/BattModels/smirk/pull/8)) +- Bumped GitHub Actions, Python, Rust, and documentation dependencies ([#10](https://github.com/BattModels/smirk/pull/10) -- [#24](https://github.com/BattModels/smirk/pull/24)) ### Fixed -- Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) +- Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) +- Fixed Dependabot configuration ([#9](https://github.com/BattModels/smirk/pull/9) ## [v0.2.0](https://github.com/BattModels/smirk/tree/v0.2.0) From 4f96b39c18417e8ab0347a13597750edfbc41c70 Mon Sep 17 00:00:00 2001 From: Alexius Wadell Date: Sun, 10 May 2026 21:33:37 -0600 Subject: [PATCH 48/52] fix: typo in CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d06d72..a2afb16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) -- Fixed Dependabot configuration ([#9](https://github.com/BattModels/smirk/pull/9) +- Fixed Dependabot configuration ([#9](https://github.com/BattModels/smirk/pull/9)) ## [v0.2.0](https://github.com/BattModels/smirk/tree/v0.2.0) From 8dd8594dfe1cf42c36500c384337ea392318bb1a Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:31:17 -0400 Subject: [PATCH 49/52] remove redundant jupyter notebook dep --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a31bf08..dcb8a7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,4 +53,4 @@ requires = ["maturin~=1.7"] [tool.maturin] python-source = "python" -include = [ 
"python/smirk/vocab_smiles.json", "python/smirk/vocab_bigsmiles.json", "python/smirk/vocab_selfies.json" ] +include = [ "python/smirk/vocab_smiles.json", "python/smirk/vocab_bigsmiles.json", "python/smirk/vocab_selfies.json" ] \ No newline at end of file From 072723030408b145cdd4599017ce4a8549f25476 Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Sun, 26 Apr 2026 13:37:08 -0400 Subject: [PATCH 50/52] add BigSMILES tokenizer to changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2afb16..083758c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- `SmirkBigSmilesFast` Tokenizer for BigSMILES line notation representation of polymers ([#8](https://github.com/BattModels/smirk/pull/8)) + ### Changed - Bumped GitHub Actions, Python, Rust, and documentation dependencies ([#10](https://github.com/BattModels/smirk/pull/10) -- [#24](https://github.com/BattModels/smirk/pull/24)) ### Fixed -- Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) +- Build issue due to leading `./` in included file paths ([#7](https://github.com/BattModels/smirk/pull/7)) - Fixed Dependabot configuration ([#9](https://github.com/BattModels/smirk/pull/9)) ## [v0.2.0](https://github.com/BattModels/smirk/tree/v0.2.0) From 54caede634d8a9a48b147cd8cf03b3269e0b3c0e Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Wed, 13 May 2026 13:42:32 -0400 Subject: [PATCH 51/52] add warning about predefined Common Repeat Units --- pyproject.toml | 2 +- python/smirk/__init__.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dcb8a7c..a31bf08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,4 +53,4 @@ requires = ["maturin~=1.7"] [tool.maturin] python-source = "python" -include = [ 
"python/smirk/vocab_smiles.json", "python/smirk/vocab_bigsmiles.json", "python/smirk/vocab_selfies.json" ] \ No newline at end of file +include = [ "python/smirk/vocab_smiles.json", "python/smirk/vocab_bigsmiles.json", "python/smirk/vocab_selfies.json" ] diff --git a/python/smirk/__init__.py b/python/smirk/__init__.py index ba40f96..11e5376 100644 --- a/python/smirk/__init__.py +++ b/python/smirk/__init__.py @@ -347,6 +347,14 @@ def __init__(self, tokenizer_file: Optional[os.PathLike] = None, **kwargs): A Chemically-Complete Tokenizer for core BigSMILES line notation. For a specification of of the reference see: https://olsenlabmit.github.io/BigSMILES/docs/line_notation.html. + + .. warning:: + SmirkBigSmilesFast supports explicit BigSMILES fragment definitions + such as ``[#R].{#R=...}``, but it does not load or expand the + predefined Common Repeat Unit table from the BigSMILES v1.1 + documentation. Common repeat unit placeholders must therefore be + defined explicitly. + :param tokenizer_file: Path to a JSON serialize SmirkTokenizerFast tokenizers :param kwargs: Additional kwargs are passed to :py:class:`SmirkTokenizerFast` """ From 39a2197aa0e2795a09d08b0b77e93d29274c7bfd Mon Sep 17 00:00:00 2001 From: Anoushka Bhutani Date: Wed, 13 May 2026 13:56:02 -0400 Subject: [PATCH 52/52] deseralizer rejects all except v1.1 --- src/pre_tokenizers/bigsmirk.rs | 39 +++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/pre_tokenizers/bigsmirk.rs b/src/pre_tokenizers/bigsmirk.rs index d60c0c6..5887f10 100644 --- a/src/pre_tokenizers/bigsmirk.rs +++ b/src/pre_tokenizers/bigsmirk.rs @@ -102,13 +102,14 @@ impl<'de> Visitor<'de> for BigSmirkPreTokenizerVisitor { let mut outer: Option = None; let mut inner: Option = None; let mut type_field: Option = None; + let mut bigsmiles_version: Option = None; while let Some(key) = map.next_key::()? 
{ match key.as_ref() { "type" => { type_field = Some(map.next_value()?); } "bigsmiles_version" => { - let _: serde::de::IgnoredAny = map.next_value()?; + bigsmiles_version = Some(map.next_value()?); } "outer" => { if let Some(x) = map.next_value()? { @@ -133,6 +134,18 @@ impl<'de> Visitor<'de> for BigSmirkPreTokenizerVisitor { )); } } + match bigsmiles_version.as_deref() { + Some(BigSmirkPreTokenizer::BIGSMILES_VERSION) => {} + Some(version) => { + return Err(serde::de::Error::invalid_value( + serde::de::Unexpected::Str(version), + &"BigSMILES version `1.1`", + )); + } + None => { + return Err(serde::de::Error::missing_field("bigsmiles_version")); + } + } Ok(BigSmirkPreTokenizer::new( outer.expect("Missing `outer`").as_str(), inner.expect("Missing `inner`").as_str(), @@ -386,6 +399,30 @@ pub mod tests { ); } + #[test] + fn rejects_missing_bigsmiles_version() { + let mut value = serde_json::to_value(BigSmirkPreTokenizer::default()).unwrap(); + value.as_object_mut().unwrap().remove("bigsmiles_version"); + + let err = serde_json::from_value::<BigSmirkPreTokenizer>(value).unwrap_err(); + assert!(err + .to_string() + .contains("missing field `bigsmiles_version`")); + } + + #[test] + fn rejects_unsupported_bigsmiles_versions() { + for version in ["1.0", "1.2", "2.0", "not-a-version"] { + let mut value = serde_json::to_value(BigSmirkPreTokenizer::default()).unwrap(); + value["bigsmiles_version"] = serde_json::Value::String(version.to_string()); + + let err = serde_json::from_value::<BigSmirkPreTokenizer>(value).unwrap_err(); + let message = err.to_string(); + assert!(message.contains(&format!("invalid value: string \"{}\"", version))); + assert!(message.contains("expected BigSMILES version `1.1`")); + } + } + #[test] fn serialize_pretok() { let pretok = BigSmirkPreTokenizer::new(r".|\[.*?]", ".");