diff --git a/apps/internal/chemistry/bcl_app_link_fragments.cpp b/apps/internal/chemistry/bcl_app_link_fragments.cpp index 39c925b26..576326b29 100644 --- a/apps/internal/chemistry/bcl_app_link_fragments.cpp +++ b/apps/internal/chemistry/bcl_app_link_fragments.cpp @@ -258,7 +258,7 @@ namespace bcl command::Parameter ( "type", "the type of druglikenes to use", - command::ParameterCheckAllowed( storage::Vector< std::string>::Create( "IsConstitutionDruglike", "IsConstitutionDruglikeAndHitlike", "None")), + command::ParameterCheckAllowed( storage::Vector< std::string>::Create( "IsConstitutionDruglike", "IsConstitutionDruglikeAndHitlike", "Constant(1.0)")), "IsConstitutionDruglike" ) ) @@ -1526,7 +1526,7 @@ namespace bcl ( cleaner.Clean ( - new_frag_v, REFERENCE, m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue() + new_frag_v, REFERENCE, descriptor::CheminfoProperty( m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue()) ) ); return clean_frag; @@ -1561,7 +1561,7 @@ namespace bcl ); static chemistry::FragmentMapConformer conf_mapper ( - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue(), + descriptor::CheminfoProperty( m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue()), m_MDLString, m_PocketFilename, SCORER, diff --git a/apps/internal/chemistry/bcl_app_molecule_mutate.cpp b/apps/internal/chemistry/bcl_app_molecule_mutate.cpp index 0fbcca4a7..927036914 100644 --- a/apps/internal/chemistry/bcl_app_molecule_mutate.cpp +++ b/apps/internal/chemistry/bcl_app_molecule_mutate.cpp @@ -13,22 +13,24 @@ // (c) // initialize the static initialization fiasco finder, if macro ENABLE_FIASCO_FINDER is defined -#include "command/bcl_command_parameter_check_ranged.h" #include "util/bcl_util_static_initialization_fiasco_finder.h" BCL_StaticInitializationFiascoFinder // include header of this class -#include "app/bcl_app_apps.h" +#include "internal/chemistry/bcl_app_molecule_mutate.h" + +// include bcl headers #include "chemistry/bcl_chemistry_fragment_feed.h" #include 
"chemistry/bcl_chemistry_fragment_mutate_interface.h" #include "command/bcl_command_app_default_flags.h" #include "command/bcl_command_flag_dynamic.h" #include "command/bcl_command_flag_static.h" +#include "command/bcl_command_parameter_check_ranged.h" #include "command/bcl_command_parameter_check_serializable.h" -#include "internal/chemistry/bcl_app_molecule_mutate.h" #include "io/bcl_io_file.h" #include "math/bcl_math_mutate_interface.h" #include "storage/bcl_storage_template_instantiations.h" + namespace bcl { namespace app @@ -83,7 +85,20 @@ namespace bcl new command::FlagStatic ( "accumulate", - "accumulate mutations into one molecule; be careful" + "accumulate mutations at specified 'mutable_atoms' into one molecule; " + "only applicable if using the application-level 'mutable_atoms'; " + "similar behavior can be achieved using multiple implementations sequentially " + "with careful mutate-level atom selections." + ) + ), + m_FinalProductOnlyFlag + ( + new command::FlagStatic + ( + "final_product_only", + "only return the product resulting from all mutates; by default a " + "molecule is returned after each mutation; this flag is useful if you " + "are only interested in the final product." ) ), m_RecenterFlag @@ -115,6 +130,7 @@ namespace bcl MoleculeMutate::MoleculeMutate( const MoleculeMutate &PARENT) : m_ImplementationFlag( PARENT.m_ImplementationFlag), m_AccumulateFlag( PARENT.m_AccumulateFlag), + m_FinalProductOnlyFlag( PARENT.m_FinalProductOnlyFlag), m_MutableAtomsFlag( PARENT.m_MutableAtomsFlag), m_RecenterFlag( PARENT.m_RecenterFlag), m_OutputFilenameFlag( PARENT.m_OutputFilenameFlag), @@ -173,6 +189,9 @@ namespace bcl //! make all mutations to one molecule sp_cmd->AddFlag( m_AccumulateFlag); + //! only return the last molecule after all mutations have been applied + sp_cmd->AddFlag( m_FinalProductOnlyFlag); + //! 
whether to recenter the molecules sp_cmd->AddFlag( m_RecenterFlag); @@ -295,8 +314,15 @@ namespace bcl current_frag = *( mutated_object.GetArgument()); continue; } - chemistry::FragmentComplete fragment( *( mutated_object.GetArgument())); - Write( fragment); + if + ( + !m_FinalProductOnlyFlag->GetFlag() || + i+1 == n_mutates + ) + { + chemistry::FragmentComplete fragment( *( mutated_object.GetArgument())); + Write( fragment); + } } } } @@ -322,8 +348,15 @@ namespace bcl if( mutated_object.GetArgument().IsDefined()) { current_frag = *( mutated_object.GetArgument()); - chemistry::FragmentComplete fragment( *( mutated_object.GetArgument())); - Write( fragment); + if + ( + !m_FinalProductOnlyFlag->GetFlag() || + i+1 == n_mutates + ) + { + chemistry::FragmentComplete fragment( *( mutated_object.GetArgument())); + Write( fragment); + } } } } diff --git a/apps/internal/chemistry/bcl_app_molecule_mutate.h b/apps/internal/chemistry/bcl_app_molecule_mutate.h index c11187463..784dab674 100644 --- a/apps/internal/chemistry/bcl_app_molecule_mutate.h +++ b/apps/internal/chemistry/bcl_app_molecule_mutate.h @@ -68,6 +68,9 @@ namespace bcl //! accumulate all mutations onto a single molecule util::ShPtr< command::FlagInterface> m_AccumulateFlag; + //! only return the last molecule after all mutations have been applied + util::ShPtr< command::FlagInterface> m_FinalProductOnlyFlag; + //! 
whether to recenter the molecules util::ShPtr< command::FlagInterface> m_RecenterFlag; diff --git a/apps/internal/chemistry/bcl_app_reaction_combichem.cpp b/apps/internal/chemistry/bcl_app_reaction_combichem.cpp index 79ae4964f..8b9fa9e02 100644 --- a/apps/internal/chemistry/bcl_app_reaction_combichem.cpp +++ b/apps/internal/chemistry/bcl_app_reaction_combichem.cpp @@ -203,7 +203,7 @@ namespace bcl command::Parameter ( "type", "the type of druglikenes to use", - command::ParameterCheckAllowed( storage::Vector< std::string>::Create( "IsConstitutionDruglike", "IsConstitutionDruglikeAndHitlike", "None")), + command::ParameterCheckAllowed( storage::Vector< std::string>::Create( "IsConstitutionDruglike", "IsConstitutionDruglikeAndHitlike", "Constant(1.0)")), "IsConstitutionDruglike" ) ) @@ -641,7 +641,7 @@ namespace bcl chemistry::HydrogensHandler::Remove( atoms); chemistry::FragmentMapConformer cleaner ( - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue(), + descriptor::CheminfoProperty( m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue()), m_CorinaFlag->GetFlag() ); util::ShPtr< chemistry::FragmentComplete> clean_mol @@ -650,7 +650,7 @@ namespace bcl ( atoms, *ens_itr, - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue(), + descriptor::CheminfoProperty( m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue()), false ) ); @@ -765,17 +765,17 @@ namespace bcl static chemistry::FragmentMapConformer cleaner; if( m_CorinaFlag->GetFlag()) { - cleaner = chemistry::FragmentMapConformer( "None", true); + cleaner = chemistry::FragmentMapConformer( descriptor::CheminfoProperty( "Constant(1.0)"), true); } else { - cleaner = chemistry::FragmentMapConformer( "None", false); + cleaner = chemistry::FragmentMapConformer( descriptor::CheminfoProperty( "Constant(1.0)"), false); } util::ShPtr< chemistry::FragmentComplete> clean_frag( cleaner.Clean ( new_frag_v, REFERENCE, - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue() + descriptor::CheminfoProperty( 
m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue()) )); return clean_frag; } @@ -811,7 +811,7 @@ namespace bcl ); static chemistry::FragmentMapConformer conf_mapper ( - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue(), + descriptor::CheminfoProperty( m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue()), m_MDLString, m_PocketFilename, SCORER, diff --git a/apps/molecule/bcl_app_focused_library_design.cpp b/apps/molecule/bcl_app_focused_library_design.cpp index 1093147e2..4b0c88501 100644 --- a/apps/molecule/bcl_app_focused_library_design.cpp +++ b/apps/molecule/bcl_app_focused_library_design.cpp @@ -16,6 +16,9 @@ #include "util/bcl_util_static_initialization_fiasco_finder.h" BCL_StaticInitializationFiascoFinder +// include header for this class +//#include "molecule/bcl_app_focused_library_design.h" + // include headers from the bcl - sorted alphabetically #include "app/bcl_app_apps.h" #include "chemistry/bcl_chemistry_configuration_set.h" @@ -23,24 +26,7 @@ BCL_StaticInitializationFiascoFinder #include "chemistry/bcl_chemistry_constitution_set.h" #include "chemistry/bcl_chemistry_fragment_configuration_shared.h" #include "chemistry/bcl_chemistry_fragment_constitution_shared.h" -#include "chemistry/bcl_chemistry_fragment_evolve_base.h" -#include "chemistry/bcl_chemistry_fragment_grow.h" -#include "chemistry/bcl_chemistry_fragment_mutate_add_med_chem.h" -#include "chemistry/bcl_chemistry_fragment_mutate_alchemy.h" -#include "chemistry/bcl_chemistry_fragment_mutate_cyclize.h" -#include "chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.h" -#include "chemistry/bcl_chemistry_fragment_mutate_fluorinate.h" -#include "chemistry/bcl_chemistry_fragment_mutate_halogenate.h" -#include "chemistry/bcl_chemistry_fragment_mutate_mcm.h" -#include "chemistry/bcl_chemistry_fragment_mutate_remove_atom.h" -#include "chemistry/bcl_chemistry_fragment_mutate_remove_bond.h" -#include "chemistry/bcl_chemistry_fragment_mutate_ring_swap.h" -#include 
"chemistry/bcl_chemistry_fragment_split_interface.h" -#include "chemistry/bcl_chemistry_fragment_track_mutable_atoms.h" -#include "chemistry/bcl_chemistry_pick_atom_random.h" -#include "chemistry/bcl_chemistry_pick_fragment_random.h" -#include "chemistry/bcl_chemistry_rotamer_library_file.h" -#include "chemistry/bcl_chemistry_sample_conformations.h" +#include "chemistry/bcl_chemistry_fragment_mutate_interface.h" #include "chemistry/bcl_chemistry_score_function_generic.h" #include "command/bcl_command_app_default_flags.h" #include "command/bcl_command_command.h" @@ -99,6 +85,66 @@ namespace bcl private: + // typedefs for convenience + typedef util::ShPtr< math::MutateDecisionNode< chemistry::FragmentComplete> > Mutates; + typedef util::ShPtr< math::FunctionInterfaceSerializable< chemistry::FragmentComplete, double> > Score; + typedef util::ShPtrVector< descriptor::CheminfoProperty> Properties; + typedef opti::Tracker< chemistry::FragmentComplete, double> Tracker; + typedef util::ShPtr< chemistry::FragmentComplete> FragmentComplete_p; + + ////////// + // data // + ////////// + + //! flag to control input base fragment + util::ShPtr< command::FlagInterface> m_StartFragmentFlag; + + //! flag for defining output filename, + util::ShPtr< command::FlagInterface> m_OutputFilenameFlag; + + //! flag to specify mutate implementations + util::ShPtr< command::FlagInterface> m_MutateFlag; + + //! flag to specify mutate implementation probabilities + util::ShPtr< command::FlagInterface> m_MutateProbabilityFlag; + + //! flag controlling the maximum possible number of sequential mutates that can occur between MCM evaluation + util::ShPtr< command::FlagInterface> m_MaxSequentialMutatesFlag; + + //! flag to specify the MCM score function as a descriptor + util::ShPtr< command::FlagInterface> m_PropertyScoringFunctionFlag; + + //! flag for descriptors to compute for the final ensemble + util::ShPtr< command::FlagInterface> m_FinalMetricsFlag; + + //! 
flag to control number of molecules to be generated + util::ShPtr< command::FlagInterface> m_NumberMoleculesFlag; + + //! flag to control the number of MC iterations in molecule optimization + util::ShPtr< command::FlagInterface> m_NumberIterationsFlag; + + //! flag to control the number of maximum allowed consecutive unimproved MC iterations + util::ShPtr< command::FlagInterface> m_NumberUnimprovedFlag; + + //! flag to control the number of maximum allowed skipped MC iterations + util::ShPtr< command::FlagInterface> m_NumberSkippedFlag; + + //! flag to control the temperature for the Metropolis criterion + util::ShPtr< command::FlagInterface> m_MetropolisTemperatureFlag; + + //! flag to maximize score instead of minimize + util::ShPtr< command::FlagInterface> m_LargerIsBetterFlag; + + //! flag to save all molecules accepted or improved by main MCM + util::ShPtr< command::FlagInterface> m_SaveAllAcceptedImprovedFlag; + + /////////////////////////////////// + // construction and destruction // + /////////////////////////////////// + + //! 
default constructor + FocusedLibraryDesign(); + // ThreadManager needs access to private nested classes friend class ThreadManager; friend class Worker; @@ -123,33 +169,14 @@ namespace bcl // Data // /////////// - const size_t m_NumberOfMoleculesRequested; // Number of molecules to build - size_t m_NumberOfMoleculesBuilt; // Number of molecules already built - const size_t m_NumberMCIterations; // Number of iterations in the MC approximator - const size_t m_NumberMCUnimproved; // Number of allowed consecutive unimproved MC iterations - const size_t m_NumberMCSkipped; // Number of allowed skipped MC iterations - const float m_MetropolisTemperature; // Tempterature during Metropolis criterion evaluation - const size_t m_Threads; // Number of threads - chemistry::FragmentEnsemble m_Molecules; // The molecules which have been built and are ready for output - chemistry::ConstitutionSet m_UniqueConsts; // The unique molecules which have been built - chemistry::ConfigurationSet m_UniqueConfigs; // The unique molecules which have been built - io::OFStream m_OutputStream; // Output file to write molecules to - const std::string m_DrugLikenessType; // type of druglikeness filter to use for skipping MCM steps - const float m_VDWScoreCutoff; // internal VDW score cutoff for 3D conformer (used to check for mols with reasonable substitutions) - const util::Implementation< chemistry::FragmentSplitInterface> m_SplitImplementation; // splitter to use when making fragments for internal MCM optimization - const std::string m_PoseDependentMDLProperty; // enable pose-dependent scoring with the receptor indicated by this property - const std::string m_PoseDependentResolveClashes; // resolve clashes between ligand and receptor - const size_t m_MaxSequentialMutates; - const float m_RingSwapProb; - const float m_CyclizeProb; - const float m_AlchemyProb; - const float m_RemoveAtomProb; - const float m_RemoveBondProb; - const float m_AddMedChemProb; - const float m_FluorinateProb; - const 
float m_HalogenateProb; - const float m_ExtendWithLinkerProb; - sched::Mutex m_Mutex; // Lock for updating Workers + const size_t m_NumberOfMoleculesRequested; //< Number of molecules to build + size_t m_NumberOfMoleculesBuilt; //< Number of molecules already built + const size_t m_Threads; //< Number of threads + chemistry::FragmentEnsemble m_Molecules; //< The molecules which have been built and are ready for output + chemistry::ConstitutionSet m_UniqueConsts; //< The unique molecules which have been built + chemistry::ConfigurationSet m_UniqueConfigs; //< The unique molecules which have been built + io::OFStream m_OutputStream; //< Output file to write molecules to + sched::Mutex m_Mutex; //< Lock for updating Workers ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! @@ -162,23 +189,19 @@ namespace bcl ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct Worker { - // Rotamer library to use - read in at Main() - util::ShPtr< chemistry::FragmentComplete> m_StartFragment; // Base fragment to use -// util::ShPtr< chemistry::FragmentComplete> m_MutableFragment; // mutable fragment in base fragment - chemistry::FragmentEnsemble m_MutableFragment; // mutable fragment in base fragment - storage::Vector< size_t> m_MutableAtomIndices; // mutable atoms in base fragment - descriptor::CheminfoProperty m_PropertyScorer; // Set objective function with property instead of model - size_t m_NumberMCIterations; // Number of MC iterations - size_t m_NumberMCUnimproved; // Number of allowed consecutive unimproved MC iterations - size_t m_NumberMCSkipped; // Number of allowed consecutive unimproved MC iterations - float m_MetropolisTemperature; // Temperature during Metropolis criterion evaluation - opti::Tracker< chemistry::FragmentComplete, double> m_OptiGoal; - bool m_SaveAllAcceptedImproved; - std::string m_ConformationComparer; // 
Conformation comparer - util::ShPtr< math::FunctionInterfaceSerializable< chemistry::FragmentComplete, double> > m_Score; // Objective function - util::ShPtr< math::MutateInterface< chemistry::FragmentComplete> > m_Mutate; // Grow molecules from scaffold - bool m_Corina; // enables corina conformers during cleaning - util::SiPtr< ThreadManager> m_ThreadManager; // Pointer to the thread manager, needed so Worker can be updated + util::SiPtr< ThreadManager> m_ThreadManager; //< Pointer to the thread manager, needed so Worker can be updated + FragmentComplete_p m_StartFragment; //< Base fragment to use + descriptor::CheminfoProperty m_PropertyScorer; //< Set objective function with property instead of model + Mutates m_Mutate; //< Grow molecules from scaffold + Score m_Score; //< Objective function + Properties m_FinalMetrics; //< Metrics to compute on final ensemble + Tracker m_OptiGoal; //< Tracker tracking up or down + size_t m_NumberMCIterations; //< Number of MC iterations + size_t m_NumberMCUnimproved; //< Number of allowed consecutive unimproved MC iterations + size_t m_NumberMCSkipped; //< Number of allowed skipped MC iterations + float m_MetropolisTemperature; //< Temperature during Metropolis criterion evaluation + bool m_SaveAllAcceptedImproved; //< Collect every molecule that is accepted or improved + descriptor::CheminfoProperty m_BondDruglikeness; //< MoleculeTotalDruglikeBondEnergy // Builds and score the molecule void RunThread() @@ -219,24 +242,11 @@ namespace bcl ); // assume we start with druglike molecule - static descriptor::CheminfoProperty bonde( "MoleculeTotalDruglikeBondEnergy"); double druglike_mol_activity( ( *m_Score)( approximator.GetTracker().GetCurrent()->First())); // tell me about the scaffold - BCL_MessageStd("Scaffold properties"); - BCL_MessageStd( "MolWeight: " + util::Format()( descriptor::GetCheminfoProperties().calc_MolWeight->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( 
"# of HBondAcceptors + HBondDonors: " + - util::Format()( descriptor::GetCheminfoProperties().calc_HbondAcceptor->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0) - + descriptor::GetCheminfoProperties().calc_HbondDonor->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "# of NRotBonds: " + util::Format()( descriptor::GetCheminfoProperties().calc_NRotBond->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "LogP: " + util::Format()( descriptor::GetCheminfoProperties().calc_XLogP->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "Bond energy and atom propensity score: " + util::Format()( bonde->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 3))); - BCL_MessageStd( "# of F: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsF->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "# of Cl: " + util::Format()(descriptor::GetCheminfoProperties().calc_IsCl->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "# of Br: " + util::Format()(descriptor::GetCheminfoProperties().calc_IsBr->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "# of I: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsI->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "# of Halogens: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsHalogen->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); - BCL_MessageStd( "Complexity : " + util::Format()( descriptor::GetCheminfoProperties().calc_MolComplexity->SumOverObject( approximator.GetTracker().GetCurrent()->First())( 0))); + BCL_MessageStd( "Scaffold properties"); + ReportThreadMoleculeProgress( approximator.GetTracker().GetCurrent()->First()); BCL_MessageStd( "FLD_Score: " + util::Format()( 
druglike_mol_activity)); // run the approximator @@ -255,23 +265,7 @@ namespace bcl // tell me about the new mol BCL_MessageStd( "Molecule tracker updated at iteration: " + util::Format()( approximator.GetTracker().GetIteration())); - BCL_MessageStd( "MolWeight: " + util::Format()( descriptor::GetCheminfoProperties().calc_MolWeight->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( - "# of HBondAcceptors + HBondDonors: " + - util::Format()( - descriptor::GetCheminfoProperties().calc_HbondAcceptor->SumOverObject( last_accepted->First())( 0) - + descriptor::GetCheminfoProperties().calc_HbondDonor->SumOverObject( last_accepted->First())( 0) - ) - ); - BCL_MessageStd( "# of NRotBonds: " + util::Format()( descriptor::GetCheminfoProperties().calc_NRotBond->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "LogP: " + util::Format()( descriptor::GetCheminfoProperties().calc_XLogP->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "Bond energy and atom propensity score: " + util::Format()( bonde->SumOverObject( last_accepted->First())( 3))); - BCL_MessageStd( "# of F: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsF->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "# of Cl: " + util::Format()(descriptor::GetCheminfoProperties().calc_IsCl->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "# of Br: " + util::Format()(descriptor::GetCheminfoProperties().calc_IsBr->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "# of I: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsI->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "# of Halogens: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsHalogen->SumOverObject( last_accepted->First())( 0))); - BCL_MessageStd( "Complexity : " + util::Format()( descriptor::GetCheminfoProperties().calc_MolComplexity->SumOverObject( last_accepted->First())( 0))); +// ReportThreadMoleculeProgress( 
last_accepted->First()); BCL_MessageStd( "FLD_Score: " + util::Format()( last_accepted->Second())); } else @@ -280,20 +274,39 @@ namespace bcl BCL_MessageStd( "MCM Rejected"); } // save every accepted/improved step of MCM - // hack - add this to approximator at some point + // TODO: add this to approximator to avoid having to manually do the MCM if( last_accepted.IsDefined() && m_SaveAllAcceptedImproved) { m_ThreadManager->m_Mutex.Lock(); if( m_ThreadManager->GetNumberMoleculesBuilt() + 1 <= m_ThreadManager->GetNumberMoleculesToBuild()) { - // get best molecule and best score +// // get best molecule and best score +// BCL_MessageStd +// ( +// "Chemical perturbations from most recent to least recent: " +// ); +// approximator.GetMu + + // grab the last accepted molecule chemistry::FragmentComplete best_mol( last_accepted->First()); - linal::Vector< double> best_score( 1, last_accepted->Second()); - best_mol.StoreProperty( "FLD_Score", best_score); // save the final MCM molecule if( m_ThreadManager->CheckUniqueConfiguration( best_mol)) { + + // assign final FLD score to last accepted molecule + linal::Vector< double> best_score( 1, last_accepted->Second()); + best_mol.StoreProperty( "FLD_Score", best_score); + + // compute the final metrics and save on molecule + for( size_t i( 0), sz( m_FinalMetrics.GetSize()); i < sz; ++i) + { + const std::string &property_name( ( *m_FinalMetrics( i))->GetAlias()); + const linal::Vector< float> &property_value( ( *m_FinalMetrics( i))->SumOverObject( best_mol)); + best_mol.StoreProperty( property_name, property_value); + } + + // add final molecule to ensemble m_ThreadManager->AddMolecule( best_mol); m_ThreadManager->IncreaseMoleculeBuiltCount(); } @@ -343,13 +356,23 @@ namespace bcl ); // get best molecule and best score - chemistry::FragmentComplete best_mol( last_accepted->First()); - linal::Vector< double> best_score( 1, last_accepted->Second()); +// chemistry::FragmentComplete best_mol( last_accepted->First()); + 
chemistry::FragmentComplete best_mol( approximator.GetTracker().GetBest()->First()); +// linal::Vector< double> best_score( 1, last_accepted->Second()); + linal::Vector< double> best_score( 1, approximator.GetTracker().GetBest()->Second()); best_mol.StoreProperty( "FLD_Score", best_score); // save the final MCM molecule if( m_ThreadManager->CheckUniqueConfiguration( best_mol)) { + // compute the final metrics and save on molecule + for( size_t i( 0), sz( m_FinalMetrics.GetSize()); i < sz; ++i) + { + const std::string &property_name( ( *m_FinalMetrics( i))->GetAlias()); + const linal::Vector< float> &property_value( ( *m_FinalMetrics( i))->SumOverObject( best_mol)); + best_mol.StoreProperty( property_name, property_value); + } + m_ThreadManager->AddMolecule( best_mol); m_ThreadManager->IncreaseMoleculeBuiltCount(); } @@ -359,6 +382,23 @@ namespace bcl } while( m_ThreadManager->UpdateWorker( *this)); } // RunThread() + void ReportThreadMoleculeProgress( const chemistry::FragmentComplete &MOLECULE) + { + BCL_MessageStd( "MolWeight: " + util::Format()( descriptor::GetCheminfoProperties().calc_MolWeight->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "# of HBondAcceptors + HBondDonors: " + + util::Format()( descriptor::GetCheminfoProperties().calc_HbondAcceptor->SumOverObject( MOLECULE)( 0) + + descriptor::GetCheminfoProperties().calc_HbondDonor->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "# of NRotBonds: " + util::Format()( descriptor::GetCheminfoProperties().calc_NRotBond->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "LogP: " + util::Format()( descriptor::GetCheminfoProperties().calc_XLogP->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "Bond energy and atom propensity score: " + util::Format()( m_BondDruglikeness->SumOverObject( MOLECULE)( 3))); + BCL_MessageStd( "# of F: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsF->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "# of Cl: " + 
util::Format()(descriptor::GetCheminfoProperties().calc_IsCl->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "# of Br: " + util::Format()(descriptor::GetCheminfoProperties().calc_IsBr->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "# of I: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsI->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "# of Halogens: " + util::Format()( descriptor::GetCheminfoProperties().calc_IsHalogen->SumOverObject( MOLECULE)( 0))); + BCL_MessageStd( "Complexity : " + util::Format()( descriptor::GetCheminfoProperties().calc_MolComplexity->SumOverObject( MOLECULE)( 0))); + } + }; // struct Worker // Tests to see if the worker should keep running @@ -381,222 +421,38 @@ namespace bcl //! param ThreadManager( - util::ShPtr< chemistry::FragmentComplete> START_FRAGMENT, // Base fragment to use -// util::ShPtr< chemistry::FragmentComplete> MUTABLE_FRAGMENT, // mutable fragment in base fragment - chemistry::FragmentEnsemble MUTABLE_FRAGMENT, // mutable fragment in base fragment - storage::Vector< size_t> MUTABLE_ATOM_INDICES, // mutable atom indices in base fragment - util::ShPtr< chemistry::FragmentEnsemble> FRAGMENT_POOL, // Fragments to add to base fragment - descriptor::CheminfoProperty PROPERTY_SCORER, // alternative scorer - bool INTERNAL_MCM_OPTI, - opti::Tracker< chemistry::FragmentComplete, double> MCM_OPTI_GOAL, - bool SAVE_ALL_ACCEPTED_IMPROVED, - const size_t &NUMBER_OF_MOLECULES, // Number to build - const size_t &NUMBER_OF_ITERATIONS, // Number of MC iterations - const size_t &NUMBER_UNIMPROVED_ITERATIONS, // Number of allowed consecutive unimproved MC iterations - const size_t &NUMBER_SKIPPED_ITERATIONS, // Number of allowed consecutive unimproved MC iterations - const float &METROPOLIS_TEMPERATURE, // Temperature during Metropolis criterion evaluation - const size_t &NUMBER_THREADS, // Number of threads (from scheduler) - const std::string &OUTPUT_FILENAME, - const std::string &DRUG_LIKENESS_TYPE, - const float 
&VDW_SCORE_CUTOFF, - const util::Implementation< chemistry::FragmentSplitInterface> &SPLIT_IMPLEMENTATION, - const size_t &MAX_SEQUENTIAL_MUTATES, - const float &RING_SWAP_PROB, - const float &CYCLIZE_PROB, - const float &ALCHEMY_PROB, - const float &REMOVE_ATOM_PROB, - const float &REMOVE_BOND_PROB, - const float &ADD_MEDCHEM_PROB, - const float &FLUORINATE_PROB, - const float &HALOGENATE_PROB, - const float &EXTEND_WITH_LINKER_PROB, - const std::string &POSE_DEPENDENT_MDL_PROPERTY, // pose-dependent scoring - const std::string &POSE_DEPENDENT_RESOLVE_CLASHES, // resolve clashes - const bool &CORINA_CONFS // enables cornina conformers during cleaning + const size_t NUMBER_THREADS, //< Number of threads (from scheduler) + const FragmentComplete_p &START_FRAGMENT, //< Base fragment to use + const std::string &OUTPUT_FILENAME, //< Output filename + const Mutates &MUTATES, //< Mutates with probabilities to apply to chemical scaffold + const descriptor::CheminfoProperty &PROPERTY_SCORER, //< Alternative scorer + const Properties &FINAL_METRICS, //< Metrics to compute on final ensemble + const Tracker &MCM_OPTI_GOAL, //< Whether to optimize up or down + const size_t NUMBER_OF_MOLECULES, //< Number to build + const size_t NUMBER_OF_ITERATIONS, //< Number of MC iterations + const size_t NUMBER_UNIMPROVED_ITERATIONS, //< Number of allowed consecutive unimproved MC iterations + const size_t NUMBER_SKIPPED_ITERATIONS, //< Number of allowed consecutive unimproved MC iterations + const float METROPOLIS_TEMPERATURE, //< Temperature during Metropolis criterion evaluation + bool SAVE_ALL_ACCEPTED_IMPROVED, //< Save every accepted/improved molecule + const size_t MAX_SEQUENTIAL_MUTATES //< Maximum number of times to apply the mutate prior to scoring ) : m_NumberOfMoleculesRequested( NUMBER_OF_MOLECULES), m_NumberOfMoleculesBuilt( 0), - m_NumberMCIterations( NUMBER_OF_ITERATIONS), - m_NumberMCUnimproved( NUMBER_UNIMPROVED_ITERATIONS), - m_NumberMCSkipped( 
NUMBER_SKIPPED_ITERATIONS), - m_MetropolisTemperature( METROPOLIS_TEMPERATURE), - m_Threads( std::min( NUMBER_THREADS, NUMBER_OF_MOLECULES)), - m_DrugLikenessType( DRUG_LIKENESS_TYPE), - m_VDWScoreCutoff( VDW_SCORE_CUTOFF), - m_SplitImplementation( SPLIT_IMPLEMENTATION), - m_MaxSequentialMutates( MAX_SEQUENTIAL_MUTATES), - m_RingSwapProb( RING_SWAP_PROB), - m_CyclizeProb( CYCLIZE_PROB), - m_AlchemyProb( ALCHEMY_PROB), - m_RemoveAtomProb( REMOVE_ATOM_PROB), - m_RemoveBondProb( REMOVE_BOND_PROB), - m_AddMedChemProb( ADD_MEDCHEM_PROB), - m_FluorinateProb( FLUORINATE_PROB), - m_HalogenateProb( HALOGENATE_PROB), - m_ExtendWithLinkerProb( EXTEND_WITH_LINKER_PROB), - m_PoseDependentMDLProperty( POSE_DEPENDENT_MDL_PROPERTY), - m_PoseDependentResolveClashes( POSE_DEPENDENT_RESOLVE_CLASHES) + m_Threads( std::min( NUMBER_THREADS, NUMBER_OF_MOLECULES)) { // prepare output filestream io::File::MustOpenOFStream( m_OutputStream, OUTPUT_FILENAME); - // tree search for RingSwap - util::ShPtr< chemistry::SearchFragmentLibraryFromTree> tree_search - ( - new chemistry::SearchFragmentLibraryFromTree - ( - *util::Implementation< chemistry::RotamerLibraryInterface>( chemistry::RotamerLibraryInterface::GetDefault()) - ) - ); - - // set up our primary mutater object - util::ShPtr< math::MutateDecisionNode< chemistry::FragmentComplete> > mutater - ( - new math::MutateDecisionNode< chemistry::FragmentComplete>() - ); - - // get the starting molecule minus the mutable region for local mutations - util::ShPtr< chemistry::FragmentComplete> scaffold_fragment( new chemistry::FragmentComplete()); - if( MUTABLE_FRAGMENT.GetMolecules().FirstElement().GetSize() || MUTABLE_ATOM_INDICES.GetSize()) - { - static chemistry::FragmentTrackMutableAtoms atom_tracker; - scaffold_fragment = - util::ShPtr< chemistry::FragmentComplete>( new chemistry::FragmentComplete - ( - atom_tracker.GetBaseFragment - ( - *START_FRAGMENT, - MUTABLE_FRAGMENT.GetMolecules().FirstElement(), - MUTABLE_ATOM_INDICES - ) - )); - 
BCL_Assert( scaffold_fragment->GetSize(), "Exiting because of incompatible mutable options"); - } - - // if the internal MCM local optimization option is selected - if( INTERNAL_MCM_OPTI) - { - // POSE-DEPENDENT CONSTRUCTION OF MUTATES // - if( !POSE_DEPENDENT_MDL_PROPERTY.empty()) - { - BCL_MessageStd( "Pose-dependent scoring enabled"); - // set clash resolver - bool clash_resolver; - POSE_DEPENDENT_RESOLVE_CLASHES == "true" ? - clash_resolver = true: - clash_resolver = false; - mutater->AddMutate - ( - chemistry::FragmentMutateMCM - ( - MCM_OPTI_GOAL, - SPLIT_IMPLEMENTATION, - tree_search, - FRAGMENT_POOL, - m_DrugLikenessType, - *START_FRAGMENT, - chemistry::FragmentEnsemble( storage::List< chemistry::FragmentComplete>( 1, *START_FRAGMENT)), - MUTABLE_ATOM_INDICES, - POSE_DEPENDENT_MDL_PROPERTY, - PROPERTY_SCORER, - clash_resolver, - storage::Vector< float>(), - CORINA_CONFS, - m_MaxSequentialMutates, - m_RingSwapProb, - m_CyclizeProb, - m_AlchemyProb, - m_RemoveAtomProb, - m_RemoveBondProb, - m_AddMedChemProb, - m_FluorinateProb, - m_HalogenateProb, - m_ExtendWithLinkerProb - ), - 1.0 - ); - } - // POSE-INDEPENDENT CONSTRUCTION OF MUTATES // - else - { - mutater->AddMutate - ( - chemistry::FragmentMutateMCM - ( - MCM_OPTI_GOAL, - SPLIT_IMPLEMENTATION, - tree_search, - FRAGMENT_POOL, - m_DrugLikenessType, - *START_FRAGMENT, - chemistry::FragmentEnsemble( storage::List< chemistry::FragmentComplete>( 1, *START_FRAGMENT)), - MUTABLE_ATOM_INDICES, - PROPERTY_SCORER, - CORINA_CONFS, - m_MaxSequentialMutates, - m_RingSwapProb, - m_CyclizeProb, - m_AlchemyProb, - m_RemoveAtomProb, - m_RemoveBondProb, - m_AddMedChemProb, - m_FluorinateProb, - m_HalogenateProb, - m_ExtendWithLinkerProb - ), - 1.0 - ); - } - } - // otherwise, just add the mutates and let them fly - else - { - // POSE-DEPENDENT CONSTRUCTION OF MUTATES // - chemistry::FragmentEnsemble scaffold_ens( storage::List< chemistry::FragmentComplete>( 1, *scaffold_fragment)); - if( 
!POSE_DEPENDENT_MDL_PROPERTY.empty()) - { - BCL_MessageStd( "Pose-dependent scoring enabled"); - // set clash resolver - bool clash_resolver; - POSE_DEPENDENT_RESOLVE_CLASHES == "true" ? - clash_resolver = true: - clash_resolver = false; - mutater->AddMutate( chemistry::FragmentMutateRingSwap( tree_search, m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS, true, false, 0.1, true, true), m_RingSwapProb); - mutater->AddMutate( chemistry::FragmentMutateCyclize( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_CyclizeProb); - mutater->AddMutate( chemistry::FragmentMutateAlchemy( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_AlchemyProb); - mutater->AddMutate( chemistry::FragmentMutateRemoveAtom( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_RemoveAtomProb); - mutater->AddMutate( chemistry::FragmentMutateRemoveBond( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_RemoveBondProb); - mutater->AddMutate( chemistry::FragmentMutateExtendWithLinker( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_ExtendWithLinkerProb); - mutater->AddMutate( chemistry::FragmentMutateAddMedChem( FRAGMENT_POOL, m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, 
clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_AddMedChemProb); - mutater->AddMutate( chemistry::FragmentMutateFluorinate( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_FluorinateProb); - mutater->AddMutate( chemistry::FragmentMutateHalogenate( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, POSE_DEPENDENT_MDL_PROPERTY, PROPERTY_SCORER, clash_resolver, storage::Vector< float>(), CORINA_CONFS), m_HalogenateProb); - } - // POSE-INDEPENDENT CONSTRUCTION OF MUTATES // - else - { - BCL_MessageStd( "Pose-independent scoring enabled"); - mutater->AddMutate( chemistry::FragmentMutateRingSwap( tree_search, m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS, true, false, 0.1, true, true), m_RingSwapProb); - mutater->AddMutate( chemistry::FragmentMutateCyclize( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_CyclizeProb); - mutater->AddMutate( chemistry::FragmentMutateAlchemy( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_AlchemyProb); - mutater->AddMutate( chemistry::FragmentMutateRemoveAtom( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_RemoveAtomProb); - mutater->AddMutate( chemistry::FragmentMutateRemoveBond( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_RemoveBondProb); - mutater->AddMutate( chemistry::FragmentMutateExtendWithLinker( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_ExtendWithLinkerProb); - mutater->AddMutate( chemistry::FragmentMutateAddMedChem( FRAGMENT_POOL, m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_AddMedChemProb); - mutater->AddMutate( chemistry::FragmentMutateFluorinate( 
m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_FluorinateProb); - mutater->AddMutate( chemistry::FragmentMutateHalogenate( m_DrugLikenessType, *START_FRAGMENT, scaffold_ens, MUTABLE_ATOM_INDICES, CORINA_CONFS), m_HalogenateProb); - } - } - // set up sequential mutate to perform 1 to N mutates in a row prior to scoring (does not bypass druglikeness filtering) - util::ShPtr< math::MutateInterface< chemistry::FragmentComplete> > mutate_repeater - ( - new math::MutateRepeat< chemistry::FragmentComplete> - ( - mutater, - 1, - m_MaxSequentialMutates - ) - ); +// util::ShPtr< math::MutateInterface< chemistry::FragmentComplete> > mutate_repeater +// ( +// new math::MutateRepeat< chemistry::FragmentComplete> +// ( +// MUTATES, +// 1, +// MAX_SEQUENTIAL_MUTATES +// ) +// ); // Set up workers std::vector< Worker> workers( m_Threads); @@ -607,31 +463,21 @@ namespace bcl ) { Worker &worker_ref( *itr); + worker_ref.m_ThreadManager = this; worker_ref.m_StartFragment = START_FRAGMENT.HardCopy(); worker_ref.m_StartFragment->GetCacheMap() = util::ShPtr< descriptor::CacheMap>( new descriptor::CacheMap); - worker_ref.m_MutableFragment = MUTABLE_FRAGMENT; - worker_ref.m_MutableAtomIndices = MUTABLE_ATOM_INDICES; - worker_ref.m_NumberMCIterations = m_NumberMCIterations; - worker_ref.m_NumberMCUnimproved = m_NumberMCUnimproved; - worker_ref.m_NumberMCSkipped = m_NumberMCSkipped; - worker_ref.m_MetropolisTemperature = m_MetropolisTemperature; +// worker_ref.m_Mutate = mutate_repeater; + worker_ref.m_Mutate = MUTATES; + worker_ref.m_PropertyScorer = PROPERTY_SCORER.HardCopy(); + worker_ref.m_Score = Score( new chemistry::ScoreFunctionGeneric( worker_ref.m_PropertyScorer) ); + worker_ref.m_FinalMetrics = FINAL_METRICS; worker_ref.m_OptiGoal = MCM_OPTI_GOAL; + worker_ref.m_NumberMCIterations = NUMBER_OF_ITERATIONS; + worker_ref.m_NumberMCUnimproved = NUMBER_UNIMPROVED_ITERATIONS; + worker_ref.m_NumberMCSkipped = NUMBER_SKIPPED_ITERATIONS; + 
worker_ref.m_MetropolisTemperature = METROPOLIS_TEMPERATURE; worker_ref.m_SaveAllAcceptedImproved = SAVE_ALL_ACCEPTED_IMPROVED; - worker_ref.m_Corina = CORINA_CONFS; - worker_ref.m_ThreadManager = this; - worker_ref.m_PropertyScorer = PROPERTY_SCORER.HardCopy(); - worker_ref.m_Score = util::ShPtr< math::FunctionInterfaceSerializable< chemistry::FragmentComplete, double> > - ( - new chemistry::ScoreFunctionGeneric( worker_ref.m_PropertyScorer) - ); - if( INTERNAL_MCM_OPTI) - { - worker_ref.m_Mutate = mutater; - } - else - { - worker_ref.m_Mutate = mutate_repeater; - } + worker_ref.m_BondDruglikeness = descriptor::CheminfoProperty( "MoleculeTotalBondEnergy"); } // Allocate space for jobs @@ -678,31 +524,19 @@ namespace bcl m_NumberOfMoleculesBuilt++; } + // Return the number of molecules built size_t GetNumberMoleculesBuilt() { return m_NumberOfMoleculesBuilt; } + // Return the number of molecules requested be built size_t GetNumberMoleculesToBuild() { return m_NumberOfMoleculesRequested; } - size_t GetNumberMCIterations() - { - return m_NumberMCIterations; - } - - size_t GetNumberMCUnimproved() - { - return m_NumberMCUnimproved; - } - - size_t GetNumberMCSkipped() - { - return m_NumberMCSkipped; - } - + // Return true if the molecule is unique among built molecules bool CheckUniqueConstitution( const chemistry::FragmentComplete &MOLECULE) { bool unique( m_UniqueConsts.Insert( chemistry::FragmentConstitutionShared( MOLECULE)).second); @@ -711,6 +545,7 @@ namespace bcl return unique; } + // Return true if the molecule is unique among built molecules bool CheckUniqueConfiguration( const chemistry::FragmentComplete &MOLECULE) { bool unique( m_UniqueConfigs.Insert( chemistry::FragmentConfigurationShared( MOLECULE)).second); @@ -719,6 +554,7 @@ namespace bcl return unique; } + // Add molecule to final ensemble void AddMolecule( const chemistry::FragmentComplete &MOLECULE) { m_Molecules.PushBack( MOLECULE); @@ -758,88 +594,6 @@ namespace bcl }; // class ThreadManager - 
////////// - // data // - ////////// - - //! flag to control number of molecules to be generated - util::ShPtr< command::FlagInterface> m_NumberMoleculesFlag; - - //! flag to control the number of MC iterations in molecule optimization - util::ShPtr< command::FlagInterface> m_NumberIterationsFlag; - - //! flag to control the number of maximum allowed consecutive unimproved MC iterations - util::ShPtr< command::FlagInterface> m_NumberUnimprovedFlag; - - //! flag to control the number of maximum allowed skipped MC iterations - util::ShPtr< command::FlagInterface> m_NumberSkippedFlag; - - //! flag to control the temperature for the Metropolis criterion - util::ShPtr< command::FlagInterface> m_MetropolisTemperatureFlag; - - //! flag to control input base fragment - util::ShPtr< command::FlagInterface> m_StartFragmentFlag; - - //! flag to control input mutable fragment within base fragment - util::ShPtr< command::FlagInterface> m_MutableFragmentFlag; - - //! flag to control input mutable atoms within base fragment - util::ShPtr< command::FlagInterface> m_MutableAtomsFlag; - - //! flag for defining output filename, - util::ShPtr< command::FlagInterface> m_OutputFilenameFlag; - - //! flag for defining input fragments - util::ShPtr< command::FlagInterface> m_GrowFragmentsFlag; - - //! flag for an alternative score function to just the trained model - util::ShPtr< command::FlagInterface> m_PropertyScoringFunctionFlag; - - //! flag for the druglikeness filter to use - util::ShPtr< command::FlagInterface> m_DrugLikenessTypeFlag; - - //! flag to split molecules - util::ShPtr< command::FlagInterface> m_SplitImplementationFlag; - - //! flag to do an internal MCM optimization - util::ShPtr< command::FlagInterface> m_SimulatedAnnealingFlag; - - //! flag to maximize score istead of minimize - util::ShPtr< command::FlagInterface> m_LargerIsBetterFlag; - - //! 
flag to save all molecules accepted or improved by main MCM - util::ShPtr< command::FlagInterface> m_SaveAllAcceptedImprovedFlag; - - //! flag to use corina to generate starting conformer - util::ShPtr< command::FlagInterface> m_Corina; - - //! flag to set 3D VDW score cutoff - util::ShPtr< command::FlagInterface> m_VDWClashCutoffFlag; - - //! flag to enable pose-dependent scoring (default is ligand-based scoring) - util::ShPtr< command::FlagInterface> m_PoseDependentFlag; - - //! flag controlling the maximum possible number of sequential mutates that can occur between MCM evaluation - util::ShPtr< command::FlagInterface> m_MaxSequentialMutatesFlag; - - //! flags controling relative probabilities of different mutate objects - util::ShPtr< command::FlagInterface> m_RingSwapProbFlag; - util::ShPtr< command::FlagInterface> m_CyclizeProbFlag; - util::ShPtr< command::FlagInterface> m_AlchemyProbFlag; - util::ShPtr< command::FlagInterface> m_RemoveAtomProbFlag; - util::ShPtr< command::FlagInterface> m_RemoveBondProbFlag; - util::ShPtr< command::FlagInterface> m_AddMedChemProbFlag; - util::ShPtr< command::FlagInterface> m_FluorinateProbFlag; - util::ShPtr< command::FlagInterface> m_HalogenateProbFlag; - util::ShPtr< command::FlagInterface> m_ExtendWithLinkerProbFlag; - - /////////////////////////////////// - // construction and destruction // - /////////////////////////////////// - - //! default constructor - FocusedLibraryDesign(); - public: //! @brief Clone function @@ -882,8 +636,7 @@ namespace bcl //! @return a brief (no more than 3 line) description for the application std::string GetDescription() const { - return "Generates distributions of molecules utilizing alchemical mutations and a " - "property-based score metric, such as a QSAR model."; + return "Generates distributions of molecules utilizing chemical perturbations ('mutations') and a property-based score metric, such as a QSAR model."; } //! 
@brief initializes the command object for that executable @@ -907,35 +660,20 @@ namespace bcl ///////////////////// // insert all the flags and params + sp_cmd->AddFlag( m_StartFragmentFlag); + sp_cmd->AddFlag( m_OutputFilenameFlag); + sp_cmd->AddFlag( m_MutateFlag); + sp_cmd->AddFlag( m_MutateProbabilityFlag); + sp_cmd->AddFlag( m_MaxSequentialMutatesFlag); + sp_cmd->AddFlag( m_PropertyScoringFunctionFlag); + sp_cmd->AddFlag( m_FinalMetricsFlag); sp_cmd->AddFlag( m_NumberMoleculesFlag); sp_cmd->AddFlag( m_NumberIterationsFlag); sp_cmd->AddFlag( m_NumberUnimprovedFlag); sp_cmd->AddFlag( m_NumberSkippedFlag); sp_cmd->AddFlag( m_MetropolisTemperatureFlag); - sp_cmd->AddFlag( m_StartFragmentFlag); - sp_cmd->AddFlag( m_MutableFragmentFlag); - sp_cmd->AddFlag( m_MutableAtomsFlag); - sp_cmd->AddFlag( m_OutputFilenameFlag); - sp_cmd->AddFlag( m_GrowFragmentsFlag); - sp_cmd->AddFlag( m_PropertyScoringFunctionFlag); - sp_cmd->AddFlag( m_DrugLikenessTypeFlag); - sp_cmd->AddFlag( m_SplitImplementationFlag); - sp_cmd->AddFlag( m_SimulatedAnnealingFlag); sp_cmd->AddFlag( m_LargerIsBetterFlag); sp_cmd->AddFlag( m_SaveAllAcceptedImprovedFlag); - sp_cmd->AddFlag( m_Corina); - sp_cmd->AddFlag( m_VDWClashCutoffFlag); - sp_cmd->AddFlag( m_PoseDependentFlag); - sp_cmd->AddFlag( m_MaxSequentialMutatesFlag); - sp_cmd->AddFlag( m_RingSwapProbFlag); - sp_cmd->AddFlag( m_CyclizeProbFlag); - sp_cmd->AddFlag( m_AlchemyProbFlag); - sp_cmd->AddFlag( m_RemoveAtomProbFlag); - sp_cmd->AddFlag( m_RemoveBondProbFlag); - sp_cmd->AddFlag( m_AddMedChemProbFlag); - sp_cmd->AddFlag( m_FluorinateProbFlag); - sp_cmd->AddFlag( m_HalogenateProbFlag); - sp_cmd->AddFlag( m_ExtendWithLinkerProbFlag); /////////////////// // default flags // @@ -953,99 +691,54 @@ namespace bcl int Main() const { - // setup the base fragment + // setup the starting fragment io::IFStream input; io::File::MustOpenIFStream( input, m_StartFragmentFlag->GetFirstParameter()->GetValue()); - - // Needs to be wrapped in a ShPtr so it can 
be passed to ThreadManager util::ShPtr< chemistry::FragmentComplete> sp_startfragment ( - new chemistry::FragmentComplete( sdf::FragmentFactory::MakeFragment( input, sdf::e_Maintain)) + new chemistry::FragmentComplete( sdf::FragmentFactory::MakeFragment( input)) ); io::File::CloseClearFStream( input); - // setup the mutable fragment - util::ShPtr< chemistry::FragmentComplete> sp_mutablefragment( new chemistry::FragmentComplete()); - chemistry::FragmentEnsemble mutable_fragments; - if( m_MutableFragmentFlag->GetFlag()) - { - io::File::MustOpenIFStream( input, m_MutableFragmentFlag->GetFirstParameter()->GetValue()); - - // Needs to be wrapped in a ShPtr so it can be passed to ThreadManager - chemistry::FragmentComplete frag( sdf::FragmentFactory::MakeFragment( input, sdf::e_Maintain)); - sp_mutablefragment = util::ShPtr< chemistry::FragmentComplete>( new chemistry::FragmentComplete( frag)); - mutable_fragments = chemistry::FragmentEnsemble( storage::List< chemistry::FragmentComplete>( 1, *sp_mutablefragment)); - // message indicating using mutable fragment - BCL_MessageStd( - "Mutating substructure atoms specified in the file '" + - util::Format()( m_MutableFragmentFlag->GetFirstParameter()->GetValue()) + "'" - ); - } - io::File::CloseClearFStream( input); - - // setup the mutable atom indices - storage::Vector< size_t> mutable_atom_indices; - if( m_MutableAtomsFlag->GetFlag()) - { - // convert the functionalization points to numeric values - storage::Vector< std::string> fxnl_pts_str( m_MutableAtomsFlag->GetStringList()); - - // output mutable indices to terminal - std::string mutable_indices_message; - for - ( - auto itr( fxnl_pts_str.Begin()), itr_end( fxnl_pts_str.End()); - itr != itr_end; - ++itr - ) - { - mutable_indices_message.append(util::Format()( *itr)); - mutable_indices_message.append(","); - } - BCL_MessageStd( "Mutable atom indices: " + util::Format()( mutable_indices_message)); - - storage::Set< size_t> fxnl_pts_set; - for( size_t i( 0), l( 
fxnl_pts_str.GetSize()); i < l; ++i) - { - size_t point; - if( !util::TryConvertFromString( point, fxnl_pts_str( i), util::GetLogger())) - { - BCL_MessageStd( "Could not parse \"" + fxnl_pts_str( i) + "\" as a number"); - continue; - } - if( point < sp_startfragment->GetSize()) - { - fxnl_pts_set.Insert( point); - } - else - { - BCL_MessageStd - ( - "Warning: specified point \"" + util::Format()( point) + "\"" - " has an index greater than the number of atoms in the molecule, not using this point" - ); - } - } - mutable_atom_indices = storage::Vector< size_t>( fxnl_pts_set.Begin(), fxnl_pts_set.End()); - } + // setup the scorer + descriptor::CheminfoProperty property_scorer + ( + m_PropertyScoringFunctionFlag->GetFlag() ? + m_PropertyScoringFunctionFlag->GetFirstParameter()->GetValue() : + "Constant(0.0)" + ); - // try to read cheminfo property scorer - descriptor::CheminfoProperty property_scorer; - if( m_PropertyScoringFunctionFlag->GetFlag()) + // setup the mutates + auto mutate_input( m_MutateFlag->GetStringList()); + auto mutate_probs( m_MutateProbabilityFlag->GetNumericalList< float>()); + if( mutate_probs.GetSize() != mutate_input.GetSize()) { - property_scorer = m_PropertyScoringFunctionFlag->GetFirstParameter()->GetValue(); + mutate_probs = storage::Vector< float>( mutate_input.GetSize(), 1.0); } - else + Mutates mutates( new math::MutateDecisionNode< chemistry::FragmentComplete>()); + for + ( + size_t mutate_i( 0), mutate_sz( mutate_input.GetSize()); + mutate_i < mutate_sz; + ++mutate_i + ) { - // flat energy surface - property_scorer = "Constant(0.0)"; + util::Implementation< chemistry::FragmentMutateInterface> implementation( mutate_input( mutate_i)); + mutates->AddMutate( *implementation, mutate_probs( mutate_i)); } - // read internal mcm opti flag - bool internal_mcm_opti( false); - if( m_SimulatedAnnealingFlag->GetFlag()) + // setup the final metrics + auto final_metrics_input( m_FinalMetricsFlag->GetStringList()); + Properties final_metrics; + for + 
( + size_t prop_i( 0), prop_sz( final_metrics_input.GetSize()); + prop_i < prop_sz; + ++prop_i + ) { - internal_mcm_opti = true; + util::ShPtr< descriptor::CheminfoProperty> property( new descriptor::CheminfoProperty( final_metrics_input( prop_i))); + final_metrics.PushBack( property); } // set MCM optimization goal @@ -1062,44 +755,10 @@ namespace bcl save_all_accepted_improved = true; } - // set corina conformers - bool corina_confs( false); - if( m_Corina->GetFlag()) - { - corina_confs = true; - } - - // get splitter - util::Implementation< chemistry::FragmentSplitInterface> splitter; - if( m_SplitImplementationFlag->GetFlag()) - { - splitter = m_SplitImplementationFlag->GetFirstParameter()->GetValue(); - } - ///////////////////////// // parse the arguments // ///////////////////////// - // get all filename for grow fragments - const storage::Vector< std::string> filenames( m_GrowFragmentsFlag->GetStringList()); - - // creating ShPtr of growfragments - util::ShPtr< chemistry::FragmentEnsemble> sp_fragment_pool( new chemistry::FragmentEnsemble); - - for - ( - storage::Vector< std::string>::const_iterator - itr( filenames.Begin()), itr_end( filenames.End()); - itr != itr_end; - ++itr - ) - { - // read in grow fragments ensemble - io::File::MustOpenIFStream( input, *itr); - sp_fragment_pool->ReadMoreFromMdl( input, sdf::e_Maintain); - io::File::CloseClearFStream( input); - } - ///////////////////////////// // Prepare rotamer library // ///////////////////////////// @@ -1109,82 +768,23 @@ namespace bcl threadmanager_timer.Start(); // Build the molecules using metropolis monte-carlo - if( m_PoseDependentFlag->GetFlag()) - { - ThreadManager thread_manager - ( - sp_startfragment, -// sp_mutablefragment, - mutable_fragments, - mutable_atom_indices, - sp_fragment_pool, - property_scorer, - internal_mcm_opti, - mcm_opti_goal, - save_all_accepted_improved, - m_NumberMoleculesFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - 
m_NumberIterationsFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_NumberUnimprovedFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_NumberSkippedFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_MetropolisTemperatureFlag->GetFirstParameter()->GetNumericalValue< float>(), - sched::GetNumberCPUs(), - m_OutputFilenameFlag->GetFirstParameter()->GetValue(), - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue(), - m_VDWClashCutoffFlag->GetFirstParameter()->GetNumericalValue< float>(), - splitter, - m_MaxSequentialMutatesFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_RingSwapProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_CyclizeProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_AlchemyProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_RemoveAtomProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_RemoveBondProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_AddMedChemProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_FluorinateProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_HalogenateProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_ExtendWithLinkerProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_PoseDependentFlag->GetParameterList()( 0)->GetValue(), - m_PoseDependentFlag->GetParameterList()( 1)->GetValue(), - corina_confs - ); - } - else - { - ThreadManager thread_manager - ( - sp_startfragment, -// sp_mutablefragment, - mutable_fragments, - mutable_atom_indices, - sp_fragment_pool, - property_scorer, - internal_mcm_opti, - mcm_opti_goal, - save_all_accepted_improved, - m_NumberMoleculesFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_NumberIterationsFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_NumberUnimprovedFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_NumberSkippedFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - 
m_MetropolisTemperatureFlag->GetFirstParameter()->GetNumericalValue< float>(), - sched::GetNumberCPUs(), - m_OutputFilenameFlag->GetFirstParameter()->GetValue(), - m_DrugLikenessTypeFlag->GetFirstParameter()->GetValue(), - m_VDWClashCutoffFlag->GetFirstParameter()->GetNumericalValue< float>(), - splitter, - m_MaxSequentialMutatesFlag->GetFirstParameter()->GetNumericalValue< size_t>(), - m_RingSwapProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_CyclizeProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_AlchemyProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_RemoveAtomProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_RemoveBondProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_AddMedChemProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_FluorinateProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_HalogenateProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - m_ExtendWithLinkerProbFlag->GetFirstParameter()->GetNumericalValue< float>(), - std::string(), - std::string(), - corina_confs - ); - } + ThreadManager thread_manager + ( + sched::GetNumberCPUs(), + sp_startfragment, + m_OutputFilenameFlag->GetFirstParameter()->GetValue(), + mutates, + property_scorer, + final_metrics, + mcm_opti_goal, + m_NumberMoleculesFlag->GetFirstParameter()->GetNumericalValue< size_t>(), + m_NumberIterationsFlag->GetFirstParameter()->GetNumericalValue< size_t>(), + m_NumberUnimprovedFlag->GetFirstParameter()->GetNumericalValue< size_t>(), + m_NumberSkippedFlag->GetFirstParameter()->GetNumericalValue< size_t>(), + m_MetropolisTemperatureFlag->GetFirstParameter()->GetNumericalValue< float>(), + save_all_accepted_improved, + m_MaxSequentialMutatesFlag->GetFirstParameter()->GetNumericalValue< size_t>() + ); // End track time threadmanager_timer.Stop(); @@ -1227,85 +827,60 @@ namespace bcl //! 
@brief standard constructor FocusedLibraryDesign::FocusedLibraryDesign() : - m_NumberMoleculesFlag - ( - new command::FlagStatic - ( - "number_molecules", "flag for number of molecules to generate", - command::Parameter - ( - "number_molecules", "total number of molecules", - command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "10" - ) - ) - ), - m_NumberIterationsFlag - ( - new command::FlagStatic - ( - "number_iterations", "flag for number of MC iterations", - command::Parameter - ( - "number_iterations", "maximum number of MC iterations", - command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "100" - ) - ) - ), - m_NumberUnimprovedFlag + m_StartFragmentFlag ( new command::FlagStatic ( - "number_unimproved", "flag for number of maximum allowed consecutive unimproved MC iterations", + "start_fragment", "filename for input starting fragment", command::Parameter ( - "number_unimproved", "maximum number of allowed consecutive unimproved MC iterations", - command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "100" + "fragment_filename", "filename for input sdf of molecules", "" ) ) ), - m_NumberSkippedFlag + m_OutputFilenameFlag ( new command::FlagStatic ( - "number_skipped", "flag for number of maximum allowed skipped MC iterations", + "output_filename", "flag selecting the output file name", command::Parameter ( - "number_skipped", "maximum number of allowed skipped MC iterations", - command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "100" + "output_filename_param", "filename for output sdf of molecules" ) ) ), - m_MetropolisTemperatureFlag + m_MutateFlag ( - new command::FlagStatic + new command::FlagDynamic ( - "temperature", "flag for the temperature used in the Metropolis criterion;" - " units match the units of the score function", + "mutates", + "methods with which to mutate molecules; " + "multiple mutates may be provided; the mutate performed at each iteration will be 
chosen at random " + "based on the weighted probability of each mutation.", command::Parameter ( - "temperature", "temperature for MCM evaluation", - command::ParameterCheckRanged< float>( 1.0, std::numeric_limits< float>::max()), "1.0" + "mutate", + "", + command::ParameterCheckSerializable + ( + util::Implementation< chemistry::FragmentMutateInterface>() + ) ) ) ), - m_PoseDependentFlag + m_MutateProbabilityFlag ( new command::FlagDynamic ( - "pose_dependent_scoring", "enables pose-dependent scoring", - storage::Vector< command::Parameter>::Create + "mutate_probs", + "relative probability of performing a mutate; the number of values passed here must " + "be equal to the number of mutates passed via the 'mutates' flag, otherwise " + "each mutate will be initialized with an equal probability.", + command::Parameter ( - command::Parameter - ( - "MDL_property", - "MDL property specifying the PDB filename for the receptor; " - "needs to match MDL property name used to train the corresponding machine learning model" - ), - command::Parameter - ( - "resolve_clashes", "resolve clashes between the protein and ligand", - command::ParameterCheckAllowed( storage::Vector< std::string>::Create( "true", "false")) - ) + "mutate", + "", + command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()) ) ) ), @@ -1322,301 +897,99 @@ namespace bcl ) ) ), - m_RingSwapProbFlag - ( - new command::FlagStatic - ( - "mutate_ringswap_prob", "flag for the relative probability of performing a ringswap mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_ringswap", "replace a single atom or whole ring with a new ring structure from an internal ring library; " - "substituents on the altered ring are randomly assigned a position on the new ring", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_CyclizeProbFlag - ( - new 
command::FlagStatic - ( - "mutate_cyclize_prob", "flag for the relative probability of performing a cyclize mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_cyclize", "connect non-ring moieties with other non-ring or ring moieties", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_AlchemyProbFlag - ( - new command::FlagStatic - ( - "mutate_alchemy_prob", "flag for the relative probability of performing an alchemy mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_alchemy", "transform one element type into another preserving charge and optimizing the bond type to the new element; " - "restricted to H, C, N, O, S, F, Cl, Br; " - "the probability of a transformation is based on the prevalence of each element type in a sample of druglike molecules", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_RemoveAtomProbFlag - ( - new command::FlagStatic - ( - "mutate_remove_atom_prob", "flag for the relative probability of performing an atom removal mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_remove_atom", "remove an atom from the molecule; if the " - "molecule is split into multiple fragments, keep only the largest fragment", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_RemoveBondProbFlag - ( - new command::FlagStatic - ( - "mutate_remove_bond_prob", "flag for the relative probability of performing a bond removal mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_remove_bond", "remove a bond from the molecule; preserving only the " - 
"largest fragment after the molecule is split", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_AddMedChemProbFlag - ( - new command::FlagStatic - ( - "mutate_add_medchem_prob", "flag for the relative probability of performing an add medchem group mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_add_medchem", "append medchem-like functional groups to the molecule from an internal library", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_FluorinateProbFlag - ( - new command::FlagStatic - ( - "mutate_fluorinate_prob", "flag for the relative probability of performing a fluorinate mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_fluorinate", "strips a carbon atom of any bonded hydrogen atoms and replaces them with fluorine atoms; " - "equal probability to strip all fluorine atoms from a carbon and replace with hydrogen atoms", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_HalogenateProbFlag - ( - new command::FlagStatic - ( - "mutate_halogenate_prob", "flag for the relative probability of performing a halogenate mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_halogenate", "add an F, Cl, Br, or I atom to an aromatic ring system; " - "the probability of a transformation is based on the relative prevalence of each " - "halogen type in a sample of druglike molecules; " - "F - 25%, Cl - 60%, Br - 10%, I - 5%", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_ExtendWithLinkerProbFlag - ( - new command::FlagStatic - ( - "mutate_extendwithlinker_prob", "flag for the 
relative probability of performing an extendwithlinker mutation during molecule optimization; " - "automatically rescaled between 0 and 1 with any other mutates", - command::Parameter - ( - "mutate_extendwithlinker", "split current molecule into two fragments and re-connect them with a linker consisting of " - "either a ring, alkyl/methoxy/ethoxy chain, or single element; alternatively, beginning with a ring, create a linker" - " to a new ring system", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), "0.1" - ) - ) - ), - m_StartFragmentFlag - ( - new command::FlagStatic - ( - "start_fragment", "filename for input starting fragment", - command::Parameter - ( - "fragment_filename", "filename for input sdf of molecules", "" - ) - ) - ), - m_MutableFragmentFlag + m_PropertyScoringFunctionFlag ( - new command::FlagStatic + new command::FlagDynamic ( - "mutable_fragment", "filename for fragment in base fragment that can be mutated", + "score_function", + "the scoring function to use", command::Parameter ( - "mutable_fragment_filename", "if no filename is supplied, defaults to all fragments in base fragment are mutable;" - " note that this option cannot be used simultaneously with 'mutable_atoms'", - "" + "function", + "the scoring function implementation to use", + command::ParameterCheckSerializable + ( +// chemistry::ScoreFunctionGeneric() + descriptor::CheminfoProperty() + ) ) ) ), - m_MutableAtomsFlag + m_FinalMetricsFlag ( new command::FlagDynamic ( - "mutable_atoms", "filename for 0-indexed atom indices that can be mutated", + "final_metrics", + "descriptors to compute on the final molecules", command::Parameter ( - "mutable_atom_index", "if no filename is supplied, defaults to all atoms are mutable;" - " note that this option cannot be used simultaneously with 'mutable_fragment'", - command::ParameterCheckRanged< size_t>( 0, std::numeric_limits< size_t>::max()), - "" + "descriptors", + "", + command::ParameterCheckSerializable + ( 
+ descriptor::CheminfoProperty() + ) ) ) ), - m_OutputFilenameFlag + m_NumberMoleculesFlag ( new command::FlagStatic ( - "output_filename", "flag selecting the output file name", + "number_molecules", "flag for number of molecules to generate", command::Parameter ( - "output_filename_param", "filename for output sdf of molecules" + "number_molecules", "total number of molecules", + command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "10" ) ) ), - m_GrowFragmentsFlag + m_NumberIterationsFlag ( new command::FlagStatic ( - "grow_fragments", - "files containing fragments to append to the molecule", + "number_iterations", "flag for number of MC iterations", command::Parameter ( - "grow fragments filename", - "name of file containing grow fragments", - command::ParameterCheckFileExistence(), - chemistry::RotamerLibraryFile::GetRotamerFinder().FindFile( "") + ( "bcl_buildfrag_0.sdf.gz") + "number_iterations", "maximum number of MC iterations", + command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "100" ) ) ), - m_PropertyScoringFunctionFlag + m_NumberUnimprovedFlag ( - new command::FlagDynamic + new command::FlagStatic ( - "scoring_function", - "the scoring function to use", + "number_unimproved", "flag for number of maximum allowed consecutive unimproved MC iterations", command::Parameter ( - "function", - "the scoring function implementation to use", - command::ParameterCheckSerializable - ( - chemistry::ScoreFunctionGeneric() - ) + "number_unimproved", "maximum number of allowed consecutive unimproved MC iterations", + command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "100" ) ) ), - m_DrugLikenessTypeFlag + m_NumberSkippedFlag ( new command::FlagStatic ( - "druglikeness_type", - "the type of druglikeness filter to use to determine when a molecule is skipped by the Monte Carlo algorithm", + "number_skipped", "flag for number of maximum allowed skipped MC iterations", command::Parameter ( - "type", - 
"the type of druglikeness to use", - command::ParameterCheckAllowed - ( - storage::Vector< std::string>::Create - ( - "IsConstitutionDruglike", - "IsConstitutionDruglikeAndHitlike", - "None" - ) - ), - "IsConstitutionDruglike" + "number_skipped", "maximum number of allowed skipped MC iterations", + command::ParameterCheckRanged< int>( 1, std::numeric_limits< int>::max()), "100" ) ) ), - m_SplitImplementationFlag + m_MetropolisTemperatureFlag ( new command::FlagStatic ( - "split_implementation", - "method to split molecules if performing internal MCM optimization", + "temperature", "flag for the temperature used in the Metropolis criterion;" + " units match the units of the score function", command::Parameter ( - "", - "", - command::ParameterCheckSerializable( util::Implementation< chemistry::FragmentSplitInterface>()), - "Rings" - ) - ) - ), - m_SimulatedAnnealingFlag - ( - new command::FlagDynamic - ( - "internal_mcm_simulated_annealing", - "Perform MCM simulated annealing (SA) optimization trials for randomly chosen or manually specified" - "substructures of molecule. The output from each MCM-SA trial is passed to the main MCM " - "engine for scoring and decision-making. Repeats until main MCM reaches termination criteria. " - "This is an alternative to performing general MCM optimization of the whole molecule or a single specific fragment " - "or set of atoms using the standard mutates with one MCM engine at a fixed temperature. In principle, it allows " - "the main MCM to evaluate optimized substructure changes; however, it is computationally more demanding. \n" - "The SA engine dynamically adjusts the temperature according to the ratio of accepted steps. It tries the match the" - "current ratio of accepted steps to the one calculated between given start ratio and end ratio and using the" - "number of steps as a linear predictor. It only tries to adjust the temperature every Nth step where N is" - "specified by the user. 
If the ratio is lower than the expected one, it increases the temperature, otherwise it" - "decreases the temperature. For cases where the actual ratio is close to expected ratio ( within 0.1) it" - "adjusts the temperature by multiplying/diving by a small coefficient, while for cases where the difference" - "is larger, the temperature is adjusted by multiplying/diving by a larger coefficient.", - storage::Vector< command::Parameter>::Create - ( - command::Parameter - ( - "temp_accept_start", - "fraction of MCM moves to be accepted initially", - command::ParameterCheckRanged< float>( 0.0, 1.0), - "0.90" - ), - command::Parameter - ( - "temp_accept_end", - "fraction of MCM moves to be accepted by the end", - command::ParameterCheckRanged< float>( 0.0, 1.0), - "0.10" - ), - command::Parameter - ( - "initial_temp", - "starting temperature", - command::ParameterCheckRanged< float>( 0.0, std::numeric_limits< float>::max()), - "1.0" - ), - command::Parameter - ( - "steps_per_update", - "number of steps between ", - command::ParameterCheckRanged< size_t>( 1, std::numeric_limits< size_t>::max()), - "10" - ) + "temperature", "temperature for MCM evaluation", + command::ParameterCheckRanged< float>( 1.0, std::numeric_limits< float>::max()), "1.0" ) ) ), @@ -1638,38 +1011,6 @@ namespace bcl "overall distribution of molecule scores will be skewed worse, but intermediate " "structures will be available for analysis" ) - ), - m_Corina - ( - new command::FlagStatic - ( - "corina", - "make a system call to Corina to make the starting conformer during molecule cleaning;" - "this means that if only 1 conformer is desired (i.e. pose-independent scoring) it will be the corina default conformer," - "while if multiple conformers are desired (i.e. 
refinement phase of pose-dependent scoring) there will be no effect; " - "this option is meant primarily to allow backward compatibility for QSAR models generated with Corina conformers" - ) - ), - m_VDWClashCutoffFlag - ( - new command::FlagStatic - ( - "conf_vdw_cutoff", - "maximum Van der Waals score of a valid 3D conformer", - command::Parameter - ( - "vdw_score", - "Internal Van der Waals score of the molecule conformer normalized by the number of atoms in the molecule; " - "computed with MoleculeVDWScore; " - "note that ligand-based methods do not perform much conformer optimization (for efficiency purposes), and " - "may have high VDW scores even if the molecule is consistitutionally acceptable (especially if generated " - "with Corina). Thus, for ligand-based design tasks we recommend increasing the VDW score cutoff to at least " - "2.0 - 5.0, though higher can also be appropriate. For structure-based pose-dependent optimization lower " - "cutoff scores can be used, and this may depend on the number of iterations used for pose optimization " - "after the generation of each molecule.", - command::ParameterCheckRanged< float>( 0.0, 1000.0), "5.0" - ) - ) ) { } diff --git a/apps/molecule/bcl_app_focused_library_design.h b/apps/molecule/bcl_app_focused_library_design.h new file mode 100644 index 000000000..246e8412e --- /dev/null +++ b/apps/molecule/bcl_app_focused_library_design.h @@ -0,0 +1,445 @@ +// (c) Copyright BCL @ Vanderbilt University 2014 +// (c) BCL Homepage: http://www.meilerlab.org/bclcommons +// (c) BCL Code Repository: https://github.com/BCLCommons/bcl +// (c) +// (c) The BioChemical Library (BCL) was originally developed by contributing members of the Meiler Lab @ Vanderbilt University. +// (c) +// (c) The BCL is now made available as an open-source software package distributed under the permissive MIT license, +// (c) developed and maintained by the Meiler Lab at Vanderbilt University and contributing members of the BCL Commons. 
+// (c) +// (c) External code contributions to the BCL are welcome. Please visit the BCL Commons GitHub page for information on how you can contribute. +// (c) +// (c) This file is part of the BCL software suite and is made available under the MIT license. +// (c) + +#ifndef BCL_APP_FOCUSED_LIBRARY_DESIGN_H_ +#define BCL_APP_FOCUSED_LIBRARY_DESIGN_H_ + +// (c) Copyright BCL @ Vanderbilt University 2014 +// (c) BCL Homepage: http://www.meilerlab.org/bclcommons +// (c) BCL Code Repository: https://github.com/BCLCommons/bcl +// (c) +// (c) The BioChemical Library (BCL) was originally developed by contributing members of the Meiler Lab @ Vanderbilt University. +// (c) +// (c) The BCL is now made available as an open-source software package distributed under the permissive MIT license, +// (c) developed and maintained by the Meiler Lab at Vanderbilt University and contributing members of the BCL Commons. +// (c) +// (c) External code contributions to the BCL are welcome. Please visit the BCL Commons GitHub page for information on how you can contribute. +// (c) +// (c) This file is part of the BCL software suite and is made available under the MIT license. +// (c) + +//// (c) Copyright BCL @ Vanderbilt University 2014 +//// (c) BCL Homepage: http://www.meilerlab.org/bclcommons +//// (c) BCL Code Repository: https://github.com/BCLCommons/bcl +//// (c) +//// (c) The BioChemical Library (BCL) was originally developed by contributing members of the Meiler Lab @ Vanderbilt University. +//// (c) +//// (c) The BCL is now made available as an open-source software package distributed under the permissive MIT license, +//// (c) developed and maintained by the Meiler Lab at Vanderbilt University and contributing members of the BCL Commons. +//// (c) +//// (c) External code contributions to the BCL are welcome. Please visit the BCL Commons GitHub page for information on how you can contribute. 
+//// (c) +//// (c) This file is part of the BCL software suite and is made available under the MIT license. +//// (c) +// +//#ifndef BCL_APP_FOCUSED_LIBRARY_DESIGN_H_ +//#define BCL_APP_FOCUSED_LIBRARY_DESIGN_H_ +// +//// include headers from the bcl - sorted alphabetically +//#include "app/bcl_app_apps.h" +//#include "chemistry/bcl_chemistry_configuration_set.h" +//#include "chemistry/bcl_chemistry_constitution_graph_converter.h" +//#include "chemistry/bcl_chemistry_constitution_set.h" +//#include "chemistry/bcl_chemistry_fragment_add_med_chem.h" +//#include "chemistry/bcl_chemistry_fragment_alchemy.h" +//#include "chemistry/bcl_chemistry_fragment_configuration_shared.h" +//#include "chemistry/bcl_chemistry_fragment_constitution_shared.h" +//#include "chemistry/bcl_chemistry_fragment_cyclize.h" +//#include "chemistry/bcl_chemistry_fragment_evolve_base.h" +//#include "chemistry/bcl_chemistry_fragment_extend_with_linker.h" +//#include "chemistry/bcl_chemistry_fragment_fluorinate.h" +//#include "chemistry/bcl_chemistry_fragment_grow.h" +//#include "chemistry/bcl_chemistry_fragment_halogenate.h" +//#include "chemistry/bcl_chemistry_fragment_mutate_mcm.h" +//#include "chemistry/bcl_chemistry_fragment_remove_atom.h" +//#include "chemistry/bcl_chemistry_fragment_remove_bond.h" +//#include "chemistry/bcl_chemistry_fragment_ring_swap.h" +//#include "chemistry/bcl_chemistry_fragment_split_interface.h" +//#include "chemistry/bcl_chemistry_fragment_track_mutable_atoms.h" +//#include "chemistry/bcl_chemistry_pick_atom_random.h" +//#include "chemistry/bcl_chemistry_pick_fragment_random.h" +//#include "chemistry/bcl_chemistry_rotamer_library_file.h" +//#include "chemistry/bcl_chemistry_sample_conformations.h" +//#include "chemistry/bcl_chemistry_score_function_generic.h" +//#include "command/bcl_command_app_default_flags.h" +//#include "command/bcl_command_command.h" +//#include "command/bcl_command_flag_dynamic.h" +//#include "command/bcl_command_flag_static.h" +//#include 
"command/bcl_command_parameter.h" +//#include "command/bcl_command_parameter_check_allowed.h" +//#include "command/bcl_command_parameter_check_file_existence.h" +//#include "command/bcl_command_parameter_check_ranged.h" +//#include "command/bcl_command_parameter_check_serializable.h" +//#include "descriptor/bcl_descriptor_cheminfo_properties.h" +//#include "descriptor/bcl_descriptor_combine.h" +//#include "io/bcl_io_file.h" +//#include "math/bcl_math_const_function.h" +//#include "math/bcl_math_mutate_combine.h" +//#include "math/bcl_math_mutate_decision_node.h" +//#include "math/bcl_math_mutate_repeat.h" +//#include "math/bcl_math_template_instantiations.h" +//#include "mc/bcl_mc_approximator.h" +//#include "mc/bcl_mc_temperature_accepted.h" +//#include "mc/bcl_mc_temperature_default.h" +//#include "mc/bcl_mc_temperature_interface.h" +//#include "model/bcl_model_retrieve_interface.h" +//#include "opti/bcl_opti_criterion_combine.h" +//#include "opti/bcl_opti_criterion_function.h" +//#include "opti/bcl_opti_criterion_number_iterations.h" +//#include "opti/bcl_opti_criterion_skipped_steps.h" +//#include "opti/bcl_opti_criterion_unimproved.h" +//#include "random/bcl_random_uniform_distribution.h" +//#include "sched/bcl_sched_scheduler_interface.h" +//#include "sched/bcl_sched_thunk_job.h" +//#include "sdf/bcl_sdf_fragment_factory.h" +//#include "sdf/bcl_sdf_mdl_handler.h" +//#include "util/bcl_util_format.h" +//#include "util/bcl_util_implementation.h" +//#include "util/bcl_util_sh_ptr.h" +// +//namespace bcl +//{ +// namespace app +// { +// +// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// //! +// //! @class FocusedLibraryDesign +// //! @brief Application for generating libraries for synthesis using QSAR models and a MCM structure generator +// //! +// //! @author brownbp1, mendenjl, loweew, geanesar +// //! @date 05/09/2020 +// //! 
+// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// class BCL_API FocusedLibraryDesign : +// public InterfaceRelease +// { +// +// private: +// +// ////////// +// // data // +// ////////// +// +// //! flag to control number of molecules to be generated +// util::ShPtr< command::FlagInterface> m_NumberMoleculesFlag; +// +// //! flag to control the number of MC iterations in molecule optimization +// util::ShPtr< command::FlagInterface> m_NumberIterationsFlag; +// +// //! flag to control the number of maximum allowed consecutive unimproved MC iterations +// util::ShPtr< command::FlagInterface> m_NumberUnimprovedFlag; +// +// //! flag to control the number of maximum allowed skipped MC iterations +// util::ShPtr< command::FlagInterface> m_NumberSkippedFlag; +// +// //! flag to control the temperature for the Metropolis criterion +// util::ShPtr< command::FlagInterface> m_MetropolisTemperatureFlag; +// +// //! flag to control input base fragment +// util::ShPtr< command::FlagInterface> m_StartFragmentFlag; +// +// //! flag to control input mutable fragment within base fragment +// util::ShPtr< command::FlagInterface> m_MutableFragmentFlag; +// +// //! flag to control input mutable atoms within base fragment +// util::ShPtr< command::FlagInterface> m_MutableAtomsFlag; +// +// //! flag for defining output filename, +// util::ShPtr< command::FlagInterface> m_OutputFilenameFlag; +// +// //! flag for defining input fragments +// util::ShPtr< command::FlagInterface> m_GrowFragmentsFlag; +// +// //! flag for an alternative score function to just the trained model +// util::ShPtr< command::FlagInterface> m_PropertyScoringFunctionFlag; +// +// //! flag for the druglikeness filter to use +// util::ShPtr< command::FlagInterface> m_DrugLikenessTypeFlag; +// +// //! flag to split molecules +// util::ShPtr< command::FlagInterface> m_SplitImplementationFlag; +// +// //! 
flag to do an internal MCM optimization +// util::ShPtr< command::FlagInterface> m_SimulatedAnnealingFlag; +// +// //! flag to maximize score instead of minimize +// util::ShPtr< command::FlagInterface> m_LargerIsBetterFlag; +// +// //! flag to save all molecules accepted or improved by main MCM +// util::ShPtr< command::FlagInterface> m_SaveAllAcceptedImprovedFlag; +// +// //! flag to use corina to generate starting conformer +// util::ShPtr< command::FlagInterface> m_Corina; +// +// //! flag to set 3D VDW score cutoff +// util::ShPtr< command::FlagInterface> m_VDWClashCutoffFlag; +// +// //! flag to enable pose-dependent scoring (default is ligand-based scoring) +// util::ShPtr< command::FlagInterface> m_PoseDependentFlag; +// +// //! flag controlling the maximum possible number of sequential mutates that can occur between MCM evaluation +// util::ShPtr< command::FlagInterface> m_MaxSequentialMutatesFlag; +// +// //! flags controlling relative probabilities of different mutate objects +// util::ShPtr< command::FlagInterface> m_RingSwapProbFlag; +// util::ShPtr< command::FlagInterface> m_CyclizeProbFlag; +// util::ShPtr< command::FlagInterface> m_AlchemyProbFlag; +// util::ShPtr< command::FlagInterface> m_RemoveAtomProbFlag; +// util::ShPtr< command::FlagInterface> m_RemoveBondProbFlag; +// util::ShPtr< command::FlagInterface> m_AddMedChemProbFlag; +// util::ShPtr< command::FlagInterface> m_FluorinateProbFlag; +// util::ShPtr< command::FlagInterface> m_HalogenateProbFlag; +// util::ShPtr< command::FlagInterface> m_ExtendWithLinkerProbFlag; +// +// /////////////////////////////////// +// // construction and destruction // +// /////////////////////////////////// +// +// //!
default constructor +// FocusedLibraryDesign(); +// +// // instantiate enumerator for FocusedLibraryDesign class +// static const ApplicationType FocusedLibraryDesign_Instance; +// +// // ThreadManager needs access to private nested classes +// friend class ThreadManager; +// friend class Worker; +// +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// //! +// //! @class ThreadManager +// //! @brief manages threads for multithreaded structure generation +// //! +// //! @author mendenjl, geanesar, brownbp1 +// //! @date Nov 7, 2013 +// //! +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// class ThreadManager : +// public util::ObjectInterface +// { +// +// private: +// +// const size_t m_NumberOfMoleculesRequested; // Number of molecules to build +// size_t m_NumberOfMoleculesBuilt; // Number of molecules already built +// const size_t m_NumberMCIterations; // Number of iterations in the MC approximator +// const size_t m_NumberMCUnimproved; // Number of allowed consecutive unimproved MC iterations +// const size_t m_NumberMCSkipped; // Number of allowed skipped MC iterations +// const float m_MetropolisTemperature; // Temperature during Metropolis criterion evaluation +// const size_t m_Threads; // Number of threads +// chemistry::FragmentEnsemble m_Molecules; // The molecules which have been built and are ready for output +// chemistry::ConstitutionSet m_UniqueConsts; // The unique molecules which have been built +// chemistry::ConfigurationSet m_UniqueConfigs; // The unique molecules which have been built +// io::OFStream m_OutputStream; // Output file to write molecules to +// const std::string m_DrugLikenessType; // type of druglikeness filter to use for skipping MCM steps +// const float m_VDWScoreCutoff; // internal VDW score cutoff for 3D conformer (used to check for mols with reasonable
substitutions) +// const util::Implementation< chemistry::FragmentSplitInterface> m_SplitImplementation; // splitter to use when making fragments for internal MCM optimization +// const std::string m_PoseDependentMDLProperty; // enable pose-dependent scoring with the receptor indicated by this property +// const std::string m_PoseDependentResolveClashes; // resolve clashes between ligand and receptor +// const size_t m_MaxSequentialMutates; +// const float m_RingSwapProb; +// const float m_CyclizeProb; +// const float m_AlchemyProb; +// const float m_RemoveAtomProb; +// const float m_RemoveBondProb; +// const float m_AddMedChemProb; +// const float m_FluorinateProb; +// const float m_HalogenateProb; +// const float m_ExtendWithLinkerProb; +// sched::Mutex m_Mutex; // Lock for updating Workers +// +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// //! +// //! @class Worker +// //! @brief runs the threads for Worker - builds molecules using metropolis monte-carlo routines +// //! +// //! @author brownbp1, mendenjl, geanesar +// //! @date May 09, 2020 +// //! 
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// struct Worker +// { +// // Rotamer library to use - read in at Main() +// util::ShPtr< chemistry::FragmentComplete> m_StartFragment; // Base fragment to use +// chemistry::FragmentEnsemble m_MutableFragment; // mutable fragment in base fragment +// storage::Vector< size_t> m_MutableAtomIndices; // mutable atoms in base fragment +// descriptor::CheminfoProperty m_PropertyScorer; // Set objective function with property instead of model +// size_t m_NumberMCIterations; // Number of MC iterations +// size_t m_NumberMCUnimproved; // Number of allowed consecutive unimproved MC iterations +// size_t m_NumberMCSkipped; // Number of allowed skipped MC iterations +// float m_MetropolisTemperature; // Temperature during Metropolis criterion evaluation +// opti::Tracker< chemistry::FragmentComplete, double> m_OptiGoal; +// bool m_SaveAllAcceptedImproved; +// std::string m_ConformationComparer; // Conformation comparer +// util::ShPtr< math::FunctionInterfaceSerializable< chemistry::FragmentComplete, double> > m_Score; // Objective function +// util::ShPtr< math::MutateInterface< chemistry::FragmentComplete> > m_Mutate; // Grow molecules from scaffold +// bool m_Corina; // enables corina conformers during cleaning +// util::SiPtr< ThreadManager> m_ThreadManager; // Pointer to the thread manager, needed so Worker can be updated +// +// // Builds and score the molecule +// void RunThread(); +// +// // Print to screen the properties of our last accepted molecule +// void ReportThreadMoleculeProgress(); +// +// }; // struct Worker +// +// public: +// +// //!
@brief constructor +// ThreadManager( +// util::ShPtr< chemistry::FragmentComplete> START_FRAGMENT, // Base fragment to use +// chemistry::FragmentEnsemble MUTABLE_FRAGMENT, // mutable fragment in base fragment +// storage::Vector< size_t> MUTABLE_ATOM_INDICES, // mutable atom indices in base fragment +// util::ShPtr< chemistry::FragmentEnsemble> FRAGMENT_POOL, // Fragments to add to base fragment +// descriptor::CheminfoProperty PROPERTY_SCORER, // alternative scorer +// bool INTERNAL_MCM_OPTI, +// opti::Tracker< chemistry::FragmentComplete, double> MCM_OPTI_GOAL, +// bool SAVE_ALL_ACCEPTED_IMPROVED, +// const size_t &NUMBER_OF_MOLECULES, // Number to build +// const size_t &NUMBER_OF_ITERATIONS, // Number of MC iterations +// const size_t &NUMBER_UNIMPROVED_ITERATIONS, // Number of allowed consecutive unimproved MC iterations +// const size_t &NUMBER_SKIPPED_ITERATIONS, // Number of allowed skipped MC iterations +// const float &METROPOLIS_TEMPERATURE, // Temperature during Metropolis criterion evaluation +// const size_t &NUMBER_THREADS, // Number of threads (from scheduler) +// const std::string &OUTPUT_FILENAME, +// const std::string &DRUG_LIKENESS_TYPE, +// const float &VDW_SCORE_CUTOFF, +// const util::Implementation< chemistry::FragmentSplitInterface> &SPLIT_IMPLEMENTATION, +// const size_t &MAX_SEQUENTIAL_MUTATES, +// const float &RING_SWAP_PROB, +// const float &CYCLIZE_PROB, +// const float &ALCHEMY_PROB, +// const float &REMOVE_ATOM_PROB, +// const float &REMOVE_BOND_PROB, +// const float &ADD_MEDCHEM_PROB, +// const float &FLUORINATE_PROB, +// const float &HALOGENATE_PROB, +// const float &EXTEND_WITH_LINKER_PROB, +// const std::string &POSE_DEPENDENT_MDL_PROPERTY, // pose-dependent scoring +// const std::string &POSE_DEPENDENT_RESOLVE_CLASHES, // resolve clashes +// const bool &CORINA_CONFS // enables corina conformers during cleaning +// ); +// +// //! @brief clone function +// ThreadManager *Clone() const; +// +// //!
@brief Get class identifier string +// const std::string &GetClassIdentifier() const; +// +// //! @brief +// size_t GetNumberMoleculesBuilt(); +// +// //! @brief +// size_t GetNumberMoleculesToBuild(); +// +// //! @brief +// size_t GetNumberMCIterations(); +// +// //! @brief +// size_t GetNumberMCUnimproved(); +// +// //! @brief +// size_t GetNumberMCSkipped(); +// +// //! @brief Return FragmentEnsemble of the generated molecules +// chemistry::FragmentEnsemble &GetMolecules() +// +// //! @brief Tests to see if the worker should keep running +// bool UpdateWorker( Worker &WORKER); +// +// //! @brief Increase the number of molecules that have been built +// void IncreaseMoleculeBuiltCount(); +// +// //! @brief Return true if this molecule is unique from the generated molecules at the constitutional level +// bool CheckUniqueConstitution( const chemistry::FragmentComplete &MOLECULE); +// +// //! @brief Return true if this molecule is unique from the generated molecules at the configurational level +// bool CheckUniqueConfiguration( const chemistry::FragmentComplete &MOLECULE); +// +// //! @brief Save a molecule to the growing ensemble +// void AddMolecule( const chemistry::FragmentComplete &MOLECULE); +// +// protected: +// +// std::istream &Read( std::istream &INSTREAM); +// +// std::ostream &Write( std::ostream &OUTSTREAM, const size_t INDENT) const; +// +// }; // class ThreadManager +// +// public: +// +// //! @brief Clone function +// //! @return pointer to new FocusedLibraryDesign +// FocusedLibraryDesign *Clone() const; +// +// ///////////////// +// // data access // +// ///////////////// +// +// //! @brief returns class name of the object behind a pointer or the current object +// //! @return the class name +// const std::string &GetClassIdentifier() const; +// +// //! @brief returns readme information +// //! @return string containing information about application +// const std::string &GetReadMe() const; +// +// //! @brief get a description for the app +// //!
@return a brief (no more than 3 line) description for the application +// std::string GetDescription() const; +// +// ////////////////////// +// // helper functions // +// ////////////////////// +// +// //////////////// +// // operations // +// //////////////// +// +// //! @brief initializes the command object for that executable +// util::ShPtr< command::Command> InitializeCommand() const; +// +// //! @brief the Main function +// //! @return error code - 0 for success +// int Main() const; +// +// ////////////////////// +// // input and output // +// ////////////////////// +// +// protected: +// +// //! @brief read from std::istream +// //! @param ISTREAM input stream +// //! @return istream which was read from +// std::istream &Read( std::istream &ISTREAM); +// +// //! @brief write to std::ostream +// //! @param OSTREAM output stream to write to +// //! @param INDENT number of indentations +// //! @return output stream which was written to +// std::ostream &Write( std::ostream &OSTREAM, const size_t INDENT) const; +// +// }; // FocusedLibraryDesign +// +// } // namespace app +//} // namespace bcl +// +//#endif BCL_APP_FOCUSED_LIBRARY_DESIGN_H_ +#endif // BCL_APP_FOCUSED_LIBRARY_DESIGN_H_ diff --git a/apps/molecule/bcl_app_molecule_fit.cpp b/apps/molecule/bcl_app_molecule_fit.cpp index a55614cc3..eead4fa5c 100644 --- a/apps/molecule/bcl_app_molecule_fit.cpp +++ b/apps/molecule/bcl_app_molecule_fit.cpp @@ -254,6 +254,7 @@ namespace bcl } // add fit molecules to molecule vector and increment index + BCL_MessageStd( "Completed molecule fitting procedure. 
Saving repositioned molecule."); m_ThreadManager->m_Mutex.Lock(); if( m_CurrentWorkerMolIndex < m_ThreadManager->GetNumberMoleculesToFit()) { @@ -342,12 +343,14 @@ namespace bcl chemistry::FragmentEnsemble final_poses; // Perform property-bases small molecule alignment + BCL_MessageStd("Run small molecule property-based alignment!"); storage::Vector< // indexes scaffold storage::Vector< // indexes alignment solution storage::Triplet< chemistry::FragmentComplete, chemistry::FragmentComplete, double // single alignment result > > > aligned_mols( PropertyBasedAlignment( MOLECULE, SCAFFOLDS)); // Find the best alignment + BCL_MessageStd("Find best alignment..."); float min_rmsdx( math::GetHighestBoundedValue< float>()); for ( @@ -356,6 +359,7 @@ namespace bcl ++scaffold_index ) { + BCL_MessageStd("A"); // go over each alignment solution per scaffold for ( @@ -365,13 +369,17 @@ namespace bcl ) { // make a reference to our mol to shorthand this - chemistry::FragmentComplete &mol( aligned_mols( scaffold_index)( alignment_index).First()); - float mol_rmsdx( mol.GetStoredProperties().GetMDLPropertyAsVector( "RMSDX")( 0)); + BCL_MessageStd("B"); + const chemistry::FragmentComplete &mol( aligned_mols( scaffold_index)( alignment_index).First()); + BCL_MessageStd("C"); + const float &mol_rmsdx( aligned_mols( scaffold_index)( alignment_index).Third()); + BCL_MessageStd("RMSDX to scaffold #" + util::Format()( scaffold_index) + " is " + util::Format()( mol_rmsdx)); // save the best alignment if( mol_rmsdx < min_rmsdx) { min_rmsdx = mol_rmsdx; + BCL_MessageStd("New best RMSDX: " + util::Format()( min_rmsdx)); MOLECULE = mol; } } // end current alignment solution @@ -537,6 +545,19 @@ namespace bcl // cast scaffold to FragmentEnsemble chemistry::FragmentEnsemble scaffold( storage::List< chemistry::FragmentComplete>( size_t( 1), SCAFFOLDS( scaffold_index))); + storage::Vector< size_t> keep_indices_a, keep_indices_b; + chemistry::ConformationComparisonPsiFlexField::GetNonMaskedAtoms + 
( + ensemble.GetMolecules().FirstElement(), + scaffold.GetMolecules().FirstElement(), + m_WorkerMolAlign.GetExclusionIndicesA(), + m_WorkerMolAlign.GetExclusionIndicesB(), + keep_indices_a, + keep_indices_b + ); + m_WorkerMolAlign.SetKeepIndicesA( keep_indices_a); + m_WorkerMolAlign.SetKeepIndicesB( keep_indices_b); + // for each molecule-scaffold pair, we generate multiple solutions BCL_MessageStd( "Aligning molecule to scaffold: " + util::Format()( scaffold_index) + " (0-indexed)"); storage::Vector< storage::Triplet< chemistry::FragmentComplete, chemistry::FragmentComplete, double> > aligned_mols @@ -1665,7 +1686,9 @@ namespace bcl "number_outputs=1," "align_to_scaffold=0," "initial_rand_rotation=0," - "exclusion_indices_a="",exclusion_indices_b="",pose_tolerance=0.125," + "exclusion_indices_a=""," + "exclusion_indices_b=""," + "pose_tolerance=0.125," "pose_score_threshold=2," "flip_prob=0.06," "big_rot_prob=0.06," diff --git a/include/chemistry/bcl_chemistry_fragment_map_conformer.h b/include/chemistry/bcl_chemistry_fragment_map_conformer.h index 03918f00f..868d23397 100644 --- a/include/chemistry/bcl_chemistry_fragment_map_conformer.h +++ b/include/chemistry/bcl_chemistry_fragment_map_conformer.h @@ -68,7 +68,7 @@ namespace bcl ////////// //! type of drug likeness filter to apply during molecule cleaning - std::string m_DrugLikenessType; + descriptor::CheminfoProperty m_DrugLikenessType; //! MDL property label specifying path to protein binding pocket std::string m_MDL; @@ -129,7 +129,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMapConformer ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool CORINA_CONFS, const storage::Vector< size_t> &MOVEABLE_INDICES = storage::Vector< size_t>() ); @@ -143,7 +143,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMapConformer ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const std::string &MDL, const std::string &BINDING_POCKET_FILENAME, const descriptor::CheminfoProperty &PROPERTY_SCORER, @@ -191,9 +191,10 @@ namespace bcl AtomVector< AtomComplete> CleanAtoms ( const AtomVector< AtomComplete> &ATOM_VEC, - const std::string &DRUG_LIKENESS_TYPE = "None", - const bool &SKIP_NEUT = true, - const bool &SKIP_SATURATE_H = false + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE = descriptor::CheminfoProperty( "Constant(1.0)"), + const bool SKIP_NEUT = true, + const bool SKIP_SATURATE_H = false, + const bool SKIP_SPLIT = false ) const; //! @brief virtual operator taking an fragment and generating a new fragment by growing on a valence @@ -205,8 +206,10 @@ namespace bcl ( const AtomVector< AtomComplete> &ATOM_VEC, const FragmentComplete &REFERENCE_MOL, - const std::string &DRUG_LIKENESS_TYPE = "None", - const bool &SKIP_NEUT = true + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE = descriptor::CheminfoProperty( "Constant(1.0)"), + const bool SKIP_NEUT = true, + const bool SKIP_SATURATE_H = false, + const bool SKIP_SPLIT = false ) const; //! 
@brief preserve conformational information from starting molecule in new molecule diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_add.h b/include/chemistry/bcl_chemistry_fragment_mutate_add.h index ce2a2fb10..d3dfaab0f 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_add.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_add.h @@ -105,7 +105,7 @@ namespace bcl FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -118,7 +118,7 @@ namespace bcl FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -138,7 +138,7 @@ namespace bcl FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -161,7 +161,7 @@ namespace bcl FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.h b/include/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.h index 2ffc636e0..a8a9748d9 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.h @@ -105,7 +105,7 @@ namespace bcl 
FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -118,7 +118,7 @@ namespace bcl FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -138,7 +138,7 @@ namespace bcl FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -161,7 +161,7 @@ namespace bcl FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_alchemy.h b/include/chemistry/bcl_chemistry_fragment_mutate_alchemy.h index 7dbdc4f4d..c13093dda 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_alchemy.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_alchemy.h @@ -105,7 +105,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -114,7 +114,7 @@ namespace bcl //! 
@param SCAFFOLD_FRAGMENT fragment to which the new mutated molecule will be aligned based on substructure FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const bool &CORINA_CONFS ); @@ -126,7 +126,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -144,7 +144,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -165,7 +165,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_combine.h b/include/chemistry/bcl_chemistry_fragment_mutate_combine.h index 13cfa9e57..4ed16f023 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_combine.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_combine.h @@ -105,7 +105,7 @@ namespace bcl FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -118,7 +118,7 @@ namespace bcl FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -138,7 +138,7 @@ namespace bcl FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -161,7 +161,7 @@ namespace bcl FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git 
a/include/chemistry/bcl_chemistry_fragment_mutate_connect.h b/include/chemistry/bcl_chemistry_fragment_mutate_connect.h index e90773410..2ccb72204 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_connect.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_connect.h @@ -214,7 +214,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -225,7 +225,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -243,7 +243,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -264,7 +264,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_cyclize.h b/include/chemistry/bcl_chemistry_fragment_mutate_cyclize.h index 8fabb5919..efe6d0831 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_cyclize.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_cyclize.h @@ -95,7 +95,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateCyclize ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -113,7 +113,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateCyclize ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -134,7 +134,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateCyclize ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.h b/include/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.h index 162504136..d0725f152 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.h @@ -163,7 +163,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateExtendWithLinker ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -181,7 +181,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateExtendWithLinker ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -202,7 +202,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateExtendWithLinker ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_fluorinate.h b/include/chemistry/bcl_chemistry_fragment_mutate_fluorinate.h index 4d9c8a831..980336bef 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_fluorinate.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_fluorinate.h @@ -105,7 +105,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -116,7 +116,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -134,7 +134,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -155,7 +155,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -198,7 +198,7 @@ namespace bcl //////////////// //! @brief set reversibility - void SetReverisibility( const bool REVERSIBLE); + void SetReversibility( const bool REVERSIBLE); protected: diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_halogenate.h b/include/chemistry/bcl_chemistry_fragment_mutate_halogenate.h index 684107e4f..05b4c51b0 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_halogenate.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_halogenate.h @@ -101,7 +101,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -112,7 +112,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -130,7 +130,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -151,7 +151,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_interface.h b/include/chemistry/bcl_chemistry_fragment_mutate_interface.h index e512d5061..eda30eb89 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_interface.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_interface.h @@ -70,9 +70,8 @@ namespace bcl //////// General miscellaneous //////// - // TODO: replace string with a CheminfoProperty object? //! type of drug likeness filter to apply during molecule cleaning - std::string m_DrugLikenessType = "None"; + descriptor::CheminfoProperty m_DrugLikenessType = descriptor::CheminfoProperty( "IsConstitutionDruglike"); //! MDL property label specifying path to protein binding pocket std::string m_MDL = std::string(); @@ -90,7 +89,7 @@ namespace bcl bool m_Corina = false; //! max number of tries for a single mutate - size_t m_NumberMaxAttempts = 10; + size_t m_NumberMaxAttempts = size_t( 10); //! shuffle hydrogen atoms bonded to a target heavy atom prior to removal for opening valences bool m_OVShuffleH = true; @@ -98,6 +97,31 @@ namespace bcl //! reverse the direction of valence opening such that higher index hydrogen atoms are removed first bool m_OVReverse = false; + //! do not neutralize the molecule after making a change + bool m_SkipNeutralization = true; + + //! do not saturate with hydrogen atoms after making a change + bool m_SkipSaturateH = false; + + //! do not split small fragments from a complex (split) molecule after a change + bool m_SkipSplit = false; + + //! add atoms with bad geometry to the atoms that can be sampled for conformer generation + //! 
after the mutate is applied + bool m_FixGeometry = true; + + //! extend atoms included in conformational sampling this many bonds out from any perturbed atom + size_t m_ExtendAdjacentAtoms = size_t( 1); + + //! add atoms in a shared ring with the perturbed atom(s) to the list of moveable atoms during + //! 3D conformer generation; generally recommended for aromatic systems, but may or may not + //! be needed for nonconjugated ring systems + bool m_ExtendRingAtoms = true; + + //! perform a quick substructure-based ensemble align and choose best conformer based on ChargeRMSD; + //! generally only worthwhile for ringswaps that occur in the middle of a molecule + bool m_ChooseBestAlignedConf = false; + //! reference molecule for substructure-based alignment during 3D conformer construction FragmentComplete m_ScaffoldFragment = FragmentComplete(); std::string m_ScaffoldFragmentFilename; @@ -316,7 +340,7 @@ namespace bcl //////////////// //! @brief set druglikeness filter type - void SetDruglikenessType( const std::string &DRUGLIKENESS_TYPE); + void SetDruglikenessType( const descriptor::CheminfoProperty &DRUGLIKENESS_TYPE); //! @brief set MDL SDF property label for the receptor path for BCL structure-based scoring void SetMDLReceptorLabel( const std::string &MDL); @@ -420,31 +444,29 @@ namespace bcl // helper functions // ////////////////////// - // TODO make static //! @brief remove a hydrogen atom from a target atom //! @param FRAGMENT the molecule of interest //! @param ATOM_INDEX the index of the atom in the molecule of interest //! @param SHUFFLE_H if true, randomly select a hydrogen atom to remove //! @param REVERSE_H if true and not SHUFFLE_H, begin removal with the highest index hydrogen atom //! 
@return the new molecule, the index of the desired atom, and the original index of the removed hydrogen atom - storage::Triplet< FragmentComplete, size_t, size_t> OpenValence + static storage::Triplet< FragmentComplete, size_t, size_t> OpenValence ( const FragmentComplete &FRAGMENT, const size_t &ATOM_INDEX, const bool SHUFFLE_H = true, const bool REVERSE_H = false - ) const; + ); - // TODO make static //! @brief checks whether substitution at this atom is ortho, meta, or para directed //! @param MOLECULE the small molecule of interest //! @param ATOM simple pointer to the atom of interest in the molecule //! @return return true if the substitution is directed correctly - bool IsRingSubstitutionDirected + static bool IsRingSubstitutionDirected ( const FragmentComplete &MOLECULE, util::SiPtr< const AtomConformationalInterface> &ATOM - ) const; + ); //! @brief select an atom from the target fragment //! @brief MOLECULE molecule from which to choose atom diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_mcm.h b/include/chemistry/bcl_chemistry_fragment_mutate_mcm.h index 7c262e944..80b2aeb3c 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_mcm.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_mcm.h @@ -126,7 +126,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -145,7 +145,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty 
&DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -178,7 +178,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -204,7 +204,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_react.h b/include/chemistry/bcl_chemistry_fragment_mutate_react.h index d486f2a9f..d40c06063 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_react.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_react.h @@ -80,15 +80,9 @@ namespace bcl //! overrides 3D conformer settings to just produce an arbitrary conformer without preserving spatial information bool m_LigandBased; - //! pose-dependent; if 3D conformer matters, fix atoms with bad geometry even if they are in reference structure - bool m_CorrectGeometry; - //! pose-dependent; if 3D conformer matters, add all ring atoms from non-reference scaffolds to mobile selection bool m_CorrectNonReferenceRingGeometry; - //! pose-dependent; if 3D conformer matters, fix atoms this many bonds out from any other mobile atom - size_t m_AdditionalAdjacentAtoms; - public: //! 
single instance of that class diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_remove_atom.h b/include/chemistry/bcl_chemistry_fragment_mutate_remove_atom.h index 003b510d8..a7b8ba495 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_remove_atom.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_remove_atom.h @@ -68,6 +68,13 @@ namespace bcl // data // ////////// + + //! only mutate hydrogen atoms; if heavy atoms are made mutable, only consider their hydrogen(s); + bool m_RestrictToBondedH; + + //! only mutate heavy atoms; if hydrogen atoms are made mutable, only consider their bonded partner + bool m_RestrictToBondedHeavy; + public: ////////// @@ -88,7 +95,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -99,7 +106,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -117,7 +124,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -138,7 +145,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_remove_bond.h b/include/chemistry/bcl_chemistry_fragment_mutate_remove_bond.h index 7adb69bb2..53fa425ba 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_remove_bond.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_remove_bond.h @@ -115,7 +115,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -126,7 +126,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -144,7 +144,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -165,7 +165,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.h b/include/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.h index c1ceea531..05d3d9bd7 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.h @@ -96,7 +96,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ); @@ -108,7 +108,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -127,7 +127,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -149,7 +149,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/include/chemistry/bcl_chemistry_fragment_mutate_ring_swap.h b/include/chemistry/bcl_chemistry_fragment_mutate_ring_swap.h index 6b56e39c1..9b9c88513 100644 --- a/include/chemistry/bcl_chemistry_fragment_mutate_ring_swap.h +++ b/include/chemistry/bcl_chemistry_fragment_mutate_ring_swap.h @@ -85,14 +85,6 @@ namespace bcl //! This also defines the probability of removing a ring entirely double m_RingInitiationProbability; - //! Try to fix the conformation after the ring swap (bond angles and lengths only. Dihedrals are not preserved) - //! This is *very* slow and not recommended if you're only using 2d descriptors - bool m_FixGeometry; - - //! Neutralize the molecule after ring swap - //! Recommended unless explicitly using a model trained on formally charged molecules - bool m_Neutralize; - //! if true, at most one ring may change size. For an additional ring to be added to the system then, all other //! rings must have the same size bool m_RestrictToNoMoreThanOneRingSizeChange; @@ -103,11 +95,11 @@ namespace bcl //! if true, align new ring to current ring prior to substitution to preserve topological distances between substituents as best as possible bool m_AlignRings; - //! extend atoms included in conformational sampling this many bonds out from any perturbed atom - size_t m_ExtendAdjacentAtoms; - - //! perform a quick substructure-based ensemble align and choose best conformer based on ChargeRMSD - bool m_ChooseBestAlignedConf; + //! if true, ignore stored counts and set the count for each fragment to 1; + //! 
this has the effect of making each individual entry in the fragment pool equally likely to be selected; + //! note that this also has the effect of allowing users to control fragment selection probability through + //! multiple entries rather than the 'ScaffoldCount' MDL property + bool m_SetCountsToOne; //! Scheme used for comparing whether two bonds are equivalent ConfigurationalBondTypeData::DataEnum m_BondComparisonType; @@ -135,7 +127,7 @@ namespace bcl explicit FragmentMutateRingSwap ( const util::ShPtr< SearchFragmentLibraryFromTree> &FRAGMENT_LIBRARY, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -144,7 +136,6 @@ namespace bcl const bool &RESOLVE_CLASHES, const storage::Vector< float> &BFACTORS, const bool &CORINA, - const bool &FIX_GEOMETRY, const bool &NEUTRALIZE, const double &RING_INITIATION_PROBABILITY, const bool &PREVENT_MORE_THAN_ONE_RING_FROM_CHANGING_SIZE, @@ -155,12 +146,11 @@ namespace bcl explicit FragmentMutateRingSwap ( const util::ShPtr< SearchFragmentLibraryFromTree> &FRAGMENT_LIBRARY, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, const bool &CORINA, - const bool &FIX_GEOMETRY, const bool &NEUTRALIZE, const double &RING_INITIATION_PROBABILITY, const bool &PREVENT_MORE_THAN_ONE_RING_FROM_CHANGING_SIZE, diff --git a/include/chemistry/bcl_chemistry_fragment_split_by_index.h b/include/chemistry/bcl_chemistry_fragment_split_by_index.h new file mode 100644 index 000000000..9422446ab --- /dev/null +++ b/include/chemistry/bcl_chemistry_fragment_split_by_index.h @@ -0,0 +1,155 @@ +// (c) Copyright BCL @ Vanderbilt University 2014 +// (c) BCL Homepage: 
http://www.meilerlab.org/bclcommons +// (c) BCL Code Repository: https://github.com/BCLCommons/bcl +// (c) +// (c) The BioChemical Library (BCL) was originally developed by contributing members of the Meiler Lab @ Vanderbilt University. +// (c) +// (c) The BCL is now made available as an open-source software package distributed under the permissive MIT license, +// (c) developed and maintained by the Meiler Lab at Vanderbilt University and contributing members of the BCL Commons. +// (c) +// (c) External code contributions to the BCL are welcome. Please visit the BCL Commons GitHub page for information on how you can contribute. +// (c) +// (c) This file is part of the BCL software suite and is made available under the MIT license. +// (c) + +#ifndef BCL_CHEMISTRY_FRAGMENT_SPLIT_BY_INDEX_H_ +#define BCL_CHEMISTRY_FRAGMENT_SPLIT_BY_INDEX_H_ + +// include the namespace header +#include "bcl_chemistry.h" + +// include other forward headers - sorted alphabetically + +// includes from bcl - sorted alphabetically +#include "bcl_chemistry_fragment_ensemble.h" +#include "bcl_chemistry_fragment_split_interface.h" +#include "io/bcl_io_serializer.h" +#include "util/bcl_util_enumerated.h" + +namespace bcl +{ + namespace chemistry + { + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //! + //! @class FragmentSplitByIndex + //! @brief Splits molecules by removing the atoms at user-specified indices + //! + //! @see @link example_chemistry_fragment_split_largest_common_substructure.cpp @endlink + //! @author ben + //! @date Jul 07, 2022 + //! + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class BCL_API FragmentSplitByIndex : + public FragmentSplitInterface + { + + private: + + ////////// + // data // + ////////// + + //! 
the atom indices to remove from the input molecules + std::string m_AtomIndicesString; + mutable storage::Vector< size_t> m_AtomIndices; + + //! remove bonded hydrogen atoms to target atoms + bool m_RemoveBondedH; + + //! invert the atom index selection prior to removal + bool m_Invert; + + //! break a fragment complex into isolated fragments + bool m_Break; + + //! close open valences with hydrogen atoms after breaks + bool m_CloseOpenValences; + + public: + + //! single instance of that class + static const util::SiPtr< const util::ObjectInterface> s_Instance; + + ////////////////////////////////// + // construction and destruction // + ////////////////////////////////// + + //! virtual copy constructor + FragmentSplitByIndex *Clone() const; + + //! @brief constructor + FragmentSplitByIndex + ( + const storage::Vector< size_t> &ATOM_INDICES = storage::Vector< size_t>(), + const bool REMOVE_BONDED_H = true, + const bool INVERT = false, + const bool BREAK = false, + const bool CLOSE_OPEN_VALENCES = false + ); + + ///////////////// + // data access // + ///////////////// + + //! @brief returns class name + //! @return the class name as const ref std::string + const std::string &GetClassIdentifier() const; + + //! @brief returns the name used for this class in an object data label + //! @return the name used for this class in an object data label + const std::string &GetAlias() const; + + //! @brief Get a description for what this class does (used when writing help) + //! @return a description for what this class does (used when writing help) + const std::string &GetClassDescription() const; + + //! @return the minimum size of fragments + const size_t GetMinSize() const; + + private: + + //! 
@return the indices of all hydrogen atoms bonded to target atoms + storage::Vector< size_t> GetBondedHydrogenAtoms + ( + const storage::Vector< size_t> &ATOM_INDICES, + const AtomVector< AtomComplete> &MOLECULE_ATOMS + ) const; + + //////////////// + // operations // + //////////////// + + public: + + //! @brief returns an ensemble of fragments of a molecule + //! @param CONFORMATION molecule of interest + //! @return an ensemble of fragments of the given molecule + FragmentEnsemble operator()( const ConformationInterface &CONFORMATION) const; + + ////////////////////// + // helper functions // + ////////////////////// + + //! @brief reads in molecules from a given file if it is necessary + void ReadFile() const; + + protected: + + //! @brief Set the members of this property from the given LABEL + //! @param LABEL the label to parse + //! @param ERR_STREAM stream to write out errors to + bool ReadInitializerSuccessHook( const util::ObjectDataLabel &LABEL, std::ostream &ERR_STREAM); + + //! @brief return parameters for member data that are set up from the labels + //! 
@return parameters for member data that are set up from the labels + io::Serializer GetSerializer() const; + + }; + + } // namespace chemistry +} // namespace bcl + +#endif // BCL_CHEMISTRY_FRAGMENT_SPLIT_BY_INDEX_H_ diff --git a/include/chemistry/bcl_chemistry_score_function_generic.h b/include/chemistry/bcl_chemistry_score_function_generic.h index 6a6a012f6..58c516781 100644 --- a/include/chemistry/bcl_chemistry_score_function_generic.h +++ b/include/chemistry/bcl_chemistry_score_function_generic.h @@ -27,6 +27,7 @@ #include "descriptor/bcl_descriptor_cheminfo_properties.h" #include "math/bcl_math_function_interface_serializable.h" #include "util/bcl_util_sh_ptr_vector.h" +#include "util/bcl_util_wrapper_enum.h" // external includes - sorted alphabetically @@ -37,7 +38,8 @@ namespace bcl //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! //! @class ScoreFunctionGeneric - //! @brief Initializes a score function as a CheminfoProperty and returns the mean property value across all indices + //! @brief Initializes a score function as a CheminfoProperty, computes that property on the given molecule, and + //! returns one of several metrics from the property vector. //! //! @see @link example_chemistry_score_function_generic.cpp @endlink //! @author brownbp1 @@ -48,11 +50,55 @@ namespace bcl class BCL_API ScoreFunctionGeneric : public math::FunctionInterfaceSerializable< FragmentComplete, double> { + + public: + + ////////// + // Enum // + ////////// + + //! 
@brief methods for how a single value is computed from the property vector + enum CalculationType + { + e_Index = 0, //!< Return a property value by a specific vector index + e_Sum, //!< Return the sum of all property values + e_Mean, //!< Return the average of all property values + e_Min, //!< Return the minimum of all property values + e_Max, //!< Return the maximum of all property values + e_NormMax, //!< Return the normalized maximum of all property values + e_SoftMax, //!< Return the exponentiated normalized maximum of all property values + e_Entropy, //!< Return the information entropy computed from all values + s_NumberCalculationTypes + }; + + //! @brief CalculationType as string + //! @param CALCULATION_TYPE the calculation type whose name is desired + //! @return the name as string + static const std::string &GetCalculationTypeName( const CalculationType &CALCULATION_TYPE); + + //! CalculationTypeEnum simplifies the usage of the CalculationType enum of this class + typedef util::WrapperEnum< CalculationType, &GetCalculationTypeName, s_NumberCalculationTypes> CalculationTypeEnum; + private: - // the descriptor to use + //! the descriptor to use descriptor::CheminfoProperty m_Descriptor; + //! the method to use when returning the descriptor value + CalculationTypeEnum m_CalculationType; + + //! the reference index in the property vector for certain scores + size_t m_PropertyIndex; + + //! invert the property values; occurs prior to any normalization + bool m_Invert; + + //! normalize the property values; occurs after any inversion + bool m_Normalize; + + //! add some noise to bins so that ln(0) is not undefined + double m_Noise; + public: //! single instance of that class @@ -65,13 +111,28 @@ namespace bcl //! @brief default constructor ScoreFunctionGeneric(); - //! @brief constructor with parameters + //! @brief constructor with a property parameter //! @param DESCRIPTOR the descriptor to use ScoreFunctionGeneric ( const descriptor::CheminfoProperty &DESCRIPTOR ); + //! 
@brief constructor with all parameters + //! @param DESCRIPTOR the descriptor to use + //! @param INDEX the reference index for certain scores + //! @param INVERT invert each value in the property array + //! @param NORMALIZE normalize property array to sum of values + //! @param NOISE add some small value to bins to avoid ln(0) = nan + explicit ScoreFunctionGeneric + ( + const descriptor::CheminfoProperty &DESCRIPTOR, + const size_t INDEX, + const bool INVERT, + const bool NORMALIZE, + const double NOISE + ); + //! @brief Clone function //! @return pointer to new ScoreFunctionGeneric ScoreFunctionGeneric *Clone() const; @@ -88,6 +149,30 @@ namespace bcl //! @return the class name when used in a dynamic context const std::string &GetAlias() const; + //! @brief return the value at a single index + const double CalcIndexValue( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return the sum of property values + const double CalcSum( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return the mean descriptor value + const double CalcMean( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return the min descriptor value + const double CalcMin( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return the max descriptor value + const double CalcMax( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return the maximum value after prop[ref_index]/sum_0-->N(prop) + const double CalcNormMax( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return exp(prop[ref_index])/sum_0-->N(exp(prop)) + const double CalcSoftMax( const linal::Vector< float> &PROPERTIES) const; + + //! @brief return the entropy of the dataset + const double CalcEntropy( const linal::Vector< float> &PROPERTIES) const; + /////////////// // operators // /////////////// @@ -100,6 +185,16 @@ namespace bcl const FragmentComplete &MOLECULE ) const; + ////////////////////// + // helper functions // + ////////////////////// + + //! 
@brief invert each value of the property vector + void Invert( linal::Vector< float> PROPERTIES) const; + + //! @brief normalize property vector by sum of all values + void Normalize( linal::Vector< float> PROPERTIES) const; + ////////////////////// // input and output // ////////////////////// diff --git a/source/chemistry/CMakeLists.txt b/source/chemistry/CMakeLists.txt index 77b67c7c7..3d2e2b5ce 100644 --- a/source/chemistry/CMakeLists.txt +++ b/source/chemistry/CMakeLists.txt @@ -114,6 +114,7 @@ SET( ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_mutate_ring_swap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_probability_score.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_react.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_split_by_index.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_split_common_substructure.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_split_conformations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bcl_chemistry_fragment_split_ecfp_fragments.cpp diff --git a/source/chemistry/bcl_chemistry_conformation_comparison_psi_flex_field.cpp b/source/chemistry/bcl_chemistry_conformation_comparison_psi_flex_field.cpp index 9f9f9c313..3af991e48 100644 --- a/source/chemistry/bcl_chemistry_conformation_comparison_psi_flex_field.cpp +++ b/source/chemistry/bcl_chemistry_conformation_comparison_psi_flex_field.cpp @@ -189,6 +189,8 @@ namespace bcl const ConformationInterface &MOLECULE_B ) const { + + // TODO: there needs to be a catch for this in FieldOptimizeOrientationFlex storage::Vector< size_t> keep_indices_a, keep_indices_b; GetNonMaskedAtoms( MOLECULE_A, MOLECULE_B, m_ExclusionIndicesA, m_ExclusionIndicesB, keep_indices_a, keep_indices_b); m_KeepIndicesA = keep_indices_a; diff --git a/source/chemistry/bcl_chemistry_fragment_map_conformer.cpp b/source/chemistry/bcl_chemistry_fragment_map_conformer.cpp index 58515d52e..5a30233c8 100644 --- a/source/chemistry/bcl_chemistry_fragment_map_conformer.cpp +++ 
b/source/chemistry/bcl_chemistry_fragment_map_conformer.cpp @@ -79,7 +79,7 @@ namespace bcl //! @brief default constructor FragmentMapConformer::FragmentMapConformer() : - m_DrugLikenessType( "IsConstitutionDruglike"), + m_DrugLikenessType( descriptor::CheminfoProperty( "IsConstitutionDruglike")), m_ResolveClashes( false), m_VDWClashCutoff( 5.0), m_Corina( false), @@ -95,11 +95,11 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMapConformer::FragmentMapConformer ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool CORINA_CONFS, const storage::Vector< size_t> &MOVEABLE_INDICES ) : - m_DrugLikenessType( "IsConstitutionDruglike"), + m_DrugLikenessType( descriptor::CheminfoProperty( "IsConstitutionDruglike")), m_ResolveClashes( false), m_VDWClashCutoff( 5.0), m_Corina( CORINA_CONFS), @@ -120,7 +120,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMapConformer::FragmentMapConformer ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const std::string &MDL, const std::string &BINDING_POCKET_FILENAME, const descriptor::CheminfoProperty &PROPERTY_SCORER, @@ -200,14 +200,16 @@ namespace bcl ( const AtomVector< AtomComplete> &ATOM_VEC, const FragmentComplete &REFERENCE_MOL, - const std::string &DRUG_LIKENESS_TYPE, - const bool &SKIP_NEUT + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, + const bool SKIP_NEUT, + const bool SKIP_SATURATE_H, + const bool SKIP_SPLIT ) const { // clean atoms AtomVector< AtomComplete> new_mol_atoms_noh ( - CleanAtoms( ATOM_VEC, DRUG_LIKENESS_TYPE, SKIP_NEUT) + CleanAtoms( ATOM_VEC, DRUG_LIKENESS_TYPE, SKIP_NEUT,SKIP_SATURATE_H, SKIP_SPLIT) ); // exit if we failed the atom cleaning @@ -417,7 +419,7 @@ namespace bcl } // check if defined with atoms and reasonable geometry - if( 
gen_mol_3d_sp.IsDefined() && gen_mol_3d_sp->GetSize() && !gen_mol_3d_sp->HasBadGeometry()) + if( gen_mol_3d_sp.IsDefined() && gen_mol_3d_sp->GetSize() && ( !gen_mol_3d_sp->HasBadGeometry() || SKIP_SPLIT )) { bool good_conf( true); // filter molecules with strained 3D conformers @@ -452,9 +454,9 @@ namespace bcl { BCL_MessageStd( "Molecule cleaning failed to generate a valid 3D conformer!") BCL_MessageStd( "Defined: " + util::Format()( gen_mol_3d_sp.IsDefined() ? "true" : "false")); - BCL_MessageStd("Has good geometry: " + util::Format()( gen_mol_3d_sp->HasBadGeometry() ? "false" : "true" )); - BCL_MessageStd("Final molecule size: " + util::Format()( gen_mol_3d_sp->GetSize())); - BCL_MessageStd("Returning null...") + BCL_MessageStd( "Has good geometry: " + util::Format()( gen_mol_3d_sp->HasBadGeometry() ? "false" : "true" )); + BCL_MessageStd( "Final molecule size: " + util::Format()( gen_mol_3d_sp->GetSize())); + BCL_MessageStd( "Returning null...") } return util::ShPtr< FragmentComplete>(); } @@ -466,9 +468,10 @@ namespace bcl AtomVector< AtomComplete> FragmentMapConformer::CleanAtoms ( const AtomVector< AtomComplete> &ATOM_VEC, - const std::string &DRUG_LIKENESS_TYPE, - const bool &SKIP_NEUT, - const bool &SKIP_SATURATE_H + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, + const bool SKIP_NEUT, + const bool SKIP_SATURATE_H, + const bool SKIP_SPLIT ) const { // make sure we have atoms @@ -505,20 +508,28 @@ namespace bcl return AtomVector< AtomComplete>(); } } - FragmentSplitLargestComponent splitter; - FragmentEnsemble largest_component( splitter( new_mol)); - new_mol = largest_component.GetMolecules().FirstElement(); + + // remove smaller fragments from a complex (split) molecule + if( !SKIP_SPLIT) + { + FragmentSplitLargestComponent splitter; + FragmentEnsemble largest_component( splitter( new_mol)); + new_mol = largest_component.GetMolecules().FirstElement(); + } + + // add hydrogen atoms if( !SKIP_SATURATE_H) { new_mol.SaturateWithH(); } // check 
drug-likeness - if( DRUG_LIKENESS_TYPE.size() && DRUG_LIKENESS_TYPE != "None") + if( DRUG_LIKENESS_TYPE.GetAlias() != "Constant(1.0)") { GetMutex().Lock(); - static descriptor::CheminfoProperty drug_likeness_filter( DRUG_LIKENESS_TYPE); - bool druglike( drug_likeness_filter->SumOverObject( new_mol)( 0)); +// static descriptor::CheminfoProperty drug_likeness_filter( DRUG_LIKENESS_TYPE); +// bool druglike( drug_likeness_filter->SumOverObject( new_mol)( 0)); + bool druglike( DRUG_LIKENESS_TYPE->SumOverObject( new_mol)( 0)); GetMutex().Unlock(); if( druglike) { @@ -804,7 +815,7 @@ namespace bcl { // generate 3D conformer FragmentComplete mol( MOL); - util::ShPtr< FragmentComplete> clean_mol( Clean( MOL.GetAtomVector(), MOL, "None", true)); + util::ShPtr< FragmentComplete> clean_mol( Clean( MOL.GetAtomVector(), MOL, descriptor::CheminfoProperty( "Constant(1.0)"), true)); if( !util::IsDefined( clean_mol)) { return FragmentComplete(); diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_add.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_add.cpp index 93dfdb5a4..750058723 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_add.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_add.cpp @@ -92,7 +92,7 @@ namespace bcl FragmentMutateAdd::FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_FragmentPool( FRAGMENT_POOL), @@ -114,7 +114,7 @@ namespace bcl FragmentMutateAdd::FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -145,7 +145,7 @@ namespace bcl FragmentMutateAdd::FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - 
const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -184,7 +184,7 @@ namespace bcl FragmentMutateAdd::FragmentMutateAdd ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -230,7 +230,7 @@ namespace bcl //! @return a short name for this class const std::string &FragmentMutateAdd::GetAlias() const { - static const std::string s_name( "Combine"); + static const std::string s_name( "Add"); return s_name; } @@ -304,7 +304,12 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // clean and output @@ -312,14 +317,19 @@ namespace bcl // Remove hydrogen atoms to allow bond type adjustment HydrogensHandler::Remove( atoms); - if( m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, FRAGMENT, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atoms, + m_ScaffoldFragment.GetSize() ? 
m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); } return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.cpp index 31cadd15d..40e176379 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_add_med_chem.cpp @@ -86,7 +86,7 @@ namespace bcl FragmentMutateAddMedChem::FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_FragmentPool( FRAGMENT_POOL), @@ -108,7 +108,7 @@ namespace bcl FragmentMutateAddMedChem::FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -139,7 +139,7 @@ namespace bcl FragmentMutateAddMedChem::FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -178,7 +178,7 @@ namespace bcl FragmentMutateAddMedChem::FragmentMutateAddMedChem ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -475,7 +475,12 @@ 
namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // clean and output @@ -483,14 +488,19 @@ namespace bcl // Remove hydrogen atoms to allow bond type adjustment HydrogensHandler::Remove( atoms); - if( m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, fragment, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atoms, + m_ScaffoldFragment.GetSize() ? m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); } return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_alchemy.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_alchemy.cpp index b62ad479e..e3040c5cf 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_alchemy.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_alchemy.cpp @@ -71,7 +71,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateAlchemy::FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_AllowedElements( storage::Vector< ElementType>()), @@ -91,7 +91,7 @@ namespace bcl //! 
@param SCAFFOLD_FRAGMENT fragment to which the new mutated molecule will be aligned based on substructure FragmentMutateAlchemy::FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const bool &CORINA_CONFS ) : @@ -115,7 +115,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateAlchemy::FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -147,7 +147,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateAlchemy::FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -187,7 +187,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateAlchemy::FragmentMutateAlchemy ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -377,14 +377,19 @@ namespace bcl BCL_MessageDbg( "n_nonh_bonds: " + util::Format()( n_current_bonds)); BCL_MessageDbg( "n_e_bonds: " + util::Format()( picked_atom->GetAtomType()->GetNumberElectronsInBonds())); BCL_MessageDbg( "n_nonh_e: " + util::Format()( n_nonh_e)); + BCL_MessageDbg( "aromatic: " + util::Format()( picked_atom_aromatic)); PossibleAtomTypesForAtom available_atom_types ( GetChosenElementType(), n_nonh_e, n_current_bonds, util::IsDefined( m_FormalCharge) ? m_FormalCharge : picked_atom->GetCharge(), - picked_atom_aromatic + m_ChosenElementType == GetElementTypes().e_Sulfur ? false : picked_atom_aromatic + // TODO this is a hack because we are failing to identify when a Sulfur atom is a valid aromatic replacement; + // importantly, aromaticity is detected correctly during conformer generation, which means this is potentially + // an issue with PossibleAtomTypesForAtom ); + BCL_MessageDbg( "n_poss_types: " + util::Format()( available_atom_types.GetNumberPossibleTypes())); // find something with the requested formal charge if( available_atom_types.GetNumberPossibleTypes() && util::IsDefined( m_FormalCharge)) { @@ -493,27 +498,32 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina -// storage::Vector< size_t>(), -// false, -// false, -// 4 + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // remove hydrogen atoms to ease burden on the isomorphism search during cleaning HydrogensHandler::Remove( new_atom_vector); // Standardize and return - if( 
m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( new_atom_vector, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( new_atom_vector, FRAGMENT, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + new_atom_vector, + m_ScaffoldFragment.GetSize() ? m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); } - // if no luck, return null ptr return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } @@ -620,7 +630,7 @@ namespace bcl // parse input const storage::Vector< std::string> allowed_elements ( - util::SplitString( util::TrimString( m_AllowedElementsString), " \t\n\r,") + util::SplitString( util::TrimString( m_AllowedElementsString), " ") ); // stupid check to add only the correct elements diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_combine.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_combine.cpp index 11d6fa645..3f5aac73f 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_combine.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_combine.cpp @@ -89,7 +89,7 @@ namespace bcl FragmentMutateCombine::FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_FragmentPool( FRAGMENT_POOL), @@ -111,7 +111,7 @@ namespace bcl FragmentMutateCombine::FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -142,7 +142,7 @@ namespace bcl 
FragmentMutateCombine::FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -181,7 +181,7 @@ namespace bcl FragmentMutateCombine::FragmentMutateCombine ( const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -450,7 +450,12 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // clean and output @@ -461,14 +466,19 @@ namespace bcl // Remove hydrogen atoms to allow bond type adjustment HydrogensHandler::Remove( atoms); - if( m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, FRAGMENT, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atoms, + m_ScaffoldFragment.GetSize() ? 
m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); } return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_connect.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_connect.cpp index 64eb8c44d..71a2f678a 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_connect.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_connect.cpp @@ -83,7 +83,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateConnect::FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : // m_RotamerLibrarySearcher( util::ShPtr< SearchFragmentLibraryFromTree>()), @@ -108,7 +108,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateConnect::FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -143,7 +143,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateConnect::FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -186,7 +186,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateConnect::FragmentMutateConnect ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_cyclize.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_cyclize.cpp index a030b362a..ed5929d68 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_cyclize.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_cyclize.cpp @@ -68,7 +68,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateCyclize::FragmentMutateCyclize ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -97,7 +97,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateCyclize::FragmentMutateCyclize ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -134,7 +134,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateCyclize::FragmentMutateCyclize ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -449,7 +449,12 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // Remove hydrogen atoms before clean to allow proper bondtype selection @@ -459,8 +464,8 @@ namespace bcl util::ShPtr< FragmentComplete> new_mol_ptr ( m_ScaffoldFragment.GetSize() - ? cleaner.Clean( not_empty, m_ScaffoldFragment, m_DrugLikenessType) - : cleaner.Clean( not_empty, FRAGMENT, m_DrugLikenessType) + ? cleaner.Clean( not_empty, m_ScaffoldFragment, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) + : cleaner.Clean( not_empty, FRAGMENT, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) ); if( !new_mol_ptr.IsDefined() || new_mol_ptr->HasNonGasteigerAtomTypes()) @@ -469,10 +474,10 @@ namespace bcl } // split out rings - FragmentSplitRings ring_splitter( true, 4); - FragmentEnsemble split_rings( ring_splitter( *new_mol_ptr)); - - // make sure all rings are found in the ring dataset +// FragmentSplitRings ring_splitter( true, 4); +// FragmentEnsemble split_rings( ring_splitter( *new_mol_ptr)); +// +// // make sure all rings are found in the ring dataset // for // ( // FragmentEnsemble::iterator split_rings_itr( split_rings.Begin()), split_rings_itr_end( split_rings.End()); diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.cpp index 425b9621d..3c473cfa2 100644 --- 
a/source/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_extend_with_linker.cpp @@ -101,7 +101,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateExtendWithLinker::FragmentMutateExtendWithLinker ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -154,7 +154,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateExtendWithLinker::FragmentMutateExtendWithLinker ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -215,7 +215,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateExtendWithLinker::FragmentMutateExtendWithLinker ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -665,28 +665,47 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // Remove hydrogen atoms before clean to allow proper bondtype selection AtomVector< AtomComplete> not_empty( new_mol.GetAtomVector()); HydrogensHandler::Remove( not_empty); - - // Check for valid atom types - util::ShPtr< FragmentComplete> new_mol_ptr + return math::MutateResult< FragmentComplete> ( - m_ScaffoldFragment.GetSize() - ? cleaner.Clean( not_empty, m_ScaffoldFragment, m_DrugLikenessType) - : cleaner.Clean( not_empty, FRAGMENT, m_DrugLikenessType) + cleaner.Clean + ( + not_empty, + m_ScaffoldFragment.GetSize() ? m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this ); - - if( !new_mol_ptr.IsDefined() || new_mol_ptr->HasNonGasteigerAtomTypes()) - { - return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); - } - - // return the new molecule - return math::MutateResult< FragmentComplete>( new_mol_ptr, *this); + return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); +// +// // Check for valid atom types +// util::ShPtr< FragmentComplete> new_mol_ptr +// ( +// m_ScaffoldFragment.GetSize() +// ? 
cleaner.Clean( not_empty, m_ScaffoldFragment, m_DrugLikenessType) +// : cleaner.Clean( not_empty, FRAGMENT, m_DrugLikenessType) +// ); +// +// if( !new_mol_ptr.IsDefined() || new_mol_ptr->HasNonGasteigerAtomTypes()) +// { +// return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); +// } +// +// // return the new molecule +// return math::MutateResult< FragmentComplete>( new_mol_ptr, *this); } //////////////// diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_fluorinate.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_fluorinate.cpp index 0894b3273..db8dc812b 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_fluorinate.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_fluorinate.cpp @@ -61,7 +61,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateFluorinate::FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_Reversible( false) @@ -79,7 +79,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateFluorinate::FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -107,7 +107,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateFluorinate::FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -143,7 +143,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateFluorinate::FragmentMutateFluorinate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -219,7 +219,12 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // make equal probability to add or remove fluorine atoms @@ -307,8 +312,8 @@ namespace bcl util::ShPtr< FragmentComplete> new_mol_ptr ( m_ScaffoldFragment.GetSize() ? - cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType) : - cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType) + cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) : + cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) ); if( !new_mol_ptr.IsDefined()) { @@ -372,8 +377,8 @@ namespace bcl util::ShPtr< FragmentComplete> new_mol_ptr ( m_ScaffoldFragment.GetSize() ? - cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType) : - cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType) + cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) : + cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) ); if( !new_mol_ptr.IsDefined()) { @@ -392,7 +397,7 @@ namespace bcl //////////////// //! 
@brief set reversibility - void FragmentMutateFluorinate::SetReverisibility( const bool REVERSIBLE) + void FragmentMutateFluorinate::SetReversibility( const bool REVERSIBLE) { m_Reversible = REVERSIBLE; } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_halogenate.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_halogenate.cpp index a11e1cfcb..63586e789 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_halogenate.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_halogenate.cpp @@ -68,7 +68,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateHalogenate::FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_AllowedHalogens( storage::Vector< AtomType>()), @@ -90,7 +90,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateHalogenate::FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -122,7 +122,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateHalogenate::FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -162,7 +162,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateHalogenate::FragmentMutateHalogenate ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -234,7 +234,12 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); for( size_t i( 0); i < m_NumberMaxAttempts; ++i) @@ -348,8 +353,8 @@ namespace bcl util::ShPtr< FragmentComplete> new_mol_ptr ( m_ScaffoldFragment.GetSize() - ? cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType) - : cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType) + ? cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) + : cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) ); return math::MutateResult< FragmentComplete>( new_mol_ptr, *this); } @@ -405,8 +410,8 @@ namespace bcl util::ShPtr< FragmentComplete> new_mol_ptr ( m_ScaffoldFragment.GetSize() - ? cleaner.Clean( frag_atom_v, m_ScaffoldFragment, m_DrugLikenessType) - : cleaner.Clean( frag_atom_v, FRAGMENT, m_DrugLikenessType) + ? 
cleaner.Clean( frag_atom_v, m_ScaffoldFragment, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) + : cleaner.Clean( frag_atom_v, FRAGMENT, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) ); return math::MutateResult< FragmentComplete>( new_mol_ptr, *this); } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_interface.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_interface.cpp index 72b0a48d9..a973c7cb5 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_interface.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_interface.cpp @@ -220,7 +220,7 @@ namespace bcl //////////////// //! @brief set druglikeness type - void FragmentMutateInterface::SetDruglikenessType( const std::string &DRUGLIKENESS_TYPE) + void FragmentMutateInterface::SetDruglikenessType( const descriptor::CheminfoProperty &DRUGLIKENESS_TYPE) { m_DrugLikenessType = DRUGLIKENESS_TYPE; } @@ -410,7 +410,7 @@ namespace bcl const size_t &ATOM_INDEX, const bool SHUFFLE_H, const bool REVERSE_H - ) const + ) { // find a hydrogen atom attached to specified atom index storage::Vector< size_t> h_indices; @@ -511,7 +511,7 @@ namespace bcl ( const FragmentComplete &MOLECULE, util::SiPtr< const AtomConformationalInterface> &ATOM - ) const + ) { // compute our chosen atom pi charge double atom_pi_charge @@ -717,7 +717,7 @@ namespace bcl "the type of druglikeness filter to apply; " "returns input molecule if fails filter", io::Serialization::GetAgent( &m_DrugLikenessType), - "None" + "IsConstitutionDruglike" ); parameters.AddInitializer @@ -765,6 +765,32 @@ namespace bcl "false" ); + parameters.AddInitializer + ( + "skip_neutralization", + "if true, do not neutralize the molecule after applying a mutate", + io::Serialization::GetAgent( &m_SkipNeutralization), + "true" + ); + + parameters.AddInitializer + ( + "skip_saturation", + "if true, do not saturate hydrogen atoms on open valences after applying a mutate", + io::Serialization::GetAgent( 
&m_SkipSaturateH), + "false" + ); + + parameters.AddInitializer + ( + "skip_split", + "if true and the applied mutate separates a molecule into multiple disconnected fragments, " + "do not remove the smaller fragments; note that by default we keep only the largest fragment; " + "note also that if you choose to keep disconnected fragments then conformer sampling will not work.", + io::Serialization::GetAgent( &m_SkipSplit), + "false" + ); + parameters.AddInitializer ( "corina", @@ -1005,6 +1031,47 @@ namespace bcl "BondOrderAmideOrAromaticWithRingness" ); + + parameters.AddInitializer + ( + "fix_geometry", + "If True, then any atom/bonds with bad geometry is included for conformational sampling. If False, " + "then atoms with bad geometry will not be included unless they are also one of the perturbed atoms or " + "included as adjacent to the perturbed atoms.", + io::Serialization::GetAgent( &m_FixGeometry), + "true" + ); + + parameters.AddInitializer + ( + "refine_alignment", + "If True, then choose the returned conformer based on a flexible substructure-based alignment scored with ChargeRMSD. " + "This method generates a conformational ensemble, performs a greedy disconnected substructure alignment of each conformer, " + "and then chooses the best one by ChargeRMSD score. If False, select the best conformer based on BCL::Conf score. 
" + "This option will reduce the speed of the mutate and is mostly recommended for pose-dependent replacement of ring " + "structures at the core of the molecule via RingSwap.", + io::Serialization::GetAgent( &m_ChooseBestAlignedConf), + "false" + ); + + parameters.AddInitializer + ( + "extend_adjacent_atoms", + "include adjacent atoms out this many bonds from any perturbed atom when generating a new 3D conformer", + io::Serialization::GetAgent( &m_ExtendAdjacentAtoms), + "1" + ); + + parameters.AddInitializer + ( + "extend_ring_atoms", + "add atoms in a shared ring with the perturbed atom(s) to the list of moveable atoms during 3D conformer " + "generation; generally recommended for aromatic systems, but may or may not be needed for " + "nonconjugated ring systems", + io::Serialization::GetAgent( &m_ExtendRingAtoms), + "true" + ); + return parameters; } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_mcm.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_mcm.cpp index feb9a4615..a39b88d45 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_mcm.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_mcm.cpp @@ -100,7 +100,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -143,7 +143,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, 
const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -200,7 +200,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -253,7 +253,7 @@ namespace bcl const util::Implementation< FragmentSplitInterface> &SPLITTER, const util::ShPtr< SearchFragmentLibraryFromTree> &TREE_SEARCH, const util::ShPtr< FragmentEnsemble> &FRAGMENT_POOL, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -481,7 +481,7 @@ namespace bcl // POSE-DEPENDENT CONSTRUCTION OF MUTATES // if( !MDL.empty()) { - mutater->AddMutate( FragmentMutateRingSwap( tree_search, m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, MDL, PROPERTY_SCORER, RESOLVE_CLASHES, storage::Vector< float>(), CORINA_CONFS, true, false, 0.1, true, true), RING_SWAP_PROB); + mutater->AddMutate( FragmentMutateRingSwap( tree_search, m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, MDL, PROPERTY_SCORER, RESOLVE_CLASHES, storage::Vector< float>(), CORINA_CONFS, false, 0.1, true, true), RING_SWAP_PROB); mutater->AddMutate( FragmentMutateCyclize( m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, MDL, PROPERTY_SCORER, RESOLVE_CLASHES, storage::Vector< float>(), CORINA_CONFS), CYCLIZE_PROB); mutater->AddMutate( FragmentMutateAlchemy( m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, MDL, PROPERTY_SCORER, RESOLVE_CLASHES, storage::Vector< float>(), CORINA_CONFS), 
ALCHEMY_PROB); mutater->AddMutate( FragmentMutateRemoveAtom( m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, MDL, PROPERTY_SCORER, RESOLVE_CLASHES, storage::Vector< float>(), CORINA_CONFS), REMOVE_ATOM_PROB); @@ -494,7 +494,7 @@ namespace bcl // POSE-INDEPENDENT CONSTRUCTION OF MUTATES // else { - mutater->AddMutate( FragmentMutateRingSwap( tree_search, m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, CORINA_CONFS, true, false, 0.1, true, true), RING_SWAP_PROB); + mutater->AddMutate( FragmentMutateRingSwap( tree_search, m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, CORINA_CONFS, false, 0.1, true, true), RING_SWAP_PROB); mutater->AddMutate( FragmentMutateCyclize( m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, CORINA_CONFS), CYCLIZE_PROB); mutater->AddMutate( FragmentMutateAlchemy( m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, CORINA_CONFS), ALCHEMY_PROB); mutater->AddMutate( FragmentMutateRemoveAtom( m_DrugLikenessType, START_FRAGMENT, MUTABLE_FRAGMENTS, MUTABLE_ATOM_INDICES, CORINA_CONFS), REMOVE_ATOM_PROB); diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_react.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_react.cpp index 66ec117b6..f544acdf6 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_react.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_react.cpp @@ -49,9 +49,7 @@ namespace bcl //! 
@brief default constructor FragmentMutateReact::FragmentMutateReact() : m_LigandBased( false), - m_CorrectGeometry( false), - m_CorrectNonReferenceRingGeometry( false), - m_AdditionalAdjacentAtoms( size_t( 0)) + m_CorrectNonReferenceRingGeometry( false) { // important to get options from base class and initialize reaction search this->ReadInitializerSuccessHook( util::ObjectDataLabel(), util::GetLogger()); @@ -64,9 +62,7 @@ namespace bcl ) : FragmentReact( REACT), m_LigandBased( false), - m_CorrectGeometry( false), - m_CorrectNonReferenceRingGeometry( false), - m_AdditionalAdjacentAtoms( size_t( 0)) + m_CorrectNonReferenceRingGeometry( false) { // important to get options from base class and initialize reaction search this->ReadInitializerSuccessHook( util::ObjectDataLabel(), util::GetLogger()); @@ -145,9 +141,9 @@ namespace bcl { BCL_MessageStd( "Setting pose-dependent options"); BCL_MessageStd( "Ligand-based: " + util::Format()( m_LigandBased ? "true" : "false")); - BCL_MessageStd( "Fix bad geometry: " + util::Format()( m_CorrectGeometry ? "true" : "false")); - BCL_MessageStd("Fix bad ring geometry: " + util::Format()( m_CorrectNonReferenceRingGeometry ? "true" : "false")); - BCL_MessageStd("Extend adjacent atoms: " + util::Format()( m_AdditionalAdjacentAtoms)); + BCL_MessageStd( "Fix bad geometry: " + util::Format()( m_FixGeometry ? "true" : "false")); + BCL_MessageStd( "Fix bad ring geometry: " + util::Format()( m_CorrectNonReferenceRingGeometry ? "true" : "false")); + BCL_MessageStd( "Extend adjacent atoms: " + util::Format()( m_ExtendAdjacentAtoms)); } // try a few times @@ -173,8 +169,8 @@ namespace bcl m_Corina, storage::Vector< size_t>(), false, - m_CorrectGeometry, - m_AdditionalAdjacentAtoms + m_FixGeometry, + m_ExtendAdjacentAtoms ); // remove hydrogen atoms so ease burden on the isomorphism search during cleaning @@ -192,8 +188,8 @@ namespace bcl util::ShPtr< FragmentComplete> new_mol_ptr ( m_ScaffoldFragment.GetSize() ? 
- cleaner.Clean( unclean_mol.GetAtomVector(), m_ScaffoldFragment, m_DrugLikenessType) : - cleaner.Clean( unclean_mol.GetAtomVector(), FRAGMENT, m_DrugLikenessType) + cleaner.Clean( unclean_mol.GetAtomVector(), m_ScaffoldFragment, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) : + cleaner.Clean( unclean_mol.GetAtomVector(), FRAGMENT, m_DrugLikenessType, m_SkipNeutralization, m_SkipSaturateH, m_SkipSplit) ); return math::MutateResult< FragmentComplete>( new_mol_ptr, *this); } @@ -271,15 +267,6 @@ namespace bcl "false" ); - parameters.AddInitializer - ( - "fix_geometry", - "pose-dependent; " - "if 3D conformer matters, fix atoms with bad geometry even if they are in reference structure", - io::Serialization::GetAgent( &m_CorrectGeometry), - "false" - ); - parameters.AddInitializer ( "fix_ring_geometry", @@ -289,15 +276,6 @@ namespace bcl "false" ); - parameters.AddInitializer - ( - "extend_adjacent_atoms", - "pose-dependent; " - "include adjacent atoms out this many bonds from any perturbed atom when generating a new 3D conformer", - io::Serialization::GetAgent( &m_AdditionalAdjacentAtoms), - "0" - ); - return parameters; } @@ -368,9 +346,9 @@ namespace bcl if( !m_LigandBased) { // set pose-dependent options - m_ReactionWorker.SetCorrectGeometry( m_CorrectGeometry); + m_ReactionWorker.SetCorrectGeometry( m_FixGeometry); m_ReactionWorker.SetCorrectNonReferenceRingGeometry( m_CorrectNonReferenceRingGeometry); - m_ReactionWorker.SetAdditionalAdjacentAtoms( m_AdditionalAdjacentAtoms); + m_ReactionWorker.SetAdditionalAdjacentAtoms( m_ExtendAdjacentAtoms); } // done diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_remove_atom.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_remove_atom.cpp index 3fea49d78..cf2947674 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_remove_atom.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_remove_atom.cpp @@ -20,9 +20,6 @@ BCL_StaticInitializationFiascoFinder #include 
"chemistry/bcl_chemistry_fragment_mutate_remove_atom.h" // includes from bcl - sorted alphabetically -#include "iostream" -#include "iterator" -#include "vector" #include "chemistry/bcl_chemistry_atoms_complete_standardizer.h" #include "chemistry/bcl_chemistry_fragment_map_conformer.h" #include "chemistry/bcl_chemistry_fragment_split_largest_component.h" @@ -33,7 +30,11 @@ BCL_StaticInitializationFiascoFinder #include "io/bcl_io_file.h" #include "io/bcl_io_ifstream.h" #include "random/bcl_random_uniform_distribution.h" + // external includes - sorted alphabetically +#include +#include +#include namespace bcl { @@ -64,7 +65,7 @@ namespace bcl //! @param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateRemoveAtom::FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) { @@ -81,7 +82,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateRemoveAtom::FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -108,7 +109,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveAtom::FragmentMutateRemoveAtom ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -143,7 +144,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible)
     FragmentMutateRemoveAtom::FragmentMutateRemoveAtom
     (
-      const std::string &DRUG_LIKENESS_TYPE,
+      const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE,
       const FragmentComplete &SCAFFOLD_FRAGMENT,
       const FragmentEnsemble &MUTABLE_FRAGMENTS,
       const storage::Vector< size_t> &MUTABLE_ATOM_INDICES,
@@ -191,6 +192,13 @@ namespace bcl
     {
       BCL_MessageStd( "RemoveAtom!");
 
+      // the two restrictions exclude each other; give up only if BOTH are enabled (neither enabled is valid)
+      if( m_RestrictToBondedHeavy && m_RestrictToBondedH)
+      {
+        BCL_MessageStd( "Cannot simultaneously restrict atom selection to heavy atoms and hydrogen atoms; returning NULL");
+        return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this);
+      }
+
       // pick an atom to remove
       util::SiPtr< const AtomConformationalInterface> picked_atom;
       if( m_MutableAtomIndices.GetSize() || m_MutableElements.GetSize() || m_MutableFragments.GetSize())
@@ -202,15 +210,57 @@ namespace bcl
         picked_atom = this->PickAtom( FRAGMENT, true);
       }
 
-//      // if atom is hydrogen atom, grab the atom to which it is connected
-//      if( picked_atom->GetElementType() == GetElementTypes().e_Hydrogen)
-//      {
-//        if( !picked_atom->GetBonds().GetSize())
-//        {
-//          continue;
-//        }
-//        picked_atom = util::SiPtr( picked_atom->GetBonds().Begin()->GetTargetAtom());
-//      }
+      // a hydrogen atom was picked but only heavy atoms may be removed: switch to the atom it is bonded to
+      if( picked_atom->GetElementType() == GetElementTypes().e_Hydrogen && m_RestrictToBondedHeavy)
+      {
+        if( !picked_atom->GetBonds().GetSize())
+        {
+          BCL_MessageStd
+          (
+            "Hydrogen atom selected and restrict to bonded heavy atoms is enabled. "
+            "However, no bonded heavy atom is found. Returning NULL."
+          );
+          return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this);
+        }
+        picked_atom = util::SiPtr< const AtomConformationalInterface>( picked_atom->GetBonds().Begin()->GetTargetAtom());
+      }
+
+      // a heavy atom was picked but only hydrogen atoms may be removed: switch to one of its bonded hydrogen atoms
+      else if( m_RestrictToBondedH && picked_atom->GetElementType() != GetElementTypes().e_Hydrogen)
+      {
+        // collect the indices (within FRAGMENT) of all hydrogen atoms bonded to the picked atom
+        storage::Vector< size_t> h_indices;
+        for
+        (
+          auto bond_itr( picked_atom->GetBonds().Begin()),
+          bond_itr_end( picked_atom->GetBonds().End());
+          bond_itr != bond_itr_end;
+          ++bond_itr
+        )
+        {
+          if( bond_itr->GetTargetAtom().GetElementType() == GetElementTypes().e_Hydrogen)
+          {
+            h_indices.PushBack( FRAGMENT.GetAtomVector().GetAtomIndex( bond_itr->GetTargetAtom()));
+          }
+        }
+
+        // without any bonded hydrogen atom there is nothing that may be removed
+        if( !h_indices.GetSize())
+        {
+          BCL_MessageStd
+          (
+            "Heavy atom selected and restrict to bonded hydrogen atom is enabled. "
+            "However, no bonded hydrogen atom is found. Returning NULL."
+ ); + return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); + } + // set new picked atom + else if( h_indices.GetSize() > size_t( 1)) + { + h_indices.Shuffle(); + } + picked_atom = util::SiPtr< const AtomConformationalInterface>( FRAGMENT.GetAtomVector()( h_indices( 0))); + } // removal atom index size_t picked_atom_index( FRAGMENT.GetAtomVector().GetAtomIndex( *picked_atom)); @@ -232,22 +282,34 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // standardize and return HydrogensHandler::Remove( atoms); - if( m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, FRAGMENT, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atoms, + m_ScaffoldFragment.GetSize() ? m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); // TODO: consider adding a step to have a certain probability of closing the gap created by this atom removal + // failed all tries; return null + return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } //////////////// diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_remove_bond.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_remove_bond.cpp index 78ad1796c..e4c7d52ce 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_remove_bond.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_remove_bond.cpp @@ -80,7 +80,7 @@ namespace bcl //! 
@param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateRemoveBond::FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) : m_BondChange( FragmentMutateRemoveBond::BondTreatment::e_RemoveBond), @@ -101,7 +101,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateRemoveBond::FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -132,7 +132,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveBond::FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -171,7 +171,7 @@ namespace bcl //! 
@param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveBond::FragmentMutateRemoveBond ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -357,19 +357,29 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // Remove hydrogen atoms to allow bond type adjustment HydrogensHandler::Remove( atom_vector); - if( m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atom_vector, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atom_vector, FRAGMENT, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atom_vector, + m_ScaffoldFragment.GetSize() ? m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); } // failed all tries; return null return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.cpp index 4f09fa991..fb26a518e 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_remove_fragment.cpp @@ -81,7 +81,7 @@ namespace bcl //! 
@param DRUG_LIKENESS_TYPE type of druglikeness filter to apply during clean FragmentMutateRemoveFragment::FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const bool &CORINA_CONFS ) { @@ -98,7 +98,7 @@ namespace bcl //! @param MUTABLE_ATOM_INDICES indices of atoms that can be mutated FragmentMutateRemoveFragment::FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -125,7 +125,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveFragment::FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -160,7 +160,7 @@ namespace bcl //! @param BFACTORS vector of values indicating per-residue flexibility (higher values are more flexible) FragmentMutateRemoveFragment::FragmentMutateRemoveFragment ( - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -203,7 +203,7 @@ namespace bcl //! 
@return a short name for this class const std::string &FragmentMutateRemoveFragment::GetAlias() const { - static const std::string s_name( "Combine"); + static const std::string s_name( "RemoveFragment"); return s_name; } @@ -287,7 +287,12 @@ namespace bcl m_PropertyScorer, m_ResolveClashes, m_BFactors, - m_Corina + m_Corina, + storage::Vector< size_t>(), + m_ChooseBestAlignedConf, + m_FixGeometry, + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // clean and output @@ -297,14 +302,19 @@ namespace bcl // Remove hydrogen atoms to allow bond type adjustment HydrogensHandler::Remove( atoms); - if( m_ScaffoldFragment.GetSize()) - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, m_ScaffoldFragment, m_DrugLikenessType), *this); - } - else - { - return math::MutateResult< FragmentComplete>( cleaner.Clean( atoms, FRAGMENT, m_DrugLikenessType), *this); - } + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atoms, + m_ScaffoldFragment.GetSize() ? m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); } return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } diff --git a/source/chemistry/bcl_chemistry_fragment_mutate_ring_swap.cpp b/source/chemistry/bcl_chemistry_fragment_mutate_ring_swap.cpp index 407288a74..3a6c4098a 100644 --- a/source/chemistry/bcl_chemistry_fragment_mutate_ring_swap.cpp +++ b/source/chemistry/bcl_chemistry_fragment_mutate_ring_swap.cpp @@ -61,13 +61,9 @@ namespace bcl FragmentMutateRingSwap::FragmentMutateRingSwap() : m_RotamerLibrarySearcher( util::ShPtr< SearchFragmentLibraryFromTree>()), m_RingInitiationProbability( 0.1), - m_FixGeometry( true), - m_Neutralize( false), m_RestrictToNoMoreThanOneRingSizeChange( true), m_AllowLargeRingCollapse( true), m_AlignRings( false), - m_ExtendAdjacentAtoms( size_t( 1)), - m_ChooseBestAlignedConf( false), m_BondComparisonType( 
ConfigurationalBondTypeData::e_BondOrderOrAromaticWithRingness), m_AtomComparisonType( ConformationGraphConverter::e_ElementType) { @@ -78,7 +74,7 @@ namespace bcl FragmentMutateRingSwap::FragmentMutateRingSwap ( const util::ShPtr< SearchFragmentLibraryFromTree> &FRAGMENT_LIBRARY, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, @@ -87,7 +83,6 @@ namespace bcl const bool &RESOLVE_CLASHES, const storage::Vector< float> &BFACTORS, const bool &CORINA, - const bool &FIX_GEOMETRY, const bool &NEUTRALIZE, const double &RING_INITIATION_PROBABILITY, const bool &PREVENT_MORE_THAN_ONE_RING_FROM_CHANGING_SIZE, @@ -95,13 +90,9 @@ namespace bcl ) : m_RotamerLibrarySearcher( FRAGMENT_LIBRARY), m_RingInitiationProbability( RING_INITIATION_PROBABILITY), - m_FixGeometry( FIX_GEOMETRY), - m_Neutralize( NEUTRALIZE), m_RestrictToNoMoreThanOneRingSizeChange( PREVENT_MORE_THAN_ONE_RING_FROM_CHANGING_SIZE), m_AllowLargeRingCollapse( ALLOW_LARGE_RING_COLLAPSE), m_AlignRings( false), - m_ExtendAdjacentAtoms( size_t( 1)), - m_ChooseBestAlignedConf( false), m_BondComparisonType( ConfigurationalBondTypeData::e_BondOrderOrAromaticWithRingness), m_AtomComparisonType( ConformationGraphConverter::e_ElementType) { @@ -122,12 +113,11 @@ namespace bcl FragmentMutateRingSwap::FragmentMutateRingSwap ( const util::ShPtr< SearchFragmentLibraryFromTree> &FRAGMENT_LIBRARY, - const std::string &DRUG_LIKENESS_TYPE, + const descriptor::CheminfoProperty &DRUG_LIKENESS_TYPE, const FragmentComplete &SCAFFOLD_FRAGMENT, const FragmentEnsemble &MUTABLE_FRAGMENTS, const storage::Vector< size_t> &MUTABLE_ATOM_INDICES, const bool &CORINA, - const bool &FIX_GEOMETRY, const bool &NEUTRALIZE, const double &RING_INITIATION_PROBABILITY, const bool &PREVENT_MORE_THAN_ONE_RING_FROM_CHANGING_SIZE, @@ -135,13 +125,9 @@ namespace bcl ) : 
m_RotamerLibrarySearcher( FRAGMENT_LIBRARY), m_RingInitiationProbability( RING_INITIATION_PROBABILITY), - m_FixGeometry( FIX_GEOMETRY), - m_Neutralize( NEUTRALIZE), m_RestrictToNoMoreThanOneRingSizeChange( PREVENT_MORE_THAN_ONE_RING_FROM_CHANGING_SIZE), m_AllowLargeRingCollapse( ALLOW_LARGE_RING_COLLAPSE), m_AlignRings( false), - m_ExtendAdjacentAtoms( size_t( 1)), - m_ChooseBestAlignedConf( false), m_BondComparisonType( ConfigurationalBondTypeData::e_BondOrderOrAromaticWithRingness), m_AtomComparisonType( ConformationGraphConverter::e_ElementType) { @@ -240,7 +226,7 @@ namespace bcl itr_ring_select = m_FragmentPool( n_double_bonds).Begin(); while( itr_ring_select != itr_ring_select_end) { - frag_value -= GetCounts( *itr_ring_select); + frag_value -= m_SetCountsToOne ? size_t( 1) : GetCounts( *itr_ring_select); if( frag_value < 0) { break; @@ -452,7 +438,7 @@ namespace bcl itr_ring_select = m_FragmentPool( n_double_bonds).Begin(); while( itr_ring_select != itr_ring_select_end) { - frag_value -= GetCounts( *itr_ring_select); + frag_value -= m_SetCountsToOne ? size_t( 1) : GetCounts( *itr_ring_select); if( frag_value < 0) { break; @@ -702,17 +688,27 @@ namespace bcl storage::Vector< size_t>(), m_ChooseBestAlignedConf, m_FixGeometry, - m_ExtendAdjacentAtoms + m_ExtendAdjacentAtoms, + m_ExtendRingAtoms ); // clean the molecule - util::ShPtr< FragmentComplete> frag( util::ShPtr< FragmentComplete>( new FragmentComplete( atoms, ""))); HydrogensHandler::Remove( atoms); - m_ScaffoldFragment.GetSize() - ? frag = cleaner.Clean( atoms, m_ScaffoldFragment, m_DrugLikenessType) - : frag = cleaner.Clean( atoms, FRAGMENT, m_DrugLikenessType); + return math::MutateResult< FragmentComplete> + ( + cleaner.Clean + ( + atoms, + m_ScaffoldFragment.GetSize() ? 
m_ScaffoldFragment : FRAGMENT, + m_DrugLikenessType, + m_SkipNeutralization, + m_SkipSaturateH, + m_SkipSplit + ), + *this + ); // return the new constitution - return math::MutateResult< FragmentComplete>( frag, *this); + return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } //////////////// @@ -1083,28 +1079,6 @@ namespace bcl "true" ); - parameters.AddInitializer - ( - "fix_geometry", - "If True, then any atom/bonds with bad geometry is included for conformational sampling. If False, " - "then atoms with bad geometry will not be included unless they are also one of the perturbed atoms or " - "included as adjacent to the perturbed atoms.", - io::Serialization::GetAgent( &m_FixGeometry), - "true" - ); - - parameters.AddInitializer - ( - "refine_alignment", - "If True, then choose the returned conformer based on a flexible substructure-based alignment scored with ChargeRMSD. " - "This method generates a conformational ensemble, performs a greedy disconnected substructure alignment of each conformer, " - "and then chooses the best one by ChargeRMSD score. If False, select the best conformer based on BCL::Conf score. " - "This option will reduce the speed of the mutate and is mostly recommended for pose-dependent replacement of ring " - "structures at the core of the molecule.", - io::Serialization::GetAgent( &m_ChooseBestAlignedConf), - "false" - ); - parameters.AddInitializer ( "ring_initiation_probability", @@ -1139,10 +1113,15 @@ namespace bcl parameters.AddInitializer ( - "extend_adjacent_atoms", - "include adjacent atoms out this many bonds from any perturbed atom when generating a new 3D conformer", - io::Serialization::GetAgent( &m_ExtendAdjacentAtoms), - "1" + "set_ring_fragment_counts_to_one", + "the probability of a ring fragment to be selected is equal to its 'ScaffoldCount' value divided by " + "the sum of 'ScaffoldCount' values across all molecules in the input ring library. 
" + "if true, ignore stored counts in 'ScaffoldCount' and set to the count for each fragment to 1; " + "this has the effect of making each individual entry in the fragment pool equally likely to be selected; " + "note that this also has the effect of allowing users to control fragment selection probability through " + "multiple entries rather than the 'ScaffoldCount' MDL property", + io::Serialization::GetAgent( &m_SetCountsToOne), + "false" ); return parameters; @@ -1189,7 +1168,7 @@ namespace bcl m_FragmentPoolScaffoldSums.Resize( n_dbv + size_t( 1), size_t( 0)); } m_FragmentPool( n_dbv).PushBack( *itr_ensemble); - m_FragmentPoolScaffoldSums( n_dbv) += GetCounts( *itr_ensemble); + m_FragmentPoolScaffoldSums( n_dbv) += m_SetCountsToOne ? size_t( 1) : GetCounts( *itr_ensemble); } // we require a ring library for this mutate to function diff --git a/source/chemistry/bcl_chemistry_fragment_split_by_index.cpp b/source/chemistry/bcl_chemistry_fragment_split_by_index.cpp new file mode 100644 index 000000000..9d22d6f52 --- /dev/null +++ b/source/chemistry/bcl_chemistry_fragment_split_by_index.cpp @@ -0,0 +1,298 @@ +// (c) Copyright BCL @ Vanderbilt University 2014 +// (c) BCL Homepage: http://www.meilerlab.org/bclcommons +// (c) BCL Code Repository: https://github.com/BCLCommons/bcl +// (c) +// (c) The BioChemical Library (BCL) was originally developed by contributing members of the Meiler Lab @ Vanderbilt University. +// (c) +// (c) The BCL is now made available as an open-source software package distributed under the permissive MIT license, +// (c) developed and maintained by the Meiler Lab at Vanderbilt University and contributing members of the BCL Commons. +// (c) +// (c) External code contributions to the BCL are welcome. Please visit the BCL Commons GitHub page for information on how you can contribute. +// (c) +// (c) This file is part of the BCL software suite and is made available under the MIT license. 
+// (c)
+
+// initialize the static initialization fiasco finder, if macro ENABLE_FIASCO_FINDER is defined
+#include "util/bcl_util_static_initialization_fiasco_finder.h"
+BCL_StaticInitializationFiascoFinder
+
+// include header of this class
+#include "chemistry/bcl_chemistry_fragment_split_by_index.h"
+
+// includes from bcl - sorted alphabetically
+#include "chemistry/bcl_chemistry_fragment_complete.h"
+#include "chemistry/bcl_chemistry_fragment_split_isolate.h"
+#include "io/bcl_io_file.h"
+#include "util/bcl_util_binary_function_stl_wrapper.h"
+#include "util/bcl_util_si_ptr.h"
+
+// external includes - sorted alphabetically
+
+namespace bcl
+{
+  namespace chemistry
+  {
+
+    // register an instance of this class with the enumerated FragmentSplitInterface implementations
+    const util::SiPtr< const util::ObjectInterface> FragmentSplitByIndex::s_Instance
+    (
+      util::Enumerated< FragmentSplitInterface>::AddInstance( new FragmentSplitByIndex)
+    );
+
+  //////////////////////////////////
+  // construction and destruction //
+  //////////////////////////////////
+
+    //! @brief constructor from the atom indices and the four options (bonded H, invert, break, close valences)
+    FragmentSplitByIndex::FragmentSplitByIndex
+    (
+      const storage::Vector< size_t> &ATOM_INDICES,
+      const bool REMOVE_BONDED_H,
+      const bool INVERT,
+      const bool BREAK,
+      const bool CLOSE_OPEN_VALENCES
+    ) :
+      m_AtomIndices( ATOM_INDICES),
+      m_RemoveBondedH( REMOVE_BONDED_H),
+      m_Invert( INVERT),
+      m_Break( BREAK),
+      m_CloseOpenValences( CLOSE_OPEN_VALENCES)
+    {
+    }
+
+    //! @brief virtual copy constructor; @return pointer to a newly allocated copy of this object
+    FragmentSplitByIndex *FragmentSplitByIndex::Clone() const
+    {
+      return new FragmentSplitByIndex( *this);
+    }
+
+  /////////////////
+  // data access //
+  /////////////////
+
+    //! @brief returns class name
+    //! @return the class name as const ref std::string
+    const std::string &FragmentSplitByIndex::GetClassIdentifier() const
+    {
+      return GetStaticClassName( *this);
+    }
+
+    //! @brief get a short name for this class
+    //! 
@return a short name for this class
+    const std::string &FragmentSplitByIndex::GetAlias() const
+    {
+      static const std::string s_name( "Index");
+      return s_name;
+    }
+
+    //! @brief Get a description for what this class does (used when writing help)
+    //! @return the static class name; no separate description text is defined here
+    const std::string &FragmentSplitByIndex::GetClassDescription() const
+    {
+      return GetStaticClassName( *this);
+    }
+
+    //! @return the minimum size of fragments; always 0 for this splitter
+    const size_t FragmentSplitByIndex::GetMinSize() const
+    {
+      return 0;
+    }
+
+    //! @return indices (within MOLECULE_ATOMS) of the hydrogen atoms bonded to the non-hydrogen atoms in ATOM_INDICES
+    storage::Vector< size_t> FragmentSplitByIndex::GetBondedHydrogenAtoms
+    (
+      const storage::Vector< size_t> &ATOM_INDICES,
+      const AtomVector< AtomComplete> &MOLECULE_ATOMS
+    ) const
+    {
+      // initialize output
+      storage::Vector< size_t> h_atoms;
+
+      // find each target atom's bonded hydrogens
+      for
+      (
+        auto atom_itr( ATOM_INDICES.Begin()), atom_itr_end( ATOM_INDICES.End());
+        atom_itr != atom_itr_end;
+        ++atom_itr
+      )
+      {
+        // skip if selected atom is a hydrogen;
+        // yes, this does prevent someone from specifying removal of a hydrogen atom
+        // and its bonded hydrogen if the entire fragment is H2; probably not the desired use-case
+        if( MOLECULE_ATOMS( *atom_itr).GetElementType() == GetElementTypes().e_Hydrogen)
+        {
+          continue;
+        }
+
+        // heavy atom: record the index of every bond partner that is a hydrogen atom
+        for
+        (
+          auto bond_itr( MOLECULE_ATOMS( *atom_itr).GetBonds().Begin()),
+          bond_itr_end( MOLECULE_ATOMS( *atom_itr).GetBonds().End());
+          bond_itr != bond_itr_end;
+          ++bond_itr
+        )
+        {
+          if( bond_itr->GetTargetAtom().GetElementType() == GetElementTypes().e_Hydrogen)
+          {
+            h_atoms.PushBack( MOLECULE_ATOMS.GetAtomIndex( bond_itr->GetTargetAtom()));
+          }
+        }
+      }
+
+      // return hydrogen atom indices
+      return h_atoms;
+    }
+
+  /////////////////
+  //  operations //
+  /////////////////
+
+    //! @brief returns an ensemble of fragments of a molecule
+    //! 
@param CONFORMATION molecule of interest
+    //! @return the molecule left after atom removal, or its isolated components if m_Break is set
+    //! NOTE(review): the atom indices are not range-checked against CONFORMATION in this function
+    FragmentEnsemble FragmentSplitByIndex::operator()( const ConformationInterface &CONFORMATION) const
+    {
+      // we will want to construct an atom vector to build our return ensemble
+      storage::Vector< sdf::AtomInfo> atominfo( CONFORMATION.GetAtomInfo());
+      storage::Vector< sdf::BondInfo> bondinfo( CONFORMATION.GetBondInfo());
+      AtomVector< AtomComplete> atoms( atominfo, bondinfo);
+
+      // invert atom indices if desired
+      storage::Vector< size_t> keep_indices;
+      if( m_Invert)
+      {
+        // inverted selection: the listed atoms are kept; their bonded hydrogens are kept too unless removal is requested
+        if( !m_RemoveBondedH)
+        {
+          auto h_atoms( GetBondedHydrogenAtoms( m_AtomIndices, atoms));
+          storage::Set< size_t> keep_indices_set( m_AtomIndices.Begin(), m_AtomIndices.End());
+          keep_indices_set.InsertElements( h_atoms.Begin(), h_atoms.End());
+          keep_indices = storage::Vector< size_t>( keep_indices_set.Begin(), keep_indices_set.End());
+        }
+        else
+        {
+          keep_indices = m_AtomIndices;
+        }
+      }
+      else
+      {
+        // build the removal set locally (adding bonded H if requested); m_AtomIndices must stay
+        // untouched so that hydrogen indices found for this molecule do not leak into later calls
+        storage::Set< size_t> remove_indices_set( m_AtomIndices.Begin(), m_AtomIndices.End());
+        if( m_RemoveBondedH)
+        {
+          auto h_atoms( GetBondedHydrogenAtoms( m_AtomIndices, atoms));
+          remove_indices_set.InsertElements( h_atoms.Begin(), h_atoms.End());
+        }
+        const storage::Vector< size_t> remove_indices( remove_indices_set.Begin(), remove_indices_set.End());
+
+        // each atom in original conformation
+        for( size_t i( 0); i < CONFORMATION.GetSize(); ++i)
+        {
+          // if not found in removal indices
+          if( remove_indices.Find( i) >= remove_indices.GetSize())
+          {
+            keep_indices.PushBack( i);
+          }
+        }
+      }
+
+      // remove atoms except those we designated to save
+      atoms.Reorder( keep_indices);
+
+      // build a new molecule
+      FragmentComplete mol( atoms, 
CONFORMATION.GetName());
+
+      // saturate with hydrogen atoms to close valences
+      if( m_CloseOpenValences)
+      {
+        mol.SaturateWithH();
+      }
+
+      // hand the molecule to FragmentSplitIsolate if the components are wanted as separate fragments
+      if( m_Break)
+      {
+        // accept fragments as small as 1 atom
+        FragmentSplitIsolate isolater( 1);
+
+        // return the isolated ensemble
+        return isolater( mol);
+      }
+
+      return FragmentEnsemble( storage::List< FragmentComplete>( 1, mol));
+    }
+
+    //! @brief fill m_AtomIndices from m_AtomIndicesString; returns false if that string is empty
+    //! @param LABEL the label to parse (not referenced in this function)
+    //! @param ERR_STREAM stream to write out errors to (not written to here; the message goes to BCL_MessageStd)
+    bool FragmentSplitByIndex::ReadInitializerSuccessHook
+    (
+      const util::ObjectDataLabel &LABEL,
+      std::ostream &ERR_STREAM
+    )
+    {
+      // convert the user-supplied index string into numerical atom indices
+      if( m_AtomIndicesString.size())
+      {
+        m_AtomIndices.Reset();
+        m_AtomIndices = util::SplitStringToNumerical< size_t>( m_AtomIndicesString);
+        return true;
+      }
+
+      BCL_MessageStd( "No atom indices provided! Exiting without splitting molecules.");
+      return false;
+    }
+
+    //! @brief return parameters for member data that are set up from the labels
+    //! 
@return serializer exposing atom_indices, include_bonded_h, invert, break and close_open_valences
+    io::Serializer FragmentSplitByIndex::GetSerializer() const
+    {
+      io::Serializer parameters;
+      parameters.SetClassDescription
+      (
+        "splits molecules into fragments by removing specified indices"
+      );
+      parameters.AddInitializer
+      (
+        "atom_indices",
+        "the 0-indexed atom indices to remove from the input molecules",
+        io::Serialization::GetAgent( &m_AtomIndicesString),
+        ""
+      );
+      parameters.AddInitializer
+      (
+        "include_bonded_h",
+        "in addition to removing the specified 'atom_indices', also remove their bonded hydrogen atoms",
+        io::Serialization::GetAgent( &m_RemoveBondedH),
+        "true"
+      );
+      parameters.AddInitializer
+      (
+        "invert",
+        "invert the atom index selection prior to atom removal",
+        io::Serialization::GetAgent( &m_Invert),
+        "false"
+      );
+      parameters.AddInitializer
+      (
+        "break",
+        "if removing an atom separates a molecule into isolated components, return those isolated "
+        "components as separate fragments; by default, a fragment complex is returned in such cases.",
+        io::Serialization::GetAgent( &m_Break),
+        "false"
+      );
+      parameters.AddInitializer
+      (
+        "close_open_valences",
+        "after splitting, close any unsatisfied valences with hydrogen atoms in the resulting molecules",
+        io::Serialization::GetAgent( &m_CloseOpenValences),
+        "false"
+      );
+
+      return parameters;
+    }
+
+  } // namespace chemistry
+} // namespace bcl
diff --git a/source/chemistry/bcl_chemistry_molecule_evolutionary_optimizer.cpp b/source/chemistry/bcl_chemistry_molecule_evolutionary_optimizer.cpp
index 8b756cbd5..c7c23451f 100644
--- a/source/chemistry/bcl_chemistry_molecule_evolutionary_optimizer.cpp
+++ b/source/chemistry/bcl_chemistry_molecule_evolutionary_optimizer.cpp
@@ -13,8 +13,8 @@
 // (c)
 
 // initialize the static initialization fiasco finder, if macro ENABLE_FIASCO_FINDER is defined
-#include
 #include "util/bcl_util_static_initialization_fiasco_finder.h"
+#include
 BCL_StaticInitializationFiascoFinder
 
 // unit 
header @@ -915,7 +915,7 @@ namespace bcl ) { // std::stringstream hist; - auto hist( last_pop[mol_one].GetMoleculeHistory()); + auto hist( last_pop[ mol_one].GetMoleculeHistory()); hist.Append( "Recombine,"); // hist << "Recombine,"; // hist << last_pop[ mol_one].GetMoleculeHistory(); diff --git a/source/chemistry/bcl_chemistry_molecule_fragment_recombination.cpp b/source/chemistry/bcl_chemistry_molecule_fragment_recombination.cpp index 9298c4287..e3955e899 100644 --- a/source/chemistry/bcl_chemistry_molecule_fragment_recombination.cpp +++ b/source/chemistry/bcl_chemistry_molecule_fragment_recombination.cpp @@ -13,6 +13,13 @@ // (c) // initialize the static initialization fiasco finder, if macro ENABLE_FIASCO_FINDER is defined +#include "util/bcl_util_static_initialization_fiasco_finder.h" +BCL_StaticInitializationFiascoFinder + +// includes from bcl - sorted alphabetically +#include "chemistry/bcl_chemistry_atom_conformational_interface.h" +#include "chemistry/bcl_chemistry_atoms_complete_standardizer.h" +#include "chemistry/bcl_chemistry_bond_isometry_handler.h" #include "chemistry/bcl_chemistry_configuration_set.h" #include "chemistry/bcl_chemistry_conformation_comparison_psi_field.h" #include "chemistry/bcl_chemistry_conformation_graph_converter.h" @@ -20,26 +27,19 @@ #include "chemistry/bcl_chemistry_fragment_configuration_shared.h" #include "chemistry/bcl_chemistry_fragment_constitution_shared.h" #include "chemistry/bcl_chemistry_fragment_graph_marker.h" +#include "chemistry/bcl_chemistry_fragment_map_conformer.h" #include "chemistry/bcl_chemistry_fragment_split_interface.h" #include "chemistry/bcl_chemistry_fragment_split_largest_component.h" #include "chemistry/bcl_chemistry_fragment_split_rings.h" -#include "graph/bcl_graph_connectivity.h" -#include "graph/bcl_graph_subgraph.h" -#include "io/bcl_io_directory_entry.h" -#include "io/bcl_io_ofstream.h" -#include "util/bcl_util_static_initialization_fiasco_finder.h" -BCL_StaticInitializationFiascoFinder - -// 
includes from bcl - sorted alphabetically -#include "chemistry/bcl_chemistry_atom_conformational_interface.h" -#include "chemistry/bcl_chemistry_atoms_complete_standardizer.h" -#include "chemistry/bcl_chemistry_bond_isometry_handler.h" -#include "chemistry/bcl_chemistry_fragment_map_conformer.h" #include "chemistry/bcl_chemistry_hydrogens_handler.h" #include "chemistry/bcl_chemistry_molecule_fragment_recombination.h" #include "chemistry/bcl_chemistry_stereocenters_handler.h" +#include "graph/bcl_graph_connectivity.h" +#include "graph/bcl_graph_subgraph.h" #include "graph/bcl_graph_subgraph_isomorphism.h" +#include "io/bcl_io_directory_entry.h" #include "io/bcl_io_file.h" +#include "io/bcl_io_ofstream.h" #include "math/bcl_math_running_average.h" #include "sched/bcl_sched_scheduler_interface.h" #include "sched/bcl_sched_thunk_job.h" @@ -644,8 +644,8 @@ namespace bcl } // clean the new molecule - FragmentMapConformer cleaner( "None", false, storage::Vector< size_t>( conf_moveable_indices.Begin(), conf_moveable_indices.End())); - util::ShPtr< FragmentComplete> new_mol( cleaner.Clean( new_mol_vec, BASE_MOL_A, "None", false)); + FragmentMapConformer cleaner( descriptor::CheminfoProperty( "Constant(1.0)"), false, storage::Vector< size_t>( conf_moveable_indices.Begin(), conf_moveable_indices.End())); + util::ShPtr< FragmentComplete> new_mol( cleaner.Clean( new_mol_vec, BASE_MOL_A, descriptor::CheminfoProperty( "Constant(1.0)"), false)); if( new_mol.IsDefined()) { return *new_mol; diff --git a/source/chemistry/bcl_chemistry_reaction_worker.cpp b/source/chemistry/bcl_chemistry_reaction_worker.cpp index 08e090d12..36a5e88bf 100644 --- a/source/chemistry/bcl_chemistry_reaction_worker.cpp +++ b/source/chemistry/bcl_chemistry_reaction_worker.cpp @@ -1024,7 +1024,7 @@ namespace bcl std::string mobile_atoms_str; FragmentMapConformer cleaner ( - "", // druglikeness type + descriptor::CheminfoProperty( "Constant(1.0)"), // druglikeness type "", // receptor MDL property "", // 
receptor filename
           descriptor::CheminfoProperty(), // affinity net
@@ -1042,7 +1042,7 @@ namespace bcl
         {
           // give the product realistic 3D coordinates
           BCL_MessageVrb( "Cleaning product atoms");
-          AtomVector< AtomComplete> temp_vec( cleaner.CleanAtoms( parent_atoms, "None", true, true));
+          AtomVector< AtomComplete> temp_vec( cleaner.CleanAtoms( parent_atoms, descriptor::CheminfoProperty( "Constant(1.0)"), true, true));
           FragmentComplete clean_mol( temp_vec, "");
           BCL_MessageVrb( "Cleaning 3D coordinates of product");
           clean_mol = cleaner.Clean3DCoords( clean_mol);
diff --git a/source/chemistry/bcl_chemistry_score_function_generic.cpp b/source/chemistry/bcl_chemistry_score_function_generic.cpp
index eec51797f..a815233c4 100644
--- a/source/chemistry/bcl_chemistry_score_function_generic.cpp
+++ b/source/chemistry/bcl_chemistry_score_function_generic.cpp
@@ -31,6 +31,30 @@ namespace bcl
   namespace chemistry
   {
 
+  ///////////
+  // Enums //
+  ///////////
+
+    //! @brief CalculationType as string; CALCULATION_TYPE is used directly as the index into a static name table
+    //! @param CALCULATION_TYPE the calculation type whose name is desired
+    //! @return the name as string; the extra last entry (static class name) presumably serves s_NumberCalculationTypes
+    const std::string &ScoreFunctionGeneric::GetCalculationTypeName( const CalculationType &CALCULATION_TYPE)
+    {
+      static const std::string s_Names[ size_t( s_NumberCalculationTypes) + 1] =
+      {
+        "Index",
+        "Sum",
+        "Mean",
+        "Min",
+        "Max",
+        "NormMax",
+        "SoftMax",
+        "Entropy",
+        GetStaticClassName< CalculationType>()
+      };
+      return s_Names[ CALCULATION_TYPE];
+    }
+
   //////////
   // data //
   //////////
@@ -47,7 +71,12 @@ namespace bcl
 
     //! 
@brief default constructor ScoreFunctionGeneric::ScoreFunctionGeneric() : - m_Descriptor() + m_Descriptor( descriptor::CheminfoProperty()), + m_CalculationType( ScoreFunctionGeneric::e_Index), + m_PropertyIndex( 0), + m_Invert( false), + m_Normalize( false), + m_Noise( 0.00000001) { } @@ -57,7 +86,35 @@ namespace bcl ( const descriptor::CheminfoProperty &DESCRIPTOR ) : - m_Descriptor( DESCRIPTOR) + m_Descriptor( DESCRIPTOR), + m_CalculationType( ScoreFunctionGeneric::e_Index), + m_PropertyIndex( 0), + m_Invert( false), + m_Normalize( false), + m_Noise( 0.00000001) + { + } + + //! @brief constructor with all parameters + //! @param DESCRIPTOR the descriptor to use + //! @param INDEX the reference index for certain scores + //! @param INVERT invert each value in the property array + //! @param NORMALIZE normalize property array to sum of values + //! @param NOISE add some small value to bins to avoid ln(0) = nan + ScoreFunctionGeneric::ScoreFunctionGeneric + ( + const descriptor::CheminfoProperty &DESCRIPTOR, + const size_t INDEX, + const bool INVERT, + const bool NORMALIZE, + const double NOISE + ) : + m_Descriptor( DESCRIPTOR), + m_CalculationType( ScoreFunctionGeneric::e_Index), + m_PropertyIndex( INDEX), + m_Invert( INVERT), + m_Normalize( NORMALIZE), + m_Noise( NOISE) { } @@ -87,6 +144,96 @@ namespace bcl return s_alias; } + //! @brief return the value at a single index + const double ScoreFunctionGeneric::CalcIndexValue( const linal::Vector< float> &PROPERTIES) const + { + return PROPERTIES( m_PropertyIndex); + } + + //! @brief return the sum of property values + const double ScoreFunctionGeneric::CalcSum( const linal::Vector< float> &PROPERTIES) const + { + return PROPERTIES.Sum(); + } + + //! @brief return the mean descriptor value + const double ScoreFunctionGeneric::CalcMean( const linal::Vector< float> &PROPERTIES) const + { + return PROPERTIES.Sum() / PROPERTIES.GetSize(); + } + + //! 
@brief return the min descriptor value + const double ScoreFunctionGeneric::CalcMin( const linal::Vector< float> &PROPERTIES) const + { + return PROPERTIES.Min(); + } + + //! @brief return the max descriptor value + const double ScoreFunctionGeneric::CalcMax( const linal::Vector< float> &PROPERTIES) const + { + return PROPERTIES.Max(); + } + + //! @brief return the maximum value after prop_i/sum_0-->N(prop) + const double ScoreFunctionGeneric::CalcNormMax( const linal::Vector< float> &PROPERTIES) const + { + // need a non-const vector + linal::Vector< float> properties( PROPERTIES); + + const double &sum( CalcSum( properties)); + properties.Normalize(); + properties( m_PropertyIndex) /= sum; + return properties.Max(); + } + + //! @brief return the maximum value after exp(prop_i)/sum_0-->N(exp(prop)) + const double ScoreFunctionGeneric::CalcSoftMax( const linal::Vector< float> &PROPERTIES) const + { + // need a non-const vector + linal::Vector< float> properties( PROPERTIES); + + // compute exp(value) + for + ( + auto itr( properties.Begin()), itr_end( properties.End()); + itr != itr_end; + ++itr + ) + { + *itr = std::exp( *itr); + } + + // obtain normalized exponentials + Normalize( properties); + + // return the maximum exponential normalized value + return properties.Max(); + } + + //! 
@brief return the entropy of the dataset + const double ScoreFunctionGeneric::CalcEntropy( const linal::Vector< float> &PROPERTIES) const + { + // need a non-const vector + linal::Vector< float> properties( PROPERTIES); + + // denominator for relative likelihood + Normalize( properties); + + // compute p*ln(p) for each value, where p is a normalized bin value + for + ( + auto itr( properties.Begin()), itr_end( properties.End()); + itr != itr_end; + ++itr + ) + { + // add some noise inside the natural log to avoid nan + *itr = ( *itr) * ( std::log( *itr + m_Noise)); + } + + return -1.0 * CalcSum( properties); + } + /////////////// // operators // /////////////// @@ -99,25 +246,67 @@ namespace bcl const FragmentComplete &MOLECULE ) const { - // initialize activity - double activity( util::GetUndefinedDouble()); - // setup score function options if( m_Descriptor.IsDefined()) { // use passed property linal::Vector< double> properties( m_Descriptor->SumOverObject( MOLECULE)); - activity = properties.Sum() / properties.GetSize(); + + // invert each element of the property vector + if( m_Invert) + { + Invert( properties); + } + + // return raw value after potential inversion if the array is of size 1 + if( properties.GetSize() == size_t( 1)) + { + return properties( 0); + } + + // normalize property vector by sum + if( m_Normalize) + { + Normalize( properties); + } + } - else + // no no score defined; return 0.0 + BCL_MessageVrb( "No score defined; returning 0.0!"); + return 0.0; + } + + ////////////////////// + // helper functions // + ////////////////////// + + //! @brief invert each value of the property vector + void ScoreFunctionGeneric::Invert( linal::Vector< float> PROPERTIES) const + { + for + ( + auto itr( PROPERTIES.Begin()), itr_end( PROPERTIES.End()); + itr != itr_end; + ++itr + ) { - // flat score landscape - BCL_MessageStd( "No score defined; returning 0.0 to approximator!"); - activity = 0.0; + *itr = 1.0 / *itr; } + } - //end - return activity; + //! 
@brief normalize property vector by sum of all values + void ScoreFunctionGeneric::Normalize( linal::Vector< float> PROPERTIES) const + { + const double &sum( CalcSum( PROPERTIES)); + for + ( + auto itr( PROPERTIES.Begin()), itr_end( PROPERTIES.End()); + itr != itr_end; + ++itr + ) + { + *itr = *itr / sum; + } } ////////////////////// @@ -129,15 +318,63 @@ namespace bcl io::Serializer ScoreFunctionGeneric::GetSerializer() const { io::Serializer member_data; - - member_data.SetClassDescription( "scores molecules using the raw mean output from a descriptor"); - + member_data.SetClassDescription + ( + "Compute a chemical property and transform the resultant (potentially multi-dimensional) array " + "using one of several calculation types to return a final score" + ); member_data.AddInitializer ( "descriptor", - "the descriptor to calculate; if multi-valued, this will return the mean value.", + "the descriptor to calculate; " + "if multi-valued, this will be transformed with the specified calculation type.", io::Serialization::GetAgent( &m_Descriptor) ); + member_data.AddInitializer + ( + "calculation_type", + "transform multi-dimensional array output with one of these allowed " + "operations; final output will be a scalar value", + io::Serialization::GetAgent( &m_CalculationType), + "Index" + ); + member_data.AddInitializer + ( + "property_index", + "the index of interest in a multi-dimensional property; " + "no effect if the calculation type does not require a reference index", + io::Serialization::GetAgent( &m_PropertyIndex), + "0" + ); + member_data.AddInitializer + ( + "invert", + "invert property values; occurs prior to any normalization", + io::Serialization::GetAgent( &m_Invert), + "false" + ); + member_data.AddInitializer + ( + "normalize", + "normalize property values; occurs after any inversion; " + "note that this is a raw normalization, therefore if the sum of " + "all values is 0 then the resulting normalized vector will be undefined;" + "this behavior is 
kept intentionally because if all values in a multi- " + "dimensional array are 0 there is typically an issue.", + io::Serialization::GetAgent( &m_Normalize), + "false" + ); + member_data.AddInitializer + ( + "noise", + "noise added to each value prior to taking the natural logarithm of said value; " + "this allows calculation types such as 'SoftMax' to be estimated even when " + "some of the property bins are 0; " + "this is specific to calculation types that make use of logarithms.", + io::Serialization::GetAgent( &m_Noise), + "0.00000001" + ); + return member_data; }