diff --git a/CMakeLists.txt b/CMakeLists.txt index f4523040..a017baae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ project(MACIS VERSION 0.1 LANGUAGES C CXX) option( MACIS_ENABLE_MPI "Enable MPI Bindings" ON ) option( MACIS_ENABLE_OPENMP "Enable OpenMP Bindings" ON ) option( MACIS_ENABLE_BOOST "Enable Boost" ON ) +option( MACIS_ENABLE_TREXIO "Enable TrexIO" ON ) # CMake Modules list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) diff --git a/include/macis/asci/alpha_constraint.hpp b/include/macis/asci/alpha_constraint.hpp new file mode 100644 index 00000000..ae12ca3f --- /dev/null +++ b/include/macis/asci/alpha_constraint.hpp @@ -0,0 +1,82 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#pragma once + +#include +namespace macis { + +template +class alpha_constraint { + public: + using wfn_traits = WfnTraits; + using wfn_type = typename WfnTraits::wfn_type; + using spin_wfn_type = spin_wfn_t; + + using constraint_type = spin_wfn_type; + using constraint_traits = wavefunction_traits; + + private: + constraint_type C_; + constraint_type B_; + uint32_t C_min_; + uint32_t count_; + + public: + alpha_constraint(constraint_type C, constraint_type B, uint32_t C_min) + : C_(C), B_(B), C_min_(C_min), count_(constraint_traits::count(C)) {} + + alpha_constraint(const alpha_constraint&) = default; + alpha_constraint& operator=(const alpha_constraint&) = default; + + alpha_constraint(alpha_constraint&& other) noexcept = default; + alpha_constraint& operator=(alpha_constraint&&) noexcept = default; + + inline auto C() const { return C_; } + inline auto B() const { return B_; } + inline auto C_min() const { return C_min_; } + inline auto count() const { return count_; } + + inline spin_wfn_type c_mask_union(spin_wfn_type state) const { + return state & C_; + } + inline spin_wfn_type b_mask_union(spin_wfn_type state) const { + return state & B_; + } + + inline spin_wfn_type symmetric_difference(spin_wfn_type state) const { + return state ^ C_; + } + // inline spin_wfn_type symmetric_difference(wfn_type state) const { + // return symmetric_difference(wfn_traits::alpha_string(state)); + // } + + template + inline auto overlap(WfnType state) const { + return constraint_traits::count(c_mask_union(state)); + } + + template + inline bool satisfies_constraint(WfnType state) const { + return overlap(state) == count_ and + constraint_traits::count(symmetric_difference(state) >> C_min_) == 0; + } + + static alpha_constraint make_triplet(unsigned i, unsigned j, unsigned k) { + constraint_type C = 0; + C.flip(i).flip(j).flip(k); + // constraint_type B = 1; + // static_assert(B.size() <= 64, "ULLONG NOT POSSIBLE HERE"); + // B <<= k; + // B = B.to_ullong() - 1; + constraint_type B = full_mask(k); + return alpha_constraint(C, B, k); + } +}; + +} // namespace macis diff --git a/include/macis/asci/determinant_contributions.hpp b/include/macis/asci/determinant_contributions.hpp index 7b3315f3..3de8de56 100644 --- a/include/macis/asci/determinant_contributions.hpp +++ b/include/macis/asci/determinant_contributions.hpp @@ -16,22 +16,27 @@ namespace macis { template struct asci_contrib { WfnT state; - double rv; + double c_times_matel; + double h_diag; + + auto rv() const { return c_times_matel / h_diag; } + auto pt2() const { return rv() * c_times_matel; } }; template using asci_contrib_container = std::vector>; -template +template void append_singles_asci_contributions( - double coeff, wfn_t<2 * N> state_full, wfn_t state_same, + double coeff, WfnType state_full, SpinWfnType state_same, const std::vector& occ_same, const std::vector& vir_same, const std::vector& occ_othr, const double* eps_same, const double* T_pq, const size_t LDT, const double* G_kpq, const size_t LDG, const double* V_kpq, const size_t LDV, double h_el_tol, double root_diag, - double E0, HamiltonianGenerator<2 * N>& ham_gen, - asci_contrib_container>& asci_contributions) { + double E0, const HamiltonianGeneratorBase& ham_gen, + asci_contrib_container& asci_contributions) { + using wfn_traits = wavefunction_traits; const auto LDG2 = LDG * LDG; const auto LDV2 = LDV * LDV; for(auto i : occ_same) @@ -44,11 +49,11 @@ void append_singles_asci_contributions( for(auto p : occ_othr) h_el += V_ov[p]; // Early Exit - if(std::abs(h_el) < h_el_tol) continue; + if(std::abs(coeff * h_el) < h_el_tol) continue; // Calculate Excited Determinant - auto ex_det = state_full; - ex_det.flip(i + NShift).flip(a + NShift); + auto ex_det = wfn_traits::template single_excitation_no_check( + state_full, i, a); // Calculate Excitation Sign in a Canonical Way auto sign = single_excitation_sign(state_same, a, i); @@ -57,22 +62,25 @@ void append_singles_asci_contributions( // Calculate fast diagonal matrix element auto h_diag = ham_gen.fast_diag_single(eps_same[i], eps_same[a], i, a, root_diag); - h_el /= (E0 - h_diag); + // h_el /= (E0 - h_diag); // Append to return values - asci_contributions.push_back({ex_det, coeff * h_el}); + asci_contributions.push_back({ex_det, coeff * h_el, E0 - h_diag}); } // Loop over single extitations } -template +template void append_ss_doubles_asci_contributions( - double coeff, wfn_t<2 * N> state_full, wfn_t state_spin, - const std::vector& ss_occ, const std::vector& vir, - const std::vector& os_occ, const double* eps_same, - const double* G, size_t LDG, double h_el_tol, double root_diag, double E0, - HamiltonianGenerator<2 * N>& ham_gen, - asci_contrib_container>& asci_contributions) { + double coeff, WfnType state_full, SpinWfnType state_same, + SpinWfnType state_other, const std::vector& ss_occ, + const std::vector& vir, const std::vector& os_occ, + const double* eps_same, const double* G, size_t LDG, double h_el_tol, + double root_diag, double E0, + const HamiltonianGeneratorBase& ham_gen, + asci_contrib_container& asci_contributions) { + using wfn_traits = wavefunction_traits; + using spin_wfn_traits = wavefunction_traits; const size_t nocc = ss_occ.size(); const size_t nvir = vir.size(); @@ -90,8 +98,9 @@ void append_ss_doubles_asci_contributions( const auto jb = b + j * LDG; const auto G_aibj = G_ai[jb]; - if(std::abs(G_aibj) < h_el_tol) continue; + if(std::abs(coeff * G_aibj) < h_el_tol) continue; +#if 0 // Calculate excited determinant string (spin) const auto full_ex_spin = wfn_t(0).flip(i).flip(j).flip(a).flip(b); auto ex_det_spin = state_spin ^ full_ex_spin; @@ -102,6 +111,21 @@ void append_ss_doubles_asci_contributions( // Calculate full excited determinant const auto full_ex = expand_bitset<2 * N>(full_ex_spin) << NShift; auto ex_det = state_full ^ full_ex; +#else + // TODO: Can this be made faster since the orbital indices are known + // in advance? + // Compute excited determinant (spin) + const auto full_ex_spin = spin_wfn_traits::double_excitation_no_check( + SpinWfnType(0), i, j, a, b); + const auto ex_det_spin = state_same ^ full_ex_spin; + + // Calculate the sign in a canonical way + double sign = doubles_sign(state_same, ex_det_spin, full_ex_spin); + + // Calculate full excited determinant + auto ex_det = + wfn_traits::template from_spin(ex_det_spin, state_other); +#endif // Update sign of matrix element auto h_el = sign * G_aibj; @@ -110,25 +134,27 @@ void append_ss_doubles_asci_contributions( auto h_diag = ham_gen.fast_diag_ss_double(eps_same[i], eps_same[j], eps_same[a], eps_same[b], i, j, a, b, root_diag); - h_el /= (E0 - h_diag); + // h_el /= (E0 - h_diag); // Append {det, c*h_el} - asci_contributions.push_back({ex_det, coeff * h_el}); + asci_contributions.push_back({ex_det, coeff * h_el, E0 - h_diag}); } // Restricted BJ loop } // AI Loop } -template +template void append_os_doubles_asci_contributions( - double coeff, wfn_t<2 * N> state_full, wfn_t state_alpha, - wfn_t state_beta, const std::vector& occ_alpha, + double coeff, WfnType state_full, SpinWfnType state_alpha, + SpinWfnType state_beta, const std::vector& occ_alpha, const std::vector& occ_beta, const std::vector& vir_alpha, const std::vector& vir_beta, const double* eps_alpha, const double* eps_beta, const double* V, size_t LDV, double h_el_tol, - double root_diag, double E0, HamiltonianGenerator<2 * N>& ham_gen, - asci_contrib_container>& asci_contributions) { + double root_diag, double E0, + const HamiltonianGeneratorBase& ham_gen, + asci_contrib_container& asci_contributions) { + using wfn_traits = wavefunction_traits; const size_t LDV2 = LDV * LDV; for(auto i : occ_alpha) for(auto a : vir_alpha) { @@ -140,21 +166,26 @@ void append_os_doubles_asci_contributions( const auto jb = b + j * LDV; const auto V_aibj = V_ai[jb * LDV2]; - if(std::abs(V_aibj) < h_el_tol) continue; + if(std::abs(coeff * V_aibj) < h_el_tol) continue; double sign_beta = single_excitation_sign(state_beta, b, j); double sign = sign_alpha * sign_beta; - auto ex_det = state_full; - ex_det.flip(a).flip(i).flip(j + N).flip(b + N); + // auto ex_det = state_full; + // ex_det.flip(a).flip(i).flip(j + N).flip(b + N); + auto ex_det = + wfn_traits::template single_excitation_no_check( + state_full, a, i); + ex_det = wfn_traits::template single_excitation_no_check( + ex_det, b, j); auto h_el = sign * V_aibj; // Evaluate fast diagonal element auto h_diag = ham_gen.fast_diag_os_double(eps_alpha[i], eps_beta[j], eps_alpha[a], eps_beta[b], i, j, a, b, root_diag); - h_el /= (E0 - h_diag); + // h_el /= (E0 - h_diag); - asci_contributions.push_back({ex_det, coeff * h_el}); + asci_contributions.push_back({ex_det, coeff * h_el, E0 - h_diag}); } // BJ loop } // AI loop } diff --git a/include/macis/asci/determinant_search.hpp b/include/macis/asci/determinant_search.hpp index 7d66813c..7b1bae34 100644 --- a/include/macis/asci/determinant_search.hpp +++ b/include/macis/asci/determinant_search.hpp @@ -7,6 +7,7 @@ */ #pragma once +#include #include #include #include @@ -27,7 +28,7 @@ template struct asci_contrib_topk_comparator { using type = asci_contrib; constexpr bool operator()(const type& a, const type& b) const { - return std::abs(a.rv) > std::abs(b.rv); + return std::abs(a.rv()) > std::abs(b.rv()); } }; @@ -38,6 +39,18 @@ struct ASCISettings { double h_el_tol = 1e-8; double rv_prune_tol = 1e-8; size_t pair_size_max = 5e8; + + double pt2_tol = 1e-16; + size_t pt2_reserve_count = 70000000; + bool pt2_prune = false; + bool pt2_precompute_eps = false; + bool pt2_precompute_idx = false; + bool pt2_print_progress = false; + size_t pt2_bigcon_thresh = 250; + + size_t nxtval_bcount_thresh = 1000; + size_t nxtval_bcount_inc = 10; + bool just_singles = false; size_t grow_factor = 8; size_t max_refine_iter = 6; @@ -48,6 +61,9 @@ struct ASCISettings { // bool dist_triplet_random = false; int constraint_level = 2; // Up To Quints + int pt2_max_constraint_level = 5; + int pt2_min_constraint_level = 0; + int64_t pt2_constraint_refine_force = 0; }; template @@ -56,7 +72,10 @@ asci_contrib_container> asci_contributions_standard( wavefunction_iterator_t cdets_end, const double E_ASCI, const std::vector& C, size_t norb, const double* T_pq, const double* G_red, const double* V_red, const double* G_pqrs, - const double* V_pqrs, HamiltonianGenerator& ham_gen) { + const double* V_pqrs, HamiltonianGenerator>& ham_gen) { + using wfn_traits = wavefunction_traits>; + using spin_wfn_type = spin_wfn_t>; + using spin_wfn_traits = wavefunction_traits; auto logger = spdlog::get("asci_search"); const size_t ncdets = std::distance(cdets_begin, cdets_end); @@ -68,13 +87,13 @@ asci_contrib_container> asci_contributions_standard( for(size_t i = 0; i < ncdets; ++i) { // Alias state data auto state = *(cdets_begin + i); - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); + auto state_alpha = wfn_traits::alpha_string(state); + auto state_beta = wfn_traits::beta_string(state); auto coeff = C[i]; // Get occupied and virtual indices - bitset_to_occ_vir(norb, state_alpha, occ_alpha, vir_alpha); - bitset_to_occ_vir(norb, state_beta, occ_beta, vir_beta); + spin_wfn_traits::state_to_occ_vir(norb, state_alpha, occ_alpha, vir_alpha); + spin_wfn_traits::state_to_occ_vir(norb, state_beta, occ_beta, vir_beta); // Precompute orbital energies auto eps_alpha = ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); @@ -86,27 +105,27 @@ asci_contrib_container> asci_contributions_standard( const double h_el_tol = asci_settings.h_el_tol; // Singles - AA - append_singles_asci_contributions<(N / 2), 0>( + append_singles_asci_contributions( coeff, state, state_alpha, occ_alpha, vir_alpha, occ_beta, eps_alpha.data(), T_pq, norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); // Singles - BB - append_singles_asci_contributions<(N / 2), (N / 2)>( + append_singles_asci_contributions( coeff, state, state_beta, occ_beta, vir_beta, occ_alpha, eps_beta.data(), T_pq, norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); if(not asci_settings.just_singles) { // Doubles - AAAA - append_ss_doubles_asci_contributions( - coeff, state, state_alpha, occ_alpha, vir_alpha, occ_beta, + append_ss_doubles_asci_contributions( + coeff, state, state_alpha, state_beta, occ_alpha, vir_alpha, occ_beta, eps_alpha.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); // Doubles - BBBB - append_ss_doubles_asci_contributions( - coeff, state, state_beta, occ_beta, vir_beta, occ_alpha, + append_ss_doubles_asci_contributions( + coeff, state, state_beta, state_alpha, occ_beta, vir_beta, occ_alpha, eps_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); @@ -122,7 +141,7 @@ asci_contrib_container> asci_contributions_standard( // Remove small contributions auto it = std::partition( asci_pairs.begin(), asci_pairs.end(), [=](const auto& x) { - return std::abs(x.rv) > asci_settings.rv_prune_tol; + return std::abs(x.rv()) > asci_settings.rv_prune_tol; }); asci_pairs.erase(it, asci_pairs.end()); logger->info(" * Pruning at DET = {} NSZ = {}", i, asci_pairs.size()); @@ -143,37 +162,31 @@ asci_contrib_container> asci_contributions_standard( #ifdef MACIS_ENABLE_MPI template asci_contrib_container> asci_contributions_constraint( - ASCISettings asci_settings, wavefunction_iterator_t cdets_begin, + ASCISettings asci_settings, const size_t ntdets, + wavefunction_iterator_t cdets_begin, wavefunction_iterator_t cdets_end, const double E_ASCI, const std::vector& C, size_t norb, const double* T_pq, const double* G_red, const double* V_red, const double* G_pqrs, - const double* V_pqrs, HamiltonianGenerator& ham_gen, MPI_Comm comm) { + const double* V_pqrs, HamiltonianGenerator>& ham_gen, + MPI_Comm comm) { using clock_type = std::chrono::high_resolution_clock; using duration_type = std::chrono::duration; + using wfn_traits = wavefunction_traits>; + using spin_wfn_type = spin_wfn_t>; + using spin_wfn_traits = wavefunction_traits; + using wfn_comp = typename wfn_traits::spin_comparator; + if(!std::is_sorted(cdets_begin, cdets_end, wfn_comp{})) + throw std::runtime_error("ASCI Search Only Works with Sorted Wfns"); auto logger = spdlog::get("asci_search"); const size_t ncdets = std::distance(cdets_begin, cdets_end); - asci_contrib_container> asci_pairs; - std::vector occ_alpha, vir_alpha; - std::vector occ_beta, vir_beta; - - // Get unique alpha strings - std::vector> uniq_alpha_wfn(cdets_begin, cdets_end); - std::transform(uniq_alpha_wfn.begin(), uniq_alpha_wfn.end(), - uniq_alpha_wfn.begin(), - [=](const auto& w) { return w & full_mask(); }); - std::sort(uniq_alpha_wfn.begin(), uniq_alpha_wfn.end(), - bitset_less_comparator{}); - { - auto it = std::unique(uniq_alpha_wfn.begin(), uniq_alpha_wfn.end()); - uniq_alpha_wfn.erase(it, uniq_alpha_wfn.end()); - } - const size_t nuniq_alpha = uniq_alpha_wfn.size(); + auto world_rank = comm_rank(comm); + auto world_size = comm_size(comm); // For each unique alpha, create a list of beta string and store metadata struct beta_coeff_data { - wfn_t beta_string; + spin_wfn_type beta_string; std::vector occ_beta; std::vector vir_beta; std::vector orb_ens_alpha; @@ -183,19 +196,16 @@ asci_contrib_container> asci_contributions_constraint( beta_coeff_data(double c, size_t norb, const std::vector& occ_alpha, wfn_t w, - const HamiltonianGenerator& ham_gen) { + const HamiltonianGenerator>& ham_gen) { coeff = c; - // Compute Beta string - const auto beta_shift = w >> N / 2; - // Reduce the number of times things shift in inner loop - beta_string = beta_shift << N / 2; + beta_string = wfn_traits::beta_string(w); // Compute diagonal matrix element h_diag = ham_gen.matrix_element(w, w); // Compute occ/vir for beta string - bitset_to_occ_vir(norb, beta_shift, occ_beta, vir_beta); + spin_wfn_traits::state_to_occ_vir(norb, beta_string, occ_beta, vir_beta); // Precompute orbital energies orb_ens_alpha = ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); @@ -203,32 +213,40 @@ asci_contrib_container> asci_contributions_constraint( } }; - struct unique_alpha_data { - std::vector bcd; - }; + auto uniq_alpha = get_unique_alpha(cdets_begin, cdets_end); + const size_t nuniq_alpha = uniq_alpha.size(); + std::vector> uniq_alpha_wfn(nuniq_alpha); + std::transform( + uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_wfn.begin(), + [](const auto& p) { return wfn_traits::from_spin(p.first, 0); }); + using unique_alpha_data = std::vector; std::vector uad(nuniq_alpha); - for(auto i = 0; i < nuniq_alpha; ++i) { - const auto wfn_a = uniq_alpha_wfn[i]; + for(auto i = 0, iw = 0; i < nuniq_alpha; ++i) { std::vector occ_alpha, vir_alpha; - bitset_to_occ_vir(norb, wfn_a, occ_alpha, vir_alpha); - for(auto j = 0; j < ncdets; ++j) { - const auto w = *(cdets_begin + j); - if((w & full_mask()) == wfn_a) { - uad[i].bcd.emplace_back(C[j], norb, occ_alpha, w, ham_gen); - } + spin_wfn_traits::state_to_occ_vir(norb, uniq_alpha[i].first, occ_alpha, + vir_alpha); + + const auto nbeta = uniq_alpha[i].second; + uad[i].reserve(nbeta); + for(auto j = 0; j < nbeta; ++j, ++iw) { + const auto& w = *(cdets_begin + iw); + uad[i].emplace_back(C[iw], norb, occ_alpha, w, ham_gen); } } - auto world_rank = comm_rank(comm); - auto world_size = comm_size(comm); - - const auto n_occ_alpha = uniq_alpha_wfn[0].count(); + // const auto n_occ_alpha = wfn_traits::count(uniq_alpha_wfn[0]); + const auto n_occ_alpha = spin_wfn_traits::count(uniq_alpha[0].first); const auto n_vir_alpha = norb - n_occ_alpha; const auto n_sing_alpha = n_occ_alpha * n_vir_alpha; const auto n_doub_alpha = (n_sing_alpha * (n_sing_alpha - norb + 1)) / 4; - logger->info(" * NS = {} ND = {}", n_sing_alpha, n_doub_alpha); + const auto n_occ_beta = cdets_begin->count() - n_occ_alpha; + const auto n_vir_beta = norb - n_occ_beta; + const auto n_sing_beta = n_occ_beta * n_vir_beta; + const auto n_doub_beta = (n_sing_beta * (n_sing_beta - norb + 1)) / 4; + + // logger->info(" * NS = {} ND = {}", n_sing_alpha, n_doub_alpha); // Generate mask constraints if(!world_rank) { @@ -255,139 +273,152 @@ asci_contrib_container> asci_contributions_constraint( } auto gen_c_st = clock_type::now(); - auto constraints = - dist_constraint_general(asci_settings.constraint_level, norb, - n_sing_alpha, n_doub_alpha, uniq_alpha_wfn, comm); + auto constraints = gen_constraints_general>( + asci_settings.constraint_level, norb, n_sing_beta, n_doub_beta, + uniq_alpha, world_size); auto gen_c_en = clock_type::now(); duration_type gen_c_dur = gen_c_en - gen_c_st; logger->info(" * GEN_DUR = {:.2e} ms", gen_c_dur.count()); - size_t max_size = std::min(asci_settings.pair_size_max, - ncdets * (2 * n_sing_alpha + // AA + BB - 2 * n_doub_alpha + // AAAA + BBBB - n_sing_alpha * n_sing_alpha // AABB - )); - asci_pairs.reserve(max_size); - - // Process ASCI pair contributions for each constraint - for(auto con : constraints) { - auto size_before = asci_pairs.size(); - - const double h_el_tol = asci_settings.h_el_tol; - const auto& [C, B, C_min] = con; - wfn_t O = full_mask(norb); - - // Loop over unique alpha strings - for(size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) { - const auto& det = uniq_alpha_wfn[i_alpha]; - const auto occ_alpha = bits_to_indices(det); - - // AA excitations - for(const auto& bcd : uad[i_alpha].bcd) { - const auto& beta = bcd.beta_string; - const auto& coeff = bcd.coeff; - const auto& h_diag = bcd.h_diag; - const auto& occ_beta = bcd.occ_beta; - const auto& orb_ens_alpha = bcd.orb_ens_alpha; - generate_constraint_singles_contributions_ss( - coeff, det, C, O, B, beta, occ_alpha, occ_beta, - orb_ens_alpha.data(), T_pq, norb, G_red, norb, V_red, norb, - h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - } + size_t max_size = + std::min(std::min(ntdets, asci_settings.pair_size_max), + ncdets * (n_sing_alpha + n_sing_beta + // AA + BB + n_doub_alpha + n_doub_beta + // AAAA + BBBB + n_sing_alpha * n_sing_beta // AABB + )); - // AAAA excitations - for(const auto& bcd : uad[i_alpha].bcd) { - const auto& beta = bcd.beta_string; - const auto& coeff = bcd.coeff; - const auto& h_diag = bcd.h_diag; - const auto& occ_beta = bcd.occ_beta; - const auto& orb_ens_alpha = bcd.orb_ens_alpha; - generate_constraint_doubles_contributions_ss( - coeff, det, C, O, B, beta, occ_alpha, occ_beta, - orb_ens_alpha.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, - ham_gen, asci_pairs); - } + const size_t ncon_total = constraints.size(); - // AABB excitations - for(const auto& bcd : uad[i_alpha].bcd) { - const auto& beta = bcd.beta_string; - const auto& coeff = bcd.coeff; - const auto& h_diag = bcd.h_diag; - const auto& occ_beta = bcd.occ_beta; - const auto& vir_beta = bcd.vir_beta; - const auto& orb_ens_alpha = bcd.orb_ens_alpha; - const auto& orb_ens_beta = bcd.orb_ens_beta; - generate_constraint_doubles_contributions_os( - coeff, det, C, O, B, beta, occ_alpha, occ_beta, vir_beta, - orb_ens_alpha.data(), orb_ens_beta.data(), V_pqrs, norb, h_el_tol, - h_diag, E_ASCI, ham_gen, asci_pairs); - } + // Global atomic task-id counter + global_atomic nxtval(comm); - // If the alpha determinant satisfies the constraint, - // append BB and BBBB excitations - if(satisfies_constraint(det, C, C_min)) { - for(const auto& bcd : uad[i_alpha].bcd) { - const auto& beta = bcd.beta_string; - const auto& coeff = bcd.coeff; - const auto& h_diag = bcd.h_diag; - const auto& occ_beta = bcd.occ_beta; - const auto& vir_beta = bcd.vir_beta; - const auto& eps_beta = bcd.orb_ens_beta; - - const auto state = det | beta; - const auto state_beta = bitset_hi_word(beta); - // BB Excitations - append_singles_asci_contributions<(N / 2), (N / 2)>( - coeff, state, state_beta, occ_beta, vir_beta, occ_alpha, - eps_beta.data(), T_pq, norb, G_red, norb, V_red, norb, h_el_tol, - h_diag, E_ASCI, ham_gen, asci_pairs); - - // BBBB Excitations - append_ss_doubles_asci_contributions( - coeff, state, state_beta, occ_beta, vir_beta, occ_alpha, - eps_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen, - asci_pairs); - - } // Beta Loop - } // Triplet Check - - // Prune Down Contributions - if(asci_pairs.size() > asci_settings.pair_size_max) { - // Remove small contributions - auto it = std::partition( - asci_pairs.begin(), asci_pairs.end(), [=](const auto& x) { - return std::abs(x.rv) > asci_settings.rv_prune_tol; - }); - asci_pairs.erase(it, asci_pairs.end()); - - auto c_indices = bits_to_indices(C); - std::string c_string; - for(int i = 0; i < c_indices.size(); ++i) - c_string += std::to_string(c_indices[i]) + " "; - logger->info(" * Pruning at CON = {}, NSZ = {}", c_string, - asci_pairs.size()); - - // Extra Pruning if not sufficient - if(asci_pairs.size() > asci_settings.pair_size_max) { - logger->info(" * Removing Duplicates"); + asci_contrib_container> asci_pairs_total; +#pragma omp parallel + { + // Process ASCI pair contributions for each constraint + asci_contrib_container> asci_pairs; + asci_pairs.reserve(max_size); + + size_t ic = 0; + while(ic < ncon_total) { + auto size_before = asci_pairs.size(); + const double h_el_tol = asci_settings.h_el_tol; + + // Atomically get the next task ID and increment for other + // MPI ranks and threads + size_t ntake = ic < asci_settings.nxtval_bcount_thresh + ? 1 + : asci_settings.nxtval_bcount_inc; + ic = nxtval.fetch_and_add(ntake); + + // Loop over assigned tasks + const size_t c_end = std::min(ncon_total, ic + ntake); + for(; ic < c_end; ++ic) { + const auto& con = constraints[ic].first; + // printf("[rank %4d tid:%4d] %10lu / %10lu\n", world_rank, + // omp_get_thread_num(), ic, ncon_total); + + for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) { + const auto& alpha_det = uniq_alpha[i_alpha].first; + const auto occ_alpha = bits_to_indices(alpha_det); + const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); + + const auto& bcd = uad[i_alpha]; + const size_t nbeta = bcd.size(); + for(size_t j_beta = 0; j_beta < nbeta; ++j_beta, ++iw) { + const auto w = *(cdets_begin + iw); + const auto c = C[iw]; + const auto& beta_det = bcd[j_beta].beta_string; + const auto h_diag = bcd[j_beta].h_diag; + const auto& occ_beta = bcd[j_beta].occ_beta; + const auto& vir_beta = bcd[j_beta].vir_beta; + const auto& orb_ens_alpha = bcd[j_beta].orb_ens_alpha; + const auto& orb_ens_beta = bcd[j_beta].orb_ens_beta; + + // AA excitations + generate_constraint_singles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, + norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // AAAA excitations + generate_constraint_doubles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, + norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // AABB excitations + generate_constraint_doubles_contributions_os( + c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), + orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + if(alpha_satisfies_con) { + // BB excitations + append_singles_asci_contributions( + c, w, beta_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // BBBB excitations + append_ss_doubles_asci_contributions( + c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // No excitation (push inf to remove from list) + asci_pairs.push_back( + {w, std::numeric_limits::infinity(), 1.0}); + } + } + + // Prune Down Contributions + if(asci_pairs.size() > asci_settings.pair_size_max) { + logger->info(" * PRUNING AT CON = {} IALPHA = {}", ic, i_alpha); + auto uit = sort_and_accumulate_asci_pairs( + asci_pairs.begin() + size_before, asci_pairs.end()); + asci_pairs.erase(uit, asci_pairs.end()); + if(asci_pairs.size() > asci_settings.pair_size_max) + throw std::runtime_error("DIE DIE DIE"); + } + + } // Unique Alpha Loop + + // Local S&A for each quad + { + if(size_before > asci_pairs.size()) + throw std::runtime_error("DIE DIE DIE"); auto uit = sort_and_accumulate_asci_pairs( asci_pairs.begin() + size_before, asci_pairs.end()); asci_pairs.erase(uit, asci_pairs.end()); - logger->info(" * NSZ = {}", asci_pairs.size()); - } - } // Pruning - } // Unique Alpha Loop + // Remove small contributions + uit = std::partition(asci_pairs.begin() + size_before, + asci_pairs.end(), [=](const auto& x) { + return std::abs(x.rv()) > + asci_settings.rv_prune_tol; + }); + asci_pairs.erase(uit, asci_pairs.end()); + } + } // Loc constraint loop + } // Constraint Loop - // Local S&A for each quad +// Insert into list +#pragma omp critical { - auto uit = sort_and_accumulate_asci_pairs( - asci_pairs.begin() + size_before, asci_pairs.end()); - asci_pairs.erase(uit, asci_pairs.end()); + if(asci_pairs_total.size()) { + // Preallocate space for insertion + asci_pairs_total.reserve(asci_pairs.size() + asci_pairs_total.size()); + asci_pairs_total.insert(asci_pairs_total.end(), asci_pairs.begin(), + asci_pairs.end()); + } else { + asci_pairs_total = std::move(asci_pairs); + } + asci_contrib_container>().swap(asci_pairs); } - } // Constraint Loop - return asci_pairs; + } // OpenMP + + return asci_pairs_total; } #endif @@ -399,7 +430,7 @@ std::vector> asci_search( const std::vector& C, size_t norb, const double* T_pq, const double* G_red, const double* V_red, const double* G_pqrs, const double* V_pqrs, - HamiltonianGenerator& ham_gen MACIS_MPI_CODE(, MPI_Comm comm)) { + HamiltonianGenerator>& ham_gen MACIS_MPI_CODE(, MPI_Comm comm)) { using clock_type = std::chrono::high_resolution_clock; using duration_type = std::chrono::duration; @@ -437,6 +468,9 @@ std::vector> asci_search( ncdets, ndets_max, asci_settings.h_el_tol, asci_settings.rv_prune_tol); logger->info(" MAX_RV_SIZE = {}, JUST_SINGLES = {}", asci_settings.pair_size_max, asci_settings.just_singles); + logger->info(" CDET_SUM = {:.2e}", + std::accumulate(C.begin(), C.begin() + ncdets, 0.0, + [](auto s, auto c) { return s + c * c; })); MACIS_MPI_CODE(MPI_Barrier(comm);) auto asci_search_st = clock_type::now(); @@ -444,16 +478,16 @@ std::vector> asci_search( // Expand Search Space with Connected ASCI Contributions auto pairs_st = clock_type::now(); asci_contrib_container> asci_pairs; - if(world_size == 1) - asci_pairs = asci_contributions_standard( - asci_settings, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq, G_red, - V_red, G_pqrs, V_pqrs, ham_gen); -#ifdef MACIS_ENABLE_MPI - else - asci_pairs = asci_contributions_constraint( - asci_settings, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq, G_red, - V_red, G_pqrs, V_pqrs, ham_gen MACIS_MPI_CODE(, comm)); -#endif + // if(world_size == 1) + // asci_pairs = asci_contributions_standard( + // asci_settings, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq, G_red, + // V_red, G_pqrs, V_pqrs, ham_gen); + // #ifdef MACIS_ENABLE_MPI + // else + asci_pairs = asci_contributions_constraint( + asci_settings, ndets_max, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq, + G_red, V_red, G_pqrs, V_pqrs, ham_gen MACIS_MPI_CODE(, comm)); + // #endif auto pairs_en = clock_type::now(); { @@ -463,6 +497,8 @@ std::vector> asci_search( size_t npairs = asci_pairs.size(); #endif logger->info(" * ASCI Kept {} Pairs", npairs); + if(npairs < ndets_max) + logger->info(" * WARNING: Kept ASCI pairs less than requested TDETS"); #ifdef MACIS_ENABLE_MPI if(world_size > 1) { @@ -526,8 +562,12 @@ std::vector> asci_search( } auto keep_large_st = clock_type::now(); +#if 0 // Finalize scores - for(auto& x : asci_pairs) x.rv = -std::abs(x.rv); + for(auto& x : asci_pairs) { + x.c_times_matel = -std::abs(x.c_times_matel); + x.h_diag = std::abs(x.h_diag); + } // Insert all dets with their coefficients as seeds for(size_t i = 0; i < ncdets; ++i) { @@ -540,11 +580,18 @@ std::vector> asci_search( keep_only_largest_copy_asci_pairs(asci_pairs); asci_pairs.erase(std::partition(asci_pairs.begin(), asci_pairs.end(), - [](const auto& p) { return p.rv < 0.0; }), + [](const auto& p) { return p.rv() < 0.0; }), asci_pairs.end()); - // Only do top-K on (ndets_max - ncdets) b/c CDETS will be added later - const size_t top_k_elements = ndets_max - ncdets; +#else + // Remove core dets + // XXX: This assumes the constraint search for now + { + auto inf_ptr = std::partition(asci_pairs.begin(), asci_pairs.end(), + [](auto& p) { return !std::isinf(p.rv()); }); + asci_pairs.erase(inf_ptr, asci_pairs.end()); + } +#endif auto keep_large_en = clock_type::now(); duration_type keep_large_dur = keep_large_en - keep_large_st; @@ -560,6 +607,9 @@ std::vector> asci_search( logger->info(" * KEEP_LARG_DUR = {:.2e} s", keep_large_dur.count()); } + // Only do top-K on (ndets_max - ncdets) b/c CDETS will be added later + const size_t top_k_elements = ndets_max - ncdets; + // Do Top-K to get the largest determinant contributions auto asci_sort_st = clock_type::now(); if(world_size > 1 or asci_pairs.size() > top_k_elements) { @@ -569,18 +619,19 @@ std::vector> asci_search( // Strip scores std::vector scores(asci_pairs.size()); std::transform(asci_pairs.begin(), asci_pairs.end(), scores.begin(), - [](const auto& p) { return std::abs(p.rv); }); + [](const auto& p) { return std::abs(p.rv()); }); // Determine kth-ranked scores auto kth_score = dist_quickselect(scores.begin(), scores.end(), top_k_elements, comm, std::greater{}, std::equal_to{}); + logger->info(" * Kth Score Pivot = {:.2e}", kth_score); // Partition local pairs into less / eq batches auto [g_begin, e_begin, l_begin, _end] = leg_partition( asci_pairs.begin(), asci_pairs.end(), kth_score, - [=](const auto& p, const auto& s) { return std::abs(p.rv) > s; }, - [=](const auto& p, const auto& s) { return std::abs(p.rv) == s; }); + [=](const auto& p, const auto& s) { return std::abs(p.rv()) > s; }, + [=](const auto& p, const auto& s) { return std::abs(p.rv()) == s; }); // Determine local counts size_t n_greater = std::distance(g_begin, e_begin); diff --git a/include/macis/asci/determinant_sort.hpp b/include/macis/asci/determinant_sort.hpp index 829f7240..ec738f14 100644 --- a/include/macis/asci/determinant_sort.hpp +++ b/include/macis/asci/determinant_sort.hpp @@ -11,6 +11,7 @@ #if __has_include() #define MACIS_USE_BOOST_SORT #include +#include #endif namespace macis { @@ -36,6 +37,54 @@ void reorder_ci_on_coeff(std::vector& dets, std::vector& C) { dets = std::move(reorder_dets); } +template +void reorder_ci_on_alpha(WfnIterator begin, WfnIterator end, double* C) { + using wfn_type = typename WfnIterator::value_type; + using wfn_traits = wavefunction_traits; + using cmp_type = typename wfn_traits::spin_comparator; + const size_t ndets = std::distance(begin, end); + + cmp_type comparator{}; + std::vector idx(ndets); + std::iota(idx.begin(), idx.end(), 0); + std::sort(idx.begin(), idx.end(), [&](auto i, auto j) { + return comparator(*(begin + i), *(begin + j)); + }); + + std::vector reorder_C(ndets); + std::vector reorder_dets(ndets); + for(auto i = 0ul; i < ndets; ++i) { + reorder_C[i] = C[idx[i]]; + reorder_dets[i] = *(begin + idx[i]); + } + + std::copy(reorder_dets.begin(), reorder_dets.end(), begin); + std::copy(reorder_C.begin(), reorder_C.end(), C); +} + +template +PairIterator accumulate_asci_pairs(PairIterator pairs_begin, + PairIterator pairs_end) { + // Accumulate the ASCI scores into first instance of unique bitstrings + auto cur_it = pairs_begin; + for(auto it = cur_it + 1; it != pairs_end; ++it) { + // If iterate is not the one being tracked, update the iterator + if(it->state != cur_it->state) { + cur_it = it; + } + + // Accumulate + else { + cur_it->c_times_matel += it->c_times_matel; + it->c_times_matel = NAN; // Zero out to expose potential bugs + } + } + + // Remote duplicate bitstrings + return std::unique(pairs_begin, pairs_end, + [](auto x, auto y) { return x.state == y.state; }); +} + template PairIterator sort_and_accumulate_asci_pairs(PairIterator pairs_begin, PairIterator pairs_end) { @@ -55,24 +104,29 @@ PairIterator sort_and_accumulate_asci_pairs(PairIterator pairs_begin, #endif (pairs_begin, pairs_end, comparator); - // Accumulate the ASCI scores into first instance of unique bitstrings - auto cur_it = pairs_begin; - for(auto it = cur_it + 1; it != pairs_end; ++it) { - // If iterate is not the one being tracked, update the iterator - if(it->state != cur_it->state) { - cur_it = it; - } + return accumulate_asci_pairs(pairs_begin, pairs_end); +} - // Accumulate - else { - cur_it->rv += it->rv; - it->rv = 0; // Zero out to expose potential bugs - } - } +template +PairIterator stable_sort_and_accumulate_asci_pairs(PairIterator pairs_begin, + PairIterator pairs_end) { + const size_t npairs = std::distance(pairs_begin, pairs_end); - // Remote duplicate bitstrings - return std::unique(pairs_begin, pairs_end, - [](auto x, auto y) { return x.state == y.state; }); + if(!npairs) return pairs_end; + + auto comparator = [](const auto& x, const auto& y) { + return bitset_less(x.state, y.state); + }; + +// Sort by bitstring +#ifdef MACIS_USE_BOOST_SORT + boost::sort::flat_stable_sort +#else + std::stable_sort +#endif + (pairs_begin, pairs_end, comparator); + + return accumulate_asci_pairs(pairs_begin, pairs_end); } template @@ -108,7 +162,8 @@ void keep_only_largest_copy_asci_pairs( // Keep only max value else { - cur_it->rv = std::max(cur_it->rv, it->rv); + cur_it->c_times_matel = + std::max(cur_it->c_times_matel, it->c_times_matel); } } diff --git a/include/macis/asci/grow.hpp b/include/macis/asci/grow.hpp index f3f4260e..f72226f3 100644 --- a/include/macis/asci/grow.hpp +++ b/include/macis/asci/grow.hpp @@ -16,7 +16,7 @@ namespace macis { template auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings, double E0, std::vector> wfn, std::vector X, - HamiltonianGenerator& ham_gen, + HamiltonianGenerator>& ham_gen, size_t norb MACIS_MPI_CODE(, MPI_Comm comm)) { #ifdef MACIS_ENABLE_MPI auto world_rank = comm_rank(comm); @@ -119,12 +119,13 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings, #endif // Regenerate intermediates - ham_gen.generate_integral_intermediates(ham_gen.V_pqrs_); + ham_gen.generate_integral_intermediates(); logger->trace(" * Rediagonalizing"); auto rdg_st = hrt_t::now(); std::vector X_local; - selected_ci_diag( + throw "DIE DIE DIE"; + selected_ci_diag( wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol, mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol, X_local MACIS_MPI_CODE(, comm)); diff --git a/include/macis/asci/iteration.hpp b/include/macis/asci/iteration.hpp index aa6e271c..604ffde6 100644 --- a/include/macis/asci/iteration.hpp +++ b/include/macis/asci/iteration.hpp @@ -8,15 +8,15 @@ #pragma once #include +#include #include -#include namespace macis { -template +template auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings, size_t ndets_max, double E0, std::vector> wfn, - std::vector X, HamiltonianGenerator& ham_gen, + std::vector X, HamiltonianGenerator>& ham_gen, size_t norb MACIS_MPI_CODE(, MPI_Comm comm)) { // Sort wfn on coefficient weights if(wfn.size() > 1) reorder_ci_on_coeff(wfn, X); @@ -24,14 +24,23 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings, // Sanity check on search determinants size_t nkeep = std::min(asci_settings.ncdets_max, wfn.size()); + // Sort kept dets on alpha string + if(wfn.size() > 1) + reorder_ci_on_alpha(wfn.begin(), wfn.begin() + nkeep, X.data()); + // Perform the ASCI search wfn = asci_search(asci_settings, ndets_max, wfn.begin(), wfn.begin() + nkeep, E0, X, norb, ham_gen.T(), ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), ham_gen.V(), ham_gen MACIS_MPI_CODE(, comm)); + // std::sort(wfn.begin(), wfn.end(), bitset_less_comparator{}); + using wfn_traits = wavefunction_traits>; + using wfn_comp = typename wfn_traits::spin_comparator; + std::sort(wfn.begin(), wfn.end(), wfn_comp{}); + // Rediagonalize std::vector X_local; // Precludes guess reuse - auto E = selected_ci_diag( + auto E = selected_ci_diag( wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol, mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol, X_local MACIS_MPI_CODE(, comm)); diff --git a/include/macis/asci/mask_constraints.hpp b/include/macis/asci/mask_constraints.hpp index c9907e09..03881225 100644 --- a/include/macis/asci/mask_constraints.hpp +++ b/include/macis/asci/mask_constraints.hpp @@ -7,6 +7,7 @@ */ #pragma once +#include #include #include #include @@ -14,96 +15,73 @@ namespace macis { -template -struct wfn_constraint { - wfn_t C; - wfn_t B; - unsigned C_min; -}; - -template -auto make_triplet(unsigned i, unsigned j, unsigned k) { - wfn_constraint con; - - con.C = 0; - con.C.flip(i).flip(j).flip(k); - con.B = 1; - con.B <<= k; - con.B = con.B.to_ullong() - 1; - con.C_min = k; - - return con; +template +bool satisfies_constraint(WfnType det, ConType C) { + return C.satisfies_constraint(det); } -template -auto make_quad(unsigned i, unsigned j, unsigned k, unsigned l) { - wfn_constraint con; - - con.C = 0; - con.C.flip(i).flip(j).flip(k).flip(l); - con.B = 1; - con.B <<= l; - con.B = con.B.to_ullong() - 1; - con.C_min = l; +template +auto generate_constraint_single_excitations(WfnType det, ConType constraint) { + using constraint_traits = typename ConType::constraint_traits; + const auto C = constraint.C(); + const auto B = constraint.B(); - return con; -} + // need to have at most one different from the constraint + if(constraint.overlap(det) < (constraint.count() - 1)) + return std::make_pair(WfnType(0), WfnType(0)); -template -bool satisfies_constraint(wfn_t det, wfn_t C, unsigned C_min) { - return (det & C).count() == C.count() and ((det ^ C) >> C_min).count() == 0; -} + auto o = constraint.symmetric_difference(det); + auto v = constraint.b_mask_union(~det); -template -auto generate_constraint_single_excitations(wfn_t det, wfn_t C, - wfn_t O_mask, wfn_t B) { - if((det & C).count() < - (C.count() - 1)) // need to have at most one different from the constraint - return std::make_pair(wfn_t(0), wfn_t(0)); - - auto o = det ^ C; - auto v = (~det) & O_mask & B; - - if((o & C).count() == - 1) { // don't have to change this necessarily, but more clear without >= + if(constraint_traits::count(o & C) == 1) { v = o & C; o ^= v; } - if((o & ~B).count() > 1) return std::make_pair(wfn_t(0), wfn_t(0)); + const auto o_and_not_b = o & ~B; + const auto o_and_not_b_count = constraint_traits::count(o_and_not_b); + if(o_and_not_b_count > 1) return std::make_pair(WfnType(0), WfnType(0)); - if((o & ~B).count() == 1) o &= ~B; + if(o_and_not_b_count == 1) o = o_and_not_b; return std::make_pair(o, v); } -template -auto generate_constraint_double_excitations(wfn_t det, wfn_t C, - wfn_t O_mask, wfn_t B) { +template +auto generate_constraint_double_excitations(WfnType det, ConType constraint) { + using constraint_traits = typename ConType::constraint_traits; + using constraint_type = typename ConType::constraint_type; + const auto C = constraint.C(); + const auto B = constraint.B(); // Occ/Vir pairs to generate excitations - std::vector> O, V; + std::vector O, V; - if((det & C) == 0) return std::make_tuple(O, V); + if(constraint.overlap(det) == 0) return std::make_tuple(O, V); - auto o = det ^ C; - auto v = (~det) & O_mask & B; + auto o = constraint.symmetric_difference(det); + auto v = constraint.b_mask_union(~det); - if((o & C).count() >= 3) return std::make_tuple(O, V); + auto o_and_c = o & C; + auto o_and_c_count = constraint_traits::count(o_and_c); + if(o_and_c_count >= 3) return std::make_tuple(O, V); // Generate Virtual Pairs - if((o & C).count() == 2) { - v = o & C; + if(o_and_c_count == 2) { + v = o_and_c; o ^= v; + // Regenerate since o changed + // XXX: This apparently is not needed, but leaving because + o_and_c = o & C; + o_and_c_count = constraint_traits::count(o_and_c); } const auto virt_ind = bits_to_indices(v); - const auto o_and_t = o & C; - switch((o & C).count()) { + switch(o_and_c_count) { case 1: for(auto a : virt_ind) { - V.emplace_back(o_and_t).flip(a); + V.emplace_back(constraint_traits::create_no_check(o_and_c, a)); } - o ^= o_and_t; + o ^= o_and_c; break; default: generate_pairs(virt_ind, V); @@ -112,16 +90,17 @@ auto generate_constraint_double_excitations(wfn_t det, wfn_t C, // Generate Occupied Pairs const auto o_and_not_b = o & ~B; - if(o_and_not_b.count() > 2) return std::make_tuple(O, V); + const auto o_and_not_b_count = constraint_traits::count(o_and_not_b); + if(o_and_not_b_count > 2) return std::make_tuple(O, V); - switch(o_and_not_b.count()) { + switch(o_and_not_b_count) { case 1: for(auto i : bits_to_indices(o & B)) { - O.emplace_back(o_and_not_b).flip(i); + O.emplace_back(constraint_traits::create_no_check(o_and_not_b, i)); } break; default: - if(o_and_not_b.count() == 2) o = o_and_not_b; + if(o_and_not_b_count == 2) o = o_and_not_b; generate_pairs(bits_to_indices(o), O); break; } @@ -129,36 +108,39 @@ auto generate_constraint_double_excitations(wfn_t det, wfn_t C, return std::make_tuple(O, V); } -template -void generate_constraint_singles(wfn_t det, wfn_t T, wfn_t O_mask, - wfn_t B, std::vector>& t_singles) { - auto [o, v] = generate_constraint_single_excitations(det, T, O_mask, B); - const auto oc = o.count(); - const auto vc = v.count(); +template +void generate_constraint_singles(WfnType det, ConType constraint, + std::vector& t_singles) { + using constraint_traits = typename ConType::constraint_traits; + auto [o, v] = generate_constraint_single_excitations(det, constraint); + const auto oc = constraint_traits::count(o); + const auto vc = constraint_traits::count(v); if(!oc or !vc) return; t_singles.clear(); t_singles.reserve(oc * vc); - const auto occ = bits_to_indices(o); - const auto vir = bits_to_indices(v); + const auto occ = constraint_traits::state_to_occ(o); + const auto vir = constraint_traits::state_to_occ(v); for(auto i : occ) { - auto temp = det; - temp.flip(i); - for(auto a : vir) t_singles.emplace_back(temp).flip(a); + auto temp = constraint_traits::create_no_check(det, i); + for(auto a : vir) + t_singles.emplace_back(constraint_traits::create_no_check(temp, a)); } } -template -unsigned count_constraint_singles(Args&&... args) { - auto [o, v] = - generate_constraint_single_excitations(std::forward(args)...); - return o.count() * v.count(); +template +unsigned count_constraint_singles(WfnType det, ConType constraint) { + using constraint_traits = typename ConType::constraint_traits; + auto [o, v] = generate_constraint_single_excitations(det, constraint); + const auto oc = constraint_traits::count(o); + const auto vc = constraint_traits::count(v); + return oc * vc; } -template -void generate_constraint_doubles(wfn_t det, wfn_t T, wfn_t O_mask, - wfn_t B, std::vector>& t_doubles) { - auto [O, V] = generate_constraint_double_excitations(det, T, O_mask, B); +template +void generate_constraint_doubles(WfnType det, ConType constraint, + std::vector& t_doubles) { + auto [O, V] = generate_constraint_double_excitations(det, constraint); t_doubles.clear(); for(auto ij : O) { @@ -172,30 +154,36 @@ void generate_constraint_doubles(wfn_t det, wfn_t T, wfn_t O_mask, /** * @param[in] det Input root determinant * @param[in] T Triplet constraint mask - * @param[in] O Overfill mask (full mask 0 -> norb) * @param[in] B B mask (?) */ -template -unsigned count_constraint_doubles(wfn_t det, wfn_t C, wfn_t O, - wfn_t B) { - if((det & C) == 0) return 0; +template +unsigned count_constraint_doubles(WfnType det, ConType constraint) { + using constraint_traits = typename ConType::constraint_traits; + const auto C = constraint.C(); + const auto B = constraint.B(); + if(constraint.overlap(det) == 0) return 0; - auto o = det ^ C; - auto v = (~det) & O & B; + auto o = constraint.symmetric_difference(det); + auto v = constraint.b_mask_union(~det); - if((o & C).count() >= 3) return 0; + auto o_and_c = o & C; + auto o_and_c_count = constraint_traits::count(o_and_c); + if(o_and_c_count >= 3) return 0; // Generate Virtual Pairs - if((o & C).count() == 2) { - v = o & C; + if(o_and_c_count == 2) { + v = o_and_c; o ^= v; + // Regenerate since o changed + // XXX: This apparently is not needed, but leaving because + o_and_c = o & C; + o_and_c_count = constraint_traits::count(o_and_c); } - unsigned nv_pairs = v.count(); - const auto o_and_t = o & C; - switch((o & C).count()) { + unsigned nv_pairs = constraint_traits::count(v); + switch(o_and_c_count) { case 1: - o ^= o_and_t; + o ^= o_and_c; break; default: nv_pairs = (nv_pairs * (nv_pairs - 1)) / 2; @@ -204,16 +192,17 @@ unsigned count_constraint_doubles(wfn_t det, wfn_t C, wfn_t O, // Generate Occupied Pairs const auto o_and_not_b = o & ~B; - if(o_and_not_b.count() > 2) return 0; + const auto o_and_not_b_count = constraint_traits::count(o_and_not_b); + if(o_and_not_b_count > 2) return 0; unsigned no_pairs = 0; - switch(o_and_not_b.count()) { + switch(o_and_not_b_count) { case 1: - no_pairs = (o & B).count(); + no_pairs = constraint_traits::count(o & B); break; default: - if(o_and_not_b.count() == 2) o = o_and_not_b; - no_pairs = o.count(); + if(o_and_not_b_count == 2) o = o_and_not_b; + no_pairs = constraint_traits::count(o); no_pairs = (no_pairs * (no_pairs - 1)) / 2; break; } @@ -221,48 +210,49 @@ unsigned count_constraint_doubles(wfn_t det, wfn_t C, wfn_t O, return no_pairs * nv_pairs; } -template -size_t constraint_histogram(wfn_t det, size_t n_os_singles, - size_t n_os_doubles, wfn_t T, wfn_t O_mask, - wfn_t B) { - auto ns = count_constraint_singles(det, T, O_mask, B); - auto nd = count_constraint_doubles(det, T, O_mask, B); +template +size_t constraint_histogram(WfnType det, size_t n_os_singles, + size_t n_os_doubles, ConType constraint) { + auto ns = count_constraint_singles(det, constraint); + auto nd = count_constraint_doubles(det, constraint); size_t ndet = 0; ndet += ns; // AA ndet += nd; // AAAA ndet += ns * n_os_singles; // AABB - auto T_min = ffs(T) - 1; - if(satisfies_constraint(det, T, T_min)) { + if(satisfies_constraint(det, constraint)) { ndet += n_os_singles + n_os_doubles + 1; // BB + BBBB + No Excitations } return ndet; } -template +template void generate_constraint_singles_contributions_ss( - double coeff, wfn_t det, wfn_t T, wfn_t O, wfn_t B, - wfn_t os_det, const std::vector& occ_same, + double coeff, WfnType det, ConType constraint, + const std::vector& occ_same, const std::vector& occ_othr, const double* eps, const double* T_pq, const size_t LDT, const double* G_kpq, const size_t LDG, const double* V_kpq, const size_t LDV, double h_el_tol, double root_diag, - double E0, HamiltonianGenerator& ham_gen, - asci_contrib_container>& asci_contributions) { - auto [o, v] = generate_constraint_single_excitations(det, T, O, B); - const auto no = o.count(); - const auto nv = v.count(); + double E0, HamiltonianGeneratorBase& ham_gen, + asci_contrib_container& asci_contributions) { + using wfn_traits = wavefunction_traits; + using constraint_traits = typename ConType::constraint_traits; + auto [o, v] = generate_constraint_single_excitations( + wfn_traits::alpha_string(det), constraint); + const auto no = constraint_traits::count(o); + const auto nv = constraint_traits::count(v); if(!no or !nv) return; const size_t LDG2 = LDG * LDG; const size_t LDV2 = LDV * LDV; for(int ii = 0; ii < no; ++ii) { const auto i = fls(o); - o.flip(i); + o = constraint_traits::create_no_check(o, i); // o.flip(i) auto v_cpy = v; for(int aa = 0; aa < nv; ++aa) { const auto a = fls(v_cpy); - v_cpy.flip(a); + v_cpy = constraint_traits::create_no_check(v_cpy, a); // v_cpy.flip(a) double h_el = T_pq[a + i * LDT]; const double* G_ov = G_kpq + a * LDG + i * LDG2; @@ -274,8 +264,9 @@ void generate_constraint_singles_contributions_ss( if(std::abs(coeff * h_el) < h_el_tol) continue; // Calculate Excited Determinant - auto ex_det = det | os_det; - ex_det.flip(i).flip(a); + auto ex_det = + wfn_traits::template single_excitation_no_check(det, i, + a); // Compute Sign in a Canonical Way auto sign = single_excitation_sign(det, a, i); @@ -283,22 +274,25 @@ void generate_constraint_singles_contributions_ss( // Compute Fast Diagonal Matrix Element auto h_diag = ham_gen.fast_diag_single(eps[i], eps[a], i, a, root_diag); - h_el /= (E0 - h_diag); + // h_el /= (E0 - h_diag); - asci_contributions.push_back({ex_det, coeff * h_el}); + asci_contributions.push_back({ex_det, coeff * h_el, E0 - h_diag}); } } } -template +template void generate_constraint_doubles_contributions_ss( - double coeff, wfn_t det, wfn_t T, wfn_t O_mask, wfn_t B, - wfn_t os_det, const std::vector& occ_same, + double coeff, WfnType det, ConType constraint, + const std::vector& occ_same, const std::vector& occ_othr, const double* eps, const double* G, const size_t LDG, double h_el_tol, double root_diag, double E0, - HamiltonianGenerator& ham_gen, - asci_contrib_container>& asci_contributions) { - auto [O, V] = generate_constraint_double_excitations(det, T, O_mask, B); + HamiltonianGeneratorBase& ham_gen, + asci_contrib_container& asci_contributions) { + using wfn_traits = wavefunction_traits; + using spin_wfn_traits = wavefunction_traits>; + auto [O, V] = generate_constraint_double_excitations( + wfn_traits::alpha_string(det), constraint); const auto no_pairs = O.size(); const auto nv_pairs = V.size(); if(!no_pairs or !nv_pairs) return; @@ -309,7 +303,9 @@ void generate_constraint_doubles_contributions_ss( const auto i = ffs(ij) - 1; const auto j = fls(ij); const auto G_ij = G + (j + i * LDG2) * LDG; - const auto ex_ij = det ^ ij; + const auto ex_ij = + wfn_traits::template single_excitation_no_check( + det, i, j); // det ^ ij; for(int _ab = 0; _ab < nv_pairs; ++_ab) { const auto ab = V[_ab]; const auto a = ffs(ab) - 1; @@ -321,14 +317,20 @@ void generate_constraint_doubles_contributions_ss( if(std::abs(coeff * G_aibj) < h_el_tol) continue; // Calculate Excited Determinant (spin) - const auto full_ex_spin = ij | ab; - const auto ex_det_spin = ex_ij | ab; + const auto full_ex_spin = + spin_wfn_traits::template single_excitation_no_check( + ij, a, b); // ij | ab; + const auto ex_det_spin = + wfn_traits::template single_excitation_no_check( + ex_ij, a, b); // ex_ij | ab; // Compute Sign in a Canonical Way - auto sign = doubles_sign(det, ex_det_spin, full_ex_spin); + auto sign = + doubles_sign(wfn_traits::alpha_string(det), + wfn_traits::alpha_string(ex_det_spin), full_ex_spin); // Calculate Full Excited Determinant - const auto full_ex = ex_det_spin | os_det; + const auto full_ex = ex_det_spin; // | os_det; // Update Sign of Matrix Element auto h_el = sign * G_aibj; @@ -336,36 +338,39 @@ void generate_constraint_doubles_contributions_ss( // Evaluate fast diagonal matrix element auto h_diag = ham_gen.fast_diag_ss_double(eps[i], eps[j], eps[a], eps[b], i, j, a, b, root_diag); - h_el /= (E0 - h_diag); + // h_el /= (E0 - h_diag); - asci_contributions.push_back({full_ex, coeff * h_el}); + asci_contributions.push_back({full_ex, coeff * h_el, E0 - h_diag}); } } } -template +template void generate_constraint_doubles_contributions_os( - double coeff, wfn_t det, wfn_t T, wfn_t O, wfn_t B, - wfn_t os_det, const std::vector& occ_same, + double coeff, WfnType det, ConType constraint, + const std::vector& occ_same, const std::vector& occ_othr, const std::vector& vir_othr, const double* eps_same, const double* eps_othr, const double* V, const size_t LDV, double h_el_tol, - double root_diag, double E0, HamiltonianGenerator& ham_gen, - asci_contrib_container>& asci_contributions) { + double root_diag, double E0, HamiltonianGeneratorBase& ham_gen, + asci_contrib_container& asci_contributions) { + using wfn_traits = wavefunction_traits; + using constraint_traits = typename ConType::constraint_traits; // Generate Single Excitations that Satisfy the Constraint - auto [o, v] = generate_constraint_single_excitations(det, T, O, B); - const auto no = o.count(); - const auto nv = v.count(); + auto [o, v] = generate_constraint_single_excitations( + wfn_traits::alpha_string(det), constraint); + const auto no = constraint_traits::count(o); + const auto nv = constraint_traits::count(v); if(!no or !nv) return; const size_t LDV2 = LDV * LDV; for(int ii = 0; ii < no; ++ii) { const auto i = fls(o); - o.flip(i); + o = constraint_traits::create_no_check(o, i); // o.flip(i) auto v_cpy = v; for(int aa = 0; aa < nv; ++aa) { const auto a = fls(v_cpy); - v_cpy.flip(a); + v_cpy = constraint_traits::create_no_check(v_cpy, a); // v_cpy.flip(a) const auto* V_ai = V + a + i * LDV; double sign_same = single_excitation_sign(det, a, i); @@ -381,12 +386,17 @@ void generate_constraint_doubles_contributions_os( // double sign_othr = single_excitation_sign( os_det >> (N/2), b, j // ); double sign_othr = - single_excitation_sign(bitset_hi_word(os_det), b, j); + single_excitation_sign(wfn_traits::beta_string(det), b, j); double sign = sign_same * sign_othr; // Compute Excited Determinant - auto ex_det = det | os_det; - ex_det.flip(i).flip(a).flip(j + N / 2).flip(b + N / 2); + // auto ex_det = det | os_det; + // ex_det.flip(i).flip(a).flip(j + N / 2).flip(b + N / 2); + auto ex_det = + wfn_traits::template single_excitation_no_check( + det, i, a); + ex_det = wfn_traits::template single_excitation_no_check( + ex_det, j, b); // Finalize Matrix Element auto h_el = sign * V_aibj; @@ -394,9 +404,9 @@ void generate_constraint_doubles_contributions_os( auto h_diag = ham_gen.fast_diag_os_double(eps_same[i], eps_othr[j], eps_same[a], eps_othr[b], i, j, a, b, root_diag); - h_el /= (E0 - h_diag); + // h_el /= (E0 - h_diag); - asci_contributions.push_back({ex_det, coeff * h_el}); + asci_contributions.push_back({ex_det, coeff * h_el, E0 - h_diag}); } // BJ } // A @@ -482,43 +492,370 @@ auto dist_triplets_histogram(size_t norb, size_t ns_othr, size_t nd_othr, } #endif -#ifdef MACIS_ENABLE_MPI template +auto make_triplet(unsigned i, unsigned j, unsigned k) { + using wfn_type = wfn_t; + using wfn_traits = wavefunction_traits; + using constraint_type = alpha_constraint; + using string_type = typename constraint_type::constraint_type; + + string_type C = 0; + C.flip(i).flip(j).flip(k); + string_type B = 1; + static_assert(B.size() <= 64, "ULLONG NOT POSSIBLE HERE"); + B <<= k; + B = B.to_ullong() - 1; + + return constraint_type(C, B, k); +} + +#ifdef MACIS_ENABLE_MPI +#if 0 +template auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, size_t nd_othr, - const std::vector>& unique_alpha, + const ContainerType& unique_alpha, MPI_Comm comm) { + + using wfn_traits = wavefunction_traits; + using constraint_type = alpha_constraint; + using string_type = typename constraint_type::constraint_type; auto world_rank = comm_rank(comm); auto world_size = comm_size(comm); - wfn_t O = full_mask(norb); + constexpr bool flat_container = std::is_same_v< + std::decay_t, + std::decay_t + >; + + // Generate triplets + heuristic + std::vector> constraint_sizes; + constraint_sizes.reserve(norb * norb * norb); + size_t total_work = 0; + for(int t_i = 0; t_i < norb; ++t_i) + for(int t_j = 0; t_j < t_i; ++t_j) + for(int t_k = 0; t_k < t_j; ++t_k) { + auto constraint = constraint_type::make_triplet(t_i, t_j, t_k); + + size_t nw = 0; + for(const auto& alpha : unique_alpha) { + if constexpr (flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, constraint); + else + nw += alpha.second * + constraint_histogram(alpha.first, ns_othr, nd_othr, constraint); + } + if(nw) constraint_sizes.emplace_back(constraint, nw); + total_work += nw; + } + + size_t local_average = (0.8 * total_work) / world_size; + + for(size_t ilevel = 0; ilevel < nlevels; ++ilevel) { + // Select constraints larger than average to be broken apart + std::vector> tps_to_next; + { + auto it = std::partition( + constraint_sizes.begin(), constraint_sizes.end(), + [=](const auto& a) { return a.second <= local_average; }); + + // Remove constraints from full list + tps_to_next = decltype(tps_to_next)(it, constraint_sizes.end()); + constraint_sizes.erase(it, constraint_sizes.end()); + for(auto [t, s] : tps_to_next) total_work -= s; + } + + if(!tps_to_next.size()) break; + + // Break apart constraints + for(auto [c, nw_trip] : tps_to_next) { + const auto C_min = c.C_min(); + + // Loop over possible constraints with one more element + for(auto q_l = 0; q_l < C_min; ++q_l) { + // Generate masks / counts + string_type cn_C = c.C(); + cn_C.flip(q_l); + string_type cn_B = c.B() >> (C_min - q_l); + constraint_type c_next(cn_C, cn_B, q_l); + + size_t nw = 0; + + for(const auto& alpha : unique_alpha) { + if constexpr (flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, c_next); + else + nw += alpha.second * + constraint_histogram(alpha.first, ns_othr, nd_othr, c_next); + } + if(nw) constraint_sizes.emplace_back(c_next, nw); + total_work += nw; + } + } + } // Recurse into constraints + + // Sort to get optimal bucket partitioning + std::sort(constraint_sizes.begin(), constraint_sizes.end(), + [](const auto& a, const auto& b) { return a.second > b.second; }); + // Global workloads std::vector workloads(world_size, 0); + // Assign work + std::vector constraints; + constraints.reserve(constraint_sizes.size() / world_size); + + for(auto [c, nw] : constraint_sizes) { + // Get rank with least amount of work + auto min_rank_it = std::min_element(workloads.begin(), workloads.end()); + int min_rank = std::distance(workloads.begin(), min_rank_it); + + // Assign constraint + *min_rank_it += nw; + if(world_rank == min_rank) { + constraints.emplace_back(c); + } + } + + // if(world_rank == 0) + // printf("[rank %2d] AFTER LOCAL WORK = %lu TOTAL WORK = %lu\n", world_rank, + // workloads[world_rank], total_work); + + return constraints; +} +#else +template +auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, + size_t nd_othr, const ContainerType& unique_alpha, + int world_size, size_t nlevel_min = 0, + int64_t nrec_min = -1) { + using wfn_traits = wavefunction_traits; + using constraint_type = alpha_constraint; + using string_type = typename constraint_type::constraint_type; + + constexpr bool flat_container = + std::is_same_v, + std::decay_t>; + // Generate triplets + heuristic - std::vector, size_t>> constraint_sizes; + std::vector> constraint_sizes; constraint_sizes.reserve(norb * norb * norb); +#if 0 size_t total_work = 0; for(int t_i = 0; t_i < norb; ++t_i) for(int t_j = 0; t_j < t_i; ++t_j) for(int t_k = 0; t_k < t_j; ++t_k) { - auto constraint = make_triplet(t_i, t_j, t_k); - const auto& [T, B, _] = constraint; + auto constraint = constraint_type::make_triplet(t_i, t_j, t_k); size_t nw = 0; for(const auto& alpha : unique_alpha) { - nw += constraint_histogram(alpha, ns_othr, nd_othr, T, O, B); + if constexpr(flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, constraint); + else + nw += alpha.second * constraint_histogram(alpha.first, ns_othr, + nd_othr, constraint); } if(nw) constraint_sizes.emplace_back(constraint, nw); total_work += nw; } - size_t local_average = (0.6 * total_work) / world_size; + size_t local_average = (0.8 * total_work) / world_size; +#else + // Generate all the triplets + for(int t_i = 0; t_i < norb; ++t_i) + for(int t_j = 0; t_j < t_i; ++t_j) + for(int t_k = 0; t_k < t_j; ++t_k) { + auto constraint = constraint_type::make_triplet(t_i, t_j, t_k); + constraint_sizes.emplace_back(constraint, 0ul); + } + + // Build up higher-order constraints as base if requested + if(nrec_min < 0 or + nrec_min >= constraint_sizes.size()) // nrec_min < 0 implies that you want + // all the constraints upfront + for(size_t ilevel = 0; ilevel < nlevel_min; ++ilevel) { + decltype(constraint_sizes) cur_constraints; + cur_constraints.reserve(constraint_sizes.size() * norb); + for(auto [c, nw] : constraint_sizes) { + const auto C_min = c.C_min(); + for(auto q_l = 0; q_l < C_min; ++q_l) { + // Generate masks / counts + string_type cn_C = c.C(); + cn_C.flip(q_l); + string_type cn_B = c.B() >> (C_min - q_l); + constraint_type c_next(cn_C, cn_B, q_l); + cur_constraints.emplace_back(c_next, 0ul); + } + } + constraint_sizes = std::move(cur_constraints); + } + + struct atomic_wrapper { + std::atomic value; + atomic_wrapper(size_t i = 0) : value(i){}; + atomic_wrapper(const atomic_wrapper& other) + : atomic_wrapper(other.value.load()){}; + atomic_wrapper& operator=(const atomic_wrapper& other) { + value.store(other.value.load()); + return *this; + } + }; + + // Compute histogram + const auto ntrip_full = constraint_sizes.size(); + std::vector constraint_work(ntrip_full, 0ul); + { + global_atomic nxtval(MPI_COMM_WORLD); +#pragma omp parallel + { + size_t i_trip = 0; + while(i_trip < ntrip_full) { + i_trip = nxtval.fetch_and_add(1); + if(i_trip >= ntrip_full) break; + // if(!(i_trip%1000)) printf("cgen %lu / %lu\n", i_trip, ntrip_full); + auto& [constraint, __nw] = constraint_sizes[i_trip]; + auto& c_nw = constraint_work[i_trip]; + size_t nw = 0; + for(const auto& alpha : unique_alpha) { + if constexpr(flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, constraint); + else + nw += alpha.second * constraint_histogram(alpha.first, ns_othr, + nd_othr, constraint); + } + if(nw) c_nw.value.fetch_add(nw); + } + } + } // Scope nxtval + + std::vector constraint_work_bare(ntrip_full); + for(auto i_trip = 0; i_trip < ntrip_full; ++i_trip) { + constraint_work_bare[i_trip] = constraint_work[i_trip].value.load(); + } + allreduce(constraint_work_bare.data(), ntrip_full, MPI_SUM, MPI_COMM_WORLD); + + // Copy over constraint work + for(auto i_trip = 0; i_trip < ntrip_full; ++i_trip) { + constraint_sizes[i_trip].second = constraint_work_bare[i_trip]; + } + + // Remove zeros + { + auto it = std::partition(constraint_sizes.begin(), constraint_sizes.end(), + [](const auto& p) { return p.second > 0; }); + constraint_sizes.erase(it, constraint_sizes.end()); + } + + // Compute average + size_t total_work = + std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), 0ul, + [](auto s, const auto& p) { return s + p.second; }); + size_t local_average = total_work / world_size; + + // Manual refinement of top configurations + if(nrec_min > 0 and nrec_min < constraint_sizes.size()) { + const size_t nleave = constraint_sizes.size() - nrec_min; + std::vector> constraint_to_refine, + constraint_to_leave; + constraint_to_refine.reserve(nrec_min); + constraint_to_refine.reserve(nleave); + + std::copy_n(constraint_sizes.begin(), nrec_min, + std::back_inserter(constraint_to_refine)); + std::copy_n(constraint_sizes.begin() + nrec_min, nleave, + std::back_inserter(constraint_to_leave)); + + // Deallocate original array + decltype(constraint_sizes)().swap(constraint_sizes); + + // Generate refined constraints + for(size_t ilevel = 0; ilevel < nlevel_min; ++ilevel) { + decltype(constraint_sizes) cur_constraints; + cur_constraints.reserve(constraint_to_refine.size() * norb); + for(auto [c, nw] : constraint_to_refine) { + const auto C_min = c.C_min(); + for(auto q_l = 0; q_l < C_min; ++q_l) { + // Generate masks / counts + string_type cn_C = c.C(); + cn_C.flip(q_l); + string_type cn_B = c.B() >> (C_min - q_l); + constraint_type c_next(cn_C, cn_B, q_l); + cur_constraints.emplace_back(c_next, 0ul); + } + } + constraint_to_refine = std::move(cur_constraints); + } + + const size_t nrefine = constraint_to_refine.size(); + + global_atomic nxtval(MPI_COMM_WORLD); + std::vector().swap(constraint_work); + std::vector().swap(constraint_work_bare); + constraint_work.resize(nrefine, 0ul); +#pragma omp parallel + { + size_t i_ref = 0; + while(i_ref < nrefine) { + i_ref = nxtval.fetch_and_add(1); + if(i_ref >= nrefine) break; + // if(!(i_ref%1000)) printf("cgen %lu / %lu\n", i_ref, nrefine); + auto& [constraint, __nw] = constraint_to_refine[i_ref]; + auto& c_nw = constraint_work[i_ref]; + size_t nw = 0; + for(const auto& alpha : unique_alpha) { + if constexpr(flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, constraint); + else + nw += alpha.second * constraint_histogram(alpha.first, ns_othr, + nd_othr, constraint); + } + if(nw) c_nw.value.fetch_add(nw); + } // constraint "loop" + } // OpenMP Context + + constraint_work_bare.resize(nrefine); + for(auto i_ref = 0; i_ref < nrefine; ++i_ref) { + constraint_work_bare[i_ref] = constraint_work[i_ref].value.load(); + } + allreduce(constraint_work_bare.data(), nrefine, MPI_SUM, MPI_COMM_WORLD); + + // Copy over constraint work + for(auto i_ref = 0; i_ref < nrefine; ++i_ref) { + constraint_to_refine[i_ref].second = constraint_work_bare[i_ref]; + } + + // Remove zeros + { + auto it = std::partition(constraint_to_refine.begin(), + constraint_to_refine.end(), + [](const auto& p) { return p.second > 0; }); + constraint_to_refine.erase(it, constraint_to_refine.end()); + } + + // Concatenate the arrays + constraint_sizes.reserve(nrefine + nleave); + std::copy_n(constraint_to_refine.begin(), nrefine, + std::back_inserter(constraint_sizes)); + std::copy_n(constraint_to_leave.begin(), nleave, + std::back_inserter(constraint_sizes)); + + size_t tmp = + std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), 0ul, + [](auto s, const auto& p) { return s + p.second; }); + if(tmp != total_work) throw std::runtime_error("Incorrect Refinement"); + } // Selective refinement logic + +#endif for(size_t ilevel = 0; ilevel < nlevels; ++ilevel) { // Select constraints larger than average to be broken apart - std::vector, size_t>> tps_to_next; + std::vector> tps_to_next; { auto it = std::partition( constraint_sizes.begin(), constraint_sizes.end(), @@ -534,21 +871,25 @@ auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, // Break apart constraints for(auto [c, nw_trip] : tps_to_next) { - const auto C_min = c.C_min; + const auto C_min = c.C_min(); // Loop over possible constraints with one more element for(auto q_l = 0; q_l < C_min; ++q_l) { // Generate masks / counts - wfn_constraint c_next = c; - c_next.C.flip(q_l); - c_next.B >>= (C_min - q_l); - c_next.C_min = q_l; + string_type cn_C = c.C(); + cn_C.flip(q_l); + string_type cn_B = c.B() >> (C_min - q_l); + constraint_type c_next(cn_C, cn_B, q_l); size_t nw = 0; for(const auto& alpha : unique_alpha) { - nw += constraint_histogram(alpha, ns_othr, nd_othr, c_next.C, O, - c_next.B); + if constexpr(flat_container) + nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, + nd_othr, c_next); + else + nw += alpha.second * + constraint_histogram(alpha.first, ns_othr, nd_othr, c_next); } if(nw) constraint_sizes.emplace_back(c_next, nw); total_work += nw; @@ -556,30 +897,32 @@ auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, } } // Recurse into constraints - // if(!world_rank) { - // const auto ntrip = std::count_if(constraint_sizes.begin(), - // constraint_sizes.end(), [](auto &c){ return c.first.C.count() == 3; }); - // printf("[rank 0] NTRIP = %lu\n", ntrip); - // if(nlevels > 0) { - // const auto nquad = std::count_if(constraint_sizes.begin(), - // constraint_sizes.end(), [](auto &c){ return c.first.C.count() == 4; - // }); - // printf("[rank 0] NQUAD = %lu\n", nquad); - // } - // if(nlevels > 1) { - // const auto nquint = std::count_if(constraint_sizes.begin(), - // constraint_sizes.end(), [](auto &c){ return c.first.C.count() == 5; - // }); - // printf("[rank 0] NQINT = %lu\n", nquint); - // } - // } - // Sort to get optimal bucket partitioning std::sort(constraint_sizes.begin(), constraint_sizes.end(), [](const auto& a, const auto& b) { return a.second > b.second; }); + return constraint_sizes; +} + +template +auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, + size_t nd_othr, const ContainerType& unique_alpha, + MPI_Comm comm) { + using wfn_traits = wavefunction_traits; + using constraint_type = alpha_constraint; + + auto world_rank = comm_rank(comm); + auto world_size = comm_size(comm); + + // Generate constraints subject to expected workload + auto constraint_sizes = gen_constraints_general( + nlevels, norb, ns_othr, nd_othr, unique_alpha, world_size); + + // Global workloads + std::vector workloads(world_size, 0); + // Assign work - std::vector> constraints; + std::vector constraints; constraints.reserve(constraint_sizes.size() / world_size); for(auto [c, nw] : constraint_sizes) { @@ -601,6 +944,7 @@ auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, return constraints; } #endif +#endif #if 0 template diff --git a/include/macis/asci/pt2.hpp b/include/macis/asci/pt2.hpp new file mode 100644 index 00000000..ab7ba4c3 --- /dev/null +++ b/include/macis/asci/pt2.hpp @@ -0,0 +1,529 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#pragma once +#include +#include +#include +#include +#include + +namespace macis { + +template +double asci_pt2_constraint(ASCISettings asci_settings, + wavefunction_iterator_t cdets_begin, + wavefunction_iterator_t cdets_end, + const double E_ASCI, const std::vector& C, + size_t norb, const double* T_pq, const double* G_red, + const double* V_red, const double* G_pqrs, + const double* V_pqrs, + HamiltonianGenerator>& ham_gen, + MPI_Comm comm) { + using clock_type = std::chrono::high_resolution_clock; + using duration_type = std::chrono::duration; + using wfn_traits = wavefunction_traits>; + using spin_wfn_type = spin_wfn_t>; + using spin_wfn_traits = wavefunction_traits; + using wfn_comp = typename wfn_traits::spin_comparator; + if(!std::is_sorted(cdets_begin, cdets_end, wfn_comp{})) + throw std::runtime_error("PT2 Only Works with Sorted Wfns"); + + auto world_rank = comm_rank(comm); + auto world_size = comm_size(comm); + auto logger = spdlog::get("asci_pt2"); + if(!logger) + logger = world_rank ? spdlog::null_logger_mt("asci_pt2") + : spdlog::stdout_color_mt("asci_pt2"); + + const size_t ncdets = std::distance(cdets_begin, cdets_end); + logger->info("[ASCI PT2 Settings]"); + logger->info(" * NDETS = {}", ncdets); + logger->info(" * PT2_TOL = {}", asci_settings.pt2_tol); + logger->info(" * PT2_RESERVE_COUNT = {}", + asci_settings.pt2_reserve_count); + logger->info(" * PT2_CONSTRAINT_LVL_MAX = {}", + asci_settings.pt2_max_constraint_level); + logger->info(" * PT2_CONSTRAINT_LVL_MIN = {}", + asci_settings.pt2_min_constraint_level); + logger->info(" * PT2_CNSTRNT_RFNE_FORCE = {}", + asci_settings.pt2_constraint_refine_force); + logger->info(" * PT2_PRUNE = {}", asci_settings.pt2_prune); + logger->info(" * PT2_PRECOMP_EPS = {}", + asci_settings.pt2_precompute_eps); + logger->info(" * PT2_BIGCON_THRESH = {}", + asci_settings.pt2_bigcon_thresh); + logger->info(" * NXTVAL_BCOUNT_THRESH = {}", + asci_settings.nxtval_bcount_thresh); + logger->info(" * NXTVAL_BCOUNT_INC = {}", + asci_settings.nxtval_bcount_inc); + logger->info(""); + + // For each unique alpha, create a list of beta string and store metadata + struct beta_coeff_data { + spin_wfn_type beta_string; + std::vector occ_beta; + std::vector vir_beta; + std::vector orb_ens_alpha; + std::vector orb_ens_beta; + double coeff; + double h_diag; + + size_t mem() const { + return sizeof(spin_wfn_type) + + (occ_beta.capacity() + vir_beta.capacity()) * sizeof(uint8_t) + + (2 + orb_ens_alpha.capacity() + orb_ens_beta.capacity()) * + sizeof(double); + } + + beta_coeff_data(double c, size_t norb, + const std::vector& occ_alpha, wfn_t w, + const HamiltonianGenerator>& ham_gen, bool pce, + bool pci) { + coeff = c; + + beta_string = wfn_traits::beta_string(w); + + // Compute diagonal matrix element + h_diag = ham_gen.matrix_element(w, w); + + // Compute occ/vir for beta string + std::vector o_32, v_32; + if(pce or pci) { + spin_wfn_traits::state_to_occ_vir(norb, beta_string, o_32, v_32); + occ_beta.resize(o_32.size()); + std::copy(o_32.begin(), o_32.end(), occ_beta.begin()); + vir_beta.resize(v_32.size()); + std::copy(v_32.begin(), v_32.end(), vir_beta.begin()); + } + + // Precompute orbital energies + if(pce) { + orb_ens_alpha = ham_gen.single_orbital_ens(norb, occ_alpha, o_32); + orb_ens_beta = ham_gen.single_orbital_ens(norb, o_32, occ_alpha); + } + } + }; + + auto uniq_alpha = get_unique_alpha(cdets_begin, cdets_end); + const size_t nuniq_alpha = uniq_alpha.size(); + logger->info(" * NUNIQ_ALPHA = {}", nuniq_alpha); + std::vector uniq_alpha_ioff(nuniq_alpha); + std::transform_exclusive_scan( + uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_ioff.begin(), 0ul, + std::plus(), [](const auto& p) { return p.second; }); + + using unique_alpha_data = std::vector; + std::vector uad(nuniq_alpha); + for(auto i = 0, iw = 0; i < nuniq_alpha; ++i) { + std::vector occ_alpha, vir_alpha; + spin_wfn_traits::state_to_occ_vir(norb, uniq_alpha[i].first, occ_alpha, + vir_alpha); + + const auto nbeta = uniq_alpha[i].second; + uad[i].reserve(nbeta); + for(auto j = 0; j < nbeta; ++j, ++iw) { + const auto& w = *(cdets_begin + iw); + uad[i].emplace_back(C[iw], norb, occ_alpha, w, ham_gen, + asci_settings.pt2_precompute_eps, + asci_settings.pt2_precompute_idx); + } + } + + if(world_rank == 0) { + constexpr double gib = 1024 * 1024 * 1024; + logger->info("MEM REQ DETS = {:.2e}", ncdets * sizeof(wfn_t) / gib); + logger->info("MEM REQ C = {:.2e}", ncdets * sizeof(double) / gib); + size_t mem_alpha = 0; + for(auto i = 0ul; i < nuniq_alpha; ++i) { + mem_alpha += sizeof(spin_wfn_type); + for(auto j = 0ul; j < uad[i].size(); ++j) { + mem_alpha += uad[i][j].mem(); + } + } + logger->info("MEM REQ ALPH = {:.2e}", mem_alpha / gib); + logger->info( + "MEM REQ CONT = {:.2e}", + asci_settings.pt2_reserve_count * sizeof(asci_contrib>) / gib); + } + MPI_Barrier(comm); + + const auto n_occ_alpha = spin_wfn_traits::count(uniq_alpha[0].first); + const auto n_vir_alpha = norb - n_occ_alpha; + const auto n_sing_alpha = n_occ_alpha * n_vir_alpha; + const auto n_doub_alpha = (n_sing_alpha * (n_sing_alpha - norb + 1)) / 4; + + const auto n_occ_beta = cdets_begin->count() - n_occ_alpha; + const auto n_vir_beta = norb - n_occ_beta; + const auto n_sing_beta = n_occ_beta * n_vir_beta; + const auto n_doub_beta = (n_sing_beta * (n_sing_beta - norb + 1)) / 4; + + logger->info(" * NS = {} ND = {}", n_sing_alpha, n_doub_alpha); + + auto gen_c_st = clock_type::now(); + // auto constraints = dist_constraint_general>( + // 5, norb, n_sing_beta, n_doub_beta, uniq_alpha, comm); + auto constraints = gen_constraints_general>( + asci_settings.pt2_max_constraint_level, norb, n_sing_beta, n_doub_beta, + uniq_alpha, world_size * omp_get_max_threads(), + asci_settings.pt2_min_constraint_level, + asci_settings.pt2_constraint_refine_force); + auto gen_c_en = clock_type::now(); + duration_type gen_c_dur = gen_c_en - gen_c_st; + logger->info(" * GEN_DUR = {:.2e} ms", gen_c_dur.count()); + // if(!world_rank) { + // std::ofstream c_file("constraint_work.txt"); + // std::stringstream ss; + // for(auto [c,s] : constraints) { + // ss << c.C() << " " << s << std::endl; + // } + // auto str = ss.str(); + // c_file.write(str.c_str(), str.size()); + // } + // if(!world_rank) { + // std::ofstream c_file("unique_alpha.txt"); + // std::stringstream ss; + // for(size_t i = 0; i < nuniq_alpha; ++i) { + // ss << uniq_alpha[i].first << " " << uniq_alpha[i].second << std::endl; + // } + // auto str = ss.str(); + // c_file.write(str.c_str(), str.size()); + // } + + double EPT2 = 0.0; + size_t NPT2 = 0; + + const size_t ncon_total = constraints.size(); + const size_t ncon_big = asci_settings.pt2_bigcon_thresh; + const size_t ncon_small = ncon_total - ncon_big; + + // Global atomic task-id counter + global_atomic nxtval_big(comm, 0); + global_atomic nxtval_small(comm, ncon_big); + const double h_el_tol = asci_settings.pt2_tol; + + auto pt2_st = clock_type::now(); + // Assign each "big" constraint to an MPI rank, thread over contributions + { + size_t ic = 0; + while(ic < ncon_big) { + // Atomically get the next task ID and increment for other + // MPI ranks + ic = nxtval_big.fetch_and_add(1); + if(ic >= ncon_big) continue; + if(asci_settings.pt2_print_progress) + printf("[pt2_big rank %4d] %10lu / %10lu\n", world_rank, ic, + ncon_total); + const auto& con = constraints[ic].first; + + asci_contrib_container> asci_pairs_con; +#pragma omp parallel + { + asci_contrib_container> asci_pairs; +#pragma omp for schedule(dynamic) + for(size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) { + const size_t old_pair_size = asci_pairs.size(); + const auto& alpha_det = uniq_alpha[i_alpha].first; + const auto ncon_alpha = constraint_histogram(alpha_det, 1, 1, con); + if(!ncon_alpha) continue; + const auto occ_alpha = bits_to_indices(alpha_det); + const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); + + const auto& bcd = uad[i_alpha]; + const size_t nbeta = bcd.size(); + for(size_t j_beta = 0; j_beta < nbeta; ++j_beta) { + const size_t iw = uniq_alpha_ioff[i_alpha] + j_beta; + const auto w = *(cdets_begin + iw); + const auto c = C[iw]; + const auto& beta_det = bcd[j_beta].beta_string; + const auto h_diag = bcd[j_beta].h_diag; + +// TODO: These copies are slow +#if 0 + const auto& occ_beta_8 = bcd[j_beta].occ_beta; + const auto& vir_beta_8 = bcd[j_beta].vir_beta; + std::vector occ_beta(occ_beta_8.size()), vir_beta(vir_beta_8.size()); + std::copy(occ_beta_8.begin(), occ_beta_8.end(), occ_beta.begin()); + std::copy(vir_beta_8.begin(), vir_beta_8.end(), vir_beta.begin()); +#else + std::vector occ_beta, vir_beta; + spin_wfn_traits::state_to_occ_vir(norb, beta_det, occ_beta, + vir_beta); +#endif + + std::vector orb_ens_alpha, orb_ens_beta; + if(asci_settings.pt2_precompute_eps) { + orb_ens_alpha = bcd[j_beta].orb_ens_alpha; + orb_ens_beta = bcd[j_beta].orb_ens_beta; + } else { + orb_ens_alpha = + ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); + orb_ens_beta = + ham_gen.single_orbital_ens(norb, occ_beta, occ_alpha); + } + + // AA excitations + generate_constraint_singles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, + norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // AAAA excitations + generate_constraint_doubles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, + norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // AABB excitations + generate_constraint_doubles_contributions_os( + c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), + orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + if(alpha_satisfies_con) { + // BB excitations + append_singles_asci_contributions( + c, w, beta_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // BBBB excitations + append_ss_doubles_asci_contributions( + c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // No excitation (push inf to remove from list) + asci_pairs.push_back( + {w, std::numeric_limits::infinity(), 1.0}); + } + } +#if 0 + if(asci_settings.pt2_prune and asci_pairs.size() > asci_settings.pt2_reserve_count and asci_pairs.size() != old_pair_size) { + // Cleanup + auto uit = stable_sort_and_accumulate_asci_pairs(asci_pairs.begin(), + asci_pairs.end()); + asci_pairs.erase(uit, asci_pairs.end()); + //uit = std::stable_partition(asci_pairs.begin(), asci_pairs.end(), [&](const auto& p){ return std::abs(p.pt2()) > h_el_tol; }); + //asci_pairs.erase(uit, asci_pairs.end()); + printf("[pt2_prune rank %4d tid:%4d] IC = %lu / %lu IA = %lu / %lu SZ = %lu\n", world_rank, + omp_get_thread_num(), ic, ncon_total, i_alpha, + nuniq_alpha, asci_pairs.size()); + } +#endif + + } // Unique Alpha Loop + + // S&A Thread local pairs + sort_and_accumulate_asci_pairs(asci_pairs); + +// Insert +#pragma omp critical + { + if(asci_pairs_con.size()) { + asci_pairs_con.reserve(asci_pairs.size() + asci_pairs_con.size()); + asci_pairs_con.insert(asci_pairs_con.end(), asci_pairs.begin(), + asci_pairs.end()); + } else { + asci_pairs_con = std::move(asci_pairs); + } + } + + } // OpenMP + + double EPT2_local = 0.0; + size_t NPT2_local = 0; + size_t pair_size = 0; + // Local S&A for each quad + update EPT2 + { + auto uit = sort_and_accumulate_asci_pairs(asci_pairs_con.begin(), + asci_pairs_con.end()); + pair_size = std::distance(asci_pairs_con.begin(), uit); + for(auto it = asci_pairs_con.begin(); it != uit; ++it) { + if(!std::isinf(it->c_times_matel)) { + EPT2_local += it->pt2(); + NPT2_local++; + } + } + asci_pairs_con.clear(); + if(asci_settings.pt2_print_progress) + printf("[pt2_big rank %4d] CAPACITY %lu SZ %lu\n", world_rank, + asci_pairs_con.capacity(), pair_size); + } + + EPT2 += EPT2_local; + NPT2 += NPT2_local; + } // Constraint "loop" + } // "Big constraints" + + // Parallelize over both MPI + threads for "small" constraints +#pragma omp parallel reduction(+ : EPT2) reduction(+ : NPT2) + { + // Process ASCI pair contributions for each constraint + asci_contrib_container> asci_pairs; + // asci_pairs.reserve(asci_settings.pt2_reserve_count); + size_t ic = 0; + while(ic < ncon_total) { + // Atomically get the next task ID and increment for other + // MPI ranks and threads + size_t ntake = ic < asci_settings.nxtval_bcount_thresh + ? 1 + : asci_settings.nxtval_bcount_inc; + ic = nxtval_small.fetch_and_add(ntake); + + // Loop over assigned tasks + const size_t c_end = std::min(ncon_total, ic + ntake); + for(; ic < c_end; ++ic) { + const auto& con = constraints[ic].first; + if(asci_settings.pt2_print_progress) + printf("[pt2_small rank %4d tid:%4d] %10lu / %10lu\n", world_rank, + omp_get_thread_num(), ic, ncon_total); + + for(size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) { + const size_t old_pair_size = asci_pairs.size(); + const auto& alpha_det = uniq_alpha[i_alpha].first; + const auto ncon_alpha = constraint_histogram(alpha_det, 1, 1, con); + if(!ncon_alpha) continue; + const auto occ_alpha = bits_to_indices(alpha_det); + const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); + + const auto& bcd = uad[i_alpha]; + const size_t nbeta = bcd.size(); + for(size_t j_beta = 0; j_beta < nbeta; ++j_beta) { + const size_t iw = uniq_alpha_ioff[i_alpha] + j_beta; + const auto w = *(cdets_begin + iw); + const auto c = C[iw]; + const auto& beta_det = bcd[j_beta].beta_string; + const auto h_diag = bcd[j_beta].h_diag; + +// TODO: These copies are slow +#if 0 + const auto& occ_beta_8 = bcd[j_beta].occ_beta; + const auto& vir_beta_8 = bcd[j_beta].vir_beta; + std::vector occ_beta(occ_beta_8.size()), vir_beta(vir_beta_8.size()); + std::copy(occ_beta_8.begin(), occ_beta_8.end(), occ_beta.begin()); + std::copy(vir_beta_8.begin(), vir_beta_8.end(), vir_beta.begin()); +#else + std::vector occ_beta, vir_beta; + spin_wfn_traits::state_to_occ_vir(norb, beta_det, occ_beta, + vir_beta); +#endif + + std::vector orb_ens_alpha, orb_ens_beta; + if(asci_settings.pt2_precompute_eps) { + orb_ens_alpha = bcd[j_beta].orb_ens_alpha; + orb_ens_beta = bcd[j_beta].orb_ens_beta; + } else { + orb_ens_alpha = + ham_gen.single_orbital_ens(norb, occ_alpha, occ_beta); + orb_ens_beta = + ham_gen.single_orbital_ens(norb, occ_beta, occ_alpha); + } + + // AA excitations + generate_constraint_singles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, + norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // AAAA excitations + generate_constraint_doubles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, + norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // AABB excitations + generate_constraint_doubles_contributions_os( + c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), + orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + if(alpha_satisfies_con) { + // BB excitations + append_singles_asci_contributions( + c, w, beta_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // BBBB excitations + append_ss_doubles_asci_contributions( + c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // No excitation (push inf to remove from list) + asci_pairs.push_back( + {w, std::numeric_limits::infinity(), 1.0}); + } + } + if(asci_settings.pt2_prune and + asci_pairs.size() > asci_settings.pt2_reserve_count and + asci_pairs.size() != old_pair_size) { + // Cleanup + auto uit = stable_sort_and_accumulate_asci_pairs(asci_pairs.begin(), + asci_pairs.end()); + asci_pairs.erase(uit, asci_pairs.end()); + // uit = std::stable_partition(asci_pairs.begin(), asci_pairs.end(), + // [&](const auto& p){ return std::abs(p.pt2()) > h_el_tol; }); + // asci_pairs.erase(uit, asci_pairs.end()); + if(asci_settings.pt2_print_progress) + printf( + "[pt2_prune rank %4d tid:%4d] IC = %lu / %lu IA = %lu / %lu " + "SZ = %lu\n", + world_rank, omp_get_thread_num(), ic, ncon_total, i_alpha, + nuniq_alpha, asci_pairs.size()); + + if(asci_pairs.size() > asci_settings.pt2_reserve_count) { + printf("* WARNING: PRUNED SIZE LARGER THAN RESERVE COUNT\n"); + } + } + + } // Unique Alpha Loop + + double EPT2_local = 0.0; + size_t NPT2_local = 0; + // Local S&A for each quad + update EPT2 + { + auto uit = sort_and_accumulate_asci_pairs(asci_pairs.begin(), + asci_pairs.end()); + for(auto it = asci_pairs.begin(); it != uit; ++it) { + if(!std::isinf(it->c_times_matel)) { + EPT2_local += it->pt2(); + NPT2_local++; + } + } + asci_pairs.clear(); + // Deallocate + if(asci_pairs.capacity() > asci_settings.pt2_reserve_count) + asci_contrib_container>().swap(asci_pairs); + } + + EPT2 += EPT2_local; + NPT2 += NPT2_local; + } // Loc constraint loop + } // Constraint Loop + } // OpenMP + auto pt2_en = clock_type::now(); + + EPT2 = allreduce(EPT2, MPI_SUM, comm); + + double local_pt2_dur = duration_type(pt2_en - pt2_st).count(); + if(world_size > 1) { + double total_dur = allreduce(local_pt2_dur, MPI_SUM, comm); + double min_dur = allreduce(local_pt2_dur, MPI_MIN, comm); + double max_dur = allreduce(local_pt2_dur, MPI_MAX, comm); + logger->info("* PT2_DUR MIN = {:.2e}, MAX = {:.2e}, AVG = {:.2e} ms", + min_dur, max_dur, total_dur / world_size); + } else { + logger->info("* PT2_DUR = ${:.2e} ms", local_pt2_dur); + } + + NPT2 = allreduce(NPT2, MPI_SUM, comm); + logger->info("* NPT2 = {}", NPT2); + + return EPT2; +} +} // namespace macis diff --git a/include/macis/asci/refine.hpp b/include/macis/asci/refine.hpp index 4a1f53e6..34701c5b 100644 --- a/include/macis/asci/refine.hpp +++ b/include/macis/asci/refine.hpp @@ -14,7 +14,7 @@ namespace macis { template auto asci_refine(ASCISettings asci_settings, MCSCFSettings mcscf_settings, double E0, std::vector> wfn, std::vector X, - HamiltonianGenerator& ham_gen, + HamiltonianGenerator>& ham_gen, size_t norb MACIS_MPI_CODE(, MPI_Comm comm)) { auto logger = spdlog::get("asci_refine"); #ifdef MACIS_ENABLE_MPI diff --git a/include/macis/bitset_operations.hpp b/include/macis/bitset_operations.hpp index e0f5ec27..66a56a59 100644 --- a/include/macis/bitset_operations.hpp +++ b/include/macis/bitset_operations.hpp @@ -86,7 +86,8 @@ template uint128_t to_uint128(std::bitset bits) { static_assert(N <= 128, "N > 128"); if constexpr(N == 128) { - auto _x = reinterpret_cast(&bits); + alignas(alignof(uint128_t)) std::bitset cpy = bits; + auto _x = reinterpret_cast(&cpy); return *_x; } else { return fast_to_ullong(bits); @@ -156,12 +157,15 @@ uint32_t ffs(std::bitset bits) { return ffsl(fast_to_ulong(bits)); else if constexpr(N <= 64) return ffsll(fast_to_ullong(bits)); - else if constexpr(N <= 128) { + else if constexpr(N % 64 == 0) { auto as_words = reinterpret_cast(&bits); - if(as_words[0]) - return ffsll(as_words[0]); - else - return ffsll(as_words[1]) + 64; + constexpr int n_words = N / 64; + int off = 0; + for(int i = 0; i < n_words; ++i) { + if(as_words[i]) return ffsll(as_words[i]) + off; + off += 64; + } + return 0; } else { uint32_t ind = 0; for(ind = 0; ind < N; ++ind) @@ -187,17 +191,20 @@ uint32_t fls(std::bitset bits) { return fls(fast_to_ulong(bits)); else if constexpr(N <= 64) return fls(fast_to_ullong(bits)); - else if constexpr(N <= 128) { + else if constexpr(N % 64 == 0) { auto as_words = reinterpret_cast(&bits); - if(as_words[1]) - return fls(as_words[1]) + 64; - else - return fls(as_words[0]); + constexpr int n_words = N / 64; + int off = N - 64; + for(int i = n_words - 1; i >= 0; --i) { + if(as_words[i]) return fls(as_words[i]) + off; + off -= 64; + } + return UINT32_MAX; } else { uint32_t ind = 0; for(ind = N - 1; ind >= 0; ind--) if(bits[ind]) return ind; - return ind; + return UINT32_MAX; } abort(); } @@ -224,6 +231,7 @@ std::vector bits_to_indices(std::bitset bits) { return indices; } +#if 0 /// Truncate a bitset to one of smaller width template inline std::bitset truncate_bitset(std::bitset bits) { @@ -242,6 +250,7 @@ inline std::bitset truncate_bitset(std::bitset bits) { return trunc_bits; } } +#endif /// Expand a bitset to one of larger width template @@ -264,24 +273,68 @@ inline std::bitset expand_bitset(std::bitset bits) { /// Extract to lo word of a bitset of even width template inline std::bitset bitset_lo_word(std::bitset bits) { - static_assert(N == 128 or N == 64, "Not Supported"); - if constexpr(N == 128) { - return std::bitset<64>(reinterpret_cast(&bits)[0]); - } + static_assert((N % 64 == 0) and (N == 64 or N / 2 % 64 == 0), + "Not Supported"); if constexpr(N == 64) { return std::bitset<32>(reinterpret_cast(&bits)[0]); + } else { + std::bitset lo; + constexpr int nword = (N / 2) / 64; + auto lo_as_words = reinterpret_cast(&lo); + auto bi_as_words = reinterpret_cast(&bits); + for(int i = 0; i < nword; ++i) lo_as_words[i] = bi_as_words[i]; + return lo; + } +} + +template +inline void set_bitset_lo_word(std::bitset& bits, std::bitset word) { + static_assert((N % 64 == 0) and (N == 64 or N / 2 % 64 == 0), + "Not Supported"); + + if constexpr(N == 64) { + auto bi_as_words = reinterpret_cast(&bits); + auto wo_as_words = reinterpret_cast(&word); + bi_as_words[0] = wo_as_words[0]; + } else { + constexpr int nword = (N / 2) / 64; + auto bi_as_words = reinterpret_cast(&bits); + auto wo_as_words = reinterpret_cast(&word); + for(int i = 0; i < nword; ++i) bi_as_words[i] = wo_as_words[i]; } } /// Extract to hi word of a bitset of even width template inline std::bitset bitset_hi_word(std::bitset bits) { - static_assert(N == 128 or N == 64, "Not Supported"); - if constexpr(N == 128) { - return std::bitset<64>(reinterpret_cast(&bits)[1]); - } + static_assert((N % 64 == 0) and (N == 64 or N / 2 % 64 == 0), + "Not Supported"); if constexpr(N == 64) { return std::bitset<32>(reinterpret_cast(&bits)[1]); + } else { + std::bitset hi; + constexpr int nword = (N / 2) / 64; + auto hi_as_words = reinterpret_cast(&hi); + auto bi_as_words = reinterpret_cast(&bits); + for(int i = 0; i < nword; ++i) hi_as_words[i] = bi_as_words[i + nword]; + return hi; + } +} + +template +inline void set_bitset_hi_word(std::bitset& bits, std::bitset word) { + static_assert((N % 64 == 0) and (N == 64 or N / 2 % 64 == 0), + "Not Supported"); + + if constexpr(N == 64) { + auto bi_as_words = reinterpret_cast(&bits); + auto wo_as_words = reinterpret_cast(&word); + bi_as_words[1] = wo_as_words[0]; + } else { + constexpr int nword = (N / 2) / 64; + auto bi_as_words = reinterpret_cast(&bits); + auto wo_as_words = reinterpret_cast(&word); + for(int i = 0; i < nword; ++i) bi_as_words[i + nword] = wo_as_words[i]; } } @@ -293,9 +346,19 @@ bool bitset_less(std::bitset x, std::bitset y) { else if constexpr(N <= 64) return fast_to_ullong(x) < fast_to_ullong(y); else if constexpr(N == 128) { - auto _x = reinterpret_cast(&x); - auto _y = reinterpret_cast(&y); - return *_x < *_y; + auto _x = to_uint128(x); + auto _y = to_uint128(y); + return _x < _y; + } else if constexpr(N % 64 == 0) { + auto x_as_words = reinterpret_cast(&x); + auto y_as_words = reinterpret_cast(&y); + constexpr auto nwords = N / 64; + for(int i = nwords - 1; i >= 0; --i) { + const auto x_i = x_as_words[i]; + const auto y_i = y_as_words[i]; + if(x_i != y_i) return x_i < y_i; + } + return false; } else { for(int i = N - 1; i >= 0; i--) { if(x[i] ^ y[i]) return y[i]; diff --git a/include/macis/csr_hamiltonian.hpp b/include/macis/csr_hamiltonian.hpp index 2a9dc24b..cea646cd 100644 --- a/include/macis/csr_hamiltonian.hpp +++ b/include/macis/csr_hamiltonian.hpp @@ -18,12 +18,12 @@ #include namespace macis { -// Base implementation of bitset CSR generation -template +// Base implementation of CSR hamiltonian generation +template sparsexx::csr_matrix make_csr_hamiltonian_block( - wavefunction_iterator_t bra_begin, wavefunction_iterator_t bra_end, - wavefunction_iterator_t ket_begin, wavefunction_iterator_t ket_end, - HamiltonianGenerator& ham_gen, double H_thresh) { + WfnIterator bra_begin, WfnIterator bra_end, WfnIterator ket_begin, + WfnIterator ket_end, HamiltonianGenerator& ham_gen, + double H_thresh) { size_t nbra = std::distance(bra_begin, bra_end); size_t nket = std::distance(ket_begin, ket_end); @@ -35,21 +35,21 @@ sparsexx::csr_matrix make_csr_hamiltonian_block( } } -template +template sparsexx::csr_matrix make_csr_hamiltonian( - wavefunction_iterator_t sd_begin, wavefunction_iterator_t sd_end, - HamiltonianGenerator& ham_gen, double H_thresh) { + WfnIterator sd_begin, WfnIterator sd_end, + HamiltonianGenerator& ham_gen, double H_thresh) { return make_csr_hamiltonian_block(sd_begin, sd_end, sd_begin, sd_end, ham_gen, H_thresh); } #ifdef MACIS_ENABLE_MPI // Base implementation of dist-CSR H construction for bitsets -template +template sparsexx::dist_sparse_matrix> -make_dist_csr_hamiltonian(MPI_Comm comm, wavefunction_iterator_t sd_begin, - wavefunction_iterator_t sd_end, - HamiltonianGenerator& ham_gen, +make_dist_csr_hamiltonian(MPI_Comm comm, WfnIterator sd_begin, + WfnIterator sd_end, + HamiltonianGenerator& ham_gen, const double H_thresh) { using namespace sparsexx; using namespace sparsexx::detail; @@ -69,7 +69,7 @@ make_dist_csr_hamiltonian(MPI_Comm comm, wavefunction_iterator_t sd_begin, if(world_size > 1) { // Create a copy of SD's with local bra dets zero'd out - std::vector> sds_offdiag(sd_begin, sd_end); + std::vector sds_offdiag(sd_begin, sd_end); for(auto i = bra_st; i < bra_en; ++i) sds_offdiag[i] = 0ul; // Build off-diagonal part diff --git a/include/macis/hamiltonian_generator.hpp b/include/macis/hamiltonian_generator.hpp index 9ef9583b..4c828ded 100644 --- a/include/macis/hamiltonian_generator.hpp +++ b/include/macis/hamiltonian_generator.hpp @@ -7,61 +7,19 @@ */ #pragma once -#include +#include #include -#include -#include namespace macis { -template -class HamiltonianGenerator { - static_assert(N % 2 == 0, "N Must Be Even"); - +template +class HamiltonianGenerator : public HamiltonianGeneratorBase { public: - constexpr static size_t nbits = N; - - using full_det_t = std::bitset; - using spin_det_t = std::bitset; - - template - using sparse_matrix_type = sparsexx::csr_matrix; - - using full_det_iterator = wavefunction_iterator_t; - - using matrix_span_t = matrix_span; - using rank3_span_t = rank3_span; - using rank4_span_t = rank4_span; - - public: - inline spin_det_t alpha_string(full_det_t str) { return bitset_lo_word(str); } - inline spin_det_t beta_string(full_det_t str) { return bitset_hi_word(str); } - - size_t norb_; - size_t norb2_; - size_t norb3_; - matrix_span_t T_pq_; - rank4_span_t V_pqrs_; - - // G(i,j,k,l) = (ij|kl) - (il|kj) - std::vector G_pqrs_data_; - rank4_span_t G_pqrs_; - - // G_red(i,j,k) = G(i,j,k,k) - std::vector G_red_data_; - rank3_span_t G_red_; - - // V_red(i,j,k) = (ij|kk) - std::vector V_red_data_; - rank3_span_t V_red_; - - // G2_red(i,j) = 0.5 * G(i,i,j,j) - std::vector G2_red_data_; - matrix_span_t G2_red_; + using full_det_t = WfnType; + using spin_det_t = spin_wfn_t; - // V2_red(i,j) = (ii|jj) - std::vector V2_red_data_; - matrix_span_t V2_red_; + using full_det_container = std::vector; + using full_det_iterator = typename full_det_container::iterator; virtual sparse_matrix_type make_csr_hamiltonian_block_32bit_( full_det_iterator, full_det_iterator, full_det_iterator, @@ -72,16 +30,10 @@ class HamiltonianGenerator { full_det_iterator, double) = 0; public: - HamiltonianGenerator(matrix_span_t T, rank4_span_t V); - virtual ~HamiltonianGenerator() noexcept = default; - - void generate_integral_intermediates(rank4_span_t V); + HamiltonianGenerator(matrix_span_t T, rank4_span_t V) + : HamiltonianGeneratorBase(T, V){}; - inline auto* T() const { return T_pq_.data_handle(); } - inline auto* G_red() const { return G_red_data_.data(); } - inline auto* V_red() const { return V_red_data_.data(); } - inline auto* G() const { return G_pqrs_data_.data(); } - inline auto* V() const { return V_pqrs_.data_handle(); } + virtual ~HamiltonianGenerator() noexcept = default; double matrix_element_4(spin_det_t bra, spin_det_t ket, spin_det_t ex) const; double matrix_element_22(spin_det_t bra_alpha, spin_det_t ket_alpha, @@ -101,42 +53,6 @@ class HamiltonianGenerator { const std::vector& bra_occ_alpha, const std::vector& bra_occ_beta) const; - double single_orbital_en(uint32_t orb, const std::vector& ss_occ, - const std::vector& os_occ) const; - - std::vector single_orbital_ens( - size_t norb, const std::vector& ss_occ, - const std::vector& os_occ) const; - - double fast_diag_single(const std::vector& ss_occ, - const std::vector& os_occ, uint32_t orb_hol, - uint32_t orb_par, double orig_det_Hii) const; - - double fast_diag_single(double hol_en, double par_en, uint32_t orb_hol, - uint32_t orb_par, double orig_det_Hii) const; - - double fast_diag_ss_double(double en_hol1, double en_hol2, double en_par1, - double en_par2, uint32_t orb_hol1, - uint32_t orb_hol2, uint32_t orb_par1, - uint32_t orb_par2, double orig_det_Hii) const; - - double fast_diag_ss_double(const std::vector& ss_occ, - const std::vector& os_occ, - uint32_t orb_hol1, uint32_t orb_hol2, - uint32_t orb_par1, uint32_t orb_par2, - double orig_det_Hii) const; - - double fast_diag_os_double(double en_holu, double en_hold, double en_paru, - double en_pard, uint32_t orb_holu, - uint32_t orb_hold, uint32_t orb_paru, - uint32_t orb_pard, double orig_det_Hii) const; - - double fast_diag_os_double(const std::vector& up_occ, - const std::vector& do_occ, - uint32_t orb_holu, uint32_t orb_hold, - uint32_t orb_paru, uint32_t orb_pard, - double orig_det_Hii) const; - double matrix_element(full_det_t bra, full_det_t ket) const; template @@ -180,11 +96,9 @@ class HamiltonianGenerator { full_det_iterator, full_det_iterator, double* C, matrix_span_t ordm, rank4_span_t trdm) = 0; - void rotate_hamiltonian_ordm(const double* ordm); - virtual void SetJustSingles(bool /*_js*/) {} virtual bool GetJustSingles() { return false; } - virtual size_t GetNimp() const { return N / 2; } + // virtual size_t GetNimp() const { return N / 2; } }; } // namespace macis diff --git a/include/macis/hamiltonian_generator/base.hpp b/include/macis/hamiltonian_generator/base.hpp new file mode 100644 index 00000000..64dcb67f --- /dev/null +++ b/include/macis/hamiltonian_generator/base.hpp @@ -0,0 +1,106 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#pragma once +#include +#include + +namespace macis { + +template +class HamiltonianGeneratorBase { + protected: + template + using sparse_matrix_type = sparsexx::csr_matrix; + + using matrix_span_t = matrix_span; + using rank3_span_t = rank3_span; + using rank4_span_t = rank4_span; + + size_t norb_; + size_t norb2_; + size_t norb3_; + matrix_span_t T_pq_; + rank4_span_t V_pqrs_; + + // G(i,j,k,l) = (ij|kl) - (il|kj) + std::vector G_pqrs_data_; + rank4_span_t G_pqrs_; + + // G_red(i,j,k) = G(i,j,k,k) + std::vector G_red_data_; + rank3_span_t G_red_; + + // V_red(i,j,k) = (ij|kk) + std::vector V_red_data_; + rank3_span_t V_red_; + + // G2_red(i,j) = 0.5 * G(i,i,j,j) + std::vector G2_red_data_; + matrix_span_t G2_red_; + + // V2_red(i,j) = (ii|jj) + std::vector V2_red_data_; + matrix_span_t V2_red_; + + void generate_integral_intermediates_(rank4_span_t V); + + public: + HamiltonianGeneratorBase(matrix_span_t T, rank4_span_t V); + virtual ~HamiltonianGeneratorBase() noexcept = default; + + inline auto* T() const { return T_pq_.data_handle(); } + inline auto* G_red() const { return G_red_data_.data(); } + inline auto* V_red() const { return V_red_data_.data(); } + inline auto* G() const { return G_pqrs_data_.data(); } + inline auto* V() const { return V_pqrs_.data_handle(); } + + inline void generate_integral_intermediates() { + generate_integral_intermediates_(V_pqrs_); + } + + double single_orbital_en(uint32_t orb, const std::vector& ss_occ, + const std::vector& os_occ) const; + + std::vector single_orbital_ens( + size_t norb, const std::vector& ss_occ, + const std::vector& os_occ) const; + + double fast_diag_single(const std::vector& ss_occ, + const std::vector& os_occ, uint32_t orb_hol, + uint32_t orb_par, double orig_det_Hii) const; + + double fast_diag_single(double hol_en, double par_en, uint32_t orb_hol, + uint32_t orb_par, double orig_det_Hii) const; + + double fast_diag_ss_double(double en_hol1, double en_hol2, double en_par1, + double en_par2, uint32_t orb_hol1, + uint32_t orb_hol2, uint32_t orb_par1, + uint32_t orb_par2, double orig_det_Hii) const; + + double fast_diag_ss_double(const std::vector& ss_occ, + const std::vector& os_occ, + uint32_t orb_hol1, uint32_t orb_hol2, + uint32_t orb_par1, uint32_t orb_par2, + double orig_det_Hii) const; + + double fast_diag_os_double(double en_holu, double en_hold, double en_paru, + double en_pard, uint32_t orb_holu, + uint32_t orb_hold, uint32_t orb_paru, + uint32_t orb_pard, double orig_det_Hii) const; + + double fast_diag_os_double(const std::vector& up_occ, + const std::vector& do_occ, + uint32_t orb_holu, uint32_t orb_hold, + uint32_t orb_paru, uint32_t orb_pard, + double orig_det_Hii) const; + + void rotate_hamiltonian_ordm(const Scalar* ordm); +}; + +} // namespace macis diff --git a/include/macis/hamiltonian_generator/double_loop.hpp b/include/macis/hamiltonian_generator/double_loop.hpp index 5628a5e6..fe2f85d0 100644 --- a/include/macis/hamiltonian_generator/double_loop.hpp +++ b/include/macis/hamiltonian_generator/double_loop.hpp @@ -13,10 +13,10 @@ namespace macis { -template -class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { +template +class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { public: - using base_type = HamiltonianGenerator; + using base_type = HamiltonianGenerator; using full_det_t = typename base_type::full_det_t; using spin_det_t = typename base_type::spin_det_t; using full_det_iterator = typename base_type::full_det_iterator; @@ -31,6 +31,7 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { sparse_matrix_type make_csr_hamiltonian_block_( full_det_iterator bra_begin, full_det_iterator bra_end, full_det_iterator ket_begin, full_det_iterator ket_end, double H_thresh) { + using wfn_traits = wavefunction_traits; const size_t nbra_dets = std::distance(bra_begin, bra_end); const size_t nket_dets = std::distance(ket_begin, ket_end); @@ -47,10 +48,10 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { const auto bra = *(bra_begin + i); size_t nrow = 0; - if(bra.count()) { + if(wfn_traits::count(bra)) { // Separate out into alpha/beta components - spin_det_t bra_alpha = bitset_lo_word(bra); - spin_det_t bra_beta = bitset_hi_word(bra); + spin_det_t bra_alpha = wfn_traits::alpha_string(bra); + spin_det_t bra_beta = wfn_traits::beta_string(bra); // Get occupied indices bits_to_indices(bra_alpha, bra_occ_alpha); @@ -59,14 +60,14 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { // Loop over ket determinants for(size_t j = 0; j < nket_dets; ++j) { const auto ket = *(ket_begin + j); - if(ket.count()) { - spin_det_t ket_alpha = bitset_lo_word(ket); - spin_det_t ket_beta = bitset_hi_word(ket); + if(wfn_traits::count(ket)) { + spin_det_t ket_alpha = wfn_traits::alpha_string(ket); + spin_det_t ket_beta = wfn_traits::beta_string(ket); full_det_t ex_total = bra ^ ket; - if(ex_total.count() <= 4) { - spin_det_t ex_alpha = bitset_lo_word(ex_total); - spin_det_t ex_beta = bitset_hi_word(ex_total); + if(wfn_traits::count(ex_total) <= 4) { + spin_det_t ex_alpha = wfn_traits::alpha_string(ex_total); + spin_det_t ex_beta = wfn_traits::beta_string(ex_total); // Compute Matrix Element const auto h_el = this->matrix_element( @@ -117,6 +118,7 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { void form_rdms(full_det_iterator bra_begin, full_det_iterator bra_end, full_det_iterator ket_begin, full_det_iterator ket_end, double *C, matrix_span_t ordm, rank4_span_t trdm) override { + using wfn_traits = wavefunction_traits; const size_t nbra_dets = std::distance(bra_begin, bra_end); const size_t nket_dets = std::distance(ket_begin, ket_end); @@ -126,10 +128,10 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { for(size_t i = 0; i < nbra_dets; ++i) { const auto bra = *(bra_begin + i); // if( (i%1000) == 0 ) std::cout << i << std::endl; - if(bra.count()) { + if(wfn_traits::count(bra)) { // Separate out into alpha/beta components - spin_det_t bra_alpha = bitset_lo_word(bra); - spin_det_t bra_beta = bitset_hi_word(bra); + spin_det_t bra_alpha = wfn_traits::alpha_string(bra); + spin_det_t bra_beta = wfn_traits::beta_string(bra); // Get occupied indices bits_to_indices(bra_alpha, bra_occ_alpha); @@ -138,14 +140,14 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { // Loop over ket determinants for(size_t j = 0; j < nket_dets; ++j) { const auto ket = *(ket_begin + j); - if(ket.count()) { - spin_det_t ket_alpha = bitset_lo_word(ket); - spin_det_t ket_beta = bitset_hi_word(ket); + if(wfn_traits::count(ket)) { + spin_det_t ket_alpha = wfn_traits::alpha_string(ket); + spin_det_t ket_beta = wfn_traits::beta_string(ket); full_det_t ex_total = bra ^ ket; - if(ex_total.count() <= 4) { - spin_det_t ex_alpha = bitset_lo_word(ex_total); - spin_det_t ex_beta = bitset_hi_word(ex_total); + if(wfn_traits::count(ex_total) <= 4) { + spin_det_t ex_alpha = wfn_traits::alpha_string(ex_total); + spin_det_t ex_beta = wfn_traits::beta_string(ex_total); const double val = C[i] * C[j]; @@ -167,7 +169,7 @@ class DoubleLoopHamiltonianGenerator : public HamiltonianGenerator { public: template DoubleLoopHamiltonianGenerator(Args &&...args) - : HamiltonianGenerator(std::forward(args)...) {} + : HamiltonianGenerator(std::forward(args)...) {} }; } // namespace macis diff --git a/include/macis/hamiltonian_generator/impl.hpp b/include/macis/hamiltonian_generator/impl.hpp index 2ead987b..d4d8e964 100644 --- a/include/macis/hamiltonian_generator/impl.hpp +++ b/include/macis/hamiltonian_generator/impl.hpp @@ -7,70 +7,5 @@ */ #pragma once -#include -namespace macis { - -template -HamiltonianGenerator::HamiltonianGenerator(matrix_span T, - rank4_span_t V) - : norb_(T.extent(0)), - norb2_(norb_ * norb_), - norb3_(norb2_ * norb_), - T_pq_(T), - V_pqrs_(V) { - generate_integral_intermediates(V_pqrs_); -} - -template -void HamiltonianGenerator::generate_integral_intermediates(rank4_span_t V) { - if(V.extent(0) != norb_ or V.extent(1) != norb_ or V.extent(2) != norb_ or - V.extent(3) != norb_) - throw std::runtime_error("V has incorrect dimensions"); - - size_t no = norb_; - size_t no2 = no * no; - size_t no3 = no2 * no; - size_t no4 = no3 * no; - - // G(i,j,k,l) = V(i,j,k,l) - V(i,l,k,j) - G_pqrs_data_ = std::vector(begin(V), end(V)); - G_pqrs_ = rank4_span_t(G_pqrs_data_.data(), no, no, no, no); - for(auto i = 0ul; i < no; ++i) - for(auto j = 0ul; j < no; ++j) - for(auto k = 0ul; k < no; ++k) - for(auto l = 0ul; l < no; ++l) { - G_pqrs_(i, j, k, l) -= V(i, l, k, j); - } - - // G_red(i,j,k) = G(i,j,k,k) = G(k,k,i,j) - // V_red(i,j,k) = V(i,j,k,k) = V(k,k,i,j) - G_red_data_.resize(no3); - V_red_data_.resize(no3); - G_red_ = rank3_span_t(G_red_data_.data(), no, no, no); - V_red_ = rank3_span_t(V_red_data_.data(), no, no, no); - for(auto j = 0ul; j < no; ++j) - for(auto i = 0ul; i < no; ++i) - for(auto k = 0ul; k < no; ++k) { - G_red_(k, i, j) = G_pqrs_(k, k, i, j); - V_red_(k, i, j) = V(k, k, i, j); - } - - // G2_red(i,j) = 0.5 * G(i,i,j,j) - // V2_red(i,j) = V(i,i,j,j) - G2_red_data_.resize(no2); - V2_red_data_.resize(no2); - G2_red_ = matrix_span(G2_red_data_.data(), no, no); - V2_red_ = matrix_span(V2_red_data_.data(), no, no); - for(auto j = 0ul; j < no; ++j) - for(auto i = 0ul; i < no; ++i) { - G2_red_(i, j) = 0.5 * G_pqrs_(i, i, j, j); - V2_red_(i, j) = V(i, i, j, j); - } -} - -} // namespace macis - -#include #include -#include diff --git a/include/macis/hamiltonian_generator/matrix_elements.hpp b/include/macis/hamiltonian_generator/matrix_elements.hpp index 7c232a3f..36c80b9d 100644 --- a/include/macis/hamiltonian_generator/matrix_elements.hpp +++ b/include/macis/hamiltonian_generator/matrix_elements.hpp @@ -11,13 +11,14 @@ namespace macis { -template -double HamiltonianGenerator::matrix_element(full_det_t bra, - full_det_t ket) const { - auto bra_alpha = bitset_lo_word(bra); - auto ket_alpha = bitset_lo_word(ket); - auto bra_beta = bitset_hi_word(bra); - auto ket_beta = bitset_hi_word(ket); +template +double HamiltonianGenerator::matrix_element(full_det_t bra, + full_det_t ket) const { + using wfn_traits = wavefunction_traits; + auto bra_alpha = wfn_traits::alpha_string(bra); + auto ket_alpha = wfn_traits::alpha_string(ket); + auto bra_beta = wfn_traits::beta_string(bra); + auto ket_beta = wfn_traits::beta_string(ket); auto ex_alpha = bra_alpha ^ ket_alpha; auto ex_beta = bra_beta ^ ket_beta; @@ -29,14 +30,15 @@ double HamiltonianGenerator::matrix_element(full_det_t bra, ex_beta, bra_occ_alpha, bra_occ_beta); } -template -double HamiltonianGenerator::matrix_element( +template +double HamiltonianGenerator::matrix_element( spin_det_t bra_alpha, spin_det_t ket_alpha, spin_det_t ex_alpha, spin_det_t bra_beta, spin_det_t ket_beta, spin_det_t ex_beta, const std::vector& bra_occ_alpha, const std::vector& bra_occ_beta) const { - const uint32_t ex_alpha_count = ex_alpha.count(); - const uint32_t ex_beta_count = ex_beta.count(); + using spin_wfn_traits = wavefunction_traits; + const uint32_t ex_alpha_count = spin_wfn_traits::count(ex_alpha); + const uint32_t ex_beta_count = spin_wfn_traits::count(ex_beta); if((ex_alpha_count + ex_beta_count) > 4) return 0.; @@ -62,16 +64,17 @@ double HamiltonianGenerator::matrix_element( return matrix_element_diag(bra_occ_alpha, bra_occ_beta); } -template -double HamiltonianGenerator::matrix_element_4(spin_det_t bra, spin_det_t ket, - spin_det_t ex) const { +template +double HamiltonianGenerator::matrix_element_4(spin_det_t bra, + spin_det_t ket, + spin_det_t ex) const { auto [o1, v1, o2, v2, sign] = doubles_sign_indices(bra, ket, ex); return sign * G_pqrs_(v1, o1, v2, o2); } -template -double HamiltonianGenerator::matrix_element_22( +template +double HamiltonianGenerator::matrix_element_22( spin_det_t bra_alpha, spin_det_t ket_alpha, spin_det_t ex_alpha, spin_det_t bra_beta, spin_det_t ket_beta, spin_det_t ex_beta) const { auto [o1, v1, sign_a] = @@ -83,8 +86,8 @@ double HamiltonianGenerator::matrix_element_22( return sign * V_pqrs_(v1, o1, v2, o2); } -template -double HamiltonianGenerator::matrix_element_2( +template +double HamiltonianGenerator::matrix_element_2( spin_det_t bra, spin_det_t ket, spin_det_t ex, const std::vector& bra_occ_alpha, const std::vector& bra_occ_beta) const { @@ -105,8 +108,8 @@ double HamiltonianGenerator::matrix_element_2( return sign * h_el; } -template -double HamiltonianGenerator::matrix_element_diag( +template +double HamiltonianGenerator::matrix_element_diag( const std::vector& occ_alpha, const std::vector& occ_beta) const { double h_el = 0; diff --git a/include/macis/hamiltonian_generator/sorted_double_loop.hpp b/include/macis/hamiltonian_generator/sorted_double_loop.hpp new file mode 100644 index 00000000..af8760dd --- /dev/null +++ b/include/macis/hamiltonian_generator/sorted_double_loop.hpp @@ -0,0 +1,326 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#pragma once +#include +#include +#include +#include + +namespace macis { + +template +class SortedDoubleLoopHamiltonianGenerator + : public HamiltonianGenerator { + public: + using base_type = HamiltonianGenerator; + using full_det_t = typename base_type::full_det_t; + using spin_det_t = typename base_type::spin_det_t; + using full_det_iterator = typename base_type::full_det_iterator; + using matrix_span_t = typename base_type::matrix_span_t; + using rank4_span_t = typename base_type::rank4_span_t; + + template + using sparse_matrix_type = sparsexx::csr_matrix; + + protected: + template + sparse_matrix_type make_csr_hamiltonian_block_( + full_det_iterator bra_begin, full_det_iterator bra_end, + full_det_iterator ket_begin, full_det_iterator ket_end, double H_thresh) { + using wfn_traits = wavefunction_traits; + using spin_wfn_type = typename wfn_traits::spin_wfn_type; + using spin_wfn_traits = wavefunction_traits; + const size_t nbra_dets = std::distance(bra_begin, bra_end); + const size_t nket_dets = std::distance(ket_begin, ket_end); + + const bool is_symm = bra_begin == ket_begin and bra_end == ket_end; + auto world_rank = comm_rank(MPI_COMM_WORLD); + + // Get unique alpha strings + auto setup_st = std::chrono::high_resolution_clock::now(); + auto unique_alpha_bra = get_unique_alpha(bra_begin, bra_end); + auto unique_alpha_ket = + is_symm ? unique_alpha_bra : get_unique_alpha(ket_begin, ket_end); + + const size_t nuniq_bra = unique_alpha_bra.size(); + const size_t nuniq_ket = unique_alpha_ket.size(); + + // Compute offsets + std::vector unique_alpha_bra_idx(nuniq_bra + 1); + std::transform_exclusive_scan( + unique_alpha_bra.begin(), unique_alpha_bra.end(), + unique_alpha_bra_idx.begin(), 0ul, std::plus{}, + [](auto& x) { return x.second; }); + std::vector unique_alpha_ket_idx(nuniq_ket + 1); + if(is_symm) { + unique_alpha_ket_idx = unique_alpha_bra_idx; + } else { + std::transform_exclusive_scan( + unique_alpha_ket.begin(), unique_alpha_ket.end(), + unique_alpha_ket_idx.begin(), 0ul, std::plus{}, + [](auto& x) { return x.second; }); + } + + unique_alpha_bra_idx.back() = nbra_dets; + unique_alpha_ket_idx.back() = nket_dets; + auto setup_en = std::chrono::high_resolution_clock::now(); + + // std::cout << "AVERAGE NBETA = " << + // std::accumulate(unique_alpha_bra.begin(), unique_alpha_bra.end(), + // 0ul, [](auto a, auto b){ return a + b.second; }) / double(nuniq_bra) + // << std::endl; + + // Populate COO matrix locally + // sparsexx::coo_matrix coo_mat(nbra_dets, nket_dets, 0, + // 0); + std::vector row_ind, col_ind; + std::vector nz_val; + + // size_t skip1 = 0; + // size_t skip2 = 0; + + auto fast_insert = [](auto& old_vec, auto&& new_vec) { + if(old_vec.size() == 0) + old_vec = std::move(new_vec); + else { + old_vec.reserve(old_vec.size() + new_vec.size()); + old_vec.insert(old_vec.end(), new_vec.begin(), new_vec.end()); + } + }; + // Loop over uniq alphas in bra/ket + auto pop_st = std::chrono::high_resolution_clock::now(); +#pragma omp parallel + { + std::vector row_ind_loc, col_ind_loc; + std::vector nz_val_loc; + std::vector bra_occ_alpha, bra_occ_beta; +#pragma omp for schedule(dynamic) + for(size_t ia_bra = 0; ia_bra < nuniq_bra; ++ia_bra) { + if(unique_alpha_bra[ia_bra].first.any()) { + if(!(ia_bra % 100)) + printf("[ham_gen rank %d] IA_BRA = %lu / %lu\n", world_rank, ia_bra, + nuniq_bra); + // Extract alpha bra + const auto bra_alpha = unique_alpha_bra[ia_bra].first; + const size_t beta_st_bra = unique_alpha_bra_idx[ia_bra]; + const size_t beta_en_bra = unique_alpha_bra_idx[ia_bra + 1]; + spin_wfn_traits::state_to_occ(bra_alpha, bra_occ_alpha); + + const auto ket_lower = is_symm ? ia_bra : 0; + for(size_t ia_ket = ket_lower; ia_ket < nuniq_ket; ++ia_ket) { + if(unique_alpha_ket[ia_ket].first.any()) { + // Extract alpha ket + const auto ket_alpha = unique_alpha_ket[ia_ket].first; + + // Compute alpha excitation + const auto ex_alpha = bra_alpha ^ ket_alpha; + const auto ex_alpha_count = spin_wfn_traits::count(ex_alpha); + + // Early exit + if(ex_alpha_count > 4) { + // skip1++; + continue; + } + + // Precompute all-alpha excitation if it will be used + const double mat_el_4_alpha = + (ex_alpha_count == 4) + ? this->matrix_element_4(bra_alpha, ket_alpha, ex_alpha) + : 0.0; + if(ex_alpha_count == 4 and std::abs(mat_el_4_alpha) < H_thresh) { + // The only possible matrix element is too-small, skip everyhing + // skip2++; + continue; + } + + const size_t beta_st_ket = unique_alpha_ket_idx[ia_ket]; + const size_t beta_en_ket = unique_alpha_ket_idx[ia_ket + 1]; + + // Loop over local betas according to their global indices + for(size_t ibra = beta_st_bra; ibra < beta_en_bra; ++ibra) { + const auto bra_beta = + wfn_traits::beta_string(*(bra_begin + ibra)); + spin_wfn_traits::state_to_occ(bra_beta, bra_occ_beta); + for(size_t iket = beta_st_ket; iket < beta_en_ket; ++iket) { + if(is_symm and (iket < ibra)) continue; + const auto ket_beta = + wfn_traits::beta_string(*(ket_begin + iket)); + + // Compute beta excitation + const auto ex_beta = bra_beta ^ ket_beta; + const auto ex_beta_count = spin_wfn_traits::count(ex_beta); + + if((ex_alpha_count + ex_beta_count) > 4) continue; + + double h_el = 0.0; + if(ex_alpha_count == 4) { + // Use precomputed value + h_el = mat_el_4_alpha; + } else if(ex_beta_count == 4) { + h_el = this->matrix_element_4(bra_beta, ket_beta, ex_beta); + } else if(ex_alpha_count == 2) { + if(ex_beta_count == 2) { + h_el = this->matrix_element_22(bra_alpha, ket_alpha, + ex_alpha, bra_beta, + ket_beta, ex_beta); + } else { + h_el = + this->matrix_element_2(bra_alpha, ket_alpha, ex_alpha, + bra_occ_alpha, bra_occ_beta); + } + } else if(ex_beta_count == 2) { + h_el = this->matrix_element_2(bra_beta, ket_beta, ex_beta, + bra_occ_beta, bra_occ_alpha); + } else { + // Diagonal matrix element + h_el = + this->matrix_element_diag(bra_occ_alpha, bra_occ_beta); + } + + // Insert matrix element + if(std::abs(h_el) > H_thresh) { + // coo_mat.template insert(ibra, iket, h_el); + row_ind_loc.emplace_back(ibra); + col_ind_loc.emplace_back(iket); + nz_val_loc.emplace_back(h_el); + if(is_symm and ibra != iket) { + // coo_mat.template insert(iket, ibra, h_el); + row_ind_loc.emplace_back(iket); + col_ind_loc.emplace_back(ibra); + nz_val_loc.emplace_back(h_el); + } + } + + } // ket beta + } // bra beta + } + } // Loop over ket alphas + } + } // Loop over bra alphas + +// Atomically insert into larger matrix arrays +#pragma omp critical + { +#if 0 + row_ind.insert(row_ind.end(), row_ind_loc.begin(), row_ind_loc.end()); + // row_ind_loc.clear(); row_ind_loc.shrink_to_fit(); + col_ind.insert(col_ind.end(), col_ind_loc.begin(), col_ind_loc.end()); + // col_ind_loc.clear(); col_ind_loc.shrink_to_fit(); + nz_val.insert(nz_val.end(), nz_val_loc.begin(), nz_val_loc.end()); + // nz_val_loc.clear(); nz_val_loc.shrink_to_fit(); +#else + fast_insert(row_ind, row_ind_loc); + fast_insert(col_ind, col_ind_loc); + fast_insert(nz_val, nz_val_loc); +#endif + } + + } // OpenMP + auto pop_en = std::chrono::high_resolution_clock::now(); + + // Generate Sparse Matrix + sparsexx::coo_matrix coo_mat( + nbra_dets, nket_dets, std::move(col_ind), std::move(row_ind), + std::move(nz_val), 0); + + // Sort for CSR Conversion + auto sort_st = std::chrono::high_resolution_clock::now(); + printf("[ham_gen rank %d] BEFORE SORT\n", world_rank); + coo_mat.sort_by_row_index(); + auto sort_en = std::chrono::high_resolution_clock::now(); + + auto conv_st = std::chrono::high_resolution_clock::now(); + printf("[ham_gen rank %d] BEFORE CONV\n", world_rank); + sparse_matrix_type csr_mat(coo_mat); // Convert to CSR Matrix + auto conv_en = std::chrono::high_resolution_clock::now(); + + printf("Setup %.2e Pop %.2e Sort %.2e Conv %.2e\n", + std::chrono::duration(setup_en - setup_st).count(), + std::chrono::duration(pop_en - pop_st).count(), + std::chrono::duration(sort_en - sort_st).count(), + std::chrono::duration(conv_en - conv_st).count()); + + return csr_mat; + } + + sparse_matrix_type make_csr_hamiltonian_block_32bit_( + full_det_iterator bra_begin, full_det_iterator bra_end, + full_det_iterator ket_begin, full_det_iterator ket_end, + double H_thresh) override { + return make_csr_hamiltonian_block_(bra_begin, bra_end, ket_begin, + ket_end, H_thresh); + } + + sparse_matrix_type make_csr_hamiltonian_block_64bit_( + full_det_iterator bra_begin, full_det_iterator bra_end, + full_det_iterator ket_begin, full_det_iterator ket_end, + double H_thresh) override { + return make_csr_hamiltonian_block_(bra_begin, bra_end, ket_begin, + ket_end, H_thresh); + } + + public: + void form_rdms(full_det_iterator bra_begin, full_det_iterator bra_end, + full_det_iterator ket_begin, full_det_iterator ket_end, + double* C, matrix_span_t ordm, rank4_span_t trdm) override { + using wfn_traits = wavefunction_traits; + const size_t nbra_dets = std::distance(bra_begin, bra_end); + const size_t nket_dets = std::distance(ket_begin, ket_end); + + std::vector bra_occ_alpha, bra_occ_beta; + + // Loop over bra determinants + for(size_t i = 0; i < nbra_dets; ++i) { + const auto bra = *(bra_begin + i); + // if( (i%1000) == 0 ) std::cout << i << std::endl; + if(wfn_traits::count(bra)) { + // Separate out into alpha/beta components + spin_det_t bra_alpha = wfn_traits::alpha_string(bra); + spin_det_t bra_beta = wfn_traits::beta_string(bra); + + // Get occupied indices + bits_to_indices(bra_alpha, bra_occ_alpha); + bits_to_indices(bra_beta, bra_occ_beta); + + // Loop over ket determinants + for(size_t j = 0; j < nket_dets; ++j) { + const auto ket = *(ket_begin + j); + if(wfn_traits::count(ket)) { + spin_det_t ket_alpha = wfn_traits::alpha_string(ket); + spin_det_t ket_beta = wfn_traits::beta_string(ket); + + full_det_t ex_total = bra ^ ket; + if(wfn_traits::count(ex_total) <= 4) { + spin_det_t ex_alpha = wfn_traits::alpha_string(ex_total); + spin_det_t ex_beta = wfn_traits::beta_string(ex_total); + + const double val = C[i] * C[j]; + + // Compute Matrix Element + if(std::abs(val) > 1e-16) { + rdm_contributions(bra_alpha, ket_alpha, ex_alpha, bra_beta, + ket_beta, ex_beta, bra_occ_alpha, + bra_occ_beta, val, ordm, trdm); + } + } // Possible non-zero connection (Hamming distance) + + } // Non-zero ket determinant + } // Loop over ket determinants + + } // Non-zero bra determinant + } // Loop over bra determinants + } + + public: + template + SortedDoubleLoopHamiltonianGenerator(Args&&... args) + : HamiltonianGenerator(std::forward(args)...) {} +}; + +} // namespace macis diff --git a/include/macis/util/cas.hpp b/include/macis/mcscf/cas.hpp similarity index 84% rename from include/macis/util/cas.hpp rename to include/macis/mcscf/cas.hpp index 63c8f4a2..82a12ec6 100644 --- a/include/macis/util/cas.hpp +++ b/include/macis/mcscf/cas.hpp @@ -7,9 +7,9 @@ */ #pragma once +#include #include #include -#include namespace macis { @@ -34,8 +34,7 @@ double compute_casci_rdms( MCSCFSettings settings, NumOrbital norb, size_t nalpha, size_t nbeta, double* T, double* V, double* ORDM, double* TRDM, std::vector& C MACIS_MPI_CODE(, MPI_Comm comm)) { - constexpr auto nbits = HamGen::nbits; - + using wfn_type = typename HamGen::full_det_t; #ifdef MACIS_ENABLE_MPI int rank; MPI_Comm_rank(comm, &rank); @@ -49,11 +48,11 @@ double compute_casci_rdms( rank4_span(V, no, no, no, no)); // Compute Lowest Energy Eigenvalue (ED) - auto dets = generate_hilbert_space(norb.get(), nalpha, nbeta); - double E0 = - selected_ci_diag(dets.begin(), dets.end(), ham_gen, settings.ci_matel_tol, - settings.ci_max_subspace, settings.ci_res_tol, C, - MACIS_MPI_CODE(comm, ) true); + auto dets = generate_hilbert_space(norb.get(), nalpha, nbeta); + double E0 = selected_ci_diag( + dets.begin(), dets.end(), ham_gen, settings.ci_matel_tol, + settings.ci_max_subspace, settings.ci_res_tol, C, + MACIS_MPI_CODE(comm, ) true); // Compute RDMs ham_gen.form_rdms(dets.begin(), dets.end(), dets.begin(), dets.end(), diff --git a/include/macis/util/diis.hpp b/include/macis/mcscf/diis.hpp similarity index 100% rename from include/macis/util/diis.hpp rename to include/macis/mcscf/diis.hpp diff --git a/include/macis/util/fock_matrices.hpp b/include/macis/mcscf/fock_matrices.hpp similarity index 100% rename from include/macis/util/fock_matrices.hpp rename to include/macis/mcscf/fock_matrices.hpp diff --git a/include/macis/util/mcscf.hpp b/include/macis/mcscf/mcscf.hpp similarity index 100% rename from include/macis/util/mcscf.hpp rename to include/macis/mcscf/mcscf.hpp diff --git a/include/macis/util/mcscf_impl.hpp b/include/macis/mcscf/mcscf_impl.hpp similarity index 98% rename from include/macis/util/mcscf_impl.hpp rename to include/macis/mcscf/mcscf_impl.hpp index 48d1f850..294cae59 100644 --- a/include/macis/util/mcscf_impl.hpp +++ b/include/macis/mcscf/mcscf_impl.hpp @@ -7,14 +7,14 @@ */ #pragma once -#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include #include // #include diff --git a/include/macis/util/orbital_energies.hpp b/include/macis/mcscf/orbital_energies.hpp similarity index 100% rename from include/macis/util/orbital_energies.hpp rename to include/macis/mcscf/orbital_energies.hpp diff --git a/include/macis/util/orbital_gradient.hpp b/include/macis/mcscf/orbital_gradient.hpp similarity index 100% rename from include/macis/util/orbital_gradient.hpp rename to include/macis/mcscf/orbital_gradient.hpp diff --git a/include/macis/util/orbital_hessian.hpp b/include/macis/mcscf/orbital_hessian.hpp similarity index 96% rename from include/macis/util/orbital_hessian.hpp rename to include/macis/mcscf/orbital_hessian.hpp index 53f59c18..952ae35a 100644 --- a/include/macis/util/orbital_hessian.hpp +++ b/include/macis/mcscf/orbital_hessian.hpp @@ -7,9 +7,9 @@ */ #pragma once +#include +#include #include -#include -#include namespace macis { diff --git a/include/macis/util/orbital_rotation_utilities.hpp b/include/macis/mcscf/orbital_rotation_utilities.hpp similarity index 100% rename from include/macis/util/orbital_rotation_utilities.hpp rename to include/macis/mcscf/orbital_rotation_utilities.hpp diff --git a/include/macis/util/orbital_steps.hpp b/include/macis/mcscf/orbital_steps.hpp similarity index 100% rename from include/macis/util/orbital_steps.hpp rename to include/macis/mcscf/orbital_steps.hpp diff --git a/include/macis/sd_operations.hpp b/include/macis/sd_operations.hpp index c04267f8..4f4e9781 100644 --- a/include/macis/sd_operations.hpp +++ b/include/macis/sd_operations.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include namespace macis { @@ -27,47 +28,13 @@ namespace macis { * @returns The bitstring HF state consisting of the specified number of * occupied orbitals. */ -template -std::bitset canonical_hf_determinant(uint32_t nalpha, uint32_t nbeta) { - static_assert((N % 2) == 0, "N Must Be Even"); - std::bitset alpha = full_mask(nalpha); - std::bitset beta = full_mask(nbeta) << (N / 2); - return alpha | beta; -} - -/** - * @brief Generate canonical HF determinant. - * - * Generates a string representation of the canonical HF determinant - * consisting of a specifed number of alpha and beta orbitals. This variant - * does not assume energetic ordering of the HF orbitals. - * - * TODO: This assumes restricted orbitals - * - * @tparam N Number of bits for the total bit string of the state - * @param[in] nalpha Number of occupied alpha orbitals in the HF state - * @param[in] nbeta Number of occupied beta orbitals in the HF state - * @param[in] orb_ens Orbital eigenenergies. - * - * @returns The bitstring HF state consisting of the specified number of - * occupied orbitals populated according to the ordering of `orb_ens`. - */ -template -std::bitset canonical_hf_determinant(uint32_t nalpha, uint32_t nbeta, - const std::vector& orb_ens) { - static_assert((N % 2) == 0, "N Must Be Even"); - // First, find the sorted indices for the orbital energies - std::vector idx(orb_ens.size()); - std::iota(idx.begin(), idx.end(), 0); - std::stable_sort(idx.begin(), idx.end(), [&orb_ens](size_t i1, size_t i2) { - return orb_ens[i1] < orb_ens[i2]; - }); - // Next, fill the electrons by energy - std::bitset alpha(0), beta(0); - for(uint32_t i = 0; i < nalpha; i++) alpha.flip(idx[i]); - for(uint32_t i = 0; i < nbeta; i++) beta.flip(idx[i] + N / 2); - return alpha | beta; -} +// template +// std::bitset canonical_hf_determinant(uint32_t nalpha, uint32_t nbeta) { +// static_assert((N % 2) == 0, "N Must Be Even"); +// std::bitset alpha = full_mask(nalpha); +// std::bitset beta = full_mask(nbeta) << (N / 2); +// return alpha | beta; +// } /** * @brief Generate the list of (un)occupied orbitals for a paricular state. @@ -78,57 +45,77 @@ std::bitset canonical_hf_determinant(uint32_t nalpha, uint32_t nbeta, * @param[out] occ List of occupied orbitals in `state` * @param[out] vir List of unoccupied orbitals in `state` */ +// template +// void bitset_to_occ_vir(size_t norb, std::bitset state, +// std::vector& occ, std::vector& +// vir) { +// occ = bits_to_indices(state); +// const auto nocc = occ.size(); +// assert(nocc < norb); +// +// const auto nvir = norb - nocc; +// vir.resize(nvir); +// state = ~state; +// for(int i = 0; i < nvir; ++i) { +// auto a = ffs(state) - 1; +// vir[i] = a; +// state.flip(a); +// } +// } + +// template +// auto single_excitation(std::bitset state, unsigned p, unsigned q) { +// return state.flip(p).flip(q); +// } + +// template +// auto single_excitation_spin(std::bitset state, unsigned p, unsigned q) { +// static_assert(N%2 == 0, "Num Bits Must Be Even"); +// if constexpr (Sigma == Spin::Alpha) +// return single_excitation(state,p,q); +// else +// return single_excitation(state,p+N/2,q+N/2); +// } + +// template +// auto double_excitation(std::bitset state, unsigned p, unsigned q, unsigned +// r, unsigned s) { +// return state.flip(p).flip(q).flip(r).flip(s); +// } + +// template +// auto double_excitation_spin(std::bitset state, unsigned p, unsigned q, +// unsigned r, unsigned s) { +// static_assert(N%2 == 0, "Num Bits Must Be Even"); +// if constexpr (Sigma == Spin::Alpha) +// return double_excitation(state,p,q,r,s); +// else +// return double_excitation(state,p+N/2,q+N/2,r+N/2,s+N/2); +// } + +// TODO: Test this function template -void bitset_to_occ_vir(size_t norb, std::bitset state, - std::vector& occ, std::vector& vir) { - occ = bits_to_indices(state); - const auto nocc = occ.size(); - assert(nocc < norb); - - const auto nvir = norb - nocc; - vir.resize(nvir); - state = ~state; - for(int i = 0; i < nvir; ++i) { - auto a = ffs(state) - 1; - vir[i] = a; - state.flip(a); - } +uint32_t first_occupied_flipped(std::bitset state, std::bitset ex) { + return ffs(state & ex) - 1u; } -/** - * @brief Generate the list of (un)occupied orbitals for a paricular state. - * - * TODO: Test this function - * - * @tparam N Number of bits for the total bit string of the state - * @param[in] norb Number of orbitals used to describe the state (<= `N`) - * @param[in] state The state from which to determine orbital occupations. - * @param[out] occ List of occupied orbitals in `state` - * @param[out] vir List of unoccupied orbitals in `state` - * @param[in] as_orbs TODO:???? - */ +// TODO: Test this function template -void bitset_to_occ_vir_as(size_t norb, std::bitset state, - std::vector& occ, - std::vector& vir, - const std::vector& as_orbs) { - occ.clear(); - for(const auto i : as_orbs) - if(state[i]) occ.emplace_back(i); - const auto nocc = occ.size(); - assert(nocc <= norb); - - const auto nvir = as_orbs.size() - nocc; - vir.resize(nvir); - auto it = vir.begin(); - for(const auto i : as_orbs) - if(!state[i]) *(it++) = i; +double single_excitation_sign(std::bitset state, unsigned p, unsigned q) { + std::bitset mask = 0ul; + + if(p > q) { + mask = state & (full_mask(p) ^ full_mask(q + 1)); + } else { + mask = state & (full_mask(q) ^ full_mask(p + 1)); + } + return (mask.count() % 2) ? -1. : 1.; } -template -void append_singles(std::bitset state, const std::vector& occ, - const std::vector& vir, - std::vector>& singles) { +template +void append_singles(WfnType state, const std::vector& occ, + const std::vector& vir, WfnContainer& singles) { + using wfn_traits = wavefunction_traits; const size_t nocc = occ.size(); const size_t nvir = vir.size(); @@ -137,15 +124,15 @@ void append_singles(std::bitset state, const std::vector& occ, for(size_t a = 0; a < nvir; ++a) for(size_t i = 0; i < nocc; ++i) { - auto ex = std::bitset(0).flip(occ[i]).flip(vir[a]); - singles.emplace_back(state ^ ex); + singles.emplace_back( + wfn_traits::single_excitation_no_check(state, occ[i], vir[a])); } } -template -void append_doubles(std::bitset state, const std::vector& occ, - const std::vector& vir, - std::vector>& doubles) { +template +void append_doubles(WfnType state, const std::vector& occ, + const std::vector& vir, WfnContainer& doubles) { + using wfn_traits = wavefunction_traits; const size_t nocc = occ.size(); const size_t nvir = vir.size(); @@ -158,39 +145,37 @@ void append_doubles(std::bitset state, const std::vector& occ, for(size_t i = 0; i < nocc; ++i) for(size_t b = a + 1; b < nvir; ++b) for(size_t j = i + 1; j < nocc; ++j) { - auto ex = - std::bitset(0).flip(occ[i]).flip(vir[a]).flip(occ[j]).flip( - vir[b]); - doubles.emplace_back(state ^ ex); + doubles.emplace_back(wfn_traits::double_excitation_no_check( + state, occ[i], occ[j], vir[a], vir[b])); } } -template -void generate_singles(size_t norb, std::bitset state, - std::vector>& singles) { +template +void generate_singles(size_t norb, WfnType state, WfnContainer& singles) { + using wfn_traits = wavefunction_traits; std::vector occ_orbs, vir_orbs; - bitset_to_occ_vir(norb, state, occ_orbs, vir_orbs); + wfn_traits::state_to_occ_vir(norb, state, occ_orbs, vir_orbs); singles.clear(); append_singles(state, occ_orbs, vir_orbs, singles); } -template -void generate_doubles(size_t norb, std::bitset state, - std::vector>& doubles) { +template +void generate_doubles(size_t norb, WfnType state, WfnContainer& doubles) { + using wfn_traits = wavefunction_traits; std::vector occ_orbs, vir_orbs; - bitset_to_occ_vir(norb, state, occ_orbs, vir_orbs); + wfn_traits::state_to_occ_vir(norb, state, occ_orbs, vir_orbs); doubles.clear(); append_doubles(state, occ_orbs, vir_orbs, doubles); } -template -void generate_singles_doubles(size_t norb, std::bitset state, - std::vector>& singles, - std::vector>& doubles) { +template +void generate_singles_doubles(size_t norb, WfnType state, WfnContainer& singles, + WfnContainer& doubles) { + using wfn_traits = wavefunction_traits; std::vector occ_orbs, vir_orbs; - bitset_to_occ_vir(norb, state, occ_orbs, vir_orbs); + wfn_traits::state_to_occ_vir(norb, state, occ_orbs, vir_orbs); singles.clear(); doubles.clear(); @@ -198,197 +183,62 @@ void generate_singles_doubles(size_t norb, std::bitset state, append_doubles(state, occ_orbs, vir_orbs, doubles); } -// TODO: Test this function -template -void generate_singles_as(size_t norb, std::bitset state, - std::vector>& singles, - const std::vector& as_orbs) { - std::vector occ_orbs, vir_orbs; - bitset_to_occ_vir_as(norb, state, occ_orbs, vir_orbs, as_orbs); +template +void generate_singles_spin(size_t norb, WfnType state, WfnContainer& singles) { + using wfn_traits = wavefunction_traits; - singles.clear(); - append_singles(state, occ_orbs, vir_orbs, singles); -} + auto state_alpha = wfn_traits::alpha_string(state); + auto state_beta = wfn_traits::beta_string(state); -// TODO: Test this function -template -void generate_singles_doubles_as(size_t norb, std::bitset state, - std::vector>& singles, - std::vector>& doubles, - const std::vector& as_orbs) { - std::vector occ_orbs, vir_orbs; - bitset_to_occ_vir_as(norb, state, occ_orbs, vir_orbs, as_orbs); - - singles.clear(); - doubles.clear(); - append_singles(state, occ_orbs, vir_orbs, singles); - append_doubles(state, occ_orbs, vir_orbs, doubles); -} - -// TODO: Test this function -template -void generate_singles_spin_as(size_t norb, std::bitset state, - std::vector>& singles, - const std::vector as_orbs) { - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); - - std::vector> singles_alpha, singles_beta; - - // Generate Spin-Specific singles - generate_singles_as(norb, state_alpha, singles_alpha, as_orbs); - generate_singles_as(norb, state_beta, singles_beta, as_orbs); - - auto state_alpha_expand = expand_bitset(state_alpha); - auto state_beta_expand = expand_bitset(state_beta) << (N / 2); - - // Generate Singles in full space - singles.clear(); - - // Single Alpha + No Beta - for(auto s_alpha : singles_alpha) { - auto s_state = expand_bitset(s_alpha); - s_state = s_state | state_beta_expand; - singles.emplace_back(s_state); - } - - // No Alpha + Single Beta - for(auto s_beta : singles_beta) { - auto s_state = expand_bitset(s_beta) << (N / 2); - s_state = s_state | state_alpha_expand; - singles.emplace_back(s_state); - } -} - -// TODO: Test this function -template -void generate_singles_doubles_spin_as(size_t norb, std::bitset state, - std::vector>& singles, - std::vector>& doubles, - const std::vector& as_orbs) { - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); - - std::vector> singles_alpha, singles_beta; - std::vector> doubles_alpha, doubles_beta; - - // Generate Spin-Specific singles / doubles - generate_singles_doubles_as(norb, state_alpha, singles_alpha, doubles_alpha, - as_orbs); - generate_singles_doubles_as(norb, state_beta, singles_beta, doubles_beta, - as_orbs); - - auto state_alpha_expand = expand_bitset(state_alpha); - auto state_beta_expand = expand_bitset(state_beta) << (N / 2); - - // Generate Singles in full space - singles.clear(); - - // Single Alpha + No Beta - for(auto s_alpha : singles_alpha) { - auto s_state = expand_bitset(s_alpha); - s_state = s_state | state_beta_expand; - singles.emplace_back(s_state); - } - - // No Alpha + Single Beta - for(auto s_beta : singles_beta) { - auto s_state = expand_bitset(s_beta) << (N / 2); - s_state = s_state | state_alpha_expand; - singles.emplace_back(s_state); - } - - // Generate Doubles in full space - doubles.clear(); - - // Double Alpha + No Beta - for(auto d_alpha : doubles_alpha) { - auto d_state = expand_bitset(d_alpha); - d_state = d_state | state_beta_expand; - doubles.emplace_back(d_state); - } - - // No Alpha + Double Beta - for(auto d_beta : doubles_beta) { - auto d_state = expand_bitset(d_beta) << (N / 2); - d_state = d_state | state_alpha_expand; - doubles.emplace_back(d_state); - } - - // Single Alpha + Single Beta - for(auto s_alpha : singles_alpha) - for(auto s_beta : singles_beta) { - auto d_state_alpha = expand_bitset(s_alpha); - auto d_state_beta = expand_bitset(s_beta) << (N / 2); - doubles.emplace_back(d_state_alpha | d_state_beta); - } -} - -template -void generate_singles_spin(size_t norb, std::bitset state, - std::vector>& singles) { - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); - - std::vector> singles_alpha, singles_beta; + using spin_wfn_type = spin_wfn_t; + std::vector singles_alpha, singles_beta; // Generate Spin-Specific singles / doubles generate_singles(norb, state_alpha, singles_alpha); generate_singles(norb, state_beta, singles_beta); - auto state_alpha_expand = expand_bitset(state_alpha); - auto state_beta_expand = expand_bitset(state_beta) << (N / 2); - // Generate Singles in full space singles.clear(); // Single Alpha + No Beta for(auto s_alpha : singles_alpha) { - auto s_state = expand_bitset(s_alpha); - s_state = s_state | state_beta_expand; - singles.emplace_back(s_state); + singles.emplace_back(wfn_traits::from_spin(s_alpha, state_beta)); } // No Alpha + Single Beta for(auto s_beta : singles_beta) { - auto s_state = expand_bitset(s_beta) << (N / 2); - s_state = s_state | state_alpha_expand; - singles.emplace_back(s_state); + singles.emplace_back(wfn_traits::from_spin(state_alpha, s_beta)); } } -template -void generate_singles_doubles_spin(size_t norb, std::bitset state, - std::vector>& singles, - std::vector>& doubles) { - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); +template +void generate_singles_doubles_spin(size_t norb, WfnType state, + WfnContainer& singles, + WfnContainer& doubles) { + using wfn_traits = wavefunction_traits; + + auto state_alpha = wfn_traits::alpha_string(state); + auto state_beta = wfn_traits::beta_string(state); - std::vector> singles_alpha, singles_beta; - std::vector> doubles_alpha, doubles_beta; + using spin_wfn_type = spin_wfn_t; + std::vector singles_alpha, singles_beta; + std::vector doubles_alpha, doubles_beta; // Generate Spin-Specific singles / doubles generate_singles_doubles(norb, state_alpha, singles_alpha, doubles_alpha); generate_singles_doubles(norb, state_beta, singles_beta, doubles_beta); - auto state_alpha_expand = expand_bitset(state_alpha); - auto state_beta_expand = expand_bitset(state_beta) << (N / 2); - // Generate Singles in full space singles.clear(); // Single Alpha + No Beta for(auto s_alpha : singles_alpha) { - auto s_state = expand_bitset(s_alpha); - s_state = s_state | state_beta_expand; - singles.emplace_back(s_state); + singles.emplace_back(wfn_traits::from_spin(s_alpha, state_beta)); } // No Alpha + Single Beta for(auto s_beta : singles_beta) { - auto s_state = expand_bitset(s_beta) << (N / 2); - s_state = s_state | state_alpha_expand; - singles.emplace_back(s_state); + singles.emplace_back(wfn_traits::from_spin(state_alpha, s_beta)); } // Generate Doubles in full space @@ -396,58 +246,51 @@ void generate_singles_doubles_spin(size_t norb, std::bitset state, // Double Alpha + No Beta for(auto d_alpha : doubles_alpha) { - auto d_state = expand_bitset(d_alpha); - d_state = d_state | state_beta_expand; - doubles.emplace_back(d_state); + doubles.emplace_back(wfn_traits::from_spin(d_alpha, state_beta)); } // No Alpha + Double Beta for(auto d_beta : doubles_beta) { - auto d_state = expand_bitset(d_beta) << (N / 2); - d_state = d_state | state_alpha_expand; - doubles.emplace_back(d_state); + doubles.emplace_back(wfn_traits::from_spin(state_alpha, d_beta)); } // Single Alpha + Single Beta for(auto s_alpha : singles_alpha) for(auto s_beta : singles_beta) { - auto d_state_alpha = expand_bitset(s_alpha); - auto d_state_beta = expand_bitset(s_beta) << (N / 2); - doubles.emplace_back(d_state_alpha | d_state_beta); + doubles.emplace_back(wfn_traits::from_spin(s_alpha, s_beta)); } } -template -void generate_cisd_hilbert_space(size_t norb, std::bitset state, - std::vector>& dets) { +template +void generate_cisd_hilbert_space(size_t norb, WfnType state, + WfnContainer& dets) { dets.clear(); dets.emplace_back(state); - std::vector> singles, doubles; + std::vector singles, doubles; generate_singles_doubles_spin(norb, state, singles, doubles); dets.insert(dets.end(), singles.begin(), singles.end()); dets.insert(dets.end(), doubles.begin(), doubles.end()); } -template -std::vector> generate_cisd_hilbert_space(size_t norb, - std::bitset state) { - std::vector> dets; +template +auto generate_cisd_hilbert_space(size_t norb, WfnType state) { + std::vector dets; generate_cisd_hilbert_space(norb, state, dets); return dets; } -template -std::vector> generate_combs(uint64_t nbits, uint64_t nset) { +template +std::vector generate_combs(uint64_t nbits, uint64_t nset) { + using wfn_traits = wavefunction_traits; std::vector v(nbits, false); std::fill_n(v.begin(), nset, true); - std::vector> store; + std::vector store; do { - std::bitset temp = 0ul; - std::bitset one = 1ul; + WfnType temp(0ul); for(uint64_t i = 0; i < nbits; ++i) if(v[i]) { - temp = temp | (one << i); + temp = wfn_traits::create_no_check(temp, i); } store.emplace_back(temp); @@ -456,66 +299,47 @@ std::vector> generate_combs(uint64_t nbits, uint64_t nset) { return store; } -template -std::vector> generate_hilbert_space(size_t norbs, size_t nalpha, - size_t nbeta) { +template +std::vector generate_hilbert_space(size_t norbs, size_t nalpha, + size_t nbeta) { + using spin_wfn_type = spin_wfn_t; + using wfn_traits = wavefunction_traits; + // Get all alpha and beta combs - auto alpha_dets = generate_combs(norbs, nalpha); - auto beta_dets = generate_combs(norbs, nbeta); + auto alpha_dets = generate_combs(norbs, nalpha); + auto beta_dets = generate_combs(norbs, nbeta); - std::vector> states; + std::vector states; states.reserve(alpha_dets.size() * beta_dets.size()); for(auto alpha_det : alpha_dets) for(auto beta_det : beta_dets) { - std::bitset state = alpha_det | (beta_det << (N / 2)); - states.emplace_back(state); + states.emplace_back(wfn_traits::from_spin(alpha_det, beta_det)); } return states; } -template -void generate_cis_hilbert_space(size_t norb, std::bitset state, - std::vector>& dets) { +template +void generate_cis_hilbert_space(size_t norb, WfnType state, + WfnContainer& dets) { dets.clear(); dets.emplace_back(state); - std::vector> singles; + std::vector singles; generate_singles_spin(norb, state, singles); dets.insert(dets.end(), singles.begin(), singles.end()); } -template -std::vector> generate_cis_hilbert_space(size_t norb, - std::bitset state) { - std::vector> dets; +template +std::vector generate_cis_hilbert_space(size_t norb, WfnType state) { + std::vector dets; generate_cis_hilbert_space(norb, state, dets); return dets; } // TODO: Test this function -template -uint32_t first_occupied_flipped(std::bitset state, std::bitset ex) { - return ffs(state & ex) - 1u; -} - -// TODO: Test this function -template -double single_excitation_sign(std::bitset state, unsigned p, unsigned q) { - std::bitset mask = 0ul; - - if(p > q) { - mask = state & (full_mask(p) ^ full_mask(q + 1)); - } else { - mask = state & (full_mask(q) ^ full_mask(p + 1)); - } - return (mask.count() % 2) ? -1. : 1.; -} - -// TODO: Test this function -template -inline auto single_excitation_sign_indices(std::bitset bra, - std::bitset ket, - std::bitset ex) { +template +inline auto single_excitation_sign_indices(WfnType bra, WfnType ket, + WfnType ex) { auto o1 = first_occupied_flipped(ket, ex); auto v1 = first_occupied_flipped(bra, ex); auto sign = single_excitation_sign(ket, v1, o1); @@ -524,81 +348,59 @@ inline auto single_excitation_sign_indices(std::bitset bra, } // TODO: Test this function -template -inline auto doubles_sign_indices(std::bitset bra, std::bitset ket, - std::bitset ex) { - const auto o1 = first_occupied_flipped(ket, ex); - const auto v1 = first_occupied_flipped(bra, ex); - auto sign = single_excitation_sign(ket, v1, o1); +template +inline auto doubles_sign_indices(WfnType bra, WfnType ket, WfnType ex) { + using wfn_traits = wavefunction_traits; + auto [o1, v1, sign1] = single_excitation_sign_indices(bra, ket, ex); - ket.flip(o1).flip(v1); - ex.flip(o1).flip(v1); + ket = wfn_traits::single_excitation_no_check(ket, o1, v1); + ex = wfn_traits::single_excitation_no_check(ex, o1, v1); - const auto o2 = first_occupied_flipped(ket, ex); - const auto v2 = first_occupied_flipped(bra, ex); - sign *= single_excitation_sign(ket, v2, o2); + auto [o2, v2, sign2] = single_excitation_sign_indices(bra, ket, ex); + auto sign = sign1 * sign2; return std::make_tuple(o1, v1, o2, v2, sign); } // TODO: Test this function -template -inline auto doubles_sign(std::bitset bra, std::bitset ket, - std::bitset ex) { +template +inline auto doubles_sign(WfnType bra, WfnType ket, WfnType ex) { auto [p, q, r, s, sign] = doubles_sign_indices(bra, ket, ex); return sign; } -// TODO: Test this function -template -void generate_residues(std::bitset state, std::vector>& res) { - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); - - auto occ_alpha = bits_to_indices(state_alpha); - const int nalpha = occ_alpha.size(); - - auto occ_beta = bits_to_indices(state_beta); - const int nbeta = occ_beta.size(); - - std::bitset state_alpha_full = expand_bitset(state_alpha); - std::bitset state_beta_full = expand_bitset(state_beta); - state_beta_full = state_beta_full << (N / 2); - - std::bitset one = 1ul; - - // Double alpha - for(auto i = 0; i < nalpha; ++i) - for(auto j = i + 1; j < nalpha; ++j) { - auto mask = (one << occ_alpha[i]) | (one << occ_alpha[j]); - std::bitset _r = expand_bitset(state_alpha & ~mask); - res.emplace_back(_r | state_beta_full); - } - - // Double beta - for(auto i = 0; i < nbeta; ++i) - for(auto j = i + 1; j < nbeta; ++j) { - auto mask = (one << occ_beta[i]) | (one << occ_beta[j]); - std::bitset _r = expand_bitset(state_beta & ~mask) << (N / 2); - res.emplace_back(_r | state_alpha_full); +template +auto get_unique_alpha(WfnIterator begin, WfnIterator end) { + using wfn_type = typename std::iterator_traits::value_type; + using wfn_traits = wavefunction_traits; + using spin_wfn_type = typename wfn_traits::spin_wfn_type; + + std::vector> unique_alpha; + unique_alpha.push_back({wfn_traits::alpha_string(*begin), 1}); + for(auto it = begin + 1; it != end; ++it) { + auto& [cur_alpha, cur_count] = unique_alpha.back(); + auto alpha_i = wfn_traits::alpha_string(*it); + if(alpha_i == cur_alpha) { + cur_count++; + } else { + unique_alpha.push_back({alpha_i, 1}); } + } - // Mixed - for(auto i = 0; i < nalpha; ++i) - for(auto j = 0; j < nbeta; ++j) { - std::bitset mask = expand_bitset(one << occ_alpha[i]); - mask = mask | (expand_bitset(one << occ_beta[j]) << (N / 2)); - res.emplace_back(state & ~mask); - } + return unique_alpha; } -template -std::string to_canonical_string(std::bitset state) { - static_assert((N % 2) == 0, "N Odd"); - auto state_alpha = bitset_lo_word(state); - auto state_beta = bitset_hi_word(state); +template +std::string to_canonical_string(WfnType state) { + using wfn_traits = wavefunction_traits; + using spin_wfn_type = spin_wfn_t; + using spin_wfn_traits = wavefunction_traits; + + auto state_alpha = wfn_traits::alpha_string(state); + auto state_beta = wfn_traits::beta_string(state); std::string str; - for(size_t i = 0; i < N / 2; ++i) { + + for(size_t i = 0; i < spin_wfn_traits::size(); ++i) { if(state_alpha[i] and state_beta[i]) str.push_back('2'); else if(state_alpha[i]) @@ -611,20 +413,23 @@ std::string to_canonical_string(std::bitset state) { return str; } -template -std::bitset from_canonical_string(std::string str) { - std::bitset state_alpha(0), state_beta(0); - for(auto i = 0ul; i < str.length(); ++i) { +template +WfnType from_canonical_string(std::string str) { + using spin_wfn_type = spin_wfn_t; + using wfn_traits = wavefunction_traits; + using spin_wfn_traits = wavefunction_traits; + spin_wfn_type state_alpha(0), state_beta(0); + for(auto i = 0ul; i < std::min(str.length(), spin_wfn_traits::size()); ++i) { if(str[i] == '2') { - state_alpha.set(i); - state_beta.set(i); + state_alpha = spin_wfn_traits::create_no_check(state_alpha, i); + state_beta = spin_wfn_traits::create_no_check(state_beta, i); } else if(str[i] == 'u') { - state_alpha.set(i); + state_alpha = spin_wfn_traits::create_no_check(state_alpha, i); } else if(str[i] == 'd') { - state_beta.set(i); + state_beta = spin_wfn_traits::create_no_check(state_beta, i); } } - auto state = state_alpha | (state_beta << (N / 2)); + auto state = wfn_traits::from_spin(state_alpha, state_beta); return state; } diff --git a/include/macis/solvers/davidson.hpp b/include/macis/solvers/davidson.hpp index 63756772..bde3ef17 100644 --- a/include/macis/solvers/davidson.hpp +++ b/include/macis/solvers/davidson.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,7 @@ void p_diagonal_guess(size_t N_local, const SpMatType& A, double* X) { // Determine min index auto D_min = std::min_element(D.begin(), D.end()); auto min_idx = std::distance(D.begin(), D_min); + // printf("[rank %d] DMIN %lu %.6e\n", world_rank, min_idx, *D_min); // Zero out guess for(size_t i = 0; i < N_local; ++i) X[i] = 0.; @@ -252,6 +254,8 @@ inline void p_gram_schmidt(int64_t N_local, int64_t K, const double* V_old, double dot = blas::dot(N_local, V_new, 1, V_new, 1); dot = allreduce(dot, MPI_SUM, comm); double nrm = std::sqrt(dot); + // printf("[rank %d] GS DOT %.6e NRM %.6e\n", comm_rank(comm), + // dot, nrm); blas::scal(N_local, 1. / nrm, V_new, 1); } @@ -267,11 +271,16 @@ inline void p_rayleigh_ritz(int64_t N_local, int64_t K, const double* X, // Reduce result if(LDC != K) throw std::runtime_error("DIE DIE DIE RR"); - allreduce(C, K * K, MPI_SUM, comm); + // allreduce(C, K * K, MPI_SUM, comm); + std::allocator alloc; + double* tmp_c = world_rank ? nullptr : alloc.allocate(K * K); + reduce(C, tmp_c, K * K, MPI_SUM, 0, comm); // Do local diagonalization on rank-0 if(!world_rank) { + memcpy(C, tmp_c, K * K * sizeof(double)); lapack::syev(lapack::Job::Vec, lapack::Uplo::Lower, K, C, LDC, W); + alloc.deallocate(tmp_c, K * K); } // Broadcast results @@ -378,8 +387,18 @@ auto p_davidson(int64_t N_local, int64_t max_m, const Functor& op, // Compute new vector // (D - LAM(0)*I) * W = -R ==> W = -(D - LAM(0)*I)**-1 * R + double E1_denom = 0, E1_num = 0; for(auto j = 0; j < N_local; ++j) { R_local[j] = -R_local[j] / (D_local[j] - LAM[0]); + E1_num += X_local[j] * R_local[j]; + E1_denom += X_local[j] * X_local[j] / (D_local[j] - LAM[0]); + } + E1_denom = allreduce(E1_denom, MPI_SUM, comm); + E1_num = allreduce(E1_num, MPI_SUM, comm); + const double E1 = E1_num / E1_denom; + + for(auto j = 0; j < N_local; ++j) { + R_local[j] += E1 * X_local[j] / (D_local[j] - LAM[0]); } // Project new vector out form old vectors diff --git a/include/macis/solvers/selected_ci_diag.hpp b/include/macis/solvers/selected_ci_diag.hpp index 0a1e293f..9edffb41 100644 --- a/include/macis/solvers/selected_ci_diag.hpp +++ b/include/macis/solvers/selected_ci_diag.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -121,10 +122,9 @@ double serial_selected_ci_diag(const SpMatType& H, size_t davidson_max_m, return E; } -template -double selected_ci_diag(wavefunction_iterator_t dets_begin, - wavefunction_iterator_t dets_end, - HamiltonianGenerator& ham_gen, double h_el_tol, +template +double selected_ci_diag(WfnIterator dets_begin, WfnIterator dets_end, + HamiltonianGenerator& ham_gen, double h_el_tol, size_t davidson_max_m, double davidson_res_tol, std::vector& C_local, MACIS_MPI_CODE(MPI_Comm comm, ) @@ -146,9 +146,25 @@ double selected_ci_diag(wavefunction_iterator_t dets_begin, MACIS_MPI_CODE(MPI_Barrier(comm);) auto H_st = clock_type::now(); + auto world_size = comm_size(comm); + auto world_rank = comm_rank(comm); + //{ + // std::ofstream wfn_file("wfn_" + + // std::to_string(std::distance(dets_begin,dets_end)) + "_" + + // std::to_string(world_rank) + "." + std::to_string(world_size) + ".txt"); + // for(auto it = dets_begin; it != dets_end; ++it) { + // wfn_file << *it << "\n"; + //} + // wfn_file << std::flush; + //} + #ifdef MACIS_ENABLE_MPI auto H = make_dist_csr_hamiltonian(comm, dets_begin, dets_end, ham_gen, h_el_tol); + // sparsexx::write_dist_mm("ham_" + std::to_string(H.n()) + "." + + // std::to_string(world_size) + ".mtx", H, 1); + // MACIS_MPI_CODE(MPI_Barrier(comm);) + // if(H.n() >= 10000000) throw "DIE DIE DIE"; #else auto H = make_csr_hamiltonian(dets_begin, dets_end, ham_gen, h_el_tol); @@ -171,7 +187,6 @@ double selected_ci_diag(wavefunction_iterator_t dets_begin, duration_type(H_en - H_st).count()); #ifdef MACIS_ENABLE_MPI - auto world_size = comm_size(comm); if(world_size > 1) { double local_hdur = duration_type(H_en - H_st).count(); double max_hdur = allreduce(local_hdur, MPI_MAX, comm); @@ -185,7 +200,7 @@ double selected_ci_diag(wavefunction_iterator_t dets_begin, #endif logger->info(" {} = {:.2e} GiB", "HMEM_LOC", H.mem_footprint() / 1073741824.); - logger->info(" {} = {:.2f}%", "H_SPARSE", + logger->info(" {} = {:.2e}%", "H_SPARSE", total_nnz / double(H.n() * H.n()) * 100); #ifdef MACIS_ENABLE_MPI if(world_size > 1) { diff --git a/include/macis/types.hpp b/include/macis/types.hpp index 97331b52..00f7c061 100644 --- a/include/macis/types.hpp +++ b/include/macis/types.hpp @@ -16,8 +16,8 @@ namespace macis { -namespace KokkosEx = - MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE; +namespace KokkosEx = Kokkos; +// MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE; template using col_major_span = @@ -50,7 +50,7 @@ using wavefunction_iterator_t = typename std::vector >::iterator; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" -using uint128_t = unsigned __int128; +using uint128_t = __uint128_t; #pragma GCC diagnostic pop template diff --git a/include/macis/util/mpi.hpp b/include/macis/util/mpi.hpp index a5ae545a..6375dbbc 100644 --- a/include/macis/util/mpi.hpp +++ b/include/macis/util/mpi.hpp @@ -125,6 +125,7 @@ REGISTER_MPI_TYPE(int, MPI_INT); REGISTER_MPI_TYPE(double, MPI_DOUBLE); REGISTER_MPI_TYPE(float, MPI_FLOAT); REGISTER_MPI_TYPE(size_t, MPI_UINT64_T); +REGISTER_MPI_TYPE(int64_t, MPI_INT64_T); #undef REGISTER_MPI_TYPE @@ -145,6 +146,18 @@ mpi_datatype make_contiguous_mpi_datatype(int n) { return make_managed_mpi_datatype(contig_dtype); } +template +void reduce(const T* send, T* recv, size_t count, MPI_Op op, int root, + MPI_Comm comm) { + auto dtype = mpi_traits::datatype(); + + size_t intmax = std::numeric_limits::max(); + size_t nchunk = count / intmax; + if(nchunk) throw std::runtime_error("Msg over INT_MAX not yet tested"); + + MPI_Reduce(send, recv, count, dtype, op, root, comm); +} + /** * @brief Type-safe wrapper for MPI_Allreduce * @@ -228,5 +241,43 @@ struct mpi_traits> { } }; +template +class global_atomic { + MPI_Win window_; + T* buffer_; + + public: + global_atomic() = delete; + + global_atomic(MPI_Comm comm, T init = 0) { + MPI_Win_allocate(sizeof(T), sizeof(T), MPI_INFO_NULL, comm, &buffer_, + &window_); + if(window_ == MPI_WIN_NULL) { + throw std::runtime_error("Window creation failed"); + } + *buffer_ = init; + MPI_Win_lock_all(MPI_MODE_NOCHECK, window_); + } + + ~global_atomic() noexcept { + MPI_Win_unlock_all(window_); + MPI_Win_free(&window_); + } + + global_atomic(const global_atomic&) = delete; + global_atomic(global_atomic&&) noexcept = delete; + + T fetch_and_op(T val, MPI_Op op) { + T next_val; + MPI_Fetch_and_op(&val, &next_val, mpi_traits::datatype(), 0, 0, op, + window_); + MPI_Win_flush(0, window_); + return next_val; + } + + T fetch_and_add(T val) { return fetch_and_op(val, MPI_SUM); } + T fetch_and_min(T val) { return fetch_and_op(val, MPI_MIN); } +}; + } // namespace macis #endif diff --git a/include/macis/util/rdms.hpp b/include/macis/util/rdms.hpp index 72b55bb3..875cd968 100644 --- a/include/macis/util/rdms.hpp +++ b/include/macis/util/rdms.hpp @@ -103,8 +103,9 @@ inline void rdm_contributions(wfn_t bra_alpha, wfn_t ket_alpha, const IndexType& bra_occ_alpha, const IndexType& bra_occ_beta, T val, matrix_span ordm, rank4_span trdm) { - const uint32_t ex_alpha_count = ex_alpha.count(); - const uint32_t ex_beta_count = ex_beta.count(); + using wfn_traits = wavefunction_traits>; + const uint32_t ex_alpha_count = wfn_traits::count(ex_alpha); + const uint32_t ex_beta_count = wfn_traits::count(ex_beta); if((ex_alpha_count + ex_beta_count) > 4) return; diff --git a/include/macis/util/trexio.hpp b/include/macis/util/trexio.hpp new file mode 100644 index 00000000..04a86621 --- /dev/null +++ b/include/macis/util/trexio.hpp @@ -0,0 +1,53 @@ +#pragma once +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ +#include + +#include + +namespace macis { + +class trexio_exception : public std::exception { + std::string msg_; + const char* what() const noexcept override; + + public: + trexio_exception(std::string func_name, trexio_exit_code rc); +}; + +class TREXIOFile { + trexio_t* file_handle_ = nullptr; + + public: + TREXIOFile() noexcept = default; + + TREXIOFile(std::string name, char mode, int backend); + ~TREXIOFile() noexcept; + + TREXIOFile(const TREXIOFile&) = delete; + TREXIOFile(TREXIOFile&& other) noexcept; + + int64_t read_mo_num() const; + int64_t read_mo_2e_int_eri_size() const; + double read_nucleus_repulsion() const; + void read_mo_1e_int_core_hamiltonian(double* h) const; + void read_mo_2e_int_eri(double* V) const; + int64_t read_determinant_num() const; + int32_t get_determinant_int64_num() const; + void read_determinant_list(int64_t ndet, int64_t* dets, + int64_t ioff = 0) const; + + void write_mo_num(int64_t nmo); + void write_nucleus_repulsion(double E); + void write_mo_1e_int_core_hamiltonian(const double* h); + void write_mo_2e_int_eri(const double* V); + void write_determinant_list(int64_t ndet, const int64_t* dets, + int64_t ioff = 0); +}; + +} // namespace macis diff --git a/include/macis/util/unused_untested.hpp b/include/macis/util/unused_untested.hpp new file mode 100644 index 00000000..af7b6639 --- /dev/null +++ b/include/macis/util/unused_untested.hpp @@ -0,0 +1,245 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#if 0 +/** + * @brief Generate canonical HF determinant. + * + * Generates a string representation of the canonical HF determinant + * consisting of a specifed number of alpha and beta orbitals. This variant + * does not assume energetic ordering of the HF orbitals. + * + * TODO: This assumes restricted orbitals + * + * @tparam N Number of bits for the total bit string of the state + * @param[in] nalpha Number of occupied alpha orbitals in the HF state + * @param[in] nbeta Number of occupied beta orbitals in the HF state + * @param[in] orb_ens Orbital eigenenergies. + * + * @returns The bitstring HF state consisting of the specified number of + * occupied orbitals populated according to the ordering of `orb_ens`. + */ +template +std::bitset canonical_hf_determinant(uint32_t nalpha, uint32_t nbeta, + const std::vector& orb_ens) { + static_assert((N % 2) == 0, "N Must Be Even"); + // First, find the sorted indices for the orbital energies + std::vector idx(orb_ens.size()); + std::iota(idx.begin(), idx.end(), 0); + std::stable_sort(idx.begin(), idx.end(), [&orb_ens](size_t i1, size_t i2) { + return orb_ens[i1] < orb_ens[i2]; + }); + // Next, fill the electrons by energy + std::bitset alpha(0), beta(0); + for(uint32_t i = 0; i < nalpha; i++) alpha.flip(idx[i]); + for(uint32_t i = 0; i < nbeta; i++) beta.flip(idx[i] + N / 2); + return alpha | beta; +} + +/** + * @brief Generate the list of (un)occupied orbitals for a paricular state. + * + * TODO: Test this function + * + * @tparam N Number of bits for the total bit string of the state + * @param[in] norb Number of orbitals used to describe the state (<= `N`) + * @param[in] state The state from which to determine orbital occupations. + * @param[out] occ List of occupied orbitals in `state` + * @param[out] vir List of unoccupied orbitals in `state` + * @param[in] as_orbs TODO:???? + */ +template +void bitset_to_occ_vir_as(size_t norb, std::bitset state, + std::vector& occ, + std::vector& vir, + const std::vector& as_orbs) { + occ.clear(); + for(const auto i : as_orbs) + if(state[i]) occ.emplace_back(i); + const auto nocc = occ.size(); + assert(nocc <= norb); + + const auto nvir = as_orbs.size() - nocc; + vir.resize(nvir); + auto it = vir.begin(); + for(const auto i : as_orbs) + if(!state[i]) *(it++) = i; +} + + + +// TODO: Test this function +template +void generate_singles_as(size_t norb, std::bitset state, + std::vector>& singles, + const std::vector& as_orbs) { + std::vector occ_orbs, vir_orbs; + bitset_to_occ_vir_as(norb, state, occ_orbs, vir_orbs, as_orbs); + + singles.clear(); + append_singles(state, occ_orbs, vir_orbs, singles); +} + +// TODO: Test this function +template +void generate_singles_doubles_as(size_t norb, std::bitset state, + std::vector>& singles, + std::vector>& doubles, + const std::vector& as_orbs) { + std::vector occ_orbs, vir_orbs; + bitset_to_occ_vir_as(norb, state, occ_orbs, vir_orbs, as_orbs); + + singles.clear(); + doubles.clear(); + append_singles(state, occ_orbs, vir_orbs, singles); + append_doubles(state, occ_orbs, vir_orbs, doubles); +} + +// TODO: Test this function +template +void generate_singles_spin_as(size_t norb, std::bitset state, + std::vector>& singles, + const std::vector as_orbs) { + auto state_alpha = bitset_lo_word(state); + auto state_beta = bitset_hi_word(state); + + std::vector> singles_alpha, singles_beta; + + // Generate Spin-Specific singles + generate_singles_as(norb, state_alpha, singles_alpha, as_orbs); + generate_singles_as(norb, state_beta, singles_beta, as_orbs); + + auto state_alpha_expand = expand_bitset(state_alpha); + auto state_beta_expand = expand_bitset(state_beta) << (N / 2); + + // Generate Singles in full space + singles.clear(); + + // Single Alpha + No Beta + for(auto s_alpha : singles_alpha) { + auto s_state = expand_bitset(s_alpha); + s_state = s_state | state_beta_expand; + singles.emplace_back(s_state); + } + + // No Alpha + Single Beta + for(auto s_beta : singles_beta) { + auto s_state = expand_bitset(s_beta) << (N / 2); + s_state = s_state | state_alpha_expand; + singles.emplace_back(s_state); + } +} + +// TODO: Test this function +template +void generate_singles_doubles_spin_as(size_t norb, std::bitset state, + std::vector>& singles, + std::vector>& doubles, + const std::vector& as_orbs) { + auto state_alpha = bitset_lo_word(state); + auto state_beta = bitset_hi_word(state); + + std::vector> singles_alpha, singles_beta; + std::vector> doubles_alpha, doubles_beta; + + // Generate Spin-Specific singles / doubles + generate_singles_doubles_as(norb, state_alpha, singles_alpha, doubles_alpha, + as_orbs); + generate_singles_doubles_as(norb, state_beta, singles_beta, doubles_beta, + as_orbs); + + auto state_alpha_expand = expand_bitset(state_alpha); + auto state_beta_expand = expand_bitset(state_beta) << (N / 2); + + // Generate Singles in full space + singles.clear(); + + // Single Alpha + No Beta + for(auto s_alpha : singles_alpha) { + auto s_state = expand_bitset(s_alpha); + s_state = s_state | state_beta_expand; + singles.emplace_back(s_state); + } + + // No Alpha + Single Beta + for(auto s_beta : singles_beta) { + auto s_state = expand_bitset(s_beta) << (N / 2); + s_state = s_state | state_alpha_expand; + singles.emplace_back(s_state); + } + + // Generate Doubles in full space + doubles.clear(); + + // Double Alpha + No Beta + for(auto d_alpha : doubles_alpha) { + auto d_state = expand_bitset(d_alpha); + d_state = d_state | state_beta_expand; + doubles.emplace_back(d_state); + } + + // No Alpha + Double Beta + for(auto d_beta : doubles_beta) { + auto d_state = expand_bitset(d_beta) << (N / 2); + d_state = d_state | state_alpha_expand; + doubles.emplace_back(d_state); + } + + // Single Alpha + Single Beta + for(auto s_alpha : singles_alpha) + for(auto s_beta : singles_beta) { + auto d_state_alpha = expand_bitset(s_alpha); + auto d_state_beta = expand_bitset(s_beta) << (N / 2); + doubles.emplace_back(d_state_alpha | d_state_beta); + } +} +#if 0 +// TODO: Test this function +template +void generate_residues(std::bitset state, std::vector>& res) { + auto state_alpha = bitset_lo_word(state); + auto state_beta = bitset_hi_word(state); + + auto occ_alpha = bits_to_indices(state_alpha); + const int nalpha = occ_alpha.size(); + + auto occ_beta = bits_to_indices(state_beta); + const int nbeta = occ_beta.size(); + + std::bitset state_alpha_full = expand_bitset(state_alpha); + std::bitset state_beta_full = expand_bitset(state_beta); + state_beta_full = state_beta_full << (N / 2); + + std::bitset one = 1ul; + + // Double alpha + for(auto i = 0; i < nalpha; ++i) + for(auto j = i + 1; j < nalpha; ++j) { + auto mask = (one << occ_alpha[i]) | (one << occ_alpha[j]); + std::bitset _r = expand_bitset(state_alpha & ~mask); + res.emplace_back(_r | state_beta_full); + } + + // Double beta + for(auto i = 0; i < nbeta; ++i) + for(auto j = i + 1; j < nbeta; ++j) { + auto mask = (one << occ_beta[i]) | (one << occ_beta[j]); + std::bitset _r = expand_bitset(state_beta & ~mask) << (N / 2); + res.emplace_back(_r | state_alpha_full); + } + + // Mixed + for(auto i = 0; i < nalpha; ++i) + for(auto j = 0; j < nbeta; ++j) { + std::bitset mask = expand_bitset(one << occ_alpha[i]); + mask = mask | (expand_bitset(one << occ_beta[j]) << (N / 2)); + res.emplace_back(state & ~mask); + } +} +#endif +#endif diff --git a/include/macis/wavefunction_io.hpp b/include/macis/wavefunction_io.hpp index bc38f7ee..1483d864 100644 --- a/include/macis/wavefunction_io.hpp +++ b/include/macis/wavefunction_io.hpp @@ -67,7 +67,7 @@ void read_wavefunction(std::string fname, std::vector>& states, std::stringstream ss{line}; std::string c, d; ss >> c >> d; - states.emplace_back(from_canonical_string(d)); + states.emplace_back(from_canonical_string>(d)); coeffs.emplace_back(std::stod(c)); } } diff --git a/include/macis/wfn/raw_bitset.hpp b/include/macis/wfn/raw_bitset.hpp new file mode 100644 index 00000000..3bd5f4e0 --- /dev/null +++ b/include/macis/wfn/raw_bitset.hpp @@ -0,0 +1,144 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#pragma once + +#include +#include + +namespace macis { + +enum class Spin { Alpha, Beta }; + +template +struct wavefunction_traits; + +template +using spin_wfn_t = typename wavefunction_traits::spin_wfn_type; + +template +struct wavefunction_traits> { + using wfn_type = std::bitset; + using spin_wfn_type = std::bitset; + using orbidx_type = uint32_t; + using orbidx_container = std::vector; + + inline static constexpr size_t bit_size = N; + + static constexpr auto size() { return bit_size; } + + static inline auto count(wfn_type state) { return state.count(); } + + static inline spin_wfn_type alpha_string(wfn_type state) { + return bitset_lo_word(state); + } + + static inline spin_wfn_type beta_string(wfn_type state) { + return bitset_hi_word(state); + } + + using wfn_comparator = bitset_less_comparator; + + struct spin_comparator { + using spin_wfn_comparator = bitset_less_comparator; + bool operator()(wfn_type x, wfn_type y) const { + auto s_comp = spin_wfn_comparator{}; + const auto x_a = alpha_string(x); + const auto y_a = alpha_string(y); + if(x_a == y_a) { + const auto x_b = beta_string(x); + const auto y_b = beta_string(y); + return s_comp(x_b, y_b); + } else + return s_comp(x_a, y_a); + } + }; + + template + static inline wfn_type from_spin(spin_wfn_type alpha, spin_wfn_type beta) { + if constexpr(Sigma == Spin::Alpha) { + auto alpha_expand = expand_bitset(alpha); + auto beta_expand = expand_bitset(beta) << N / 2; + return alpha_expand | beta_expand; + } else { + auto alpha_expand = expand_bitset(alpha) << N / 2; + auto beta_expand = expand_bitset(beta); + return alpha_expand | beta_expand; + } + } + + static inline wfn_type canonical_hf_determinant(uint32_t nalpha, + uint32_t nbeta) { + spin_wfn_type alpha = full_mask(nalpha); + spin_wfn_type beta = full_mask(nbeta); + return from_spin(alpha, beta); + } + + template + static inline wfn_type& flip_bits(wfn_type& state, Inds&&...); + + template + static inline wfn_type& flip_bits(wfn_type& state) { + return state; + } + + template + static inline wfn_type& flip_bits(wfn_type& state, unsigned p, + Inds&&... inds) { + return flip_bits(state.flip(p + (Sigma == Spin::Alpha ? 0 : N / 2)), + std::forward(inds)...); + } + + template + static inline wfn_type create_no_check(wfn_type state, unsigned p) { + flip_bits(state, p); + return state; + } + + template + static inline wfn_type single_excitation_no_check(wfn_type state, unsigned p, + unsigned q) { + flip_bits(state, p, q); + return state; + } + + template + static inline wfn_type double_excitation_no_check(wfn_type state, unsigned p, + unsigned q, unsigned r, + unsigned s) { + flip_bits(state, p, q, r, s); + return state; + } + + static inline void state_to_occ(wfn_type state, orbidx_container& occ) { + occ = bits_to_indices(state); + } + + static inline orbidx_container state_to_occ(wfn_type state) { + return bits_to_indices(state); + } + + static inline void state_to_occ_vir(size_t norb, wfn_type state, + orbidx_container& occ, + orbidx_container& vir) { + state_to_occ(state, occ); + const auto nocc = occ.size(); + assert(nocc < norb); + + const auto nvir = norb - nocc; + vir.resize(nvir); + state = ~state; + for(int i = 0; i < nvir; ++i) { + auto a = ffs(state) - 1; + vir[i] = a; + state.flip(a); + } + } +}; + +} // namespace macis diff --git a/src/macis/CMakeLists.txt b/src/macis/CMakeLists.txt index 26155ac6..25e9abfd 100644 --- a/src/macis/CMakeLists.txt +++ b/src/macis/CMakeLists.txt @@ -6,17 +6,31 @@ add_library( macis fcidump.cxx - fock_matrices.cxx transform.cxx - orbital_gradient.cxx - casscf.cxx moller_plesset.cxx - orbital_energies.cxx - orbital_rotation_utilities.cxx - orbital_hessian.cxx - orbital_steps.cxx + + mcscf/fock_matrices.cxx + mcscf/casscf.cxx + mcscf/orbital_gradient.cxx + mcscf/orbital_energies.cxx + mcscf/orbital_rotation_utilities.cxx + mcscf/orbital_hessian.cxx + mcscf/orbital_steps.cxx + + hamiltonian_generator/base.cxx ) +if(MACIS_ENABLE_TREXIO) + FetchContent_Declare( trexio + #GIT_REPOSITORY https://github.com/TREX-CoE/trexio + GIT_REPOSITORY https://github.com/wavefunction91/trexio + GIT_TAG cmake_fixup + ) + FetchContent_MakeAvailable(trexio) + target_link_libraries(macis PUBLIC trexio::trexio) + target_sources(macis PRIVATE trexio.cxx) +endif() + target_include_directories( macis PUBLIC $ $ diff --git a/src/macis/hamiltonian_generator/base.cxx b/src/macis/hamiltonian_generator/base.cxx new file mode 100644 index 00000000..dbfd61ac --- /dev/null +++ b/src/macis/hamiltonian_generator/base.cxx @@ -0,0 +1,17 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#include "base.ipp" + +#include "fast_diagonals.ipp" + +namespace macis { + +template class HamiltonianGeneratorBase; + +} diff --git a/include/macis/hamiltonian_generator/rdms.hpp b/src/macis/hamiltonian_generator/base.ipp similarity index 57% rename from include/macis/hamiltonian_generator/rdms.hpp rename to src/macis/hamiltonian_generator/base.ipp index ba4cc0f9..a39ea749 100644 --- a/include/macis/hamiltonian_generator/rdms.hpp +++ b/src/macis/hamiltonian_generator/base.ipp @@ -7,7 +7,8 @@ */ #pragma once -#include +#include + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include @@ -16,17 +17,77 @@ namespace macis { -template -void HamiltonianGenerator::rotate_hamiltonian_ordm(const double* ordm) { +template +HamiltonianGeneratorBase::HamiltonianGeneratorBase(matrix_span_t T, + rank4_span_t V) + : norb_(T.extent(0)), + norb2_(norb_ * norb_), + norb3_(norb2_ * norb_), + T_pq_(T), + V_pqrs_(V) { + generate_integral_intermediates_(V_pqrs_); +} + +template +void HamiltonianGeneratorBase::generate_integral_intermediates_(rank4_span_t V) { + if(V.extent(0) != norb_ or V.extent(1) != norb_ or V.extent(2) != norb_ or + V.extent(3) != norb_) + throw std::runtime_error("V has incorrect dimensions"); + + size_t no = norb_; + size_t no2 = no * no; + size_t no3 = no2 * no; + size_t no4 = no3 * no; + + // G(i,j,k,l) = V(i,j,k,l) - V(i,l,k,j) + G_pqrs_data_ = std::vector(begin(V), end(V)); + G_pqrs_ = rank4_span_t(G_pqrs_data_.data(), no, no, no, no); + for(auto i = 0ul; i < no; ++i) + for(auto j = 0ul; j < no; ++j) + for(auto k = 0ul; k < no; ++k) + for(auto l = 0ul; l < no; ++l) { + G_pqrs_(i, j, k, l) -= V(i, l, k, j); + } + + // G_red(i,j,k) = G(i,j,k,k) = G(k,k,i,j) + // V_red(i,j,k) = V(i,j,k,k) = V(k,k,i,j) + G_red_data_.resize(no3); + V_red_data_.resize(no3); + G_red_ = rank3_span_t(G_red_data_.data(), no, no, no); + V_red_ = rank3_span_t(V_red_data_.data(), no, no, no); + for(auto j = 0ul; j < no; ++j) + for(auto i = 0ul; i < no; ++i) + for(auto k = 0ul; k < no; ++k) { + G_red_(k, i, j) = G_pqrs_(k, k, i, j); + V_red_(k, i, j) = V(k, k, i, j); + } + + // G2_red(i,j) = 0.5 * G(i,i,j,j) + // V2_red(i,j) = V(i,i,j,j) + G2_red_data_.resize(no2); + V2_red_data_.resize(no2); + G2_red_ = matrix_span(G2_red_data_.data(), no, no); + V2_red_ = matrix_span(V2_red_data_.data(), no, no); + for(auto j = 0ul; j < no; ++j) + for(auto i = 0ul; i < no; ++i) { + G2_red_(i, j) = 0.5 * G_pqrs_(i, i, j, j); + V2_red_(i, j) = V(i, i, j, j); + } +} + + + +template +void HamiltonianGeneratorBase::rotate_hamiltonian_ordm(const Scalar* ordm) { // SVD on ordm to get natural orbitals - std::vector natural_orbitals(ordm, ordm + norb2_); - std::vector S(norb_); + std::vector natural_orbitals(ordm, ordm + norb2_); + std::vector S(norb_); lapack::gesvd(lapack::Job::OverwriteVec, lapack::Job::NoVec, norb_, norb_, natural_orbitals.data(), norb_, S.data(), NULL, 1, NULL, 1); #if 0 { - std::vector tmp(norb2_); + std::vector tmp(norb2_); blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans, norb_, norb_, norb_, 1., natural_orbitals.data(), norb_, natural_orbitals.data(), norb_, 0., tmp.data(), norb_ ); @@ -34,7 +95,7 @@ void HamiltonianGenerator::rotate_hamiltonian_ordm(const double* ordm) { std::cout << "MAX = " << *std::max_element(tmp.begin(),tmp.end(), []( auto x, auto y ){ return std::abs(x) < std::abs(y); } ) << std::endl; - double max_diff = 0.; + Scalar max_diff = 0.; for( auto i = 0; i < norb_; ++i ) for( auto j = i+1; j < norb_; ++j ) { max_diff = std::max( max_diff, @@ -44,7 +105,7 @@ void HamiltonianGenerator::rotate_hamiltonian_ordm(const double* ordm) { } #endif - std::vector tmp(norb3_ * norb_), tmp2(norb3_ * norb_); + std::vector tmp(norb3_ * norb_), tmp2(norb3_ * norb_); // Transform T // T <- N**H * T * N @@ -95,7 +156,8 @@ void HamiltonianGenerator::rotate_hamiltonian_ordm(const double* ordm) { natural_orbitals.data(), norb_, 0., V_pqrs_.data_handle(), norb3_); // Regenerate intermediates - generate_integral_intermediates(V_pqrs_); + generate_integral_intermediates_(V_pqrs_); } } // namespace macis + diff --git a/include/macis/hamiltonian_generator/fast_diagonals.hpp b/src/macis/hamiltonian_generator/fast_diagonals.ipp similarity index 86% rename from include/macis/hamiltonian_generator/fast_diagonals.hpp rename to src/macis/hamiltonian_generator/fast_diagonals.ipp index 52cab806..a76292b7 100644 --- a/include/macis/hamiltonian_generator/fast_diagonals.hpp +++ b/src/macis/hamiltonian_generator/fast_diagonals.ipp @@ -11,8 +11,8 @@ namespace macis { -template -double HamiltonianGenerator::single_orbital_en( +template +double HamiltonianGeneratorBase::single_orbital_en( uint32_t orb, const std::vector& ss_occ, const std::vector& os_occ) const { // One electron component @@ -28,8 +28,8 @@ double HamiltonianGenerator::single_orbital_en( return orb_en; } -template -std::vector HamiltonianGenerator::single_orbital_ens( +template +std::vector HamiltonianGeneratorBase::single_orbital_ens( size_t norb, const std::vector& ss_occ, const std::vector& os_occ) const { std::vector ens(norb); @@ -50,8 +50,8 @@ std::vector HamiltonianGenerator::single_orbital_ens( return ens; } -template -double HamiltonianGenerator::fast_diag_single( +template +double HamiltonianGeneratorBase::fast_diag_single( // These refer to original determinant double hol_en, double par_en, uint32_t orb_hol, uint32_t orb_par, double orig_det_Hii) const { @@ -59,8 +59,8 @@ double HamiltonianGenerator::fast_diag_single( G2_red_(orb_hol, orb_par); } -template -double HamiltonianGenerator::fast_diag_single( +template +double HamiltonianGeneratorBase::fast_diag_single( // These refer to original determinant const std::vector& ss_occ, const std::vector& os_occ, uint32_t orb_hol, uint32_t orb_par, double orig_det_Hii) const { @@ -69,8 +69,8 @@ double HamiltonianGenerator::fast_diag_single( return fast_diag_single(hol_en, par_en, orb_hol, orb_par, orig_det_Hii); } -template -double HamiltonianGenerator::fast_diag_ss_double( +template +double HamiltonianGeneratorBase::fast_diag_ss_double( // These refer to original determinant double hol1_en, double hol2_en, double par1_en, double par2_en, uint32_t orb_hol1, uint32_t orb_hol2, uint32_t orb_par1, uint32_t orb_par2, @@ -84,8 +84,8 @@ double HamiltonianGenerator::fast_diag_ss_double( G2_red_(orb_par2, orb_hol2) - G2_red_(orb_hol2, orb_par2); } -template -double HamiltonianGenerator::fast_diag_ss_double( +template +double HamiltonianGeneratorBase::fast_diag_ss_double( // These refer to original determinant const std::vector& ss_occ, const std::vector& os_occ, uint32_t orb_hol1, uint32_t orb_hol2, uint32_t orb_par1, uint32_t orb_par2, @@ -98,8 +98,8 @@ double HamiltonianGenerator::fast_diag_ss_double( orb_hol2, orb_par1, orb_par2, orig_det_Hii); } -template -double HamiltonianGenerator::fast_diag_os_double( +template +double HamiltonianGeneratorBase::fast_diag_os_double( // These refer to original determinant double en_holu, double en_hold, double en_paru, double en_pard, uint32_t orb_holu, uint32_t orb_hold, uint32_t orb_paru, uint32_t orb_pard, @@ -111,8 +111,8 @@ double HamiltonianGenerator::fast_diag_os_double( V2_red_(orb_paru, orb_hold) - V2_red_(orb_holu, orb_pard); } -template -double HamiltonianGenerator::fast_diag_os_double( +template +double HamiltonianGeneratorBase::fast_diag_os_double( // These refer to original determinant const std::vector& ss_occ, const std::vector& os_occ, uint32_t orb_holu, uint32_t orb_hold, uint32_t orb_paru, uint32_t orb_pard, diff --git a/src/macis/casscf.cxx b/src/macis/mcscf/casscf.cxx similarity index 86% rename from src/macis/casscf.cxx rename to src/macis/mcscf/casscf.cxx index d58ae4df..075f3777 100644 --- a/src/macis/casscf.cxx +++ b/src/macis/mcscf/casscf.cxx @@ -7,9 +7,9 @@ */ #include -#include -#include -#include +#include +#include +#include namespace macis { @@ -19,7 +19,7 @@ double casscf_diis(MCSCFSettings settings, NumElectron nalpha, size_t LDT, double* V, size_t LDV, double* A1RDM, size_t LDD1, double* A2RDM, size_t LDD2 MACIS_MPI_CODE(, MPI_Comm comm)) { - using generator_t = DoubleLoopHamiltonianGenerator<64>; + using generator_t = DoubleLoopHamiltonianGenerator>; using functor_t = CASRDMFunctor; functor_t op; return mcscf_impl(op, settings, nalpha, nbeta, norb, ninact, nact, diff --git a/src/macis/fock_matrices.cxx b/src/macis/mcscf/fock_matrices.cxx similarity index 99% rename from src/macis/fock_matrices.cxx rename to src/macis/mcscf/fock_matrices.cxx index a2708f14..40bd82b8 100644 --- a/src/macis/fock_matrices.cxx +++ b/src/macis/mcscf/fock_matrices.cxx @@ -7,7 +7,7 @@ */ #include -#include +#include #include namespace macis { diff --git a/src/macis/orbital_energies.cxx b/src/macis/mcscf/orbital_energies.cxx similarity index 95% rename from src/macis/orbital_energies.cxx rename to src/macis/mcscf/orbital_energies.cxx index 97395b59..ef34c632 100644 --- a/src/macis/orbital_energies.cxx +++ b/src/macis/mcscf/orbital_energies.cxx @@ -6,7 +6,7 @@ * See LICENSE.txt for details */ -#include +#include namespace macis { diff --git a/src/macis/orbital_gradient.cxx b/src/macis/mcscf/orbital_gradient.cxx similarity index 99% rename from src/macis/orbital_gradient.cxx rename to src/macis/mcscf/orbital_gradient.cxx index 918bfbae..3abedee9 100644 --- a/src/macis/orbital_gradient.cxx +++ b/src/macis/mcscf/orbital_gradient.cxx @@ -7,8 +7,8 @@ */ #include -#include -#include +#include +#include #include #include diff --git a/src/macis/orbital_hessian.cxx b/src/macis/mcscf/orbital_hessian.cxx similarity index 99% rename from src/macis/orbital_hessian.cxx rename to src/macis/mcscf/orbital_hessian.cxx index fcddd407..e5734bfa 100644 --- a/src/macis/orbital_hessian.cxx +++ b/src/macis/mcscf/orbital_hessian.cxx @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include namespace macis { diff --git a/src/macis/orbital_rotation_utilities.cxx b/src/macis/mcscf/orbital_rotation_utilities.cxx similarity index 98% rename from src/macis/orbital_rotation_utilities.cxx rename to src/macis/mcscf/orbital_rotation_utilities.cxx index 8912902b..8ffdfe35 100644 --- a/src/macis/orbital_rotation_utilities.cxx +++ b/src/macis/mcscf/orbital_rotation_utilities.cxx @@ -6,7 +6,7 @@ * See LICENSE.txt for details */ -#include +#include namespace macis { diff --git a/src/macis/orbital_steps.cxx b/src/macis/mcscf/orbital_steps.cxx similarity index 99% rename from src/macis/orbital_steps.cxx rename to src/macis/mcscf/orbital_steps.cxx index 95fd4f43..d50a4a5c 100644 --- a/src/macis/orbital_steps.cxx +++ b/src/macis/mcscf/orbital_steps.cxx @@ -6,8 +6,8 @@ * See LICENSE.txt for details */ -#include -#include +#include +#include namespace macis { diff --git a/src/macis/moller_plesset.cxx b/src/macis/moller_plesset.cxx index 5e00d85c..49982ed1 100644 --- a/src/macis/moller_plesset.cxx +++ b/src/macis/moller_plesset.cxx @@ -8,8 +8,8 @@ #include #include +#include #include -#include namespace macis { diff --git a/src/macis/trexio.cxx b/src/macis/trexio.cxx new file mode 100644 index 00000000..8b260c83 --- /dev/null +++ b/src/macis/trexio.cxx @@ -0,0 +1,167 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#include +#include +#include +#include + +namespace macis { + +trexio_exception::trexio_exception(std::string func_name, trexio_exit_code rc) + : msg_("TREXIO Error: " + func_name + + "\n Error Message: " + std::string(trexio_string_of_error(rc))) {} + +const char* trexio_exception::what() const noexcept { return msg_.c_str(); } + +#define TREXIO_EXCEPTION(rc) throw trexio_exception(__PRETTY_FUNCTION__, rc) + +TREXIOFile::TREXIOFile(std::string name, char mode, int backend) { + trexio_exit_code rc; + file_handle_ = trexio_open(name.c_str(), mode, backend, &rc); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +TREXIOFile::~TREXIOFile() noexcept { + if(file_handle_) trexio_close(file_handle_); +} + +TREXIOFile::TREXIOFile(TREXIOFile&& other) noexcept { + this->file_handle_ = other.file_handle_; + other.file_handle_ = nullptr; +} + +int64_t TREXIOFile::read_mo_num() const { + int64_t nmo; + auto rc = trexio_read_mo_num_64(file_handle_, &nmo); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); + return nmo; +} + +int64_t TREXIOFile::read_mo_2e_int_eri_size() const { + int64_t neri; + auto rc = trexio_read_mo_2e_int_eri_size(file_handle_, &neri); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); + return neri; +} + +double TREXIOFile::read_nucleus_repulsion() const { + double E; + auto rc = trexio_read_nucleus_repulsion(file_handle_, &E); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); + return E; +} + +void TREXIOFile::read_mo_1e_int_core_hamiltonian(double* h) const { + auto rc = trexio_read_mo_1e_int_core_hamiltonian(file_handle_, h); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +void TREXIOFile::read_mo_2e_int_eri(double* V) const { + const auto norb = read_mo_num(); + const auto neri = read_mo_2e_int_eri_size(); + const size_t nbatch = 100000; + std::vector> idx_batch(nbatch); + std::vector V_batch(nbatch); + + size_t ioff = 0; + int64_t icount = nbatch; + while(icount == nbatch) { + if(ioff < neri) { + trexio_read_mo_2e_int_eri(file_handle_, ioff, &icount, + idx_batch[0].data(), V_batch.data()); + } else { + icount = 0; + } + + for(size_t ii = 0; ii < icount; ++ii) { + const auto [i, j, k, l] = idx_batch[ii]; + const auto v = V_batch[ii]; + + V[i + j * norb + k * norb * norb + l * norb * norb * norb] = v; + V[i + j * norb + l * norb * norb + k * norb * norb * norb] = v; + V[j + i * norb + k * norb * norb + l * norb * norb * norb] = v; + V[j + i * norb + l * norb * norb + k * norb * norb * norb] = v; + V[k + l * norb + i * norb * norb + j * norb * norb * norb] = v; + V[k + l * norb + j * norb * norb + i * norb * norb * norb] = v; + V[l + k * norb + i * norb * norb + j * norb * norb * norb] = v; + V[l + k * norb + j * norb * norb + i * norb * norb * norb] = v; + } + ioff += icount; + } +} + +int64_t TREXIOFile::read_determinant_num() const { + int64_t ndet; + auto rc = trexio_read_determinant_num_64(file_handle_, &ndet); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); + return ndet; +} + +int32_t TREXIOFile::get_determinant_int64_num() const { + int32_t n64; + auto rc = trexio_get_int64_num(file_handle_, &n64); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); + return n64; +} + +void TREXIOFile::read_determinant_list(int64_t ndet, int64_t* dets, + int64_t ioff) const { + int64_t icount = ndet; + auto rc = trexio_read_determinant_list(file_handle_, ioff, &icount, dets); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +void TREXIOFile::write_mo_num(int64_t nmo) { + auto rc = trexio_write_mo_num_64(file_handle_, nmo); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +void TREXIOFile::write_nucleus_repulsion(double E) { + auto rc = trexio_write_nucleus_repulsion(file_handle_, E); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +void TREXIOFile::write_mo_1e_int_core_hamiltonian(const double* h) { + auto rc = trexio_write_mo_1e_int_core_hamiltonian(file_handle_, h); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +void TREXIOFile::write_mo_2e_int_eri(const double* V) { + const auto norb = read_mo_num(); + const auto npair = (norb * (norb + 1)) / 2; + const auto nquad = (npair * (npair + 1)) / 2; + std::vector V_compress(nquad); + std::vector> idx(nquad); + + size_t ijkl = 0; + for(size_t i = 0, ij = 0; i < norb; ++i) + for(size_t j = 0; j <= i; ++j, ++ij) { + for(size_t k = 0, kl = 0; k < norb; ++k) + for(size_t l = 0; l <= k; ++l, ++kl) { + if(kl > ij) continue; + V_compress.at(ijkl) = + V[i + j * norb + k * norb * norb + l * norb * norb * norb]; + idx.at(ijkl) = {i, j, k, l}; + ijkl++; + } + } + + auto rc = trexio_write_mo_2e_int_eri(file_handle_, 0, nquad, idx[0].data(), + V_compress.data()); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +void TREXIOFile::write_determinant_list(int64_t ndet, const int64_t* dets, + int64_t ioff) { + int64_t icount = ndet; + auto rc = trexio_write_determinant_list(file_handle_, ioff, icount, dets); + if(rc != TREXIO_SUCCESS) TREXIO_EXCEPTION(rc); +} + +} // namespace macis diff --git a/src/sparsexx/CMakeLists.txt b/src/sparsexx/CMakeLists.txt index 935419db..e9019082 100644 --- a/src/sparsexx/CMakeLists.txt +++ b/src/sparsexx/CMakeLists.txt @@ -93,7 +93,7 @@ if( SPARSEXX_ENABLE_RANGES_V3 ) add_subdirectory(${range_v3_download_SOURCE_DIR} ${range_v3_download_BINARY_DIR}) add_library( ranges INTERFACE IMPORTED ) target_link_libraries( ranges INTERFACE range-v3 ) - target_compile_definitions( ranges INTERFACE "SPARSEXX_ENABLE_RANGES_V3=1" ) + #target_compile_definitions( ranges INTERFACE "SPARSEXX_ENABLE_RANGES_V3=1" ) endif() list(APPEND SPARSEXX_EXTERNAL_LIBRARIES ranges) endif() @@ -128,7 +128,7 @@ endif() if( TARGET OpenMP::OpenMP_CXX ) target_link_libraries( sparsexx PUBLIC OpenMP::OpenMP_CXX ) endif() -target_link_libraries ( sparsexx PRIVATE ${SPARSEXX_EXTERNAL_LIBRARIES} ) +target_link_libraries ( sparsexx PUBLIC ${SPARSEXX_EXTERNAL_LIBRARIES} ) target_include_directories( sparsexx PUBLIC $ $ diff --git a/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp b/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp index 563cdeac..073cffbf 100644 --- a/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp +++ b/src/sparsexx/include/sparsexx/matrix_types/conversions.hpp @@ -74,15 +74,40 @@ csr_matrix::csr_matrix( const auto& colind_coo = other.colind(); const auto& nzval_coo = other.nzval(); - // Compute rowptr +// Compute rowptr +#if 0 rowptr_.at(0) = other.indexing(); auto cur_row = 0; for(size_type i = 0; i < nnz_; ++i) - if(rowind_coo[i] != (cur_row + indexing_)) { + while(rowind_coo[i] != (cur_row + indexing_)) { cur_row++; rowptr_.at(cur_row) = i + indexing_; } rowptr_.at(m_) = nnz_ + indexing_; +#else + if(indexing_) throw std::runtime_error("NONZERO INDEXING"); + for(size_type i = 0; i < nnz_; ++i) { + rowptr_[rowind_coo[i] - indexing_ + 1]++; + } + for(size_type i = 0; i < m_; ++i) { + rowptr_[i + 1] += rowptr_[i]; + } + if(indexing_) + for(size_type i = 0; i < m_ + 1; ++i) { + rowptr_[i] += indexing_; + } +#endif + + // for(size_type i = 0; i < m_; ++i) { + // auto row_st = rowptr_[i]; + // auto row_en = rowptr_[i+1]; + // for(size_type j = row_st; j < row_en; ++j) { + // if(rowind_coo[j] != i) throw std::runtime_error("ROWPTR WRONG"); + // } + // if(!std::is_sorted(colind_coo.begin() + row_st, colind_coo.begin() + + // row_en)) + // throw std::runtime_error("COLIND WRONG"); + // } std::copy(colind_coo.begin(), colind_coo.end(), colind_.begin()); std::copy(nzval_coo.begin(), nzval_coo.end(), nzval_.begin()); @@ -105,7 +130,7 @@ csc_matrix::csc_matrix( colptr_.at(0) = other.indexing(); auto cur_col = 0; for(size_t i = 0; i < nnz_; ++i) - if(colind_coo[i] != (cur_col + indexing_)) { + while(colind_coo[i] != (cur_col + indexing_)) { cur_col++; colptr_.at(cur_col) = i + indexing_; } diff --git a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp index 8e47b9ee..e0342cf2 100644 --- a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp +++ b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp @@ -212,6 +212,15 @@ class coo_matrix { void expand_from_triangle(); + template + inline void insert(index_type i, index_type j, value_type v) noexcept { + static_assert(not Check, "insert check NYI"); + rowind_.emplace_back(i); + colind_.emplace_back(j); + nzval_.emplace_back(v); + nnz_++; + } + #ifdef SPARSEXX_ENABLE_CEREAL template void serialize(Archive& ar) { diff --git a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp index c6a2c7f0..e99bd7c1 100644 --- a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp +++ b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp @@ -9,10 +9,11 @@ #pragma once #include +#include #include "coo_matrix.hpp" -#if SPARSEXX_ENABLE_RANGES_V3 +#ifdef SPARSEXX_ENABLE_RANGES_V3 #include #endif #include @@ -22,7 +23,7 @@ namespace sparsexx { template void coo_matrix::sort_by_row_index() { -#if SPARSEXX_ENABLE_RANGES_V3 +#ifdef SPARSEXX_ENABLE_RANGES_V3 auto coo_zip = ranges::views::zip(rowind_, colind_, nzval_); // Sort lex by row index @@ -72,9 +73,7 @@ void coo_matrix::sort_by_row_index() { template void coo_matrix::expand_from_triangle() { - std::cout << "Expanding Triangle" << std::endl; - -#if SPARSEXX_ENABLE_RANGES_V3 +#ifdef SPARSEXX_ENABLE_RANGES_V3 auto idx_zip = ranges::views::zip(rowind_, colind_); @@ -110,7 +109,7 @@ void coo_matrix::expand_from_triangle() { std::cout << "UT " << upper_triangle << std::endl; if(diagonal or full_matrix) return; - std::cout << "Performing Expansion..." << std::endl; + // std::cout << "Performing Expansion..." << std::endl; size_t new_nnz = 2 * nnz_ - n_; rowind_.reserve(new_nnz); colind_.reserve(new_nnz); @@ -132,7 +131,7 @@ void coo_matrix::expand_from_triangle() { template void coo_matrix::sort_by_col_index() { -#if SPARSEXX_ENABLE_RANGES_V3 +#ifdef SPARSEXX_ENABLE_RANGES_V3 auto coo_zip = ranges::views::zip(rowind_, colind_, nzval_); // Sort lex by row index diff --git a/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp b/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp index 811f4a69..e18f3ad3 100644 --- a/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp +++ b/src/sparsexx/include/sparsexx/spblas/pspmbv.hpp @@ -50,6 +50,8 @@ struct spmv_info { int comm_size = recv_offsets.size(); for(int i = 0; i < comm_size; ++i) if(recv_counts[i]) { + if(recv_counts[i] > std::numeric_limits::max()) + throw "DIE IN RECV"; reqs.emplace_back( detail::mpi_irecv(X + recv_offsets[i], recv_counts[i], i, 0, comm)); } @@ -62,6 +64,8 @@ struct spmv_info { int comm_size = send_offsets.size(); for(int i = 0; i < comm_size; ++i) if(send_counts[i]) { + if(send_counts[i] > std::numeric_limits::max()) + throw "DIE IN SEND"; reqs.emplace_back( detail::mpi_isend(X + send_offsets[i], send_counts[i], i, 0, comm)); } diff --git a/src/sparsexx/tests/CMakeLists.txt b/src/sparsexx/tests/CMakeLists.txt index 52e12cdb..17ba4589 100644 --- a/src/sparsexx/tests/CMakeLists.txt +++ b/src/sparsexx/tests/CMakeLists.txt @@ -24,7 +24,7 @@ if( NOT Catch2_FOUND ) endif() -add_executable( sparsexx_test ut_main.cxx test_io.cxx test_util.cxx test_graph.cxx test_spmbv.cxx ) +add_executable( sparsexx_test ut_main.cxx test_types.cxx test_io.cxx test_util.cxx test_graph.cxx test_spmbv.cxx ) target_link_libraries( sparsexx_test PUBLIC sparsexx Catch2::Catch2 ) target_compile_definitions( sparsexx_test PUBLIC "SPARSEXX_DATA_DIR=\"${CMAKE_CURRENT_LIST_DIR}\"" diff --git a/src/sparsexx/tests/test_types.cxx b/src/sparsexx/tests/test_types.cxx new file mode 100644 index 00000000..09b18c03 --- /dev/null +++ b/src/sparsexx/tests/test_types.cxx @@ -0,0 +1,34 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#include +#include + +#include "catch2/catch.hpp" + +TEST_CASE("COO Matrix", "[matrix_types]") { + using mat_type = sparsexx::coo_matrix; + + SECTION("Insert") { + size_t n = 6; + mat_type A(n, n, 0, 0); // empty matrix zero-indexing + for(int i = 0; i < n; ++i) { + A.insert(i, i, 2); + if(i < n - 1) A.insert(i, i + 1, 1); + if(i > 0) A.insert(i, i - 1, 1); + } + A.sort_by_row_index(); // Put into canonical order + REQUIRE(A.nnz() == 16); + REQUIRE(A.rowind() == std::vector{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, + 4, 4, 4, 5, 5}); + REQUIRE(A.colind() == std::vector{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, + 3, 4, 5, 4, 5}); + REQUIRE(A.nzval() == std::vector{2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, + 2, 1, 1, 2}); + } +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 038f9dfd..120c832f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -26,6 +26,8 @@ target_include_directories( macis_test PUBLIC ${PROJECT_BINARY_DIR}/tests ) add_executable( standalone_driver standalone_driver.cxx ini_input.cxx ) target_link_libraries( standalone_driver PUBLIC macis ) +add_executable( fcidump_to_trexio fcidump_to_trexio.cxx ) +target_link_libraries( fcidump_to_trexio PUBLIC macis ) set(REF_DATA_PREFIX "${PROJECT_SOURCE_DIR}/tests/ref_data") configure_file( ut_common.hpp.in ${PROJECT_BINARY_DIR}/tests/ut_common.hpp) diff --git a/tests/asci.cxx b/tests/asci.cxx index 5542db83..40e8c406 100644 --- a/tests/asci.cxx +++ b/tests/asci.cxx @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -44,11 +45,29 @@ size_t top_set_ordinal(std::bitset word, size_t NSet) { return ord; } +template +auto make_quad(unsigned i, unsigned j, unsigned k, unsigned l) { + using wfn_type = macis::wfn_t; + using wfn_traits = macis::wavefunction_traits; + using constraint_type = macis::alpha_constraint; + using string_type = typename constraint_type::constraint_type; + + string_type C = 0; + C.flip(i).flip(j).flip(k).flip(l); + string_type B = 1; + B <<= l; + B = B.to_ullong() - 1; + + return constraint_type(C, B, l); +} + TEST_CASE("Triplets") { constexpr size_t num_bits = 64; size_t norb = 32; using wfn_less_comparator = macis::bitset_less_comparator; + using wfn_type = macis::wfn_t; + using wfn_traits = macis::wavefunction_traits; // Generate ficticious wfns std::vector> wfn_a = { @@ -158,12 +177,10 @@ TEST_CASE("Triplets") { triplets.emplace_back(i, j, k); } - const auto overfill = macis::full_mask(norb); - std::vector new_triplet_hist(triplet_hist.size(), 0); for(auto [i, j, k] : triplets) { const auto label = i * 32 * 32 + j * 32 + k; - auto [T, B, T_min] = macis::make_triplet(i, j, k); + auto constraint = macis::make_triplet(i, j, k); for(auto det : wfn_a_uniq) { const size_t nocc = det.count(); @@ -172,7 +189,7 @@ TEST_CASE("Triplets") { const size_t n_doubles = (n_singles * (n_singles - nocc - nvir + 1)) / 4; new_triplet_hist[label] += macis::constraint_histogram( - det, n_singles, n_doubles, T, overfill, B); + wfn_traits::alpha_string(det), n_singles, n_doubles, constraint); } } @@ -190,7 +207,7 @@ TEST_CASE("Triplets") { for(auto [i, j, k, l] : quads) { const size_t label = i * 32ul * 32ul * 32ul + j * 32ul * 32ul + k * 32ul + l; - auto [Q, B, Q_min] = macis::make_quad(i, j, k, l); + auto constraint = make_quad(i, j, k, l); for(auto det : wfn_a_uniq) { const size_t nocc = det.count(); @@ -199,13 +216,149 @@ TEST_CASE("Triplets") { const size_t n_doubles = (n_singles * (n_singles - nocc - nvir + 1)) / 4; new_quad_hist[label] += macis::constraint_histogram( - det, n_singles, n_doubles, Q, overfill, B); + wfn_traits::alpha_string(det), n_singles, n_doubles, constraint); } } REQUIRE(quad_hist == new_quad_hist); } +TEST_CASE("Constraints") { + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + using spin_wfn_type = typename wfn_traits::spin_wfn_type; + using spin_wfn_traits = macis::wavefunction_traits; + using wfn_comparator = typename wfn_traits::spin_comparator; + using spin_wfn_comparator = typename spin_wfn_traits::wfn_comparator; + + using constraint_type = macis::alpha_constraint; + + const size_t norb = 10; + size_t nalpha, nbeta; + + SECTION("Closed Shell") { + nalpha = 6; + nbeta = 6; + } + + SECTION("Open Shell") { + nalpha = 6; + nbeta = 3; + } + + // Generate Hilbert Space + auto dets = macis::generate_hilbert_space(norb, nalpha, nbeta); + std::sort(dets.begin(), dets.end(), wfn_comparator{}); // lex sort + + // Get Alpha Compression Data + std::vector> unique_alpha; + unique_alpha.push_back({wfn_traits::alpha_string(dets[0]), 1}); + for(size_t i = 1; i < dets.size(); ++i) { + auto& [cur_alpha, cur_count] = unique_alpha.back(); + auto alpha_i = wfn_traits::alpha_string(dets[i]); + if(alpha_i == cur_alpha) { + cur_count++; + } else { + unique_alpha.push_back({alpha_i, 1}); + } + } + + // Get offsets + std::vector unique_alpha_offsets(unique_alpha.size()); + std::transform_exclusive_scan( + unique_alpha.begin(), unique_alpha.end(), unique_alpha_offsets.begin(), + 0ul, std::plus(), [](auto p) { return p.second; }); + + // size_t ntot = std::accumulate( unique_alpha.begin(), unique_alpha.end(), + // 0ul, [](auto s, auto a){ return s + a.second; } ); + // REQUIRE(ntot == dets.size()); + + // Generate constraints + std::vector triplets, quads; + for(int i = 0; i < norb; ++i) + for(int j = 0; j < i; ++j) + for(int k = 0; k < j; ++k) { + triplets.emplace_back(macis::make_triplet<64>(i, j, k)); + for(int l = 0; l < k; ++l) { + quads.emplace_back(make_quad<64>(i, j, k, l)); + } + } + + // Check doubles + auto check_doubles = [](auto det, auto C, auto& doubles) { + // Sanity check + auto [_O, _V] = macis::generate_constraint_double_excitations(det, C); + REQUIRE(std::all_of(_O.begin(), _O.end(), + [](auto e) { return spin_wfn_traits::count(e) == 2; })); + REQUIRE(std::all_of(_V.begin(), _V.end(), + [](auto e) { return spin_wfn_traits::count(e) == 2; })); + + // Extract all double excitations that satisfy the constraint + std::vector doubles_c; + std::copy_if(doubles.begin(), doubles.end(), std::back_inserter(doubles_c), + [=](auto d) { return C.satisfies_constraint(d); }); + + // Generate all double excitations that satisfy the constraint + std::vector doubles_g; + macis::generate_constraint_doubles(det, C, doubles_g); + + // Compare and check counting + std::sort(doubles_c.begin(), doubles_c.end(), spin_wfn_comparator{}); + std::sort(doubles_g.begin(), doubles_g.end(), spin_wfn_comparator{}); + REQUIRE(doubles_c == doubles_g); + REQUIRE(doubles_g.size() == macis::count_constraint_doubles(det, C)); + }; + + // Check singles + auto check_singles = [](auto det, auto C, auto& singles) { + // Extract all double excitations that satisfy the constraint + std::vector singles_c; + std::copy_if(singles.begin(), singles.end(), std::back_inserter(singles_c), + [=](auto d) { return C.satisfies_constraint(d); }); + + // Generate all double excitations that satisfy the constraint + std::vector singles_g; + macis::generate_constraint_singles(det, C, singles_g); + + // Compare and check counting + std::sort(singles_c.begin(), singles_c.end(), spin_wfn_comparator{}); + std::sort(singles_g.begin(), singles_g.end(), spin_wfn_comparator{}); + REQUIRE(singles_c == singles_g); + REQUIRE(singles_g.size() == macis::count_constraint_singles(det, C)); + }; + + for(auto i = 0; i < unique_alpha.size(); ++i) { + auto det = unique_alpha[i].first; + + auto det_triplet = top_set_indices<3>(det); + auto det_quad = top_set_indices<4>(det); + + std::vector singles, doubles; + macis::generate_singles(norb, det, singles); + macis::generate_doubles(norb, det, doubles); + + for(auto C : triplets) { + // Check validity of constraint check + auto inds = top_set_indices<3>(C.C()); + REQUIRE(C.C_min() == inds.back()); + REQUIRE(C.satisfies_constraint(det) == (inds == det_triplet)); + + check_singles(det, C, singles); + check_doubles(det, C, doubles); + } + + for(auto C : quads) { + // Check validity of constraint check + auto inds = top_set_indices<4>(C.C()); + REQUIRE(C.C_min() == inds.back()); + REQUIRE(C.satisfies_constraint(det) == (inds == det_quad)); + + check_singles(det, C, singles); + check_doubles(det, C, doubles); + } + } +} + TEST_CASE("ASCI") { MACIS_MPI_CODE(MPI_Barrier(MPI_COMM_WORLD);) using macis::NumActive; @@ -231,7 +384,9 @@ TEST_CASE("ASCI") { macis::read_fcidump_2body(water_ccpvdz_fcidump, V.data(), norb); // Hamiltonian Genereator - using generator_t = macis::DoubleLoopHamiltonianGenerator<64>; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + using generator_t = macis::DoubleLoopHamiltonianGenerator; generator_t ham_gen( macis::matrix_span(T.data(), norb, norb), macis::rank4_span(V.data(), norb, norb, norb, norb)); @@ -242,8 +397,8 @@ TEST_CASE("ASCI") { macis::MCSCFSettings mcscf_settings; // HF guess - std::vector> dets = { - macis::canonical_hf_determinant<64>(nalpha, nbeta)}; + std::vector dets = { + wfn_traits::canonical_hf_determinant(nalpha, nbeta)}; ; std::vector C = {1.0}; double E0 = ham_gen.matrix_element(dets[0], dets[0]); @@ -254,7 +409,8 @@ TEST_CASE("ASCI") { asci_settings, mcscf_settings, E0, std::move(dets), std::move(C), ham_gen, norb MACIS_MPI_CODE(, MPI_COMM_WORLD)); - REQUIRE(E0 == Approx(-8.542926243842e+01)); + // std::cout << E0 - -8.542926243842e+01 << std::endl; + REQUIRE(std::abs(E0 - -8.542926243842e+01) < 1e-11); REQUIRE(dets.size() == 10000); REQUIRE(C.size() == 10000); REQUIRE(std::inner_product(C.begin(), C.end(), C.begin(), 0.0) == @@ -265,12 +421,22 @@ TEST_CASE("ASCI") { asci_settings, mcscf_settings, E0, std::move(dets), std::move(C), ham_gen, norb MACIS_MPI_CODE(, MPI_COMM_WORLD)); - REQUIRE(E0 == Approx(-8.542925964708e+01)); + REQUIRE(std::abs(E0 - -8.542925964708e+01) < 1e-11); REQUIRE(dets.size() == 10000); REQUIRE(C.size() == 10000); REQUIRE(std::inner_product(C.begin(), C.end(), C.begin(), 0.0) == Approx(1.0)); + // ASCI-PT2 + auto EPT2 = macis::asci_pt2_constraint( + dets.begin(), dets.end(), E0, C, norb, ham_gen.T(), ham_gen.G_red(), + ham_gen.V_red(), ham_gen.G(), ham_gen.V(), + ham_gen MACIS_MPI_CODE(, MPI_COMM_WORLD)); + + // std::cout << std::scientific << std::setprecision(12); + // std::cout << EPT2 << std::endl; + REQUIRE(std::abs(EPT2 - -5.701585096318e-03) < 1e-10); + MACIS_MPI_CODE(MPI_Barrier(MPI_COMM_WORLD);) spdlog::drop_all(); } diff --git a/tests/bitset_operations.cxx b/tests/bitset_operations.cxx index e69516a8..2fd4673e 100644 --- a/tests/bitset_operations.cxx +++ b/tests/bitset_operations.cxx @@ -11,6 +11,9 @@ #include "ut_common.hpp" +template +using bs = std::bitset; + template void uint128_conversion_test() { using ref_type = macis::uint128_t; @@ -37,7 +40,134 @@ void uint128_conversion_test() { } template -using bs = std::bitset; +void mask_test() { + bs full = ~bs(0); + bs full_128 = N > 128 ? full >> (N - 128) : 0; + bs full_64 = N > 64 ? full >> (N - 64) : 0; + bs full_32 = N > 32 ? full >> (N - 32) : 0; + + REQUIRE(full == macis::full_mask()); + if constexpr(N > 128) REQUIRE(full_128 == macis::full_mask<128, N>()); + if constexpr(N > 64) REQUIRE(full_64 == macis::full_mask<64, N>()); + if constexpr(N > 32) REQUIRE(full_32 == macis::full_mask<32, N>()); + + REQUIRE(full == macis::full_mask(N)); + if constexpr(N > 128) REQUIRE(full_128 == macis::full_mask(128)); + if constexpr(N > 64) REQUIRE(full_64 == macis::full_mask(64)); + if constexpr(N > 32) REQUIRE(full_32 == macis::full_mask(32)); +} + +template +void ffs_test() { + REQUIRE(macis::ffs(bs()) == 0); + REQUIRE(macis::ffs(bs(1)) == 1); + + bs a(0xABC); + REQUIRE(macis::ffs(a) == 3); + if(N > 32) REQUIRE(macis::ffs(a << 32) == 3 + 32); + if(N > 64) REQUIRE(macis::ffs(a << 64) == 3 + 64); + if(N > 128) REQUIRE(macis::ffs(a << 128) == 3 + 128); +} + +template +void fls_test() { + REQUIRE(macis::fls(bs()) == UINT32_MAX); + REQUIRE(macis::fls(bs(1)) == 0); + + bs a(0xDEADBEEF); + REQUIRE(macis::fls(a) == 31); + if(N > 32) REQUIRE(macis::fls(a << 32) == 31 + 32); + if(N > 64) REQUIRE(macis::fls(a << 64) == 31 + 64); + if(N > 128) REQUIRE(macis::fls(a << 128) == 31 + 128); +} + +template +void indices_test() { + std::vector indices = {5}; + if(N > 32) indices.emplace_back(6 + 32); + if(N > 64) indices.emplace_back(7 + 64); + if(N > 128) indices.emplace_back(8 + 128); + + bs one(1), a(0); + for(auto i : indices) a |= one << i; + REQUIRE(indices == macis::bits_to_indices(a)); +} + +template +void lo_word_test() { + bs a(0); + bs b(0); + size_t n_32_words = N / 32; + for(int i = 0; i < n_32_words; ++i) { + a |= bs(0xDEADBEEF + i) << (i * 32); + } + for(int i = 0; i < n_32_words / 2; ++i) { + b |= bs(0xDEADBEEF + i) << (i * 32); + } + + REQUIRE(macis::bitset_lo_word(a) == b); + + bs c(0); + macis::set_bitset_lo_word(c, b); + REQUIRE((a << N / 2) >> N / 2 == c); +} + +template +void hi_word_test() { + bs a(0); + bs b(0); + size_t n_32_words = N / 32; + for(int i = 0; i < n_32_words; ++i) { + a |= bs(0xDEADBEEF + i) << (i * 32); + } + for(int i = 0; i < n_32_words / 2; ++i) { + b |= bs(0xDEADBEEF + (i + n_32_words / 2)) << (i * 32); + } + + REQUIRE(macis::bitset_hi_word(a) == b); + + bs c(0); + macis::set_bitset_hi_word(c, b); + REQUIRE((a >> N / 2) << N / 2 == c); +} + +template +void bitset_less_test() { + bs a(65), b(42); + REQUIRE(macis::bitset_less(b, a)); + REQUIRE_FALSE(macis::bitset_less(a, a)); + REQUIRE_FALSE(macis::bitset_less(a, b)); + + if(N > 32) { + auto aa = a << 32; + auto bb = b << 32; + REQUIRE(macis::bitset_less(bb, aa)); + REQUIRE_FALSE(macis::bitset_less(aa, aa)); + REQUIRE_FALSE(macis::bitset_less(aa, bb)); + REQUIRE_FALSE(macis::bitset_less(aa, b)); + REQUIRE_FALSE(macis::bitset_less(aa, a)); + } + + if(N > 64) { + auto aa = a << 64; + auto bb = b << 64; + REQUIRE(macis::bitset_less(bb, aa)); + REQUIRE_FALSE(macis::bitset_less(aa, aa)); + REQUIRE_FALSE(macis::bitset_less(aa, bb)); + REQUIRE_FALSE(macis::bitset_less(aa, b)); + REQUIRE_FALSE(macis::bitset_less(aa, a)); + } + + if(N > 128) { + auto aa = a << 128; + auto bb = b << 128; + REQUIRE(macis::bitset_less(bb, aa)); + REQUIRE_FALSE(macis::bitset_less(aa, aa)); + REQUIRE_FALSE(macis::bitset_less(aa, bb)); + REQUIRE_FALSE(macis::bitset_less(aa, b)); + REQUIRE_FALSE(macis::bitset_less(aa, a)); + } +} TEST_CASE("Bitset Operations") { ROOT_ONLY(MPI_COMM_WORLD); @@ -54,63 +184,34 @@ TEST_CASE("Bitset Operations") { } SECTION("Full Mask") { - bs<128> full_128 = ~bs<128>(0); - bs<128> full_64 = bs<128>(UINT64_MAX); - bs<128> full_32 = bs<128>(UINT32_MAX); - - SECTION("Static") { - REQUIRE(full_128 == macis::full_mask<128>()); - REQUIRE(full_64 == macis::full_mask<64, 128>()); - REQUIRE(full_32 == macis::full_mask<32, 128>()); - } - - SECTION("Dynamic") { - REQUIRE(full_128 == macis::full_mask<128>(128)); - REQUIRE(full_64 == macis::full_mask<128>(64)); - REQUIRE(full_32 == macis::full_mask<128>(32)); - } + SECTION("32") { mask_test<32>(); } + SECTION("64") { mask_test<64>(); } + SECTION("128") { mask_test<128>(); } + SECTION("256") { mask_test<256>(); } } SECTION("FFS") { - SECTION("Zero") { REQUIRE(macis::ffs(bs<64>()) == 0); } - SECTION("One") { REQUIRE(macis::ffs(bs<64>(1)) == 1); } - SECTION("Arbitrary") { - bs<64> a(0x0A0A); - REQUIRE(macis::ffs(a) == 2); - bs<32> b(0x0A0A); - REQUIRE(macis::ffs(b) == 2); - bs<128> c(0xABC); - REQUIRE(macis::ffs(c) == 3); - REQUIRE(macis::ffs(c << 64) == 3 + 64); - bs<256> d(0xABC); - REQUIRE(macis::ffs(d) == 3); - REQUIRE(macis::ffs(d << 64) == 3 + 64); - REQUIRE(macis::ffs(d << 128) == 3 + 128); - } + SECTION("32") { ffs_test<32>(); } + SECTION("64") { ffs_test<64>(); } + SECTION("128") { ffs_test<128>(); } + SECTION("256") { ffs_test<256>(); } } SECTION("FLS") { - bs<64> a(0x0A0A); - REQUIRE(macis::fls(a) == 12 - 1); - bs<32> b(0x0A0A); - REQUIRE(macis::fls(b) == 12 - 1); - bs<128> c(0xDEADBEEF); - REQUIRE(macis::fls(c) == 31); - REQUIRE(macis::fls(c << 64) == 31 + 64); - bs<256> d(0xDEADBEEF); - REQUIRE(macis::fls(d) == 31); - REQUIRE(macis::fls(d << 64) == 31 + 64); - REQUIRE(macis::fls(d << 128) == 31 + 128); + SECTION("32") { fls_test<32>(); } + SECTION("64") { fls_test<64>(); } + SECTION("128") { fls_test<128>(); } + SECTION("256") { fls_test<256>(); } } SECTION("Indices") { - bs<128> one(1); - bs<128> a = (one << 4) | (one << 67) | (one << 118) | (one << 31); - auto ind = macis::bits_to_indices(a); - std::vector ref = {4, 31, 67, 118}; - REQUIRE(ind == ref); + SECTION("32") { indices_test<32>(); } + SECTION("64") { indices_test<64>(); } + SECTION("128") { indices_test<128>(); } + SECTION("256") { indices_test<256>(); } } +#if 0 SECTION("Truncate") { bs<64> a_64(0xCCCCCCCCDEADDEAD); bs<32> ref(0xDEADDEAD); @@ -122,31 +223,28 @@ TEST_CASE("Bitset Operations") { bs<64> ref(0x00000000DEADDEAD); REQUIRE(macis::expand_bitset<64>(a_32) == ref); } +#endif + + SECTION("LO WORDS") { + SECTION("64") { lo_word_test<64>(); } + SECTION("128") { lo_word_test<128>(); } + SECTION("256") { lo_word_test<256>(); } + SECTION("512") { lo_word_test<512>(); } + SECTION("1024") { lo_word_test<1024>(); } + } + + SECTION("HI WORDS") { + SECTION("64") { hi_word_test<64>(); } + SECTION("128") { hi_word_test<128>(); } + SECTION("256") { hi_word_test<256>(); } + SECTION("512") { hi_word_test<512>(); } + SECTION("1024") { hi_word_test<1024>(); } + } SECTION("Compare") { - SECTION("32 bit") { - bs<32> a(65), b(42); - REQUIRE(macis::bitset_less(b, a)); - REQUIRE_FALSE(macis::bitset_less(a, b)); - } - SECTION("64 bit") { - bs<64> a(65ull << 32), b(42ull << 32); - REQUIRE(macis::bitset_less(b, a)); - REQUIRE_FALSE(macis::bitset_less(a, b)); - } - SECTION("128 bit") { - bs<128> a(65ull), b(42ull); - a = a << 64; - b = b << 64; - REQUIRE(macis::bitset_less(b, a)); - REQUIRE_FALSE(macis::bitset_less(a, b)); - } - SECTION("256 bit") { - bs<256> a(65ull), b(42ull); - a = a << 128; - b = b << 128; - REQUIRE(macis::bitset_less(b, a)); - REQUIRE_FALSE(macis::bitset_less(a, b)); - } + SECTION("32") { bitset_less_test<32>(); } + SECTION("64") { bitset_less_test<64>(); } + SECTION("128") { bitset_less_test<128>(); } + SECTION("256") { bitset_less_test<256>(); } } } diff --git a/tests/csr_hamiltonian.cxx b/tests/csr_hamiltonian.cxx index ba3e3a87..5be3cbed 100644 --- a/tests/csr_hamiltonian.cxx +++ b/tests/csr_hamiltonian.cxx @@ -11,11 +11,15 @@ #include #include #include +#include #include #include "ut_common.hpp" -TEST_CASE("CSR Hamiltonian") { +using wfn_type = macis::wfn_t<64>; +TEMPLATE_TEST_CASE("CSR Hamiltonian", "[ham_gen]", + macis::DoubleLoopHamiltonianGenerator, + macis::SortedDoubleLoopHamiltonianGenerator) { ROOT_ONLY(MPI_COMM_WORLD); size_t norb = macis::read_fcidump_norb(water_ccpvdz_fcidump); @@ -27,7 +31,8 @@ TEST_CASE("CSR Hamiltonian") { macis::read_fcidump_1body(water_ccpvdz_fcidump, T.data(), norb); macis::read_fcidump_2body(water_ccpvdz_fcidump, V.data(), norb); - using generator_type = macis::DoubleLoopHamiltonianGenerator<64>; + using wfn_traits = macis::wavefunction_traits; + using generator_type = TestType; #if 0 generator_type ham_gen(norb, V.data(), T.data()); @@ -38,12 +43,36 @@ TEST_CASE("CSR Hamiltonian") { #endif // Generate configuration space - const auto hf_det = macis::canonical_hf_determinant<64>(nocc, nocc); + const auto hf_det = wfn_traits::canonical_hf_determinant(nocc, nocc); auto dets = macis::generate_cisd_hilbert_space(norb, hf_det); + std::sort(dets.begin(), dets.end(), wfn_traits::spin_comparator{}); // Generate CSR Hamiltonian + auto st = std::chrono::high_resolution_clock::now(); auto H = macis::make_csr_hamiltonian_block( dets.begin(), dets.end(), dets.begin(), dets.end(), ham_gen, 1e-16); + auto en = std::chrono::high_resolution_clock::now(); + std::cout << std::chrono::duration(en - st).count() + << std::endl; + +// #define GENERATE_TESTS +#ifdef GENERATE_TESTS + std::string tmp_rowptr_fname = "tmp_rowptr.bin"; + std::string tmp_colind_fname = "tmp_colind.bin"; + std::string tmp_nzval_fname = "tmp_nzval.bin"; + + std::ofstream rowptr_file(tmp_rowptr_fname, std::ios::binary); + rowptr_file.write((char*)H.rowptr().data(), + H.rowptr().size() * sizeof(int32_t)); + std::ofstream colind_file(tmp_colind_fname, std::ios::binary); + colind_file.write((char*)H.colind().data(), + H.colind().size() * sizeof(int32_t)); + std::ofstream nzval_file(tmp_nzval_fname, std::ios::binary); + nzval_file.write((char*)H.nzval().data(), H.nzval().size() * sizeof(double)); +#else + + // std::cout << "NEW H " << H.m() << " " << H.nnz() << " " << H.indexing() << + // std::endl; // Read reference data std::vector ref_rowptr(H.rowptr().size()), @@ -68,10 +97,13 @@ TEST_CASE("CSR Hamiltonian") { for(auto i = 0ul; i < nnz; ++i) { REQUIRE(H.nzval()[i] == Approx(ref_nzval[i])); } +#endif } #ifdef MACIS_ENABLE_MPI -TEST_CASE("Distributed CSR Hamiltonian") { +TEMPLATE_TEST_CASE("Distributed CSR Hamiltonian", "[ham_gen]", + macis::DoubleLoopHamiltonianGenerator, + macis::SortedDoubleLoopHamiltonianGenerator) { MPI_Barrier(MPI_COMM_WORLD); size_t norb = macis::read_fcidump_norb(water_ccpvdz_fcidump); size_t nocc = 5; @@ -82,7 +114,8 @@ TEST_CASE("Distributed CSR Hamiltonian") { macis::read_fcidump_1body(water_ccpvdz_fcidump, T.data(), norb); macis::read_fcidump_2body(water_ccpvdz_fcidump, V.data(), norb); - using generator_type = macis::DoubleLoopHamiltonianGenerator<64>; + using wfn_traits = macis::wavefunction_traits; + using generator_type = TestType; #if 0 generator_type ham_gen(norb, V.data(), T.data()); @@ -93,8 +126,9 @@ TEST_CASE("Distributed CSR Hamiltonian") { #endif // Generate configuration space - const auto hf_det = macis::canonical_hf_determinant<64>(nocc, nocc); + const auto hf_det = wfn_traits::canonical_hf_determinant(nocc, nocc); auto dets = macis::generate_cisd_hilbert_space(norb, hf_det); + std::sort(dets.begin(), dets.end(), wfn_traits::spin_comparator{}); // Generate Distributed CSR Hamiltonian auto H_dist = macis::make_dist_csr_hamiltonian( diff --git a/tests/davidson.cxx b/tests/davidson.cxx index 64a9394e..db27e85b 100644 --- a/tests/davidson.cxx +++ b/tests/davidson.cxx @@ -32,7 +32,9 @@ TEST_CASE("Davidson") { macis::read_fcidump_1body(water_ccpvdz_fcidump, T.data(), norb); macis::read_fcidump_2body(water_ccpvdz_fcidump, V.data(), norb); - using generator_type = macis::DoubleLoopHamiltonianGenerator<64>; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + using generator_type = macis::DoubleLoopHamiltonianGenerator; #if 0 generator_type ham_gen(norb, V.data(), T.data()); @@ -43,7 +45,7 @@ TEST_CASE("Davidson") { #endif // Generate configuration space - const auto hf_det = macis::canonical_hf_determinant<64>(nocc, nocc); + const auto hf_det = wfn_traits::canonical_hf_determinant(nocc, nocc); auto dets = macis::generate_cisd_hilbert_space(norb, hf_det); auto E0_ref = -7.623197835987e+01; @@ -85,7 +87,9 @@ TEST_CASE("Parallel Davidson") { macis::read_fcidump_1body(water_ccpvdz_fcidump, T.data(), norb); macis::read_fcidump_2body(water_ccpvdz_fcidump, V.data(), norb); - using generator_type = macis::DoubleLoopHamiltonianGenerator<64>; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + using generator_type = macis::DoubleLoopHamiltonianGenerator; #if 0 generator_type ham_gen(norb, V.data(), T.data()); @@ -96,7 +100,7 @@ TEST_CASE("Parallel Davidson") { #endif // Generate configuration space - const auto hf_det = macis::canonical_hf_determinant<64>(nocc, nocc); + const auto hf_det = wfn_traits::canonical_hf_determinant(nocc, nocc); auto dets = macis::generate_cisd_hilbert_space(norb, hf_det); auto E0_ref = -7.623197835987e+01; diff --git a/tests/double_loop.cxx b/tests/double_loop.cxx index 5f326b3b..f1e4b602 100644 --- a/tests/double_loop.cxx +++ b/tests/double_loop.cxx @@ -6,6 +6,7 @@ * See LICENSE.txt for details */ +#include #include #include #include @@ -28,7 +29,9 @@ TEST_CASE("Double Loop") { macis::read_fcidump_1body(water_ccpvdz_fcidump, T.data(), norb); macis::read_fcidump_2body(water_ccpvdz_fcidump, V.data(), norb); - using generator_type = macis::DoubleLoopHamiltonianGenerator<64>; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + using generator_type = macis::DoubleLoopHamiltonianGenerator; #if 0 generator_type ham_gen(norb, V.data(), T.data()); @@ -37,7 +40,7 @@ TEST_CASE("Double Loop") { macis::matrix_span(T.data(), norb, norb), macis::rank4_span(V.data(), norb, norb, norb, norb)); #endif - const auto hf_det = macis::canonical_hf_determinant<64>(nocc, nocc); + const auto hf_det = wfn_traits::canonical_hf_determinant(nocc, nocc); std::vector eps(norb); for(auto p = 0ul; p < norb; ++p) { @@ -91,7 +94,7 @@ TEST_CASE("Double Loop") { for(size_t i = 0; i < nocc; ++i) for(size_t a = nocc; a < norb; ++a) { // Generate excited determinant - std::bitset<64> state = hf_det; + wfn_type state = hf_det; state.flip(i).flip(a); auto el_1 = ham_gen.matrix_element(hf_det, state); auto el_2 = ham_gen.matrix_element(state, hf_det); @@ -103,7 +106,7 @@ TEST_CASE("Double Loop") { for(size_t i = 0; i < nocc; ++i) for(size_t a = nocc; a < norb; ++a) { // Generate excited determinant - std::bitset<64> state = hf_det; + wfn_type state = hf_det; state.flip(i + 32).flip(a + 32); auto el_1 = ham_gen.matrix_element(hf_det, state); auto el_2 = ham_gen.matrix_element(state, hf_det); @@ -161,8 +164,8 @@ TEST_CASE("Double Loop") { SECTION("RDM") { std::vector ordm(norb * norb, 0.0), trdm(norb3 * norb, 0.0); - std::vector> dets = { - macis::canonical_hf_determinant<64>(nocc, nocc)}; + std::vector dets = { + wfn_traits::canonical_hf_determinant(nocc, nocc)}; std::vector C = {1.}; @@ -194,14 +197,16 @@ TEST_CASE("RDMS") { macis::rank4_span V_span(V.data(), norb, norb, norb, norb); macis::rank4_span trdm_span(trdm.data(), norb, norb, norb, norb); - using generator_type = macis::DoubleLoopHamiltonianGenerator<128>; + using wfn_type = macis::wfn_t<128>; + using wfn_traits = macis::wavefunction_traits; + using generator_type = macis::DoubleLoopHamiltonianGenerator; generator_type ham_gen(T_span, V_span); auto abs_sum = [](auto a, auto b) { return a + std::abs(b); }; SECTION("HF") { - std::vector> dets = { - macis::canonical_hf_determinant<128>(nocc, nocc)}; + std::vector dets = { + wfn_traits::canonical_hf_determinant(nocc, nocc)}; std::vector C = {1.}; @@ -224,7 +229,7 @@ TEST_CASE("RDMS") { } SECTION("CI") { - std::vector> states; + std::vector states; std::vector coeffs; macis::read_wavefunction<128>(ch4_wfn_fname, states, coeffs); diff --git a/tests/fcidump.cxx b/tests/fcidump.cxx index 7f39c11c..5540564c 100644 --- a/tests/fcidump.cxx +++ b/tests/fcidump.cxx @@ -8,8 +8,8 @@ #include #include +#include #include -#include #include "ut_common.hpp" diff --git a/tests/fcidump_to_trexio.cxx b/tests/fcidump_to_trexio.cxx new file mode 100644 index 00000000..98648678 --- /dev/null +++ b/tests/fcidump_to_trexio.cxx @@ -0,0 +1,76 @@ +/* + * MACIS Copyright (c) 2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ + +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + std::vector opts(argc); + for(int i = 0; i < argc; ++i) { + opts[i] = std::string(argv[i]); + } + + std::string fcidump_fname = opts.at(1); + std::string trexio_fname = opts.at(2); + + std::cout << "FCIDUMP FILE = " << fcidump_fname << std::endl; + std::cout << "TREXIO FILE = " << trexio_fname << std::endl; + + // Read the FCIDUMP file + std::cout << "Reading FCIDUMP File..." << std::flush; + size_t norb = macis::read_fcidump_norb(fcidump_fname); + size_t norb2 = norb * norb; + size_t norb3 = norb2 * norb; + size_t norb4 = norb2 * norb2; + + std::vector T(norb2), V(norb4); + auto E_core = macis::read_fcidump_core(fcidump_fname); + macis::read_fcidump_1body(fcidump_fname, T.data(), norb); + macis::read_fcidump_2body(fcidump_fname, V.data(), norb); + + std::cout << "Done!" << std::endl; + + // Write TREXIO file + std::cout << "Writing TREXIO file..." << std::flush; + { + macis::TREXIOFile trexio_file(trexio_fname, 'w', TREXIO_HDF5); + trexio_file.write_mo_num(norb); + trexio_file.write_nucleus_repulsion(E_core); + trexio_file.write_mo_1e_int_core_hamiltonian(T.data()); + trexio_file.write_mo_2e_int_eri(V.data()); + } + std::cout << "Done!" << std::endl; + + std::vector T_chk(norb2), V_chk(norb4); + size_t norb_chk; + double E_core_chk; + std::cout << "Reading TREXIO file..." << std::flush; + { + macis::TREXIOFile trexio_file(trexio_fname, 'r', TREXIO_AUTO); + norb_chk = trexio_file.read_mo_num(); + E_core_chk = trexio_file.read_nucleus_repulsion(); + trexio_file.read_mo_1e_int_core_hamiltonian(T_chk.data()); + trexio_file.read_mo_2e_int_eri(V_chk.data()); + } + std::cout << "Done!" << std::endl; + + std::cout << "NORB_CHECK = " << norb - norb_chk << std::endl; + std::cout << "ECOR_CHECK = " << std::abs(E_core - E_core_chk) << std::endl; + + double max_diff = 0; + for(auto i = 0; i < norb2; ++i) + max_diff = std::max(max_diff, std::abs(T[i] - T_chk[i])); + std::cout << "T_CHECK = " << max_diff << std::endl; + max_diff = 0; + for(auto i = 0; i < norb2; ++i) + max_diff = std::max(max_diff, std::abs(V[i] - V_chk[i])); + std::cout << "V_CHECK = " << max_diff << std::endl; +} diff --git a/tests/fock_matrices.cxx b/tests/fock_matrices.cxx index 75a84a05..f4acdffc 100644 --- a/tests/fock_matrices.cxx +++ b/tests/fock_matrices.cxx @@ -7,9 +7,9 @@ */ #include +#include #include #include -#include #include "ut_common.hpp" diff --git a/tests/mcscf.cxx b/tests/mcscf.cxx index 7bdd1b80..c928ff1f 100644 --- a/tests/mcscf.cxx +++ b/tests/mcscf.cxx @@ -10,9 +10,9 @@ #include #include +#include #include #include -#include #include "ut_common.hpp" diff --git a/tests/ref_data/h2o.ccpvdz.cisd.colind.bin b/tests/ref_data/h2o.ccpvdz.cisd.colind.bin index fee257c3..60de79f7 100644 Binary files a/tests/ref_data/h2o.ccpvdz.cisd.colind.bin and b/tests/ref_data/h2o.ccpvdz.cisd.colind.bin differ diff --git a/tests/ref_data/h2o.ccpvdz.cisd.nzval.bin b/tests/ref_data/h2o.ccpvdz.cisd.nzval.bin index 06c86f49..f73ac81c 100644 Binary files a/tests/ref_data/h2o.ccpvdz.cisd.nzval.bin and b/tests/ref_data/h2o.ccpvdz.cisd.nzval.bin differ diff --git a/tests/ref_data/h2o.ccpvdz.cisd.rowptr.bin b/tests/ref_data/h2o.ccpvdz.cisd.rowptr.bin index 1142270e..13d1207d 100644 Binary files a/tests/ref_data/h2o.ccpvdz.cisd.rowptr.bin and b/tests/ref_data/h2o.ccpvdz.cisd.rowptr.bin differ diff --git a/tests/sd_operations.cxx b/tests/sd_operations.cxx index dd4612a6..ae540c25 100644 --- a/tests/sd_operations.cxx +++ b/tests/sd_operations.cxx @@ -16,25 +16,31 @@ TEST_CASE("Slater Det Operations") { SECTION("HF Determinant") { SECTION("Canonical Ordering") { - auto hf = macis::canonical_hf_determinant<32>(4, 2); + using wfn_type = macis::wfn_t<32>; + using wfn_traits = macis::wavefunction_traits; + auto hf = wfn_traits::canonical_hf_determinant(4, 2); REQUIRE(hf == 0x0003000F); } +#if 0 SECTION("Unordered") { std::vector orb_energies = {3, 2, 7, 8, 5}; auto hf = macis::canonical_hf_determinant<32>(3, 2, orb_energies); REQUIRE(hf == 0x00030013); } +#endif } SECTION("Occupied / Unoccupied Conversion") { SECTION("Default") { - std::bitset<64> state = 0x00000000000000AF; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + wfn_type state = 0x00000000000000AF; std::vector ref_occ = {0, 1, 2, 3, 5, 7}; std::vector ref_vir = {4, 6, 8, 9, 10, 11}; std::vector occ, vir; - macis::bitset_to_occ_vir(12, state, occ, vir); + wfn_traits::state_to_occ_vir(12, state, occ, vir); REQUIRE(occ == ref_occ); REQUIRE(vir == ref_vir); @@ -45,14 +51,16 @@ TEST_CASE("Slater Det Operations") { SECTION("Singles") { SECTION("Single Spin") { - std::vector> ref_singles = { + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + std::vector ref_singles = { 0b00011110, 0b00101110, 0b01001110, 0b10001110, 0b00011101, 0b00101101, 0b01001101, 0b10001101, 0b00011011, 0b00101011, 0b01001011, 0b10001011, 0b00010111, 0b00100111, 0b01000111, 0b10000111}; - std::vector> singles; - auto state = macis::canonical_hf_determinant<64>(4, 0); // all alpha + std::vector singles; + auto state = wfn_traits::canonical_hf_determinant(4, 0); // all alpha macis::generate_singles(8, state, singles); auto cmp = macis::bitset_less_comparator<64>{}; @@ -63,7 +71,9 @@ TEST_CASE("Slater Det Operations") { } SECTION("Both Spin") { - std::vector> ref_singles = { + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + std::vector ref_singles = { 0x0F0000001E, 0x0F0000001D, 0x0F0000001B, 0x0F00000017, 0x0F0000002E, 0x0F0000002D, 0x0F0000002B, 0x0F00000027, 0x0F0000004E, 0x0F0000004D, 0x0F0000004B, 0x0F00000047, 0x0F0000008E, 0x0F0000008D, 0x0F0000008B, @@ -72,8 +82,8 @@ TEST_CASE("Slater Det Operations") { 0x4D0000000F, 0x4B0000000F, 0x470000000F, 0x8E0000000F, 0x8D0000000F, 0x8B0000000f, 0x870000000f}; - auto state = macis::canonical_hf_determinant<64>(4, 4); - std::vector> singles; + auto state = wfn_traits::canonical_hf_determinant(4, 4); + std::vector singles; macis::generate_singles_spin(8, state, singles); auto cmp = macis::bitset_less_comparator<64>{}; @@ -86,7 +96,9 @@ TEST_CASE("Slater Det Operations") { SECTION("Doubles") { SECTION("Single Spin") { - std::vector> ref_doubles = { + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + std::vector ref_doubles = { 0b00111100, 0b01011100, 0b10011100, 0b01101100, 0b10101100, 0b11001100, 0b00111010, 0b01011010, 0b10011010, 0b01101010, 0b10101010, 0b11001010, 0b00111001, 0b01011001, 0b10011001, @@ -96,8 +108,8 @@ TEST_CASE("Slater Det Operations") { 0b00110011, 0b01010011, 0b10010011, 0b01100011, 0b10100011, 0b11000011}; - std::vector> doubles; - auto state = macis::canonical_hf_determinant<64>(4, 0); // all alpha + std::vector doubles; + auto state = wfn_traits::canonical_hf_determinant(4, 0); // all alpha macis::generate_doubles(8, state, doubles); auto cmp = macis::bitset_less_comparator<64>{}; @@ -108,7 +120,9 @@ TEST_CASE("Slater Det Operations") { } SECTION("Both Spins") { - std::vector> ref_doubles = { + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + std::vector ref_doubles = { 0x0F0000003C, 0x0F0000003A, 0x0F00000036, 0x0F0000005C, 0x0F0000005A, 0x0F00000056, 0x0F0000009C, 0x0F0000009A, 0x0F00000096, 0x0F00000039, 0x0F00000035, 0x0F00000059, 0x0F00000055, 0x0F00000099, 0x0F00000095, @@ -175,8 +189,8 @@ TEST_CASE("Slater Det Operations") { 0x1700000087, 0x2E00000087, 0x2D00000087, 0x2B00000087, 0x2700000087, 0x4E00000087, 0x4D00000087, 0x4B00000087, 0x4700000087, 0x8E00000087, 0x8D00000087, 0x8B00000087, 0x8700000087}; - auto state = macis::canonical_hf_determinant<64>(4, 4); - std::vector> singles, doubles; + auto state = wfn_traits::canonical_hf_determinant(4, 4); + std::vector singles, doubles; macis::generate_singles_doubles_spin(8, state, singles, doubles); auto cmp = macis::bitset_less_comparator<64>{}; @@ -188,8 +202,10 @@ TEST_CASE("Slater Det Operations") { } SECTION("CIS") { - auto state = macis::canonical_hf_determinant<64>(4, 4); - std::vector> singles; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + auto state = wfn_traits::canonical_hf_determinant(4, 4); + std::vector singles; macis::generate_singles_spin(8, state, singles); singles.push_back(state); auto cis = macis::generate_cis_hilbert_space(8, state); @@ -201,8 +217,10 @@ TEST_CASE("Slater Det Operations") { } SECTION("CISD") { - auto state = macis::canonical_hf_determinant<64>(4, 4); - std::vector> singles, doubles; + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + auto state = wfn_traits::canonical_hf_determinant(4, 4); + std::vector singles, doubles; macis::generate_singles_doubles_spin(8, state, singles, doubles); singles.insert(singles.end(), doubles.begin(), doubles.end()); singles.push_back(state); @@ -216,13 +234,13 @@ TEST_CASE("Slater Det Operations") { SECTION("FCI") { SECTION("Single Spin") { - std::vector> ref_combs = {0x3, 0x5, 0x9, 0x6, 0xA, 0xC}; - auto combs = macis::generate_combs<64>(4, 2); + std::vector> ref_combs = {0x3, 0x5, 0x9, 0x6, 0xA, 0xC}; + auto combs = macis::generate_combs>(4, 2); REQUIRE(combs == ref_combs); } SECTION("Both Spins") { - std::vector> ref_dets = { + std::vector> ref_dets = { 0x300000003, 0x500000003, 0x900000003, 0x600000003, 0xA00000003, 0xC00000003, 0x300000005, 0x500000005, 0x900000005, 0x600000005, 0xA00000005, 0xC00000005, 0x300000009, 0x500000009, 0x900000009, @@ -232,13 +250,16 @@ TEST_CASE("Slater Det Operations") { 0x30000000C, 0x50000000C, 0x90000000C, 0x60000000C, 0xA0000000C, 0xC0000000C}; - auto dets = macis::generate_hilbert_space<64>(4, 2, 2); + auto dets = macis::generate_hilbert_space>(4, 2, 2); REQUIRE(dets == ref_dets); } } SECTION("String Conversions") { - auto state = macis::canonical_hf_determinant<64>(2, 2).flip(3).flip(4 + 32); + using wfn_type = macis::wfn_t<64>; + using wfn_traits = macis::wavefunction_traits; + auto state = + wfn_traits::canonical_hf_determinant(2, 2).flip(3).flip(4 + 32); SECTION("To String") { auto str = macis::to_canonical_string(state); @@ -248,13 +269,13 @@ TEST_CASE("Slater Det Operations") { SECTION("From String") { SECTION("Full String") { std::string str = "220ud000000000000000000000000000"; - auto det = macis::from_canonical_string<64>(str); + auto det = macis::from_canonical_string(str); REQUIRE(det == state); } SECTION("Short String") { std::string str = "220ud"; - auto det = macis::from_canonical_string<64>(str); + auto det = macis::from_canonical_string(str); REQUIRE(det == state); } } diff --git a/tests/standalone_driver.cxx b/tests/standalone_driver.cxx index 44794230..2f44e0e0 100644 --- a/tests/standalone_driver.cxx +++ b/tests/standalone_driver.cxx @@ -15,16 +15,19 @@ #include #include #include +#include #include #include -#include +#include +#include +#include #include #include -#include #include #include #include #include +#include #include #include #include @@ -61,9 +64,14 @@ int main(int argc, char** argv) { spdlog::cfg::load_env_levels(); spdlog::set_pattern("[%n] %v"); - constexpr size_t nwfn_bits = 64; + constexpr size_t nwfn_bits = 256; + using wfn_type = macis::wfn_t; + using wfn_traits = macis::wavefunction_traits; - MACIS_MPI_CODE(MPI_Init(&argc, &argv);) + MACIS_MPI_CODE(int dummy; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &dummy); + if(dummy != MPI_THREAD_MULTIPLE) throw std::runtime_error( + "MPI Thread Init Failed");) #ifdef MACIS_ENABLE_MPI auto world_rank = macis::comm_rank(MPI_COMM_WORLD); @@ -84,30 +92,51 @@ int main(int argc, char** argv) { auto input_file = opts.at(1); INIFile input(input_file); - // Required Keywords - auto fcidump_fname = input.getData("CI.FCIDUMP"); - auto nalpha = input.getData("CI.NALPHA"); - auto nbeta = input.getData("CI.NBETA"); - - if(nalpha != nbeta) throw std::runtime_error("NALPHA != NBETA"); - - // Read FCIDUMP File - size_t norb = macis::read_fcidump_norb(fcidump_fname); - size_t norb2 = norb * norb; - size_t norb3 = norb2 * norb; - size_t norb4 = norb2 * norb2; - - // XXX: Consider reading this into shared memory to avoid replication - std::vector T(norb2), V(norb4); - auto E_core = macis::read_fcidump_core(fcidump_fname); - macis::read_fcidump_1body(fcidump_fname, T.data(), norb); - macis::read_fcidump_2body(fcidump_fname, V.data(), norb); - #define OPT_KEYWORD(STR, RES, DTYPE) \ if(input.containsData(STR)) { \ RES = input.getData(STR); \ } + // Required Keywords + auto nalpha = input.getData("CI.NALPHA"); + auto nbeta = input.getData("CI.NBETA"); + + std::string reference_data_format = + input.getData("CI.REF_DATA_FORMAT"); + std::string reference_data_file = + input.getData("CI.REF_DATA_FILE"); + + size_t norb, norb2, norb3, norb4; + std::vector T, V; + double E_core; + if(reference_data_format == "FCIDUMP") { + // Read FCIDUMP File + norb = macis::read_fcidump_norb(reference_data_file); + norb2 = norb * norb; + norb3 = norb2 * norb; + norb4 = norb2 * norb2; + + // XXX: Consider reading this into shared memory to avoid replication + T.resize(norb2); + V.resize(norb4); + E_core = macis::read_fcidump_core(reference_data_file); + macis::read_fcidump_1body(reference_data_file, T.data(), norb); + macis::read_fcidump_2body(reference_data_file, V.data(), norb); + } else { // TREXIO + macis::TREXIOFile trexio_file(reference_data_file, 'r', TREXIO_AUTO); + norb = trexio_file.read_mo_num(); + norb2 = norb * norb; + norb3 = norb2 * norb; + norb4 = norb2 * norb2; + + // XXX: Consider reading this into shared memory to avoid replication + T.resize(norb2); + V.resize(norb4); + E_core = trexio_file.read_nucleus_repulsion(); + trexio_file.read_mo_1e_int_core_hamiltonian(T.data()); + trexio_file.read_mo_2e_int_eri(V.data()); + } + // Set up job std::string job_str = "MCSCF"; OPT_KEYWORD("CI.JOB", job_str, std::string); @@ -167,9 +196,9 @@ int main(int argc, char** argv) { // ASCI Settings macis::ASCISettings asci_settings; - std::string asci_wfn_fname, asci_wfn_out_fname; + std::string asci_wfn_fname, asci_wfn_out_fname, asci_ham_out_fname; double asci_E0 = 0.0; - bool compute_asci_E0 = true; + bool compute_asci_E0 = true, pt2 = true; OPT_KEYWORD("ASCI.NTDETS_MAX", asci_settings.ntdets_max, size_t); OPT_KEYWORD("ASCI.NTDETS_MIN", asci_settings.ntdets_min, size_t); OPT_KEYWORD("ASCI.NCDETS_MAX", asci_settings.ncdets_max, size_t); @@ -190,17 +219,49 @@ int main(int argc, char** argv) { asci_E0 = input.getData("ASCI.E0_WFN"); compute_asci_E0 = false; } + OPT_KEYWORD("ASCI.HAM_OUT_FILE", asci_ham_out_fname, std::string); + OPT_KEYWORD("ASCI.PT2", pt2, bool); + OPT_KEYWORD("ASCI.PT2_TOL", asci_settings.pt2_tol, double); + OPT_KEYWORD("ASCI.PT2_RESERVE_COUNT", asci_settings.pt2_reserve_count, + size_t); + OPT_KEYWORD("ASCI.PT2_CONSTRAINT_LVL_MAX", + asci_settings.pt2_max_constraint_level, int); + OPT_KEYWORD("ASCI.PT2_CONSTRAINT_LVL_MIN", + asci_settings.pt2_min_constraint_level, int); + OPT_KEYWORD("ASCI.PT2_CNSTRNT_RFNE_FORCE", + asci_settings.pt2_constraint_refine_force, int64_t); + OPT_KEYWORD("ASCI.PT2_PRUNE", asci_settings.pt2_prune, bool); + OPT_KEYWORD("ASCI.PT2_PRECOMPUTE_EPS", asci_settings.pt2_precompute_eps, + bool); + OPT_KEYWORD("ASCI.PT2_PRECOMPUTE_IDX", asci_settings.pt2_precompute_idx, + bool); + OPT_KEYWORD("ASCI.PT2_PRINT_PROGRESS", asci_settings.pt2_print_progress, + bool); + OPT_KEYWORD("ASCI.PT2_BIGCON_THRESH", asci_settings.pt2_bigcon_thresh, + size_t); + OPT_KEYWORD("ASCI.NXTVAL_BCOUNT_THRESH", asci_settings.nxtval_bcount_thresh, + size_t); + OPT_KEYWORD("ASCI.NXTVAL_BCOUNT_INC", asci_settings.nxtval_bcount_inc, + size_t); bool mp2_guess = false; OPT_KEYWORD("MCSCF.MP2_GUESS", mp2_guess, bool); if(!world_rank) { + console->info("[Standalone MACIS Driver]:"); + console->info(" * NMPI = {}", world_size); + console->info(" * NTHREADS = {}", omp_get_max_threads()); console->info("[Wavefunction Data]:"); - console->info(" * JOB = {}", job_str); - console->info(" * CIEXP = {}", ciexp_str); - console->info(" * FCIDUMP = {}", fcidump_fname); + console->info(" * JOB = {}", job_str); + console->info(" * CIEXP = {}", ciexp_str); + console->info(" * REF_FILE_NAME = {}", reference_data_file); + console->info(" * REF_FILE_FMT = {}", reference_data_format); if(fci_out_fname.size()) console->info(" * FCIDUMP_OUT = {}", fci_out_fname); + console->info(" * NORBITAL = {}", norb); + console->info(" * NINACTIVE = {}", n_inactive); + console->info(" * NACTIVE = {}", n_active); + console->info(" * NVIRTUAL = {}", n_virtual); console->info(" * MP2_GUESS = {}", mp2_guess); console->debug("READ {} 1-body integrals and {} 2-body integrals", @@ -234,6 +295,9 @@ int main(int argc, char** argv) { // MP2 Guess Orbitals if(mp2_guess) { + if(nalpha != nbeta) + throw std::runtime_error("MP2 Guess only implemented for closed-shell"); + console->info("Calculating MP2 Natural Orbitals"); size_t nocc_canon = n_inactive + nalpha; size_t nvir_canon = norb - nocc_canon; @@ -278,13 +342,12 @@ int main(int argc, char** argv) { std::vector active_trdm(active_ordm.size() * active_ordm.size()); double E0 = 0; - + double EPT2 = 0; // CI if(job == Job::CI) { - using generator_t = macis::DoubleLoopHamiltonianGenerator; + using generator_t = macis::SortedDoubleLoopHamiltonianGenerator; if(ci_exp == CIExpansion::CAS) { std::vector C_local; - // TODO: VERIFY MPI + CAS E0 = macis::CASRDMFunctor::rdms( mcscf_settings, NumOrbital(n_active), nalpha, nbeta, T_active.data(), V_active.data(), active_ordm.data(), @@ -295,10 +358,10 @@ int main(int argc, char** argv) { auto det_logger = world_rank ? spdlog::null_logger_mt("determinants") : spdlog::stdout_color_mt("determinants"); - det_logger->info("Print leading determinants > {:.12f}", + det_logger->info("Print leading determinants > {:.2e}", determinants_threshold); - auto dets = macis::generate_hilbert_space( - n_active, nalpha, nbeta); + auto dets = + macis::generate_hilbert_space(n_active, nalpha, nbeta); for(size_t i = 0; i < dets.size(); ++i) { if(std::abs(C_local[i]) > determinants_threshold) { det_logger->info("{:>16.12f} {}", C_local[i], @@ -308,13 +371,16 @@ int main(int argc, char** argv) { } } else { + // if(nalpha != nbeta) + // throw std::runtime_error("ASCI Only Implemented for Closed-Shell"); + // Generate the Hamiltonian Generator generator_t ham_gen( macis::matrix_span(T_active.data(), n_active, n_active), macis::rank4_span(V_active.data(), n_active, n_active, n_active, n_active)); - std::vector> dets; + std::vector dets; std::vector C; if(asci_wfn_fname.size()) { // Read wave function from standard file @@ -338,7 +404,7 @@ int main(int argc, char** argv) { } else { // HF Guess console->info("Generating HF Guess for ASCI"); - dets = {macis::canonical_hf_determinant(nalpha, nalpha)}; + dets = {wfn_traits::canonical_hf_determinant(nalpha, nbeta)}; // std::cout << dets[0].to_ullong() << std::endl; E0 = ham_gen.matrix_element(dets[0], dets[0]); C = {1.0}; @@ -350,13 +416,13 @@ int main(int argc, char** argv) { auto asci_st = hrt_t::now(); // Growth phase - std::tie(E0, dets, C) = macis::asci_grow( + std::tie(E0, dets, C) = macis::asci_grow( asci_settings, mcscf_settings, E0, std::move(dets), std::move(C), ham_gen, n_active MACIS_MPI_CODE(, MPI_COMM_WORLD)); // Refinement phase if(asci_settings.max_refine_iter) { - std::tie(E0, dets, C) = macis::asci_refine( + std::tie(E0, dets, C) = macis::asci_refine( asci_settings, mcscf_settings, E0, std::move(dets), std::move(C), ham_gen, n_active MACIS_MPI_CODE(, MPI_COMM_WORLD)); } @@ -367,17 +433,48 @@ int main(int argc, char** argv) { if(asci_wfn_out_fname.size() and !world_rank) { console->info("Writing ASCI Wavefunction to {}", asci_wfn_out_fname); + // if(reference_data_format == "TREXIO") { + // console->info(" * Format TREXIO"); + // macis::TREXIOFile trexio_file(asci_wfn_out_fname, 'w', + // TREXIO_HDF5); trexio_file.write_mo_num(nwfn_bits/2); // Trick + // TREXIO trexio_file.write_determinant_list(dets.size(), + // reinterpret_cast(dets.data())); + // } else { + console->info(" * Format TEXT"); macis::write_wavefunction(asci_wfn_out_fname, n_active, dets, C); + //} } // Dump Hamiltonian -#if 0 - if(0) { +#if 1 + std::cout << "ASCI HAM FNAME = " << asci_ham_out_fname << std::endl; + if(asci_ham_out_fname.size()) { + std::cout << "HERE" << std::endl; auto H = macis::make_dist_csr_hamiltonian( MPI_COMM_WORLD, dets.begin(), dets.end(), ham_gen, 1e-16); - sparsexx::write_dist_mm("ham.mtx", H, 1); + sparsexx::write_dist_mm(asci_ham_out_fname, H, 1); } #endif + if(pt2) { + MPI_Barrier(MPI_COMM_WORLD); + auto pt2_st = hrt_t::now(); + EPT2 = macis::asci_pt2_constraint( + asci_settings, dets.begin(), dets.end(), + E0 - (E_inactive + E_core), C, n_active, ham_gen.T(), + ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), ham_gen.V(), + ham_gen MACIS_MPI_CODE(, MPI_COMM_WORLD)); + MPI_Barrier(MPI_COMM_WORLD); + auto pt2_en = hrt_t::now(); + dur_t pt2_dur = pt2_en - pt2_st; + console->info("* ASCI_PT2_DUR = {:.2e} ms", pt2_dur.count()); + } + } + + console->info("E(CI) = {:.12f} Eh", E0); + + if(pt2) { + console->info("E(PT2) = {:.12f} Eh", EPT2); + console->info("E(CI+PT2) = {:.12f} Eh", E0 + EPT2); } // MCSCF @@ -410,9 +507,9 @@ int main(int argc, char** argv) { NumVirtual(n_virtual), E_core, T.data(), norb, V.data(), norb, active_ordm.data(), n_active, active_trdm.data(), n_active MACIS_MPI_CODE(, MPI_COMM_WORLD)); - } - console->info("E(CI) = {:.12f} Eh", E0); + console->info("E(CASSCF) = {:.12f} Eh", E0); + } // Write FCIDUMP file if requested if(fci_out_fname.size())