Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "6.13.2"
version = "6.13.3"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
3 changes: 2 additions & 1 deletion src/include/homestore/btree/btree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,10 @@ class Btree {
uint64_t get_btree_node_cnt() const;
uint64_t get_child_node_cnt(bnodeid_t bnodeid) const;
void to_string(bnodeid_t bnodeid, std::string& buf) const;
void to_custom_string_internal(bnodeid_t bnodeid, std::string& buf, to_string_cb_t< K, V > const& cb) const;
void to_custom_string_internal(bnodeid_t bnodeid, std::string& buf, to_string_cb_t< K, V > const& cb, int nindent=-1) const;
void to_dot_keys(bnodeid_t bnodeid, std::string& buf, std::map< uint32_t, std::vector< uint64_t > >& l_map,
std::map< uint64_t, BtreeVisualizeVariables >& info_map) const;
void sanity_sub_tree(bnodeid_t bnodeid=0) const;
void validate_sanity_child(const BtreeNodePtr& parent_node, uint32_t ind) const;
void validate_sanity_next_child(const BtreeNodePtr& parent_node, uint32_t ind) const;
void print_node(const bnodeid_t& bnodeid) const;
Expand Down
84 changes: 62 additions & 22 deletions src/include/homestore/btree/detail/btree_common.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -148,23 +148,27 @@ void Btree< K, V >::to_string(bnodeid_t bnodeid, std::string& buf) const {

/// Appends a textual dump of the subtree rooted at `bnodeid` to `buf`.
///
/// The node itself is written first (its to_custom_string(cb) text followed by a newline), then every child
/// in entry order, then the edge child when the node has a valid edge.
///
/// @param bnodeid  Id of the node at which the dump starts.
/// @param buf      Output string; text is appended, existing content is kept.
/// @param cb       Callback forwarded to BtreeNode::to_custom_string() for rendering.
/// @param nindent  Level used as the indentation origin. A negative value means "use the level of this node",
///                 so the starting node is printed flush left; the resolved value is passed unchanged to
///                 every recursive call.
///
/// Each node is prefixed with three spaces per level it sits below `nindent`. The node stays read-locked while
/// its children are dumped. If the node cannot be read and locked, nothing is appended for it.
template < typename K, typename V >
void Btree< K, V >::to_custom_string_internal(bnodeid_t bnodeid, std::string& buf, to_string_cb_t< K, V > const& cb,
                                              int nindent) const {
    BtreeNodePtr node;
    locktype_t acq_lock = locktype_t::READ;

    if (read_and_lock_node(bnodeid, node, acq_lock, acq_lock, nullptr) != btree_status_t::success) { return; }

    if (nindent < 0) { nindent = node->level(); }

    // A caller-supplied origin below this node's level would make the difference negative; converting that to
    // the unsigned string length would request an enormous allocation, so clamp the indentation at zero.
    int const depth = nindent - static_cast< int >(node->level());
    std::string const tabs(depth > 0 ? 3 * static_cast< std::size_t >(depth) : 0, ' ');
    fmt::format_to(std::back_inserter(buf), "{}{}\n", tabs, node->to_custom_string(cb));

    if (!node->is_leaf()) {
        for (uint32_t i = 0; i < node->total_entries(); ++i) {
            BtreeLinkInfo p;
            node->get_nth_value(i, &p, false);
            to_custom_string_internal(p.bnode_id(), buf, cb, nindent);
        }
        if (node->has_valid_edge()) { to_custom_string_internal(node->edge_id(), buf, cb, nindent); }
    }
    unlock_node(node, acq_lock);
}
Expand Down Expand Up @@ -222,6 +226,35 @@ uint64_t Btree< K, V >::count_keys(bnodeid_t bnodeid) const {
return result;
}

/// Recursively sanity-checks the subtree rooted at `bnodeid`.
///
/// A `bnodeid` of 0 selects the root node recorded in m_root_node_info. If the node cannot be read, the failure
/// is logged and the call returns. Leaf nodes are not checked further. For an interior node the function
/// asserts that it does not have both a valid edge and a next node, runs validate_sanity_child() on every child
/// slot (including the edge slot when present), and only then descends into each child.
///
/// @param bnodeid  Id of the subtree root to check, or 0 for the btree root.
template < typename K, typename V >
void Btree< K, V >::sanity_sub_tree(bnodeid_t bnodeid) const {
    if (bnodeid == 0) { bnodeid = m_root_node_info.bnode_id(); }

    BtreeNodePtr node;
    if (auto const status = read_node_impl(bnodeid, node); status != btree_status_t::success) {
        LOGINFO("reading node failed for bnodeid: {} reason: {}", bnodeid, status);
        return;
    }
    if (node->is_leaf()) { return; }

    // The edge, when present, counts as one extra child slot after the regular entries.
    uint32_t num_children = node->total_entries();
    if (node->has_valid_edge()) { ++num_children; }

    BT_REL_ASSERT_NE(node->has_valid_edge() && node->next_bnode() != empty_bnodeid, true, "node {} has valid edge and next id is not empty", node->to_string());

    // First pass: validate every child against this node and remember its id.
    std::vector< bnodeid_t > children;
    children.reserve(num_children);
    for (uint32_t slot = 0; slot < num_children; ++slot) {
        validate_sanity_child(node, slot);
        BtreeLinkInfo link;
        node->get_nth_value(slot, &link, false /* copy */);
        children.push_back(link.bnode_id());
    }

    // Second pass: descend only after all children of this node have been validated.
    for (auto const id : children) {
        sanity_sub_tree(id);
    }
}

template < typename K, typename V >
void Btree< K, V >::validate_sanity_child(const BtreeNodePtr& parent_node, uint32_t ind) const {
BtreeLinkInfo child_info;
Expand All @@ -240,26 +273,33 @@ void Btree< K, V >::validate_sanity_child(const BtreeNodePtr& parent_node, uint3
}
return;
}
child_node->get_first_key(&child_first_key);
child_node->get_last_key(&child_last_key);
BT_REL_ASSERT_LE(child_first_key.compare(&child_last_key), 0);
if (ind == parent_node->total_entries()) {
BT_REL_ASSERT_NE(child_node->is_node_deleted(), true, "child node {} is deleted", child_node->to_string());
if(ind >= parent_node->total_entries()){
BT_REL_ASSERT_EQ(parent_node->has_valid_edge(), true);
if (ind > 0) {
parent_node->get_nth_key< K >(ind - 1, &parent_key, false);
BT_REL_ASSERT_GT(child_first_key.compare(&parent_key), 0);
BT_REL_ASSERT_LT(parent_key.compare_start(&child_first_key), 0);
if( ind >0){
parent_key = parent_node->get_nth_key< K >(ind -1, false);
}
} else {
parent_node->get_nth_key< K >(ind, &parent_key, false);
BT_REL_ASSERT_LE(child_first_key.compare(&parent_key), 0)
BT_REL_ASSERT_LE(child_last_key.compare(&parent_key), 0)
BT_REL_ASSERT_GE(parent_key.compare_start(&child_first_key), 0)
BT_REL_ASSERT_GE(parent_key.compare_start(&child_first_key), 0)
if (ind != 0) {
parent_node->get_nth_key< K >(ind - 1, &parent_key, false);
BT_REL_ASSERT_GT(child_first_key.compare(&parent_key), 0)
BT_REL_ASSERT_LT(parent_key.compare_start(&child_first_key), 0)
}else
{
parent_key = parent_node->get_nth_key< K >(ind, false);
}
K previous_parent_key;
if( ind >0 && parent_node->total_entries()>0){
previous_parent_key = parent_node->get_nth_key< K >(ind - 1, false);
}
for (uint32_t i = 0; i <child_node->total_entries() ; ++i) {
K cur_child_key = child_node->get_nth_key< K >(i, false);
if(ind < parent_node->total_entries()){
BT_REL_ASSERT_LE(cur_child_key.compare(parent_key), 0, " child {} {}-th key is greater than its parent's {} {}-th key", child_node->to_string(), i , parent_node->to_string(), ind);
if(ind>0) {
BT_REL_ASSERT_GT(cur_child_key.compare(previous_parent_key), 0,
" child {} {}-th key is less than its parent's {} {}-th key", child_node->to_string(),
i, parent_node->to_string(), ind - 1);
}

}else
{
BT_REL_ASSERT_GT(cur_child_key.compare(parent_key), 0, " child {} {}-th key is greater than its parent {} {}-th key", child_node->to_string(), i , parent_node->to_string(), ind);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/include/homestore/btree/detail/btree_internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,19 +250,19 @@ struct BtreeConfig {
uint64_t m_min_keys_in_node{0};
#endif
bool m_rebalance_turned_on{false};
bool m_merge_turned_on{true};

btree_node_type m_leaf_node_type{btree_node_type::VAR_OBJECT};
btree_node_type m_int_node_type{btree_node_type::VAR_KEY};
std::string m_btree_name; // Unique name for the btree

bool m_merge_turned_on{true};
uint8_t m_max_merge_level{1};
private:
uint32_t m_suggested_min_size; // Precomputed values
uint32_t m_ideal_fill_size;

public:
BtreeConfig(uint32_t node_size, const std::string& btree_name = "") :
m_node_size{node_size}, m_btree_name{btree_name.empty() ? std::string("btree") : btree_name} {
m_node_size{node_size}, m_btree_name{btree_name.empty() ? std::string("btree") : btree_name}{
set_node_data_size(node_size - 512); // Just put estimate at this point of time.
}

Expand Down
25 changes: 9 additions & 16 deletions src/include/homestore/btree/detail/btree_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ struct persistent_hdr_t {
auto sedge = (edge_info.m_bnodeid == empty_bnodeid)
? ""
: fmt::format(" edge={}.{}", edge_info.m_bnodeid, edge_info.m_link_version);
return fmt::format("id={}{}{} {} level={} nentries={}{} mod_cp={}", node_id, snext, sedge,
leaf ? "LEAF" : "INTERIOR", level, nentries, (node_deleted == 0x1) ? " Deleted" : "",
modified_cp_id);
return fmt::format("id={}{}{} {} level={} nentries={} mod_cp={}{}", node_id, snext, sedge,
leaf ? "LEAF" : "INTERIOR", level, nentries, modified_cp_id,
node_deleted == 0x1 ? " Deleted" : " LIVE");
}
};
#pragma pack()
Expand Down Expand Up @@ -119,7 +119,6 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > {
m_trans_hdr.max_keys_in_node = cfg.m_max_keys_in_node;
m_trans_hdr.min_keys_in_node = cfg.m_min_keys_in_node;
#endif

}
virtual ~BtreeNode() = default;

Expand Down Expand Up @@ -368,9 +367,10 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > {
std::string to_custom_string(to_string_cb_t< K, V > const& cb) const {
std::string snext =
(this->next_bnode() == empty_bnodeid) ? "" : fmt::format(" next_node={}", this->next_bnode());
auto str = fmt::format("id={}.{} level={} nEntries={} {}{} node_gen={} ", this->node_id(), this->link_version(),
this->level(), this->total_entries(), (this->is_leaf() ? "LEAF" : "INTERIOR"), snext,
this->node_gen());
auto str =
fmt::format("id={}.{} level={} nEntries={} {}{} node_gen={} {} ", this->node_id(), this->link_version(),
this->level(), this->total_entries(), (this->is_leaf() ? "LEAF" : "INTERIOR"), snext,
this->node_gen(), this->is_node_deleted() ? " **DELETED**" : "");
if (this->has_valid_edge()) {
fmt::format_to(std::back_inserter(str), " edge={}.{}", this->edge_info().m_bnodeid,
this->edge_info().m_link_version);
Expand All @@ -396,12 +396,6 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > {
}
fmt::format_to(std::back_inserter(str), "]");
}

// Should not happen
if (this->is_node_deleted()) {
fmt::format_to(std::back_inserter(str), " **DELETED** ");
}

return str;
}

Expand Down Expand Up @@ -537,10 +531,9 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > {

virtual uint32_t occupied_size() const { return (node_data_size() - available_size()); }
/// Tells whether this node should be considered for merging under the given configuration.
///
/// @param cfg  Btree configuration supplying m_max_merge_level and suggested_min_size().
/// @return false when this node's level is above cfg.m_max_merge_level. Otherwise, in _PRERELEASE builds with
///         a non-zero min_keys_in_node(), whether total_entries() is below that minimum. Otherwise whether
///         occupied_size() is below cfg.suggested_min_size().
bool is_merge_needed(const BtreeConfig& cfg) const {
    if (level() > cfg.m_max_merge_level) { return false; }
#ifdef _PRERELEASE
    // Test hook: when a minimum key count is configured it overrides the size-based criterion.
    if (min_keys_in_node()) { return total_entries() < min_keys_in_node(); }
#endif
    return (occupied_size() < cfg.suggested_min_size());
}
Expand Down
10 changes: 5 additions & 5 deletions src/include/homestore/btree/detail/btree_node_mgr.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -334,11 +334,11 @@ void Btree< K, V >::_start_of_lock(const BtreeNodePtr& node, locktype_t ltype, c
info.node = node.get();
if (ltype == locktype_t::WRITE) {
bt_thread_vars()->wr_locked_nodes.push_back(info);
LOGTRACEMOD(btree, "ADDING node {} to write locked nodes list, its size={}", (void*)info.node,
LOGTRACEMOD(btree, "ADDING node {} to write locked nodes list, its size={}", info.node->node_id(),
bt_thread_vars()->wr_locked_nodes.size());
} else if (ltype == locktype_t::READ) {
bt_thread_vars()->rd_locked_nodes.push_back(info);
LOGTRACEMOD(btree, "ADDING node {} to read locked nodes list, its size={}", (void*)info.node,
LOGTRACEMOD(btree, "ADDING node {} to read locked nodes list, its size={}", info.node->node_id(),
bt_thread_vars()->rd_locked_nodes.size());
} else {
DEBUG_ASSERT(false, "Invalid locktype_t {}", ltype);
Expand All @@ -355,7 +355,7 @@ bool Btree< K, V >::remove_locked_node(const BtreeNodePtr& node, locktype_t ltyp
if (info.node == node.get()) {
*out_info = info;
pnode_infos->pop_back();
LOGTRACEMOD(btree, "REMOVING node {} from {} locked nodes list, its size = {}", (void*)info.node,
LOGTRACEMOD(btree, "REMOVING node {} from {} locked nodes list, its size = {}",info.node->node_id(),
(ltype == locktype_t::WRITE) ? "write" : "read", pnode_infos->size());
return true;
} else if (pnode_infos->size() > 1) {
Expand All @@ -364,7 +364,7 @@ bool Btree< K, V >::remove_locked_node(const BtreeNodePtr& node, locktype_t ltyp
*out_info = info;
pnode_infos->at(pnode_infos->size() - 2) = pnode_infos->back();
pnode_infos->pop_back();
LOGTRACEMOD(btree, "REMOVING node {} from {} locked nodes list, its size = {}", (void*)info.node,
LOGTRACEMOD(btree, "REMOVING node {} from {} locked nodes list, its size = {}", info.node->node_id(),
(ltype == locktype_t::WRITE) ? "write" : "read", pnode_infos->size());
return true;
}
Expand All @@ -390,7 +390,7 @@ template < typename K, typename V >
uint64_t Btree< K, V >::end_of_lock(const BtreeNodePtr& node, locktype_t ltype) {
btree_locked_node_info info;
if (!remove_locked_node(node, ltype, &info)) {
DEBUG_ASSERT(false, "Expected node = {} is not there in locked_node_list", (void*)node.get());
DEBUG_ASSERT(false, "Expected node = {} is not there in locked_node_list", node->node_id());
return 0;
}
// DEBUG_ASSERT_EQ(node.get(), info.node);
Expand Down
38 changes: 36 additions & 2 deletions src/include/homestore/btree/detail/btree_remove_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
_src_cursor_info src_cursor;

total_size = leftmost_node->occupied_size();
uint32_t expected_entities = leftmost_node->total_entries();
#ifdef _PRERELEASE
const uint64_t max_keys = leftmost_node->max_keys_in_node();
#endif

for (auto indx = start_idx + 1; indx <= end_idx; ++indx) {
if (indx == parent_node->total_entries()) {
BT_NODE_LOG_ASSERT(parent_node->has_valid_edge(), parent_node,
Expand All @@ -271,6 +276,10 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
// Only option is to rebalance the nodes across. If we are asked not to do so, skip it.
if (!m_bt_cfg.m_rebalance_turned_on) {
ret = btree_status_t::merge_not_required;
BT_NODE_LOG(
DEBUG, parent_node,
"MERGE disqualified for parent node {} leftmost_node {}! num_nodes {} is more than old_nodes.size() {}",
parent_node->to_string(), leftmost_node->to_string(), num_nodes, old_nodes.size());
goto out;
}
}
Expand All @@ -279,6 +288,10 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
if (leftmost_node->occupied_size() > balanced_size) {
// If for some reason balancing increases the current size, give up.
// TODO: Is this a real case, isn't happening would mean some sort of bug in calculation of is_merge_needed?
BT_NODE_LOG(
DEBUG, parent_node,
"MERGE disqualified for parent node {} leftmost_node {}! current size {} is more than balanced size {}",
parent_node->to_string(), leftmost_node->to_string(), leftmost_node->occupied_size(), balanced_size);
BT_NODE_DBG_ASSERT(false, leftmost_node,
"Didn't expect current size is more than balanced size without rebalancing");
ret = btree_status_t::merge_not_required;
Expand All @@ -294,7 +307,19 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
leftmost_src.ith_nodes.push_back(i);
// TODO: check whether value size of the node is greater than available_size? If so nentries is 0. Suppose if a
// node contains one entry and the value size is much bigger than available size
auto const nentries = old_nodes[i]->num_entries_by_size(0, available_size);
auto nentries = old_nodes[i]->num_entries_by_size(0, available_size);

#ifdef _PRERELEASE
if (max_keys) {
if (expected_entities + nentries > max_keys) {
nentries = max_keys - expected_entities;
expected_entities = max_keys;
} else {
expected_entities += nentries;
}
}
#endif

if ((old_nodes[i]->total_entries() - nentries) == 0) { // Entire node goes in
available_size -= old_nodes[i]->occupied_size();
if (i >= old_nodes.size() - 1) {
Expand Down Expand Up @@ -353,13 +378,22 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const
// better merge next time.
if (new_nodes.size() > old_nodes.size()) {
ret = btree_status_t::merge_not_required;
BT_NODE_LOG(
DEBUG, parent_node,
"MERGE disqualified for parent node {} leftmost_node {}! new nodes size {} is more than old nodes size {}",
parent_node->to_string(), leftmost_node->to_string(), new_nodes.size(), old_nodes.size());
goto out;
}

// There is a case where we are rebalancing and the second node which rebalanced didn't move any size, in that case
// the first node is going to be exactly same and we will do again merge, so bail out here.
if ((new_nodes.size() == old_nodes.size()) && (old_nodes[0]->occupied_size() >= new_nodes[0]->occupied_size())) {
if ((new_nodes.size() == old_nodes.size()) && (old_nodes[0]->occupied_size() == new_nodes[0]->occupied_size())) {
ret = btree_status_t::merge_not_required;
BT_NODE_LOG(DEBUG, parent_node,
"MERGE disqualified for parent node {} leftmost_node {}! old nodes occupied size {} is more than "
"as new nodes occupied size {}",
parent_node->to_string(), leftmost_node->to_string(), old_nodes[0]->occupied_size(),
new_nodes[0]->occupied_size());
goto out;
}

Expand Down
16 changes: 12 additions & 4 deletions src/include/homestore/btree/detail/simple_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@ class SimpleNode : public VariantNode< K, V > {

nentries = std::min(nentries, other.total_entries() - start_idx);
nentries = std::min(nentries, this->get_available_entries());
#ifdef _PRERELEASE
const uint64_t max_keys = this->max_keys_in_node();
if(max_keys){
if(this->total_entries() + nentries > max_keys) {
nentries = max_keys - this->total_entries();
}
}
#endif
uint32_t sz = nentries * get_nth_obj_size(0);
if (sz != 0) { std::memcpy(get_nth_obj(this->total_entries()), other.get_nth_obj_const(start_idx), sz); }
this->add_entries(nentries);
Expand Down Expand Up @@ -213,10 +221,10 @@ class SimpleNode : public VariantNode< K, V > {

std::string to_string(bool print_friendly = false) const override {
auto snext = this->next_bnode() == empty_bnodeid ? "" : fmt::format("next_node={}", this->next_bnode());
auto str = fmt::format("{}id={} level={} nEntries={} {} {} ",
auto str = fmt::format("{}id={} level={} nEntries={} {} {} {}",
(print_friendly ? "------------------------------------------------------------\n" : ""),
this->node_id(), this->level(), this->total_entries(),
(this->is_leaf() ? "LEAF" : "INTERIOR"), snext);
(this->is_leaf() ? "LEAF" : "INTERIOR"), snext, this->is_node_deleted()? " Deleted" : " LIVE");
if (this->has_valid_edge()) {
fmt::format_to(std::back_inserter(str), " edge={}.{}", this->edge_info().m_bnodeid,
this->edge_info().m_link_version);
Expand Down Expand Up @@ -379,9 +387,9 @@ class SimpleNode : public VariantNode< K, V > {
return (this->node_data_area_const() + (get_nth_obj_size(ind) * ind));
}

void set_nth_key(uint32_t ind, BtreeKey* key) {
void set_nth_key(uint32_t ind, const BtreeKey& key) {
uint8_t* entry = this->node_data_area() + (get_nth_obj_size(ind) * ind);
sisl::blob const b = key->serialize();
sisl::blob const b = key.serialize();
memcpy(entry, b.cbytes(), b.size());
}

Expand Down
Loading