[Groonga-commit] groonga/grnxx at 86d4d44 [master] Implement grnxx::map::DoubleArray.

Back to archive index

susumu.yata null+****@clear*****
Tue Jun 11 13:09:03 JST 2013


susumu.yata	2013-06-11 13:09:03 +0900 (Tue, 11 Jun 2013)

  New Revision: 86d4d446d8fc4cb7bd0bdb766f36f465191ab721
  https://github.com/groonga/grnxx/commit/86d4d446d8fc4cb7bd0bdb766f36f465191ab721

  Message:
    Implement grnxx::map::DoubleArray.

  Modified files:
    lib/grnxx/map/double_array.cpp
    lib/grnxx/map/double_array.hpp
    lib/grnxx/map/double_array/block.hpp
    lib/grnxx/map/double_array/entry.hpp
    lib/grnxx/map/double_array/header.cpp
    lib/grnxx/map/double_array/node.hpp

  Modified: lib/grnxx/map/double_array.cpp (+944 -7)
===================================================================
--- lib/grnxx/map/double_array.cpp    2013-06-07 11:16:47 +0900 (c6465c2)
+++ lib/grnxx/map/double_array.cpp    2013-06-11 13:09:03 +0900 (360d933)
@@ -21,14 +21,32 @@
 
 #include "grnxx/bytes.hpp"
 #include "grnxx/geo_point.hpp"
+#include "grnxx/intrinsic.hpp"
 #include "grnxx/logger.hpp"
+#include "grnxx/map/bytes_store.hpp"
+#include "grnxx/map/double_array/block.hpp"
+#include "grnxx/map/double_array/entry.hpp"
 #include "grnxx/map/double_array/header.hpp"
+#include "grnxx/map/double_array/node.hpp"
 #include "grnxx/map/helper.hpp"
 #include "grnxx/storage.hpp"
 
 namespace grnxx {
 namespace map {
 
+using double_array::BLOCK_MAX_FAILURE_COUNT;
+using double_array::BLOCK_MAX_LEVEL;
+using double_array::BLOCK_INVALID_ID;
+using double_array::BLOCK_SIZE;
+using double_array::BLOCK_MAX_COUNT;
+
+using double_array::NODE_TERMINAL_LABEL;
+using double_array::NODE_MAX_LABEL;
+using double_array::NODE_INVALID_LABEL;
+using double_array::NODE_INVALID_OFFSET;
+
+constexpr uint64_t ROOT_NODE_ID = 0;
+
 template <typename T>
 Map<T> *DoubleArray<T>::create(Storage *, uint32_t, const MapOptions &) {
   GRNXX_ERROR() << "invalid combination";
@@ -55,7 +73,12 @@ template class DoubleArray<GeoPoint>;
 DoubleArray<Bytes>::DoubleArray()
     : storage_(nullptr),
       storage_node_id_(STORAGE_INVALID_NODE_ID),
-      header_(nullptr) {}
+      header_(nullptr),
+      nodes_(),
+      siblings_(),
+      blocks_(),
+      entries_(),
+      store_() {}
 
 DoubleArray<Bytes>::~DoubleArray() {}
 
@@ -102,6 +125,339 @@ uint64_t DoubleArray<Bytes>::num_keys() const {
   return header_->num_keys;
 }
 
+bool DoubleArray<Bytes>::get(int64_t key_id, Key *key) {
+  if ((key_id < MAP_MIN_KEY_ID) || (key_id > header_->max_key_id)) {
+    // Out of range.
+    return false;
+  }
+  return get_key(key_id, key) == DOUBLE_ARRAY_FOUND;
+}
+
+bool DoubleArray<Bytes>::unset(int64_t key_id) {
+  Key key;
+  if (!get(key_id, &key)) {
+    // Not found or error.
+    return false;
+  }
+  return remove(key);
+}
+
+bool DoubleArray<Bytes>::reset(int64_t key_id, KeyArg dest_key) {
+  Key src_key;
+  if (!get(key_id, &src_key)) {
+    // Not found or error.
+    return false;
+  }
+  return replace(src_key, dest_key);
+}
+
+bool DoubleArray<Bytes>::find(KeyArg key, int64_t *key_id) {
+  uint64_t node_id = ROOT_NODE_ID;
+  Node node;
+  if (!nodes_->get(node_id, &node)) {
+    // Error.
+    return false;
+  }
+  uint64_t key_pos;
+  for (key_pos = 0; key_pos < key.size(); ++key_pos) {
+    if (node.is_leaf()) {
+      // Found.
+      break;
+    }
+    node_id = node.offset() ^ key[key_pos];
+    if (!nodes_->get(node_id, &node)) {
+      // Error.
+      return false;
+    }
+    if (node.label() != key[key_pos]) {
+      // Not found.
+      return false;
+    }
+  }
+  if (!node.is_leaf()) {
+    if (node.child() != NODE_TERMINAL_LABEL) {
+      // Not found.
+      return false;
+    }
+    node_id = node.offset() ^ NODE_TERMINAL_LABEL;
+    if (!nodes_->get(node_id, &node)) {
+      // Error.
+      return false;
+    }
+    if (!node.is_leaf()) {
+      // Not found.
+      return false;
+    }
+  }
+  Key stored_key;
+  if (get_key(node.key_id(), &stored_key) != DOUBLE_ARRAY_FOUND) {
+    // Not found or error.
+    return false;
+  }
+  if (key.except_prefix(key_pos) != stored_key.except_prefix(key_pos)) {
+    // Not found.
+    return false;
+  }
+  if (key_id) {
+    *key_id = node.key_id();
+  }
+  return true;
+}
+
+bool DoubleArray<Bytes>::add(KeyArg key, int64_t *key_id) {
+  Node *node;
+  uint64_t key_pos;
+  if (find_leaf(key, &node, &key_pos) == DOUBLE_ARRAY_FAILED) {
+    // Error.
+    return false;
+  }
+  switch (insert_leaf(key, node, key_pos, &node)) {
+    case DOUBLE_ARRAY_FOUND: {
+      if (key_id) {
+        *key_id = node->key_id();
+      }
+      return false;
+    }
+    case DOUBLE_ARRAY_INSERTED: {
+      break;
+    }
+    default: {
+      // Error.
+      return false;
+    }
+  }
+  const int64_t next_key_id = header_->next_key_id;
+  if (next_key_id > MAP_MAX_KEY_ID) {
+    // Error.
+    GRNXX_ERROR() << "too many keys: next_key_id = " << next_key_id
+                   << ", max_key_id = " << MAP_MAX_KEY_ID;
+    return false;
+  }
+  Entry * const entry = entries_->get_pointer(next_key_id);
+  if (!entry) {
+    // Error.
+    return false;
+  }
+  uint64_t bytes_id;
+  if (!store_->add(key, &bytes_id)) {
+    // Error.
+    return false;
+  }
+  ++header_->num_keys;
+  if (next_key_id > header_->max_key_id) {
+    header_->max_key_id = next_key_id;
+    header_->next_key_id = next_key_id + 1;
+  } else {
+    header_->next_key_id = entry->next();
+  }
+  *entry = Entry::valid_entry(bytes_id);
+  node->set_key_id(next_key_id);
+  if (key_id) {
+    *key_id = next_key_id;
+  }
+  return true;
+}
+
+bool DoubleArray<Bytes>::remove(KeyArg key) {
+  Node *node;
+  uint64_t key_pos;
+  if (find_leaf(key, &node, &key_pos) != DOUBLE_ARRAY_FOUND) {
+    // Not found or error.
+    return false;
+  }
+  Entry * const entry = entries_->get_pointer(node->key_id());
+  if (!entry) {
+    // Error.
+    return false;
+  }
+  if (!*entry) {
+    // Not found.
+    return false;
+  }
+  Key stored_key;
+  if (!store_->get(entry->bytes_id(), &stored_key)) {
+    // Error.
+    return false;
+  }
+  if (key.except_prefix(key_pos) != stored_key.except_prefix(key_pos)) {
+    // Not found.
+    return false;
+  }
+  *entry = Entry::invalid_entry(header_->next_key_id);
+  node->set_offset(NODE_INVALID_OFFSET);
+  header_->next_key_id = node->key_id();
+  --header_->num_keys;
+  return true;
+}
+
+bool DoubleArray<Bytes>::replace(KeyArg src_key, KeyArg dest_key,
+                                 int64_t *key_id) {
+  int64_t src_key_id;
+  if (!find(src_key, &src_key_id)) {
+    // Not found or error.
+    return false;
+  }
+  if (!replace_key(src_key_id, src_key, dest_key)) {
+    // Found or error.
+    return false;
+  }
+  if (key_id) {
+    *key_id = src_key_id;
+  }
+  return true;
+}
+
+bool DoubleArray<Bytes>::truncate() {
+  Node * const node = nodes_->get_pointer(ROOT_NODE_ID);
+  if (!node) {
+    // Error.
+    return false;
+  }
+  node->set_child(NODE_INVALID_LABEL);
+  node->set_offset(NODE_INVALID_OFFSET);
+  header_->max_key_id = MAP_MIN_KEY_ID - 1;
+  header_->num_keys = 0;
+  header_->next_key_id = MAP_MIN_KEY_ID;
+  return true;
+}
+
+bool DoubleArray<Bytes>::find_longest_prefix_match(KeyArg query,
+                                                   int64_t *key_id,
+                                                   Key *key) {
+  bool found = false;
+  uint64_t node_id = ROOT_NODE_ID;
+  Node node;
+  if (!nodes_->get(node_id, &node)) {
+    // Error.
+    return false;
+  }
+  uint64_t query_pos;
+  for (query_pos = 0; query_pos < query.size(); ++query_pos) {
+    if (node.is_leaf()) {
+      Key stored_key;
+      switch (get_key(node.key_id(), &stored_key)) {
+        case DOUBLE_ARRAY_FOUND: {
+          if ((stored_key.size() <= query.size()) &&
+              (stored_key.except_prefix(query_pos) ==
+               query.prefix(stored_key.size()).except_prefix(query_pos))) {
+            if (key_id) {
+              *key_id = node.key_id();
+            }
+            if (key) {
+              *key = stored_key;
+            }
+            found = true;
+          }
+          break;
+        }
+        case DOUBLE_ARRAY_NOT_FOUND: {
+          break;
+        }
+        default: {
+          // Error.
+          return false;
+        }
+      }
+      return found;
+    }
+
+    if (node.child() == NODE_TERMINAL_LABEL) {
+      Node leaf_node;
+      if (!nodes_->get(node.offset() ^ NODE_TERMINAL_LABEL, &leaf_node)) {
+        // Error.
+        return false;
+      }
+      if (leaf_node.is_leaf()) {
+        switch (get_key(leaf_node.key_id(), key)) {
+          case DOUBLE_ARRAY_FOUND: {
+            if (key_id) {
+              *key_id = leaf_node.key_id();
+            }
+            found = true;
+            break;
+          }
+          case DOUBLE_ARRAY_NOT_FOUND: {
+            break;
+          }
+          default: {
+            return false;
+          }
+        }
+      }
+    }
+
+    node_id = node.offset() ^ query[query_pos];
+    if (!nodes_->get(node_id, &node)) {
+      // Error.
+      return false;
+    }
+    if (node.label() != query[query_pos]) {
+      return found;
+    }
+  }
+
+  if (node.is_leaf()) {
+    Key stored_key;
+    switch (get_key(node.key_id(), &stored_key)) {
+      case DOUBLE_ARRAY_FOUND: {
+        if (stored_key.size() <= query.size()) {
+          if (key_id) {
+            *key_id = node.key_id();
+          }
+          if (key) {
+            *key = stored_key;
+          }
+          found = true;
+        }
+        break;
+      }
+      case DOUBLE_ARRAY_NOT_FOUND: {
+        break;
+      }
+      default: {
+        return false;
+      }
+    }
+  } else if (node.child() == NODE_TERMINAL_LABEL) {
+    if (nodes_->get(node.offset() ^ NODE_TERMINAL_LABEL, &node)) {
+      switch (get_key(node.key_id(), key)) {
+        case DOUBLE_ARRAY_FOUND: {
+          if (key_id) {
+            *key_id = node.key_id();
+          }
+          found = true;
+          break;
+        }
+        case DOUBLE_ARRAY_NOT_FOUND: {
+          break;
+        }
+        default: {
+          return false;
+        }
+      }
+    }
+  }
+  return found;
+}
+
+//MapCursor<Bytes> *DoubleArray<Bytes>::create_cursor(
+//    MapCursorAllKeys<Bytes> query, const MapCursorOptions &options) {
+//  // TODO
+//  return nullptr;
+//}
+
+//MapCursor<Bytes> *DoubleArray<Bytes>::create_cursor(
+//    const MapCursorKeyIDRange<Bytes> &query, const MapCursorOptions &options) {
+//  // TODO
+//  return nullptr;
+//}
+
+//MapCursor<Bytes> *DoubleArray<Bytes>::create_cursor(
+//    const MapCursorKeyRange<Bytes> &query, const MapCursorOptions &options) {
+//  // TODO
+//  return nullptr;
+//}
+
 bool DoubleArray<Bytes>::create_map(Storage *storage, uint32_t storage_node_id,
                                     const MapOptions &) {
   storage_ = storage;
@@ -113,13 +469,33 @@ bool DoubleArray<Bytes>::create_map(Storage *storage, uint32_t storage_node_id,
   storage_node_id_ = storage_node.id();
   header_ = static_cast<Header *>(storage_node.body());
   *header_ = Header();
-  // TODO
-  return false;
+  nodes_.reset(NodeArray::create(storage, storage_node_id_));
+  siblings_.reset(SiblingArray::create(storage, storage_node_id_));
+  blocks_.reset(BlockArray::create(storage, storage_node_id_));
+  entries_.reset(EntryArray::create(storage, storage_node_id_));
+  store_.reset(BytesStore::create(storage, storage_node_id_));
+  if (!nodes_ || !siblings_ || !blocks_ || !entries_ || !store_) {
+    storage->unlink_node(storage_node_id_);
+    return false;
+  }
+  header_->nodes_storage_node_id = nodes_->storage_node_id();
+  header_->siblings_storage_node_id = siblings_->storage_node_id();
+  header_->blocks_storage_node_id = blocks_->storage_node_id();
+  header_->entries_storage_node_id = entries_->storage_node_id();
+  header_->store_storage_node_id = store_->storage_node_id();
+  Node * const root_node = reserve_node(ROOT_NODE_ID);
+  if (!root_node) {
+    storage->unlink_node(storage_node_id_);
+    return false;
+  }
+  root_node[NODE_INVALID_OFFSET - ROOT_NODE_ID].set_is_origin(true);
+  return true;
 }
 
 bool DoubleArray<Bytes>::open_map(Storage *storage, uint32_t storage_node_id) {
   storage_ = storage;
-  StorageNode storage_node = storage->open_node(storage_node_id);
+  storage_node_id_ = storage_node_id;
+  StorageNode storage_node = storage->open_node(storage_node_id_);
   if (!storage_node) {
     return false;
   }
@@ -128,10 +504,571 @@ bool DoubleArray<Bytes>::open_map(Storage *storage, uint32_t storage_node_id) {
                   << ", header_size = " << sizeof(Header);
     return false;
   }
-  storage_node_id_ = storage_node_id;
   header_ = static_cast<Header *>(storage_node.body());
-  // TODO
-  return false;
+  nodes_.reset(NodeArray::open(storage, header_->nodes_storage_node_id));
+  siblings_.reset(
+      SiblingArray::open(storage, header_->siblings_storage_node_id));
+  blocks_.reset(BlockArray::open(storage, header_->blocks_storage_node_id));
+  entries_.reset(EntryArray::open(storage, header_->entries_storage_node_id));
+  store_.reset(BytesStore::open(storage, header_->store_storage_node_id));
+  if (!nodes_ || !siblings_ || !blocks_ || !entries_ || !store_) {
+    return false;
+  }
+  return true;
+}
+
+DoubleArrayResult DoubleArray<Bytes>::get_key(int64_t key_id, Key *key) {
+  Entry entry;
+  if (!entries_->get(key_id, &entry)) {
+    return DOUBLE_ARRAY_FAILED;
+  }
+  if (!entry) {
+    return DOUBLE_ARRAY_NOT_FOUND;
+  }
+  if (!store_->get(entry.bytes_id(), key)) {
+    return DOUBLE_ARRAY_FAILED;
+  }
+  return DOUBLE_ARRAY_FOUND;
+}
+
+bool DoubleArray<Bytes>::replace_key(int64_t key_id, KeyArg src_key,
+                                     KeyArg dest_key) {
+  Node *dest_node;
+  uint64_t key_pos;
+  if (find_leaf(dest_key, &dest_node, &key_pos) == DOUBLE_ARRAY_FAILED) {
+    return false;
+  }
+  switch (insert_leaf(dest_key, dest_node, key_pos, &dest_node)) {
+    case DOUBLE_ARRAY_FOUND: {
+      return false;
+    }
+    case DOUBLE_ARRAY_INSERTED: {
+      break;
+    }
+    default: {
+      // Error.
+      return false;
+    }
+  }
+  Entry * const entry = entries_->get_pointer(key_id);
+  if (!entry) {
+    // Error.
+    return false;
+  }
+  Node *src_node;
+  if (find_leaf(src_key, &src_node, &key_pos) != DOUBLE_ARRAY_FOUND) {
+    // Critical error.
+    return false;
+  }
+  uint64_t bytes_id;
+  if (!store_->add(dest_key, &bytes_id)) {
+    // Error.
+    return false;
+  }
+  dest_node->set_key_id(key_id);
+  *entry = Entry::valid_entry(bytes_id);
+  src_node->set_offset(NODE_INVALID_OFFSET);
+  return true;
+}
+
+DoubleArrayResult DoubleArray<Bytes>::find_leaf(KeyArg key, Node **leaf_node,
+                                                uint64_t *leaf_key_pos) {
+  Node *node = nodes_->get_pointer(ROOT_NODE_ID);
+  if (!node) {
+    return DOUBLE_ARRAY_FAILED;
+  }
+  uint64_t key_pos;
+  for (key_pos = 0; key_pos < key.size(); ++key_pos) {
+    if (node->is_leaf()) {
+      *leaf_node = node;
+      *leaf_key_pos = key_pos;
+      return DOUBLE_ARRAY_FOUND;
+    }
+    const uint64_t child_node_id = node->offset() ^ key[key_pos];
+    Node * const child_node = nodes_->get_pointer(child_node_id);
+    if (!child_node) {
+      return DOUBLE_ARRAY_FAILED;
+    }
+    if (child_node->label() != key[key_pos]) {
+      *leaf_node = node;
+      *leaf_key_pos = key_pos;
+      return DOUBLE_ARRAY_NOT_FOUND;
+    }
+    node = child_node;
+  }
+  *leaf_node = node;
+  *leaf_key_pos = key_pos;
+  if (node->is_leaf()) {
+    return DOUBLE_ARRAY_FOUND;
+  }
+  if (node->child() != NODE_TERMINAL_LABEL) {
+    return DOUBLE_ARRAY_NOT_FOUND;
+  }
+  const uint64_t node_id = node->offset() ^ NODE_TERMINAL_LABEL;
+  node = nodes_->get_pointer(node_id);
+  if (!node) {
+    return DOUBLE_ARRAY_FAILED;
+  }
+  *leaf_node = node;
+  return node->is_leaf() ? DOUBLE_ARRAY_FOUND : DOUBLE_ARRAY_NOT_FOUND;
+}
+
+DoubleArrayResult DoubleArray<Bytes>::insert_leaf(KeyArg key, Node *node,
+                                                  uint64_t key_pos,
+                                                  Node **leaf_node) {
+  if (node->is_leaf()) {
+    Key stored_key;
+    const DoubleArrayResult result = get_key(node->key_id(), &stored_key);
+    if (result != DOUBLE_ARRAY_FOUND) {
+      // Not found or error.
+      return result;
+    }
+    uint64_t i = key_pos;
+    while ((i < key.size()) && (i < stored_key.size())) {
+      if (key[i] != stored_key[i]) {
+        break;
+      }
+      ++i;
+    }
+    if ((i == key.size()) && (i == stored_key.size())) {
+      return DOUBLE_ARRAY_FOUND;
+    }
+    while (key_pos < i) {
+      if (!insert_node(node, key[key_pos], &node)) {
+        return DOUBLE_ARRAY_FAILED;
+      }
+      ++key_pos;
+    }
+    uint64_t labels[2];
+    labels[0] = (key_pos < stored_key.size()) ?
+        stored_key[key_pos] : NODE_TERMINAL_LABEL;
+    labels[1] = (key_pos < key.size()) ? key[key_pos] : NODE_TERMINAL_LABEL;
+    if (!separate(node, labels, leaf_node)) {
+      return DOUBLE_ARRAY_FAILED;
+    }
+    return DOUBLE_ARRAY_INSERTED;
+  } else if (node->label() == NODE_TERMINAL_LABEL) {
+    *leaf_node = node;
+    return DOUBLE_ARRAY_INSERTED;
+  } else {
+    const uint64_t label = (key_pos < key.size()) ?
+        key[key_pos] : NODE_TERMINAL_LABEL;
+    if (!resolve(node, label)) {
+      return DOUBLE_ARRAY_FAILED;
+    }
+    if (!insert_node(node, label, leaf_node)) {
+      return DOUBLE_ARRAY_FAILED;
+    }
+    return DOUBLE_ARRAY_INSERTED;
+  }
+}
+
+bool DoubleArray<Bytes>::insert_node(Node *node, uint64_t label,
+                                     Node **dest_node) {
+  uint64_t offset = node->offset();
+  if (node->is_leaf() || (offset == NODE_INVALID_OFFSET)) {
+    if (!find_offset(&label, 1, &offset)) {
+      // Error.
+      return false;
+    }
+  }
+  const uint64_t next_node_id = offset ^ label;
+  uint8_t *next_sibling = siblings_->get_pointer(next_node_id);
+  if (!next_sibling) {
+    // Error.
+    return false;
+  }
+  Node * const next_node = reserve_node(next_node_id);
+  if (!next_node) {
+    // Error.
+    return false;
+  }
+  next_node->set_label(label);
+  if (node->is_leaf()) {
+    next_node[offset - next_node_id].set_is_origin(true);
+    next_node->set_key_id(node->key_id());
+  } else if (node->offset() == NODE_INVALID_OFFSET) {
+    next_node[offset - next_node_id].set_is_origin(true);
+  }
+  node->set_offset(offset);
+  const uint64_t child_label = node->child();
+  if (child_label == NODE_INVALID_LABEL) {
+    node->set_child(label);
+  } else if ((label == NODE_TERMINAL_LABEL) ||
+             ((child_label != NODE_TERMINAL_LABEL) && (label < child_label))) {
+    // The child node becomes the first child.
+    *next_sibling = child_label;
+    next_node->set_has_sibling();
+    node->set_child(label);
+  } else {
+    uint64_t prev_node_id = offset ^ child_label;
+    Node *prev_node = &next_node[prev_node_id - next_node_id];
+    uint8_t *prev_sibling = &next_sibling[prev_node_id - next_node_id];
+    uint64_t sibling_label = prev_node->has_sibling() ?
+        *prev_sibling : NODE_INVALID_LABEL;
+    while (label > sibling_label) {
+      prev_node_id = offset ^ sibling_label;
+      prev_node = &next_node[prev_node_id - next_node_id];
+      prev_sibling = &next_sibling[prev_node_id - next_node_id];
+      sibling_label = prev_node->has_sibling() ?
+          *prev_sibling : NODE_INVALID_LABEL;
+    }
+    *next_sibling = *prev_sibling;
+    *prev_sibling = label;
+    if (prev_node->has_sibling()) {
+      next_node->set_has_sibling();
+    }
+    prev_node->set_has_sibling();
+  }
+  *dest_node = next_node;
+  return true;
+}
+
+bool DoubleArray<Bytes>::separate(Node *node, uint64_t labels[2],
+                                  Node **dest_node) {
+  uint64_t offset;
+  if (!find_offset(labels, 2, &offset)) {
+    // Error.
+    return false;
+  }
+  uint64_t node_ids[2] = { offset ^ labels[0], offset ^ labels[1] };
+  Node *nodes[2];
+  nodes[0] = reserve_node(node_ids[0]);
+  if (!nodes[0]) {
+    // Error.
+    return false;
+  }
+  nodes[1] = reserve_node(node_ids[1]);
+  if (!nodes[1]) {
+    // Error.
+    return false;
+  }
+  uint8_t * const sibling_block =
+      siblings_->get_pointer(offset & ~(BLOCK_SIZE - 1));
+  if (!sibling_block) {
+    // Error.
+    return false;
+  }
+  nodes[0]->set_label(labels[0]);
+  nodes[0]->set_key_id(node->key_id());
+  nodes[1]->set_label(labels[1]);
+  nodes[0][offset - node_ids[0]].set_is_origin(true);
+  node->set_offset(offset);
+  if ((labels[0] == NODE_TERMINAL_LABEL) ||
+      ((labels[1] != NODE_TERMINAL_LABEL) && (labels[0] < labels[1]))) {
+    sibling_block[node_ids[0] % BLOCK_SIZE] = static_cast<uint8_t>(labels[1]);
+    nodes[0]->set_has_sibling();
+    node->set_child(labels[0]);
+  } else {
+    sibling_block[node_ids[1] % BLOCK_SIZE] = static_cast<uint8_t>(labels[0]);
+    nodes[1]->set_has_sibling();
+    node->set_child(labels[1]);
+  }
+  *dest_node = nodes[1];
+  return true;
+}
+
+bool DoubleArray<Bytes>::resolve(Node *node, uint64_t label) {
+  uint64_t offset = node->offset();
+  if (offset == NODE_INVALID_OFFSET) {
+    return true;
+  }
+  uint64_t dest_node_id = offset ^ label;
+  Node * const dest_node = nodes_->get_pointer(dest_node_id);
+  if (!dest_node) {
+    // Error.
+    return false;
+  }
+  if (dest_node->is_phantom()) {
+    return true;
+  }
+  Node * const node_block = dest_node - (dest_node_id % BLOCK_SIZE);
+  uint8_t * const sibling_block =
+      siblings_->get_pointer(dest_node_id & ~(BLOCK_SIZE - 1));
+  if (!sibling_block) {
+    // Error.
+    return false;
+  }
+  uint64_t labels[NODE_MAX_LABEL + 1];
+  uint64_t num_labels = 0;
+  uint64_t child_label = node->child();
+  while (child_label != NODE_INVALID_LABEL) {
+    labels[num_labels++] = child_label;
+    const uint64_t child_node_id = offset ^ child_label;
+    if (node_block[child_node_id % BLOCK_SIZE].has_sibling()) {
+      child_label = sibling_block[child_node_id % BLOCK_SIZE];
+    } else {
+      child_label = NODE_INVALID_LABEL;
+    }
+  }
+  labels[num_labels] = label;
+  if (!find_offset(labels, num_labels + 1, &offset)) {
+    // Error.
+    return false;
+  }
+  if (!migrate_nodes(node, offset, labels, num_labels)) {
+    // Error.
+    return false;
+  }
+  return true;
+}
+
+bool DoubleArray<Bytes>::migrate_nodes(Node *node, uint64_t dest_offset,
+                                       const uint64_t *labels,
+                                       uint64_t num_labels) {
+  const uint64_t src_offset = node->offset();
+  Node * const src_node_block =
+      nodes_->get_pointer(src_offset & ~(BLOCK_SIZE - 1));
+  if (!src_node_block) {
+    // Error.
+    return false;
+  }
+  uint8_t * const src_sibling_block =
+      siblings_->get_pointer(src_offset & ~(BLOCK_SIZE - 1));
+  if (!src_sibling_block) {
+    // Error.
+    return false;
+  }
+  Node * const dest_node_block =
+      nodes_->get_pointer(dest_offset & ~(BLOCK_SIZE - 1));
+  if (!dest_node_block) {
+    // Error.
+    return false;
+  }
+  uint8_t * const dest_sibling_block =
+      siblings_->get_pointer(dest_offset & ~(BLOCK_SIZE - 1));
+  if (!dest_sibling_block) {
+    // Error.
+    return false;
+  }
+  // TODO: Copy siblings!
+  for (uint64_t i = 0; i < num_labels; ++i) {
+    const uint64_t src_node_id = src_offset ^ labels[i];
+    Node * const src_node = &src_node_block[src_node_id % BLOCK_SIZE];
+    uint8_t * const src_sibling = &src_sibling_block[src_node_id % BLOCK_SIZE];
+    const uint64_t dest_node_id = dest_offset ^ labels[i];
+    Node * const dest_node = reserve_node(dest_node_id);
+    if (!dest_node) {
+      // Error.
+      return false;
+    }
+    uint8_t * const dest_sibling =
+        &dest_sibling_block[dest_node_id % BLOCK_SIZE];
+    Node dummy_node = *src_node;
+    dummy_node.set_is_origin(dest_node->is_origin());
+    *dest_node = dummy_node;
+    *dest_sibling = *src_sibling;
+  }
+  header_->num_zombies += num_labels;
+  dest_node_block[dest_offset % BLOCK_SIZE].set_is_origin(true);
+  node->set_offset(dest_offset);
+  return true;
+}
+
+bool DoubleArray<Bytes>::find_offset(const uint64_t *labels,
+                                     uint64_t num_labels,
+                                     uint64_t *found_offset) {
+  // Blocks are tested in descending order of level.
+  // Generally, a lower level contains more phantom nodes.
+  uint64_t level = bit_scan_reverse(num_labels) + 1;
+  level = (level < BLOCK_MAX_LEVEL) ? (BLOCK_MAX_LEVEL - level) : 0;
+  uint64_t block_count = 0;
+  do {
+    uint64_t latest_block_id = header_->latest_blocks[level];
+    if (latest_block_id == BLOCK_INVALID_ID) {
+      // This level group is skipped because it is empty.
+      continue;
+    }
+    uint64_t block_id = latest_block_id;
+    do {
+      Block * const block = blocks_->get_pointer(block_id);
+      if (!block) {
+        // Error.
+        return false;
+      }
+      Node * const node_block = nodes_->get_pointer(block_id * BLOCK_SIZE);
+      if (!node_block) {
+        // Error.
+        return false;
+      }
+      const uint64_t first_phantom_node_id = block->first_phantom();
+      uint64_t node_id = first_phantom_node_id;
+      do {
+        const uint64_t offset = node_id ^ labels[0];
+        if (!node_block[offset].is_origin()) {
+          uint64_t i;
+          for (i = 1; i < num_labels; ++i) {
+            if (!node_block[offset ^ labels[i]].is_phantom()) {
+              break;
+            }
+          }
+          if (i >= num_labels) {
+            // Found.
+            *found_offset = (block_id * BLOCK_SIZE) | offset;
+            return true;
+          }
+        }
+        node_id = node_block[node_id].next();
+      } while (node_id != first_phantom_node_id);
+
+      Block * const prev_block = block;
+      const uint64_t prev_block_id = block_id;
+      const uint64_t next_block_id = block->next();
+      block_id = next_block_id;
+
+      // A block level is updated if this function fails many times.
+      prev_block->set_failure_count(prev_block->failure_count() + 1);
+      if (prev_block->failure_count() >= BLOCK_MAX_FAILURE_COUNT) {
+        update_block_level(prev_block_id, prev_block, level + 1);
+        if (next_block_id == latest_block_id) {
+          // All the blocks are tested.
+          break;
+        } else {
+          latest_block_id = header_->latest_blocks[level];
+          continue;
+        }
+      }
+    } while ((++block_count < BLOCK_MAX_COUNT) &&
+             (block_id != latest_block_id));
+  } while ((block_count < BLOCK_MAX_COUNT) && (level-- != 0));
+  // Use a new block.
+  *found_offset = (header_->num_blocks * BLOCK_SIZE) ^ labels[0];
+  return true;
+}
+
+auto DoubleArray<Bytes>::reserve_node(uint64_t node_id) -> Node * {
+  const uint64_t block_id = node_id / BLOCK_SIZE;
+  Block *block;
+  if (node_id >= (header_->num_blocks * BLOCK_SIZE)) {
+    block = reserve_block(block_id);
+  } else {
+    block = blocks_->get_pointer(block_id);
+  }
+  if (!block) {
+    // Error.
+    return nullptr;
+  }
+  Node * const node = nodes_->get_pointer(node_id);
+  if (!node) {
+    // Error.
+    return nullptr;
+  }
+  Node * const node_block = node - (node_id % BLOCK_SIZE);
+  Node * const next_node = &node_block[node->next()];
+  Node * const prev_node = &node_block[node->prev()];
+  if ((node_id % BLOCK_SIZE) == block->first_phantom()) {
+    block->set_first_phantom(node->next());
+  }
+  prev_node->set_next(node->next());
+  next_node->set_prev(node->prev());
+  if (block->level() != BLOCK_MAX_LEVEL) {
+    const uint64_t threshold =
+        1ULL << ((BLOCK_MAX_LEVEL - block->level() - 1) * 2);
+    if (block->num_phantoms() == threshold) {
+      update_block_level(block_id, block, block->level() + 1);
+    }
+  }
+  block->set_num_phantoms(block->num_phantoms() - 1);
+  node->unset_is_phantom();
+  --header_->num_phantoms;
+  return node;
+}
+
+auto DoubleArray<Bytes>::reserve_block(uint64_t block_id) -> Block * {
+  if (block_id >= blocks_->size()) {
+    GRNXX_ERROR() << "too many blocks: block_id = " << block_id
+                  << ", blocks_size = " << blocks_->size();
+    return nullptr;
+  }
+  Block * const block = blocks_->get_pointer(block_id);
+  if (!block) {
+    // Error.
+    return nullptr;
+  }
+  Node * const node = nodes_->get_pointer(block_id * BLOCK_SIZE);
+  if (!node) {
+    // Error.
+    return nullptr;
+  }
+  *block = Block::empty_block();
+  for (uint64_t i = 0; i < BLOCK_SIZE; ++i) {
+    node[i] = Node::phantom_node(i + 1, i - 1);
+  }
+  // The level of a new block is 0.
+  set_block_level(block_id, block, 0);
+  header_->num_blocks = block_id + 1;
+  header_->num_phantoms += BLOCK_SIZE;
+  return block;
+}
+
+bool DoubleArray<Bytes>::update_block_level(uint64_t block_id, Block *block,
+                                            uint64_t level) {
+  // TODO: If set_block_level() fails, the block gets lost.
+  if (!unset_block_level(block_id, block) ||
+      !set_block_level(block_id, block, level)) {
+    // Error.
+    return false;
+  }
+  return true;
+}
+
+bool DoubleArray<Bytes>::set_block_level(uint64_t block_id, Block *block,
+                                         uint64_t level) {
+  if (header_->latest_blocks[level] == BLOCK_INVALID_ID) {
+    // The block becomes the only one member of the level group.
+    block->set_next(block_id);
+    block->set_prev(block_id);
+    header_->latest_blocks[level] = block_id;
+  } else {
+    // The block is appended to the level group.
+    const uint64_t next_block_id = header_->latest_blocks[level];
+    Block * const next_block = blocks_->get_pointer(next_block_id);
+    if (!next_block) {
+      // Error.
+      return false;
+    }
+    const uint64_t prev_block_id = next_block->prev();
+    Block * const prev_block = blocks_->get_pointer(prev_block_id);
+    if (!prev_block) {
+      // Error.
+      return false;
+    }
+    block->set_next(next_block_id);
+    block->set_prev(prev_block_id);
+    prev_block->set_next(block_id);
+    next_block->set_prev(block_id);
+  }
+  block->set_level(level);
+  block->set_failure_count(0);
+  return true;
+}
+
+bool DoubleArray<Bytes>::unset_block_level(uint64_t block_id, Block *block) {
+  const uint64_t level = block->level();
+  const uint64_t next_block_id = block->next();
+  const uint64_t prev_block_id = block->prev();
+  if (next_block_id == prev_block_id) {
+    // The level group becomes empty.
+    header_->latest_blocks[level] = BLOCK_INVALID_ID;
+  } else {
+    Block * const next_block = blocks_->get_pointer(next_block_id);
+    if (!next_block) {
+      // Error.
+      return false;
+    }
+    Block * const prev_block = blocks_->get_pointer(prev_block_id);
+    if (!prev_block) {
+      // Error.
+      return false;
+    }
+    prev_block->set_next(next_block_id);
+    next_block->set_prev(prev_block_id);
+    if (block_id == header_->latest_blocks[level]) {
+      // The next block becomes the latest block of the level group.
+      header_->latest_blocks[level] = next_block_id;
+    }
+  }
+  return true;
 }
 
 }  // namespace map

  Modified: lib/grnxx/map/double_array.hpp (+68 -15)
===================================================================
--- lib/grnxx/map/double_array.hpp    2013-06-07 11:16:47 +0900 (b01b9f8)
+++ lib/grnxx/map/double_array.hpp    2013-06-11 13:09:03 +0900 (539487e)
@@ -22,10 +22,14 @@
 
 #include <memory>
 
+#include "grnxx/array.hpp"
 #include "grnxx/bytes.hpp"
 #include "grnxx/map.hpp"
 #include "grnxx/map_cursor.hpp"
 #include "grnxx/map_cursor_query.hpp"
+#include "grnxx/map/double_array/block.hpp"
+#include "grnxx/map/double_array/entry.hpp"
+#include "grnxx/map/double_array/node.hpp"
 #include "grnxx/types.hpp"
 
 namespace grnxx {
@@ -33,12 +37,22 @@ namespace grnxx {
 class Storage;
 
 namespace map {
+
+class BytesStore;
+
 namespace double_array {
 
 struct Header;
 
 }  // namespace double_array
 
+enum DoubleArrayResult {
+  DOUBLE_ARRAY_FOUND,
+  DOUBLE_ARRAY_NOT_FOUND,
+  DOUBLE_ARRAY_INSERTED,
+  DOUBLE_ARRAY_FAILED
+};
+
 template <typename T>
 class DoubleArray {
  public:
@@ -49,6 +63,15 @@ class DoubleArray {
 
 template <>
 class DoubleArray<Bytes> : public Map<Bytes> {
+  using Node = double_array::Node;
+  using Block = double_array::Block;
+  using Entry = double_array::Entry;
+
+  using NodeArray    = Array<Node,     65536, 8192, 8192>;  // 42-bit
+  using SiblingArray = Array<uint8_t, 262144, 4096, 4096>;  // 42-bit
+  using BlockArray   = Array<Block,     8192, 1024, 1024>;  // 33-bit
+  using EntryArray   = Array<Entry,    65536, 4096, 4096>;  // 40-bit
+
  public:
   using Header = double_array::Header;
   using Key = typename Map<Bytes>::Key;
@@ -68,40 +91,70 @@ class DoubleArray<Bytes> : public Map<Bytes> {
   int64_t max_key_id() const;
   uint64_t num_keys() const;
 
-  // TODO
-//  bool get(int64_t key_id, Key *key = nullptr);
-//  bool unset(int64_t key_id);
-//  bool reset(int64_t key_id, KeyArg dest_key);
+  bool get(int64_t key_id, Key *key = nullptr);
+  bool unset(int64_t key_id);
+  bool reset(int64_t key_id, KeyArg dest_key);
 
-//  bool find(KeyArg key, int64_t *key_id = nullptr);
-//  bool add(KeyArg key, int64_t *key_id = nullptr);
-//  bool remove(KeyArg key);
-//  bool replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id = nullptr);
+  bool find(KeyArg key, int64_t *key_id = nullptr);
+  bool add(KeyArg key, int64_t *key_id = nullptr);
+  bool remove(KeyArg key);
+  bool replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id = nullptr);
 
-//  bool truncate();
+  bool truncate();
 
-//  bool find_longest_prefix_match(KeyArg query,
-//                                 int64_t *key_id = nullptr,
-//                                 Key *key = nullptr);
+  bool find_longest_prefix_match(KeyArg query,
+                                 int64_t *key_id = nullptr,
+                                 Key *key = nullptr);
 
 //  Cursor *create_cursor(
-//      MapCursorAllKeys<T> query,
+//      MapCursorAllKeys<Bytes> query,
 //      const MapCursorOptions &options = MapCursorOptions());
 //  Cursor *create_cursor(
-//      const MapCursorKeyIDRange<T> &query,
+//      const MapCursorKeyIDRange<Bytes> &query,
 //      const MapCursorOptions &options = MapCursorOptions());
 //  Cursor *create_cursor(
-//      const MapCursorKeyRange<T> &query,
+//      const MapCursorKeyRange<Bytes> &query,
 //      const MapCursorOptions &options = MapCursorOptions());
 
  private:
   Storage *storage_;
   uint32_t storage_node_id_;
   Header *header_;
+  std::unique_ptr<NodeArray> nodes_;
+  std::unique_ptr<SiblingArray> siblings_;
+  std::unique_ptr<BlockArray> blocks_;
+  std::unique_ptr<EntryArray> entries_;
+  std::unique_ptr<BytesStore> store_;
 
   bool create_map(Storage *storage, uint32_t storage_node_id,
                   const MapOptions &options);
   bool open_map(Storage *storage, uint32_t storage_node_id);
+
+  DoubleArrayResult get_key(int64_t key_id, Key *key);
+
+  bool replace_key(int64_t key_id, KeyArg src_key, KeyArg dest_key);
+
+  DoubleArrayResult find_leaf(KeyArg key, Node **leaf_node,
+                              uint64_t *leaf_key_pos);
+  DoubleArrayResult insert_leaf(KeyArg key, Node *node, uint64_t key_pos,
+                                Node **leaf_node);
+
+  bool insert_node(Node *node, uint64_t label, Node **dest_node);
+  bool separate(Node *node, uint64_t labels[2], Node **dest_node);
+
+  bool resolve(Node *node, uint64_t label);
+  bool migrate_nodes(Node *node, uint64_t dest_offset,
+                     const uint64_t *labels, uint64_t num_labels);
+
+  bool find_offset(const uint64_t *labels, uint64_t num_labels,
+                   uint64_t *found_offset);
+
+  Node *reserve_node(uint64_t node_id);
+  Block *reserve_block(uint64_t block_id);
+
+  bool update_block_level(uint64_t block_id, Block *block, uint64_t level);
+  bool set_block_level(uint64_t block_id, Block *block, uint64_t level);
+  bool unset_block_level(uint64_t block_id, Block *block);
 };
 
 }  // namespace map

  Modified: lib/grnxx/map/double_array/block.hpp (+10 -0)
===================================================================
--- lib/grnxx/map/double_array/block.hpp    2013-06-07 11:16:47 +0900 (5da7c82)
+++ lib/grnxx/map/double_array/block.hpp    2013-06-11 13:09:03 +0900 (6db87e4)
@@ -29,6 +29,8 @@ namespace double_array {
 constexpr uint64_t BLOCK_MAX_FAILURE_COUNT  = 4;
 constexpr uint64_t BLOCK_MAX_LEVEL          = 5;
 constexpr uint64_t BLOCK_INVALID_ID         = (1ULL << 40) - 1;
+constexpr uint64_t BLOCK_SIZE               = 1ULL << 9;
+constexpr uint64_t BLOCK_MAX_COUNT          = 16;
 
 // The internal structure is as follows:
 // - values_[0]
@@ -59,6 +61,12 @@ class Block {
   static constexpr uint8_t  PREV_SHIFT          = 24;
 
  public:
+  Block() = default;
+
+  static Block empty_block() {
+    return Block(0, BLOCK_SIZE << NUM_PHANTOMS_SHIFT);
+  }
+
   // Return the first phantom node.
   uint64_t first_phantom() const {
     return (values_[0] >> FIRST_PHANTOM_SHIFT) & FIRST_PHANTOM_MASK;
@@ -113,6 +121,8 @@ class Block {
 
  private:
   uint64_t values_[2];
+
+  Block(uint64_t value_0, uint64_t value_1) : values_{ value_0, value_1 } {}
 };
 
 }  // namespace double_array

  Modified: lib/grnxx/map/double_array/entry.hpp (+2 -0)
===================================================================
--- lib/grnxx/map/double_array/entry.hpp    2013-06-07 11:16:47 +0900 (9307a94)
+++ lib/grnxx/map/double_array/entry.hpp    2013-06-11 13:09:03 +0900 (fcc7666)
@@ -39,6 +39,8 @@ class Entry {
   static constexpr uint64_t IS_VALID_FLAG = 1ULL << 63;
 
  public:
+  Entry() = default;
+
   static Entry valid_entry(uint64_t bytes_id) {
     return Entry(IS_VALID_FLAG | bytes_id);
   }

  Modified: lib/grnxx/map/double_array/header.cpp (+2 -2)
===================================================================
--- lib/grnxx/map/double_array/header.cpp    2013-06-07 11:16:47 +0900 (d33d436)
+++ lib/grnxx/map/double_array/header.cpp    2013-06-11 13:09:03 +0900 (96f4989)
@@ -24,7 +24,7 @@ namespace map {
 namespace double_array {
 
 Header::Header()
-    : map_type(MAP_ARRAY),
+    : map_type(MAP_DOUBLE_ARRAY),
       max_key_id(MAP_MIN_KEY_ID - 1),
       num_keys(0),
       nodes_storage_node_id(STORAGE_INVALID_NODE_ID),
@@ -32,7 +32,7 @@ Header::Header()
       blocks_storage_node_id(STORAGE_INVALID_NODE_ID),
       entries_storage_node_id(STORAGE_INVALID_NODE_ID),
       store_storage_node_id(STORAGE_INVALID_NODE_ID),
-      next_key_id(MAP_INVALID_KEY_ID),
+      next_key_id(MAP_MIN_KEY_ID),
       num_blocks(0),
       num_phantoms(0),
       num_zombies(0),

  Modified: lib/grnxx/map/double_array/node.hpp (+17 -12)
===================================================================
--- lib/grnxx/map/double_array/node.hpp    2013-06-07 11:16:47 +0900 (ed0ec73)
+++ lib/grnxx/map/double_array/node.hpp    2013-06-11 13:09:03 +0900 (69917bc)
@@ -29,7 +29,6 @@ namespace double_array {
 constexpr uint64_t NODE_TERMINAL_LABEL = 0x100;
 constexpr uint64_t NODE_MAX_LABEL      = NODE_TERMINAL_LABEL;
 constexpr uint64_t NODE_INVALID_LABEL  = NODE_MAX_LABEL + 1;
-
 constexpr uint64_t NODE_INVALID_OFFSET = 0;
 
 // The internal structure is as follows:
@@ -75,6 +74,8 @@ class Node {
   static constexpr uint8_t  OFFSET_SHIFT     = 18;
 
  public:
+  Node() = default;
+
   // Create a phantom node.
   static Node phantom_node(uint64_t next, uint64_t prev) {
     return Node(IS_PHANTOM_FLAG | ((next & NEXT_MASK) << NEXT_SHIFT) |
@@ -134,8 +135,12 @@ class Node {
              (NODE_INVALID_LABEL << CHILD_SHIFT) |
              (NODE_INVALID_OFFSET << OFFSET_SHIFT);
   }
-  void set_is_origin() {
-    value_ |= IS_ORIGIN_FLAG;
+  void set_is_origin(bool is_origin) {
+    if (is_origin) {
+      value_ |= IS_ORIGIN_FLAG;
+    } else {
+      value_ &= ~IS_ORIGIN_FLAG;
+    }
   }
 
   void set_next(uint64_t next) {
@@ -172,16 +177,16 @@ class Node {
     value_ = (value_ & ~(CHILD_MASK << CHILD_SHIFT)) |
              ((child & CHILD_MASK) << CHILD_SHIFT);
   }
-  // Use unset_offset() iff "offset" == INVALID_OFFSET.
   void set_offset(uint64_t offset) {
-    value_ = (value_ & ~(OFFSET_MASK << OFFSET_SHIFT)) |
-             ((offset & OFFSET_MASK) << OFFSET_SHIFT);
-  }
-  void unset_offset() {
-    value_ = (value_ & (IS_ORIGIN_FLAG | (LABEL_MASK << LABEL_SHIFT) |
-                        IS_LEAF_FLAG)) |
-             (NODE_INVALID_LABEL << CHILD_SHIFT) |
-             (NODE_INVALID_OFFSET << OFFSET_SHIFT);
+    if (value_ & IS_LEAF_FLAG) {
+      value_ = (value_ & ~(IS_LEAF_FLAG | (OFFSET_MASK << OFFSET_SHIFT) |
+                           (CHILD_MASK << CHILD_SHIFT))) |
+               (offset << OFFSET_SHIFT) |
+               (NODE_INVALID_LABEL << CHILD_SHIFT);
+    } else {
+      value_ = (value_ & ~(OFFSET_MASK << OFFSET_SHIFT)) |
+               (offset << OFFSET_SHIFT);
+    }
   }
 
  private:
-------------- next part --------------
HTML����������������������������...
下载 



More information about the Groonga-commit mailing list
Back to archive index