[Groonga-commit] groonga/grnxx at 238ac50 [master] Add small node support and fix some bugs.

Back to archive index

susumu.yata null+****@clear*****
Sat May 4 00:53:44 JST 2013


susumu.yata	2013-05-04 00:53:44 +0900 (Sat, 04 May 2013)

  New Revision: 238ac50309887feee1a5122fc0ec44cd6885017d
  https://github.com/groonga/grnxx/commit/238ac50309887feee1a5122fc0ec44cd6885017d

  Message:
    Add small node support and fix some bugs.

  Modified files:
    lib/grnxx/storage/chunk_index.cpp
    lib/grnxx/storage/chunk_index.hpp
    lib/grnxx/storage/header.cpp
    lib/grnxx/storage/header.hpp
    lib/grnxx/storage/storage_impl.cpp

  Modified: lib/grnxx/storage/chunk_index.cpp (+6 -4)
===================================================================
--- lib/grnxx/storage/chunk_index.cpp    2013-05-03 12:55:10 +0900 (877ea64)
+++ lib/grnxx/storage/chunk_index.cpp    2013-05-04 00:53:44 +0900 (e7c4369)
@@ -20,13 +20,15 @@
 namespace grnxx {
 namespace storage {
 
-ChunkIndex::ChunkIndex()
-    : id(0),
-      file_id(0),
+ChunkIndex::ChunkIndex(uint16_t id, ChunkIndexType type)
+    : id(id),
+      type(type),
       reserved_0(0),
+      file_id(0),
+      reserved_1(0),
       offset(0),
       size(0),
-      reserved_1(0) {}
+      reserved_2(0) {}
 
 }  // namespace storage
 }  // namespace grnxx

  Modified: lib/grnxx/storage/chunk_index.hpp (+13 -4)
===================================================================
--- lib/grnxx/storage/chunk_index.hpp    2013-05-03 12:55:10 +0900 (e851e2d)
+++ lib/grnxx/storage/chunk_index.hpp    2013-05-04 00:53:44 +0900 (64e0f46)
@@ -25,19 +25,28 @@ namespace storage {
 
 constexpr size_t CHUNK_INDEX_SIZE = 32;
 
+enum ChunkIndexType : uint8_t {
+  HEADER_CHUNK       = 0,
+  REGULAR_BODY_CHUNK = 1,
+  SMALL_BODY_CHUNK   = 2
+};
+
 struct ChunkIndex {
   // The chunk ID.
-  uint32_t id;
+  uint16_t id;
+  // The chunk type.
+  ChunkIndexType type;
+  uint8_t reserved_0;
   // The ID of the file to which the chunk belongs.
   uint16_t file_id;
-  uint16_t reserved_0;
+  uint16_t reserved_1;
   // The offset in file.
   uint64_t offset;
   // The chunk size.
   uint64_t size;
-  uint64_t reserved_1;
+  uint64_t reserved_2;
 
-  ChunkIndex();
+  ChunkIndex(uint16_t id, ChunkIndexType type);
 };
 
 static_assert(sizeof(ChunkIndex) == CHUNK_INDEX_SIZE,

  Modified: lib/grnxx/storage/header.cpp (+4 -2)
===================================================================
--- lib/grnxx/storage/header.cpp    2013-05-03 12:55:10 +0900 (6c14725)
+++ lib/grnxx/storage/header.cpp    2013-05-04 00:53:44 +0900 (841ae0e)
@@ -38,15 +38,17 @@ Header::Header()
       max_file_size(0),
       max_num_files(0),
       num_body_chunks(0),
+      num_small_body_chunks(0),
+      reserved_0(0),
       num_nodes(0),
-      total_size(0),
       max_num_nodes(0),
+      total_size(0),
       latest_phantom_node_id(STORAGE_INVALID_NODE_ID),
       latest_unlinked_node_id(STORAGE_INVALID_NODE_ID),
       oldest_idle_node_ids(),
       data_mutex(MUTEX_UNLOCKED),
       file_mutex(MUTEX_UNLOCKED),
-      reserved{} {
+      reserved_1{} {
   std::memcpy(version, GRNXX_STORAGE_HEADER_VERSION, HEADER_VERSION_SIZE);
   for (size_t i = 0; i < NUM_IDLE_NODE_LISTS; ++i) {
     oldest_idle_node_ids[i] = STORAGE_INVALID_NODE_ID;

  Modified: lib/grnxx/storage/header.hpp (+7 -4)
===================================================================
--- lib/grnxx/storage/header.hpp    2013-05-03 12:55:10 +0900 (bce1516)
+++ lib/grnxx/storage/header.hpp    2013-05-04 00:53:44 +0900 (e665e96)
@@ -44,15 +44,18 @@ struct Header {
   uint64_t max_file_size;
   // The maximum number of files.
   uint16_t max_num_files;
-  // The number of node body chunks.
+  // The number of body chunks.
   uint16_t num_body_chunks;
+  // The number of small body chunks.
+  uint16_t num_small_body_chunks;
+  uint16_t reserved_0;
   // The number of nodes.
   uint32_t num_nodes;
-  // The total size including headers.
-  uint64_t total_size;
   // The upper limit of the number of nodes.
   // This value is extended when a node header chunk is added.
   uint32_t max_num_nodes;
+  // The total size including headers.
+  uint64_t total_size;
   // The ID of the latest phantom node.
   // STORAGE_INVALID_NODE_ID indicates that there are no phantom nodes.
   uint32_t latest_phantom_node_id;
@@ -66,7 +69,7 @@ struct Header {
   Mutex data_mutex;
   // A mutex object for exclusively update files.
   Mutex file_mutex;
-  uint8_t reserved[148];
+  uint8_t reserved_1[144];
 
   // Initialize the members except "format".
   Header();

  Modified: lib/grnxx/storage/storage_impl.cpp (+69 -45)
===================================================================
--- lib/grnxx/storage/storage_impl.cpp    2013-05-03 12:55:10 +0900 (a2ce312)
+++ lib/grnxx/storage/storage_impl.cpp    2013-05-04 00:53:44 +0900 (cb28428)
@@ -34,28 +34,24 @@ namespace {
 
 // The size of chunk must be a multiple of CHUNK_UNIT_SIZE (64KB).
 constexpr uint64_t CHUNK_UNIT_SIZE = 1 << 16;
-// The size of node must be a multiple of NODE_UNIT_SIZE (4KB).
-constexpr uint64_t NODE_UNIT_SIZE  = 1 << 12;
-
-// TODO
-//// The size of regular node must be a multiple of REGULAR_NODE_UNIT_SIZE (4KB).
-//constexpr uint64_t REGULAR_NODE_UNIT_SIZE = 1 << 12;
-//// The size of small node must be a multiple of SMALL_NODE_UNIT_SIZE (64 bytes).
-//constexpr uint64_t SMALL_NODE_UNIT_SIZE   = 1 << 6;
-//// A node larger than NODE_SIZE_THRESHOLD (2KB) is a regular node.
-//constexpr uint64_t NODE_SIZE_THRESHOLD    = 1 << 11;
+// The size of regular node must be a multiple of REGULAR_NODE_UNIT_SIZE (4KB).
+constexpr uint64_t REGULAR_NODE_UNIT_SIZE = 1 << 12;
+// The size of small node must be a multiple of SMALL_NODE_UNIT_SIZE (64 bytes).
+constexpr uint64_t SMALL_NODE_UNIT_SIZE   = 1 << 6;
+// A node larger than NODE_SIZE_THRESHOLD (2KB) is a regular node.
+constexpr uint64_t NODE_SIZE_THRESHOLD    = 1 << 11;
 
 // The chunk size for Header and ChunkIndexes.
 constexpr uint64_t ROOT_CHUNK_SIZE = CHUNK_UNIT_SIZE;
 // The size allocated to ChunkIndexes.
 constexpr uint64_t ROOT_INDEX_SIZE = ROOT_CHUNK_SIZE - HEADER_SIZE;
 
-// The number of NodeHeaders in the initial chunk.
-constexpr uint16_t HEADER_CHUNK_UNIT_SIZE = CHUNK_UNIT_SIZE / HEADER_SIZE;
+// The number of NodeHeaders in the minimum header chunk.
+constexpr uint32_t HEADER_CHUNK_MIN_SIZE = CHUNK_UNIT_SIZE / NODE_HEADER_SIZE;
 
 // The maximum node ID.
 constexpr uint32_t MAX_NODE_ID =
-    STORAGE_INVALID_NODE_ID - HEADER_CHUNK_UNIT_SIZE;
+    STORAGE_INVALID_NODE_ID - HEADER_CHUNK_MIN_SIZE;
 
 // The maximum number of chunks for NodeHeaders.
 constexpr uint16_t MAX_NUM_HEADER_CHUNKS = 32;
@@ -65,11 +61,12 @@ constexpr uint16_t MAX_NUM_BODY_CHUNKS   =
 
 static_assert(MAX_NUM_BODY_CHUNKS >= 2000, "MAX_NUM_BODY_CHUNKS < 2000");
 
-// The minimum size of chunk for node bodies.
-// The size of an end-of-file chunk can be less than BODY_CHUNK_MIN_SiZE.
-constexpr uint64_t BODY_CHUNK_MIN_SiZE = 1 << 21;  // 2MB.
-// The ratio of the next chunk size to the storage total size.
-constexpr double BODY_CHUNK_SIZE_RATIO = 1.0 / 64;
+// The minimum size of regular body chunks.
+constexpr uint64_t REGULAR_BODY_CHUNK_MIN_SIZE = 1 << 21;  // 2MB.
+// The ratio of the next regular body chunk size to the storage total size.
+constexpr double REGULAR_BODY_CHUNK_SIZE_RATIO = 1.0 / 64;
+// The size of the minimum small body chunk.
+constexpr uint64_t SMALL_BODY_CHUNK_MIN_SIZE = CHUNK_UNIT_SIZE;
 
 }  // namespace
 
@@ -569,20 +566,20 @@ bool StorageImpl::prepare_pointers() {
 
 void StorageImpl::prepare_indexes() {
   for (uint16_t i = 0; i < MAX_NUM_HEADER_CHUNKS; ++i) {
-    header_chunk_indexes_[i] = ChunkIndex();
-    header_chunk_indexes_[i].id = i;
+    header_chunk_indexes_[i] = ChunkIndex(i, HEADER_CHUNK);
   }
   for (uint16_t i = 0; i < MAX_NUM_BODY_CHUNKS; ++i) {
-    body_chunk_indexes_[i] = ChunkIndex();
-    body_chunk_indexes_[i].id = i;
+    body_chunk_indexes_[i] = ChunkIndex(i, REGULAR_BODY_CHUNK);
   }
 }
 
 NodeHeader *StorageImpl::create_active_node(uint64_t size) {
   if (size == 0) {
-    size = NODE_UNIT_SIZE;
+    size = SMALL_NODE_UNIT_SIZE;
+  } else if (size <= NODE_SIZE_THRESHOLD) {
+    size = (size + SMALL_NODE_UNIT_SIZE - 1) & ~(SMALL_NODE_UNIT_SIZE - 1);
   } else {
-    size = (size + NODE_UNIT_SIZE - 1) & ~(NODE_UNIT_SIZE - 1);
+    size = (size + REGULAR_NODE_UNIT_SIZE - 1) & ~(REGULAR_NODE_UNIT_SIZE - 1);
   }
   NodeHeader *node_header = find_idle_node(size);
   if (!node_header) {
@@ -603,7 +600,10 @@ NodeHeader *StorageImpl::create_active_node(uint64_t size) {
 }
 
 NodeHeader *StorageImpl::find_idle_node(uint64_t size) {
-  for (size_t i = bit_scan_reverse(size); i < NUM_IDLE_NODE_LISTS; ++i) {
+  const size_t begin = bit_scan_reverse(size);
+  const size_t end = (size <= NODE_SIZE_THRESHOLD) ?
+      bit_scan_reverse(NODE_SIZE_THRESHOLD << 1) : NUM_IDLE_NODE_LISTS;
+  for (size_t i = begin; i < end; ++i) {
     if (header_->oldest_idle_node_ids[i] != STORAGE_INVALID_NODE_ID) {
       NodeHeader * const node_header =
           get_node_header(header_->oldest_idle_node_ids[i]);
@@ -621,7 +621,8 @@ NodeHeader *StorageImpl::create_idle_node(uint64_t size) {
     return nullptr;
   }
   ChunkIndex *remainder_chunk_index = nullptr;
-  ChunkIndex * const chunk_index = create_body_chunk(size);
+  ChunkIndex * const chunk_index =
+      create_body_chunk(size, &remainder_chunk_index);
   if (!chunk_index) {
     return nullptr;
   }
@@ -814,7 +815,7 @@ ChunkIndex *StorageImpl::create_header_chunk(
     return nullptr;
   }
   const uint16_t chunk_id =
-      bit_scan_reverse(header_->num_nodes + HEADER_CHUNK_UNIT_SIZE);
+      bit_scan_reverse(header_->num_nodes + HEADER_CHUNK_MIN_SIZE);
   const uint64_t size = static_cast<uint64_t>(NODE_HEADER_SIZE) << chunk_id;
   if (size > header_->max_file_size) {
     GRNXX_ERROR() << "too large chunk: size = " << size
@@ -852,12 +853,36 @@ ChunkIndex *StorageImpl::create_header_chunk(
 
 ChunkIndex *StorageImpl::create_body_chunk(
     uint64_t size, ChunkIndex **remainder_chunk_index) {
-  const uint64_t offset = header_->total_size % header_->max_file_size;
-  const uint64_t size_left = header_->max_file_size - offset;
-  if (size_left < size) {
+  uint64_t chunk_size = size;
+  if (size <= NODE_SIZE_THRESHOLD) {
+    chunk_size = SMALL_BODY_CHUNK_MIN_SIZE << header_->num_small_body_chunks;
+  }
+  uint64_t offset = header_->total_size % header_->max_file_size;
+  uint64_t size_left = header_->max_file_size - offset;
+  if (size_left < chunk_size) {
     *remainder_chunk_index = create_body_chunk(size_left);
+    size_left = header_->max_file_size;
+  }
+  if (size > NODE_SIZE_THRESHOLD) {
+    chunk_size = static_cast<uint64_t>(
+        header_->total_size * REGULAR_BODY_CHUNK_SIZE_RATIO);
+    chunk_size &= ~(CHUNK_UNIT_SIZE - 1);
+    if (size > chunk_size) {
+      chunk_size = size;
+    }
+    if (chunk_size > size_left) {
+      chunk_size = size_left;
+    }
+    if (chunk_size < REGULAR_BODY_CHUNK_MIN_SIZE) {
+      chunk_size = REGULAR_BODY_CHUNK_MIN_SIZE;
+    }
   }
-  return create_body_chunk(size);
+  ChunkIndex * const chunk_index = create_body_chunk(chunk_size);
+  if (size <= NODE_SIZE_THRESHOLD) {
+    chunk_index->type = SMALL_BODY_CHUNK;
+    ++header_->num_small_body_chunks;
+  }
+  return chunk_index;
 }
 
 ChunkIndex *StorageImpl::create_body_chunk(uint64_t size) {
@@ -882,17 +907,6 @@ ChunkIndex *StorageImpl::create_body_chunk(uint64_t size) {
                   << ", size_left = " << size_left;
     return nullptr;
   }
-  if (size < BODY_CHUNK_MIN_SiZE) {
-    size = BODY_CHUNK_MIN_SiZE;
-  }
-  const uint64_t expected_size = static_cast<uint64_t>(
-      header_->total_size * BODY_CHUNK_SIZE_RATIO);
-  if (size < expected_size) {
-    size = expected_size;
-  }
-  if (size > size_left) {
-    size = size_left;
-  }
   ChunkIndex * const chunk_index = &body_chunk_indexes_[chunk_id];
   chunk_index->file_id = file_id;
   chunk_index->offset = offset;
@@ -908,7 +922,12 @@ bool StorageImpl::register_idle_node(NodeHeader *node_header) {
     GRNXX_ERROR() << "invalid argument: status = " << node_header->status;
     return false;
   }
-  const size_t list_id = bit_scan_reverse(node_header->size);
+  size_t list_id = bit_scan_reverse(node_header->size);
+  if (body_chunk_indexes_[node_header->chunk_id].type == SMALL_BODY_CHUNK) {
+    if (list_id > bit_scan_reverse(NODE_SIZE_THRESHOLD)) {
+      list_id = bit_scan_reverse(NODE_SIZE_THRESHOLD);
+    }
+  }
   if (header_->oldest_idle_node_ids[list_id] == STORAGE_INVALID_NODE_ID) {
     // The given node is appended to the empty list.
     node_header->next_idle_node_id = node_header->id;
@@ -940,7 +959,12 @@ bool StorageImpl::unregister_idle_node(NodeHeader *node_header) {
     GRNXX_ERROR() << "invalid argument: status = " << node_header->status;
     return false;
   }
-  const size_t list_id = bit_scan_reverse(node_header->size);
+  size_t list_id = bit_scan_reverse(node_header->size);
+  if (body_chunk_indexes_[node_header->chunk_id].type == SMALL_BODY_CHUNK) {
+    if (list_id > bit_scan_reverse(NODE_SIZE_THRESHOLD)) {
+      list_id = bit_scan_reverse(NODE_SIZE_THRESHOLD);
+    }
+  }
   if (node_header->id == node_header->next_idle_node_id) {
     // The list becomes empty.
     header_->oldest_idle_node_ids[list_id] = STORAGE_INVALID_NODE_ID;
@@ -972,7 +996,7 @@ NodeHeader *StorageImpl::get_node_header(uint32_t node_id) {
     return nullptr;
   }
   const uint16_t chunk_id =
-      bit_scan_reverse(node_id + HEADER_CHUNK_UNIT_SIZE);
+      bit_scan_reverse(node_id + HEADER_CHUNK_MIN_SIZE);
   Chunk * const chunk = get_header_chunk(chunk_id);
   if (!chunk) {
     return nullptr;
-------------- next part --------------
HTML����������������������������...
下载 



More information about the Groonga-commit mailing list
Back to archive index