susumu.yata
null+****@clear*****
Tue Dec 16 10:42:55 JST 2014
susumu.yata 2014-11-17 17:31:49 +0900 (Mon, 17 Nov 2014) New Revision: 575be14337664c835394c10a6b03b36324229d77 https://github.com/groonga/grnxx/commit/575be14337664c835394c10a6b03b36324229d77 Message: Add Column<Vector<Int>>. (#110) Modified files: lib/grnxx/impl/column/base.cpp lib/grnxx/impl/column/vector.hpp lib/grnxx/impl/column/vector/Makefile.am lib/grnxx/impl/column/vector/int.cpp lib/grnxx/impl/column/vector/int.hpp Modified: lib/grnxx/impl/column/base.cpp (+4 -4) =================================================================== --- lib/grnxx/impl/column/base.cpp 2014-11-17 17:17:38 +0900 (97d4f55) +++ lib/grnxx/impl/column/base.cpp 2014-11-17 17:31:49 +0900 (efec40f) @@ -203,10 +203,10 @@ std::unique_ptr<ColumnBase> ColumnBase::create( column.reset(new impl::Column<Vector<Bool>>(table, name, options)); break; } -// case INT_VECTOR_DATA: { -// column.reset(new impl::Column<Vector<Int>>(table, name, options)); -// break; -// } + case INT_VECTOR_DATA: { + column.reset(new impl::Column<Vector<Int>>(table, name, options)); + break; + } // case FLOAT_VECTOR_DATA: { // column.reset(new impl::Column<Vector<Float>>(table, name, options)); // break; Modified: lib/grnxx/impl/column/vector.hpp (+1 -1) =================================================================== --- lib/grnxx/impl/column/vector.hpp 2014-11-17 17:17:38 +0900 (1721cf5) +++ lib/grnxx/impl/column/vector.hpp 2014-11-17 17:31:49 +0900 (76c7d66) @@ -4,7 +4,7 @@ #include "grnxx/impl/column/vector/bool.hpp" //#include "grnxx/impl/column/vector/float.hpp" //#include "grnxx/impl/column/vector/geo_point.hpp" -//#include "grnxx/impl/column/vector/int.hpp" +#include "grnxx/impl/column/vector/int.hpp" //#include "grnxx/impl/column/vector/text.hpp" #endif // GRNXX_IMPL_COLUMN_VECTOR_HPP Modified: lib/grnxx/impl/column/vector/Makefile.am (+2 -2) =================================================================== --- lib/grnxx/impl/column/vector/Makefile.am 2014-11-17 17:17:38 +0900 (01b77d1) +++ 
lib/grnxx/impl/column/vector/Makefile.am 2014-11-17 17:31:49 +0900 (c176333) @@ -9,11 +9,11 @@ lib_LTLIBRARIES = libgrnxx_impl_column_vector.la libgrnxx_impl_column_vector_la_LDFLAGS = @AM_LTLDFLAGS@ libgrnxx_impl_column_vector_la_SOURCES = \ - bool.cpp + bool.cpp \ + int.cpp # float.cpp \ # geo_point.cpp \ -# int.cpp \ # text.cpp libgrnxx_impl_column_vector_includedir = ${includedir}/grnxx/impl/column/vector Modified: lib/grnxx/impl/column/vector/int.cpp (+181 -110) =================================================================== --- lib/grnxx/impl/column/vector/int.cpp 2014-11-17 17:17:38 +0900 (c743c08) +++ lib/grnxx/impl/column/vector/int.cpp 2014-11-17 17:31:49 +0900 (370cd73) @@ -1,152 +1,223 @@ -#include "grnxx/impl/column/column_vector_int.hpp" +#include "grnxx/impl/column/vector/int.hpp" + +#include <cstring> -#include "grnxx/cursor.hpp" #include "grnxx/impl/db.hpp" #include "grnxx/impl/table.hpp" +//#include "grnxx/impl/index.hpp" namespace grnxx { namespace impl { -bool Column<Vector<Int>>::set(Error *error, Int row_id, - const Datum &datum) { - if (datum.type() != INT_VECTOR_DATA) { - GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Wrong data type"); - return false; +Column<Vector<Int>>::Column(Table *table, + const String &name, + const ColumnOptions &options) + : ColumnBase(table, name, INT_VECTOR_DATA), + headers_(), + bodies_() { + if (!options.reference_table_name.is_empty()) { + reference_table_ = table->_db()->find_table(options.reference_table_name); + if (!reference_table_) { + throw "Table not found"; // TODO + } } - if (!table_->test_row(error, row_id)) { - return false; +} + +Column<Vector<Int>>::~Column() {} + +void Column<Vector<Int>>::set(Int row_id, const Datum &datum) { + Vector<Int> new_value = parse_datum(datum); + if (!table_->test_row(row_id)) { + throw "Invalid row ID"; // TODO } - Vector<Int> value = datum.force_int_vector(); - if (value.size() == 0) { - headers_[row_id] = 0; - return true; + if (new_value.is_na()) { + unset(row_id); 
+ return; } - if (ref_table_) { - for (Int i = 0; i < value.size(); ++i) { - if (!ref_table_->test_row(error, value[i])) { - return false; + if (reference_table_) { + size_t new_value_size = new_value.size().value(); + for (size_t i = 0; i < new_value_size; ++i) { + if (!reference_table_->test_row(new_value[i])) { + throw "Invalid reference"; // TODO } } } - Int offset = bodies_.size(); - if (value.size() < 0xFFFF) { - if (!bodies_.resize(error, offset + value.size())) { - return false; - } - for (Int i = 0; i < value.size(); ++i) { - bodies_[offset + i] = value[i]; - } - headers_[row_id] = (offset << 16) | value.size(); + Vector<Int> old_value = get(row_id); + if ((old_value == new_value).is_true()) { + return; + } + if (!old_value.is_na()) { + // TODO: Remove the old value from indexes. +// for (size_t i = 0; i < num_indexes(); ++i) { +// indexes_[i]->remove(row_id, old_value); +// } + } + size_t value_id = row_id.value(); + if (value_id >= headers_.size()) { + headers_.resize(value_id + 1, na_header()); + } + // TODO: Insert the new value into indexes. +// for (size_t i = 0; i < num_indexes(); ++i) try { +// indexes_[i]->insert(row_id, datum)) { +// } catch (...) { +// for (size_t j = 0; j < i; ++i) { +// indexes_[j]->remove(row_id, datum); +// } +// throw; +// } + // TODO: Error handling. + size_t offset = bodies_.size(); + size_t size = new_value.size().value(); + uint64_t header; + if (size < 0xFFFF) { + bodies_.resize(offset + size); + std::memcpy(&bodies_[offset], new_value.data(), sizeof(Int) * size); + header = (offset << 16) | size; } else { // The size of a long vector is stored in front of the body. 
- if (!bodies_.resize(error, offset + 1 + value.size())) { - return false; + if ((offset % sizeof(uint64_t)) != 0) { + offset += sizeof(uint64_t) - (offset % sizeof(uint64_t)); } - bodies_[offset] = value.size(); - for (Int i = 0; i < value.size(); ++i) { - bodies_[offset + 1 + i] = value[i]; - } - headers_[row_id] = (offset << 16) | 0xFFFF; + bodies_.resize(offset + sizeof(uint64_t) + size); + *reinterpret_cast<uint64_t *>(&bodies_[offset]) = size; + std::memcpy(&bodies_[offset + sizeof(uint64_t)], + new_value.data(), sizeof(Int) * size); + header = (offset << 16) | 0xFFFF; } - return true; + headers_[value_id] = header; } -bool Column<Vector<Int>>::get(Error *error, Int row_id, Datum *datum) const { - if (!table_->test_row(error, row_id)) { - return false; +void Column<Vector<Int>>::get(Int row_id, Datum *datum) const { + size_t value_id = row_id.value(); + if (value_id >= headers_.size()) { + *datum = Vector<Int>::na(); + } else { + // TODO + *datum = get(row_id); } - *datum = get(row_id); - return true; } -unique_ptr<Column<Vector<Int>>> Column<Vector<Int>>::create( - Error *error, - Table *table, - const StringCRef &name, - const ColumnOptions &options) { - unique_ptr<Column> column(new (nothrow) Column); - if (!column) { - GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed"); - return nullptr; - } - if (!column->initialize_base(error, table, name, INT_VECTOR_DATA, options)) { - return nullptr; - } - if (!column->headers_.resize(error, table->max_row_id() + 1, 0)) { - return nullptr; - } - if (column->ref_table()) { - if (!column->ref_table_->append_referrer_column(error, column.get())) { - return nullptr; +bool Column<Vector<Int>>::contains(const Datum &datum) const { + // TODO: Use an index if exists. 
+ Vector<Int> value = parse_datum(datum); + if (value.is_na()) { + for (size_t i = 0; i < headers_.size(); ++i) { + if (headers_[i] == na_header()) { + return true; + } + } + } else { + for (size_t i = 0; i < headers_.size(); ++i) { + // TODO: Improve this. + if ((get(Int(i)) == value).is_true()) { + return true; + } } } - return column; + return false; } -Column<Vector<Int>>::~Column() {} - -bool Column<Vector<Int>>::set_default_value(Error *error, Int row_id) { - if (row_id >= headers_.size()) { - if (!headers_.resize(error, row_id + 1)) { - return false; +Int Column<Vector<Int>>::find_one(const Datum &datum) const { + // TODO: Use an index if exists. + Vector<Int> value = parse_datum(datum); + if (value.is_na()) { + for (size_t i = 0; i < headers_.size(); ++i) { + if (headers_[i] == na_header()) { + return Int(i); + } + } + } else { + for (size_t i = 0; i < headers_.size(); ++i) { + // TODO: Improve this. + if ((get(Int(i)) == value).is_true()) { + return Int(i); + } } } - headers_[row_id] = 0; - return true; + return Int::na(); } void Column<Vector<Int>>::unset(Int row_id) { - headers_[row_id] = 0; + Vector<Int> value = get(row_id); + if (!value.is_na()) { + // TODO: Update indexes if exist. +// for (size_t i = 0; i < num_indexes(); ++i) { +// indexes_[i]->remove(row_id, value); +// } + headers_[row_id.value()] = na_header(); + } } -void Column<Vector<Int>>::clear_references(Int row_id) { - auto cursor = table_->create_cursor(nullptr); - if (!cursor) { - // Error. - return; +void Column<Vector<Int>>::read(ArrayCRef<Record> records, + ArrayRef<Vector<Int>> values) const { + if (records.size() != values.size()) { + throw "Data size conflict"; // TODO + } + for (size_t i = 0; i < records.size(); ++i) { + values.set(i, get(records[i].row_id)); } - Array<Record> records; - for ( ; ; ) { - auto result = cursor->read(nullptr, 1024, &records); - if (!result.is_ok) { - // Error. 
- return; - } else if (result.count == 0) { - return; +} + +Vector<Int> Column<Vector<Int>>::parse_datum(const Datum &datum) { + switch (datum.type()) { + case NA_DATA: { + return Vector<Int>::na(); } - for (Int i = 0; i < records.size(); ++i) { - Int value_row_id = records.get_row_id(i); - Int value_size = static_cast<Int>(headers_[value_row_id] & 0xFFFF); - if (value_size == 0) { - continue; - } - Int value_offset = static_cast<Int>(headers_[value_row_id] >> 16); - if (value_size >= 0xFFFF) { - value_size = bodies_[value_offset]; - ++value_offset; - } - Int count = 0; - for (Int j = 0; j < value_size; ++j) { - if (bodies_[value_offset + j] != row_id) { - bodies_[value_offset + count] = bodies_[value_offset + j]; - ++count; - } - } - if (count < value_size) { - if (count == 0) { - headers_[value_row_id] = 0; - } else if (count < 0xFFFF) { - headers_[value_row_id] = count | (value_offset << 16); - } else { - bodies_[value_offset - 1] = count; - } - } + case INT_VECTOR_DATA: { + return datum.as_int_vector(); + } + default: { + throw "Wrong data type"; // TODO } - records.clear(); } } -Column<Vector<Int>>::Column() : ColumnBase(), headers_(), bodies_() {} +//void Column<Vector<Int>>::clear_references(Int row_id) { +// auto cursor = table_->create_cursor(nullptr); +// if (!cursor) { +// // Error. +// return; +// } +// Array<Record> records; +// for ( ; ; ) { +// auto result = cursor->read(nullptr, 1024, &records); +// if (!result.is_ok) { +// // Error. 
+// return; +// } else if (result.count == 0) { +// return; +// } +// for (Int i = 0; i < records.size(); ++i) { +// Int value_row_id = records.get_row_id(i); +// Int value_size = static_cast<Int>(headers_[value_row_id] & 0xFFFF); +// if (value_size == 0) { +// continue; +// } +// Int value_offset = static_cast<Int>(headers_[value_row_id] >> 16); +// if (value_size >= 0xFFFF) { +// value_size = bodies_[value_offset]; +// ++value_offset; +// } +// Int count = 0; +// for (Int j = 0; j < value_size; ++j) { +// if (bodies_[value_offset + j] != row_id) { +// bodies_[value_offset + count] = bodies_[value_offset + j]; +// ++count; +// } +// } +// if (count < value_size) { +// if (count == 0) { +// headers_[value_row_id] = 0; +// } else if (count < 0xFFFF) { +// headers_[value_row_id] = count | (value_offset << 16); +// } else { +// bodies_[value_offset - 1] = count; +// } +// } +// } +// records.clear(); +// } +//} } // namespace impl } // namespace grnxx Modified: lib/grnxx/impl/column/vector/int.hpp (+99 -31) =================================================================== --- lib/grnxx/impl/column/vector/int.hpp 2014-11-17 17:17:38 +0900 (e009fe5) +++ lib/grnxx/impl/column/vector/int.hpp 2014-11-17 17:31:49 +0900 (9fec61e) @@ -1,71 +1,139 @@ #ifndef GRNXX_IMPL_COLUMN_VECTOR_INT_HPP #define GRNXX_IMPL_COLUMN_VECTOR_INT_HPP -#include "grnxx/impl/column/column.hpp" +#include <limits> +#include <cstdint> + +#include "grnxx/impl/column/base.hpp" namespace grnxx { namespace impl { -// TODO +template <typename T> class Column; + template <> class Column<Vector<Int>> : public ColumnBase { public: - // -- Public API -- + // -- Public API (grnxx/column.hpp) -- - bool set(Error *error, Int row_id, const Datum &datum); - bool get(Error *error, Int row_id, Datum *datum) const; + Column(Table *table, const String &name, const ColumnOptions &options); + ~Column(); - // -- Internal API -- + void set(Int row_id, const Datum &datum); + void get(Int row_id, Datum *datum) const; - // 
Create a new column. - // - // Returns a pointer to the column on success. - // On failure, returns nullptr and stores error information into "*error" if - // "error" != nullptr. - static unique_ptr<Column> create(Error *error, - Table *table, - const StringCRef &name, - const ColumnOptions &options); + bool contains(const Datum &datum) const; + Int find_one(const Datum &datum) const; - ~Column(); + // -- Internal API (grnxx/impl/column/base.hpp) -- - bool set_default_value(Error *error, Int row_id); void unset(Int row_id); - void clear_references(Int row_id); + // -- Internal API -- - // Return a value identified by "row_id". + // Return a value. // - // Assumes that "row_id" is valid. Otherwise, the result is undefined. + // If "row_id" is valid, returns the stored value. + // If "row_id" is invalid, returns N/A. + // + // TODO: Vector cannot reuse allocated memory because of this interface. Vector<Int> get(Int row_id) const { - Int size = static_cast<Int>(headers_[row_id] & 0xFFFF); + size_t value_id = row_id.value(); + if (value_id >= headers_.size()) { + return Vector<Int>::na(); + } + if (headers_[value_id] == na_header()) { + return Vector<Int>::na(); + } + size_t size = headers_[value_id] & 0xFFFF; if (size == 0) { return Vector<Int>(nullptr, 0); } - Int offset = static_cast<Int>(headers_[row_id] >> 16); + size_t offset = headers_[value_id] >> 16; if (size < 0xFFFF) { return Vector<Int>(&bodies_[offset], size); } else { // The size of a long vector is stored in front of the body. - size = bodies_[offset]; + size = *reinterpret_cast<const uint64_t *>(&bodies_[offset]); return Vector<Int>(&bodies_[offset + 1], size); } } - // Read values. - void read(ArrayCRef<Record> records, ArrayRef<Vector<Int>> values) const { - for (Int i = 0; i < records.size(); ++i) { - values.set(i, get(records.get_row_id(i))); - } - } + // + // On failure, throws an exception. 
+ void read(ArrayCRef<Record> records, ArrayRef<Vector<Int>> values) const; private: - Array<UInt> headers_; + Array<uint64_t> headers_; Array<Int> bodies_; - Column(); + static constexpr uint64_t na_header() { + return std::numeric_limits<uint64_t>::max(); + } + + static Vector<Int> parse_datum(const Datum &datum); }; +//// TODO +//template <> +//class Column<Vector<Int>> : public ColumnBase { +// public: +// // -- Public API -- + +// bool set(Error *error, Int row_id, const Datum &datum); +// bool get(Error *error, Int row_id, Datum *datum) const; + +// // -- Internal API -- + +// // Create a new column. +// // +// // Returns a pointer to the column on success. +// // On failure, returns nullptr and stores error information into "*error" if +// // "error" != nullptr. +// static unique_ptr<Column> create(Error *error, +// Table *table, +// const StringCRef &name, +// const ColumnOptions &options); + +// ~Column(); + +// bool set_default_value(Error *error, Int row_id); +// void unset(Int row_id); + +// void clear_references(Int row_id); + +// // Return a value identified by "row_id". +// // +// // Assumes that "row_id" is valid. Otherwise, the result is undefined. +// Vector<Int> get(Int row_id) const { +// Int size = static_cast<Int>(headers_[row_id] & 0xFFFF); +// if (size == 0) { +// return Vector<Int>(nullptr, 0); +// } +// Int offset = static_cast<Int>(headers_[row_id] >> 16); +// if (size < 0xFFFF) { +// return Vector<Int>(&bodies_[offset], size); +// } else { +// // The size of a long vector is stored in front of the body. +// size = bodies_[offset]; +// return Vector<Int>(&bodies_[offset + 1], size); +// } +// } + +// // Read values. 
+// void read(ArrayCRef<Record> records, ArrayRef<Vector<Int>> values) const { +// for (Int i = 0; i < records.size(); ++i) { +// values.set(i, get(records.get_row_id(i))); +// } +// } + +// private: +// Array<UInt> headers_; +// Array<Int> bodies_; + +// Column(); +//}; + } // namespace impl } // namespace grnxx -------------- next part -------------- HTMLの添付ファイルを保管しました... 下载