[Groonga-commit] groonga/groonga-command-parser at ef5dc13 [master] Support JRuby

Back to archive index

Kouhei Sutou null+****@clear*****
Thu May 21 22:20:57 JST 2015


Kouhei Sutou	2015-05-21 22:20:57 +0900 (Thu, 21 May 2015)

  New Revision: ef5dc13d8f30c2d5afd07714e2f4f2c5bbda764b
  https://github.com/groonga/groonga-command-parser/commit/ef5dc13d8f30c2d5afd07714e2f4f2c5bbda764b

  Message:
    Support JRuby
    
    It replaces yajl with ffi-yajl. Because ffi-yajl works with JRuby.
    
    And performance will be improved. Because ffi-yajl can be more
    customizable to fix Groonga's load JSON.
    
    GitHub: fix #1
    
    Reported by Hiroyuki Sato. Thanks!!!

  Added files:
    lib/groonga/command/parser/load-values-parser.rb
    test/test-load-value-parser.rb
  Modified files:
    .travis.yml
    groonga-command-parser.gemspec
    lib/groonga/command/parser.rb
    lib/groonga/command/parser/error.rb
    test/run-test.rb
    test/test-parser.rb

  Modified: .travis.yml (+1 -0)
===================================================================
--- .travis.yml    2015-05-17 11:51:52 +0900 (b3a8810)
+++ .travis.yml    2015-05-21 22:20:57 +0900 (8713059)
@@ -7,3 +7,4 @@ rvm:
   - 2.0.0
   - 2.1
   - 2.2
+  - jruby

  Modified: groonga-command-parser.gemspec (+2 -2)
===================================================================
--- groonga-command-parser.gemspec    2015-05-17 11:51:52 +0900 (0a97286)
+++ groonga-command-parser.gemspec    2015-05-21 22:20:57 +0900 (1e8cab8)
@@ -1,6 +1,6 @@
 # -*- mode: ruby; coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014  Kouhei Sutou <kou at clear-code.com>
+# Copyright (C) 2012-2015  Kouhei Sutou <kou at clear-code.com>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -53,7 +53,7 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
 
   spec.add_runtime_dependency("groonga-command", ">= 1.0.9")
-  spec.add_runtime_dependency("yajl-ruby")
+  spec.add_runtime_dependency("ffi-yajl")
 
   spec.add_development_dependency("test-unit")
   spec.add_development_dependency("test-unit-notify")

  Modified: lib/groonga/command/parser.rb (+30 -87)
===================================================================
--- lib/groonga/command/parser.rb    2015-05-17 11:51:52 +0900 (10072e8)
+++ lib/groonga/command/parser.rb    2015-05-21 22:20:57 +0900 (757e18a)
@@ -18,13 +18,13 @@
 
 require "English"
 require "cgi"
-
-require "yajl"
+require "json"
 
 require "groonga/command"
 
 require "groonga/command/parser/error"
 require "groonga/command/parser/command-line-splitter"
+require "groonga/command/parser/load-values-parser"
 require "groonga/command/parser/version"
 
 module Groonga
@@ -91,7 +91,7 @@ module Groonga
           end
           parser.on_load_complete do |command|
             parsed_command = command
-            parsed_command[:values] ||= Yajl::Encoder.encode(values)
+            parsed_command[:values] ||= JSON.generate(values)
           end
 
           consume_data(parser, data)
@@ -117,6 +117,7 @@ module Groonga
       def initialize
         reset
         initialize_hooks
+        initialize_load_values_parser
       end
 
       # Streaming parsing command.
@@ -215,78 +216,10 @@ module Groonga
       end
 
       def consume_load_values(tag)
-        if @in_load_values
-          json, separator, rest = @buffer.partition(/[\]},]/)
-          if @load_value_completed
-            throw(tag) if separator.empty?
-            if separator == ","
-              if /\A\s*\z/ =~ json
-                @command.original_source << json << separator
-                @buffer = rest
-                @load_value_completed = false
-                return
-              else
-                raise Error.new("record separate comma is missing",
-                                @command.original_source.lines.to_a.last,
-                                json)
-              end
-            elsif separator == "]"
-              if /\A\s*\z/ =~ json
-                @command.original_source << json << separator
-                @buffer = rest
-                on_load_complete(@command)
-                reset
-                return
-              end
-            end
-          end
-          @buffer = rest
-          parse_json(json)
-          if separator.empty?
-            throw(tag)
-          else
-            @load_value_completed = false
-            parse_json(separator)
-          end
-        else
-          spaces, start_json, rest = @buffer.partition("[")
-          unless /\A\s*\z/ =~ spaces
-            raise Error.new("there are garbages before JSON",
-                            @command.original_source.lines.to_a.last,
-                            spaces)
-          end
-          if start_json.empty?
-            @command.original_source << @buffer
-            @buffer.clear
-            throw(tag)
-          else
-            @command.original_source << spaces << start_json
-            @buffer = rest
-            @json_parser = Yajl::Parser.new
-            @json_parser.on_parse_complete = lambda do |object|
-              if object.is_a?(::Array) and @command.columns.nil?
-                @command.columns = object
-                on_load_columns(@command, object)
-              else
-                on_load_value(@command, object)
-              end
-              @load_value_completed = true
-            end
-            @in_load_values = true
-          end
-        end
-      end
-
-      def parse_json(json)
-        @command.original_source << json
-        begin
-          @json_parser << json
-        rescue Yajl::ParseError
-          before_json = @command.original_source[0..(-json.bytesize)]
-          message = "invalid JSON: #{$!.message}: <#{json}>:\n"
-          message << before_json
-          raise Error.new(message, before_json, json)
-        end
+        throw(tag) if @buffer.empty?
+        @command.original_source << @buffer
+        @load_values_parser << @buffer
+        @buffer.clear
       end
 
       def consume_line(tag)
@@ -322,16 +255,7 @@ module Groonga
             on_load_columns(@command, @command.columns)
           end
           if @command[:values]
-            values = Yajl::Parser.parse(@command[:values])
-            if @command.columns.nil? and values.first.is_a?(::Array)
-              header = values.shift
-              @command.columns = header
-              on_load_columns(@command, header)
-            end
-            values.each do |value|
-              on_load_value(@command, value)
-            end
-            on_load_complete(@command)
+            @load_values_parser << @command[:values]
             reset
           else
             @command.original_source << "\n"
@@ -399,8 +323,6 @@ module Groonga
       def reset
         @command = nil
         @loading = false
-        @in_load_values = false
-        @load_value_completed = false
         @buffer = "".force_encoding("ASCII-8BIT")
       end
 
@@ -411,6 +333,27 @@ module Groonga
         @on_load_value_hook = nil
         @on_load_complete_hook = nil
       end
+
+      def initialize_load_values_parser
+        @load_values_parser = LoadValuesParser.new
+        @load_values_parser.on_value = lambda do |value|
+          if value.is_a?(::Array) and @command.columns.nil?
+            @command.columns = value
+            on_load_columns(@command, value)
+          else
+            on_load_value(@command, value)
+          end
+        end
+        @load_values_parser.on_end = lambda do |rest|
+          if rest
+            original_source_size = @command.original_source.size
+            @command.original_source.slice!(original_source_size - rest.size,
+                                            rest.size)
+          end
+          on_load_complete(@command)
+          reset
+        end
+      end
     end
   end
 end

  Modified: lib/groonga/command/parser/error.rb (+17 -4)
===================================================================
--- lib/groonga/command/parser/error.rb    2015-05-17 11:51:52 +0900 (a972018)
+++ lib/groonga/command/parser/error.rb    2015-05-21 22:20:57 +0900 (765ed49)
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2011-2013  Kouhei Sutou <kou at clear-code.com>
+# Copyright (C) 2011-2015  Kouhei Sutou <kou at clear-code.com>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -42,9 +42,22 @@ module Groonga
             location << " " * before.bytesize + "^"
             location << after
           else
-            location << before
-            location << after
-            location << " " * before.bytesize + "^"
+            before_lines = before.lines
+            after_lines = after.lines
+            last_before_line = before_lines.last
+            if last_before_line
+              error_offset = last_before_line.bytesize
+            else
+              error_offset = 0
+            end
+            before_lines.each do |before_line|
+              location << before_line
+            end
+            location << after_lines.first
+            location << " " * error_offset + "^\n"
+            after_lines[1..-1].each do |after_line|
+              location << after_line
+            end
           end
           location
         end

  Added: lib/groonga/command/parser/load-values-parser.rb (+188 -0) 100644
===================================================================
--- /dev/null
+++ lib/groonga/command/parser/load-values-parser.rb    2015-05-21 22:20:57 +0900 (0b764ad)
@@ -0,0 +1,188 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2015  Kouhei Sutou <kou at clear-code.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+ENV["FORCE_FFI_YAJL"] = "ffi"
+require "ffi_yajl"
+
+module FFI_Yajl
+  attach_function :yajl_get_bytes_consumed, [:yajl_handle], :size_t
+end
+
+module Groonga
+  module Command
+    class Parser
+      class LoadValuesParser
+        attr_writer :on_value
+        attr_writer :on_end
+        def initialize
+          initialize_callbacks
+          @handle = nil
+          @callbacks_memory = nil
+          @on_value = nil
+          @on_end = nil
+          @containers = []
+          @keys = []
+        end
+
+        def <<(data)
+          data_size = data.bytesize
+          return self if data_size.zero?
+
+          ensure_handle
+
+          status = FFI_Yajl.yajl_parse(@handle, data, data_size)
+
+          if status != :yajl_status_ok
+            consumed = FFI_Yajl.yajl_get_bytes_consumed(@handle)
+            if consumed > 0
+              consumed -= 1
+            end
+            if @containers.empty?
+              message = "there are garbages before JSON"
+            else
+              message = FFI_Yajl.yajl_get_error(@handle, 0, nil, 0).chomp
+            end
+            begin
+              raise Error.new(message,
+                              data[0, consumed],
+                              data[consumed..-1])
+            ensure
+              finalize_handle
+            end
+          end
+
+          if @containers.empty?
+            consumed = FFI_Yajl.yajl_get_bytes_consumed(@handle)
+            begin
+              if consumed < data_size
+                @on_end.call(data[consumed..-1])
+              else
+                @on_end.call(nil)
+              end
+            ensure
+              finalize_handle
+            end
+          end
+
+          self
+        end
+
+        private
+        def callback(*arguments)
+          FFI::Function.new(:int, [:pointer, *arguments]) do |_, *values|
+            yield(*values)
+            1
+          end
+        end
+
+        def initialize_callbacks
+          @null_callback = callback do
+            push_value(nil)
+          end
+          @boolean_callback = callback(:int) do |c_boolean|
+            push_value(c_boolean != 0)
+          end
+          @integer_callback = callback(:long_long) do |integer|
+            push_value(integer)
+          end
+          @double_callback = callback(:double) do |double|
+            push_value(double)
+          end
+          @string_callback = callback(:string, :size_t) do |data, size|
+            string = data.slice(0, size)
+            string.force_encoding(Encoding::UTF_8)
+            push_value(string)
+          end
+          @start_map_callback = callback do
+            push_container({})
+          end
+          @map_key_callback = callback(:string, :size_t) do |data, size|
+            key = data.slice(0, size)
+            key.force_encoding(Encoding::UTF_8)
+            @keys.push(key)
+          end
+          @end_map_callback = callback do
+            pop_container
+          end
+          @start_array_callback = callback do
+            push_container([])
+          end
+          @end_array_callback = callback do
+            pop_container
+          end
+        end
+
+        def push_container(container)
+          @containers.push(container)
+        end
+
+        def pop_container
+          container = @containers.pop
+          if @containers.size == 1
+            @on_value.call(container)
+          else
+            push_value(container)
+          end
+        end
+
+        def push_value(value)
+          container = @containers.last
+          case container
+          when Hash
+            container[@keys.pop] = value
+          when Array
+            container.push(value)
+          end
+        end
+
+        def ensure_handle
+          return if @handle
+          initialize_handle
+        end
+
+        def initialize_handle
+          @callbacks_memory = FFI::MemoryPointer.new(FFI_Yajl::YajlCallbacks)
+          callbacks = FFI_Yajl::YajlCallbacks.new(@callbacks_memory)
+          callbacks[:yajl_null] = @null_callback
+          callbacks[:yajl_boolean] = @boolean_callback
+          callbacks[:yajl_integer] = @integer_callback
+          callbacks[:yajl_double] = @double_callback
+          callbacks[:yajl_number] = nil
+          callbacks[:yajl_string] = @string_callback
+          callbacks[:yajl_start_map] = @start_map_callback
+          callbacks[:yajl_map_key] = @map_key_callback
+          callbacks[:yajl_end_map] = @end_map_callback
+          callbacks[:yajl_start_array] = @start_array_callback
+          callbacks[:yajl_end_array] = @end_array_callback
+
+          @handle = FFI_Yajl.yajl_alloc(@callbacks_memory, nil, nil)
+          FFI_Yajl.yajl_config(@handle,
+                               :yajl_allow_trailing_garbage,
+                               :int,
+                               1)
+        end
+
+        def finalize_handle
+          @callbacks_memory = nil
+          FFI_Yajl.yajl_free(@handle)
+          @handle = nil
+        end
+      end
+    end
+  end
+end

  Modified: test/run-test.rb (+7 -0)
===================================================================
--- test/run-test.rb    2015-05-17 11:51:52 +0900 (3e82a6d)
+++ test/run-test.rb    2015-05-21 22:20:57 +0900 (da73afe)
@@ -36,6 +36,13 @@ end
 $LOAD_PATH.unshift(lib_dir)
 $LOAD_PATH.unshift(test_dir)
 
+# TODO: Remove me when suppress warnings patches are merged into
+# ffi_yajl.
+require "stringio"
+$VERBOSE = false
+require "ffi_yajl/ffi"
+$VERBOSE = true
+
 require "groonga-command-parser-test-utils"
 
 ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"

  Added: test/test-load-value-parser.rb (+149 -0) 100644
===================================================================
--- /dev/null
+++ test/test-load-value-parser.rb    2015-05-21 22:20:57 +0900 (10eb20b)
@@ -0,0 +1,149 @@
+# Copyright (C) 2015  Kouhei Sutou <kou at clear-code.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+class LoadValuesParserTest < Test::Unit::TestCase
+  def setup
+    @values = []
+    @parser = Groonga::Command::Parser::LoadValuesParser.new
+    @parser.on_value = lambda do |value|
+      @values << value
+    end
+    @parser.on_end = lambda do |rest|
+    end
+  end
+
+  def parse(data)
+    data.each_line do |line|
+    @parser << line
+    end
+    @values
+  end
+
+  sub_test_case "array" do
+    test "empty" do
+      assert_equal([[]],
+                   parse("[[]]"))
+    end
+
+    test "no container" do
+      assert_equal([[1, "abc", 2.9]],
+                   parse(<<-JSON))
+[
+[1, "abc", 2.9]
+]
+      JSON
+    end
+
+    test "array" do
+      assert_equal([[[1, "abc", 2.9]]],
+                   parse(<<-JSON))
+[
+[[1, "abc", 2.9]]
+]
+      JSON
+    end
+
+    test "object" do
+      assert_equal([
+                     [
+                       {
+                         "number" => 1,
+                         "string" => "abc",
+                         "double" => 2.9,
+                       },
+                     ],
+                   ],
+                   parse(<<-JSON))
+[
+  [
+    {
+      "number": 1,
+      "string": "abc",
+      "double": 2.9
+    }
+  ]
+]
+      JSON
+    end
+  end
+
+  sub_test_case "object" do
+    test "empty" do
+      assert_equal([{}],
+                   parse("[{}]"))
+    end
+
+    test "no container" do
+      assert_equal([
+                     {
+                       "number" => 1,
+                       "string" => "abc",
+                       "double" => 2.9,
+                     },
+                   ],
+                   parse(<<-JSON))
+[
+  {
+    "number": 1,
+    "string": "abc",
+    "double": 2.9
+  }
+]
+      JSON
+    end
+
+    test "array" do
+      assert_equal([
+                     {
+                       "array" => [1, "abc", 2.9],
+                     },
+                   ],
+                   parse(<<-JSON))
+[
+  {
+    "array": [1, "abc", 2.9]
+  }
+]
+      JSON
+    end
+
+    test "object" do
+      assert_equal([
+                     {
+                       "object" => {
+                         "number" => 1,
+                         "string" => "abc",
+                         "double" => 2.9,
+                       },
+                     },
+                   ],
+                   parse(<<-JSON))
+[
+  {
+    "object": {
+      "number": 1,
+      "string": "abc",
+      "double": 2.9
+    }
+  }
+]
+[
+  1
+]
+      JSON
+    end
+  end
+end

  Modified: test/test-parser.rb (+31 -14)
===================================================================
--- test/test-parser.rb    2015-05-17 11:51:52 +0900 (d02b853)
+++ test/test-parser.rb    2015-05-21 22:20:57 +0900 (436cb7c)
@@ -228,10 +228,11 @@ EOC
               expected_events << [:load_columns, <<-EOC.chomp, ["_key", "name"]]
 load --table Users --columns "_key, name"
 EOC
-              expected_events << [:load_value, <<-EOC.chomp, ["alice", "Alice"]]
+              expected_events << [:load_value, <<-EOC, ["alice", "Alice"]]
 load --table Users --columns "_key, name"
 [
 ["alice", "Alice"]
+]
 EOC
               expected_events << [:load_complete, <<-EOC.chomp]
 load --table Users --columns "_key, name"
@@ -254,16 +255,19 @@ EOC
               expected_events << [:load_start, <<-EOC.chomp]
 load --table Users
 EOC
-              expected_events << [:load_columns, <<-EOC.chomp, ["_key", "name"]]
+              expected_events << [:load_columns, <<-EOC, ["_key", "name"]]
 load --table Users
 [
-["_key", "name"]
+["_key", "name"],
+["alice", "Alice"]
+]
 EOC
-              expected_events << [:load_value, <<-EOC.chomp, ["alice", "Alice"]]
+              expected_events << [:load_value, <<-EOC, ["alice", "Alice"]]
 load --table Users
 [
 ["_key", "name"],
 ["alice", "Alice"]
+]
 EOC
               expected_events << [:load_complete, <<-EOC.chomp]
 load --table Users
@@ -289,17 +293,20 @@ EOC
 load --table Users
 EOC
             value = {"_key" => "alice", "name" => "Alice"}
-            expected_events << [:load_value, <<-EOC.chomp, value]
+            expected_events << [:load_value, <<-EOC, value]
 load --table Users
 [
-{"_key": "alice", "name": "Alice"}
+{"_key": "alice", "name": "Alice"},
+{"_key": "bob",   "name": "Bob"}
+]
 EOC
             value = {"_key" => "bob", "name" => "Bob"}
-            expected_events << [:load_value, <<-EOC.chomp, value]
+            expected_events << [:load_value, <<-EOC, value]
 load --table Users
 [
 {"_key": "alice", "name": "Alice"},
 {"_key": "bob",   "name": "Bob"}
+]
 EOC
             expected_events << [:load_complete, <<-EOC.chomp]
 load --table Users
@@ -327,9 +334,14 @@ EOS
           end
 
           def test_no_record_separate_comma
-            message = "record separate comma is missing"
-            before = "{\"_key\": \"alice\", \"name\": \"Alice\"}"
-            after = "\n{\"_key\": \"bob\""
+            message = "parse error: after array element, I expect ',' or ']'"
+            before = <<-BEFORE
+[
+{"_key": "alice", "name": "Alice"}
+            BEFORE
+            after = <<-AFTER
+{"_key": "bob",   "name": "Bob"}
+            AFTER
             error = Groonga::Command::Parser::Error.new(message, before, after)
             assert_raise(error) do
               @parser << <<-EOC
@@ -343,17 +355,22 @@ EOC
 
           def test_garbage_before_json
             message = "there are garbages before JSON"
-            before = "load --table Users\n"
-            after = "XXX\n"
+            before = ""
+            after = <<-AFTER
+XXX
+[
+{"_key": "alice", "name": "Alice"}
+]
+            AFTER
             error = Groonga::Command::Parser::Error.new(message, before, after)
             assert_raise(error) do
-              @parser << <<-EOC
+              @parser << <<-JSON
 load --table Users
 XXX
 [
 {"_key": "alice", "name": "Alice"}
 ]
-EOC
+              JSON
             end
           end
         end
-------------- next part --------------
An HTML attachment was scrubbed...
下载 



More information about the Groonga-commit mailing list
Back to archive index