RubyGems - traject - Versions diffs - 3.1.0.rc1 → 3.1.0 - Mend

traject 3.1.0.rc1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/CHANGES.md +4 -0
data/README.md +2 -0
data/lib/traject/solr_json_writer.rb +20 -10
data/lib/traject/version.rb +1 -1
metadata +4 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 06c28d37f9aafafe709a146c7612e5b5d8a5c58a61fd1502823a38dc52b9d05b
-  data.tar.gz: 2e38b2b8c4030456f3757ae6062231268110d68ef07e10cab722b4074ccd570c
+  metadata.gz: 7bc0afc820efb8a2479d96913ae67552556978d264e20a55d0a4d4cef5ff9a2a
+  data.tar.gz: 773529d885d46d2ba0afd323d680724678225bd3616a720dba92809479395dce
 SHA512:
-  metadata.gz: 04561a77a3e6f2073198983b5bf7d4e35cc9f52bccc1211487cc4c850b0f0b0fc9395a7c87e6ed90061f4a15af57516434d260c649fbc43ea65a0c6435194818
-  data.tar.gz: c7312156c3be556218e319e35ae76aa97fbae5fad6720dbce2e4a046ec90603f5de34fe2cb055425fb3da499922fba50c7d4a6445858793bb0a4fb26cf8f7b29
+  metadata.gz: 395f1a3cf62cfd0cccbdcd428830feb098586c9ca1edc7adecff31483545a62dbc06631e7c166ea0ce1b229afeddcec7d994245d78871f1a6644c591d6cc432c
+  data.tar.gz: 647f9cfbf0a7a0876cef29b5a90a9fd6d3dd1f6a1010b332d58e132875b5c4a7f8be9bdab1afabb5ec0bc583af9ea0d8a422cc87eec6c04d93c9fe9f4b4d1e9a

data/CHANGES.md CHANGED Viewed

@@ -24,6 +24,10 @@
   * SolrJsonWriter now respects a `solr_writer.http_timeout` setting, in seconds, to be passed to HTTPClient instance. https://github.com/traject/traject/pull/219
+  * Only runs thread pool shutdown code (and logging) if there is a `solr_writer.thread_pool` greater than 0. Keep it out of the logs if it was a no-op anyway.
+  * Logs at DEBUG level every time it sends an update request to solr
 * Nokogiri dependency for the NokogiriReader increased to `~> 1.9`. When using JRuby `each_record_xpath`, resulting yielded documents may have xmlns declarations on different nodes than in MRI (and previous versions of nokogiri), but we could find no way around this with nokogiri >= 1.9.0. The documents should still be semantically equivalent for namespace use. This was necessary to keep JRuby Nokogiri XML working with recent Nokogiri releases.  https://github.com/traject/traject/pull/209
 * LineWriter guesses better about when to auto-close, and provides an optional explicit setting in case it guesses wrong. (thanks @justinlittman) https://github.com/traject/traject/pull/211

data/README.md CHANGED Viewed

@@ -175,6 +175,8 @@ TranslationMap use above is just one example of a transformation macro, that tra
 * `append("--after each value")`
 * `gsub(/regex/, "replacement")`
 * `split(" ")`: take values and split them, possibly result in multiple values.
+* `transform(proc)`: transform each existing value using a proc, kind of like `map`.
+   e.g. `to_field "something", extract_xml("//author"), transform( ->(author) { "#{author.last}, #{author.first}" })`
 You can add on as many transformation macros as you want, they will be applied to output in order.

data/lib/traject/solr_json_writer.rb CHANGED Viewed

@@ -185,6 +185,9 @@ class Traject::SolrJsonWriter
   # @param [Array<Traject::Indexer::Context>] an array of contexts
   def send_batch(batch)
     return if batch.empty?
+    logger.debug("#{self.class.name}: sending batch of #{batch.size} to Solr")
     json_package = JSON.generate(batch.map { |c| c.output_hash })
     begin
@@ -209,12 +212,15 @@ class Traject::SolrJsonWriter
   # Send a single context to Solr, logging an error if need be
   # @param [Traject::Indexer::Context] c The context whose document you want to send
   def send_single(c)
+    logger.debug("#{self.class.name}: sending single record to Solr: #{c.output_hash}")
     json_package = JSON.generate([c.output_hash])
     begin
-      resp = @http_client.post solr_update_url_with_query(@solr_update_args), json_package, "Content-type" => "application/json"
+      post_url = solr_update_url_with_query(@solr_update_args)
+      resp = @http_client.post post_url, json_package, "Content-type" => "application/json"
       unless resp.status == 200
-        raise BadHttpResponse.new("Unexpected HTTP response status #{resp.status}", resp)
+        raise BadHttpResponse.new("Unexpected HTTP response status #{resp.status} from POST #{post_url}", resp)
       end
       # Catch Timeouts and network errors -- as well as non-200 http responses --
@@ -234,7 +240,7 @@ class Traject::SolrJsonWriter
       if @max_skipped and skipped_record_count > @max_skipped
         # re-raising in rescue means the last encountered error will be available as #cause
         # on raised exception, a feature in ruby 2.1+.
-        raise MaxSkippedRecordsExceeded.new("#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting")
+        raise MaxSkippedRecordsExceeded.new("#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting: #{exception.message}")
       end
     end
   end
@@ -255,6 +261,8 @@ class Traject::SolrJsonWriter
   # There is no built-in way to direct a record to be deleted from an indexing config
   # file at the moment, this is just a loose method on the writer.
   def delete(id)
+    logger.debug("#{self.class.name}: Sending delete to Solr for #{id}")
     json_package = {delete: id}
     resp = @http_client.post solr_update_url_with_query(@solr_update_args), JSON.generate(json_package), "Content-type" => "application/json"
     if resp.status != 200
@@ -282,14 +290,16 @@ class Traject::SolrJsonWriter
       @thread_pool.maybe_in_thread_pool { send_batch(batch) }
     end
-    # Wait for shutdown, and time it.
-    logger.debug "#{self.class.name}: Shutting down thread pool, waiting if needed..."
-    elapsed = @thread_pool.shutdown_and_wait
-    if elapsed > 60
-      logger.warn "Waited #{elapsed} seconds for all threads, you may want to increase solr_writer.thread_pool (currently #{@settings["solr_writer.thread_pool"]})"
+    if @thread_pool_size && @thread_pool_size > 0
+      # Wait for shutdown, and time it.
+      logger.debug "#{self.class.name}: Shutting down thread pool, waiting if needed..."
+      elapsed = @thread_pool.shutdown_and_wait
+      if elapsed > 60
+        logger.warn "Waited #{elapsed} seconds for all threads, you may want to increase solr_writer.thread_pool (currently #{@settings["solr_writer.thread_pool"]})"
+      end
+      logger.debug "#{self.class.name}: Thread pool shutdown complete"
+      logger.warn "#{self.class.name}: #{skipped_record_count} skipped records" if skipped_record_count > 0
     end
-    logger.debug "#{self.class.name}: Thread pool shutdown complete"
-    logger.warn "#{self.class.name}: #{skipped_record_count} skipped records" if skipped_record_count > 0
     # check again now that we've waited, there could still be some
     # that didn't show up before.

data/lib/traject/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Traject
-  VERSION = "3.1.0.rc1"
+  VERSION = "3.1.0"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: traject
 version: !ruby/object:Gem::Version
-  version: 3.1.0.rc1
+  version: 3.1.0
 platform: ruby
 authors:
 - Jonathan Rochkind
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-04-10 00:00:00.000000000 Z
+date: 2019-04-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby
@@ -388,9 +388,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">"
+  - - ">="
     - !ruby/object:Gem::Version
-      version: 1.3.1
+      version: '0'
 requirements: []
 rubyforge_project:
 rubygems_version: 2.7.6