traject 3.8.0 → 3.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e9868f3b83385402413a2fc6c8865dc6ab3dd3776c25c0f1f1b88bf20024005
4
- data.tar.gz: f86c298c93905948ca9425983e65811b2091ba39d6105895744df183a7c43695
3
+ metadata.gz: 5e46b17b653fcdad34dbd5f7cf1d203c8d63a5c43eed9691bdb13b1f83d737a5
4
+ data.tar.gz: e2c7c086ffa93c167dd03be87953a9b2fd5b852ee1ff5695a9269da989c176f7
5
5
  SHA512:
6
- metadata.gz: 7d1e1122020632ac10d4da030915e0f710c8dd9bb6e9780089129c1ec7febb76f7b0e23ac4828d1e4860429164fe6052c8db768d478ae6102f4819b8cd512f4d
7
- data.tar.gz: '08eead90c2ddfebe141aa4bea2f280878c27af35509a9f5b220d2899fad820f9954dbc30392aa3cfd00b79e77b8f5266c69ac7101fe381d5e777e47b2e12aca7'
6
+ metadata.gz: d43267258736b94dcb8befdeec486892195239144855bb3afd7756e9d626af44c3cb511b7fab5da5328f4844ffc8c0bfa1dd9dc89295e96e3ac59e3cc39cc89d
7
+ data.tar.gz: 304de8c8c6bcd89d4cbc6d98bb3ff9a9a18e117d51df2c6869443c70c3c8dbea9d498fe92f04b192a42100ee979c4fe97ba91895ee5faa7645fa70f4166f09c1
@@ -12,7 +12,7 @@ jobs:
12
12
  strategy:
13
13
  fail-fast: false
14
14
  matrix:
15
- ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', 'jruby-9.1', 'jruby-9.2' ]
15
+ ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3', 'jruby-9.2', 'jruby-9.3', 'jruby-9.4' ]
16
16
  name: Ruby ${{ matrix.ruby }}
17
17
  steps:
18
18
  - uses: actions/checkout@v2
data/CHANGES.md CHANGED
@@ -4,7 +4,18 @@
4
4
 
5
5
  *
6
6
 
7
- *
7
+ ## 3.8.2
8
+
9
+ Bug fix for the `#filing_version` logic, which was incorrectly assuming the
10
+ first subfield in a field would hold content (e.g., `$a`) and thus failed
11
+ when it held a pointer to a linking field (e.g., `$6 245-01`).
12
+
13
+
14
+
15
+
16
+ ## 3.8.1
17
+
18
+ Ugh. Forgot about the JRuby 9.1 problem with Bundler 2. Changing the requirement back.
8
19
 
9
20
  ## 3.8.0
10
21
 
@@ -1,20 +1,16 @@
1
1
  require 'net/http'
2
2
  require 'open-uri'
3
+ require 'csv'
3
4
 
4
5
 
5
6
 
7
+ CODELIST_NS = 'info:lc/xmlns/codelist-v1'
6
8
 
7
9
  namespace :load_maps do
8
10
 
9
11
  desc "Load MARC geo codes by screen-scraping LC"
10
- task :marc_geographic do
11
- begin
12
- require 'nokogiri'
13
- rescue LoadError => e
14
- $stderr.puts "\n load_maps:marc_geographic task requires nokogiri"
15
- $stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
16
- exit 1
17
- end
12
+ task :marc_geographic do |task|
13
+ require_nokogiri(task)
18
14
 
19
15
  source_url = "http://www.loc.gov/marc/geoareas/gacs_code.html"
20
16
 
@@ -45,4 +41,51 @@ namespace :load_maps do
45
41
  end
46
42
  $stderr.puts "Done."
47
43
  end
44
+
45
+ desc "Load MARC language codes from LOC and SIL"
46
+ task :marc_languages do |task|
47
+ require_nokogiri(task)
48
+ filename = ENV["OUTPUT_TO"] || File.expand_path("../../translation_maps/marc_languages.yaml", __FILE__)
49
+ file = File.open(filename, "w:utf-8")
50
+ $stderr.puts "Writing to `#{filename}` ..."
51
+ file.puts("# Map Language Codes (in 008[35-37], 041) to User Friendly Term\r")
52
+
53
+ marc_language_source_url = 'https://www.loc.gov/standards/codelists/languages.xml'
54
+ doc = Nokogiri::XML(URI.parse(marc_language_source_url).open)
55
+ marc_language_hash = doc.xpath('//codelist:language', codelist: CODELIST_NS)
56
+ .to_h do |node|
57
+ [node.xpath('./codelist:code/text()', codelist: CODELIST_NS).to_s,
58
+ node.xpath('./codelist:name/text()', codelist: CODELIST_NS).to_s]
59
+ end.reject { |key, _val| %w[und zxx].include? key }
60
+
61
+ file.puts "\r"
62
+ file.puts("# MARC language codes (including obsolete codes), from #{marc_language_source_url}\r\n")
63
+ marc_language_hash.sort_by { |k, _v| k }.each do |key, value|
64
+ file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
65
+ end
66
+
67
+ iso_639_3_url = 'https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab'
68
+ parsed_url = URI.parse(iso_639_3_url)
69
+ iso_languages = CSV.parse(parsed_url.read(encoding: 'UTF-8'), headers: true, col_sep: "\t", encoding: "UTF-8")
70
+ iso_language_hash = iso_languages.to_h { |row| [row['Id'], row['Ref_Name']] }
71
+ .reject { |key, _val| %w[und zxx].include? key }
72
+ .reject { |key, _val| marc_language_hash.keys.include? key }
73
+ file.puts "\r"
74
+ file.puts("# ISO 639-3 codes, from #{iso_639_3_url}\r")
75
+ iso_language_hash.sort_by { |k, _v| k }.each do |key, value|
76
+ file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
77
+ end
78
+ end
79
+
80
+ def require_nokogiri(task)
81
+ require 'nokogiri'
82
+ rescue LoadError
83
+ $stderr.puts "\n #{task&.name} task requires nokogiri"
84
+ $stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
85
+ exit 1
86
+ end
87
+
88
+ def escape_special_yaml_chars(string)
89
+ string.match(/[\,\']/) ? %Q{"#{string}"} : string
90
+ end
48
91
  end
@@ -167,7 +167,13 @@ module Traject::Macros
167
167
  # (b) include the first subfield in the record
168
168
 
169
169
  subs = spec.subfields
170
- return str unless subs && subs.include?(field.subfields[0].code)
170
+
171
+ # Get the code for the first alphabetic subfield, which would be
172
+ # the one getting characters shifted off
173
+
174
+ first_alpha_code = field.subfields.find { |sf| sf.code =~ /[a-z]/ }&.code # `find`, not `first`: `first` ignores its block and would always return subfield 0 (e.g. `$6`); nil when no alphabetic subfield exists
175
+
176
+ return str unless subs && subs.include?(first_alpha_code)
171
177
 
172
178
  # OK. If we got this far we actually need to strip characters off the string
173
179
 
@@ -1,3 +1,3 @@
1
1
  module Traject
2
- VERSION = "3.8.0"
2
+ VERSION = "3.8.2"
3
3
  end
OSZAR »