traject 3.8.0 → 3.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/CHANGES.md +12 -1
- data/lib/tasks/load_maps.rake +51 -8
- data/lib/traject/macros/marc21_semantics.rb +7 -1
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +7500 -6
- data/test/indexer/macros/macros_marc21_semantics_test.rb +20 -1
- data/test/test_support/iso639-3_lang.marc +1 -0
- data/traject.gemspec +1 -1
- metadata +15 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e46b17b653fcdad34dbd5f7cf1d203c8d63a5c43eed9691bdb13b1f83d737a5
|
4
|
+
data.tar.gz: e2c7c086ffa93c167dd03be87953a9b2fd5b852ee1ff5695a9269da989c176f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d43267258736b94dcb8befdeec486892195239144855bb3afd7756e9d626af44c3cb511b7fab5da5328f4844ffc8c0bfa1dd9dc89295e96e3ac59e3cc39cc89d
|
7
|
+
data.tar.gz: 304de8c8c6bcd89d4cbc6d98bb3ff9a9a18e117d51df2c6869443c70c3c8dbea9d498fe92f04b192a42100ee979c4fe97ba91895ee5faa7645fa70f4166f09c1
|
data/.github/workflows/ruby.yml
CHANGED
@@ -12,7 +12,7 @@ jobs:
|
|
12
12
|
strategy:
|
13
13
|
fail-fast: false
|
14
14
|
matrix:
|
15
|
-
ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', 'jruby-9.
|
15
|
+
ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3', 'jruby-9.2', 'jruby-9.3', 'jruby-9.4' ]
|
16
16
|
name: Ruby ${{ matrix.ruby }}
|
17
17
|
steps:
|
18
18
|
- uses: actions/checkout@v2
|
data/CHANGES.md
CHANGED
@@ -4,7 +4,18 @@
|
|
4
4
|
|
5
5
|
*
|
6
6
|
|
7
|
-
|
7
|
+
## 3.8.2
|
8
|
+
|
9
|
+
Bug fix for the `#filing_version` logic, which was incorrectly assuming the
|
10
|
+
first subfield in a field would hold content (e.g., `$a`) and thus failed
|
11
|
+
when it held a pointer to a linking field (e.g., `$6 245-01`).
|
12
|
+
|
13
|
+
```
|
14
|
+
|
15
|
+
|
16
|
+
## 3.8.1
|
17
|
+
|
18
|
+
Ugh. Forgot about the JRuby 9.1 problem with Bundler 2. Changing the requirement back.
|
8
19
|
|
9
20
|
## 3.8.0
|
10
21
|
|
data/lib/tasks/load_maps.rake
CHANGED
@@ -1,20 +1,16 @@
|
|
1
1
|
require 'net/http'
|
2
2
|
require 'open-uri'
|
3
|
+
require 'csv'
|
3
4
|
|
4
5
|
|
5
6
|
|
7
|
+
CODELIST_NS = 'info:lc/xmlns/codelist-v1'
|
6
8
|
|
7
9
|
namespace :load_maps do
|
8
10
|
|
9
11
|
desc "Load MARC geo codes by screen-scraping LC"
|
10
|
-
task :marc_geographic do
|
11
|
-
|
12
|
-
require 'nokogiri'
|
13
|
-
rescue LoadError => e
|
14
|
-
$stderr.puts "\n load_maps:marc_geographic task requires nokogiri"
|
15
|
-
$stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
|
16
|
-
exit 1
|
17
|
-
end
|
12
|
+
task :marc_geographic do |task|
|
13
|
+
require_nokogiri(task)
|
18
14
|
|
19
15
|
source_url = "http://www.loc.gov/marc/geoareas/gacs_code.html"
|
20
16
|
|
@@ -45,4 +41,51 @@ namespace :load_maps do
|
|
45
41
|
end
|
46
42
|
$stderr.puts "Done."
|
47
43
|
end
|
44
|
+
|
45
|
+
desc "Load MARC language codes from LOC and SIL"
|
46
|
+
task :marc_languages do |task|
|
47
|
+
require_nokogiri(task)
|
48
|
+
filename = ENV["OUTPUT_TO"] || File.expand_path("../../translation_maps/marc_languages.yaml", __FILE__)
|
49
|
+
file = File.open(filename, "w:utf-8")
|
50
|
+
$stderr.puts "Writing to `#{filename}` ..."
|
51
|
+
file.puts("# Map Language Codes (in 008[35-37], 041) to User Friendly Term\r")
|
52
|
+
|
53
|
+
marc_language_source_url = 'https://www.loc.gov/standards/codelists/languages.xml'
|
54
|
+
doc = Nokogiri::XML(URI.parse(marc_language_source_url).open)
|
55
|
+
marc_language_hash = doc.xpath('//codelist:language', codelist: CODELIST_NS)
|
56
|
+
.to_h do |node|
|
57
|
+
[node.xpath('./codelist:code/text()', codelist: CODELIST_NS).to_s,
|
58
|
+
node.xpath('./codelist:name/text()', codelist: CODELIST_NS).to_s]
|
59
|
+
end.reject { |key, _val| %w[und zxx].include? key }
|
60
|
+
|
61
|
+
file.puts "\r"
|
62
|
+
file.puts("# MARC language codes (including obsolete codes), from #{marc_language_source_url}\r\n")
|
63
|
+
marc_language_hash.sort_by { |k, _v| k }.each do |key, value|
|
64
|
+
file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
|
65
|
+
end
|
66
|
+
|
67
|
+
iso_639_3_url = 'https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab'
|
68
|
+
parsed_url = URI.parse(iso_639_3_url)
|
69
|
+
iso_languages = CSV.parse(parsed_url.read(encoding: 'UTF-8'), headers: true, col_sep: "\t", encoding: "UTF-8")
|
70
|
+
iso_language_hash = iso_languages.to_h { |row| [row['Id'], row['Ref_Name']] }
|
71
|
+
.reject { |key, _val| %w[und zxx].include? key }
|
72
|
+
.reject { |key, _val| marc_language_hash.keys.include? key }
|
73
|
+
file.puts "\r"
|
74
|
+
file.puts("# ISO 639-3 codes, from #{iso_639_3_url}\r")
|
75
|
+
iso_language_hash.sort_by { |k, _v| k }.each do |key, value|
|
76
|
+
file.puts("#{key}: #{escape_special_yaml_chars(value)}\r")
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def require_nokogiri(task)
|
81
|
+
require 'nokogiri'
|
82
|
+
rescue LoadError
|
83
|
+
$stderr.puts "\n #{task&.name} task requires nokogiri"
|
84
|
+
$stderr.puts " Try `gem install nokogiri` and try again. Exiting...\n\n"
|
85
|
+
exit 1
|
86
|
+
end
|
87
|
+
|
88
|
+
def escape_special_yaml_chars(string)
|
89
|
+
string.match(/[\,\']/) ? %Q{"#{string}"} : string
|
90
|
+
end
|
48
91
|
end
|
@@ -167,7 +167,13 @@ module Traject::Macros
|
|
167
167
|
# (b) include the first subfield in the record
|
168
168
|
|
169
169
|
subs = spec.subfields
|
170
|
-
|
170
|
+
|
171
|
+
# Get the code for the first alphabetic subfield, which would be
|
172
|
+
# the one getting characters shifted off
|
173
|
+
|
174
|
+
first_alpha_code = field.subfields.find { |sf| sf.code =~ /[a-z]/ }&.code # `find`, not `first`: Array#first ignores a block and would return the leading `$6` linkage subfield; `&.` yields nil when no alphabetic subfield exists
|
175
|
+
|
176
|
+
return str unless subs && subs.include?(first_alpha_code)
|
171
177
|
|
172
178
|
# OK. If we got this far we actually need to strip characters off the string
|
173
179
|
|
data/lib/traject/version.rb
CHANGED