rdf-normalize 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -4
- data/VERSION +1 -1
- data/lib/rdf/normalize/format.rb +12 -1
- data/lib/rdf/normalize/rdfc10.rb +11 -3
- data/lib/rdf/normalize.rb +27 -2
- metadata +14 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 150cdddab40f368e1d1e68ebc65efe3990032729e8d9a591ef8436d61e81d057
|
4
|
+
data.tar.gz: 4510812f3e52b0159ec2025421d116d5c98d37840f3f87e25affb8392a5aa8b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff3fd846a595da0df711bd83673498d259260a19f42f59a9d5f10e55a70670de7c0fba4b301d63ea725e2484179fd6b91b462ea943df9afe8c3d937660d06327
|
7
|
+
data.tar.gz: c66aa1ec1740e1d0c894d5ed9104b61819d055fc83aa2c6674fa1c34d2c9e62c395a78bb6aeec2b46c68c063fc6e79d55250e8d590a0f97c0624888488c4d60c
|
data/README.md
CHANGED
@@ -22,7 +22,7 @@ Algorithms implemented:
|
|
22
22
|
Install with `gem install rdf-normalize`
|
23
23
|
|
24
24
|
* 100% free and unencumbered [public domain](https://unlicense.org/) software.
|
25
|
-
* Compatible with Ruby >=
|
25
|
+
* Compatible with Ruby >= 3.0.
|
26
26
|
|
27
27
|
## Usage
|
28
28
|
|
@@ -37,7 +37,14 @@ Full documentation available on [GitHub][Normalize doc]
|
|
37
37
|
require 'rdf/normalize'
|
38
38
|
require 'rdf/turtle'
|
39
39
|
g = RDF::Graph.load("etc/doap.ttl")
|
40
|
-
puts g.dump(:normalize)
|
40
|
+
puts g.dump(:normalize) # Can also use :canonicalize
|
41
|
+
|
42
|
+
### Normalizing an abstract Graph/Dataset
|
43
|
+
require 'rdf/normalize'
|
44
|
+
require 'rdf/turtle'
|
45
|
+
g = RDF::Graph.load("etc/doap.ttl")
|
46
|
+
g_canon = g.canonicalize # graph with URIs, literals, and blank nodes canonicalized.
|
47
|
+
puts g_canon.dump(:nquads) # Normalized, but not sorted
|
41
48
|
|
42
49
|
### Principal Classes
|
43
50
|
* {RDF::Normalize}
|
@@ -46,11 +53,13 @@ Full documentation available on [GitHub][Normalize doc]
|
|
46
53
|
* {RDF::Normalize::Writer}
|
47
54
|
* {RDF::Normalize::URGNA2012}
|
48
55
|
* {RDF::Normalize::RDFC10}
|
56
|
+
* {RDF::Canonicalize} – extends {RDF::Normalize}
|
57
|
+
* {RDF::Canonicalize::Format}
|
49
58
|
|
50
59
|
## Dependencies
|
51
60
|
|
52
|
-
* [Ruby](https://ruby-lang.org/) (>=
|
53
|
-
* [RDF.rb](https://rubygems.org/gems/rdf) (~> 3.
|
61
|
+
* [Ruby](https://ruby-lang.org/) (>= 3.0)
|
62
|
+
* [RDF.rb](https://rubygems.org/gems/rdf) (~> 3.3)
|
54
63
|
|
55
64
|
## Installation
|
56
65
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.7.0
|
data/lib/rdf/normalize/format.rb
CHANGED
@@ -2,7 +2,18 @@ require 'rdf/nquads'
|
|
2
2
|
|
3
3
|
module RDF::Normalize
|
4
4
|
class Format < RDF::Format
|
5
|
-
content_type 'application/
|
5
|
+
content_type 'application/canonical+n-quads', alias: 'application/x-canonical+n-quads'
|
6
|
+
content_encoding 'utf-8'
|
7
|
+
|
8
|
+
# It reads like normal N-Quads
|
9
|
+
reader { RDF::NQuads::Reader}
|
10
|
+
writer { RDF::Normalize::Writer }
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
module RDF::Canonicalize
|
15
|
+
class Format < RDF::Format
|
16
|
+
content_type 'application/canonical+n-quads', alias: 'application/x-canonical+n-quads'
|
6
17
|
content_encoding 'utf-8'
|
7
18
|
|
8
19
|
# It reads like normal N-Quads
|
data/lib/rdf/normalize/rdfc10.rb
CHANGED
@@ -17,10 +17,16 @@ module RDF::Normalize
|
|
17
17
|
# @option options [Integer] :max_calls (40)
|
18
18
|
# Maximum number of calls allowed for recursive blank node labeling,
|
19
19
|
# as a multiple of the total number of blank nodes in the dataset.
|
20
|
+
# @option options [:MD5, :SHA1, :SHA2, :SHA256, :SHA384, :SHA512] :hash_algorithm (:SHA256)
|
21
|
+
# See [Digest Algorithms](https://github.com/ruby/digest#digest-algorithms)
|
20
22
|
# @return [RDF::Enumerable]
|
21
23
|
# @raise [RuntimeError] if the maximum number of levels of recursion is exceeded.
|
22
24
|
def initialize(enumerable, **options)
|
23
25
|
@dataset, @options = enumerable, options
|
26
|
+
@options[:hash_algorithm] ||= :SHA256
|
27
|
+
unless %i{MD5 SHA1 SHA2 SHA256 SHA384 SHA512}.include?(@options[:hash_algorithm])
|
28
|
+
raise UnknownHashAlgorithm, "UnknownHashAlgorithm: #{@options[:hash_algorithm].inspect}. Use one of MD5, SHA1, SHA2, SHA256, SHA384, or SHA512"
|
29
|
+
end
|
24
30
|
end
|
25
31
|
|
26
32
|
# Yields each normalized statement
|
@@ -158,6 +164,7 @@ module RDF::Normalize
|
|
158
164
|
include RDF::Util::Logger
|
159
165
|
|
160
166
|
attr_accessor :bnode_to_statements
|
167
|
+
attr_accessor :hash_algorithm
|
161
168
|
attr_accessor :hash_to_bnodes
|
162
169
|
attr_accessor :canonical_issuer
|
163
170
|
attr_accessor :max_calls
|
@@ -165,6 +172,7 @@ module RDF::Normalize
|
|
165
172
|
|
166
173
|
def initialize(**options)
|
167
174
|
@options = options
|
175
|
+
@hash_algorithm = Digest.const_get(options.fetch(:hash_algorithm, :SHA256))
|
168
176
|
@bnode_to_statements, @hash_to_bnodes, @canonical_issuer = {}, {}, IdentifierIssuer.new("c14n")
|
169
177
|
@max_calls, @total_calls = nil, 0
|
170
178
|
end
|
@@ -233,7 +241,7 @@ module RDF::Normalize
|
|
233
241
|
# @param [RDF::Node] node
|
234
242
|
# @param [IdentifierIssuer] issuer
|
235
243
|
# @return [Array<String,IdentifierIssuer>] the Hash and issuer
|
236
|
-
# @raise [
|
244
|
+
# @raise [MaxCallsExceeded] If total number of calls has exceeded `max_calls` times the number of blank nodes in the dataset.
|
237
245
|
def hash_n_degree_quads(node, issuer)
|
238
246
|
log_debug("hndq:")
|
239
247
|
log_debug(" log point", "Hash N-Degree Quads function (4.9.3).")
|
@@ -241,7 +249,7 @@ module RDF::Normalize
|
|
241
249
|
log_debug(" issuer") {issuer.inspect}
|
242
250
|
|
243
251
|
if max_calls && total_calls >= max_calls
|
244
|
-
raise "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
|
252
|
+
raise MaxCallsExceeded, "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
|
245
253
|
end
|
246
254
|
@total_calls += 1
|
247
255
|
|
@@ -367,7 +375,7 @@ module RDF::Normalize
|
|
367
375
|
protected
|
368
376
|
|
369
377
|
def hexdigest(val)
|
370
|
-
|
378
|
+
hash_algorithm.hexdigest(val)
|
371
379
|
end
|
372
380
|
|
373
381
|
# Group adjacent bnodes by hash
|
data/lib/rdf/normalize.rb
CHANGED
@@ -3,7 +3,7 @@ require 'digest'
|
|
3
3
|
|
4
4
|
module RDF
|
5
5
|
##
|
6
|
-
# **`RDF::Normalize`** is an RDF Graph
|
6
|
+
# **`RDF::Normalize`** is an RDF Graph canonicalization plugin for RDF.rb.
|
7
7
|
#
|
8
8
|
# @example Requiring the `RDF::Normalize` module
|
9
9
|
# require 'rdf/normalize'
|
@@ -18,7 +18,7 @@ module RDF
|
|
18
18
|
# @example Returning normalized N-Quads
|
19
19
|
#
|
20
20
|
# g = RDF::Graph.load("etc/doap.ttl")
|
21
|
-
# g.dump(:normalize)
|
21
|
+
# g.dump(:normalize) # or :canonicalize
|
22
22
|
#
|
23
23
|
# @example Writing a repository as normalized N-Quads
|
24
24
|
#
|
@@ -66,5 +66,30 @@ module RDF
|
|
66
66
|
end
|
67
67
|
module_function :new
|
68
68
|
|
69
|
+
class MaxCallsExceeded < RuntimeError; end
|
70
|
+
class UnknownHashAlgorithm < RuntimeError; end
|
71
|
+
end
|
72
|
+
|
73
|
+
module Canonicalize
|
74
|
+
# RDF::Canonicalize extends RDF::Normalize.
|
75
|
+
include Normalize
|
76
|
+
end
|
77
|
+
|
78
|
+
# Change RDF::Enumerable#canonicalize
|
79
|
+
module Enumerable
|
80
|
+
##
|
81
|
+
# Returns the Enumerable produced by RDF::Normalize.
|
82
|
+
# This also canonicalizes URIs and Literals.
|
83
|
+
#
|
84
|
+
# @return [RDF::Enumerable]
|
85
|
+
remove_method :canonicalize if method_defined? :canonicalize
|
86
|
+
def canonicalize
|
87
|
+
# Ensure that statements are queryable, countable and enumerable
|
88
|
+
this = self
|
89
|
+
enum = Enumerator.new do |yielder|
|
90
|
+
this.send(:each_statement) {|y| yielder << y.canonicalize}
|
91
|
+
end
|
92
|
+
RDF::Normalize.new(enum)
|
93
|
+
end
|
69
94
|
end
|
70
95
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-normalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdf
|
@@ -16,70 +16,70 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '3.
|
19
|
+
version: '3.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '3.
|
26
|
+
version: '3.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rdf-spec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3.
|
33
|
+
version: '3.3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '3.
|
40
|
+
version: '3.3'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '3.
|
47
|
+
version: '3.12'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.
|
54
|
+
version: '3.12'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: json-ld
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '3.
|
61
|
+
version: '3.3'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '3.
|
68
|
+
version: '3.3'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rdf-trig
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '3.
|
75
|
+
version: '3.3'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '3.
|
82
|
+
version: '3.3'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: yard
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -129,14 +129,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
129
|
requirements:
|
130
130
|
- - ">="
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: '
|
132
|
+
version: '3.0'
|
133
133
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
134
|
requirements:
|
135
135
|
- - ">="
|
136
136
|
- !ruby/object:Gem::Version
|
137
137
|
version: '0'
|
138
138
|
requirements: []
|
139
|
-
rubygems_version: 3.4.
|
139
|
+
rubygems_version: 3.4.19
|
140
140
|
signing_key:
|
141
141
|
specification_version: 4
|
142
142
|
summary: RDF Graph normalizer for Ruby.
|